hdmf-1.5.4/Legal.txt

“hdmf” Copyright (c) 2017-2020, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.

If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov.

NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit others to do so.

hdmf-1.5.4/MANIFEST.in

include license.txt Legal.txt versioneer.py src/hdmf/_version.py
include requirements.txt requirements-dev.txt requirements-doc.txt requirements-min.txt
include test.py tox.ini
graft tests

hdmf-1.5.4/PKG-INFO

Metadata-Version: 2.1
Name: hdmf
Version: 1.5.4
Summary: A package for standardizing hierarchical object data
Home-page: https://github.com/hdmf-dev/hdmf
Author: Andrew Tritt
Author-email: ajtritt@lbl.gov
License: BSD
Description: ========================================
The Hierarchical Data Modeling Framework
========================================

The Hierarchical Data Modeling Framework, or *HDMF*, is a Python package for working with hierarchical data. It provides APIs for specifying data models, reading and writing data to different storage backends, and representing data with Python objects.

Documentation of HDMF can be found at https://hdmf.readthedocs.io

Latest Release
==============

.. image:: https://badge.fury.io/py/hdmf.svg
    :target: https://badge.fury.io/py/hdmf

.. image:: https://anaconda.org/conda-forge/hdmf/badges/version.svg
    :target: https://anaconda.org/conda-forge/hdmf

Build Status
============

.. table::

  +---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+
  | Linux                                                               | Windows and macOS                                                                                |
  +=====================================================================+==================================================================================================+
  | .. image:: https://circleci.com/gh/hdmf-dev/hdmf.svg?style=shield   | .. 
image:: https://dev.azure.com/hdmf-dev/hdmf/_apis/build/status/hdmf-dev.hdmf?branchName=dev | | :target: https://circleci.com/gh/hdmf-dev/hdmf | :target: https://dev.azure.com/hdmf-dev/hdmf/_build/latest?definitionId=1&branchName=dev | +---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+ **Conda** .. image:: https://circleci.com/gh/conda-forge/hdmf-feedstock.svg?style=shield :target: https://circleci.com/gh/conda-forge/hdmf-feedstock Overall Health ============== .. image:: https://codecov.io/gh/hdmf-dev/hdmf/branch/dev/graph/badge.svg :target: https://codecov.io/gh/hdmf-dev/hdmf .. image:: https://requires.io/github/hdmf-dev/hdmf/requirements.svg?branch=dev :target: https://requires.io/github/hdmf-dev/hdmf/requirements/?branch=dev :alt: Requirements Status .. image:: https://readthedocs.org/projects/hdmf/badge/?version=latest :target: https://hdmf.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status Installation ============ See the HDMF documentation for details http://hdmf.readthedocs.io/en/latest/getting_started.html#installation Code of Conduct =============== This project and everyone participating in it is governed by our `code of conduct guidelines <.github/CODE_OF_CONDUCT.md>`_. By participating, you are expected to uphold this code. Contributing ============ For details on how to contribute to HDMF see our `contribution guidelines `_. LICENSE ======= "hdmf" Copyright (c) 2017-2020, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form. COPYRIGHT ========= "hdmf" Copyright (c) 2017-2020, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit other to do so. Keywords: python HDF HDF5 cross-platform open-data data-format open-source open-science reproducible-research Platform: UNKNOWN Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: License :: OSI Approved :: BSD License Classifier: Development Status :: 2 - Pre-Alpha Classifier: Intended Audience :: Developers Classifier: Intended Audience :: Science/Research Classifier: Operating System :: Microsoft :: Windows Classifier: Operating System :: MacOS Classifier: Operating System :: Unix Classifier: Topic :: Scientific/Engineering :: Medical Science Apps. Description-Content-Type: text/x-rst; charset=UTF-8 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/README.rst0000644000655200065520000001406400000000000015522 0ustar00circlecicircleci00000000000000======================================== The Hierarchical Data Modeling Framework ======================================== The Hierarchical Data Modeling Framework, or *HDMF*, is a Python package for working with hierarchical data. It provides APIs for specifying data models, reading and writing data to different storage backends, and representing data with Python object. Documentation of HDMF can be found at https://hdmf.readthedocs.io Latest Release ============== .. image:: https://badge.fury.io/py/hdmf.svg :target: https://badge.fury.io/py/hdmf .. image:: https://anaconda.org/conda-forge/hdmf/badges/version.svg :target: https://anaconda.org/conda-forge/hdmf Build Status ============ .. 
table:: +---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+ | Linux | Windows and macOS | +=====================================================================+==================================================================================================+ | .. image:: https://circleci.com/gh/hdmf-dev/hdmf.svg?style=shield | .. image:: https://dev.azure.com/hdmf-dev/hdmf/_apis/build/status/hdmf-dev.hdmf?branchName=dev | | :target: https://circleci.com/gh/hdmf-dev/hdmf | :target: https://dev.azure.com/hdmf-dev/hdmf/_build/latest?definitionId=1&branchName=dev | +---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+ **Conda** .. image:: https://circleci.com/gh/conda-forge/hdmf-feedstock.svg?style=shield :target: https://circleci.com/gh/conda-forge/hdmf-feedstock Overall Health ============== .. image:: https://codecov.io/gh/hdmf-dev/hdmf/branch/dev/graph/badge.svg :target: https://codecov.io/gh/hdmf-dev/hdmf .. image:: https://requires.io/github/hdmf-dev/hdmf/requirements.svg?branch=dev :target: https://requires.io/github/hdmf-dev/hdmf/requirements/?branch=dev :alt: Requirements Status .. image:: https://readthedocs.org/projects/hdmf/badge/?version=latest :target: https://hdmf.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status Installation ============ See the HDMF documentation for details http://hdmf.readthedocs.io/en/latest/getting_started.html#installation Code of Conduct =============== This project and everyone participating in it is governed by our `code of conduct guidelines <.github/CODE_OF_CONDUCT.md>`_. By participating, you are expected to uphold this code. Contributing ============ For details on how to contribute to HDMF see our `contribution guidelines `_. LICENSE ======= "hdmf" Copyright (c) 2017-2020, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form. COPYRIGHT ========= "hdmf" Copyright (c) 2017-2020, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit other to do so. ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/license.txt0000644000655200065520000000454700000000000016223 0ustar00circlecicircleci00000000000000“hdmf” Copyright (c) 2017-2020, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form. ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/requirements-dev.txt0000644000655200065520000000012100000000000020060 0ustar00circlecicircleci00000000000000codecov==2.0.15 coverage==5.0.2 flake8==3.7.9 python-dateutil==2.8.1 tox==3.14.3 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/requirements-doc.txt0000644000655200065520000000004700000000000020056 0ustar00circlecicircleci00000000000000sphinx sphinx_rtd_theme sphinx-gallery ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/requirements-min.txt0000644000655200065520000000032400000000000020072 0ustar00circlecicircleci00000000000000# these minimum requirements specify '==' for testing; setup.py replaces '==' with '>=' h5py==2.9 # support for setting attrs to lists of utf-8 added in 2.9 numpy==1.16 scipy==1.1 pandas==0.23 ruamel.yaml==0.15 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/requirements.txt0000644000655200065520000000011300000000000017305 0ustar00circlecicircleci00000000000000h5py==2.10.0 numpy==1.18.1 scipy==1.4.1 pandas==0.25.3 ruamel.yaml==0.16.5 ././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.928188 hdmf-1.5.4/setup.cfg0000644000655200065520000000115300000000000015647 0ustar00circlecicircleci00000000000000[bdist_wheel] universal = 1 [versioneer] vcs = git versionfile_source = src/hdmf/_version.py versionfile_build = hdmf/_version.py style = pep440-pre tag_prefix = *.*.* [flake8] max-line-length = 120 max-complexity = 17 exclude = .git, .tox, __pycache__, build/, dist/, docs/source/conf.py versioneer.py per-file-ignores = src/hdmf/__init__.py:F401 src/hdmf/backends/__init__.py:F401 src/hdmf/backends/hdf5/__init__.py:F401 src/hdmf/build/__init__.py:F401 src/hdmf/spec/__init__.py:F401 src/hdmf/validate/__init__.py:F401 [metadata] description-file = README.rst [egg_info] tag_build = tag_date = 0 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/setup.py0000755000655200065520000000411700000000000015546 0ustar00circlecicircleci00000000000000# -*- coding: 
utf-8 -*- from setuptools import setup, find_packages import versioneer with open('README.rst', 'r') as fp: readme = fp.read() pkgs = find_packages('src', exclude=['data']) print('found these packages:', pkgs) schema_dir = 'common/hdmf-common-schema/common' with open('requirements-min.txt', 'r') as fp: # replace == with >= and remove trailing comments and spaces reqs = [x.replace('==', '>=').split('#')[0].strip() for x in fp] reqs = [x for x in reqs if x] # remove empty strings print(reqs) setup_args = { 'name': 'hdmf', 'version': versioneer.get_version(), 'cmdclass': versioneer.get_cmdclass(), 'description': 'A package for standardizing hierarchical object data', 'long_description': readme, 'long_description_content_type': 'text/x-rst; charset=UTF-8', 'author': 'Andrew Tritt', 'author_email': 'ajtritt@lbl.gov', 'url': 'https://github.com/hdmf-dev/hdmf', 'license': "BSD", 'install_requires': reqs, 'packages': pkgs, 'package_dir': {'': 'src'}, 'package_data': {'hdmf': ["%s/*.yaml" % schema_dir, "%s/*.json" % schema_dir]}, 'classifiers': [ "Programming Language :: Python", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "License :: OSI Approved :: BSD License", "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Operating System :: Microsoft :: Windows", "Operating System :: MacOS", "Operating System :: Unix", "Topic :: Scientific/Engineering :: Medical Science Apps." ], 'keywords': 'python ' 'HDF ' 'HDF5 ' 'cross-platform ' 'open-data ' 'data-format ' 'open-source ' 'open-science ' 'reproducible-research ', 'zip_safe': False } if __name__ == '__main__': setup(**setup_args) ././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.908188 hdmf-1.5.4/src/0000755000655200065520000000000000000000000014615 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.928188 hdmf-1.5.4/src/hdmf/0000755000655200065520000000000000000000000015533 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/__init__.py0000644000655200065520000000127000000000000017644 0ustar00circlecicircleci00000000000000from . import query # noqa: F401 from .container import Container, Data, DataRegion from .utils import docval, getargs from .region import ListSlicer from .backends.hdf5.h5_utils import H5RegionSlicer, H5Dataset @docval({'name': 'dataset', 'type': None, 'doc': 'the HDF5 dataset to slice'}, {'name': 'region', 'type': None, 'doc': 'the region reference to use to slice'}, is_method=False) def get_region_slicer(**kwargs): dataset, region = getargs('dataset', 'region', kwargs) if isinstance(dataset, (list, tuple, Data)): return ListSlicer(dataset, region) elif isinstance(dataset, H5Dataset): return H5RegionSlicer(dataset, region) return None ././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.928188 hdmf-1.5.4/src/hdmf/_version.py0000644000655200065520000000076100000000000017735 0ustar00circlecicircleci00000000000000 # This file was generated by 'versioneer.py' (0.18) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. 
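# Editor's note (not part of the generated file): a minimal sketch of how this
# versioneer-generated module is typically consumed downstream. The import path
# follows the package layout shown in this archive (src/hdmf/_version.py), and
# the expected string matches the version_json defined just below:
#
#     from hdmf._version import get_versions
#     print(get_versions()['version'])   # -> '1.5.4' for this release
#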
import json version_json = ''' { "date": "2020-01-21T19:23:33-0500", "dirty": false, "error": null, "full-revisionid": "594d45ad3094b9f6545a208943b02c22c580c91d", "version": "1.5.4" } ''' # END VERSION_JSON def get_versions(): return json.loads(version_json) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/array.py0000644000655200065520000001243300000000000017226 0ustar00circlecicircleci00000000000000import numpy as np from abc import abstractmethod, ABCMeta class Array: def __init__(self, data): self.__data = data if hasattr(data, 'dtype'): self.dtype = data.dtype else: tmp = data while isinstance(tmp, (list, tuple)): tmp = tmp[0] self.dtype = type(tmp) @property def data(self): return self.__data def __len__(self): return len(self.__data) def get_data(self): return self.__data def __getidx__(self, arg): return self.__data[arg] def __sliceiter(self, arg): return (x for x in range(*arg.indices(len(self)))) def __getitem__(self, arg): if isinstance(arg, list): idx = list() for i in arg: if isinstance(i, slice): idx.extend(x for x in self.__sliceiter(i)) else: idx.append(i) return np.fromiter((self.__getidx__(x) for x in idx), dtype=self.dtype) elif isinstance(arg, slice): return np.fromiter((self.__getidx__(x) for x in self.__sliceiter(arg)), dtype=self.dtype) elif isinstance(arg, tuple): return (self.__getidx__(arg[0]), self.__getidx__(arg[1])) else: return self.__getidx__(arg) class AbstractSortedArray(Array, metaclass=ABCMeta): ''' An abstract class for representing sorted array ''' @abstractmethod def find_point(self, val): pass def get_data(self): return self def __lower(self, other): ins = self.find_point(other) return ins def __upper(self, other): ins = self.__lower(other) while self[ins] == other: ins += 1 return ins def __lt__(self, other): ins = self.__lower(other) return slice(0, ins) def __le__(self, other): ins = self.__upper(other) return slice(0, ins) def __gt__(self, other): ins = self.__upper(other) return slice(ins, len(self)) def __ge__(self, other): ins = self.__lower(other) return slice(ins, len(self)) @staticmethod def __sort(a): if isinstance(a, tuple): return a[0] else: return a def __eq__(self, other): if isinstance(other, list): ret = list() for i in other: eq = self == i ret.append(eq) ret = sorted(ret, key=self.__sort) tmp = list() for i in range(1, len(ret)): a, b = ret[i-1], ret[i] if isinstance(a, tuple): if isinstance(b, tuple): if a[1] >= b[0]: b[0] = a[0] else: tmp.append(slice(*a)) else: if b > a[1]: tmp.append(slice(*a)) elif b == a[1]: a[1] == b+1 else: ret[i] = a else: if isinstance(b, tuple): if a < b[0]: tmp.append(a) else: if b - a == 1: ret[i] = (a, b) else: tmp.append(a) if isinstance(ret[-1], tuple): tmp.append(slice(*ret[-1])) else: tmp.append(ret[-1]) ret = tmp return ret elif isinstance(other, tuple): ge = self >= other[0] ge = ge.start lt = self < other[1] lt = lt.stop if ge == lt: return ge else: return slice(ge, lt) else: lower = self.__lower(other) upper = self.__upper(other) d = upper - lower if d == 1: return lower elif d == 0: return None else: return slice(lower, upper) def __ne__(self, other): eq = self == other if isinstance(eq, tuple): return [slice(0, eq[0]), slice(eq[1], len(self))] else: return [slice(0, eq), slice(eq+1, len(self))] class SortedArray(AbstractSortedArray): ''' A class for wrapping sorted arrays. This class overrides <,>,<=,>=,==, and != to leverage the sorted content for efficiency. 
''' def __init__(self, array): super().__init__(array) def find_point(self, val): return np.searchsorted(self.data, val) class LinSpace(SortedArray): def __init__(self, start, stop, step): self.start = start self.stop = stop self.step = step self.dtype = float if any(isinstance(s, float) for s in (start, stop, step)) else int self.__len = int((stop - start)/step) def __len__(self): return self.__len def find_point(self, val): nsteps = (val-self.start)/self.step fl = int(nsteps) if fl == nsteps: return int(fl) else: return int(fl+1) def __getidx__(self, arg): return self.start + self.step*arg ././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.916188 hdmf-1.5.4/src/hdmf/backends/0000755000655200065520000000000000000000000017305 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/backends/__init__.py0000644000655200065520000000002300000000000021411 0ustar00circlecicircleci00000000000000from . import hdf5 ././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.916188 hdmf-1.5.4/src/hdmf/backends/hdf5/0000755000655200065520000000000000000000000020133 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/backends/hdf5/__init__.py0000644000655200065520000000021000000000000022235 0ustar00circlecicircleci00000000000000from . import h5_utils, h5tools from .h5tools import HDF5IO, H5SpecWriter, H5SpecReader from .h5_utils import H5RegionSlicer, H5DataIO ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/backends/hdf5/h5_utils.py0000644000655200065520000004651700000000000022256 0ustar00circlecicircleci00000000000000from copy import copy from collections.abc import Iterable from abc import ABCMeta, abstractmethod from h5py import Group, Dataset, RegionReference, Reference, special_dtype from h5py import filters as h5py_filters import json import numpy as np import warnings import os from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver from ...array import Array from ...utils import docval, getargs, popargs, call_docval_func, get_docval from ...data_utils import DataIO, AbstractDataChunkIterator from ...region import RegionSlicer from ...spec import SpecWriter, SpecReader class H5Dataset(HDMFDataset): @docval({'name': 'dataset', 'type': (Dataset, Array), 'doc': 'the HDF5 file lazily evaluate'}, {'name': 'io', 'type': 'HDF5IO', 'doc': 'the IO object that was used to read the underlying dataset'}) def __init__(self, **kwargs): self.__io = popargs('io', kwargs) call_docval_func(super().__init__, kwargs) @property def io(self): return self.__io @property def regionref(self): return self.dataset.regionref @property def ref(self): return self.dataset.ref @property def shape(self): return self.dataset.shape class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta): """ An extension of the base ReferenceResolver class to add more abstract methods for subclasses that will read HDF5 references """ @abstractmethod def get_object(self, h5obj): """ A class that maps an HDF5 object to a Builder or Container """ pass def invert(self): """ Return an object that defers reference resolution but in the opposite direction. 
""" if not hasattr(self, '__inverted'): cls = self.get_inverse_class() docval = get_docval(cls.__init__) kwargs = dict() for arg in docval: kwargs[arg['name']] = getattr(self, arg['name']) self.__inverted = cls(**kwargs) return self.__inverted class BuilderResolverMixin(BuilderResolver): """ A mixin for adding to HDF5 reference-resolving types the get_object method that returns Builders """ def get_object(self, h5obj): """ A class that maps an HDF5 object to a Builder """ return self.io.get_builder(h5obj) class ContainerResolverMixin(ContainerResolver): """ A mixin for adding to HDF5 reference-resolving types the get_object method that returns Containers """ def get_object(self, h5obj): """ A class that maps an HDF5 object to a Container """ return self.io.get_container(h5obj) class AbstractH5TableDataset(DatasetOfReferences): @docval({'name': 'dataset', 'type': (Dataset, Array), 'doc': 'the HDF5 file lazily evaluate'}, {'name': 'io', 'type': 'HDF5IO', 'doc': 'the IO object that was used to read the underlying dataset'}, {'name': 'types', 'type': (list, tuple), 'doc': 'the IO object that was used to read the underlying dataset'}) def __init__(self, **kwargs): types = popargs('types', kwargs) call_docval_func(super().__init__, kwargs) self.__refgetters = dict() for i, t in enumerate(types): if t is RegionReference: self.__refgetters[i] = self.__get_regref elif t is Reference: self.__refgetters[i] = self.__get_ref self.__types = types tmp = list() for i in range(len(self.dataset.dtype)): sub = self.dataset.dtype[i] if sub.metadata: if 'vlen' in sub.metadata: t = sub.metadata['vlen'] if t is str: tmp.append('utf') elif t is bytes: tmp.append('ascii') elif 'ref' in sub.metadata: t = sub.metadata['ref'] if t is Reference: tmp.append('object') elif t is RegionReference: tmp.append('region') else: tmp.append(sub.type.__name__) self.__dtype = tmp @property def types(self): return self.__types @property def dtype(self): return self.__dtype def __getitem__(self, arg): rows = copy(super().__getitem__(arg)) if np.issubdtype(type(arg), np.integer): self.__swap_refs(rows) else: for row in rows: self.__swap_refs(row) return rows def __swap_refs(self, row): for i in self.__refgetters: getref = self.__refgetters[i] row[i] = getref(row[i]) def __get_ref(self, ref): return self.get_object(self.dataset.file[ref]) def __get_regref(self, ref): obj = self.__get_ref(ref) return obj[ref] def resolve(self, manager): return self[0:len(self)] class AbstractH5ReferenceDataset(DatasetOfReferences): def __getitem__(self, arg): ref = super().__getitem__(arg) if isinstance(ref, np.ndarray): return [self.get_object(self.dataset.file[x]) for x in ref] else: return self.get_object(self.dataset.file[ref]) @property def dtype(self): return 'object' class AbstractH5RegionDataset(AbstractH5ReferenceDataset): def __getitem__(self, arg): obj = super().__getitem__(arg) ref = self.dataset[arg] return obj[ref] @property def dtype(self): return 'region' class ContainerH5TableDataset(ContainerResolverMixin, AbstractH5TableDataset): """ A reference-resolving dataset for resolving references inside tables (i.e. compound dtypes) that returns resolved references as Containers """ @classmethod def get_inverse_class(cls): return BuilderH5TableDataset class BuilderH5TableDataset(BuilderResolverMixin, AbstractH5TableDataset): """ A reference-resolving dataset for resolving references inside tables (i.e. 
compound dtypes) that returns resolved references as Builders """ @classmethod def get_inverse_class(cls): return ContainerH5TableDataset class ContainerH5ReferenceDataset(ContainerResolverMixin, AbstractH5ReferenceDataset): """ A reference-resolving dataset for resolving object references that returns resolved references as Containers """ @classmethod def get_inverse_class(cls): return BuilderH5ReferenceDataset class BuilderH5ReferenceDataset(BuilderResolverMixin, AbstractH5ReferenceDataset): """ A reference-resolving dataset for resolving object references that returns resolved references as Builders """ @classmethod def get_inverse_class(cls): return ContainerH5ReferenceDataset class ContainerH5RegionDataset(ContainerResolverMixin, AbstractH5RegionDataset): """ A reference-resolving dataset for resolving region references that returns resolved references as Containers """ @classmethod def get_inverse_class(cls): return BuilderH5RegionDataset class BuilderH5RegionDataset(BuilderResolverMixin, AbstractH5RegionDataset): """ A reference-resolving dataset for resolving region references that returns resolved references as Builders """ @classmethod def get_inverse_class(cls): return ContainerH5RegionDataset class H5SpecWriter(SpecWriter): __str_type = special_dtype(vlen=str) @docval({'name': 'group', 'type': Group, 'doc': 'the HDF5 file to write specs to'}) def __init__(self, **kwargs): self.__group = getargs('group', kwargs) @staticmethod def stringify(spec): ''' Converts a spec into a JSON string to write to a dataset ''' return json.dumps(spec, separators=(',', ':')) def __write(self, d, name): data = self.stringify(d) # create spec group if it does not exist. otherwise, do not overwrite existing spec dset = self.__group.require_dataset(name, shape=tuple(), data=data, dtype=self.__str_type) return dset def write_spec(self, spec, path): return self.__write(spec, path) def write_namespace(self, namespace, path): return self.__write({'namespaces': [namespace]}, path) class H5SpecReader(SpecReader): @docval({'name': 'group', 'type': Group, 'doc': 'the HDF5 file to read specs from'}) def __init__(self, **kwargs): self.__group = getargs('group', kwargs) super_kwargs = {'source': "%s:%s" % (os.path.abspath(self.__group.file.name), self.__group.name)} call_docval_func(super().__init__, super_kwargs) self.__cache = None def __read(self, path): s = self.__group[path][()] if isinstance(s, np.ndarray) \ and s.shape == (1,): s = s[0] if isinstance(s, bytes): s = s.decode('UTF-8') d = json.loads(s) return d def read_spec(self, spec_path): return self.__read(spec_path) def read_namespace(self, ns_path): if self.__cache is None: self.__cache = self.__read(ns_path) ret = self.__cache['namespaces'] return ret class H5RegionSlicer(RegionSlicer): @docval({'name': 'dataset', 'type': (Dataset, H5Dataset), 'doc': 'the HDF5 dataset to slice'}, {'name': 'region', 'type': RegionReference, 'doc': 'the region reference to use to slice'}) def __init__(self, **kwargs): self.__dataset = getargs('dataset', kwargs) self.__regref = getargs('region', kwargs) self.__len = self.__dataset.regionref.selection(self.__regref)[0] self.__region = None def __read_region(self): if self.__region is None: self.__region = self.__dataset[self.__regref] def __getitem__(self, idx): self.__read_region() return self.__region[idx] def __len__(self): return self.__len class H5DataIO(DataIO): """ Wrap data arrays for write via HDF5IO to customize I/O behavior, such as compression and chunking for data arrays. 
""" @docval({'name': 'data', 'type': (np.ndarray, list, tuple, Dataset, Iterable), 'doc': 'the data to be written. NOTE: If an h5py.Dataset is used, all other settings but link_data' + ' will be ignored as the dataset will either be linked to or copied as is in H5DataIO.', 'default': None}, {'name': 'maxshape', 'type': tuple, 'doc': 'Dataset will be resizable up to this shape (Tuple). Automatically enables chunking.' + 'Use None for the axes you want to be unlimited.', 'default': None}, {'name': 'chunks', 'type': (bool, tuple), 'doc': 'Chunk shape or True to enable auto-chunking', 'default': None}, {'name': 'compression', 'type': (str, bool, int), 'doc': 'Compression strategy. If a bool is given, then gzip compression will be used by default.' + 'http://docs.h5py.org/en/latest/high/dataset.html#dataset-compression', 'default': None}, {'name': 'compression_opts', 'type': (int, tuple), 'doc': 'Parameter for compression filter', 'default': None}, {'name': 'fillvalue', 'type': None, 'doc': 'Value to be returned when reading uninitialized parts of the dataset', 'default': None}, {'name': 'shuffle', 'type': bool, 'doc': 'Enable shuffle I/O filter. http://docs.h5py.org/en/latest/high/dataset.html#dataset-shuffle', 'default': None}, {'name': 'fletcher32', 'type': bool, 'doc': 'Enable fletcher32 checksum. http://docs.h5py.org/en/latest/high/dataset.html#dataset-fletcher32', 'default': None}, {'name': 'link_data', 'type': bool, 'doc': 'If data is an h5py.Dataset should it be linked to or copied. NOTE: This parameter is only ' + 'allowed if data is an h5py.Dataset', 'default': False}, {'name': 'allow_plugin_filters', 'type': bool, 'doc': 'Enable passing dynamically loaded filters as compression parameter', 'default': False} ) def __init__(self, **kwargs): # Get the list of I/O options that user has passed in ioarg_names = [name for name in kwargs.keys() if name not in['data', 'link_data', 'allow_plugin_filters']] # Remove the ioargs from kwargs ioarg_values = [popargs(argname, kwargs) for argname in ioarg_names] # Consume link_data parameter self.__link_data = popargs('link_data', kwargs) # Consume allow_plugin_filters parameter self.__allow_plugin_filters = popargs('allow_plugin_filters', kwargs) # Check for possible collision with other parameters if not isinstance(getargs('data', kwargs), Dataset) and self.__link_data: self.__link_data = False warnings.warn('link_data parameter in H5DataIO will be ignored') # Call the super constructor and consume the data parameter call_docval_func(super().__init__, kwargs) # Construct the dict with the io args, ignoring all options that were set to None self.__iosettings = {k: v for k, v in zip(ioarg_names, ioarg_values) if v is not None} # Set io_properties for DataChunkIterators if isinstance(self.data, AbstractDataChunkIterator): # Define the chunking options if the user has not set them explicitly. 
if 'chunks' not in self.__iosettings and self.data.recommended_chunk_shape() is not None: self.__iosettings['chunks'] = self.data.recommended_chunk_shape() # Define the maxshape of the data if not provided by the user if 'maxshape' not in self.__iosettings: self.__iosettings['maxshape'] = self.data.maxshape # Make default settings when compression set to bool (True/False) if isinstance(self.__iosettings.get('compression', None), bool): if self.__iosettings['compression']: self.__iosettings['compression'] = 'gzip' else: self.__iosettings.pop('compression', None) if 'compression_opts' in self.__iosettings: warnings.warn('Compression disabled by compression=False setting. ' + 'compression_opts parameter will, therefore, be ignored.') self.__iosettings.pop('compression_opts', None) # Validate the compression options used self._check_compression_options() # Confirm that the compressor is supported by h5py if not self.filter_available(self.__iosettings.get('compression', None), self.__allow_plugin_filters): raise ValueError("%s compression not support by this version of h5py." % str(self.__iosettings['compression'])) # Check possible parameter collisions if isinstance(self.data, Dataset): for k in self.__iosettings.keys(): warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k) def get_io_params(self): """ Returns a dict with the I/O parameters specifiedin in this DataIO. """ ret = dict(self.__iosettings) ret['link_data'] = self.__link_data return ret def _check_compression_options(self): """ Internal helper function used to check if compression options are compliant with the compression filter used. :raises ValueError: If incompatible options are detected """ if 'compression' in self.__iosettings: if 'compression_opts' in self.__iosettings: if self.__iosettings['compression'] == 'gzip': if self.__iosettings['compression_opts'] not in range(10): raise ValueError("GZIP compression_opts setting must be an integer from 0-9, " "not " + str(self.__iosettings['compression_opts'])) elif self.__iosettings['compression'] == 'lzf': if self.__iosettings['compression_opts'] is not None: raise ValueError("LZF compression filter accepts no compression_opts") elif self.__iosettings['compression'] == 'szip': szip_opts_error = False # Check that we have a tuple szip_opts_error |= not isinstance(self.__iosettings['compression_opts'], tuple) # Check that we have a tuple of the right length and correct settings if not szip_opts_error: try: szmethod, szpix = self.__iosettings['compression_opts'] szip_opts_error |= (szmethod not in ('ec', 'nn')) szip_opts_error |= (not (0 < szpix <= 32 and szpix % 2 == 0)) except ValueError: # ValueError is raised if tuple does not have the right length to unpack szip_opts_error = True if szip_opts_error: raise ValueError("SZIP compression filter compression_opts" " must be a 2-tuple ('ec'|'nn', even integer 0-32).") # Warn if compressor other than gzip is being used if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]: warnings.warn(str(self.__iosettings['compression']) + " compression may not be available " "on all installations of HDF5. Use of gzip is recommended to ensure portability of " "the generated HDF5 files.") @staticmethod def filter_available(filter, allow_plugin_filters): """ Check if a given I/O filter is available :param filter: String with the name of the filter, e.g., gzip, szip etc. int with the registered filter ID, e.g. 
307 :type filter: String, int :param allow_plugin_filters: bool indicating whether the given filter can be dynamically loaded :return: bool indicating wether the given filter is available """ if filter is not None: if filter in h5py_filters.encode: return True elif allow_plugin_filters is True: if type(filter) == int: if h5py_filters.h5z.filter_avail(filter): filter_info = h5py_filters.h5z.get_filter_info(filter) if filter_info == (h5py_filters.h5z.FILTER_CONFIG_DECODE_ENABLED + h5py_filters.h5z.FILTER_CONFIG_ENCODE_ENABLED): return True return False else: return True @property def link_data(self): return self.__link_data @property def io_settings(self): return self.__iosettings @property def valid(self): if isinstance(self.data, Dataset) and not self.data.id.valid: return False return super().valid ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/backends/hdf5/h5tools.py0000644000655200065520000014072200000000000022110 0ustar00circlecicircleci00000000000000from collections import deque import numpy as np import os.path from functools import partial from h5py import File, Group, Dataset, special_dtype, SoftLink, ExternalLink, Reference, RegionReference, check_dtype import warnings from ...container import Container from ...utils import docval, getargs, popargs, call_docval_func, get_data_shape from ...data_utils import AbstractDataChunkIterator from ...build import Builder, GroupBuilder, DatasetBuilder, LinkBuilder, BuildManager,\ RegionBuilder, ReferenceBuilder, TypeMap, ObjectMapper from ...spec import RefSpec, DtypeSpec, NamespaceCatalog, GroupSpec from ...spec import NamespaceBuilder from .h5_utils import BuilderH5ReferenceDataset, BuilderH5RegionDataset, BuilderH5TableDataset,\ H5DataIO, H5SpecReader, H5SpecWriter from ..io import HDMFIO, UnsupportedOperation from ..warnings import BrokenLinkWarning ROOT_NAME = 'root' SPEC_LOC_ATTR = '.specloc' H5_TEXT = special_dtype(vlen=str) H5_BINARY = special_dtype(vlen=bytes) H5_REF = special_dtype(ref=Reference) H5_REGREF = special_dtype(ref=RegionReference) class HDF5IO(HDMFIO): @docval({'name': 'path', 'type': str, 'doc': 'the path to the HDF5 file'}, {'name': 'manager', 'type': (TypeMap, BuildManager), 'doc': 'the BuildManager or a TypeMap to construct a BuildManager to use for I/O', 'default': None}, {'name': 'mode', 'type': str, 'doc': 'the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x")'}, {'name': 'comm', 'type': 'Intracomm', 'doc': 'the MPI communicator to use for parallel I/O', 'default': None}, {'name': 'file', 'type': File, 'doc': 'a pre-existing h5py.File object', 'default': None}) def __init__(self, **kwargs): '''Open an HDF5 file for IO For `mode`, see `h5py.File _`. ''' path, manager, mode, comm, file_obj = popargs('path', 'manager', 'mode', 'comm', 'file', kwargs) if file_obj is not None and os.path.abspath(file_obj.filename) != os.path.abspath(path): msg = 'You argued %s as this object\'s path, ' % path msg += 'but supplied a file with filename: %s' % file_obj.filename raise ValueError(msg) if file_obj is None and not os.path.exists(path) and (mode == 'r' or mode == 'r+'): msg = "Unable to open file %s in '%s' mode. File does not exist." % (path, mode) raise UnsupportedOperation(msg) if file_obj is None and os.path.exists(path) and (mode == 'w-' or mode == 'x'): msg = "Unable to open file %s in '%s' mode. File already exists." 
% (path, mode) raise UnsupportedOperation(msg) if manager is None: manager = BuildManager(TypeMap(NamespaceCatalog())) elif isinstance(manager, TypeMap): manager = BuildManager(manager) self.__comm = comm self.__mode = mode self.__path = path self.__file = file_obj super().__init__(manager, source=path) self.__built = dict() # keep track of each builder for each dataset/group/link for each file self.__read = dict() # keep track of which files have been read. Key is the filename value is the builder self.__ref_queue = deque() # a queue of the references that need to be added self.__dci_queue = deque() # a queue of DataChunkIterators that need to be exhausted ObjectMapper.no_convert(Dataset) @property def comm(self): return self.__comm @property def _file(self): return self.__file @classmethod @docval({'name': 'namespace_catalog', 'type': (NamespaceCatalog, TypeMap), 'doc': 'the NamespaceCatalog or TypeMap to load namespaces into'}, {'name': 'path', 'type': str, 'doc': 'the path to the HDF5 file'}, {'name': 'namespaces', 'type': list, 'doc': 'the namespaces to load', 'default': None}, returns="dict with the loaded namespaces", rtype=dict) def load_namespaces(cls, namespace_catalog, path, namespaces=None): ''' Load cached namespaces from a file. ''' d = {} with File(path, 'r') as f: if SPEC_LOC_ATTR not in f.attrs: msg = "No cached namespaces found in %s" % path warnings.warn(msg) return d spec_group = f[f.attrs[SPEC_LOC_ATTR]] if namespaces is None: namespaces = list(spec_group.keys()) readers = dict() deps = dict() for ns in namespaces: ns_group = spec_group[ns] latest_version = list(ns_group.keys())[-1] ns_group = ns_group[latest_version] reader = H5SpecReader(ns_group) readers[ns] = reader for spec_ns in reader.read_namespace('namespace'): deps[ns] = list() for s in spec_ns['schema']: dep = s.get('namespace') if dep is not None: deps[ns].append(dep) order = cls._order_deps(deps) for ns in order: reader = readers[ns] d.update(namespace_catalog.load_namespaces('namespace', reader=reader)) return d @classmethod def _order_deps(cls, deps): """ Order namespaces according to dependency for loading into a NamespaceCatalog Args: deps (dict): a dictionary that maps a namespace name to a list of name of the namespaces on which the the namespace is directly dependent Example: {'a': ['b', 'c'], 'b': ['d'], c: ['d'], 'd': []} Expected output: ['d', 'b', 'c', 'a'] """ order = list() keys = list(deps.keys()) deps = dict(deps) for k in keys: if k in deps: cls.__order_deps_aux(order, deps, k) return order @classmethod def __order_deps_aux(cls, order, deps, key): """ A recursive helper function for _order_deps """ if key not in deps: return subdeps = deps.pop(key) for subk in subdeps: cls.__order_deps_aux(order, deps, subk) order.append(key) @classmethod def __convert_namespace(cls, ns_catalog, namespace): ns = ns_catalog.get_namespace(namespace) builder = NamespaceBuilder(ns.doc, ns.name, full_name=ns.full_name, version=ns.version, author=ns.author, contact=ns.contact) for elem in ns.schema: if 'namespace' in elem: inc_ns = elem['namespace'] builder.include_namespace(inc_ns) else: source = elem['source'] for dt in ns_catalog.get_types(source): spec = ns_catalog.get_spec(namespace, dt) if spec.parent is not None: continue h5_source = cls.__get_name(source) spec = cls.__copy_spec(spec) builder.add_spec(h5_source, spec) return builder @classmethod def __get_name(cls, path): return os.path.splitext(path)[0] @classmethod def __copy_spec(cls, spec): kwargs = dict() kwargs['attributes'] = 
cls.__get_new_specs(spec.attributes, spec) to_copy = ['doc', 'name', 'default_name', 'linkable', 'quantity', spec.inc_key(), spec.def_key()] if isinstance(spec, GroupSpec): kwargs['datasets'] = cls.__get_new_specs(spec.datasets, spec) kwargs['groups'] = cls.__get_new_specs(spec.groups, spec) kwargs['links'] = cls.__get_new_specs(spec.links, spec) else: to_copy.append('dtype') to_copy.append('shape') to_copy.append('dims') for key in to_copy: val = getattr(spec, key) if val is not None: kwargs[key] = val ret = spec.build_spec(kwargs) return ret @classmethod def __get_new_specs(cls, subspecs, spec): ret = list() for subspec in subspecs: if not spec.is_inherited_spec(subspec) or spec.is_overridden_spec(subspec): ret.append(subspec) return ret @classmethod @docval({'name': 'source_filename', 'type': str, 'doc': 'the path to the HDF5 file to copy'}, {'name': 'dest_filename', 'type': str, 'doc': 'the name of the destination file'}, {'name': 'expand_external', 'type': bool, 'doc': 'expand external links into new objects', 'default': True}, {'name': 'expand_refs', 'type': bool, 'doc': 'copy objects which are pointed to by reference', 'default': False}, {'name': 'expand_soft', 'type': bool, 'doc': 'expand soft links into new objects', 'default': False} ) def copy_file(self, **kwargs): """ Convenience function to copy an HDF5 file while allowing external links to be resolved. NOTE: The source file will be opened in 'r' mode and the destination file will be opened in 'w' mode using h5py. To avoid possible collisions, care should be taken that, e.g., the source file is not opened already when calling this function. """ source_filename, dest_filename, expand_external, expand_refs, expand_soft = getargs('source_filename', 'dest_filename', 'expand_external', 'expand_refs', 'expand_soft', kwargs) source_file = File(source_filename, 'r') dest_file = File(dest_filename, 'w') for objname in source_file["/"].keys(): source_file.copy(source=objname, dest=dest_file, name=objname, expand_external=expand_external, expand_refs=expand_refs, expand_soft=expand_soft, shallow=False, without_attrs=False, ) for objname in source_file['/'].attrs: dest_file['/'].attrs[objname] = source_file['/'].attrs[objname] source_file.close() dest_file.close() @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, {'name': 'cache_spec', 'type': bool, 'doc': 'cache specification to file', 'default': True}, {'name': 'link_data', 'type': bool, 'doc': 'If not specified otherwise link (True) or copy (False) HDF5 Datasets', 'default': True}, {'name': 'exhaust_dci', 'type': bool, 'doc': 'exhaust DataChunkIterators one at a time. If False, exhaust them concurrently', 'default': True}) def write(self, **kwargs): if self.__mode == 'r': raise UnsupportedOperation(("Cannot write to file %s in mode '%s'. 
" "Please use mode 'r+', 'w', 'w-', 'x', or 'a'") % (self.__path, self.__mode)) cache_spec = popargs('cache_spec', kwargs) call_docval_func(super().write, kwargs) if cache_spec: ref = self.__file.attrs.get(SPEC_LOC_ATTR) spec_group = None if ref is not None: spec_group = self.__file[ref] else: path = 'specifications' # do something to figure out where the specifications should go spec_group = self.__file.require_group(path) self.__file.attrs[SPEC_LOC_ATTR] = spec_group.ref ns_catalog = self.manager.namespace_catalog for ns_name in ns_catalog.namespaces: ns_builder = self.__convert_namespace(ns_catalog, ns_name) namespace = ns_catalog.get_namespace(ns_name) if namespace.version is None: group_name = '%s/unversioned' % ns_name else: group_name = '%s/%s' % (ns_name, namespace.version) ns_group = spec_group.require_group(group_name) writer = H5SpecWriter(ns_group) ns_builder.export('namespace', writer=writer) def read(self, **kwargs): if self.__mode == 'w' or self.__mode == 'w-' or self.__mode == 'x': raise UnsupportedOperation("Cannot read from file %s in mode '%s'. Please use mode 'r', 'r+', or 'a'." % (self.__path, self.__mode)) try: return call_docval_func(super().read, kwargs) except UnsupportedOperation as e: if str(e) == 'Cannot build data. There are no values.': raise UnsupportedOperation("Cannot read data from file %s in mode '%s'. There are no values." % (self.__path, self.__mode)) @docval(returns='a GroupBuilder representing the data object', rtype='GroupBuilder') def read_builder(self): f_builder = self.__read.get(self.__file) # ignore cached specs when reading builder ignore = set() specloc = self.__file.attrs.get(SPEC_LOC_ATTR) if specloc is not None: ignore.add(self.__file[specloc].name) if f_builder is None: f_builder = self.__read_group(self.__file, ROOT_NAME, ignore=ignore) self.__read[self.__file] = f_builder return f_builder def __set_built(self, fpath, id, builder): """ Update self.__built to cache the given builder for the given file and id. 
:param fpath: Path to the HDF5 file containing the object :type fpath: str :param id: ID of the HDF5 object in the path :type id: h5py GroupID object :param builder: The builder to be cached """ self.__built.setdefault(fpath, dict()).setdefault(id, builder) def __get_built(self, fpath, id): """ Look up a builder for the given file and id in self.__built cache :param fpath: Path to the HDF5 file containing the object :type fpath: str :param id: ID of the HDF5 object in the path :type id: h5py GroupID object :return: Builder in the self.__built cache or None """ fdict = self.__built.get(fpath) if fdict: return fdict.get(id) else: return None @docval({'name': 'h5obj', 'type': (Dataset, Group), 'doc': 'the HDF5 object to the corresponding Builder object for'}) def get_builder(self, **kwargs): """ Get the builder for the corresponding h5py Group or Dataset :raises ValueError: When no builder has been constructed yet for the given h5py object """ h5obj = getargs('h5obj', kwargs) fpath = h5obj.file.filename builder = self.__get_built(fpath, h5obj.id) if builder is None: msg = '%s:%s has not been built' % (fpath, h5obj.name) raise ValueError(msg) return builder @docval({'name': 'h5obj', 'type': (Dataset, Group), 'doc': 'the HDF5 object to the corresponding Container/Data object for'}) def get_container(self, **kwargs): """ Get the container for the corresponding h5py Group or Dataset :raises ValueError: When no builder has been constructed yet for the given h5py object """ h5obj = getargs('h5obj', kwargs) builder = self.get_builder(h5obj) container = self.manager.construct(builder) return container def __read_group(self, h5obj, name=None, ignore=set()): kwargs = { "attributes": self.__read_attrs(h5obj), "groups": dict(), "datasets": dict(), "links": dict() } for key, val in kwargs['attributes'].items(): if isinstance(val, bytes): kwargs['attributes'][key] = val.decode('UTF-8') if name is None: name = str(os.path.basename(h5obj.name)) for k in h5obj: sub_h5obj = h5obj.get(k) if not (sub_h5obj is None): if sub_h5obj.name in ignore: continue link_type = h5obj.get(k, getlink=True) if isinstance(link_type, SoftLink) or isinstance(link_type, ExternalLink): # Reading links might be better suited in its own function # get path of link (the key used for tracking what's been built) target_path = link_type.path builder_name = os.path.basename(target_path) parent_loc = os.path.dirname(target_path) # get builder if already read, else build it builder = self.__get_built(sub_h5obj.file.filename, sub_h5obj.file[target_path].id) if builder is None: # NOTE: all links must have absolute paths if isinstance(sub_h5obj, Dataset): builder = self.__read_dataset(sub_h5obj, builder_name) else: builder = self.__read_group(sub_h5obj, builder_name, ignore=ignore) self.__set_built(sub_h5obj.file.filename, sub_h5obj.file[target_path].id, builder) builder.location = parent_loc link_builder = LinkBuilder(builder, k, source=h5obj.file.filename) link_builder.written = True kwargs['links'][builder_name] = link_builder else: builder = self.__get_built(sub_h5obj.file.filename, sub_h5obj.id) obj_type = None read_method = None if isinstance(sub_h5obj, Dataset): read_method = self.__read_dataset obj_type = kwargs['datasets'] else: read_method = partial(self.__read_group, ignore=ignore) obj_type = kwargs['groups'] if builder is None: builder = read_method(sub_h5obj) self.__set_built(sub_h5obj.file.filename, sub_h5obj.id, builder) obj_type[builder.name] = builder else: warnings.warn(os.path.join(h5obj.name, k), BrokenLinkWarning) 
kwargs['datasets'][k] = None continue kwargs['source'] = h5obj.file.filename ret = GroupBuilder(name, **kwargs) ret.written = True return ret def __read_dataset(self, h5obj, name=None): kwargs = { "attributes": self.__read_attrs(h5obj), "dtype": h5obj.dtype, "maxshape": h5obj.maxshape } for key, val in kwargs['attributes'].items(): if isinstance(val, bytes): kwargs['attributes'][key] = val.decode('UTF-8') if name is None: name = str(os.path.basename(h5obj.name)) kwargs['source'] = h5obj.file.filename ndims = len(h5obj.shape) if ndims == 0: # read scalar scalar = h5obj[()] if isinstance(scalar, bytes): scalar = scalar.decode('UTF-8') if isinstance(scalar, Reference): # TODO (AJTRITT): This should call __read_ref to support Group references target = h5obj.file[scalar] target_builder = self.__read_dataset(target) self.__set_built(target.file.filename, target.id, target_builder) if isinstance(scalar, RegionReference): kwargs['data'] = RegionBuilder(scalar, target_builder) else: kwargs['data'] = ReferenceBuilder(target_builder) else: kwargs["data"] = scalar elif ndims == 1: d = None if h5obj.dtype.kind == 'O': elem1 = h5obj[0] if isinstance(elem1, (str, bytes)): d = h5obj elif isinstance(elem1, RegionReference): # read list of references d = BuilderH5RegionDataset(h5obj, self) elif isinstance(elem1, Reference): d = BuilderH5ReferenceDataset(h5obj, self) elif h5obj.dtype.kind == 'V': # table cpd_dt = h5obj.dtype ref_cols = [check_dtype(ref=cpd_dt[i]) for i in range(len(cpd_dt))] d = BuilderH5TableDataset(h5obj, self, ref_cols) else: d = h5obj kwargs["data"] = d else: kwargs["data"] = h5obj ret = DatasetBuilder(name, **kwargs) ret.written = True return ret def __read_attrs(self, h5obj): ret = dict() for k, v in h5obj.attrs.items(): if k == SPEC_LOC_ATTR: # ignore cached spec continue if isinstance(v, RegionReference): raise ValueError("cannot read region reference attributes yet") elif isinstance(v, Reference): ret[k] = self.__read_ref(h5obj.file[v]) else: ret[k] = v return ret def __read_ref(self, h5obj): ret = None ret = self.__get_built(h5obj.file.filename, h5obj.id) if ret is None: if isinstance(h5obj, Dataset): ret = self.__read_dataset(h5obj) elif isinstance(h5obj, Group): ret = self.__read_group(h5obj) else: raise ValueError("h5obj must be a Dataset or a Group - got %s" % str(h5obj)) self.__set_built(h5obj.file.filename, h5obj.id, ret) return ret def open(self): if self.__file is None: open_flag = self.__mode if self.comm: kwargs = {'driver': 'mpio', 'comm': self.comm} else: kwargs = {} self.__file = File(self.__path, open_flag, **kwargs) def close(self): if self.__file is not None: self.__file.close() @docval({'name': 'builder', 'type': GroupBuilder, 'doc': 'the GroupBuilder object representing the HDF5 file'}, {'name': 'link_data', 'type': bool, 'doc': 'If not specified otherwise link (True) or copy (False) HDF5 Datasets', 'default': True}, {'name': 'exhaust_dci', 'type': bool, 'doc': 'exhaust DataChunkIterators one at a time. 
If False, exhaust them concurrently', 'default': True}) def write_builder(self, **kwargs): f_builder, link_data, exhaust_dci = getargs('builder', 'link_data', 'exhaust_dci', kwargs) for name, gbldr in f_builder.groups.items(): self.write_group(self.__file, gbldr, exhaust_dci=exhaust_dci) for name, dbldr in f_builder.datasets.items(): self.write_dataset(self.__file, dbldr, link_data, exhaust_dci=exhaust_dci) for name, lbldr in f_builder.links.items(): self.write_link(self.__file, lbldr) self.set_attributes(self.__file, f_builder.attributes) self.__add_refs() self.__exhaust_dcis() def __add_refs(self): ''' Add all references in the file. References get queued to be added at the end of write. This is because the current traversal algorithm (i.e. iterating over GroupBuilder items) does not happen in a guaranteed order. We need to figure out what objects will be references, and then write them after we write everything else. ''' failed = set() while len(self.__ref_queue) > 0: call = self.__ref_queue.popleft() try: call() except KeyError: if id(call) in failed: raise RuntimeError('Unable to resolve reference') failed.add(id(call)) self.__ref_queue.append(call) def __exhaust_dcis(self): """ Read and write from any queued DataChunkIterators in a round-robin fashion """ while len(self.__dci_queue) > 0: dset, data = self.__dci_queue.popleft() if self.__write_chunk__(dset, data): self.__dci_queue.append((dset, data)) @classmethod def get_type(cls, data): if isinstance(data, str): return H5_TEXT elif isinstance(data, Container): return H5_REF elif not hasattr(data, '__len__'): return type(data) else: if len(data) == 0: if hasattr(data, 'dtype'): return data.dtype else: raise ValueError('cannot determine type for empty data') return cls.get_type(data[0]) __dtypes = { "float": np.float32, "float32": np.float32, "double": np.float64, "float64": np.float64, "long": np.int64, "int64": np.int64, "uint64": np.uint64, "int": np.int32, "int32": np.int32, "int16": np.int16, "int8": np.int8, "bool": np.bool_, "text": H5_TEXT, "utf": H5_TEXT, "utf8": H5_TEXT, "utf-8": H5_TEXT, "ascii": H5_BINARY, "str": H5_BINARY, "isodatetime": H5_TEXT, "uint32": np.uint32, "uint16": np.uint16, "uint8": np.uint8, "ref": H5_REF, "reference": H5_REF, "object": H5_REF, "region": H5_REGREF, } @classmethod def __resolve_dtype__(cls, dtype, data): # TODO: These values exist, but I haven't solved them yet # binary # number dtype = cls.__resolve_dtype_helper__(dtype) if dtype is None: dtype = cls.get_type(data) return dtype @classmethod def __resolve_dtype_helper__(cls, dtype): if dtype is None: return None elif isinstance(dtype, str): return cls.__dtypes.get(dtype) elif isinstance(dtype, dict): return cls.__dtypes.get(dtype['reftype']) else: return np.dtype([(x['name'], cls.__resolve_dtype_helper__(x['dtype'])) for x in dtype]) @docval({'name': 'obj', 'type': (Group, Dataset), 'doc': 'the HDF5 object to add attributes to'}, {'name': 'attributes', 'type': dict, 'doc': 'a dict containing the attributes on the Group or Dataset, indexed by attribute name'}) def set_attributes(self, **kwargs): obj, attributes = getargs('obj', 'attributes', kwargs) for key, value in attributes.items(): if isinstance(value, (set, list, tuple)): tmp = tuple(value) if len(tmp) > 0: if isinstance(tmp[0], str): value = [np.unicode_(s) for s in tmp] elif isinstance(tmp[0], bytes): value = [np.string_(s) for s in tmp] elif isinstance(tmp[0], Container): # a list of references self.__queue_ref(self._make_attr_ref_filler(obj, key, tmp)) else: value = np.array(value) 
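# --- Illustrative sketch (not part of the original source) -----------------
# __add_refs above drains a deque of deferred reference-filling callables:
# a call that raises KeyError (its target has not been written yet) is put
# back at the end of the queue once, and the loop raises RuntimeError if the
# same call fails a second time. A standalone sketch of that retry strategy
# with illustrative names only.
from collections import deque

def drain_with_retry(queue):
    failed = set()
    while queue:
        call = queue.popleft()
        try:
            call()
        except KeyError:
            if id(call) in failed:
                raise RuntimeError('Unable to resolve reference')
            failed.add(id(call))
            queue.append(call)  # retry after the rest of the queue has run

if __name__ == '__main__':
    state = {'ready': False}
    def make_ready():
        state['ready'] = True
    def needs_ready():
        if not state['ready']:
            raise KeyError('target not written yet')
    # needs_ready fails once, is re-queued, and succeeds after make_ready runs
    drain_with_retry(deque([needs_ready, make_ready]))
    assert state['ready']
# ----------------------------------------------------------------------------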
obj.attrs[key] = value elif isinstance(value, (Container, Builder, ReferenceBuilder)): # a reference self.__queue_ref(self._make_attr_ref_filler(obj, key, value)) else: obj.attrs[key] = value # a regular scalar def _make_attr_ref_filler(self, obj, key, value): ''' Make the callable for setting references to attributes ''' if isinstance(value, (tuple, list)): def _filler(): ret = list() for item in value: ret.append(self.__get_ref(item)) obj.attrs[key] = ret else: def _filler(): obj.attrs[key] = self.__get_ref(value) return _filler @docval({'name': 'parent', 'type': Group, 'doc': 'the parent HDF5 object'}, {'name': 'builder', 'type': GroupBuilder, 'doc': 'the GroupBuilder to write'}, {'name': 'exhaust_dci', 'type': bool, 'doc': 'exhaust DataChunkIterators one at a time. If False, exhaust them concurrently', 'default': True}, returns='the Group that was created', rtype='Group') def write_group(self, **kwargs): parent, builder, exhaust_dci = getargs('parent', 'builder', 'exhaust_dci', kwargs) if builder.written: group = parent[builder.name] else: group = parent.create_group(builder.name) # write all groups subgroups = builder.groups if subgroups: for subgroup_name, sub_builder in subgroups.items(): # do not create an empty group without attributes or links self.write_group(group, sub_builder, exhaust_dci=exhaust_dci) # write all datasets datasets = builder.datasets if datasets: for dset_name, sub_builder in datasets.items(): self.write_dataset(group, sub_builder, exhaust_dci=exhaust_dci) # write all links links = builder.links if links: for link_name, sub_builder in links.items(): self.write_link(group, sub_builder) attributes = builder.attributes self.set_attributes(group, attributes) builder.written = True return group def __get_path(self, builder): curr = builder names = list() while curr is not None and curr.name != ROOT_NAME: names.append(curr.name) curr = curr.parent delim = "/" path = "%s%s" % (delim, delim.join(reversed(names))) return path @docval({'name': 'parent', 'type': Group, 'doc': 'the parent HDF5 object'}, {'name': 'builder', 'type': LinkBuilder, 'doc': 'the LinkBuilder to write'}, returns='the Link that was created', rtype='Link') def write_link(self, **kwargs): parent, builder = getargs('parent', 'builder', kwargs) if builder.written: return None name = builder.name target_builder = builder.builder path = self.__get_path(target_builder) # source will indicate target_builder's location if parent.file.filename == target_builder.source: link_obj = SoftLink(path) elif target_builder.source is not None: target_filename = os.path.abspath(target_builder.source) parent_filename = os.path.abspath(parent.file.filename) relative_path = os.path.relpath(target_filename, os.path.dirname(parent_filename)) if target_builder.location is not None: path = target_builder.location + path link_obj = ExternalLink(relative_path, path) else: msg = 'cannot create external link to %s' % path raise ValueError(msg) parent[name] = link_obj builder.written = True return link_obj @docval({'name': 'parent', 'type': Group, 'doc': 'the parent HDF5 object'}, {'name': 'builder', 'type': DatasetBuilder, 'doc': 'the DatasetBuilder to write'}, {'name': 'link_data', 'type': bool, 'doc': 'If not specified otherwise link (True) or copy (False) HDF5 Datasets', 'default': True}, {'name': 'exhaust_dci', 'type': bool, 'doc': 'exhaust DataChunkIterators one at a time. 
If False, exhaust them concurrently', 'default': True}, returns='the Dataset that was created', rtype=Dataset) def write_dataset(self, **kwargs): # noqa: C901 """ Write a dataset to HDF5 The function uses other dataset-dependent write functions, e.g, __scalar_fill__, __list_fill__ and __setup_chunked_dset__ to write the data. """ parent, builder, link_data, exhaust_dci = getargs('parent', 'builder', 'link_data', 'exhaust_dci', kwargs) if builder.written: return None name = builder.name data = builder.data options = dict() # dict with additional if isinstance(data, H5DataIO): options['io_settings'] = data.io_settings link_data = data.link_data data = data.data else: options['io_settings'] = {} attributes = builder.attributes options['dtype'] = builder.dtype dset = None link = None # The user provided an existing h5py dataset as input and asked to create a link to the dataset if isinstance(data, Dataset): # Create a Soft/External link to the dataset if link_data: data_filename = os.path.abspath(data.file.filename) parent_filename = os.path.abspath(parent.file.filename) if data_filename != parent_filename: link = ExternalLink(os.path.relpath(data_filename, os.path.dirname(parent_filename)), data.name) else: link = SoftLink(data.name) parent[name] = link # Copy the dataset else: parent.copy(source=data, dest=parent, name=name, expand_soft=False, expand_external=False, expand_refs=False, without_attrs=True) dset = parent[name] # Write a compound dataset, i.e, a dataset with compound data type elif isinstance(options['dtype'], list): # do some stuff to figure out what data is a reference refs = list() for i, dts in enumerate(options['dtype']): if self.__is_ref(dts): refs.append(i) # If one ore more of the parts of the compound data type are references then we need to deal with those if len(refs) > 0: try: _dtype = self.__resolve_dtype__(options['dtype'], data) except Exception as exc: msg = 'cannot add %s to %s - could not determine type' % (name, parent.name) raise Exception(msg) from exc dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, **options['io_settings']) builder.written = True @self.__queue_ref def _filler(): ret = list() for item in data: new_item = list(item) for i in refs: new_item[i] = self.__get_ref(item[i]) ret.append(tuple(new_item)) dset = parent[name] dset[:] = ret self.set_attributes(dset, attributes) return # If the compound data type contains only regular data (i.e., no references) then we can write it as usual else: dset = self.__list_fill__(parent, name, data, options) # Write a dataset containing references, i.e., a region or object reference. 
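# --- Illustrative sketch (not part of the original source) -----------------
# When write_dataset above receives an existing h5py.Dataset, it either
# links or copies: an ExternalLink when the source lives in another file, a
# SoftLink when it lives in the same file, and Group.copy() when link_data
# is False. A reduced, self-contained h5py sketch of that decision; the
# function and file names are illustrative.
import os
import h5py

def link_or_copy(parent, name, data, link_data=True):
    if link_data:
        data_filename = os.path.abspath(data.file.filename)
        parent_filename = os.path.abspath(parent.file.filename)
        if data_filename != parent_filename:
            rel = os.path.relpath(data_filename, os.path.dirname(parent_filename))
            parent[name] = h5py.ExternalLink(rel, data.name)
        else:
            parent[name] = h5py.SoftLink(data.name)
    else:
        parent.copy(source=data, dest=parent, name=name)

if __name__ == '__main__':
    with h5py.File('source.h5', 'w') as src, h5py.File('dest.h5', 'w') as dst:
        dset = src.create_dataset('values', data=[1, 2, 3])
        link_or_copy(dst, 'linked_values', dset)  # different file -> ExternalLink
        link_or_copy(src, 'alias', dset)          # same file -> SoftLink
# ----------------------------------------------------------------------------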
# NOTE: we can ignore options['io_settings'] for scalar data elif self.__is_ref(options['dtype']): _dtype = self.__dtypes.get(options['dtype']) # Write a scalar data region reference dataset if isinstance(data, RegionBuilder): dset = parent.require_dataset(name, shape=(), dtype=_dtype) builder.written = True @self.__queue_ref def _filler(): ref = self.__get_ref(data.builder, data.region) dset = parent[name] dset[()] = ref self.set_attributes(dset, attributes) # Write a scalar object reference dataset elif isinstance(data, ReferenceBuilder): dset = parent.require_dataset(name, dtype=_dtype, shape=()) builder.written = True @self.__queue_ref def _filler(): ref = self.__get_ref(data.builder) dset = parent[name] dset[()] = ref self.set_attributes(dset, attributes) # Write an array dataset of references else: # Write a array of region references if options['dtype'] == 'region': dset = parent.require_dataset(name, dtype=_dtype, shape=(len(data),), **options['io_settings']) builder.written = True @self.__queue_ref def _filler(): refs = list() for item in data: refs.append(self.__get_ref(item.builder, item.region)) dset = parent[name] dset[()] = refs self.set_attributes(dset, attributes) # Write array of object references else: dset = parent.require_dataset(name, shape=(len(data),), dtype=_dtype, ** options['io_settings']) builder.written = True @self.__queue_ref def _filler(): refs = list() for item in data: refs.append(self.__get_ref(item)) dset = parent[name] dset[()] = refs self.set_attributes(dset, attributes) return # write a "regular" dataset else: # Write a scalar dataset containing a single string if isinstance(data, (str, bytes)): dset = self.__scalar_fill__(parent, name, data, options) # Iterative write of a data chunk iterator elif isinstance(data, AbstractDataChunkIterator): dset = self.__setup_chunked_dset__(parent, name, data, options) self.__dci_queue.append((dset, data)) # Write a regular in memory array (e.g., numpy array, list etc.) 
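# --- Illustrative sketch (not part of the original source) -----------------
# The reference-dataset branches above create datasets with an object-
# reference dtype and fill them with .ref handles inside deferred _filler
# callables. A minimal, self-contained h5py sketch of writing and
# dereferencing object references; names are illustrative.
import h5py

if __name__ == '__main__':
    with h5py.File('refs.h5', 'w') as f:
        targets = [f.create_group('a'), f.create_group('b')]
        ref_dtype = h5py.special_dtype(ref=h5py.Reference)
        dset = f.create_dataset('pointers', shape=(len(targets),), dtype=ref_dtype)
        for i, target in enumerate(targets):
            dset[i] = target.ref          # store a reference to each group
        assert f[dset[0]].name == '/a'    # dereference through the file handle
# ----------------------------------------------------------------------------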
elif hasattr(data, '__len__'): dset = self.__list_fill__(parent, name, data, options) # Write a regular scalar dataset else: dset = self.__scalar_fill__(parent, name, data, options) # Create the attributes on the dataset only if we are the primary and not just a Soft/External link if link is None: self.set_attributes(dset, attributes) # Validate the attributes on the linked dataset elif len(attributes) > 0: pass builder.written = True if exhaust_dci: self.__exhaust_dcis() return @classmethod def __selection_max_bounds__(cls, selection): """Determine the bounds of a numpy selection index tuple""" if isinstance(selection, int): return selection+1 elif isinstance(selection, slice): return selection.stop elif isinstance(selection, list) or isinstance(selection, np.ndarray): return np.nonzero(selection)[0][-1]+1 elif isinstance(selection, tuple): return tuple([cls.__selection_max_bounds__(i) for i in selection]) @classmethod def __scalar_fill__(cls, parent, name, data, options=None): dtype = None io_settings = {} if options is not None: dtype = options.get('dtype') io_settings = options.get('io_settings') if not isinstance(dtype, type): try: dtype = cls.__resolve_dtype__(dtype, data) except Exception as exc: msg = 'cannot add %s to %s - could not determine type' % (name, parent.name) raise Exception(msg) from exc try: dset = parent.create_dataset(name, data=data, shape=None, dtype=dtype, **io_settings) except Exception as exc: msg = "Could not create scalar dataset %s in %s" % (name, parent.name) raise Exception(msg) from exc return dset @classmethod def __setup_chunked_dset__(cls, parent, name, data, options=None): """ Setup a dataset for writing to one-chunk-at-a-time based on the given DataChunkIterator :param parent: The parent object to which the dataset should be added :type parent: h5py.Group, h5py.File :param name: The name of the dataset :type name: str :param data: The data to be written. :type data: DataChunkIterator :param options: Dict with options for creating a dataset. available options are 'dtype' and 'io_settings' :type options: dict """ io_settings = {} if options is not None: if 'io_settings' in options: io_settings = options.get('io_settings') # Define the chunking options if the user has not set them explicitly. We need chunking for the iterative write. 
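# --- Illustrative sketch (not part of the original source) -----------------
# __setup_chunked_dset__ sizes a chunked, resizable dataset from the
# iterator's recommended shape and maxshape, and __write_chunk__ then grows
# the dataset as chunks arrive. A reduced h5py sketch of the same idea,
# using a plain generator in place of a DataChunkIterator; the function name
# is illustrative.
import h5py
import numpy as np

def iterative_write(parent, name, chunks, maxshape=(None,)):
    dset = None
    written = 0
    for chunk in chunks:
        chunk = np.asarray(chunk)
        if dset is None:
            # chunking is required for resizable (maxshape contains None) datasets
            dset = parent.create_dataset(name, shape=chunk.shape, dtype=chunk.dtype,
                                         maxshape=maxshape, chunks=True)
        end = written + len(chunk)
        if end > dset.shape[0]:
            dset.resize((end,) + dset.shape[1:])  # expand along the first axis
        dset[written:end] = chunk
        written = end
    return dset

if __name__ == '__main__':
    with h5py.File('chunked.h5', 'w') as f:
        iterative_write(f, 'timeseries', ([i, i + 1] for i in range(0, 10, 2)))
        assert f['timeseries'].shape == (10,)
# ----------------------------------------------------------------------------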
if 'chunks' not in io_settings: recommended_chunks = data.recommended_chunk_shape() io_settings['chunks'] = True if recommended_chunks is None else recommended_chunks # Define the shape of the data if not provided by the user if 'shape' not in io_settings: io_settings['shape'] = data.recommended_data_shape() # Define the maxshape of the data if not provided by the user if 'maxshape' not in io_settings: io_settings['maxshape'] = data.maxshape if 'dtype' not in io_settings: if (options is not None) and ('dtype' in options): io_settings['dtype'] = options['dtype'] else: io_settings['dtype'] = data.dtype try: dset = parent.create_dataset(name, **io_settings) except Exception as exc: raise Exception("Could not create dataset %s in %s" % (name, parent.name)) from exc return dset @classmethod def __write_chunk__(cls, dset, data): """ Read a chunk from the given DataChunkIterator and write it to the given Dataset :param dset: The Dataset to write to :type dset: Dataset :param data: The DataChunkIterator to read from :type data: DataChunkIterator :return: True of a chunk was written, False otherwise :rtype: bool """ try: chunk_i = next(data) # Determine the minimum array dimensions to fit the chunk selection max_bounds = cls.__selection_max_bounds__(chunk_i.selection) if not hasattr(max_bounds, '__len__'): max_bounds = (max_bounds,) # Determine if we need to expand any of the data dimensions expand_dims = [i for i, v in enumerate(max_bounds) if v is not None and v > dset.shape[i]] # Expand the dataset if needed if len(expand_dims) > 0: new_shape = np.asarray(dset.shape) new_shape[expand_dims] = np.asarray(max_bounds)[expand_dims] dset.resize(new_shape) # Process and write the data dset[chunk_i.selection] = chunk_i.data except StopIteration: return False return True @classmethod def __chunked_iter_fill__(cls, parent, name, data, options=None): """ Write data to a dataset one-chunk-at-a-time based on the given DataChunkIterator :param parent: The parent object to which the dataset should be added :type parent: h5py.Group, h5py.File :param name: The name of the dataset :type name: str :param data: The data to be written. :type data: DataChunkIterator :param options: Dict with options for creating a dataset. available options are 'dtype' and 'io_settings' :type options: dict """ dset = cls.__setup_chunked_dset__(parent, name, data, options=options) read = True while read: read = cls.__write_chunk__(dset, data) return dset @classmethod def __list_fill__(cls, parent, name, data, options=None): # define the io settings and data type if necessary io_settings = {} dtype = None if options is not None: dtype = options.get('dtype') io_settings = options.get('io_settings') if not isinstance(dtype, type): try: dtype = cls.__resolve_dtype__(dtype, data) except Exception as exc: msg = 'cannot add %s to %s - could not determine type' % (name, parent.name) raise Exception(msg) from exc # define the data shape if 'shape' in io_settings: data_shape = io_settings.pop('shape') elif hasattr(data, 'shape'): data_shape = data.shape elif isinstance(dtype, np.dtype): data_shape = (len(data),) else: data_shape = get_data_shape(data) # Create the dataset try: dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings) except Exception as exc: msg = "Could not create dataset %s in %s with shape %s, dtype %s, and iosettings %s. 
%s" % \ (name, parent.name, str(data_shape), str(dtype), str(io_settings), str(exc)) raise Exception(msg) from exc # Write the data if len(data) > dset.shape[0]: new_shape = list(dset.shape) new_shape[0] = len(data) dset.resize(new_shape) try: dset[:] = data except Exception as e: raise e return dset @docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference', 'default': None}, {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object', 'default': None}, returns='the reference', rtype=Reference) def __get_ref(self, **kwargs): container, region = getargs('container', 'region', kwargs) if container is None: return None if isinstance(container, Builder): if isinstance(container, LinkBuilder): builder = container.target_builder else: builder = container elif isinstance(container, ReferenceBuilder): builder = container.builder else: builder = self.manager.build(container) path = self.__get_path(builder) if isinstance(container, RegionBuilder): region = container.region if region is not None: dset = self.__file[path] if not isinstance(dset, Dataset): raise ValueError('cannot create region reference without Dataset') return self.__file[path].regionref[region] else: return self.__file[path].ref def __is_ref(self, dtype): if isinstance(dtype, DtypeSpec): return self.__is_ref(dtype.dtype) if isinstance(dtype, RefSpec): return True if isinstance(dtype, str): return dtype == DatasetBuilder.OBJECT_REF_TYPE or dtype == DatasetBuilder.REGION_REF_TYPE return False def __queue_ref(self, func): '''Set aside filling dset with references dest[sl] = func() Args: dset: the h5py.Dataset that the references need to be added to sl: the np.s_ (slice) object for indexing into dset func: a function to call to return the chunk of data, with references filled in ''' # TODO: come up with more intelligent way of # queueing reference resolution, based on reference # dependency self.__ref_queue.append(func) def __rec_get_ref(self, l): ret = list() for elem in l: if isinstance(elem, (list, tuple)): ret.append(self.__rec_get_ref(elem)) elif isinstance(elem, (Builder, Container)): ret.append(self.__get_ref(elem)) else: ret.append(elem) return ret ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/backends/io.py0000644000655200065520000000551400000000000020273 0ustar00circlecicircleci00000000000000from abc import ABCMeta, abstractmethod from ..build import BuildManager, GroupBuilder from ..utils import docval, getargs, popargs from ..container import Container class HDMFIO(metaclass=ABCMeta): @docval({'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None}, {"name": "source", "type": str, "doc": "the source of container being built i.e. file path", 'default': None}) def __init__(self, **kwargs): self.__manager = getargs('manager', kwargs) self.__built = dict() self.__source = getargs('source', kwargs) self.open() @property def manager(self): '''The BuildManager this HDMFIO is using''' return self.__manager @property def source(self): '''The source of the container being read/written i.e. file path''' return self.__source @docval(returns='the Container object that was read in', rtype=Container) def read(self, **kwargs): f_builder = self.read_builder() if all(len(v) == 0 for v in f_builder.values()): # TODO also check that the keys are appropriate. print a better error message raise UnsupportedOperation('Cannot build data. 
There are no values.') container = self.__manager.construct(f_builder) return container @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, {'name': 'exhaust_dci', 'type': bool, 'doc': 'exhaust DataChunkIterators one at a time. If False, exhaust them concurrently', 'default': True}) def write(self, **kwargs): container = popargs('container', kwargs) f_builder = self.__manager.build(container, source=self.__source) self.write_builder(f_builder, **kwargs) @abstractmethod @docval(returns='a GroupBuilder representing the read data', rtype='GroupBuilder') def read_builder(self): ''' Read data and return the GroupBuilder representing ''' pass @abstractmethod @docval({'name': 'builder', 'type': GroupBuilder, 'doc': 'the GroupBuilder object representing the Container'}, {'name': 'exhaust_dci', 'type': bool, 'doc': 'exhaust DataChunkIterators one at a time. If False, exhaust them concurrently', 'default': True}) def write_builder(self, **kwargs): ''' Write a GroupBuilder representing an Container object ''' pass @abstractmethod def open(self): ''' Open this HDMFIO object for writing of the builder ''' pass @abstractmethod def close(self): ''' Close this HDMFIO object to further reading/writing''' pass def __enter__(self): return self def __exit__(self, type, value, traceback): self.close() class UnsupportedOperation(ValueError): pass ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/backends/warnings.py0000644000655200065520000000016400000000000021510 0ustar00circlecicircleci00000000000000class BrokenLinkWarning(UserWarning): """ Raised when a group has a key with a None value. """ pass ././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.916188 hdmf-1.5.4/src/hdmf/build/0000755000655200065520000000000000000000000016632 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/build/__init__.py0000644000655200065520000000047300000000000020747 0ustar00circlecicircleci00000000000000from .builders import Builder from .builders import GroupBuilder from .builders import DatasetBuilder from .builders import ReferenceBuilder from .builders import RegionBuilder from .builders import LinkBuilder from .objectmapper import ObjectMapper from .manager import BuildManager from .manager import TypeMap ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/build/builders.py0000644000655200065520000005476000000000000021031 0ustar00circlecicircleci00000000000000import numpy as np from h5py import RegionReference import copy as _copy import itertools as _itertools import posixpath as _posixpath from abc import ABCMeta import warnings from collections.abc import Iterable from datetime import datetime from ..utils import docval, getargs, popargs, call_docval_func, fmt_docval_args class Builder(dict, metaclass=ABCMeta): @docval({'name': 'name', 'type': str, 'doc': 'the name of the group'}, {'name': 'parent', 'type': 'Builder', 'doc': 'the parent builder of this Builder', 'default': None}, {'name': 'source', 'type': str, 'doc': 'the source of the data in this builder e.g. 
file name', 'default': None}) def __init__(self, **kwargs): name, parent, source = getargs('name', 'parent', 'source', kwargs) super().__init__() self.__name = name self.__parent = parent if source is not None: self.__source = source elif parent is not None: self.__source = parent.source else: self.__source = None self.__written = False @property def path(self): """ Get the path of this Builder """ s = list() c = self while c is not None: s.append(c.name) c = c.parent return "/".join(s[::-1]) @property def written(self): ''' The source of this Builder ''' return self.__written @written.setter def written(self, s): if self.__written and not s: raise ValueError("cannot change written to not written") self.__written = s @property def name(self): ''' The name of this Builder ''' return self.__name @property def source(self): ''' The source of this Builder ''' return self.__source @source.setter def source(self, s): if self.__source is None: self.__source = s else: raise ValueError('Cannot reset source once it is specified') @property def parent(self): ''' The parent Builder of this Builder ''' return self.__parent @parent.setter def parent(self, p): if self.__parent is None: self.__parent = p if self.__source is None: self.source = p.source else: raise ValueError('Cannot reset parent once it is specified') def __repr__(self): ret = "%s %s %s" % (self.path, self.__class__.__name__, super().__repr__()) return ret class BaseBuilder(Builder): __attribute = 'attributes' @docval({'name': 'name', 'type': str, 'doc': 'the name of the group'}, {'name': 'attributes', 'type': dict, 'doc': 'a dictionary of attributes to create in this group', 'default': dict()}, {'name': 'parent', 'type': 'GroupBuilder', 'doc': 'the parent builder of this Builder', 'default': None}, {'name': 'source', 'type': str, 'doc': 'the source of the data represented in this Builder', 'default': None}) def __init__(self, **kwargs): name, attributes, parent, source = getargs('name', 'attributes', 'parent', 'source', kwargs) super().__init__(name, parent, source) super().__setitem__(BaseBuilder.__attribute, dict()) for name, val in attributes.items(): self.set_attribute(name, val) self.__location = None @property def location(self): """ The location of this Builder in its source """ return self.__location @location.setter def location(self, val): self.__location = val @property def attributes(self): ''' The attributes stored in this Builder object ''' return super().__getitem__(BaseBuilder.__attribute) @docval({'name': 'name', 'type': str, 'doc': 'the name of the attribute'}, {'name': 'value', 'type': None, 'doc': 'the attribute value'}) def set_attribute(self, **kwargs): ''' Set an attribute for this group. 
''' name, value = getargs('name', 'value', kwargs) super().__getitem__(BaseBuilder.__attribute)[name] = value # self.obj_type[name] = BaseBuilder.__attribute @docval({'name': 'builder', 'type': 'BaseBuilder', 'doc': 'the BaseBuilder to merge attributes from '}) def deep_update(self, **kwargs): ''' Merge attributes from the given BaseBuilder into this builder ''' builder = kwargs['builder'] # merge attributes for name, value in super(BaseBuilder, builder).__getitem__(BaseBuilder.__attribute).items(): self.set_attribute(name, value) class GroupBuilder(BaseBuilder): __link = 'links' __group = 'groups' __dataset = 'datasets' __attribute = 'attributes' @docval({'name': 'name', 'type': str, 'doc': 'the name of the group'}, {'name': 'groups', 'type': (dict, list), 'doc': 'a dictionary of subgroups to create in this group', 'default': dict()}, {'name': 'datasets', 'type': (dict, list), 'doc': 'a dictionary of datasets to create in this group', 'default': dict()}, {'name': 'attributes', 'type': dict, 'doc': 'a dictionary of attributes to create in this group', 'default': dict()}, {'name': 'links', 'type': (dict, list), 'doc': 'a dictionary of links to create in this group', 'default': dict()}, {'name': 'parent', 'type': 'GroupBuilder', 'doc': 'the parent builder of this Builder', 'default': None}, {'name': 'source', 'type': str, 'doc': 'the source of the data represented in this Builder', 'default': None}) def __init__(self, **kwargs): ''' Create a GroupBuilder object ''' name, groups, datasets, links, attributes, parent, source = getargs( 'name', 'groups', 'datasets', 'links', 'attributes', 'parent', 'source', kwargs) groups = self.__to_list(groups) datasets = self.__to_list(datasets) links = self.__to_list(links) self.obj_type = dict() super().__init__(name, attributes, parent, source) super().__setitem__(GroupBuilder.__group, dict()) super().__setitem__(GroupBuilder.__dataset, dict()) super().__setitem__(GroupBuilder.__link, dict()) self.__name = name for group in groups: self.set_group(group) for dataset in datasets: if not (dataset is None): self.set_dataset(dataset) for link in links: self.set_link(link) def __to_list(self, d): if isinstance(d, dict): return list(d.values()) return d @property def source(self): ''' The source of this Builder ''' return super().source @source.setter def source(self, s): ''' A recursive setter to set all subgroups/datasets/links source when this source is set ''' super(GroupBuilder, self.__class__).source.fset(self, s) for g in self.groups.values(): if g.source is None: g.source = s for d in self.datasets.values(): if d.source is None: d.source = s for l in self.links.values(): if l.source is None: l.source = s @property def groups(self): ''' The subgroups contained in this GroupBuilder ''' return super().__getitem__(GroupBuilder.__group) @property def datasets(self): ''' The datasets contained in this GroupBuilder ''' return super().__getitem__(GroupBuilder.__dataset) @property def links(self): ''' The datasets contained in this GroupBuilder ''' return super().__getitem__(GroupBuilder.__link) @docval({'name': 'name', 'type': str, 'doc': 'the name of the attribute'}, {'name': 'value', 'type': None, 'doc': 'the attribute value'}) def set_attribute(self, **kwargs): ''' Set an attribute for this group ''' name, value = getargs('name', 'value', kwargs) super().set_attribute(name, value) self.obj_type[name] = GroupBuilder.__attribute @docval({'name': 'builder', 'type': 'Builder', 'doc': 'the Builder to add to this GroupBuilder'}) def set_builder(self, **kwargs): ''' 
Add an existing builder to this this GroupBuilder ''' builder = getargs('builder', kwargs) if isinstance(builder, LinkBuilder): self.__set_builder(builder, GroupBuilder.__link) elif isinstance(builder, GroupBuilder): self.__set_builder(builder, GroupBuilder.__dataset) elif isinstance(builder, DatasetBuilder): self.__set_builder(builder, GroupBuilder.__dataset) else: raise ValueError("Got unexpected builder type: %s" % type(builder)) def __set_builder(self, builder, obj_type): name = builder.name if name in self.obj_type: if self.obj_type[name] != obj_type: if name == 'comments': # LEGACY: Support legacy files where "comments" exists as both an attribute and as dataset # in some groups. # To allow read to get past this special case, this will skip the issue. warnings.warn("'%s' already exists as %s; skipping..." % (name, self.obj_type[name])) else: raise KeyError("'%s' already exists as %s in %s, cannot set as %s" % (name, self.obj_type[name], self.name, obj_type)) super().__getitem__(obj_type)[name] = builder self.obj_type[name] = obj_type if builder.parent is None: builder.parent = self @docval({'name': 'name', 'type': str, 'doc': 'the name of this dataset'}, {'name': 'data', 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', Iterable), 'doc': 'a dictionary of datasets to create in this dataset', 'default': None}, {'name': 'dtype', 'type': (type, np.dtype, str, list), 'doc': 'the datatype of this dataset', 'default': None}, {'name': 'attributes', 'type': dict, 'doc': 'a dictionary of attributes to create in this dataset', 'default': dict()}, {'name': 'maxshape', 'type': (int, tuple), 'doc': 'the shape of this dataset. Use None for scalars', 'default': None}, {'name': 'chunks', 'type': bool, 'doc': 'whether or not to chunk this dataset', 'default': False}, returns='the DatasetBuilder object for the dataset', rtype='DatasetBuilder') def add_dataset(self, **kwargs): ''' Create a dataset and add it to this group ''' kwargs['parent'] = self kwargs['source'] = self.source pargs, pkwargs = fmt_docval_args(DatasetBuilder.__init__, kwargs) builder = DatasetBuilder(*pargs, **pkwargs) self.set_dataset(builder) return builder @docval({'name': 'builder', 'type': 'DatasetBuilder', 'doc': 'the DatasetBuilder that represents this dataset'}) def set_dataset(self, **kwargs): ''' Add a dataset to this group ''' builder = getargs('builder', kwargs) self.__set_builder(builder, GroupBuilder.__dataset) @docval({'name': 'name', 'type': str, 'doc': 'the name of this subgroup'}, {'name': 'groups', 'type': dict, 'doc': 'a dictionary of subgroups to create in this subgroup', 'default': dict()}, {'name': 'datasets', 'type': dict, 'doc': 'a dictionary of datasets to create in this subgroup', 'default': dict()}, {'name': 'attributes', 'type': dict, 'doc': 'a dictionary of attributes to create in this subgroup', 'default': dict()}, {'name': 'links', 'type': dict, 'doc': 'a dictionary of links to create in this subgroup', 'default': dict()}, returns='the GroupBuilder object for the subgroup', rtype='GroupBuilder') def add_group(self, **kwargs): ''' Add a subgroup with the given data to this group ''' name = kwargs.pop('name') builder = GroupBuilder(name, parent=self, **kwargs) self.set_group(builder) return builder @docval({'name': 'builder', 'type': 'GroupBuilder', 'doc': 'the GroupBuilder that represents this subgroup'}) def set_group(self, **kwargs): ''' Add a subgroup to this group ''' builder = getargs('builder', kwargs) self.__set_builder(builder, GroupBuilder.__group) @docval({'name': 'target', 
'type': ('GroupBuilder', 'DatasetBuilder'), 'doc': 'the target Builder'}, {'name': 'name', 'type': str, 'doc': 'the name of this link', 'default': None}, returns='the builder object for the soft link', rtype='LinkBuilder') def add_link(self, **kwargs): ''' Create a soft link and add it to this group ''' name, target = getargs('name', 'target', kwargs) builder = LinkBuilder(target, name, self) self.set_link(builder) return builder @docval({'name': 'builder', 'type': 'LinkBuilder', 'doc': 'the LinkBuilder that represents this link'}) def set_link(self, **kwargs): ''' Add a link to this group ''' builder = getargs('builder', kwargs) self.__set_builder(builder, GroupBuilder.__link) # TODO: write unittests for this method def deep_update(self, builder): ''' Recursively update subgroups in this group ''' super().deep_update(builder) # merge subgroups groups = super(GroupBuilder, builder).__getitem__(GroupBuilder.__group) self_groups = super().__getitem__(GroupBuilder.__group) for name, subgroup in groups.items(): if name in self_groups: self_groups[name].deep_update(subgroup) else: self.set_group(subgroup) # merge datasets datasets = super(GroupBuilder, builder).__getitem__(GroupBuilder.__dataset) self_datasets = super().__getitem__(GroupBuilder.__dataset) for name, dataset in datasets.items(): # self.add_dataset(name, dataset) if name in self_datasets: self_datasets[name].deep_update(dataset) # super().__getitem__(GroupBuilder.__dataset)[name] = dataset else: self.set_dataset(dataset) # merge links for name, link in super(GroupBuilder, builder).__getitem__(GroupBuilder.__link).items(): self.set_link(link) def is_empty(self): '''Returns true if there are no datasets, attributes, links or subgroups that contain datasets, attributes or links. False otherwise. ''' if (len(super().__getitem__(GroupBuilder.__dataset)) or len(super().__getitem__(GroupBuilder.__attribute)) or len(super().__getitem__(GroupBuilder.__link))): return False elif len(super().__getitem__(GroupBuilder.__group)): return all(g.is_empty() for g in super().__getitem__(GroupBuilder.__group).values()) else: return True def __getitem__(self, key): '''Like dict.__getitem__, but looks in groups, datasets, attributes, and links sub-dictionaries. ''' try: key_ar = _posixpath.normpath(key).split('/') return self.__get_rec(key_ar) except KeyError: raise KeyError(key) def get(self, key, default=None): '''Like dict.get, but looks in groups, datasets, attributes, and links sub-dictionaries. ''' try: key_ar = _posixpath.normpath(key).split('/') return self.__get_rec(key_ar) except KeyError: return default def __get_rec(self, key_ar): # recursive helper for __getitem__ if len(key_ar) == 1: return super().__getitem__(self.obj_type[key_ar[0]])[key_ar[0]] else: if key_ar[0] in super().__getitem__(GroupBuilder.__group): return super().__getitem__(GroupBuilder.__group)[key_ar[0]].__get_rec(key_ar[1:]) raise KeyError(key_ar[0]) def __setitem__(self, args, val): raise NotImplementedError('__setitem__') def __contains__(self, item): return self.obj_type.__contains__(item) def items(self): '''Like dict.items, but iterates over key-value pairs in groups, datasets, attributes, and links sub-dictionaries. ''' return _itertools.chain(super().__getitem__(GroupBuilder.__group).items(), super().__getitem__(GroupBuilder.__dataset).items(), super().__getitem__(GroupBuilder.__attribute).items(), super().__getitem__(GroupBuilder.__link).items()) def keys(self): '''Like dict.keys, but iterates over keys in groups, datasets, attributes, and links sub-dictionaries. 
''' return _itertools.chain(super().__getitem__(GroupBuilder.__group).keys(), super().__getitem__(GroupBuilder.__dataset).keys(), super().__getitem__(GroupBuilder.__attribute).keys(), super().__getitem__(GroupBuilder.__link).keys()) def values(self): '''Like dict.values, but iterates over values in groups, datasets, attributes, and links sub-dictionaries. ''' return _itertools.chain(super().__getitem__(GroupBuilder.__group).values(), super().__getitem__(GroupBuilder.__dataset).values(), super().__getitem__(GroupBuilder.__attribute).values(), super().__getitem__(GroupBuilder.__link).values()) class DatasetBuilder(BaseBuilder): OBJECT_REF_TYPE = 'object' REGION_REF_TYPE = 'region' @docval({'name': 'name', 'type': str, 'doc': 'the name of the dataset'}, {'name': 'data', 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', 'RegionBuilder', Iterable, datetime), 'doc': 'the data in this dataset', 'default': None}, {'name': 'dtype', 'type': (type, np.dtype, str, list), 'doc': 'the datatype of this dataset', 'default': None}, {'name': 'attributes', 'type': dict, 'doc': 'a dictionary of attributes to create in this dataset', 'default': dict()}, {'name': 'maxshape', 'type': (int, tuple), 'doc': 'the shape of this dataset. Use None for scalars', 'default': None}, {'name': 'chunks', 'type': bool, 'doc': 'whether or not to chunk this dataset', 'default': False}, {'name': 'parent', 'type': GroupBuilder, 'doc': 'the parent builder of this Builder', 'default': None}, {'name': 'source', 'type': str, 'doc': 'the source of the data in this builder', 'default': None}) def __init__(self, **kwargs): ''' Create a Builder object for a dataset ''' name, data, dtype, attributes, maxshape, chunks, parent, source = getargs( 'name', 'data', 'dtype', 'attributes', 'maxshape', 'chunks', 'parent', 'source', kwargs) super().__init__(name, attributes, parent, source) self['data'] = data self['attributes'] = _copy.copy(attributes) self.__chunks = chunks self.__maxshape = maxshape if isinstance(data, BaseBuilder): if dtype is None: dtype = self.OBJECT_REF_TYPE self.__dtype = dtype self.__name = name @property def data(self): ''' The data stored in the dataset represented by this builder ''' return self['data'] @data.setter def data(self, val): if self['data'] is not None: raise AttributeError("'data' already set") self['data'] = val @property def chunks(self): ''' Whether or not this dataset is chunked ''' return self.__chunks @property def maxshape(self): ''' The max shape of this object ''' return self.__maxshape @property def dtype(self): ''' The data type of this object ''' return self.__dtype @dtype.setter def dtype(self, val): ''' The data type of this object ''' if self.__dtype is None: self.__dtype = val else: raise AttributeError("cannot overwrite dtype") @docval({'name': 'dataset', 'type': 'DatasetBuilder', 'doc': 'the DatasetBuilder to merge into this DatasetBuilder'}) def deep_update(self, **kwargs): '''Merge data and attributes from given DatasetBuilder into this DatasetBuilder''' dataset = getargs('dataset', kwargs) if dataset.data: self['data'] = dataset.data # TODO: figure out if we want to add a check for overwrite self['attributes'].update(dataset.attributes) class LinkBuilder(Builder): @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder), 'doc': 'the target of this link'}, {'name': 'name', 'type': str, 'doc': 'the name of the dataset', 'default': None}, {'name': 'parent', 'type': GroupBuilder, 'doc': 'the parent builder of this Builder', 'default': None}, {'name': 'source', 'type': 
str, 'doc': 'the source of the data in this builder', 'default': None}) def __init__(self, **kwargs): name, builder, parent, source = getargs('name', 'builder', 'parent', 'source', kwargs) if name is None: name = builder.name super().__init__(name, parent, source) self['builder'] = builder @property def builder(self): ''' The target builder object ''' return self['builder'] class ReferenceBuilder(dict): @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder), 'doc': 'the Dataset this region applies to'}) def __init__(self, **kwargs): builder = getargs('builder', kwargs) self['builder'] = builder @property def builder(self): ''' The target builder object ''' return self['builder'] class RegionBuilder(ReferenceBuilder): @docval({'name': 'region', 'type': (slice, tuple, list, RegionReference), 'doc': 'the region i.e. slice or indices into the target Dataset'}, {'name': 'builder', 'type': DatasetBuilder, 'doc': 'the Dataset this region applies to'}) def __init__(self, **kwargs): region = popargs('region', kwargs) call_docval_func(super().__init__, kwargs) self['region'] = region @property def region(self): ''' The target builder object ''' return self['region'] ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/build/manager.py0000644000655200065520000010613600000000000020625 0ustar00circlecicircleci00000000000000import numpy as np from collections import OrderedDict from copy import copy from datetime import datetime from ..utils import docval, getargs, ExtenderMeta, get_docval, call_docval_func, fmt_docval_args from ..container import AbstractContainer, Container, Data, DataRegion from ..spec import AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, NamespaceCatalog, RefSpec, SpecReader from ..spec.spec import BaseStorageSpec from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, BaseBuilder class Proxy: """ A temporary object to represent a Container. This gets used when resolving the true location of a Container's parent. Proxy objects allow simple bookkeeping of all potential parents a Container may have. This object is used by providing all the necessary information for describing the object. This object gets passed around and candidates are accumulated. Upon calling resolve, all saved candidates are matched against the information (provided to the constructor). The candidate that has an exact match is returned. """ def __init__(self, manager, source, location, namespace, data_type): self.__source = source self.__location = location self.__namespace = namespace self.__data_type = data_type self.__manager = manager self.__candidates = list() @property def source(self): """The source of the object e.g. file source""" return self.__source @property def location(self): """The location of the object. 
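# --- Illustrative sketch (not part of the original source) -----------------
# A Proxy stands in for a Container whose true parent is not known yet:
# candidates are accumulated and resolve() returns the one whose identifying
# fields (source, location, namespace, data_type) match the Proxy's own. A
# stripped-down sketch of that candidate-matching pattern; the class is
# illustrative, not the HDMF implementation.
class MiniProxy:
    def __init__(self, source, location, data_type):
        self.key = (source, location, data_type)
        self.candidates = []

    def add_candidate(self, obj, source, location, data_type):
        self.candidates.append(((source, location, data_type), obj))

    def resolve(self):
        for key, obj in self.candidates:
            if key == self.key:
                return obj
        raise ValueError('No matching candidate found for %s' % (self.key,))

if __name__ == '__main__':
    p = MiniProxy('data.h5', '/acquisition/ts', 'TimeSeries')
    p.add_candidate('wrong object', 'other.h5', '/acquisition/ts', 'TimeSeries')
    p.add_candidate('right object', 'data.h5', '/acquisition/ts', 'TimeSeries')
    assert p.resolve() == 'right object'
# ----------------------------------------------------------------------------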
This can be thought of as a unique path""" return self.__location @property def namespace(self): """The namespace from which the data_type of this Proxy came from""" return self.__namespace @property def data_type(self): """The data_type of Container that should match this Proxy""" return self.__data_type @docval({"name": "object", "type": (BaseBuilder, Container), "doc": "the container or builder to get a proxy for"}) def matches(self, **kwargs): obj = getargs('object', kwargs) if not isinstance(obj, Proxy): obj = self.__manager.get_proxy(obj) return self == obj @docval({"name": "container", "type": Container, "doc": "the Container to add as a candidate match"}) def add_candidate(self, **kwargs): container = getargs('container', kwargs) self.__candidates.append(container) def resolve(self): for candidate in self.__candidates: if self.matches(candidate): return candidate raise ValueError("No matching candidate Container found for " + self) def __eq__(self, other): return self.data_type == other.data_type and \ self.location == other.location and \ self.namespace == other.namespace and \ self.source == other.source def __repr__(self): ret = dict() for key in ('source', 'location', 'namespace', 'data_type'): ret[key] = getattr(self, key, None) return str(ret) class BuildManager: """ A class for managing builds of AbstractContainers """ def __init__(self, type_map): self.__builders = dict() self.__containers = dict() self.__type_map = type_map @property def namespace_catalog(self): return self.__type_map.namespace_catalog @property def type_map(self): return self.__type_map @docval({"name": "object", "type": (BaseBuilder, AbstractContainer), "doc": "the container or builder to get a proxy for"}, {"name": "source", "type": str, "doc": "the source of container being built i.e. file path", 'default': None}) def get_proxy(self, **kwargs): obj = getargs('object', kwargs) if isinstance(obj, BaseBuilder): return self.__get_proxy_builder(obj) elif isinstance(obj, AbstractContainer): return self.__get_proxy_container(obj) def __get_proxy_builder(self, builder): dt = self.__type_map.get_builder_dt(builder) ns = self.__type_map.get_builder_ns(builder) stack = list() tmp = builder while tmp is not None: stack.append(tmp.name) tmp = self.__get_parent_dt_builder(tmp) loc = "/".join(reversed(stack)) return Proxy(self, builder.source, loc, ns, dt) def __get_proxy_container(self, container): ns, dt = self.__type_map.get_container_ns_dt(container) stack = list() tmp = container while tmp is not None: if isinstance(tmp, Proxy): stack.append(tmp.location) break else: stack.append(tmp.name) tmp = tmp.parent loc = "/".join(reversed(stack)) return Proxy(self, container.container_source, loc, ns, dt) @docval({"name": "container", "type": AbstractContainer, "doc": "the container to convert to a Builder"}, {"name": "source", "type": str, "doc": "the source of container being built i.e. 
file path", 'default': None}, {"name": "spec_ext", "type": BaseStorageSpec, "doc": "a spec that further refines the base specificatoin", 'default': None}) def build(self, **kwargs): """ Build the GroupBuilder for the given AbstractContainer""" container = getargs('container', kwargs) container_id = self.__conthash__(container) result = self.__builders.get(container_id) source, spec_ext = getargs('source', 'spec_ext', kwargs) if result is None: if container.container_source is None: container.container_source = source else: if source is None: source = container.container_source else: if container.container_source != source: raise ValueError("Can't change container_source once set") result = self.__type_map.build(container, self, source=source, spec_ext=spec_ext) self.prebuilt(container, result) elif container.modified: if isinstance(result, GroupBuilder): # TODO: if Datasets attributes are allowed to be modified, we need to # figure out how to handle that starting here. result = self.__type_map.build(container, self, builder=result, source=source, spec_ext=spec_ext) return result @docval({"name": "container", "type": AbstractContainer, "doc": "the AbstractContainer to save as prebuilt"}, {'name': 'builder', 'type': (DatasetBuilder, GroupBuilder), 'doc': 'the Builder representation of the given container'}) def prebuilt(self, **kwargs): ''' Save the Builder for a given AbstractContainer for future use ''' container, builder = getargs('container', 'builder', kwargs) container_id = self.__conthash__(container) self.__builders[container_id] = builder builder_id = self.__bldrhash__(builder) self.__containers[builder_id] = container def __conthash__(self, obj): return id(obj) def __bldrhash__(self, obj): return id(obj) @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder), 'doc': 'the builder to construct the AbstractContainer from'}) def construct(self, **kwargs): """ Construct the AbstractContainer represented by the given builder """ builder = getargs('builder', kwargs) if isinstance(builder, LinkBuilder): builder = builder.target builder_id = self.__bldrhash__(builder) result = self.__containers.get(builder_id) if result is None: parent_builder = self.__get_parent_dt_builder(builder) if parent_builder is not None: parent = self.__get_proxy_builder(parent_builder) result = self.__type_map.construct(builder, self, parent) else: # we are at the top of the hierarchy, # so it must be time to resolve parents result = self.__type_map.construct(builder, self, None) self.__resolve_parents(result) self.prebuilt(result, builder) result.set_modified(False) return result def __resolve_parents(self, container): stack = [container] while len(stack) > 0: tmp = stack.pop() if isinstance(tmp.parent, Proxy): tmp.parent = tmp.parent.resolve() for child in tmp.children: stack.append(child) def __get_parent_dt_builder(self, builder): ''' Get the next builder above the given builder that has a data_type ''' tmp = builder.parent ret = None while tmp is not None: ret = tmp dt = self.__type_map.get_builder_dt(tmp) if dt is not None: break tmp = tmp.parent return ret # *** The following methods just delegate calls to self.__type_map *** @docval({'name': 'builder', 'type': Builder, 'doc': 'the Builder to get the class object for'}) def get_cls(self, **kwargs): ''' Get the class object for the given Builder ''' builder = getargs('builder', kwargs) return self.__type_map.get_cls(builder) @docval({"name": "container", "type": AbstractContainer, "doc": "the container to convert to a Builder"}, returns='The name 
a Builder should be given when building this container', rtype=str) def get_builder_name(self, **kwargs): ''' Get the name a Builder should be given ''' container = getargs('container', kwargs) return self.__type_map.get_builder_name(container) @docval({'name': 'spec', 'type': (DatasetSpec, GroupSpec), 'doc': 'the parent spec to search'}, {'name': 'builder', 'type': (DatasetBuilder, GroupBuilder, LinkBuilder), 'doc': 'the builder to get the sub-specification for'}) def get_subspec(self, **kwargs): ''' Get the specification from this spec that corresponds to the given builder ''' spec, builder = getargs('spec', 'builder', kwargs) return self.__type_map.get_subspec(spec, builder) @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder, LinkBuilder), 'doc': 'the builder to get the sub-specification for'}) def get_builder_ns(self, **kwargs): ''' Get the namespace of a builder ''' builder = getargs('builder', kwargs) return self.__type_map.get_builder_ns(builder) @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder, LinkBuilder), 'doc': 'the builder to get the data_type for'}) def get_builder_dt(self, **kwargs): ''' Get the data_type of a builder ''' builder = getargs('builder', kwargs) return self.__type_map.get_builder_dt(builder) class TypeSource: '''A class to indicate the source of a data_type in a namespace. This class should only be used by TypeMap ''' @docval({"name": "namespace", "type": str, "doc": "the namespace the from, which the data_type originated"}, {"name": "data_type", "type": str, "doc": "the name of the type"}) def __init__(self, **kwargs): namespace, data_type = getargs('namespace', 'data_type', kwargs) self.__namespace = namespace self.__data_type = data_type @property def namespace(self): return self.__namespace @property def data_type(self): return self.__data_type class TypeMap: ''' A class to maintain the map between ObjectMappers and AbstractContainer classes ''' @docval({'name': 'namespaces', 'type': NamespaceCatalog, 'doc': 'the NamespaceCatalog to use', 'default': None}, {'name': 'mapper_cls', 'type': type, 'doc': 'the ObjectMapper class to use', 'default': None}) def __init__(self, **kwargs): namespaces, mapper_cls = getargs('namespaces', 'mapper_cls', kwargs) if namespaces is None: namespaces = NamespaceCatalog() if mapper_cls is None: from .objectmapper import ObjectMapper # avoid circular import mapper_cls = ObjectMapper self.__ns_catalog = namespaces self.__mappers = dict() # already constructed ObjectMapper classes self.__mapper_cls = dict() # the ObjectMapper class to use for each container type self.__container_types = OrderedDict() self.__data_types = dict() self.__default_mapper_cls = mapper_cls @property def namespace_catalog(self): return self.__ns_catalog def __copy__(self): ret = TypeMap(copy(self.__ns_catalog), self.__default_mapper_cls) ret.merge(self) return ret def __deepcopy__(self, memo): # XXX: From @nicain: All of a sudden legacy tests started # needing this argument in deepcopy. Doesn't hurt anything, though. 
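# --- Illustrative sketch (not part of the original source) -----------------
# TypeMap keeps two registries -- (namespace, data_type) -> container class
# and container class -> ObjectMapper class -- and merge()/copy_mappers()
# copy entries from one TypeMap into another. A stripped-down sketch of that
# bookkeeping; the class and names are illustrative, not the HDMF API.
from collections import OrderedDict

class MiniTypeMap:
    def __init__(self):
        self.container_types = OrderedDict()  # {namespace: {data_type: container class}}
        self.mapper_cls = dict()              # {container class: mapper class}

    def register_container_type(self, namespace, data_type, cls):
        self.container_types.setdefault(namespace, dict())[data_type] = cls

    def register_map(self, container_cls, mapper_cls):
        self.mapper_cls[container_cls] = mapper_cls

    def merge(self, other):
        for ns, types in other.container_types.items():
            for data_type, cls in types.items():
                self.register_container_type(ns, data_type, cls)
        for cls, mapper in other.mapper_cls.items():
            self.register_map(cls, mapper)

if __name__ == '__main__':
    class MyData: pass
    class MyMapper: pass
    a, b = MiniTypeMap(), MiniTypeMap()
    a.register_container_type('my-ns', 'MyData', MyData)
    a.register_map(MyData, MyMapper)
    b.merge(a)
    assert b.container_types['my-ns']['MyData'] is MyData
    assert b.mapper_cls[MyData] is MyMapper
# ----------------------------------------------------------------------------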
return self.__copy__() def copy_mappers(self, type_map): for namespace in self.__ns_catalog.namespaces: if namespace not in type_map.__container_types: continue for data_type in self.__ns_catalog.get_namespace(namespace).get_registered_types(): container_cls = type_map.__container_types[namespace].get(data_type) if container_cls is None: continue self.register_container_type(namespace, data_type, container_cls) if container_cls in type_map.__mapper_cls: self.register_map(container_cls, type_map.__mapper_cls[container_cls]) def merge(self, type_map, ns_catalog=False): if ns_catalog: self.namespace_catalog.merge(type_map.namespace_catalog) for namespace in type_map.__container_types: for data_type in type_map.__container_types[namespace]: container_cls = type_map.__container_types[namespace][data_type] self.register_container_type(namespace, data_type, container_cls) for container_cls in type_map.__mapper_cls: self.register_map(container_cls, type_map.__mapper_cls[container_cls]) @docval({'name': 'namespace_path', 'type': str, 'doc': 'the path to the file containing the namespaces(s) to load'}, {'name': 'resolve', 'type': bool, 'doc': 'whether or not to include objects from included/parent spec objects', 'default': True}, {'name': 'reader', 'type': SpecReader, 'doc': 'the class to user for reading specifications', 'default': None}, returns="the namespaces loaded from the given file", rtype=dict) def load_namespaces(self, **kwargs): '''Load namespaces from a namespace file. This method will call load_namespaces on the NamespaceCatalog used to construct this TypeMap. Additionally, it will process the return value to keep track of what types were included in the loaded namespaces. Calling load_namespaces here has the advantage of being able to keep track of type dependencies across namespaces. ''' deps = call_docval_func(self.__ns_catalog.load_namespaces, kwargs) for new_ns, ns_deps in deps.items(): for src_ns, types in ns_deps.items(): for dt in types: container_cls = self.get_container_cls(src_ns, dt) if container_cls is None: container_cls = TypeSource(src_ns, dt) self.register_container_type(new_ns, dt, container_cls) return deps _type_map = { 'text': str, 'float': float, 'float32': float, 'float64': float, 'int': int, 'int32': int, 'bool': bool, 'uint64': np.uint64, 'isodatetime': datetime } def __get_container_type(self, container_name): container_type = None for val in self.__container_types.values(): container_type = val.get(container_name) if container_type is not None: return container_type if container_type is None: raise TypeDoesNotExistError("Type '%s' does not exist." 
% container_name) def __get_type(self, spec): if isinstance(spec, AttributeSpec): if isinstance(spec.dtype, RefSpec): tgttype = spec.dtype.target_type for val in self.__container_types.values(): container_type = val.get(tgttype) if container_type is not None: return container_type return Data, Container elif spec.shape is None and spec.dims is None: return self._type_map.get(spec.dtype) else: return 'array_data', 'data' if isinstance(spec, LinkSpec): return AbstractContainer if spec.data_type_def is not None: return self.__get_container_type(spec.data_type_def) if spec.data_type_inc is not None: return self.__get_container_type(spec.data_type_inc) if spec.shape is None and spec.dims is None: return self._type_map.get(spec.dtype) return 'array_data', 'data' def __ischild(self, dtype): """ Check if dtype represents a type that is a child """ ret = False if isinstance(dtype, tuple): for sub in dtype: ret = ret or self.__ischild(sub) else: if isinstance(dtype, type) and issubclass(dtype, (Container, Data, DataRegion)): ret = True return ret @staticmethod def __set_default_name(docval_args, default_name): new_docval_args = [] for x in docval_args: if x['name'] == 'name': x['default'] = default_name new_docval_args.append(x) return new_docval_args def __get_cls_dict(self, base, addl_fields, name=None, default_name=None): """ Get __init__ and fields of new class. :param base: The base class of the new class :param addl_fields: Dict of additional fields that are not in the base class :param name: Fixed name of instances of this class, or None if name is not fixed to a particular value :param default_name: Default name of instances of this class, or None if not specified """ # TODO: fix this to be more maintainable and smarter if base is None: raise ValueError('cannot generate class without base class') existing_args = set() docval_args = list() new_args = list() fields = list() # copy docval args from superclass for arg in get_docval(base.__init__): existing_args.add(arg['name']) if arg['name'] in addl_fields: continue docval_args.append(arg) # set default name if provided if default_name is not None: docval_args = self.__set_default_name(docval_args, default_name) # add new fields to docval and class fields for f, field_spec in addl_fields.items(): if not f == 'help': # (legacy) do not all help to any part of class object # build docval arguments for generated constructor dtype = self.__get_type(field_spec) if dtype is None: raise ValueError("Got \"None\" for field specification: {}".format(field_spec)) docval_arg = {'name': f, 'type': dtype, 'doc': field_spec.doc} if hasattr(field_spec, 'shape') and field_spec.shape is not None: docval_arg.update(shape=field_spec.shape) # docval_arg['shape'] = field_spec.shape if not field_spec.required: docval_arg['default'] = getattr(field_spec, 'default_value', None) docval_args.append(docval_arg) # auto-initialize arguments not found in superclass if f not in existing_args: new_args.append(f) # add arguments not found in superclass to fields for getter/setter generation if self.__ischild(dtype): fields.append({'name': f, 'child': True}) else: fields.append(f) # if spec provides a fixed name for this type, remove the 'name' arg from docval_args so that values cannot # be passed for a name positional or keyword arg if name is not None: # fixed name is specified in spec, remove it from docval args docval_args = filter(lambda x: x['name'] != 'name', docval_args) @docval(*docval_args) def __init__(self, **kwargs): if name is not None: kwargs.update(name=name) 
pargs, pkwargs = fmt_docval_args(base.__init__, kwargs) base.__init__(self, *pargs, **pkwargs) # special case: need to pass self to __init__ for f in new_args: arg_val = kwargs.get(f, None) if arg_val is not None: setattr(self, f, arg_val) return {'__init__': __init__, base._fieldsname: tuple(fields)} @docval({"name": "namespace", "type": str, "doc": "the namespace containing the data_type"}, {"name": "data_type", "type": str, "doc": "the data type to create a AbstractContainer class for"}, returns='the class for the given namespace and data_type', rtype=type) def get_container_cls(self, **kwargs): '''Get the container class from data type specification If no class has been associated with the ``data_type`` from ``namespace``, a class will be dynamically created and returned. ''' namespace, data_type = getargs('namespace', 'data_type', kwargs) cls = self.__get_container_cls(namespace, data_type) if cls is None: spec = self.__ns_catalog.get_spec(namespace, data_type) dt_hier = self.__ns_catalog.get_hierarchy(namespace, data_type) parent_cls = None for t in dt_hier: parent_cls = self.__get_container_cls(namespace, t) if parent_cls is not None: break if parent_cls is not None: bases = (parent_cls,) else: if isinstance(spec, GroupSpec): bases = (Container,) elif isinstance(spec, DatasetSpec): bases = (Data,) else: raise ValueError("Cannot generate class from %s" % type(spec)) parent_cls = bases[0] if type(parent_cls) is not ExtenderMeta: raise ValueError("parent class %s is not of type ExtenderMeta - %s" % (parent_cls, type(parent_cls))) name = data_type attr_names = self.__default_mapper_cls.get_attr_names(spec) fields = dict() for k, field_spec in attr_names.items(): if not spec.is_inherited_spec(field_spec): fields[k] = field_spec try: d = self.__get_cls_dict(parent_cls, fields, spec.name, spec.default_name) except TypeDoesNotExistError as e: name = spec.get('data_type_def', 'Unknown') raise ValueError("Cannot dynamically generate class for type '%s'. " % name + str(e) + " Please define that type before defining '%s'." % name) cls = ExtenderMeta(str(name), bases, d) self.register_container_type(namespace, data_type, cls) return cls def __get_container_cls(self, namespace, data_type): if namespace not in self.__container_types: return None if data_type not in self.__container_types[namespace]: return None ret = self.__container_types[namespace][data_type] if isinstance(ret, TypeSource): ret = self.__get_container_cls(ret.namespace, ret.data_type) if ret is not None: self.register_container_type(namespace, data_type, ret) return ret @docval({'name': 'obj', 'type': (GroupBuilder, DatasetBuilder, LinkBuilder, GroupSpec, DatasetSpec), 'doc': 'the object to get the type key for'}) def __type_key(self, obj): """ A wrapper function to simplify the process of getting a type_key for an object. The type_key is used to get the data_type from a Builder's attributes. 
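        For example, roughly (a sketch, where builder is a hypothetical GroupBuilder; the
        exact key string depends on the registered spec classes and is typically
        'data_type' for the hdmf-common namespace):

            key = self.__type_key(builder)
            data_type = builder.attributes.get(key)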
""" if isinstance(obj, LinkBuilder): obj = obj.builder if isinstance(obj, (GroupBuilder, GroupSpec)): return self.__ns_catalog.group_spec_cls.type_key() else: return self.__ns_catalog.dataset_spec_cls.type_key() @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder, LinkBuilder), 'doc': 'the builder to get the data_type for'}) def get_builder_dt(self, **kwargs): ''' Get the data_type of a builder ''' builder = getargs('builder', kwargs) ret = None if isinstance(builder, LinkBuilder): builder = builder.builder if isinstance(builder, GroupBuilder): ret = builder.attributes.get(self.__ns_catalog.group_spec_cls.type_key()) else: ret = builder.attributes.get(self.__ns_catalog.dataset_spec_cls.type_key()) if isinstance(ret, bytes): ret = ret.decode('UTF-8') return ret @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder, LinkBuilder), 'doc': 'the builder to get the sub-specification for'}) def get_builder_ns(self, **kwargs): ''' Get the namespace of a builder ''' builder = getargs('builder', kwargs) if isinstance(builder, LinkBuilder): builder = builder.builder ret = builder.attributes.get('namespace') return ret @docval({'name': 'builder', 'type': Builder, 'doc': 'the Builder object to get the corresponding AbstractContainer class for'}) def get_cls(self, **kwargs): ''' Get the class object for the given Builder ''' builder = getargs('builder', kwargs) data_type = self.get_builder_dt(builder) if data_type is None: raise ValueError("No data_type found for builder %s" % builder.path) namespace = self.get_builder_ns(builder) if namespace is None: raise ValueError("No namespace found for builder %s" % builder.path) return self.get_container_cls(namespace, data_type) @docval({'name': 'spec', 'type': (DatasetSpec, GroupSpec), 'doc': 'the parent spec to search'}, {'name': 'builder', 'type': (DatasetBuilder, GroupBuilder, LinkBuilder), 'doc': 'the builder to get the sub-specification for'}) def get_subspec(self, **kwargs): ''' Get the specification from this spec that corresponds to the given builder ''' spec, builder = getargs('spec', 'builder', kwargs) if isinstance(builder, LinkBuilder): builder_type = type(builder.builder) else: builder_type = type(builder) if issubclass(builder_type, DatasetBuilder): subspec = spec.get_dataset(builder.name) else: subspec = spec.get_group(builder.name) if subspec is None: # builder was generated from something with a data_type and a wildcard name if isinstance(builder, LinkBuilder): dt = self.get_builder_dt(builder.builder) else: dt = self.get_builder_dt(builder) if dt is not None: ns = self.get_builder_ns(builder) hierarchy = self.__ns_catalog.get_hierarchy(ns, dt) for t in hierarchy: subspec = spec.get_data_type(t) if subspec is not None: break return subspec def get_container_ns_dt(self, obj): container_cls = obj.__class__ namespace, data_type = self.get_container_cls_dt(container_cls) return namespace, data_type def get_container_cls_dt(self, cls): def_ret = (None, None) for _cls in cls.__mro__: ret = self.__data_types.get(_cls, def_ret) if ret is not def_ret: return ret return ret @docval({'name': 'namespace', 'type': str, 'doc': 'the namespace to get the container classes for', 'default': None}) def get_container_classes(self, **kwargs): namespace = getargs('namespace', kwargs) ret = self.__data_types.keys() if namespace is not None: ret = filter(lambda x: self.__data_types[x][0] == namespace, ret) return list(ret) @docval({'name': 'obj', 'type': (AbstractContainer, Builder), 'doc': 'the object to get the ObjectMapper for'}, 
returns='the ObjectMapper to use for mapping the given object', rtype='ObjectMapper') def get_map(self, **kwargs): """ Return the ObjectMapper object that should be used for the given container """ obj = getargs('obj', kwargs) # get the container class, and namespace/data_type if isinstance(obj, AbstractContainer): container_cls = obj.__class__ namespace, data_type = self.get_container_cls_dt(container_cls) if namespace is None: raise ValueError("class %s is not mapped to a data_type" % container_cls) else: data_type = self.get_builder_dt(obj) namespace = self.get_builder_ns(obj) container_cls = self.get_cls(obj) # now build the ObjectMapper class mapper = self.__mappers.get(container_cls) if mapper is None: mapper_cls = self.__default_mapper_cls for cls in container_cls.__mro__: tmp_mapper_cls = self.__mapper_cls.get(cls) if tmp_mapper_cls is not None: mapper_cls = tmp_mapper_cls break spec = self.__ns_catalog.get_spec(namespace, data_type) mapper = mapper_cls(spec) self.__mappers[container_cls] = mapper return mapper @docval({"name": "namespace", "type": str, "doc": "the namespace containing the data_type to map the class to"}, {"name": "data_type", "type": str, "doc": "the data_type to map the class to"}, {"name": "container_cls", "type": (TypeSource, type), "doc": "the class to map to the specified data_type"}) def register_container_type(self, **kwargs): ''' Map a container class to a data_type ''' namespace, data_type, container_cls = getargs('namespace', 'data_type', 'container_cls', kwargs) spec = self.__ns_catalog.get_spec(namespace, data_type) # make sure the spec exists self.__container_types.setdefault(namespace, dict()) self.__container_types[namespace][data_type] = container_cls self.__data_types.setdefault(container_cls, (namespace, data_type)) setattr(container_cls, spec.type_key(), data_type) setattr(container_cls, 'namespace', namespace) @docval({"name": "container_cls", "type": type, "doc": "the AbstractContainer class for which the given ObjectMapper class gets used for"}, {"name": "mapper_cls", "type": type, "doc": "the ObjectMapper class to use to map"}) def register_map(self, **kwargs): ''' Map a container class to an ObjectMapper class ''' container_cls, mapper_cls = getargs('container_cls', 'mapper_cls', kwargs) if self.get_container_cls_dt(container_cls) == (None, None): raise ValueError('cannot register map for type %s - no data_type found' % container_cls) self.__mapper_cls[container_cls] = mapper_cls @docval({"name": "container", "type": AbstractContainer, "doc": "the container to convert to a Builder"}, {"name": "manager", "type": BuildManager, "doc": "the BuildManager to use for managing this build", 'default': None}, {"name": "source", "type": str, "doc": "the source of container being built i.e. 
file path", 'default': None}, {"name": "builder", "type": GroupBuilder, "doc": "the Builder to build on", 'default': None}, {"name": "spec_ext", "type": BaseStorageSpec, "doc": "a spec extension", 'default': None}) def build(self, **kwargs): """ Build the GroupBuilder for the given AbstractContainer""" container, manager, builder = getargs('container', 'manager', 'builder', kwargs) source, spec_ext = getargs('source', 'spec_ext', kwargs) # get the ObjectMapper to map between Spec objects and AbstractContainer attributes obj_mapper = self.get_map(container) if obj_mapper is None: raise ValueError('No ObjectMapper found for container of type %s' % str(container.__class__.__name__)) # convert the container to a builder using the ObjectMapper if manager is None: manager = BuildManager(self) builder = obj_mapper.build(container, manager, builder=builder, source=source, spec_ext=spec_ext) # add additional attributes (namespace, data_type, object_id) to builder namespace, data_type = self.get_container_ns_dt(container) builder.set_attribute('namespace', namespace) builder.set_attribute(self.__type_key(obj_mapper.spec), data_type) builder.set_attribute(obj_mapper.spec.id_key(), container.object_id) return builder @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder), 'doc': 'the builder to construct the AbstractContainer from'}, {'name': 'build_manager', 'type': BuildManager, 'doc': 'the BuildManager for constructing', 'default': None}, {'name': 'parent', 'type': (Proxy, Container), 'doc': 'the parent Container/Proxy for the Container being built', 'default': None}) def construct(self, **kwargs): """ Construct the AbstractContainer represented by the given builder """ builder, build_manager, parent = getargs('builder', 'build_manager', 'parent', kwargs) if build_manager is None: build_manager = BuildManager(self) obj_mapper = self.get_map(builder) if obj_mapper is None: dt = builder.attributes[self.namespace_catalog.group_spec_cls.type_key()] raise ValueError('No ObjectMapper found for builder of type %s' % dt) else: return obj_mapper.construct(builder, build_manager, parent) @docval({"name": "container", "type": AbstractContainer, "doc": "the container to convert to a Builder"}, returns='The name a Builder should be given when building this container', rtype=str) def get_builder_name(self, **kwargs): ''' Get the name a Builder should be given ''' container = getargs('container', kwargs) obj_mapper = self.get_map(container) if obj_mapper is None: raise ValueError('No ObjectMapper found for container of type %s' % str(container.__class__.__name__)) else: return obj_mapper.get_builder_name(container) class TypeDoesNotExistError(Exception): pass ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/build/map.py0000644000655200065520000000066000000000000017763 0ustar00circlecicircleci00000000000000# this prevents breaking of code that imports these classes directly from map.py from .manager import Proxy, BuildManager, TypeSource, TypeMap # noqa: F401 from .objectmapper import ObjectMapper # noqa: F401 import warnings warnings.warn(PendingDeprecationWarning('Classes in map.py should be imported from hdmf.build. 
Importing from ' 'hdmf.build.map is subject to change in HDMF 2.0.')) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/build/objectmapper.py0000644000655200065520000013205400000000000021664 0ustar00circlecicircleci00000000000000import re import numpy as np import warnings from collections import OrderedDict from copy import copy from datetime import datetime from ..utils import docval, getargs, ExtenderMeta, get_docval from ..container import AbstractContainer, Container, Data, DataRegion from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, NAME_WILDCARD, RefSpec from ..data_utils import DataIO, AbstractDataChunkIterator from ..query import ReferenceResolver from ..spec.spec import BaseStorageSpec from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, RegionBuilder from .manager import Proxy, BuildManager from .warnings import OrphanContainerWarning, MissingRequiredWarning _const_arg = '__constructor_arg' @docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'}, is_method=False) def _constructor_arg(**kwargs): '''Decorator to override the default mapping scheme for a given constructor argument. Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its first argument the Builder object that is being mapped. The method should return the value to be passed to the target AbstractContainer class constructor argument given by *name*. ''' name = getargs('name', kwargs) def _dec(func): setattr(func, _const_arg, name) return func return _dec _obj_attr = '__object_attr' @docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'}, is_method=False) def _object_attr(**kwargs): '''Decorator to override the default mapping scheme for a given object attribute. Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its first argument the AbstractContainer object that is being mapped. The method should return the child Builder object (or scalar if the object attribute corresponds to an AttributeSpec) that represents the attribute given by *name*. 
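    For example, a rough sketch of overriding how an object attribute is obtained
    (MyMap is a hypothetical ObjectMapper subclass; in practice this decorator is
    usually applied through the ObjectMapper.object_attr wrapper):

        class MyMap(ObjectMapper):

            @ObjectMapper.object_attr('data')
            def data_attr(self, container, manager):
                # return the value to use for the spec mapped to 'data'
                return container.data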
''' name = getargs('name', kwargs) def _dec(func): setattr(func, _obj_attr, name) return func return _dec def _unicode(s): """ A helper function for converting to Unicode """ if isinstance(s, str): return s elif isinstance(s, bytes): return s.decode('utf-8') else: raise ValueError("Expected unicode or ascii string, got %s" % type(s)) def _ascii(s): """ A helper function for converting to ASCII """ if isinstance(s, str): return s.encode('ascii', 'backslashreplace') elif isinstance(s, bytes): return s else: raise ValueError("Expected unicode or ascii string, got %s" % type(s)) class ObjectMapper(metaclass=ExtenderMeta): '''A class for mapping between Spec objects and AbstractContainer attributes ''' __dtypes = { "float": np.float32, "float32": np.float32, "double": np.float64, "float64": np.float64, "long": np.int64, "int64": np.int64, "uint64": np.uint64, "int": np.int32, "int32": np.int32, "int16": np.int16, "int8": np.int8, "bool": np.bool_, "text": _unicode, "text": _unicode, "utf": _unicode, "utf8": _unicode, "utf-8": _unicode, "ascii": _ascii, "str": _ascii, "isodatetime": _ascii, "uint32": np.uint32, "uint16": np.uint16, "uint8": np.uint8, "uint": np.uint32 } __no_convert = set() @classmethod def __resolve_dtype(cls, given, specified): """ Determine the dtype to use from the dtype of the given value and the specified dtype. This amounts to determining the greater precision of the two arguments, but also checks to make sure the same base dtype is being used. """ g = np.dtype(given) s = np.dtype(specified) if g.itemsize <= s.itemsize: return s.type else: if g.name[:3] != s.name[:3]: # different types if s.itemsize < 8: msg = "expected %s, received %s - must supply %s or higher precision" % (s.name, g.name, s.name) else: msg = "expected %s, received %s - must supply %s" % (s.name, g.name, s.name) raise ValueError(msg) else: return g.type @classmethod def no_convert(cls, obj_type): """ Specify an object type that ObjectMappers should not convert. """ cls.__no_convert.add(obj_type) @classmethod def convert_dtype(cls, spec, value): """ Convert values to the specified dtype. For example, if a literal int is passed in to a field that is specified as a unsigned integer, this function will convert the Python int to a numpy unsigned int. :return: The function returns a tuple consisting of 1) the value, and 2) the data type. The value is returned as the function may convert the input value to comply with the dtype specified in the schema. 
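        For example, a minimal sketch (exact numpy scalar types may vary by platform):

            spec = AttributeSpec(name='count', doc='a count', dtype='uint64')
            value, dtype = ObjectMapper.convert_dtype(spec, 3)
            # value is numpy.uint64(3) and dtype is numpy.uint64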
""" ret, ret_dtype = cls.__check_edgecases(spec, value) if ret is not None or ret_dtype is not None: return ret, ret_dtype spec_dtype = cls.__dtypes[spec.dtype] if isinstance(value, np.ndarray): if spec_dtype is _unicode: ret = value.astype('U') ret_dtype = "utf8" elif spec_dtype is _ascii: ret = value.astype('S') ret_dtype = "ascii" else: dtype_func = cls.__resolve_dtype(value.dtype, spec_dtype) ret = np.asarray(value).astype(dtype_func) ret_dtype = ret.dtype.type elif isinstance(value, (tuple, list)): if len(value) == 0: return value, spec_dtype ret = list() for elem in value: tmp, tmp_dtype = cls.convert_dtype(spec, elem) ret.append(tmp) ret = type(value)(ret) ret_dtype = tmp_dtype elif isinstance(value, AbstractDataChunkIterator): ret = value ret_dtype = cls.__resolve_dtype(value.dtype, spec_dtype) else: if spec_dtype in (_unicode, _ascii): ret_dtype = 'ascii' if spec_dtype == _unicode: ret_dtype = 'utf8' ret = spec_dtype(value) else: dtype_func = cls.__resolve_dtype(type(value), spec_dtype) ret = dtype_func(value) ret_dtype = type(ret) return ret, ret_dtype @classmethod def __check_edgecases(cls, spec, value): """ Check edge cases in converting data to a dtype """ if value is None: dt = spec.dtype if isinstance(dt, RefSpec): dt = dt.reftype return None, dt if isinstance(spec.dtype, list): # compound dtype - Since the I/O layer needs to determine how to handle these, # return the list of DtypeSpecs return value, spec.dtype if isinstance(value, DataIO): return value, cls.convert_dtype(spec, value.data)[1] if spec.dtype is None or spec.dtype == 'numeric' or type(value) in cls.__no_convert: # infer type from value if hasattr(value, 'dtype'): # covers numpy types, AbstractDataChunkIterator return value, value.dtype.type if isinstance(value, (list, tuple)): if len(value) == 0: msg = "cannot infer dtype of empty list or tuple. Please use numpy array with specified dtype." raise ValueError(msg) return value, cls.__check_edgecases(spec, value[0])[1] # infer dtype from first element ret_dtype = type(value) if ret_dtype is str: ret_dtype = 'utf8' elif ret_dtype is bytes: ret_dtype = 'ascii' return value, ret_dtype if isinstance(spec.dtype, RefSpec): if not isinstance(value, ReferenceBuilder): msg = "got RefSpec for value of type %s" % type(value) raise ValueError(msg) return value, spec.dtype if spec.dtype is not None and spec.dtype not in cls.__dtypes: msg = "unrecognized dtype: %s -- cannot convert value" % spec.dtype raise ValueError(msg) return None, None _const_arg = '__constructor_arg' @staticmethod @docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'}, is_method=False) def constructor_arg(**kwargs): '''Decorator to override the default mapping scheme for a given constructor argument. Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its first argument the Builder object that is being mapped. The method should return the value to be passed to the target AbstractContainer class constructor argument given by *name*. ''' name = getargs('name', kwargs) return _constructor_arg(name) _obj_attr = '__object_attr' @staticmethod @docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'}, is_method=False) def object_attr(**kwargs): '''Decorator to override the default mapping scheme for a given object attribute. 
Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its first argument the AbstractContainer object that is being mapped. The method should return the child Builder object (or scalar if the object attribute corresponds to an AttributeSpec) that represents the attribute given by *name*. ''' name = getargs('name', kwargs) return _object_attr(name) @staticmethod def __is_attr(attr_val): return hasattr(attr_val, _obj_attr) @staticmethod def __get_obj_attr(attr_val): return getattr(attr_val, _obj_attr) @staticmethod def __is_constructor_arg(attr_val): return hasattr(attr_val, _const_arg) @staticmethod def __get_cargname(attr_val): return getattr(attr_val, _const_arg) @ExtenderMeta.post_init def __gather_procedures(cls, name, bases, classdict): if hasattr(cls, 'constructor_args'): cls.constructor_args = copy(cls.constructor_args) else: cls.constructor_args = dict() if hasattr(cls, 'obj_attrs'): cls.obj_attrs = copy(cls.obj_attrs) else: cls.obj_attrs = dict() for name, func in cls.__dict__.items(): if cls.__is_constructor_arg(func): cls.constructor_args[cls.__get_cargname(func)] = getattr(cls, name) elif cls.__is_attr(func): cls.obj_attrs[cls.__get_obj_attr(func)] = getattr(cls, name) @docval({'name': 'spec', 'type': (DatasetSpec, GroupSpec), 'doc': 'The specification for mapping objects to builders'}) def __init__(self, **kwargs): """ Create a map from AbstractContainer attributes to specifications """ spec = getargs('spec', kwargs) self.__spec = spec self.__data_type_key = spec.type_key() self.__spec2attr = dict() self.__attr2spec = dict() self.__spec2carg = dict() self.__carg2spec = dict() self.__map_spec(spec) @property def spec(self): ''' the Spec used in this ObjectMapper ''' return self.__spec @_constructor_arg('name') def get_container_name(self, *args): builder = args[0] return builder.name @classmethod @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to get the name for'}) def convert_dt_name(cls, **kwargs): '''Get the attribute name corresponding to a specification''' spec = getargs('spec', kwargs) if spec.data_type_def is not None: name = spec.data_type_def elif spec.data_type_inc is not None: name = spec.data_type_inc else: raise ValueError('found spec without name or data_type') s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) name = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() if name[-1] != 's' and spec.is_many(): name += 's' return name @classmethod def __get_fields(cls, name_stack, all_names, spec): name = spec.name if spec.name is None: name = cls.convert_dt_name(spec) name_stack.append(name) name = '__'.join(name_stack) all_names[name] = spec if isinstance(spec, BaseStorageSpec): if not (spec.data_type_def is None and spec.data_type_inc is None): # don't get names for components in data_types name_stack.pop() return for subspec in spec.attributes: cls.__get_fields(name_stack, all_names, subspec) if isinstance(spec, GroupSpec): for subspec in spec.datasets: cls.__get_fields(name_stack, all_names, subspec) for subspec in spec.groups: cls.__get_fields(name_stack, all_names, subspec) for subspec in spec.links: cls.__get_fields(name_stack, all_names, subspec) name_stack.pop() @classmethod @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to get the object attribute names for'}) def get_attr_names(cls, **kwargs): '''Get the attribute names for each subspecification in a Spec''' spec = 
getargs('spec', kwargs) names = OrderedDict() for subspec in spec.attributes: cls.__get_fields(list(), names, subspec) if isinstance(spec, GroupSpec): for subspec in spec.groups: cls.__get_fields(list(), names, subspec) for subspec in spec.datasets: cls.__get_fields(list(), names, subspec) for subspec in spec.links: cls.__get_fields(list(), names, subspec) return names def __map_spec(self, spec): attr_names = self.get_attr_names(spec) for k, v in attr_names.items(): self.map_spec(k, v) @docval({"name": "attr_name", "type": str, "doc": "the name of the object to map"}, {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"}) def map_attr(self, **kwargs): """ Map an attribute to spec. Use this to override default behavior """ attr_name, spec = getargs('attr_name', 'spec', kwargs) self.__spec2attr[spec] = attr_name self.__attr2spec[attr_name] = spec @docval({"name": "attr_name", "type": str, "doc": "the name of the attribute"}) def get_attr_spec(self, **kwargs): """ Return the Spec for a given attribute """ attr_name = getargs('attr_name', kwargs) return self.__attr2spec.get(attr_name) @docval({"name": "carg_name", "type": str, "doc": "the name of the constructor argument"}) def get_carg_spec(self, **kwargs): """ Return the Spec for a given constructor argument """ carg_name = getargs('carg_name', kwargs) return self.__carg2spec.get(carg_name) @docval({"name": "const_arg", "type": str, "doc": "the name of the constructor argument to map"}, {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"}) def map_const_arg(self, **kwargs): """ Map an attribute to spec. Use this to override default behavior """ const_arg, spec = getargs('const_arg', 'spec', kwargs) self.__spec2carg[spec] = const_arg self.__carg2spec[const_arg] = spec @docval({"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"}) def unmap(self, **kwargs): """ Removing any mapping for a specification. 
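        For example, a subclass constructor could drop the default mapping of a legacy
        attribute (a sketch; assumes the spec declares an attribute named 'help'):

            for attr_spec in self.spec.attributes:
                if attr_spec.name == 'help':
                    self.unmap(attr_spec)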
Use this to override default mapping """ spec = getargs('spec', kwargs) self.__spec2attr.pop(spec, None) self.__spec2carg.pop(spec, None) @docval({"name": "attr_carg", "type": str, "doc": "the constructor argument/object attribute to map this spec to"}, {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"}) def map_spec(self, **kwargs): """ Map the given specification to the construct argument and object attribute """ spec, attr_carg = getargs('spec', 'attr_carg', kwargs) self.map_const_arg(attr_carg, spec) self.map_attr(attr_carg, spec) def __get_override_carg(self, *args): name = args[0] remaining_args = tuple(args[1:]) if name in self.constructor_args: func = self.constructor_args[name] return func(self, *remaining_args) return None def __get_override_attr(self, name, container, manager): if name in self.obj_attrs: func = self.obj_attrs[name] return func(self, container, manager) return None @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute for"}, returns='the attribute name', rtype=str) def get_attribute(self, **kwargs): ''' Get the object attribute name for the given Spec ''' spec = getargs('spec', kwargs) val = self.__spec2attr.get(spec, None) return val @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute value for"}, {"name": "container", "type": AbstractContainer, "doc": "the container to get the attribute value from"}, {"name": "manager", "type": BuildManager, "doc": "the BuildManager used for managing this build"}, returns='the value of the attribute') def get_attr_value(self, **kwargs): ''' Get the value of the attribute corresponding to this spec from the given container ''' spec, container, manager = getargs('spec', 'container', 'manager', kwargs) attr_name = self.get_attribute(spec) if attr_name is None: return None attr_val = self.__get_override_attr(attr_name, container, manager) if attr_val is None: try: attr_val = getattr(container, attr_name) except AttributeError: # raise error if an expected attribute (based on the spec) does not exist on a Container object msg = "Container '%s' (%s) does not have attribute '%s'" % (container.name, type(container), attr_name) raise Exception(msg) if attr_val is not None: attr_val = self.__convert_value(attr_val, spec) # else: attr_val is an attribute on the Container and its value is None return attr_val def __convert_value(self, value, spec): """ Convert string types to the specified dtype """ ret = value if isinstance(spec, AttributeSpec): if 'text' in spec.dtype: if spec.shape is not None or spec.dims is not None: ret = list(map(str, value)) else: ret = str(value) elif isinstance(spec, DatasetSpec): # TODO: make sure we can handle specs with data_type_inc set if spec.data_type_inc is not None: ret = value else: if spec.dtype is not None: string_type = None if 'text' in spec.dtype: string_type = str elif 'ascii' in spec.dtype: string_type = bytes elif 'isodatetime' in spec.dtype: string_type = datetime.isoformat if string_type is not None: if spec.shape is not None or spec.dims is not None: ret = list(map(string_type, value)) else: ret = string_type(value) # copy over any I/O parameters if they were specified if isinstance(value, DataIO): params = value.get_io_params() params['data'] = ret ret = value.__class__(**params) return ret @docval({"name": "spec", "type": Spec, "doc": "the spec to get the constructor argument for"}, returns="the name of the constructor argument", rtype=str) def get_const_arg(self, **kwargs): ''' Get the constructor argument for the given 
Spec ''' spec = getargs('spec', kwargs) return self.__spec2carg.get(spec, None) @docval({"name": "container", "type": AbstractContainer, "doc": "the container to convert to a Builder"}, {"name": "manager", "type": BuildManager, "doc": "the BuildManager to use for managing this build"}, {"name": "parent", "type": Builder, "doc": "the parent of the resulting Builder", 'default': None}, {"name": "source", "type": str, "doc": "the source of container being built i.e. file path", 'default': None}, {"name": "builder", "type": GroupBuilder, "doc": "the Builder to build on", 'default': None}, {"name": "spec_ext", "type": BaseStorageSpec, "doc": "a spec extension", 'default': None}, returns="the Builder representing the given AbstractContainer", rtype=Builder) def build(self, **kwargs): ''' Convert a AbstractContainer to a Builder representation ''' container, manager, parent, source = getargs('container', 'manager', 'parent', 'source', kwargs) spec_ext = getargs('spec_ext', kwargs) builder = getargs('builder', kwargs) name = manager.get_builder_name(container) if isinstance(self.__spec, GroupSpec): if builder is None: builder = GroupBuilder(name, parent=parent, source=source) self.__add_datasets(builder, self.__spec.datasets, container, manager, source) self.__add_groups(builder, self.__spec.groups, container, manager, source) self.__add_links(builder, self.__spec.links, container, manager, source) else: if not isinstance(container, Data): msg = "'container' must be of type Data with DatasetSpec" raise ValueError(msg) spec_dtype, spec_shape, spec = self.__check_dset_spec(self.spec, spec_ext) if isinstance(spec_dtype, RefSpec): # a dataset of references bldr_data = self.__get_ref_builder(spec_dtype, spec_shape, container, manager, source=source) builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=spec_dtype.reftype) elif isinstance(spec_dtype, list): # a compound dataset # # check for any references in the compound dtype, and # convert them if necessary refs = [(i, subt) for i, subt in enumerate(spec_dtype) if isinstance(subt.dtype, RefSpec)] bldr_data = copy(container.data) bldr_data = list() for i, row in enumerate(container.data): tmp = list(row) for j, subt in refs: tmp[j] = self.__get_ref_builder(subt.dtype, None, row[j], manager, source=source) bldr_data.append(tuple(tmp)) try: bldr_data, dtype = self.convert_dtype(spec, bldr_data) except Exception as ex: msg = 'could not resolve dtype for %s \'%s\'' % (type(container).__name__, container.name) raise Exception(msg) from ex builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=dtype) else: # a regular dtype if spec_dtype is None and self.__is_reftype(container.data): # an unspecified dtype and we were given references bldr_data = list() for d in container.data: if d is None: bldr_data.append(None) else: bldr_data.append(ReferenceBuilder(manager.build(d, source=source))) builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype='object') else: # a dataset that has no references, pass the donversion off to # the convert_dtype method try: bldr_data, dtype = self.convert_dtype(spec, container.data) except Exception as ex: msg = 'could not resolve dtype for %s \'%s\'' % (type(container).__name__, container.name) raise Exception(msg) from ex builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=dtype) self.__add_attributes(builder, self.__spec.attributes, container, manager, source) return builder def __check_dset_spec(self, orig, ext): """ Check a dataset 
spec against a refining spec to see which dtype and shape should be used """ dtype = orig.dtype shape = orig.shape spec = orig if ext is not None: if ext.dtype is not None: dtype = ext.dtype if ext.shape is not None: shape = ext.shape spec = ext return dtype, shape, spec def __is_reftype(self, data): tmp = data while hasattr(tmp, '__len__') and not isinstance(tmp, (AbstractContainer, str, bytes)): tmptmp = None for t in tmp: # In case of a numeric array stop the iteration at the first element to avoid long-running loop if isinstance(t, (int, float, complex, bool)): break if hasattr(t, '__len__') and len(t) > 0 and not isinstance(t, (AbstractContainer, str, bytes)): tmptmp = tmp[0] break if tmptmp is not None: break else: if len(tmp) == 0: tmp = None else: tmp = tmp[0] if isinstance(tmp, AbstractContainer): return True else: return False def __get_ref_builder(self, dtype, shape, container, manager, source): bldr_data = None if dtype.is_region(): if shape is None: if not isinstance(container, DataRegion): msg = "'container' must be of type DataRegion if spec represents region reference" raise ValueError(msg) bldr_data = RegionBuilder(container.region, manager.build(container.data, source=source)) else: bldr_data = list() for d in container.data: bldr_data.append(RegionBuilder(d.slice, manager.build(d.target, source=source))) else: if isinstance(container, Data): bldr_data = list() if self.__is_reftype(container.data): for d in container.data: bldr_data.append(ReferenceBuilder(manager.build(d, source=source))) else: bldr_data = ReferenceBuilder(manager.build(container, source=source)) return bldr_data def __is_null(self, item): if item is None: return True else: if any(isinstance(item, t) for t in (list, tuple, dict, set)): return len(item) == 0 return False def __add_attributes(self, builder, attributes, container, build_manager, source): for spec in attributes: if spec.value is not None: attr_value = spec.value else: attr_value = self.get_attr_value(spec, container, build_manager) if attr_value is None: attr_value = spec.default_value attr_value = self.__check_ref_resolver(attr_value) if isinstance(spec.dtype, RefSpec): if not self.__is_reftype(attr_value): if attr_value is None: msg = "object of data_type %s not found on %s '%s'" % \ (spec.dtype.target_type, type(container).__name__, container.name) else: msg = "invalid type for reference '%s' (%s) - "\ "must be AbstractContainer" % (spec.name, type(attr_value)) raise ValueError(msg) target_builder = build_manager.build(attr_value, source=source) attr_value = ReferenceBuilder(target_builder) else: if attr_value is not None: try: attr_value, attr_dtype = self.convert_dtype(spec, attr_value) except Exception as ex: msg = 'could not convert %s for %s %s' % (spec.name, type(container).__name__, container.name) raise Exception(msg) from ex # do not write empty or null valued objects if attr_value is None: if spec.required: msg = "attribute '%s' for '%s' (%s)"\ % (spec.name, builder.name, self.spec.data_type_def) warnings.warn(msg, MissingRequiredWarning) continue builder.set_attribute(spec.name, attr_value) def __add_links(self, builder, links, container, build_manager, source): for spec in links: attr_value = self.get_attr_value(spec, container, build_manager) if not attr_value: continue self.__add_containers(builder, spec, attr_value, build_manager, source, container) def __add_datasets(self, builder, datasets, container, build_manager, source): for spec in datasets: attr_value = self.get_attr_value(spec, container, build_manager) if 
attr_value is None: continue attr_value = self.__check_ref_resolver(attr_value) if isinstance(attr_value, DataIO) and attr_value.data is None: continue if isinstance(attr_value, Builder): builder.set_builder(attr_value) elif spec.data_type_def is None and spec.data_type_inc is None: if spec.name in builder.datasets: sub_builder = builder.datasets[spec.name] else: try: data, dtype = self.convert_dtype(spec, attr_value) except Exception as ex: msg = 'could not convert \'%s\' for %s \'%s\'' msg = msg % (spec.name, type(container).__name__, container.name) raise Exception(msg) from ex sub_builder = builder.add_dataset(spec.name, data, dtype=dtype) self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source) else: self.__add_containers(builder, spec, attr_value, build_manager, source, container) def __add_groups(self, builder, groups, container, build_manager, source): for spec in groups: if spec.data_type_def is None and spec.data_type_inc is None: # we don't need to get attr_name since any named # group does not have the concept of value sub_builder = builder.groups.get(spec.name) if sub_builder is None: sub_builder = GroupBuilder(spec.name, source=source) self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source) self.__add_datasets(sub_builder, spec.datasets, container, build_manager, source) # handle subgroups that are not Containers attr_name = self.get_attribute(spec) if attr_name is not None: attr_value = self.get_attr_value(spec, container, build_manager) if any(isinstance(attr_value, t) for t in (list, tuple, set, dict)): it = iter(attr_value) if isinstance(attr_value, dict): it = iter(attr_value.values()) for item in it: if isinstance(item, Container): self.__add_containers(sub_builder, spec, item, build_manager, source, container) self.__add_groups(sub_builder, spec.groups, container, build_manager, source) empty = sub_builder.is_empty() if not empty or (empty and isinstance(spec.quantity, int)): if sub_builder.name not in builder.groups: builder.set_group(sub_builder) else: if spec.data_type_def is not None: attr_name = self.get_attribute(spec) if attr_name is not None: attr_value = getattr(container, attr_name, None) if attr_value is not None: self.__add_containers(builder, spec, attr_value, build_manager, source, container) else: attr_name = self.get_attribute(spec) attr_value = self.get_attr_value(spec, container, build_manager) if attr_value is not None: self.__add_containers(builder, spec, attr_value, build_manager, source, container) def __add_containers(self, builder, spec, value, build_manager, source, parent_container): if isinstance(value, AbstractContainer): if value.parent is None: msg = "'%s' (%s) for '%s' (%s)"\ % (value.name, getattr(value, self.spec.type_key()), builder.name, self.spec.data_type_def) warnings.warn(msg, OrphanContainerWarning) if value.modified: # writing a new container if isinstance(spec, BaseStorageSpec): rendered_obj = build_manager.build(value, source=source, spec_ext=spec) else: rendered_obj = build_manager.build(value, source=source) # use spec to determine what kind of HDF5 # object this AbstractContainer corresponds to if isinstance(spec, LinkSpec) or value.parent is not parent_container: name = spec.name builder.set_link(LinkBuilder(rendered_obj, name, builder)) elif isinstance(spec, DatasetSpec): if rendered_obj.dtype is None and spec.dtype is not None: val, dtype = self.convert_dtype(spec, rendered_obj.data) rendered_obj.dtype = dtype builder.set_dataset(rendered_obj) else: 
builder.set_group(rendered_obj) elif value.container_source: # make a link to an existing container if value.container_source != parent_container.container_source or\ value.parent is not parent_container: if isinstance(spec, BaseStorageSpec): rendered_obj = build_manager.build(value, source=source, spec_ext=spec) else: rendered_obj = build_manager.build(value, source=source) builder.set_link(LinkBuilder(rendered_obj, name=spec.name, parent=builder)) else: raise ValueError("Found unmodified AbstractContainer with no source - '%s' with parent '%s'" % (value.name, parent_container.name)) else: if any(isinstance(value, t) for t in (list, tuple)): values = value elif isinstance(value, dict): values = value.values() else: msg = ("received %s, expected AbstractContainer - 'value' " "must be an AbstractContainer a list/tuple/dict of " "AbstractContainers if 'spec' is a GroupSpec") raise ValueError(msg % value.__class__.__name__) for container in values: if container: self.__add_containers(builder, spec, container, build_manager, source, parent_container) def __get_subspec_values(self, builder, spec, manager): ret = dict() # First get attributes attributes = builder.attributes for attr_spec in spec.attributes: attr_val = attributes.get(attr_spec.name) if attr_val is None: continue if isinstance(attr_val, (GroupBuilder, DatasetBuilder)): ret[attr_spec] = manager.construct(attr_val) elif isinstance(attr_val, RegionBuilder): raise ValueError("RegionReferences as attributes is not yet supported") elif isinstance(attr_val, ReferenceBuilder): ret[attr_spec] = manager.construct(attr_val.builder) else: ret[attr_spec] = attr_val if isinstance(spec, GroupSpec): if not isinstance(builder, GroupBuilder): raise ValueError("__get_subspec_values - must pass GroupBuilder with GroupSpec") # first aggregate links by data type and separate them # by group and dataset groups = dict(builder.groups) # make a copy so we can separate links datasets = dict(builder.datasets) # make a copy so we can separate links links = builder.links link_dt = dict() for link_builder in links.values(): target = link_builder.builder if isinstance(target, DatasetBuilder): datasets[link_builder.name] = target else: groups[link_builder.name] = target dt = manager.get_builder_dt(target) if dt is not None: link_dt.setdefault(dt, list()).append(target) # now assign links to their respective specification for subspec in spec.links: if subspec.name is not None and subspec.name in links: ret[subspec] = manager.construct(links[subspec.name].builder) else: sub_builder = link_dt.get(subspec.target_type) if sub_builder is not None: ret[subspec] = self.__flatten(sub_builder, subspec, manager) # now process groups and datasets self.__get_sub_builders(groups, spec.groups, manager, ret) self.__get_sub_builders(datasets, spec.datasets, manager, ret) elif isinstance(spec, DatasetSpec): if not isinstance(builder, DatasetBuilder): raise ValueError("__get_subspec_values - must pass DatasetBuilder with DatasetSpec") if spec.shape is None and getattr(builder.data, 'shape', None) == (1, ): # if a scalar dataset is expected and a 1-element dataset is given, then read the dataset builder['data'] = builder.data[0] # use dictionary reference instead of .data to bypass error ret[spec] = self.__check_ref_resolver(builder.data) return ret @staticmethod def __check_ref_resolver(data): """ Check if this dataset is a reference resolver, and invert it if so. 
""" if isinstance(data, ReferenceResolver): return data.invert() return data def __get_sub_builders(self, sub_builders, subspecs, manager, ret): # index builders by data_type builder_dt = dict() for g in sub_builders.values(): dt = manager.get_builder_dt(g) ns = manager.get_builder_ns(g) if dt is None or ns is None: continue for parent_dt in manager.namespace_catalog.get_hierarchy(ns, dt): builder_dt.setdefault(parent_dt, list()).append(g) for subspec in subspecs: # first get data type for the spec if subspec.data_type_def is not None: dt = subspec.data_type_def elif subspec.data_type_inc is not None: dt = subspec.data_type_inc else: dt = None # use name if we can, otherwise use data_data if subspec.name is None: sub_builder = builder_dt.get(dt) if sub_builder is not None: sub_builder = self.__flatten(sub_builder, subspec, manager) ret[subspec] = sub_builder else: sub_builder = sub_builders.get(subspec.name) if sub_builder is None: continue if dt is None: # recurse ret.update(self.__get_subspec_values(sub_builder, subspec, manager)) else: ret[subspec] = manager.construct(sub_builder) def __flatten(self, sub_builder, subspec, manager): tmp = [manager.construct(b) for b in sub_builder] if len(tmp) == 1 and not subspec.is_many(): tmp = tmp[0] return tmp @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder), 'doc': 'the builder to construct the AbstractContainer from'}, {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager for this build'}, {'name': 'parent', 'type': (Proxy, AbstractContainer), 'doc': 'the parent AbstractContainer/Proxy for the AbstractContainer being built', 'default': None}) def construct(self, **kwargs): ''' Construct an AbstractContainer from the given Builder ''' builder, manager, parent = getargs('builder', 'manager', 'parent', kwargs) cls = manager.get_cls(builder) # gather all subspecs subspecs = self.__get_subspec_values(builder, self.spec, manager) # get the constructor argument that each specification corresponds to const_args = dict() # For Data container classes, we need to populate the data constructor argument since # there is no sub-specification that maps to that argument under the default logic if issubclass(cls, Data): if not isinstance(builder, DatasetBuilder): raise ValueError('Can only construct a Data object from a DatasetBuilder - got %s' % type(builder)) const_args['data'] = self.__check_ref_resolver(builder.data) for subspec, value in subspecs.items(): const_arg = self.get_const_arg(subspec) if const_arg is not None: if isinstance(subspec, BaseStorageSpec) and subspec.is_many(): existing_value = const_args.get(const_arg) if isinstance(existing_value, list): value = existing_value + value const_args[const_arg] = value # build kwargs for the constructor kwargs = dict() for const_arg in get_docval(cls.__init__): argname = const_arg['name'] override = self.__get_override_carg(argname, builder, manager) if override is not None: val = override elif argname in const_args: val = const_args[argname] else: continue kwargs[argname] = val try: obj = cls.__new__(cls, container_source=builder.source, parent=parent, object_id=builder.attributes.get(self.__spec.id_key())) obj.__init__(**kwargs) except Exception as ex: msg = 'Could not construct %s object due to %s' % (cls.__name__, ex) raise Exception(msg) from ex return obj @docval({'name': 'container', 'type': AbstractContainer, 'doc': 'the AbstractContainer to get the Builder name for'}) def get_builder_name(self, **kwargs): '''Get the name of a Builder that represents a 
AbstractContainer''' container = getargs('container', kwargs) if self.__spec.name not in (NAME_WILDCARD, None): ret = self.__spec.name else: if container.name is None: if self.__spec.default_name is not None: ret = self.__spec.default_name else: msg = 'Unable to determine name of container type %s' % self.__spec.data_type_def raise ValueError(msg) else: ret = container.name return ret ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/build/warnings.py0000644000655200065520000000047000000000000021035 0ustar00circlecicircleci00000000000000class OrphanContainerWarning(UserWarning): """ Raised when a container does not have a parent. Only the top level container (e.g. file) should be without a parent """ pass class MissingRequiredWarning(UserWarning): """ Raised when a required field is missing. """ pass ././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.916188 hdmf-1.5.4/src/hdmf/common/0000755000655200065520000000000000000000000017023 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/common/__init__.py0000644000655200065520000001654700000000000021151 0ustar00circlecicircleci00000000000000'''This package will contain functions, classes, and objects for reading and writing data in according to the HDMF-common specification ''' import os.path from copy import deepcopy CORE_NAMESPACE = 'hdmf-common' from ..spec import NamespaceCatalog # noqa: E402 from ..utils import docval, getargs, call_docval_func # noqa: E402 from ..backends.io import HDMFIO # noqa: E402 from ..validate import ValidatorMap # noqa: E402 from ..build import BuildManager, TypeMap # noqa: E402 # a global type map global __TYPE_MAP # a function to register a container classes with the global map @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to get the spec for'}, {'name': 'namespace', 'type': str, 'doc': 'the name of the namespace', 'default': CORE_NAMESPACE}, {"name": "container_cls", "type": type, "doc": "the class to map to the specified data_type", 'default': None}, is_method=False) def register_class(**kwargs): """Register an Container class to use for reading and writing a data_type from a specification If container_cls is not specified, returns a decorator for registering an Container subclass as the class for data_type in namespace. """ data_type, namespace, container_cls = getargs('data_type', 'namespace', 'container_cls', kwargs) def _dec(cls): __TYPE_MAP.register_container_type(namespace, data_type, cls) return cls if container_cls is None: return _dec else: _dec(container_cls) # a function to register an object mapper for a container class @docval({"name": "container_cls", "type": type, "doc": "the Container class for which the given ObjectMapper class gets used for"}, {"name": "mapper_cls", "type": type, "doc": "the ObjectMapper class to use to map", 'default': None}, is_method=False) def register_map(**kwargs): """Register an ObjectMapper to use for a Container class type If mapper_cls is not specified, returns a decorator for registering an ObjectMapper class as the mapper for container_cls. 
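    For example, decorator usage might look like (a sketch; MyContainer and MyMapper are
    hypothetical classes, and MyContainer must already be registered via register_class):

        @register_map(MyContainer)
        class MyMapper(ObjectMapper):
            pass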
If mapper_cls specified, register the class as the mapper for container_cls """ container_cls, mapper_cls = getargs('container_cls', 'mapper_cls', kwargs) def _dec(cls): __TYPE_MAP.register_map(container_cls, cls) return cls if mapper_cls is None: return _dec else: _dec(mapper_cls) def __get_resources(): from pkg_resources import resource_filename from os.path import join __core_ns_file_name = 'namespace.yaml' ret = dict() ret['namespace_path'] = join(resource_filename(__name__, 'hdmf-common-schema/common'), __core_ns_file_name) return ret def _get_resources(): # LEGACY: Needed to support legacy implementation. return __get_resources() @docval({'name': 'namespace_path', 'type': str, 'doc': 'the path to the YAML with the namespace definition'}, returns="the namespaces loaded from the given file", rtype=tuple, is_method=False) def load_namespaces(**kwargs): ''' Load namespaces from file ''' namespace_path = getargs('namespace_path', kwargs) return __TYPE_MAP.load_namespaces(namespace_path) def available_namespaces(): return __TYPE_MAP.namespace_catalog.namespaces # load the hdmf-common namespace __resources = __get_resources() if os.path.exists(__resources['namespace_path']): __TYPE_MAP = TypeMap(NamespaceCatalog()) load_namespaces(__resources['namespace_path']) # import these so the TypeMap gets populated from . import io as __io # noqa: F401,E402 from . import table # noqa: F401,E402 from . import sparse # noqa: F401,E402 from .. import Data, Container __TYPE_MAP.register_container_type(CORE_NAMESPACE, 'Container', Container) __TYPE_MAP.register_container_type(CORE_NAMESPACE, 'Data', Data) else: raise RuntimeError("Unable to load a TypeMap - no namespace file found") DynamicTable = __TYPE_MAP.get_container_cls(CORE_NAMESPACE, 'DynamicTable') VectorData = __TYPE_MAP.get_container_cls(CORE_NAMESPACE, 'VectorData') VectorIndex = __TYPE_MAP.get_container_cls(CORE_NAMESPACE, 'VectorIndex') ElementIdentifiers = __TYPE_MAP.get_container_cls(CORE_NAMESPACE, 'ElementIdentifiers') DynamicTableRegion = __TYPE_MAP.get_container_cls(CORE_NAMESPACE, 'DynamicTableRegion') CSRMatrix = __TYPE_MAP.get_container_cls(CORE_NAMESPACE, 'CSRMatrix') @docval({'name': 'extensions', 'type': (str, TypeMap, list), 'doc': 'a path to a namespace, a TypeMap, or a list consisting paths to namespaces and TypeMaps', 'default': None}, returns="the namespaces loaded from the given file", rtype=tuple, is_method=False) def get_type_map(**kwargs): ''' Get a BuildManager to use for I/O using the given extensions. 
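    For example (a sketch; 'my_ext.namespace.yaml' stands in for a hypothetical extension
    namespace file):

        type_map = get_type_map(extensions='my_ext.namespace.yaml')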
If no extensions are provided, return a BuildManager that uses the core namespace ''' extensions = getargs('extensions', kwargs) type_map = None if extensions is None: type_map = deepcopy(__TYPE_MAP) else: if isinstance(extensions, TypeMap): type_map = extensions else: type_map = deepcopy(__TYPE_MAP) if isinstance(extensions, list): for ext in extensions: if isinstance(ext, str): type_map.load_namespaces(ext) elif isinstance(ext, TypeMap): type_map.merge(ext) else: msg = 'extensions must be a list of paths to namespace specs or a TypeMaps' raise ValueError(msg) elif isinstance(extensions, str): type_map.load_namespaces(extensions) elif isinstance(extensions, TypeMap): type_map.merge(extensions) return type_map @docval({'name': 'extensions', 'type': (str, TypeMap, list), 'doc': 'a path to a namespace, a TypeMap, or a list consisting paths to namespaces and TypeMaps', 'default': None}, returns="the namespaces loaded from the given file", rtype=tuple, is_method=False) def get_manager(**kwargs): ''' Get a BuildManager to use for I/O using the given extensions. If no extensions are provided, return a BuildManager that uses the core namespace ''' type_map = call_docval_func(get_type_map, kwargs) return BuildManager(type_map) # a function to get the container class for a give type @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to get the Container class for'}, {'name': 'namespace', 'type': str, 'doc': 'the namespace the data_type is defined in'}, is_method=False) def get_class(**kwargs): """Get the class object of the Container subclass corresponding to a given neurdata_type. """ data_type, namespace = getargs('data_type', 'namespace', kwargs) return __TYPE_MAP.get_container_cls(namespace, data_type) @docval({'name': 'io', 'type': HDMFIO, 'doc': 'the HDMFIO object to read from'}, {'name': 'namespace', 'type': str, 'doc': 'the namespace to validate against', 'default': CORE_NAMESPACE}, returns="errors in the file", rtype=list, is_method=False) def validate(**kwargs): """Validate an file against a namespace""" io, namespace = getargs('io', 'namespace', kwargs) builder = io.read_builder() validator = ValidatorMap(io.manager.namespace_catalog.get_namespace(name=namespace)) return validator.validate(builder) ././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.908188 hdmf-1.5.4/src/hdmf/common/hdmf-common-schema/0000755000655200065520000000000000000000000022465 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.916188 hdmf-1.5.4/src/hdmf/common/hdmf-common-schema/common/0000755000655200065520000000000000000000000023755 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654658.0 hdmf-1.5.4/src/hdmf/common/hdmf-common-schema/common/namespace.yaml0000644000655200065520000000072700000000000026603 0ustar00circlecicircleci00000000000000namespaces: - name: hdmf-common doc: Common data structures provided by HDMF author: - Andrew Tritt - Oliver Ruebel - Ryan Ly - Ben Dichter contact: - ajtritt@lbl.gov - oruebel@lbl.gov - rly@lbl.gov - bdichter@lbl.gov full_name: HDMF Common schema: - doc: data types for a column-based table source: table.yaml title: Table data types - doc: data types for different types of sparse matrices source: sparse.yaml version: 1.1.3 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654658.0 
hdmf-1.5.4/src/hdmf/common/hdmf-common-schema/common/sparse.yaml0000644000655200065520000000063500000000000026142 0ustar00circlecicircleci00000000000000groups: - data_type_def: CSRMatrix doc: a compressed sparse row matrix attributes: - name: shape dtype: int shape: - 2 doc: the shape of this sparse matrix datasets: - name: indices dtype: int shape: - null doc: column indices - name: indptr dtype: int shape: - null doc: index pointer - name: data shape: - null doc: values in the matrix ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654658.0 hdmf-1.5.4/src/hdmf/common/hdmf-common-schema/common/table.yaml0000644000655200065520000001306700000000000025737 0ustar00circlecicircleci00000000000000datasets: - data_type_def: Data doc: An abstract data type for a dataset. - data_type_def: Index data_type_inc: Data doc: Pointers that index data values. attributes: - name: target dtype: target_type: Data reftype: object doc: Target dataset that this index applies to. - data_type_def: VectorData data_type_inc: Data doc: An n-dimensional dataset representing a column of a DynamicTable. If used without an accompanying VectorIndex, first dimension is along the rows of the DynamicTable and each step along the first dimension is a cell of the larger table. VectorData can also be used to represent a ragged array if paired with a VectorIndex. This allows for storing arrays of varying length in a single cell of the DynamicTable by indexing into this VectorData. The first vector is at VectorData[0:VectorIndex(0)+1]. The second vector is at VectorData[VectorIndex(0)+1:VectorIndex(1)+1], and so on. dims: - - dim0 - - dim0 - dim1 - - dim0 - dim1 - dim2 - - dim0 - dim1 - dim2 - dim3 shape: - - null - - null - null - - null - null - null - - null - null - null - null attributes: - name: description dtype: text doc: Description of what these vectors represent. - data_type_def: VectorIndex data_type_inc: Index doc: Used with VectorData to encode a ragged array. An array of indices into the first dimension of the target VectorData, and forming a map between the rows of a DynamicTable and the indices of the VectorData. dims: - num_rows shape: - null attributes: - name: target dtype: target_type: VectorData reftype: object doc: Reference to the target dataset that this index applies to. - data_type_def: ElementIdentifiers data_type_inc: Data default_name: element_id dtype: int dims: - num_elements shape: - null doc: A list of unique identifiers for values within a dataset, e.g. rows of a DynamicTable. - data_type_def: DynamicTableRegion data_type_inc: VectorData dtype: int doc: DynamicTableRegion provides a link from one table to an index or region of another. The `table` attribute is a link to another `DynamicTable`, indicating which table is referenced, and the data is int(s) indicating the row(s) (0-indexed) of the target array. `DynamicTableRegion`s can be used to associate rows with repeated meta-data without data duplication. They can also be used to create hierarchical relationships between multiple `DynamicTable`s. `DynamicTableRegion` objects may be paired with a `VectorIndex` object to create ragged references, so a single cell of a `DynamicTable` can reference many rows of another `DynamicTable`. dims: - num_rows shape: - null attributes: - name: table dtype: target_type: DynamicTable reftype: object doc: Reference to the DynamicTable object that this region applies to. - name: description dtype: text doc: Description of what this table region points to. 
groups: - data_type_def: Container doc: An abstract data type for a generic container storing collections of data and metadata. Base type for all data and metadata containers. - data_type_def: DynamicTable data_type_inc: Container doc: A group containing multiple datasets that are aligned on the first dimension (Currently, this requirement if left up to APIs to check and enforce). Apart from a column that contains unique identifiers for each row there are no other required datasets. Users are free to add any number of VectorData objects here. Table functionality is already supported through compound types, which is analogous to storing an array-of-structs. DynamicTable can be thought of as a struct-of-arrays. This provides an alternative structure to choose from when optimizing storage for anticipated access patterns. Additionally, this type provides a way of creating a table without having to define a compound type up front. Although this convenience may be attractive, users should think carefully about how data will be accessed. DynamicTable is more appropriate for column-centric access, whereas a dataset with a compound type would be more appropriate for row-centric access. Finally, data size should also be taken into account. For small tables, performance loss may be an acceptable trade-off for the flexibility of a DynamicTable. For example, DynamicTable was originally developed for storing trial data and spike unit metadata. Both of these use cases are expected to produce relatively small tables, so the spatial locality of multiple datasets present in a DynamicTable is not expected to have a significant performance impact. Additionally, requirements of trial and unit metadata tables are sufficiently diverse that performance implications can be overlooked in favor of usability. attributes: - name: colnames dtype: text dims: - num_columns shape: - null doc: The names of the columns in this table. This should be used to specify an order to the columns. - name: description dtype: text doc: Description of what is in this dynamic table. datasets: - name: id data_type_inc: ElementIdentifiers dtype: int dims: - num_rows shape: - null doc: Array of unique identifiers for the rows of this dynamic table. - data_type_inc: VectorData doc: Vector columns of this dynamic table. quantity: '*' - data_type_inc: VectorIndex doc: Indices for the vector columns of this dynamic table. quantity: '*' ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9201882 hdmf-1.5.4/src/hdmf/common/io/0000755000655200065520000000000000000000000017432 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/common/io/__init__.py0000644000655200065520000000004200000000000021537 0ustar00circlecicircleci00000000000000from . import table # noqa: F401 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/common/io/table.py0000644000655200065520000000440300000000000021074 0ustar00circlecicircleci00000000000000from ...utils import docval, getargs from ...build import ObjectMapper, BuildManager from ...spec import Spec from ...container import Container from ..table import DynamicTable, VectorIndex from .. 
import register_map @register_map(DynamicTable) class DynamicTableMap(ObjectMapper): def __init__(self, spec): super().__init__(spec) vector_data_spec = spec.get_data_type('VectorData') vector_index_spec = spec.get_data_type('VectorIndex') self.map_spec('columns', vector_data_spec) self.map_spec('columns', vector_index_spec) @ObjectMapper.object_attr('colnames') def attr_columns(self, container, manager): if all(len(col) == 0 for col in container.columns): return tuple() return container.colnames @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute value for"}, {"name": "container", "type": Container, "doc": "the container to get the attribute value from"}, {"name": "manager", "type": BuildManager, "doc": "the BuildManager used for managing this build"}, returns='the value of the attribute') def get_attr_value(self, **kwargs): ''' Get the value of the attribute corresponding to this spec from the given container ''' spec, container, manager = getargs('spec', 'container', 'manager', kwargs) attr_value = super().get_attr_value(spec, container, manager) if attr_value is None and spec.name in container: if spec.data_type_inc == 'VectorData': attr_value = container[spec.name] if isinstance(attr_value, VectorIndex): attr_value = attr_value.target elif spec.data_type_inc == 'DynamicTableRegion': attr_value = container[spec.name] if isinstance(attr_value, VectorIndex): attr_value = attr_value.target if attr_value.table is None: msg = "empty or missing table for DynamicTableRegion '%s' in DynamicTable '%s'" %\ (attr_value.name, container.name) raise ValueError(msg) elif spec.data_type_inc == 'VectorIndex': attr_value = container[spec.name] return attr_value ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/common/sparse.py0000644000655200065520000000445700000000000020704 0ustar00circlecicircleci00000000000000import scipy.sparse as sps import numpy as np import h5py from ..container import Container from ..utils import docval, getargs, call_docval_func from . import register_class @register_class('CSRMatrix') class CSRMatrix(Container): @docval({'name': 'data', 'type': (sps.csr_matrix, np.ndarray, h5py.Dataset), 'doc': 'the data to use for this CSRMatrix or CSR data array.' 
'If passing CSR data array, *indices*, *indptr*, and *shape* must also be provided'}, {'name': 'indices', 'type': (np.ndarray, h5py.Dataset), 'doc': 'CSR index array', 'default': None}, {'name': 'indptr', 'type': (np.ndarray, h5py.Dataset), 'doc': 'CSR index pointer array', 'default': None}, {'name': 'shape', 'type': (list, tuple, np.ndarray), 'doc': 'the shape of the matrix', 'default': None}, {'name': 'name', 'type': str, 'doc': 'the name to use for this when storing', 'default': 'csr_matrix'}) def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) data = getargs('data', kwargs) if isinstance(data, (np.ndarray, h5py.Dataset)): if data.ndim == 2: data = sps.csr_matrix(data) elif data.ndim == 1: indptr, indices, shape = getargs('indptr', 'indices', 'shape', kwargs) if any(_ is None for _ in (indptr, indices, shape)): raise ValueError("must specify indptr, indices, and shape when passing data array") self.__check_ind(indptr, 'indptr') self.__check_ind(indices, 'indices') if len(shape) != 2: raise ValueError('shape must specify two and only two dimensions') data = sps.csr_matrix((data, indices, indptr), shape=shape) else: raise ValueError("cannot use ndarray of dimensionality > 2") self.__data = data self.__shape = data.shape @staticmethod def __check_ind(ar, arg): if not (ar.ndim == 1 and np.issubdtype(ar.dtype, int)): raise ValueError('%s must be a 1D array of integers' % arg) def __getattr__(self, val): return getattr(self.__data, val) @property def shape(self): return self.__shape def to_spmat(self): return self.__data ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/common/table.py0000644000655200065520000010032500000000000020465 0ustar00circlecicircleci00000000000000from h5py import Dataset import numpy as np import pandas as pd from collections import OrderedDict from ..utils import docval, getargs, ExtenderMeta, call_docval_func, popargs, pystr from ..container import Container, Data from . import register_class @register_class('Index') class Index(Data): __fields__ = ("target",) @docval({'name': 'name', 'type': str, 'doc': 'the name of this Index'}, {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a dataset where the first dimension is a concatenation of multiple vectors'}, {'name': 'target', 'type': Data, 'doc': 'the target dataset that this index applies to'}) def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) @register_class('VectorData') class VectorData(Data): """ An n-dimensional dataset representing a column of a DynamicTable. If used without an accompanying VectorIndex, first dimension is along the rows of the DynamicTable and each step along the first dimension is a cell of the larger table. VectorData can also be used to represent a ragged array if paired with a VectorIndex. This allows for storing arrays of varying length in a single cell of the DynamicTable by indexing into this VectorData. The first vector is at VectorData[0:VectorIndex(0)+1]. The second vector is at VectorData[VectorIndex(0)+1:VectorIndex(1)+1], and so on.
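A minimal sketch of the ragged pattern described above (the column name and values are hypothetical)::

    letters = VectorData(name='letters', description='an example ragged column', data=[])
    letters_index = VectorIndex(name='letters_index', data=[], target=letters)
    letters_index.add_vector(['a', 'b'])       # first cell holds two values
    letters_index.add_vector(['c', 'd', 'e'])  # second cell holds three values
    letters_index[0]                           # returns ['a', 'b']
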
""" __fields__ = ("description",) @docval({'name': 'name', 'type': str, 'doc': 'the name of this VectorData'}, {'name': 'description', 'type': str, 'doc': 'a description for this column'}, {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a dataset where the first dimension is a concatenation of multiple vectors', 'default': list()}) def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) self.description = getargs('description', kwargs) @docval({'name': 'val', 'type': None, 'doc': 'the value to add to this column'}) def add_row(self, **kwargs): val = getargs('val', kwargs) self.append(val) @register_class('VectorIndex') class VectorIndex(Index): """ When paired with a VectorData, this allows for storing arrays of varying length in a single cell of the DynamicTable by indexing into this VectorData. The first vector is at VectorData[0:VectorIndex(0)+1]. The second vector is at VectorData[VectorIndex(0)+1:VectorIndex(1)+1], and so on. """ @docval({'name': 'name', 'type': str, 'doc': 'the name of this VectorIndex'}, {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a 1D dataset containing indexes that apply to VectorData object'}, {'name': 'target', 'type': VectorData, 'doc': 'the target dataset that this index applies to'}) def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) self.target = getargs('target', kwargs) def add_vector(self, arg): self.target.extend(arg) self.append(len(self.target)) def add_row(self, arg): self.add_vector(arg) def __getitem_helper(self, arg): start = 0 if arg == 0 else self.data[arg-1] end = self.data[arg] return self.target[start:end] def __getitem__(self, arg): if isinstance(arg, slice): indices = list(range(*arg.indices(len(self.data)))) ret = list() for i in indices: ret.append(self.__getitem_helper(i)) return ret else: return self.__getitem_helper(arg) @register_class('ElementIdentifiers') class ElementIdentifiers(Data): @docval({'name': 'name', 'type': str, 'doc': 'the name of this ElementIdentifiers'}, {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a 1D dataset containing identifiers', 'default': list()}) def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) @docval({'name': 'other', 'type': (Data, np.ndarray, list, tuple, int), 'doc': 'List of ids to search for in this ElementIdentifer object'}, rtype=np.ndarray, returns='Array with the list of indices where the elements in the list where found.' 'Note, the elements in the returned list are ordered in increasing index' 'of the found elements, rather than in the order in which the elements' 'where given for the search. Also the length of the result may be different from the length' 'of the input array. E.g., if our ids are [1,2,3] and we are search for [3,1,5] the ' 'result would be [0,2] and NOT [2,0,None]') def __eq__(self, other): """ Given a list of ids return the indices in the ElementIdentifiers array where the indices are found. """ # Determine the ids we want to find search_ids = other if not isinstance(other, Data) else other.data if isinstance(search_ids, int): search_ids = [search_ids] # Find all matching locations return np.in1d(self.data, search_ids).nonzero()[0] @register_class('DynamicTable') class DynamicTable(Container): r""" A column-based table. Columns are defined by the argument *columns*. 
This argument must be a list/tuple of :class:`~hdmf.common.table.VectorData` and :class:`~hdmf.common.table.VectorIndex` objects or a list/tuple of dicts containing the keys ``name`` and ``description`` that provide the name and description of each column in the table. Additionally, the keys ``index`` and ``table`` for specifying additional structure to the table columns. Setting the key ``index`` to ``True`` can be used to indicate that the :class:`~hdmf.common.table.VectorData` column will store a ragged array (i.e. will be accompanied with a :class:`~hdmf.common.table.VectorIndex`). Setting the key ``table`` to ``True`` can be used to indicate that the column will store regions to another DynamicTable. Columns in DynamicTable subclasses can be statically defined by specifying the class attribute *\_\_columns\_\_*, rather than specifying them at runtime at the instance level. This is useful for defining a table structure that will get reused. The requirements for *\_\_columns\_\_* are the same as the requirements described above for specifying table columns with the *columns* argument to the DynamicTable constructor. """ __fields__ = ( {'name': 'id', 'child': True}, {'name': 'columns', 'child': True}, 'colnames', 'description' ) __columns__ = tuple() @ExtenderMeta.pre_init def __gather_columns(cls, name, bases, classdict): ''' This classmethod will be called during class declaration in the metaclass to automatically include all columns declared in subclasses ''' if not isinstance(cls.__columns__, tuple): msg = "'__columns__' must be of type tuple, found %s" % type(cls.__columns__) raise TypeError(msg) if len(bases) and 'DynamicTable' in globals() and issubclass(bases[-1], Container) \ and bases[-1].__columns__ is not cls.__columns__: new_columns = list(cls.__columns__) new_columns[0:0] = bases[-1].__columns__ cls.__columns__ = tuple(new_columns) @docval({'name': 'name', 'type': str, 'doc': 'the name of this table'}, {'name': 'description', 'type': str, 'doc': 'a description of what is in this table'}, {'name': 'id', 'type': ('array_data', ElementIdentifiers), 'doc': 'the identifiers for this table', 'default': None}, {'name': 'columns', 'type': (tuple, list), 'doc': 'the columns in this table', 'default': None}, {'name': 'colnames', 'type': 'array_data', 'doc': 'the names of the columns in this table', 'default': None}) def __init__(self, **kwargs): # noqa: C901 id, columns, desc, colnames = popargs('id', 'columns', 'description', 'colnames', kwargs) call_docval_func(super().__init__, kwargs) self.description = desc # All tables must have ElementIdentifiers (i.e. 
a primary key column) # Here, we figure out what to do for that if id is not None: if not isinstance(id, ElementIdentifiers): id = ElementIdentifiers('id', data=id) else: id = ElementIdentifiers('id') if columns is not None: if len(columns) > 0: # If columns have been passed in, check them over # and process accordingly if isinstance(columns[0], dict): columns = self.__build_columns(columns) elif not all(isinstance(c, (VectorData, VectorIndex)) for c in columns): raise ValueError("'columns' must be a list of VectorData, DynamicTableRegion or VectorIndex") colset = {c.name: c for c in columns} for c in columns: if isinstance(c, VectorIndex): colset.pop(c.target.name) lens = [len(c) for c in colset.values()] if not all(i == lens[0] for i in lens): raise ValueError("columns must be the same length") if lens[0] != len(id): if len(id) > 0: raise ValueError("must provide same number of ids as length of columns") else: id.data.extend(range(lens[0])) else: # if the user has not passed in columns, make a place to put them, # as they will presumably be adding new columns columns = list() self.id = id if colnames is None: if columns is None: # make placeholder for columns if nothing was given self.colnames = list() self.columns = list() else: # Figure out column names if columns were given tmp = list() for col in columns: if isinstance(col, VectorIndex): continue tmp.append(col.name) self.colnames = tuple(tmp) self.columns = columns else: # Calculate the order of column names if columns is None: raise ValueError("Must supply 'columns' if specifying 'colnames'") else: # order the columns according to the column names self.colnames = tuple(pystr(c) for c in colnames) col_dict = {col.name: col for col in columns} order = dict() indexed = dict() for col in columns: if isinstance(col, VectorIndex): indexed[col.target.name] = True else: if col.name in indexed: continue indexed[col.name] = False i = 0 for name in self.colnames: col = col_dict[name] order[col.name] = i if indexed[col.name]: i = i + 1 i = i + 1 tmp = [None] * i for col in columns: if indexed.get(col.name, False): continue if isinstance(col, VectorData): pos = order[col.name] tmp[pos] = col elif isinstance(col, VectorIndex): pos = order[col.target.name] tmp[pos] = col tmp[pos+1] = col.target self.columns = list(tmp) # to make generating DataFrames and Series easier col_dict = dict() self.__indices = dict() for col in self.columns: if hasattr(self, col.name): raise ValueError("Column name '%s' is not allowed because it is already an attribute" % col.name) setattr(self, col.name, col) if isinstance(col, VectorData): existing = col_dict.get(col.name) # if we added this column using its index, ignore this column if existing is not None: if isinstance(existing, VectorIndex): if existing.target.name == col.name: continue else: raise ValueError("duplicate column does not target VectorData '%s'" % col.name) else: raise ValueError("duplicate column found: '%s'" % col.name) else: col_dict[col.name] = col elif isinstance(col, VectorIndex): col_dict[col.target.name] = col # use target name for reference and VectorIndex for retrieval self.__indices[col.name] = col self.__df_cols = [self.id] + [col_dict[name] for name in self.colnames] self.__colids = {name: i+1 for i, name in enumerate(self.colnames)} for col in self.__columns__: if col['name'] not in self.__colids: if col.get('required', False): self.add_column(col['name'], col['description'], index=col.get('index', False), table=col.get('table', False)) else: # create column name attributes (set to None) 
on the object even if column is not required setattr(self, col['name'], None) if col.get('index', False): setattr(self, col['name'] + '_index', None) @staticmethod def __build_columns(columns, df=None): """ Build column objects according to specifications """ tmp = list() for d in columns: name = d['name'] desc = d.get('description', 'no description') data = None if df is not None: data = list(df[name].values) if d.get('index', False): index_data = None if data is not None: index_data = [len(data[0])] for i in range(1, len(data)): index_data.append(len(data[i]) + index_data[i-1]) # assume data came in through a DataFrame, so we need # to concatenate it tmp_data = list() for d in data: tmp_data.extend(d) data = tmp_data vdata = VectorData(name, desc, data=data) vindex = VectorIndex("%s_index" % name, index_data, target=vdata) tmp.append(vindex) tmp.append(vdata) else: if data is None: data = list() cls = VectorData if d.get('table', False): cls = DynamicTableRegion tmp.append(cls(name, desc, data=data)) return tmp def __len__(self): return len(self.id) @docval({'name': 'data', 'type': dict, 'doc': 'the data to put in this row', 'default': None}, {'name': 'id', 'type': int, 'doc': 'the ID for the row', 'default': None}, {'name': 'enforce_unique_id', 'type': bool, 'doc': 'enforce that the id in the table must be unique', 'default': False}, allow_extra=True) def add_row(self, **kwargs): ''' Add a row to the table. If *id* is not provided, it will auto-increment. ''' data, row_id, enforce_unique_id = popargs('data', 'id', 'enforce_unique_id', kwargs) data = data if data is not None else kwargs extra_columns = set(list(data.keys())) - set(list(self.__colids.keys())) missing_columns = set(list(self.__colids.keys())) - set(list(data.keys())) # check to see if any of the extra columns just need to be added if extra_columns: for col in self.__columns__: if col['name'] in extra_columns: if data[col['name']] is not None: self.add_column(col['name'], col['description'], index=col.get('index', False), table=col.get('table', False)) extra_columns.remove(col['name']) if extra_columns or missing_columns: raise ValueError( '\n'.join([ 'row data keys don\'t match available columns', 'you supplied {} extra keys: {}'.format(len(extra_columns), extra_columns), 'and were missing {} keys: {}'.format(len(missing_columns), missing_columns) ]) ) if row_id is None: row_id = data.pop('id', None) if row_id is None: row_id = len(self) if enforce_unique_id: if row_id in self.id: raise ValueError("id %i already in the table" % row_id) self.id.append(row_id) for colname, colnum in self.__colids.items(): if colname not in data: raise ValueError("column '%s' missing" % colname) c = self.__df_cols[colnum] if isinstance(c, VectorIndex): c.add_vector(data[colname]) else: c.add_row(data[colname]) def __eq__(self, other): return self.to_dataframe().equals(other.to_dataframe()) @docval({'name': 'name', 'type': str, 'doc': 'the name of this VectorData'}, {'name': 'description', 'type': str, 'doc': 'a description for this column'}, {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a dataset where the first dimension is a concatenation of multiple vectors', 'default': list()}, {'name': 'table', 'type': (bool, 'DynamicTable'), 'doc': 'whether or not this is a table region or the table the region applies to', 'default': False}, {'name': 'index', 'type': (bool, VectorIndex, 'array_data'), 'doc': 'whether or not this column should be indexed', 'default': False}) def add_column(self, **kwargs): """ Add a column to this table. 
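For example, a minimal sketch (the column name and values are hypothetical, and the table is assumed to already contain exactly two rows)::

    table.add_column(name='score', description='a per-row score', data=[0.1, 0.2])
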
If data is provided, it must contain the same number of rows as the current state of the table. """ name, data = getargs('name', 'data', kwargs) index, table = popargs('index', 'table', kwargs) if name in self.__colids: msg = "column '%s' already exists in DynamicTable '%s'" % (name, self.name) raise ValueError(msg) ckwargs = dict(kwargs) cls = VectorData # Add table if it's been specified if table is not False: cls = DynamicTableRegion if isinstance(table, DynamicTable): ckwargs['table'] = table col = cls(**ckwargs) col.parent = self columns = [col] setattr(self, name, col) # Add index if it's been specified if index is not False: if isinstance(index, VectorIndex): col_index = index elif isinstance(index, bool): # make empty VectorIndex if len(col) > 0: raise ValueError("cannot pass empty index with non-empty data to index") col_index = VectorIndex(name + "_index", list(), col) else: # make VectorIndex with supplied data if len(col) == 0: raise ValueError("cannot pass non-empty index with empty data to index") col_index = VectorIndex(name + "_index", index, col) columns.insert(0, col_index) if not isinstance(col_index.parent, Container): col_index.parent = self # else, the ObjectMapper will create a link from self (parent) to col_index (child with existing parent) col = col_index self.__indices[col_index.name] = col_index setattr(self, col_index.name, col_index) if len(col) != len(self.id): raise ValueError("column must have the same number of rows as 'id'") self.__colids[name] = len(self.__df_cols) self.fields['colnames'] = tuple(list(self.colnames)+[name]) self.fields['columns'] = tuple(list(self.columns)+columns) self.__df_cols.append(col) @docval({'name': 'name', 'type': str, 'doc': 'the name of the DynamicTableRegion object'}, {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the indices of the table'}, {'name': 'description', 'type': str, 'doc': 'a brief description of what the region is'}) def create_region(self, **kwargs): region = getargs('region', kwargs) if isinstance(region, slice): if (region.start is not None and region.start < 0) or (region.stop is not None and region.stop > len(self)): msg = 'region slice %s is out of range for this DynamicTable of length ' % (str(region), len(self)) raise IndexError(msg) region = list(range(*region.indices(len(self)))) else: for idx in region: if idx < 0 or idx >= len(self): raise IndexError('The index ' + str(idx) + ' is out of range for this DynamicTable of length ' + str(len(self))) desc = getargs('description', kwargs) name = getargs('name', kwargs) return DynamicTableRegion(name, region, desc, self) def __getitem__(self, key): ret = None if isinstance(key, tuple): # index by row and column --> return specific cell arg1 = key[0] arg2 = key[1] if isinstance(arg2, str): arg2 = self.__colids[arg2] ret = self.__df_cols[arg2][arg1] elif isinstance(key, str): # index by one string --> return column if key in self.__colids: ret = self.__df_cols[self.__colids[key]] elif key in self.__indices: return self.__indices[key] else: raise KeyError(key) else: # index by int, list, or slice --> return pandas Dataframe consisting of one or more rows # determine the key. 
If the key is an int, then turn it into a slice to reduce the number of cases below arg = key if np.issubdtype(type(arg), np.integer): arg = np.s_[arg:(arg+1)] # index with a python slice (or single integer) to select one or multiple rows if isinstance(arg, slice): data = OrderedDict() for name in self.colnames: col = self.__df_cols[self.__colids[name]] if isinstance(col.data, (Dataset, np.ndarray)) and col.data.ndim > 1: data[name] = [x for x in col[arg]] else: currdata = col[arg] data[name] = currdata id_index = self.id.data[arg] if np.isscalar(id_index): id_index = [id_index, ] ret = pd.DataFrame(data, index=pd.Index(name=self.id.name, data=id_index), columns=self.colnames) # index by a list of ints, return multiple rows elif isinstance(arg, (tuple, list, np.ndarray)): if isinstance(arg, np.ndarray): if len(arg.shape) != 1: raise ValueError("cannot index DynamicTable with multiple dimensions") data = OrderedDict() for name in self.colnames: col = self.__df_cols[self.__colids[name]] if isinstance(col.data, (Dataset, np.ndarray)) and col.data.ndim > 1: data[name] = [x for x in col[arg]] elif isinstance(col.data, np.ndarray): data[name] = col[arg] else: data[name] = [col[i] for i in arg] id_index = (self.id.data[arg] if isinstance(self.id.data, np.ndarray) else [self.id.data[i] for i in arg]) ret = pd.DataFrame(data, index=pd.Index(name=self.id.name, data=id_index), columns=self.colnames) else: raise KeyError("Key type not supported by DynamicTable %s" % str(type(arg))) return ret def __contains__(self, val): return val in self.__colids or val in self.__indices def get(self, key, default=None): if key in self: return self[key] return default @docval({'name': 'exclude', 'type': set, 'doc': ' List of columns to exclude from the dataframe', 'default': None}) def to_dataframe(self, **kwargs): """ Produce a pandas DataFrame containing this table's data. """ exclude = popargs('exclude', kwargs) if exclude is None: exclude = set([]) data = OrderedDict() for name in self.colnames: if name in exclude: continue col = self.__df_cols[self.__colids[name]] if isinstance(col.data, (Dataset, np.ndarray)) and col.data.ndim > 1: data[name] = [x for x in col[:]] else: data[name] = col[:] return pd.DataFrame(data, index=pd.Index(name=self.id.name, data=self.id.data)) @classmethod @docval( {'name': 'df', 'type': pd.DataFrame, 'doc': 'source DataFrame'}, {'name': 'name', 'type': str, 'doc': 'the name of this table'}, { 'name': 'index_column', 'type': str, 'doc': 'if provided, this column will become the table\'s index', 'default': None }, { 'name': 'table_description', 'type': str, 'doc': 'a description of what is in the resulting table', 'default': '' }, { 'name': 'columns', 'type': (list, tuple), 'doc': 'a list/tuple of dictionaries specifying columns in the table', 'default': None }, allow_extra=True ) def from_dataframe(cls, **kwargs): ''' Construct an instance of DynamicTable (or a subclass) from a pandas DataFrame. The columns of the resulting table are defined by the columns of the dataframe and the index by the dataframe's index (make sure it has a name!) or by a column whose name is supplied to the index_column parameter. We recommend that you supply *columns* - a list/tuple of dictionaries containing the name and description of the column- to help others understand the contents of your table. See :py:class:`~hdmf.common.table.DynamicTable` for more details on *columns*. 
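A minimal sketch of the intended call pattern (the DataFrame contents, column descriptions, and table name are hypothetical)::

    import pandas as pd
    from hdmf.common import DynamicTable

    df = pd.DataFrame({'letter': ['a', 'b'], 'count': [1, 2]})
    df.index.name = 'id'
    table = DynamicTable.from_dataframe(
        df=df,
        name='example_table',
        table_description='an illustrative table',
        columns=[{'name': 'letter', 'description': 'a letter'},
                 {'name': 'count', 'description': 'a count'}],
    )
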
''' columns = kwargs.pop('columns') df = kwargs.pop('df') name = kwargs.pop('name') index_column = kwargs.pop('index_column') table_description = kwargs.pop('table_description') column_descriptions = kwargs.pop('column_descriptions', dict()) supplied_columns = dict() if columns: supplied_columns = {x['name']: x for x in columns} class_cols = {x['name']: x for x in cls.__columns__} required_cols = set(x['name'] for x in cls.__columns__ if 'required' in x and x['required']) df_cols = df.columns if required_cols - set(df_cols): raise ValueError('missing required cols: ' + str(required_cols - set(df_cols))) if set(supplied_columns.keys()) - set(df_cols): raise ValueError('cols specified but not provided: ' + str(set(supplied_columns.keys()) - set(df_cols))) columns = [] for col_name in df_cols: if col_name in class_cols: columns.append(class_cols[col_name]) elif col_name in supplied_columns: columns.append(supplied_columns[col_name]) else: columns.append({'name': col_name, 'description': column_descriptions.get(col_name, 'no description')}) if hasattr(df[col_name].iloc[0], '__len__') and not isinstance(df[col_name].iloc[0], str): lengths = [len(x) for x in df[col_name]] if not lengths[1:] == lengths[:-1]: columns[-1].update(index=True) if index_column is not None: ids = ElementIdentifiers(name=index_column, data=df[index_column].values.tolist()) else: index_name = df.index.name if df.index.name is not None else 'id' ids = ElementIdentifiers(name=index_name, data=df.index.values.tolist()) columns = cls.__build_columns(columns, df=df) return cls(name=name, id=ids, columns=columns, description=table_description, **kwargs) def copy(self): """ Return a copy of this DynamicTable. This is useful for linking. """ kwargs = dict(name=self.name, id=self.id, columns=self.columns, description=self.description, colnames=self.colnames) return self.__class__(**kwargs) @register_class('DynamicTableRegion') class DynamicTableRegion(VectorData): """ DynamicTableRegion provides a link from one table to an index or region of another. The `table` attribute is another `DynamicTable`, indicating which table is referenced. The data is int(s) indicating the row(s) (0-indexed) of the target array. `DynamicTableRegion`s can be used to associate multiple rows with the same meta-data without data duplication. They can also be used to create hierarchical relationships between multiple `DynamicTable`s. `DynamicTableRegion` objects may be paired with a `VectorIndex` object to create ragged references, so a single cell of a `DynamicTable` can reference many rows of another `DynamicTable`. 
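A minimal sketch of the link described above (``other_table`` is a hypothetical :class:`~hdmf.common.table.DynamicTable` with at least three rows)::

    region = other_table.create_region(
        name='source_rows',
        region=[0, 2],
        description='rows of other_table that each row of this table refers to',
    )
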
""" __fields__ = ( 'table', 'description' ) @docval({'name': 'name', 'type': str, 'doc': 'the name of this VectorData'}, {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a dataset where the first dimension is a concatenation of multiple vectors'}, {'name': 'description', 'type': str, 'doc': 'a description of what this region represents'}, {'name': 'table', 'type': DynamicTable, 'doc': 'the DynamicTable this region applies to', 'default': None}) def __init__(self, **kwargs): t = popargs('table', kwargs) call_docval_func(super().__init__, kwargs) self.table = t @property def table(self): return self.fields.get('table') @table.setter def table(self, val): if val is None: return if 'table' in self.fields: msg = "can't set attribute 'table' -- already set" raise AttributeError(msg) for idx in self.data: if idx < 0 or idx >= len(val): raise IndexError('The index ' + str(idx) + ' is out of range for this DynamicTable of length ' + str(len(val))) self.fields['table'] = val def __getitem__(self, key): # treat the list of indices as data that can be indexed. then pass the # result to the table to get the data if isinstance(key, tuple): arg1 = key[0] arg2 = key[1] return self.table[self.data[arg1], arg2] elif isinstance(key, (int, slice)): if isinstance(key, int) and key >= len(self.data): raise IndexError('index {} out of bounds for data of length {}'.format(key, len(self.data))) return self.table[self.data[key]] else: raise ValueError("unrecognized argument: '%s'" % key) @property def shape(self): """ Define the shape, i.e., (num_rows, num_columns) of the selected table region :return: Shape tuple with two integers indicating the number of rows and number of columns """ return (len(self.data), len(self.table.columns)) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/container.py0000644000655200065520000004036700000000000020101 0ustar00circlecicircleci00000000000000import numpy as np from abc import abstractmethod from uuid import uuid4 from .utils import docval, get_docval, call_docval_func, getargs, ExtenderMeta, get_data_shape from .data_utils import DataIO from warnings import warn import h5py class AbstractContainer(metaclass=ExtenderMeta): # The name of the class attribute that subclasses use to autogenerate properties # This parameterization is supplied in case users would like to configure # the class attribute name to something domain-specific _fieldsname = '__fields__' _data_type_attr = 'data_type' # Subclasses use this class attribute to add properties to autogenerate # Autogenerated properties will store values in self.__field_values __fields__ = tuple() _pconf_allowed_keys = {'name', 'doc', 'settable'} # Override the _setter factor function, so directives that apply to # Container do not get used on Data @classmethod def _setter(cls, field): """ Make a setter function for creating a :py:func:`property` """ name = field['name'] if not field.get('settable', True): return None def setter(self, val): if val is None: return if name in self.fields: msg = "can't set attribute '%s' -- already set" % name raise AttributeError(msg) self.fields[name] = val return setter @classmethod def _getter(cls, field): """ Make a getter function for creating a :py:func:`property` """ doc = field.get('doc') name = field['name'] def getter(self): return self.fields.get(name) setattr(getter, '__doc__', doc) return getter @staticmethod def _check_field_spec(field): """ A helper function for __gather_fields to make sure we are 
always working with a dict specification and that the specification contains the correct keys """ tmp = field if isinstance(tmp, dict): if 'name' not in tmp: raise ValueError("must specify 'name' if using dict in __fields__") else: tmp = {'name': tmp} return tmp @ExtenderMeta.pre_init def __gather_fields(cls, name, bases, classdict): ''' This classmethod will be called during class declaration in the metaclass to automatically create setters and getters for fields that need to be exported ''' fields = getattr(cls, cls._fieldsname) if not isinstance(fields, tuple): msg = "'%s' must be of type tuple" % cls._fieldsname raise TypeError(msg) if len(bases) and 'Container' in globals() and issubclass(bases[-1], Container) \ and getattr(bases[-1], bases[-1]._fieldsname) is not fields: new_fields = list(fields) new_fields[0:0] = getattr(bases[-1], bases[-1]._fieldsname) setattr(cls, cls._fieldsname, tuple(new_fields)) new_fields = list() docs = {dv['name']: dv['doc'] for dv in get_docval(cls.__init__)} for f in getattr(cls, cls._fieldsname): pconf = cls._check_field_spec(f) pname = pconf['name'] pconf.setdefault('doc', docs.get(pname)) if not hasattr(cls, pname): setattr(cls, pname, property(cls._getter(pconf), cls._setter(pconf))) new_fields.append(pname) setattr(cls, cls._fieldsname, tuple(new_fields)) def __new__(cls, *args, **kwargs): inst = super().__new__(cls) inst.__container_source = kwargs.pop('container_source', None) inst.__parent = None inst.__children = list() inst.__modified = True inst.__object_id = kwargs.pop('object_id', str(uuid4())) inst.parent = kwargs.pop('parent', None) return inst @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'}) def __init__(self, **kwargs): name = getargs('name', kwargs) if '/' in name: raise ValueError("name '" + name + "' cannot contain '/'") self.__name = name self.__field_values = dict() @property def name(self): ''' The name of this Container ''' return self.__name @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to search for', 'default': None}) def get_ancestor(self, **kwargs): """ Traverse parent hierarchy and return first instance of the specified data_type """ data_type = getargs('data_type', kwargs) if data_type is None: return self.parent p = self.parent while p is not None: if getattr(p, p._data_type_attr) == data_type: return p p = p.parent return None @property def fields(self): return self.__field_values @property def object_id(self): if self.__object_id is None: self.__object_id = str(uuid4()) return self.__object_id @property def modified(self): return self.__modified @docval({'name': 'modified', 'type': bool, 'doc': 'whether or not this Container has been modified', 'default': True}) def set_modified(self, **kwargs): modified = getargs('modified', kwargs) self.__modified = modified if modified and isinstance(self.parent, Container): self.parent.set_modified() @property def children(self): return tuple(self.__children) @docval({'name': 'child', 'type': 'Container', 'doc': 'the child Container for this Container', 'default': None}) def add_child(self, **kwargs): warn(DeprecationWarning('add_child is deprecated. 
Set the parent attribute instead.')) child = getargs('child', kwargs) if child is not None: # if child.parent is a Container, then the mismatch between child.parent and parent # is used to make a soft/external link from the parent to a child elsewhere # if child.parent is not a Container, it is either None or a Proxy and should be set to self if not isinstance(child.parent, AbstractContainer): # actually add the child to the parent in parent setter child.parent = self else: warn('Cannot add None as child to a container %s' % self.name) @classmethod def type_hierarchy(cls): return cls.__mro__ @property def container_source(self): ''' The source of this Container ''' return self.__container_source @container_source.setter def container_source(self, source): if self.__container_source is not None: raise Exception('cannot reassign container_source') self.__container_source = source @property def parent(self): ''' The parent Container of this Container ''' # do it this way because __parent may not exist yet (not set in constructor) return getattr(self, '_AbstractContainer__parent', None) @parent.setter def parent(self, parent_container): if self.parent is parent_container: return if self.parent is not None: if isinstance(self.parent, AbstractContainer): raise ValueError(('Cannot reassign parent to Container: %s. ' 'Parent is already: %s.' % (repr(self), repr(self.parent)))) else: if parent_container is None: raise ValueError("Got None for parent of '%s' - cannot overwrite Proxy with NoneType" % repr(self)) # TODO this assumes isinstance(parent_container, Proxy) but # circular import if we try to do that. Proxy would need to move # or Container extended with this functionality in build/map.py if self.parent.matches(parent_container): self.__parent = parent_container parent_container.__children.append(self) parent_container.set_modified() else: self.__parent.add_candidate(parent_container) else: self.__parent = parent_container if isinstance(parent_container, Container): parent_container.__children.append(self) parent_container.set_modified() class Container(AbstractContainer): _pconf_allowed_keys = {'name', 'child', 'required_name', 'doc', 'settable'} @classmethod def _setter(cls, field): super_setter = AbstractContainer._setter(field) ret = [super_setter] if isinstance(field, dict): for k in field.keys(): if k not in cls._pconf_allowed_keys: msg = "Unrecognized key '%s' in __field__ config '%s' on %s" %\ (k, field['name'], cls.__name__) raise ValueError(msg) if field.get('required_name', None) is not None: name = field['required_name'] idx1 = len(ret) - 1 def container_setter(self, val): if val is not None and val.name != name: msg = "%s field on %s must be named '%s'" % (field['name'], self.__class__.__name__, name) raise ValueError(msg) ret[idx1](self, val) ret.append(container_setter) if field.get('child', False): idx2 = len(ret) - 1 def container_setter(self, val): ret[idx2](self, val) if val is not None: if isinstance(val, (tuple, list)): pass elif isinstance(val, dict): val = val.values() else: val = [val] for v in val: if not isinstance(v.parent, Container): v.parent = self # else, the ObjectMapper will create a link from self (parent) to v (child with existing # parent) ret.append(container_setter) return ret[-1] def __repr__(self): cls = self.__class__ template = "%s %s.%s at 0x%d" % (self.name, cls.__module__, cls.__name__, id(self)) if len(self.fields): template += "\nFields:\n" for k in sorted(self.fields): # sorted to enable tests v = self.fields[k] # if isinstance(v, DataIO) or 
not hasattr(v, '__len__') or len(v) > 0: if hasattr(v, '__len__'): if isinstance(v, (np.ndarray, list, tuple)): if len(v) > 0: template += " {}: {}\n".format(k, self.__smart_str(v, 1)) elif v: template += " {}: {}\n".format(k, self.__smart_str(v, 1)) else: template += " {}: {}\n".format(k, v) return template @staticmethod def __smart_str(v, num_indent): """ Print compact string representation of data. If v is a list, try to print it using numpy. This will condense the string representation of datasets with many elements. If that doesn't work, just print the list. If v is a dictionary, print the name and type of each element If v is a set, print it sorted If v is a neurodata_type, print the name of type Otherwise, use the built-in str() Parameters ---------- v Returns ------- str """ if isinstance(v, list) or isinstance(v, tuple): if len(v) and isinstance(v[0], AbstractContainer): return Container.__smart_str_list(v, num_indent, '(') try: return str(np.asarray(v)) except ValueError: return Container.__smart_str_list(v, num_indent, '(') elif isinstance(v, dict): return Container.__smart_str_dict(v, num_indent) elif isinstance(v, set): return Container.__smart_str_list(sorted(list(v)), num_indent, '{') elif isinstance(v, AbstractContainer): return "{} {}".format(getattr(v, 'name'), type(v)) else: return str(v) @staticmethod def __smart_str_list(l, num_indent, left_br): if left_br == '(': right_br = ')' if left_br == '{': right_br = '}' if len(l) == 0: return left_br + ' ' + right_br indent = num_indent * 2 * ' ' indent_in = (num_indent + 1) * 2 * ' ' out = left_br for v in l[:-1]: out += '\n' + indent_in + Container.__smart_str(v, num_indent + 1) + ',' if l: out += '\n' + indent_in + Container.__smart_str(l[-1], num_indent + 1) out += '\n' + indent + right_br return out @staticmethod def __smart_str_dict(d, num_indent): left_br = '{' right_br = '}' if len(d) == 0: return left_br + ' ' + right_br indent = num_indent * 2 * ' ' indent_in = (num_indent + 1) * 2 * ' ' out = left_br keys = sorted(list(d.keys())) for k in keys[:-1]: out += '\n' + indent_in + Container.__smart_str(k, num_indent + 1) + ' ' + str(type(d[k])) + ',' if keys: out += '\n' + indent_in + Container.__smart_str(keys[-1], num_indent + 1) + ' ' + str(type(d[keys[-1]])) out += '\n' + indent + right_br return out class Data(AbstractContainer): """ A class for representing dataset containers """ @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'}, {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'the source of the data'}) def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) self.__data = getargs('data', kwargs) @property def data(self): return self.__data @property def shape(self): """ Get the shape of the data represented by this container :return: Shape tuple :rtype: tuple of ints """ return get_data_shape(self.__data) @docval({'name': 'dataio', 'type': DataIO, 'doc': 'the DataIO to apply to the data held by this Data'}) def set_dataio(self, **kwargs): """ Apply DataIO object to the data held by this Data object """ dataio = getargs('dataio', kwargs) dataio.data = self.__data self.__data = dataio def __bool__(self): return len(self.data) != 0 def __len__(self): return len(self.__data) def __getitem__(self, args): if isinstance(self.data, (tuple, list)) and isinstance(args, (tuple, list)): return [self.data[i] for i in args] return self.data[args] def append(self, arg): if isinstance(self.data, list): self.data.append(arg) elif isinstance(self.data, np.ndarray): self.__data = 
np.append(self.__data, [arg]) elif isinstance(self.data, h5py.Dataset): shape = list(self.__data.shape) shape[0] += 1 self.__data.resize(shape) self.__data[-1] = arg else: msg = "Data cannot append to object of type '%s'" % type(self.__data) raise ValueError(msg) def extend(self, arg): if isinstance(self.data, list): self.data.extend(arg) elif isinstance(self.data, np.ndarray): self.__data = np.append(self.__data, [arg]) elif isinstance(self.data, h5py.Dataset): shape = list(self.__data.shape) shape[0] += len(arg) self.__data.resize(shape) self.__data[-len(arg):] = arg else: msg = "Data cannot extend object of type '%s'" % type(self.__data) raise ValueError(msg) class DataRegion(Data): @property @abstractmethod def data(self): ''' The target data that this region applies to ''' pass @property @abstractmethod def region(self): ''' The region that indexes into data e.g. slice or list of indices ''' pass ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/data_utils.py0000644000655200065520000007560200000000000020250 0ustar00circlecicircleci00000000000000from abc import ABCMeta, abstractmethod from collections.abc import Iterable import numpy as np from warnings import warn import copy from .utils import docval, getargs, popargs, docval_macro, get_data_shape @docval_macro('array_data') class AbstractDataChunkIterator(metaclass=ABCMeta): """ Abstract iterator class used to iterate over DataChunks. Derived classes must ensure that all abstract methods and abstract properties are implemented, in particular, dtype, maxshape, __iter__, ___next__, recommended_chunk_shape, and recommended_data_shape. """ @abstractmethod def __iter__(self): """Return the iterator object""" raise NotImplementedError("__iter__ not implemented for derived class") @abstractmethod def __next__(self): r""" Return the next data chunk or raise a StopIteration exception if all chunks have been retrieved. HINT: numpy.s\_ provides a convenient way to generate index tuples using standard array slicing. This is often useful to define the DataChunk.selection of the current chunk :returns: DataChunk object with the data and selection of the current chunk :rtype: DataChunk """ raise NotImplementedError("__next__ not implemented for derived class") @abstractmethod def recommended_chunk_shape(self): """ Recommend the chunk shape for the data array. :return: NumPy-style shape tuple describing the recommended shape for the chunks of the target array or None. This may or may not be the same as the shape of the chunks returned in the iteration process. """ raise NotImplementedError("recommended_chunk_shape not implemented for derived class") @abstractmethod def recommended_data_shape(self): """ Recommend the initial shape for the data array. This is useful in particular to avoid repeated resized of the target array when reading from this data iterator. This should typically be either the final size of the array or the known minimal shape of the array. :return: NumPy-style shape tuple indicating the recommended initial shape for the target array. This may or may not be the final full shape of the array, i.e., the array is allowed to grow. This should not be None. 
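For example, an iterator known to ultimately produce 1000 rows of 3 values each would typically return ``(1000, 3)`` here (an illustrative value, not a requirement of this interface).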
""" raise NotImplementedError("recommended_data_shape not implemented for derived class") @property @abstractmethod def dtype(self): """ Define the data type of the array :return: NumPy style dtype or otherwise compliant dtype string """ raise NotImplementedError("dtype not implemented for derived class") @property @abstractmethod def maxshape(self): """ Property describing the maximum shape of the data array that is being iterated over :return: NumPy-style shape tuple indicating the maxiumum dimensions up to which the dataset may be resized. Axes with None are unlimited. """ raise NotImplementedError("maxshape not implemented for derived class") class DataChunkIterator(AbstractDataChunkIterator): """ Custom iterator class used to iterate over chunks of data. This default implementation of AbstractDataChunkIterator accepts any iterable and assumes that we iterate over a single dimension of the data array (default: the first dimension). DataChunkIterator supports buffered read, i.e., multiple values from the input iterator can be combined to a single chunk. This is useful for buffered I/O operations, e.g., to improve performance by accumulating data in memory and writing larger blocks at once. """ __docval_init = ( {'name': 'data', 'type': None, 'doc': 'The data object used for iteration', 'default': None}, {'name': 'maxshape', 'type': tuple, 'doc': 'The maximum shape of the full data array. Use None to indicate unlimited dimensions', 'default': None}, {'name': 'dtype', 'type': np.dtype, 'doc': 'The Numpy data type for the array', 'default': None}, {'name': 'buffer_size', 'type': int, 'doc': 'Number of values to be buffered in a chunk', 'default': 1}, {'name': 'iter_axis', 'type': int, 'doc': 'The dimension to iterate over', 'default': 0} ) @docval(*__docval_init) def __init__(self, **kwargs): """Initialize the DataChunkIterator. If 'data' is an iterator and 'dtype' is not specified, then next is called on the iterator in order to determine the dtype of the data. """ # Get the user parameters self.data, self.__maxshape, self.__dtype, self.buffer_size, self.iter_axis = getargs('data', 'maxshape', 'dtype', 'buffer_size', 'iter_axis', kwargs) self.chunk_index = 0 # Create an iterator for the data if possible if isinstance(self.data, Iterable): if self.iter_axis != 0 and isinstance(self.data, (list, tuple)): warn('Iterating over an axis other than the first dimension of list or tuple data ' 'involves converting the data object to a numpy ndarray, which may incur a computational ' 'cost.') self.data = np.asarray(self.data) if isinstance(self.data, np.ndarray): # iterate over the given axis by adding a new view on data (iter only works on the first dim) self.__data_iter = iter(np.moveaxis(self.data, self.iter_axis, 0)) else: self.__data_iter = iter(self.data) else: self.__data_iter = None self.__next_chunk = DataChunk(None, None) self.__next_chunk_start = 0 self.__first_chunk_shape = None # Determine the shape of the data if possible if self.__maxshape is None: # If the self.data object identifies its shape, then use it if hasattr(self.data, "shape"): self.__maxshape = self.data.shape # Avoid the special case of scalar values by making them into a 1D numpy array if len(self.__maxshape) == 0: self.data = np.asarray([self.data, ]) self.__maxshape = self.data.shape self.__data_iter = iter(self.data) # Try to get an accurate idea of __maxshape for other Python data structures if possible. 
# Don't just callget_shape for a generator as that would potentially trigger loading of all the data elif isinstance(self.data, list) or isinstance(self.data, tuple): self.__maxshape = get_data_shape(self.data, strict_no_data_load=True) # If we have a data iterator and do not know the dtype, then read the first chunk if self.__data_iter is not None and self.__dtype is None: self._read_next_chunk() # Determine the type of the data if possible if self.__next_chunk.data is not None: self.__dtype = self.__next_chunk.data.dtype self.__first_chunk_shape = get_data_shape(self.__next_chunk.data) if self.__dtype is None: raise Exception('Data type could not be determined. Please specify dtype in DataChunkIterator init.') @classmethod @docval(*__docval_init) def from_iterable(cls, **kwargs): return cls(**kwargs) def __iter__(self): """Return the iterator object""" return self def _read_next_chunk(self): """Read a single chunk from self.__data_iter and store the results in self.__next_chunk :returns: self.__next_chunk, i.e., the DataChunk object describing the next chunk """ from h5py import Dataset as H5Dataset if isinstance(self.data, H5Dataset): start_index = self.chunk_index * self.buffer_size stop_index = start_index + self.buffer_size iter_data_bounds = self.data.shape[self.iter_axis] if start_index >= iter_data_bounds: self.__next_chunk = DataChunk(None, None) else: if stop_index > iter_data_bounds: stop_index = iter_data_bounds selection = [slice(None)] * len(self.maxshape) selection[self.iter_axis] = slice(start_index, stop_index) selection = tuple(selection) self.__next_chunk.data = self.data[selection] self.__next_chunk.selection = selection elif self.__data_iter is not None: # the pieces in the buffer - first dimension consists of individual calls to next iter_pieces = [] # offset of where data begins - shift the selection of where to place this chunk by this much curr_chunk_offset = 0 read_next_empty = False while len(iter_pieces) < self.buffer_size: try: dat = next(self.__data_iter) if dat is None and len(iter_pieces) == 0: # Skip forward in our chunk until we find data curr_chunk_offset += 1 elif dat is None and len(iter_pieces) > 0: # Stop iteration if we hit empty data while constructing our block # Buffer may not be full. read_next_empty = True break else: # Add pieces of data to our buffer iter_pieces.append(np.asarray(dat)) except StopIteration: break if len(iter_pieces) == 0: self.__next_chunk = DataChunk(None, None) # signal end of iteration else: # concatenate all the pieces into the chunk along the iteration axis piece_shape = list(get_data_shape(iter_pieces[0])) piece_shape.insert(self.iter_axis, 1) # insert the missing axis next_chunk_shape = piece_shape.copy() next_chunk_shape[self.iter_axis] *= len(iter_pieces) next_chunk_size = next_chunk_shape[self.iter_axis] # use the piece dtype because the actual dtype may not have been determined yet # NOTE: this could be problematic if a generator returns e.g. 
floats first and ints later self.__next_chunk.data = np.empty(next_chunk_shape, dtype=iter_pieces[0].dtype) self.__next_chunk.data = np.stack(iter_pieces, axis=self.iter_axis) selection = [slice(None)] * len(self.maxshape) selection[self.iter_axis] = slice(self.__next_chunk_start + curr_chunk_offset, self.__next_chunk_start + curr_chunk_offset + next_chunk_size) self.__next_chunk.selection = tuple(selection) # next chunk should start at self.__next_chunk.selection[self.iter_axis].stop # but if this chunk stopped because of reading empty data, then this should be adjusted by 1 self.__next_chunk_start = self.__next_chunk.selection[self.iter_axis].stop if read_next_empty: self.__next_chunk_start += 1 else: self.__next_chunk = DataChunk(None, None) self.chunk_index += 1 return self.__next_chunk def __next__(self): r"""Return the next data chunk or raise a StopIteration exception if all chunks have been retrieved. HINT: numpy.s\_ provides a convenient way to generate index tuples using standard array slicing. This is often useful to define the DataChunk.selection of the current chunk :returns: DataChunk object with the data and selection of the current chunk :rtype: DataChunk """ # If we have not already read the next chunk, then read it now if self.__next_chunk.data is None: self._read_next_chunk() # If we do not have any next chunk if self.__next_chunk.data is None: raise StopIteration # If this is the first time we see a chunk then remember the size of the first chunk if self.__first_chunk_shape is None: self.__first_chunk_shape = self.__next_chunk.data.shape # Keep the next chunk we need to return curr_chunk = DataChunk(self.__next_chunk.data, self.__next_chunk.selection) # Remove the data for the next chunk from our list since we are returning it here. # This is to allow the GarbageCollector to remmove the data when it goes out of scope and avoid # having 2 full chunks in memory if not necessary self.__next_chunk.data = None # Return the current next chunk return curr_chunk next = __next__ @docval(returns='Tuple with the recommended chunk shape or None if no particular shape is recommended.') def recommended_chunk_shape(self): """Recommend a chunk shape. To optimize iterative write the chunk should be aligned with the common shape of chunks returned by __next__ or if those chunks are too large, then a well-aligned subset of those chunks. This may also be any other value in case one wants to recommend chunk shapes to optimize read rather than write. The default implementation returns None, indicating no preferential chunking option.""" return None @docval(returns='Recommended initial shape for the full data. This should be the shape of the full dataset' + 'if known beforehand or alternatively the minimum shape of the dataset. Return None if no ' + 'recommendation is available') def recommended_data_shape(self): """Recommend an initial shape of the data. This is useful when progressively writing data and we want to recommend an initial size for the dataset""" if self.maxshape is not None: if np.all([i is not None for i in self.maxshape]): return self.maxshape return self.__first_chunk_shape @property def maxshape(self): """ Get a shape tuple describing the maximum shape of the array described by this DataChunkIterator. If an iterator is provided and no data has been read yet, then the first chunk will be read (i.e., next will be called on the iterator) in order to determine the maxshape. :return: Shape tuple. None is used for dimenwions where the maximum shape is not known or unlimited. 
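        For example (an illustrative sketch; the generator below is assumed)::

            >>> dci = DataChunkIterator(data=(i * i for i in range(10)), buffer_size=5)
            >>> dci.maxshape
            (None,)

        The total number of elements cannot be determined without exhausting the
        generator, so the iteration axis is reported as None (i.e., unlimited).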
""" if self.__maxshape is None: # If no data has been read from the iterator yet, read the first chunk and use it to determine the maxshape if self.__data_iter is not None and self.__next_chunk.data is None: self._read_next_chunk() # Determine maxshape from self.__next_chunk if self.__next_chunk.data is None: return None data_shape = get_data_shape(self.__next_chunk.data) self.__maxshape = list(data_shape) try: # Size of self.__next_chunk.data along self.iter_axis is not accurate for maxshape because it is just a # chunk. So try to set maxshape along the dimension self.iter_axis based on the shape of self.data if # possible. Otherwise, use None to represent an unlimited size if hasattr(self.data, '__len__') and self.iter_axis == 0: # special case of 1-D array self.__maxshape[0] = len(self.data) else: self.__maxshape[self.iter_axis] = self.data.shape[self.iter_axis] except AttributeError: # from self.data.shape self.__maxshape[self.iter_axis] = None self.__maxshape = tuple(self.__maxshape) return self.__maxshape @property def dtype(self): """ Get the value data type :return: np.dtype object describing the datatype """ return self.__dtype class DataChunk: """ Class used to describe a data chunk. Used in DataChunkIterator. """ @docval({'name': 'data', 'type': np.ndarray, 'doc': 'Numpy array with the data value(s) of the chunk', 'default': None}, {'name': 'selection', 'type': None, 'doc': 'Numpy index tuple describing the location of the chunk', 'default': None}) def __init__(self, **kwargs): self.data, self.selection = getargs('data', 'selection', kwargs) def __len__(self): """Get the number of values in the data chunk""" if self.data is not None: return len(self.data) else: return 0 def __getattr__(self, attr): """Delegate retrival of attributes to the data in self.data""" return getattr(self.data, attr) def __copy__(self): newobj = DataChunk(data=self.data, selection=self.selection) return newobj def __deepcopy__(self, memo): result = DataChunk(data=copy.deepcopy(self.data), selection=copy.deepcopy(self.selection)) memo[id(self)] = result return result def astype(self, dtype): """Get a new DataChunk with the self.data converted to the given type""" return DataChunk(data=self.data.astype(dtype), selection=self.selection) @property def dtype(self): """ Data type of the values in the chunk :returns: np.dtype of the values in the DataChunk """ return self.data.dtype def assertEqualShape(data1, data2, axes1=None, axes2=None, name1=None, name2=None, ignore_undetermined=True): """ Ensure that the shape of data1 and data2 match along the given dimensions :param data1: The first input array :type data1: List, Tuple, np.ndarray, DataChunkIterator etc. :param data2: The second input array :type data2: List, Tuple, np.ndarray, DataChunkIterator etc. :param name1: Optional string with the name of data1 :param name2: Optional string with the name of data2 :param axes1: The dimensions of data1 that should be matched to the dimensions of data2. Set to None to compare all axes in order. :type axes1: int, Tuple of ints, List of ints, or None :param axes2: The dimensions of data2 that should be matched to the dimensions of data1. Must have the same length as axes1. Set to None to compare all axes in order. 
:type axes1: int, Tuple of ints, List of ints, or None :param ignore_undetermined: Boolean indicating whether non-matching unlimited dimensions should be ignored, i.e., if two dimension don't match because we can't determine the shape of either one, then should we ignore that case or treat it as no match :return: Bool indicating whether the check passed and a string with a message about the matching process """ # Create the base return object response = ShapeValidatorResult() # Determine the shape of the datasets response.shape1 = get_data_shape(data1) response.shape2 = get_data_shape(data2) # Determine the number of dimensions of the datasets num_dims_1 = len(response.shape1) if response.shape1 is not None else None num_dims_2 = len(response.shape2) if response.shape2 is not None else None # Determine the string names of the datasets n1 = name1 if name1 is not None else ("data1 at " + str(hex(id(data1)))) n2 = name2 if name2 is not None else ("data2 at " + str(hex(id(data2)))) # Determine the axes we should compare response.axes1 = list(range(num_dims_1)) if axes1 is None else ([axes1] if isinstance(axes1, int) else axes1) response.axes2 = list(range(num_dims_2)) if axes2 is None else ([axes2] if isinstance(axes2, int) else axes2) # Validate the array shape # 1) Check the number of dimensions of the arrays if (response.axes1 is None and response.axes2 is None) and num_dims_1 != num_dims_2: response.result = False response.error = 'NUM_DIMS_ERROR' response.message = response.SHAPE_ERROR[response.error] response.message += " %s is %sD and %s is %sD" % (n1, num_dims_1, n2, num_dims_2) # 2) Check that we have the same number of dimensions to compare on both arrays elif len(response.axes1) != len(response.axes2): response.result = False response.error = 'NUM_AXES_ERROR' response.message = response.SHAPE_ERROR[response.error] response.message += " Cannot compare axes %s with %s" % (str(response.axes1), str(response.axes2)) # 3) Check that the datasets have sufficient numner of dimensions elif np.max(response.axes1) >= num_dims_1 or np.max(response.axes2) >= num_dims_2: response.result = False response.error = 'AXIS_OUT_OF_BOUNDS' response.message = response.SHAPE_ERROR[response.error] if np.max(response.axes1) >= num_dims_1: response.message += "Insufficient number of dimensions for %s -- Expected %i found %i" % \ (n1, np.max(response.axes1)+1, num_dims_1) elif np.max(response.axes2) >= num_dims_2: response.message += "Insufficient number of dimensions for %s -- Expected %i found %i" % \ (n2, np.max(response.axes2)+1, num_dims_2) # 4) Compare the length of the dimensions we should validate else: unmatched = [] ignored = [] for ax in zip(response.axes1, response.axes2): if response.shape1[ax[0]] != response.shape2[ax[1]]: if ignore_undetermined and (response.shape1[ax[0]] is None or response.shape2[ax[1]] is None): ignored.append(ax) else: unmatched.append(ax) response.unmatched = unmatched response.ignored = ignored # Check if everything checked out if len(response.unmatched) == 0: response.result = True response.error = None response.message = response.SHAPE_ERROR[response.error] if len(response.ignored) > 0: response.message += " Ignored undetermined axes %s" % str(response.ignored) else: response.result = False response.error = 'AXIS_LEN_ERROR' response.message = response.SHAPE_ERROR[response.error] response.message += "Axes %s with size %s of %s did not match dimensions %s with sizes %s of %s." 
% \ (str([un[0] for un in response.unmatched]), str([response.shape1[un[0]] for un in response.unmatched]), n1, str([un[1] for un in response.unmatched]), str([response.shape2[un[1]] for un in response.unmatched]), n2) if len(response.ignored) > 0: response.message += " Ignored undetermined axes %s" % str(response.ignored) return response class ShapeValidatorResult: """Class for storing results from validating the shape of multi-dimensional arrays. This class is used to store results generated by ShapeValidator :ivar result: Boolean indicating whether results matched or not :type result: bool :ivar message: Message indicating the result of the matching procedure :type messaage: str, None """ SHAPE_ERROR = {None: 'All required axes matched', 'NUM_DIMS_ERROR': 'Unequal number of dimensions.', 'NUM_AXES_ERROR': "Unequal number of axes for comparison.", 'AXIS_OUT_OF_BOUNDS': "Axis index for comparison out of bounds.", 'AXIS_LEN_ERROR': "Unequal length of axes."} """ Dict where the Keys are the type of errors that may have occurred during shape comparison and the values are strings with default error messages for the type. """ @docval({'name': 'result', 'type': bool, 'doc': 'Result of the shape validation', 'default': False}, {'name': 'message', 'type': str, 'doc': 'Message describing the result of the shape validation', 'default': None}, {'name': 'ignored', 'type': tuple, 'doc': 'Axes that have been ignored in the validaton process', 'default': tuple(), 'shape': (None, )}, {'name': 'unmatched', 'type': tuple, 'doc': 'List of axes that did not match during shape validation', 'default': tuple(), 'shape': (None, )}, {'name': 'error', 'type': str, 'doc': 'Error that may have occurred. One of ERROR_TYPE', 'default': None}, {'name': 'shape1', 'type': tuple, 'doc': 'Shape of the first array for comparison', 'default': tuple(), 'shape': (None, )}, {'name': 'shape2', 'type': tuple, 'doc': 'Shape of the second array for comparison', 'default': tuple(), 'shape': (None, )}, {'name': 'axes1', 'type': tuple, 'doc': 'Axes for the first array that should match', 'default': tuple(), 'shape': (None, )}, {'name': 'axes2', 'type': tuple, 'doc': 'Axes for the second array that should match', 'default': tuple(), 'shape': (None, )}, ) def __init__(self, **kwargs): self.result, self.message, self.ignored, self.unmatched, \ self.error, self.shape1, self.shape2, self.axes1, self.axes2 = getargs( 'result', 'message', 'ignored', 'unmatched', 'error', 'shape1', 'shape2', 'axes1', 'axes2', kwargs) def __setattr__(self, key, value): """ Overwrite to ensure that, e.g., error_message is not set to an illegal value. """ if key == 'error': if value not in self.SHAPE_ERROR.keys(): raise ValueError("Illegal error type. Error must be one of ShapeValidatorResult.SHAPE_ERROR: %s" % str(self.SHAPE_ERROR)) else: super().__setattr__(key, value) elif key in ['shape1', 'shape2', 'axes1', 'axes2', 'ignored', 'unmatched']: # Make sure we sore tuples super().__setattr__(key, tuple(value)) else: super().__setattr__(key, value) def __getattr__(self, item): """ Overwrite to allow dynamic retrival of the default message """ if item == 'default_message': return self.SHAPE_ERROR[self.error] return self.__getattribute__(item) @docval_macro('data') class DataIO: """ Base class for wrapping data arrays for I/O. Derived classes of DataIO are typically used to pass dataset-specific I/O parameters to the particular HDMFIO backend. 
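    The wrapper is transparent for most read access. An illustrative sketch (the
    wrapped values are assumed; ``np`` refers to ``numpy``)::

        >>> dio = DataIO(data=[1, 2, 3])
        >>> len(dio)
        3
        >>> dio[1]              # item access is delegated to the wrapped data
        2
        >>> np.asarray(dio)     # __array__ makes the wrapper numpy-friendly
        array([1, 2, 3])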
""" @docval({'name': 'data', 'type': 'array_data', 'doc': 'the data to be written', 'default': None}) def __init__(self, **kwargs): data = popargs('data', kwargs) self.__data = data def get_io_params(self): """ Returns a dict with the I/O parameters specified in this DataIO. """ return dict() @property def data(self): """Get the wrapped data object""" return self.__data @data.setter def data(self, val): """Set the wrapped data object""" if self.__data is not None: raise ValueError("cannot overwrite 'data' on DataIO") self.__data = val def __copy__(self): """ Define a custom copy method for shallow copy.. This is needed due to delegation of __getattr__ to the data to ensure proper copy. :return: Shallow copy of self, ie., a new instance of DataIO wrapping the same self.data object """ newobj = DataIO(data=self.data) return newobj def __deepcopy__(self, memo): """ Define a custom copy method for deep copy. This is needed due to delegation of __getattr__ to the data to ensure proper copy. :param memo: :return: Deep copy of self, i.e., a new instance of DataIO wrapping a deepcopy of the self.data object. """ result = DataIO(data=copy.deepcopy(self.__data)) memo[id(self)] = result return result def __len__(self): """Number of values in self.data""" if not self.valid: raise InvalidDataIOError("Cannot get length of data. Data is not valid.") return len(self.data) def __bool__(self): return self.valid and len(self) > 0 def __getattr__(self, attr): """Delegate attribute lookup to data object""" if attr == '__array_struct__' and not self.valid: # np.array() checks __array__ or __array_struct__ attribute dep. on numpy version raise InvalidDataIOError("Cannot convert data to array. Data is not valid.") if not self.valid: raise InvalidDataIOError("Cannot get attribute '%s' of data. Data is not valid." % attr) return getattr(self.data, attr) def __getitem__(self, item): """Delegate slicing to the data object""" if not self.valid: raise InvalidDataIOError("Cannot get item from data. Data is not valid.") return self.data[item] def __array__(self): """ Support conversion of DataIO.data to a numpy array. This function is provided to improve transparent interoperability of DataIO with numpy. :return: An array instance of self.data """ if not self.valid: raise InvalidDataIOError("Cannot convert data to array. Data is not valid.") if hasattr(self.data, '__array__'): return self.data.__array__() elif isinstance(self.data, DataChunkIterator): raise NotImplementedError("Conversion of DataChunkIterator to array not supported") else: # NOTE this may result in a copy of the array return np.asarray(self.data) def __next__(self): """Delegate iteration interface to data object""" if not self.valid: raise InvalidDataIOError("Cannot iterate on data. Data is not valid.") return self.data.__next__() def __iter__(self): """Delegate iteration interface to the data object""" if not self.valid: raise InvalidDataIOError("Cannot iterate on data. 
Data is not valid.") return self.data.__iter__() @property def valid(self): """bool indicating if the data object is valid""" return self.data is not None class InvalidDataIOError(Exception): pass ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/monitor.py0000644000655200065520000000434000000000000017575 0ustar00circlecicircleci00000000000000from abc import ABCMeta, abstractmethod from .utils import docval, getargs, call_docval_func from .data_utils import AbstractDataChunkIterator, DataChunkIterator, DataChunk class NotYetExhausted(Exception): pass class DataChunkProcessor(AbstractDataChunkIterator, metaclass=ABCMeta): @docval({'name': 'data', 'type': DataChunkIterator, 'doc': 'the DataChunkIterator to analyze'}) def __init__(self, **kwargs): """Initialize the DataChunkIterator""" # Get the user parameters self.__dci = getargs('data', kwargs) def __next__(self): try: dc = self.__dci.__next__() except StopIteration as e: self.__done = True raise e self.process_data_chunk(dc) return dc def __iter__(self): return iter(self.__dci) def recommended_chunk_shape(self): return self.__dci.recommended_chunk_shape() def recommended_data_shape(self): return self.__dci.recommended_data_shape() def get_final_result(self, **kwargs): ''' Return the result of processing data fed by this DataChunkIterator ''' if not self.__done: raise NotYetExhausted() return self.compute_final_result() @abstractmethod @docval({'name': 'data_chunk', 'type': DataChunk, 'doc': 'a chunk to process'}) def process_data_chunk(self, **kwargs): ''' This method should take in a DataChunk, and process it. ''' pass @abstractmethod @docval(returns='the result of processing this stream') def compute_final_result(self, **kwargs): ''' Return the result of processing this stream Should raise NotYetExhaused exception ''' pass class NumSampleCounter(DataChunkProcessor): def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) self.__sample_count = 0 @docval({'name': 'data_chunk', 'type': DataChunk, 'doc': 'a chunk to process'}) def process_data_chunk(self, **kwargs): dc = getargs('data_chunk', kwargs) self.__sample_count += len(dc) @docval(returns='the result of processing this stream') def compute_final_result(self, **kwargs): return self.__sample_count ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/query.py0000644000655200065520000001413400000000000017255 0ustar00circlecicircleci00000000000000from abc import ABCMeta, abstractmethod import numpy as np from .utils import ExtenderMeta, docval_macro, docval, getargs from .array import Array class Query(metaclass=ExtenderMeta): __operations__ = ( '__lt__', '__gt__', '__le__', '__ge__', '__eq__', '__ne__', ) @classmethod def __build_operation(cls, op): def __func(self, arg): return cls(self, op, arg) @ExtenderMeta.pre_init def __make_operators(cls, name, bases, classdict): if not isinstance(cls.__operations__, tuple): raise TypeError("'__operations__' must be of type tuple") # add any new operations if len(bases) and 'Query' in globals() and issubclass(bases[-1], Query) \ and bases[-1].__operations__ is not cls.__operations__: new_operations = list(cls.__operations__) new_operations[0:0] = bases[-1].__operations__ cls.__operations__ = tuple(new_operations) for op in cls.__operations__: if not hasattr(cls, op): setattr(cls, op, cls.__build_operation(op)) def __init__(self, obj, op, arg): self.obj = obj self.op = op self.arg 
= arg self.collapsed = None self.expanded = None @docval({'name': 'expand', 'type': bool, 'help': 'whether or not to expand result', 'default': True}) def evaluate(self, **kwargs): expand = getargs('expand', kwargs) if expand: if self.expanded is None: self.expanded = self.__evalhelper() return self.expanded else: if self.collapsed is None: self.collapsed = self.__collapse(self.__evalhelper()) return self.collapsed def __evalhelper(self): obj = self.obj arg = self.arg if isinstance(obj, Query): obj = obj.evaluate() elif isinstance(obj, HDMFDataset): obj = obj.dataset if isinstance(arg, Query): arg = self.arg.evaluate() return getattr(obj, self.op)(self.arg) def __collapse(self, result): if isinstance(result, slice): return (result.start, result.stop) elif isinstance(result, list): ret = list() for idx in result: if isinstance(idx, slice) and (idx.step is None or idx.step == 1): ret.append((idx.start, idx.stop)) else: ret.append(idx) return ret else: return result def __and__(self, other): return NotImplemented def __or__(self, other): return NotImplemented def __xor__(self, other): return NotImplemented def __contains__(self, other): return NotImplemented @docval_macro('array_data') class HDMFDataset(metaclass=ExtenderMeta): __operations__ = ( '__lt__', '__gt__', '__le__', '__ge__', '__eq__', '__ne__', ) @classmethod def __build_operation(cls, op): def __func(self, arg): return Query(self, op, arg) setattr(__func, '__name__', op) return __func @ExtenderMeta.pre_init def __make_operators(cls, name, bases, classdict): if not isinstance(cls.__operations__, tuple): raise TypeError("'__operations__' must be of type tuple") # add any new operations if len(bases) and 'Query' in globals() and issubclass(bases[-1], Query) \ and bases[-1].__operations__ is not cls.__operations__: new_operations = list(cls.__operations__) new_operations[0:0] = bases[-1].__operations__ cls.__operations__ = tuple(new_operations) for op in cls.__operations__: setattr(cls, op, cls.__build_operation(op)) def __evaluate_key(self, key): if isinstance(key, (tuple, list, np.ndarray)): return tuple(map(self.__evaluate_key, key)) else: if isinstance(key, Query): return key.evaluate() return key def __getitem__(self, key): idx = self.__evaluate_key(key) return self.dataset[idx] @docval({'name': 'dataset', 'type': ('array_data', Array), 'doc': 'the HDF5 file lazily evaluate'}) def __init__(self, **kwargs): super().__init__() self.__dataset = getargs('dataset', kwargs) @property def dataset(self): return self.__dataset @property def dtype(self): return self.__dataset.dtype def __len__(self): return len(self.__dataset) def __iter__(self): return iter(self.dataset) def __next__(self): return next(self.dataset) def next(self): return self.dataset.next() class ReferenceResolver(metaclass=ABCMeta): """ A base class for classes that resolve references """ @classmethod @abstractmethod def get_inverse_class(cls): """ Return the class the represents the ReferenceResolver that resolves refernces to the opposite type. BuilderResolver.get_inverse_class should return a class that subclasses ContainerResolver. ContainerResolver.get_inverse_class should return a class that subclasses BuilderResolver. """ pass @abstractmethod def invert(self): """ Return an object that defers reference resolution but in the opposite direction. 
""" pass class BuilderResolver(ReferenceResolver): """ A reference resolver that resolves references to Builders Subclasses should implement the invert method and the get_inverse_class classmethod BuilderResolver.get_inverse_class should return a class that subclasses ContainerResolver. """ pass class ContainerResolver(ReferenceResolver): """ A reference resolver that resolves references to Containers Subclasses should implement the invert method and the get_inverse_class classmethod ContainerResolver.get_inverse_class should return a class that subclasses BuilderResolver. """ pass ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/region.py0000644000655200065520000000522500000000000017374 0ustar00circlecicircleci00000000000000from abc import ABCMeta, abstractmethod from operator import itemgetter from .container import Data, DataRegion from .utils import docval, getargs class RegionSlicer(DataRegion, metaclass=ABCMeta): ''' A abstract base class to control getting using a region Subclasses must implement `__getitem__` and `__len__` ''' @docval({'name': 'target', 'type': None, 'doc': 'the target to slice'}, {'name': 'slice', 'type': None, 'doc': 'the region to slice'}) def __init__(self, **kwargs): self.__target = getargs('target', kwargs) self.__slice = getargs('slice', kwargs) @property def data(self): """The target data. Same as self.target""" return self.target @property def region(self): """The selected region. Same as self.slice""" return self.slice @property def target(self): """The target data""" return self.__target @property def slice(self): """The selected slice""" return self.__slice @property @abstractmethod def __getitem__(self, idx): """Must be implemented by subclasses""" pass @property @abstractmethod def __len__(self): """Must be implemented by subclasses""" pass class ListSlicer(RegionSlicer): """Implementation of RegionSlicer for slicing Lists and Data""" @docval({'name': 'dataset', 'type': (list, tuple, Data), 'doc': 'the dataset to slice'}, {'name': 'region', 'type': (list, tuple, slice), 'doc': 'the region reference to use to slice'}) def __init__(self, **kwargs): self.__dataset, self.__region = getargs('dataset', 'region', kwargs) super().__init__(self.__dataset, self.__region) if isinstance(self.__region, slice): self.__getter = itemgetter(self.__region) self.__len = len(range(*self.__region.indices(len(self.__dataset)))) else: self.__getter = itemgetter(*self.__region) self.__len = len(self.__region) def __read_region(self): """ Internal helper function used to define self._read """ if not hasattr(self, '_read'): self._read = self.__getter(self.__dataset) del self.__getter def __getitem__(self, idx): """ Get data values from selected data """ self.__read_region() getter = None if isinstance(idx, (list, tuple)): getter = itemgetter(*idx) else: getter = itemgetter(idx) return getter(self._read) def __len__(self): """Number of values in the slice/region""" return self.__len ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9201882 hdmf-1.5.4/src/hdmf/spec/0000755000655200065520000000000000000000000016465 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/spec/__init__.py0000644000655200065520000000101500000000000020573 0ustar00circlecicircleci00000000000000from .spec import NAME_WILDCARD from .spec import Spec from .spec 
import AttributeSpec from .spec import DtypeSpec from .spec import DtypeHelper from .spec import RefSpec from .spec import DatasetSpec from .spec import LinkSpec from .spec import GroupSpec from .catalog import SpecCatalog from .namespace import SpecNamespace from .namespace import NamespaceCatalog from .namespace import SpecReader from .write import NamespaceBuilder from .write import SpecWriter from .write import export_spec from ..utils import docval ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/spec/catalog.py0000644000655200065520000002173600000000000020462 0ustar00circlecicircleci00000000000000from collections import OrderedDict import copy from .spec import BaseStorageSpec, GroupSpec from ..utils import docval, getargs class SpecCatalog: def __init__(self): ''' Create a new catalog for storing specifications ** Private Instance Variables ** :ivar __specs: Dict with the specification of each registered type :ivar __parent_types: Dict with parent types for each registered type :ivar __spec_source_files: Dict with the path to the source files (if available) for each registered type :ivar __hierarchy: Dict describing the hierarchy for each registered type. NOTE: Always use SpecCatalog.get_hierarchy(...) to retrieve the hierarchy as this dictionary is used like a cache, i.e., to avoid repeated calcuation of the hierarchy but the contents are computed on first request by SpecCatalog.get_hierarchy(...) ''' self.__specs = OrderedDict() self.__parent_types = dict() self.__hierarchy = dict() self.__spec_source_files = dict() @docval({'name': 'spec', 'type': BaseStorageSpec, 'doc': 'a Spec object'}, {'name': 'source_file', 'type': str, 'doc': 'path to the source file from which the spec was loaded', 'default': None}) def register_spec(self, **kwargs): ''' Associate a specified object type with an HDF5 specification ''' spec, source_file = getargs('spec', 'source_file', kwargs) ndt = spec.data_type_inc ndt_def = spec.data_type_def if ndt_def is None: raise ValueError('cannot register spec that has no data_type_def') if ndt_def != ndt: self.__parent_types[ndt_def] = ndt type_name = ndt_def if ndt_def is not None else ndt if type_name in self.__specs: raise ValueError("'%s' - cannot overwrite existing specification" % type_name) self.__specs[type_name] = spec self.__spec_source_files[type_name] = source_file @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to get the Spec for'}, returns="the specification for writing the given object type to HDF5 ", rtype='Spec') def get_spec(self, **kwargs): ''' Get the Spec object for the given type ''' data_type = getargs('data_type', kwargs) return self.__specs.get(data_type, None) @docval(rtype=tuple) def get_registered_types(self, **kwargs): ''' Return all registered specifications ''' # kwargs is not used here but is used by docval return tuple(self.__specs.keys()) @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type of the spec to get the source file for'}, returns="the path to source specification file from which the spec was originally loaded or None ", rtype='str') def get_spec_source_file(self, **kwargs): ''' Return the path to the source file from which the spec for the given type was loaded from. None is returned if no file path is available for the spec. Note: The spec in the file may not be identical to the object in case the spec is modified after load. 
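        For example (an illustrative sketch; the spec and the file name are assumed)::

            >>> catalog = SpecCatalog()
            >>> spec = GroupSpec('A test group', data_type_def='MyType')
            >>> catalog.register_spec(spec, source_file='mytype.yaml')
            >>> catalog.get_spec_source_file('MyType')
            'mytype.yaml'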
''' data_type = getargs('data_type', kwargs) return self.__spec_source_files.get(data_type, None) @docval({'name': 'spec', 'type': BaseStorageSpec, 'doc': 'the Spec object to register'}, {'name': 'source_file', 'type': str, 'doc': 'path to the source file from which the spec was loaded', 'default': None}, rtype=tuple, returns='the types that were registered with this spec') def auto_register(self, **kwargs): ''' Register this specification and all sub-specification using data_type as object type name ''' spec, source_file = getargs('spec', 'source_file', kwargs) ndt = spec.data_type_def ret = list() if ndt is not None: self.register_spec(spec, source_file) ret.append(ndt) if isinstance(spec, GroupSpec): for dataset_spec in spec.datasets: dset_ndt = dataset_spec.data_type_def if dset_ndt is not None and not spec.is_inherited_type(dataset_spec): ret.append(dset_ndt) self.register_spec(dataset_spec, source_file) for group_spec in spec.groups: ret.extend(self.auto_register(group_spec, source_file)) return tuple(ret) @docval({'name': 'data_type', 'type': (str, type), 'doc': 'the data_type to get the hierarchy of'}, returns="Tuple of strings with the names of the types the given data_type inherits from.", rtype=tuple) def get_hierarchy(self, **kwargs): """ For a given type get the type inheritance hierarchy for that type. E.g., if we have a type MyContainer that inherits from BaseContainer then the result will be a tuple with the strings ('MyContainer', 'BaseContainer') """ data_type = getargs('data_type', kwargs) if isinstance(data_type, type): data_type = data_type.__name__ ret = self.__hierarchy.get(data_type) if ret is None: hierarchy = list() parent = data_type while parent is not None: hierarchy.append(parent) parent = self.__parent_types.get(parent) # store the computed hierarchy for data_type and all types in between it and # the top of the hierarchy tmp_hier = tuple(hierarchy) ret = tmp_hier while len(tmp_hier) > 0: self.__hierarchy[tmp_hier[0]] = tmp_hier tmp_hier = tmp_hier[1:] return tuple(ret) @docval(returns="Hierarchically nested OrderedDict with the hierarchy of all the types", rtype=OrderedDict) def get_full_hierarchy(self): """ Get the complete hierarchy of all types. The function attempts to sort types by name using standard Python sorted. """ # Get the list of all types registered_types = self.get_registered_types() type_hierarchy = OrderedDict() # Internal helper function to recurisvely construct the hierarchy of types def get_type_hierarchy(data_type, spec_catalog): dtype_hier = OrderedDict() for dtype in sorted(self.get_subtypes(data_type=data_type, recursive=False)): dtype_hier[dtype] = get_type_hierarchy(dtype, spec_catalog) return dtype_hier # Compute the type hierarchy for rt in sorted(registered_types): rt_spec = self.get_spec(rt) if isinstance(rt_spec, BaseStorageSpec): # Only BaseStorageSpec have data_type_inc/def keys if rt_spec.get(rt_spec.inc_key(), None) is None: type_hierarchy[rt] = get_type_hierarchy(rt, self) return type_hierarchy @docval({'name': 'data_type', 'type': (str, type), 'doc': 'the data_type to get the subtypes for'}, {'name': 'recursive', 'type': bool, 'doc': 'recursively get all subtypes. Set to False to only get the direct subtypes', 'default': True}, returns="Tuple of strings with the names of all types of the given data_type.", rtype=tuple) def get_subtypes(self, **kwargs): """ For a given data type recursively find all the subtypes that inherit from it. 
E.g., assume we have the following inheritance hierarchy:: -BaseContainer--+-->AContainer--->ADContainer | +-->BContainer In this case, the subtypes of BaseContainer would be (AContainer, ADContainer, BContainer), the subtypes of AContainer would be (ADContainer), and the subtypes of BContainer would be empty (). """ data_type, recursive = getargs('data_type', 'recursive', kwargs) curr_spec = self.get_spec(data_type) if isinstance(curr_spec, BaseStorageSpec): # Only BaseStorageSpec have data_type_inc/def keys subtypes = [] spec_inc_key = curr_spec.inc_key() spec_def_key = curr_spec.def_key() for rt in self.get_registered_types(): rt_spec = self.get_spec(rt) if rt_spec.get(spec_inc_key, None) == data_type and rt_spec.get(spec_def_key, None) != data_type: subtypes.append(rt) if recursive: subtypes += self.get_subtypes(rt) return tuple(set(subtypes)) # Convert to a set to make sure we don't have any duplicates else: return () def __copy__(self): ret = SpecCatalog() ret.__specs = copy.copy(self.__specs) return ret def __deepcopy__(self, memo): ret = SpecCatalog() ret.__specs = copy.deepcopy(self.__specs, memo) return ret ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/spec/namespace.py0000644000655200065520000004544400000000000021006 0ustar00circlecicircleci00000000000000from collections import OrderedDict from datetime import datetime from copy import copy import ruamel.yaml as yaml import os.path import string from warnings import warn from itertools import chain from abc import ABCMeta, abstractmethod from ..utils import docval, getargs, popargs, get_docval, call_docval_func from .catalog import SpecCatalog from .spec import DatasetSpec, GroupSpec _namespace_args = [ {'name': 'doc', 'type': str, 'doc': 'a description about what this namespace represents'}, {'name': 'name', 'type': str, 'doc': 'the name of this namespace'}, {'name': 'schema', 'type': list, 'doc': 'location of schema specification files or other Namespaces'}, {'name': 'full_name', 'type': str, 'doc': 'extended full name of this namespace', 'default': None}, {'name': 'version', 'type': (str, tuple, list), 'doc': 'Version number of the namespace', 'default': None}, {'name': 'date', 'type': (datetime, str), 'doc': "Date last modified or released. Formatting is %Y-%m-%d %H:%M:%S, e.g, 2017-04-25 17:14:13", 'default': None}, {'name': 'author', 'type': (str, list), 'doc': 'Author or list of authors.', 'default': None}, {'name': 'contact', 'type': (str, list), 'doc': 'List of emails. Ordering should be the same as for author', 'default': None}, {'name': 'catalog', 'type': SpecCatalog, 'doc': 'The SpecCatalog object for this SpecNamespace', 'default': None} ] class SpecNamespace(dict): """ A namespace for specifications """ __types_key = 'data_types' @docval(*_namespace_args) def __init__(self, **kwargs): doc, full_name, name, version, date, author, contact, schema, catalog = \ popargs('doc', 'full_name', 'name', 'version', 'date', 'author', 'contact', 'schema', 'catalog', kwargs) super().__init__() self['doc'] = doc self['schema'] = schema if any(c in string.whitespace for c in name): raise ValueError("'name' must not contain any whitespace") self['name'] = name if full_name is not None: self['full_name'] = full_name if version is None: raise TypeError('SpecNamespace missing arg `version`. 
Please specify a version for the extension.') self['version'] = version if date is not None: self['date'] = date if author is not None: self['author'] = author if contact is not None: self['contact'] = contact self.__catalog = catalog if catalog is not None else SpecCatalog() @classmethod def types_key(cls): ''' Get the key used for specifying types to include from a file or namespace Override this method to use a different name for 'data_types' ''' return cls.__types_key @property def full_name(self): """String with full name or None""" return self.get('full_name', None) @property def contact(self): """String or list of strings with the contacts or None""" return self.get('contact', None) @property def author(self): """String or list of strings with the authors or None""" return self.get('author', None) @property def version(self): """String, list, or tuple with the version or None """ return self.get('version', None) @property def date(self): """Date last modified or released. :return: datetime object, string, or None""" return self.get('date', None) @property def name(self): """String with short name or None""" return self.get('name', None) @property def doc(self): return self['doc'] @property def schema(self): return self['schema'] def get_source_files(self): """ Get the list of names of the source files included the schema of the namespace """ return [item['source'] for item in self.schema if 'source' in item] @docval({'name': 'sourcefile', 'type': str, 'doc': 'Name of the source file'}, returns='Dict with the source file documentation', rtype=dict) def get_source_description(self, sourcefile): """ Get the description of a source file as described in the namespace. The result is a dict which contains the 'source' and optionally 'title', 'doc' and 'data_types' imported from the source file """ for item in self.schema: if item.get('source', None) == sourcefile: return item @property def catalog(self): """The SpecCatalog containing all the Specs""" return self.__catalog @docval({'name': 'data_type', 'type': (str, type), 'doc': 'the data_type to get the spec for'}) def get_spec(self, **kwargs): """Get the Spec object for the given data type""" data_type = getargs('data_type', kwargs) spec = self.__catalog.get_spec(data_type) if spec is None: raise ValueError("No specification for '%s' in namespace '%s'" % (data_type, self.name)) return spec @docval(returns="the a tuple of the available data types", rtype=tuple) def get_registered_types(self, **kwargs): """Get the available types in this namespace""" return self.__catalog.get_registered_types() @docval({'name': 'data_type', 'type': (str, type), 'doc': 'the data_type to get the hierarchy of'}, returns="a tuple with the type hierarchy", rtype=tuple) def get_hierarchy(self, **kwargs): ''' Get the extension hierarchy for the given data_type in this namespace''' data_type = getargs('data_type', kwargs) return self.__catalog.get_hierarchy(data_type) @classmethod def build_namespace(cls, **spec_dict): kwargs = copy(spec_dict) try: args = [kwargs.pop(x['name']) for x in get_docval(cls.__init__) if 'default' not in x] except KeyError as e: raise KeyError("'%s' not found in %s" % (e.args[0], str(spec_dict))) return cls(*args, **kwargs) class SpecReader(metaclass=ABCMeta): @docval({'name': 'source', 'type': str, 'doc': 'the source from which this reader reads from'}) def __init__(self, **kwargs): self.__source = getargs('source', kwargs) @property def source(self): return self.__source @abstractmethod def read_spec(self): pass @abstractmethod def 
read_namespace(self): pass class YAMLSpecReader(SpecReader): @docval({'name': 'indir', 'type': str, 'doc': 'the path spec files are relative to', 'default': '.'}) def __init__(self, **kwargs): super_kwargs = {'source': kwargs['indir']} call_docval_func(super().__init__, super_kwargs) def read_namespace(self, namespace_path): namespaces = None with open(namespace_path, 'r') as stream: d = yaml.safe_load(stream) namespaces = d.get('namespaces') if namespaces is None: raise ValueError("no 'namespaces' found in %s" % namespace_path) return namespaces def read_spec(self, spec_path): specs = None with open(self.__get_spec_path(spec_path), 'r') as stream: specs = yaml.safe_load(stream) if not ('datasets' in specs or 'groups' in specs): raise ValueError("no 'groups' or 'datasets' found in %s" % spec_path) return specs def __get_spec_path(self, spec_path): if os.path.isabs(spec_path): return spec_path return os.path.join(self.source, spec_path) class NamespaceCatalog: @docval({'name': 'group_spec_cls', 'type': type, 'doc': 'the class to use for group specifications', 'default': GroupSpec}, {'name': 'dataset_spec_cls', 'type': type, 'doc': 'the class to use for dataset specifications', 'default': DatasetSpec}, {'name': 'spec_namespace_cls', 'type': type, 'doc': 'the class to use for specification namespaces', 'default': SpecNamespace},) def __init__(self, **kwargs): """Create a catalog for storing multiple Namespaces""" self.__namespaces = OrderedDict() self.__dataset_spec_cls = getargs('dataset_spec_cls', kwargs) self.__group_spec_cls = getargs('group_spec_cls', kwargs) self.__spec_namespace_cls = getargs('spec_namespace_cls', kwargs) # keep track of all spec objects ever loaded, so we don't have # multiple object instances of a spec self.__loaded_specs = dict() self.__included_specs = dict() self.__included_sources = dict() self._loaded_specs = self.__loaded_specs def __copy__(self): ret = NamespaceCatalog(self.__group_spec_cls, self.__dataset_spec_cls, self.__spec_namespace_cls) ret.__namespaces = copy(self.__namespaces) ret.__loaded_specs = copy(self.__loaded_specs) ret.__included_specs = copy(self.__included_specs) ret.__included_sources = copy(self.__included_sources) return ret def merge(self, ns_catalog): for name, namespace in ns_catalog.__namespaces.items(): self.add_namespace(name, namespace) @property @docval(returns='a tuple of the available namespaces', rtype=tuple) def namespaces(self): """The namespaces in this NamespaceCatalog""" return tuple(self.__namespaces.keys()) @property def dataset_spec_cls(self): """The DatasetSpec class used in this NamespaceCatalog""" return self.__dataset_spec_cls @property def group_spec_cls(self): """The GroupSpec class used in this NamespaceCatalog""" return self.__group_spec_cls @property def spec_namespace_cls(self): """The SpecNamespace class used in this NamespaceCatalog""" return self.__spec_namespace_cls @docval({'name': 'name', 'type': str, 'doc': 'the name of this namespace'}, {'name': 'namespace', 'type': SpecNamespace, 'doc': 'the SpecNamespace object'}) def add_namespace(self, **kwargs): """Add a namespace to this catalog""" name, namespace = getargs('name', 'namespace', kwargs) if name in self.__namespaces: raise KeyError("namespace '%s' already exists" % name) self.__namespaces[name] = namespace for dt in namespace.catalog.get_registered_types(): source = namespace.catalog.get_spec_source_file(dt) self.__loaded_specs.setdefault(source, list()).append(dt) @docval({'name': 'name', 'type': str, 'doc': 'the name of this namespace'}, 
returns="the SpecNamespace with the given name", rtype=SpecNamespace) def get_namespace(self, **kwargs): """Get the a SpecNamespace""" name = getargs('name', kwargs) ret = self.__namespaces.get(name) if ret is None: raise KeyError("'%s' not a namespace" % name) return ret @docval({'name': 'namespace', 'type': str, 'doc': 'the name of the namespace'}, {'name': 'data_type', 'type': (str, type), 'doc': 'the data_type to get the spec for'}, returns="the specification for writing the given object type to HDF5 ", rtype='Spec') def get_spec(self, **kwargs): ''' Get the Spec object for the given type from the given Namespace ''' namespace, data_type = getargs('namespace', 'data_type', kwargs) if namespace not in self.__namespaces: raise KeyError("'%s' not a namespace" % namespace) return self.__namespaces[namespace].get_spec(data_type) @docval({'name': 'namespace', 'type': str, 'doc': 'the name of the namespace'}, {'name': 'data_type', 'type': (str, type), 'doc': 'the data_type to get the spec for'}, returns="a tuple with the type hierarchy", rtype=tuple) def get_hierarchy(self, **kwargs): ''' Get the type hierarchy for a given data_type in a given namespace ''' namespace, data_type = getargs('namespace', 'data_type', kwargs) spec_ns = self.__namespaces.get(namespace) if spec_ns is None: raise KeyError("'%s' not a namespace" % namespace) return spec_ns.get_hierarchy(data_type) @docval(rtype=tuple) def get_sources(self, **kwargs): ''' Get all the source specification files that were loaded in this catalog ''' return tuple(self.__loaded_specs.keys()) @docval({'name': 'namespace', 'type': str, 'doc': 'the name of the namespace'}, rtype=tuple) def get_namespace_sources(self, **kwargs): ''' Get all the source specifications that were loaded for a given namespace ''' namespace = getargs('namespace', kwargs) return tuple(self.__included_sources[namespace]) @docval({'name': 'source', 'type': str, 'doc': 'the name of the source'}, rtype=tuple) def get_types(self, **kwargs): ''' Get the types that were loaded from a given source ''' source = getargs('source', kwargs) ret = self.__loaded_specs.get(source) if ret is not None: ret = tuple(ret) return ret def __load_spec_file(self, reader, spec_source, catalog, dtypes=None, resolve=True): ret = self.__loaded_specs.get(spec_source) if ret is not None: raise ValueError("spec source '%s' already loaded" % spec_source) def __reg_spec(spec_cls, spec_dict): parent_cls = GroupSpec if issubclass(spec_cls, GroupSpec) else DatasetSpec dt_def = spec_dict.get(spec_cls.def_key(), spec_dict.get(parent_cls.def_key())) if dt_def is None: msg = 'no %s or %s found in spec %s' % (spec_cls.def_key(), parent_cls.def_key(), spec_source) raise ValueError(msg) if dtypes and dt_def not in dtypes: return if resolve: self.__resolve_includes(spec_dict, catalog) spec_obj = spec_cls.build_spec(spec_dict) return catalog.auto_register(spec_obj, spec_source) if ret is None: ret = list() d = reader.read_spec(spec_source) specs = d.get('datasets', list()) for spec_dict in specs: ret.extend(__reg_spec(self.__dataset_spec_cls, spec_dict)) specs = d.get('groups', list()) for spec_dict in specs: ret.extend(__reg_spec(self.__group_spec_cls, spec_dict)) self.__loaded_specs[spec_source] = ret return ret def __resolve_includes(self, spec_dict, catalog): """ Pull in any attributes, datasets, or groups included """ dt_inc = spec_dict.get(self.__group_spec_cls.inc_key()) dt_def = spec_dict.get(self.__group_spec_cls.def_key()) if dt_inc is not None and dt_def is not None: parent_spec = 
catalog.get_spec(dt_inc) if parent_spec is None: msg = "Cannot resolve include spec '%s' for type '%s'" % (dt_inc, dt_def) raise ValueError(msg) spec_dict[self.__group_spec_cls.inc_key()] = parent_spec it = chain(spec_dict.get('groups', list()), spec_dict.get('datasets', list())) for subspec_dict in it: self.__resolve_includes(subspec_dict, catalog) def __load_namespace(self, namespace, reader, types_key, resolve=True): ns_name = namespace['name'] if ns_name in self.__namespaces: raise KeyError("namespace '%s' already exists" % ns_name) catalog = SpecCatalog() included_types = dict() for s in namespace['schema']: if 'source' in s: # read specs from file dtypes = None if types_key in s: dtypes = set(s[types_key]) self.__load_spec_file(reader, s['source'], catalog, dtypes=dtypes, resolve=resolve) self.__included_sources.setdefault(ns_name, list()).append(s['source']) elif 'namespace' in s: # load specs from namespace try: inc_ns = self.get_namespace(s['namespace']) except KeyError as e: raise ValueError("Could not load namespace '%s'" % s['namespace']) from e if types_key in s: types = s[types_key] else: types = inc_ns.get_registered_types() for ndt in types: spec = inc_ns.get_spec(ndt) spec_file = inc_ns.catalog.get_spec_source_file(ndt) if isinstance(spec, DatasetSpec): spec = self.dataset_spec_cls.build_spec(spec) else: spec = self.group_spec_cls.build_spec(spec) catalog.register_spec(spec, spec_file) included_types[s['namespace']] = tuple(types) # construct namespace self.__namespaces[ns_name] = self.__spec_namespace_cls.build_namespace(catalog=catalog, **namespace) return included_types @docval({'name': 'namespace_path', 'type': str, 'doc': 'the path to the file containing the namespaces(s) to load'}, {'name': 'resolve', 'type': bool, 'doc': 'whether or not to include objects from included/parent spec objects', 'default': True}, {'name': 'reader', 'type': SpecReader, 'doc': 'the class to user for reading specifications', 'default': None}, returns='a dictionary describing the dependencies of loaded namespaces', rtype=dict) def load_namespaces(self, **kwargs): """Load the namespaces in the given file""" namespace_path, resolve, reader = getargs('namespace_path', 'resolve', 'reader', kwargs) if reader is None: # load namespace definition from file if not os.path.exists(namespace_path): msg = "namespace file '%s' not found" % namespace_path raise IOError(msg) reader = YAMLSpecReader(indir=os.path.dirname(namespace_path)) ns_path_key = os.path.join(reader.source, os.path.basename(namespace_path)) ret = self.__included_specs.get(ns_path_key) if ret is None: ret = dict() else: return ret namespaces = reader.read_namespace(namespace_path) types_key = self.__spec_namespace_cls.types_key() to_load = list() for ns in namespaces: if ns['name'] in self.__namespaces: warn("ignoring namespace '%s' because it already exists" % ns['name']) else: to_load.append(ns) # now load specs into namespace for ns in to_load: ret[ns['name']] = self.__load_namespace(ns, reader, types_key, resolve=resolve) self.__included_specs[ns_path_key] = ret return ret ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/spec/spec.py0000644000655200065520000014210000000000000017767 0ustar00circlecicircleci00000000000000from abc import ABCMeta from copy import deepcopy from collections import OrderedDict import re from warnings import warn from ..utils import docval, getargs, popargs, get_docval, fmt_docval_args NAME_WILDCARD = None ZERO_OR_ONE = '?' 
ZERO_OR_MANY = '*' ONE_OR_MANY = '+' DEF_QUANTITY = 1 FLAGS = { 'zero_or_one': ZERO_OR_ONE, 'zero_or_many': ZERO_OR_MANY, 'one_or_many': ONE_OR_MANY } class DtypeHelper(): # Dict where the keys are the primary data type and the values are list of strings with synonyms for the dtype primary_dtype_synonyms = { 'float': ["float", "float32"], 'double': ["double", "float64"], 'short': ["int16", "short"], 'int': ["int32", "int"], 'long': ["int64", "long"], 'utf': ["text", "utf", "utf8", "utf-8"], 'ascii': ["ascii", "bytes"], 'bool': ["bool"], 'int8': ["int8"], 'uint8': ["uint8"], 'uint16': ["uint16"], 'uint32': ["uint32", "uint"], 'uint64': ["uint64"], 'object': ['object'], 'region': ['region'], 'numeric': ['numeric'], 'isodatetime': ["isodatetime", "datetime"] } # List of recommended primary dtype strings. These are the keys of primary_dtype_string_synonyms recommended_primary_dtypes = list(primary_dtype_synonyms.keys()) # List of valid primary data type strings valid_primary_dtypes = set(list(primary_dtype_synonyms.keys()) + [vi for v in primary_dtype_synonyms.values() for vi in v]) @staticmethod def simplify_cpd_type(cpd_type): ''' Transform a list of DtypeSpecs into a list of strings. Use for simple representation of compound type and validation. :param cpd_type: The list of DtypeSpecs to simplify :type cpd_type: list ''' ret = list() for exp in cpd_type: exp_key = exp.dtype if isinstance(exp_key, RefSpec): exp_key = exp_key.reftype ret.append(exp_key) return ret class ConstructableDict(dict, metaclass=ABCMeta): @classmethod def build_const_args(cls, spec_dict): ''' Build constructor arguments for this ConstructableDict class from a dictionary ''' return deepcopy(spec_dict) @classmethod def build_spec(cls, spec_dict): ''' Build a Spec object from the given Spec dict ''' vargs = cls.build_const_args(spec_dict) args = list() kwargs = dict() try: for x in get_docval(cls.__init__): if not x['name'] in vargs: continue if 'default' not in x: args.append(vargs.get(x['name'])) else: kwargs[x['name']] = vargs.get(x['name']) except KeyError as e: raise KeyError("'%s' not found in %s" % (e.args[0], str(spec_dict))) return cls(*args, **kwargs) class Spec(ConstructableDict): ''' A base specification class ''' @docval({'name': 'doc', 'type': str, 'doc': 'a description about what this specification represents'}, {'name': 'name', 'type': str, 'doc': 'The name of this attribute', 'default': None}, {'name': 'required', 'type': bool, 'doc': 'whether or not this attribute is required', 'default': True}, {'name': 'parent', 'type': 'Spec', 'doc': 'the parent of this spec', 'default': None}) def __init__(self, **kwargs): name, doc, required, parent = getargs('name', 'doc', 'required', 'parent', kwargs) super().__init__() if name is not None: self['name'] = name if doc is not None: self['doc'] = doc if not required: self['required'] = required self._parent = parent @property def doc(self): ''' Documentation on what this Spec is specifying ''' return self.get('doc', None) @property def name(self): ''' The name of the object being specified ''' return self.get('name', None) @property def parent(self): ''' The parent specification of this specification ''' return self._parent @parent.setter def parent(self, spec): ''' Set the parent of this specification ''' if self._parent is not None: raise Exception('Cannot re-assign parent') self._parent = spec @classmethod def build_const_args(cls, spec_dict): ''' Build constructor arguments for this Spec class from a dictionary ''' ret = super().build_const_args(spec_dict) if 
'doc' not in ret: msg = "'doc' missing: %s" % str(spec_dict) raise ValueError(msg) return ret def __hash__(self): return id(self) # def __eq__(self, other): # return id(self) == id(other) _target_type_key = 'target_type' _ref_args = [ {'name': _target_type_key, 'type': str, 'doc': 'the target type GroupSpec or DatasetSpec'}, {'name': 'reftype', 'type': str, 'doc': 'the type of references this is i.e. region or object'}, ] class RefSpec(ConstructableDict): __allowable_types = ('object', 'region') @docval(*_ref_args) def __init__(self, **kwargs): target_type, reftype = getargs(_target_type_key, 'reftype', kwargs) self[_target_type_key] = target_type if reftype not in self.__allowable_types: msg = "reftype must be one of the following: %s" % ", ".join(self.__allowable_types) raise ValueError(msg) self['reftype'] = reftype @property def target_type(self): '''The data_type of the target of the reference''' return self[_target_type_key] @property def reftype(self): '''The type of reference''' return self['reftype'] @docval(rtype=bool, returns='True if this RefSpec specifies a region reference, False otherwise') def is_region(self): return self['reftype'] == 'region' _attr_args = [ {'name': 'name', 'type': str, 'doc': 'The name of this attribute'}, {'name': 'doc', 'type': str, 'doc': 'a description about what this specification represents'}, {'name': 'dtype', 'type': (str, RefSpec), 'doc': 'The data type of this attribute'}, {'name': 'shape', 'type': (list, tuple), 'doc': 'the shape of this dataset', 'default': None}, {'name': 'dims', 'type': (list, tuple), 'doc': 'the dimensions of this dataset', 'default': None}, {'name': 'required', 'type': bool, 'doc': 'whether or not this attribute is required. ignored when "value" is specified', 'default': True}, {'name': 'parent', 'type': 'BaseStorageSpec', 'doc': 'the parent of this spec', 'default': None}, {'name': 'value', 'type': None, 'doc': 'a constant value for this attribute', 'default': None}, {'name': 'default_value', 'type': None, 'doc': 'a default value for this attribute', 'default': None} ] class AttributeSpec(Spec): ''' Specification for attributes ''' @docval(*_attr_args) def __init__(self, **kwargs): name, dtype, doc, dims, shape, required, parent, value, default_value = getargs( 'name', 'dtype', 'doc', 'dims', 'shape', 'required', 'parent', 'value', 'default_value', kwargs) super().__init__(doc, name=name, required=required, parent=parent) if isinstance(dtype, RefSpec): self['dtype'] = dtype else: self['dtype'] = dtype # Validate the dype string if self['dtype'] not in DtypeHelper.valid_primary_dtypes: raise ValueError('dtype %s not a valid primary data type %s' % (self['dtype'], str(DtypeHelper.valid_primary_dtypes))) if value is not None: self.pop('required', None) self['value'] = value if default_value is not None: if value is not None: raise ValueError("cannot specify 'value' and 'default_value'") self['default_value'] = default_value self['required'] = False if shape is not None: self['shape'] = shape if dims is not None: self['dims'] = dims if 'shape' not in self: self['shape'] = tuple([None] * len(dims)) if self.shape is not None and self.dims is not None: if len(self['dims']) != len(self['shape']): raise ValueError("'dims' and 'shape' must be the same length") @property def dtype(self): ''' The data type of the attribute ''' return self.get('dtype', None) @property def value(self): ''' The constant value of the attribute. 
"None" if this attribute is not constant ''' return self.get('value', None) @property def default_value(self): ''' The default value of the attribute. "None" if this attribute has no default value ''' return self.get('default_value', None) @property def required(self): ''' True if this attribute is required, False otherwise. ''' return self.get('required', True) @property def dims(self): ''' The dimensions of this attribute's value ''' return self.get('dims', None) @property def shape(self): ''' The shape of this attribute's value ''' return self.get('shape', None) @classmethod def build_const_args(cls, spec_dict): ''' Build constructor arguments for this Spec class from a dictionary ''' ret = super().build_const_args(spec_dict) if 'dtype' in ret: if isinstance(ret['dtype'], dict): ret['dtype'] = RefSpec.build_spec(ret['dtype']) return ret _attrbl_args = [ {'name': 'doc', 'type': str, 'doc': 'a description about what this specification represents'}, {'name': 'name', 'type': str, 'doc': 'the name of this base storage container, ' + 'allowed only if quantity is not \'%s\' or \'%s\'' % (ONE_OR_MANY, ZERO_OR_MANY), 'default': None}, {'name': 'default_name', 'type': str, 'doc': 'The default name of this base storage container, used only if name is None', 'default': None}, {'name': 'attributes', 'type': list, 'doc': 'the attributes on this group', 'default': list()}, {'name': 'linkable', 'type': bool, 'doc': 'whether or not this group can be linked', 'default': True}, {'name': 'quantity', 'type': (str, int), 'doc': 'the required number of allowed instance', 'default': 1}, {'name': 'data_type_def', 'type': str, 'doc': 'the data type this specification represents', 'default': None}, {'name': 'data_type_inc', 'type': (str, 'BaseStorageSpec'), 'doc': 'the data type this specification extends', 'default': None}, ] class BaseStorageSpec(Spec): ''' A specification for any object that can hold attributes. 
''' __inc_key = 'data_type_inc' __def_key = 'data_type_def' __type_key = 'data_type' __id_key = 'object_id' @docval(*_attrbl_args) def __init__(self, **kwargs): name, doc, parent, quantity, attributes, linkable, data_type_def, data_type_inc =\ getargs('name', 'doc', 'parent', 'quantity', 'attributes', 'linkable', 'data_type_def', 'data_type_inc', kwargs) if name == NAME_WILDCARD and data_type_def is None and data_type_inc is None: raise ValueError("Cannot create Group or Dataset spec with wildcard name " "without specifying 'data_type_def' and/or 'data_type_inc'") super().__init__(doc, name=name, parent=parent) default_name = getargs('default_name', kwargs) if default_name: if name is not None: warn("found 'default_name' with 'name' - ignoring 'default_name'") else: self['default_name'] = default_name self.__attributes = dict() if quantity in (ONE_OR_MANY, ZERO_OR_MANY): if name != NAME_WILDCARD: raise ValueError(("Cannot give specific name to something that can ", "exist multiple times: name='%s', quantity='%s'" % (name, quantity))) if quantity != DEF_QUANTITY: self['quantity'] = quantity if not linkable: self['linkable'] = False resolve = False if data_type_inc is not None: if isinstance(data_type_inc, BaseStorageSpec): self[self.inc_key()] = data_type_inc.data_type_def else: self[self.inc_key()] = data_type_inc if data_type_def is not None: self.pop('required', None) self[self.def_key()] = data_type_def if data_type_inc is not None and isinstance(data_type_inc, BaseStorageSpec): resolve = True for attribute in attributes: self.set_attribute(attribute) self.__new_attributes = set(self.__attributes.keys()) self.__overridden_attributes = set() self.__resolved = False if resolve: self.resolve_spec(data_type_inc) self.__resolved = True @property def default_name(self): '''The default name for this spec''' return self.get('default_name', None) @property def resolved(self): return self.__resolved @property def required(self): ''' Whether or not the this spec represents a required field ''' return self.quantity not in (ZERO_OR_ONE, ZERO_OR_MANY) @docval({'name': 'inc_spec', 'type': 'BaseStorageSpec', 'doc': 'the data type this specification represents'}) def resolve_spec(self, **kwargs): inc_spec = getargs('inc_spec', kwargs) for attribute in inc_spec.attributes: self.__new_attributes.discard(attribute) if attribute.name in self.__attributes: self.__overridden_attributes.add(attribute.name) continue self.set_attribute(attribute) @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to check'}) def is_inherited_spec(self, **kwargs): ''' Return True if this spec was inherited from the parent type, False otherwise ''' spec = getargs('spec', kwargs) if isinstance(spec, Spec): spec = spec.name if spec in self.__attributes: return self.is_inherited_attribute(spec) return False @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to check'}) def is_overridden_spec(self, **kwargs): ''' Return True if this spec overrides a specification from the parent type, False otherwise ''' spec = getargs('spec', kwargs) if isinstance(spec, Spec): spec = spec.name if spec in self.__attributes: return self.is_overridden_attribute(spec) return False @docval({'name': 'name', 'type': str, 'doc': 'the name of the attribute to the Spec for'}) def is_inherited_attribute(self, **kwargs): ''' Return True if the attribute was inherited from the parent type, False otherwise ''' name = getargs('name', kwargs) if name not in self.__attributes: raise ValueError("Attribute '%s' not found" % 
name) return name not in self.__new_attributes @docval({'name': 'name', 'type': str, 'doc': 'the name of the attribute to the Spec for'}) def is_overridden_attribute(self, **kwargs): ''' Return True if the given attribute overrides the specification from the parent, False otherwise ''' name = getargs('name', kwargs) if name not in self.__attributes: raise ValueError("Attribute '%s' not found" % name) return name not in self.__overridden_attributes def is_many(self): return self.quantity not in (1, ZERO_OR_ONE) @classmethod def get_data_type_spec(cls, data_type_def): return AttributeSpec(cls.type_key(), 'the data type of this object', 'text', value=data_type_def) @classmethod def get_namespace_spec(cls): return AttributeSpec('namespace', 'the namespace for the data type of this object', 'text', required=False) @property def attributes(self): ''' The attributes for this specification ''' return tuple(self.get('attributes', tuple())) @property def linkable(self): ''' True if object can be a link, False otherwise ''' return self.get('linkable', True) @classmethod def id_key(cls): ''' Get the key used to store data ID on an instance Override this method to use a different name for 'object_id' ''' return cls.__id_key @classmethod def type_key(cls): ''' Get the key used to store data type on an instance Override this method to use a different name for 'data_type' ''' return cls.__type_key @classmethod def inc_key(cls): ''' Get the key used to define a data_type include. Override this method to use a different keyword for 'data_type_inc' ''' return cls.__inc_key @classmethod def def_key(cls): ''' Get the key used to define a data_type definition. Override this method to use a different keyword for 'data_type_def' ''' return cls.__def_key @property def data_type_inc(self): ''' The data type of this specification ''' return self.get(self.inc_key()) @property def data_type_def(self): ''' The data type this specification defines ''' return self.get(self.def_key(), None) @property def quantity(self): ''' The number of times the object being specified should be present ''' return self.get('quantity', DEF_QUANTITY) @docval(*_attr_args) def add_attribute(self, **kwargs): ''' Add an attribute to this specification ''' pargs, pkwargs = fmt_docval_args(AttributeSpec.__init__, kwargs) spec = AttributeSpec(*pargs, **pkwargs) self.set_attribute(spec) return spec @docval({'name': 'spec', 'type': AttributeSpec, 'doc': 'the specification for the attribute to add'}) def set_attribute(self, **kwargs): ''' Set an attribute on this specification ''' spec = kwargs.get('spec') attributes = self.setdefault('attributes', list()) if spec.parent is not None: spec = AttributeSpec.build_spec(spec) if spec.name in self.__attributes: idx = -1 for i, attribute in enumerate(attributes): if attribute.name == spec.name: idx = i break if idx >= 0: attributes[idx] = spec else: raise ValueError('%s in __attributes but not in spec record' % spec.name) else: attributes.append(spec) self.__attributes[spec.name] = spec spec.parent = self @docval({'name': 'name', 'type': str, 'doc': 'the name of the attribute to the Spec for'}) def get_attribute(self, **kwargs): ''' Get an attribute on this specification ''' name = getargs('name', kwargs) return self.__attributes.get(name) @classmethod def build_const_args(cls, spec_dict): ''' Build constructor arguments for this Spec class from a dictionary ''' ret = super().build_const_args(spec_dict) if 'attributes' in ret: ret['attributes'] = [AttributeSpec.build_spec(sub_spec) for sub_spec in 
ret['attributes']] return ret _dt_args = [ {'name': 'name', 'type': str, 'doc': 'the name of this column'}, {'name': 'doc', 'type': str, 'doc': 'a description about what this data type is'}, {'name': 'dtype', 'type': (str, list, RefSpec), 'doc': 'the data type of this column'}, ] class DtypeSpec(ConstructableDict): '''A class for specifying a component of a compound type''' @docval(*_dt_args) def __init__(self, **kwargs): doc, name, dtype = getargs('doc', 'name', 'dtype', kwargs) self['doc'] = doc self['name'] = name self.assertValidDtype(dtype) self['dtype'] = dtype @property def doc(self): '''Documentation about this component''' return self['doc'] @property def name(self): '''The name of this component''' return self['name'] @property def dtype(self): ''' The data type of this component''' return self['dtype'] @staticmethod def assertValidDtype(dtype): if isinstance(dtype, dict): if _target_type_key not in dtype: msg = "'dtype' must have the key '%s'" % _target_type_key raise AssertionError(msg) elif isinstance(dtype, RefSpec): pass else: if dtype not in DtypeHelper.valid_primary_dtypes: msg = "'dtype=%s' string not in valid primary data type: %s " % (str(dtype), str(DtypeHelper.valid_primary_dtypes)) raise AssertionError(msg) return True @staticmethod @docval({'name': 'spec', 'type': (str, dict), 'doc': 'the spec object to check'}, is_method=False) def is_ref(**kwargs): spec = getargs('spec', kwargs) spec_is_ref = False if isinstance(spec, dict): if _target_type_key in spec: spec_is_ref = True elif 'dtype' in spec and isinstance(spec['dtype'], dict) and _target_type_key in spec['dtype']: spec_is_ref = True return spec_is_ref @classmethod def build_const_args(cls, spec_dict): ''' Build constructor arguments for this Spec class from a dictionary ''' ret = super().build_const_args(spec_dict) if isinstance(ret['dtype'], list): ret['dtype'] = list(map(cls.build_const_args, ret['dtype'])) elif isinstance(ret['dtype'], dict): ret['dtype'] = RefSpec.build_spec(ret['dtype']) return ret _dataset_args = [ {'name': 'doc', 'type': str, 'doc': 'a description about what this specification represents'}, {'name': 'dtype', 'type': (str, list, RefSpec), 'doc': 'The data type of this attribute. Use a list of DtypeSpecs to specify a compound data type.', 'default': None}, {'name': 'name', 'type': str, 'doc': 'The name of this dataset', 'default': None}, {'name': 'default_name', 'type': str, 'doc': 'The default name of this dataset', 'default': None}, {'name': 'shape', 'type': (list, tuple), 'doc': 'the shape of this dataset', 'default': None}, {'name': 'dims', 'type': (list, tuple), 'doc': 'the dimensions of this dataset', 'default': None}, {'name': 'attributes', 'type': list, 'doc': 'the attributes on this group', 'default': list()}, {'name': 'linkable', 'type': bool, 'doc': 'whether or not this group can be linked', 'default': True}, {'name': 'quantity', 'type': (str, int), 'doc': 'the required number of allowed instance', 'default': 1}, {'name': 'default_value', 'type': None, 'doc': 'a default value for this dataset', 'default': None}, {'name': 'data_type_def', 'type': str, 'doc': 'the data type this specification represents', 'default': None}, {'name': 'data_type_inc', 'type': (str, 'DatasetSpec'), 'doc': 'the data type this specification extends', 'default': None}, ] class DatasetSpec(BaseStorageSpec): ''' Specification for datasets To specify a table-like dataset i.e. a compound data type. 
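    Use a list of DtypeSpec objects for the dtype argument, one DtypeSpec per
    column. A minimal sketch (the dataset and column names are placeholders)::

        table_spec = DatasetSpec(doc='a table with an id and a value per row',
                                 name='my_table',
                                 dtype=[DtypeSpec('id', 'row id', 'int'),
                                        DtypeSpec('value', 'row value', 'float')])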
''' @docval(*_dataset_args) def __init__(self, **kwargs): doc, shape, dims, dtype, default_value = popargs('doc', 'shape', 'dims', 'dtype', 'default_value', kwargs) if shape is not None: self['shape'] = shape if dims is not None: self['dims'] = dims if 'shape' not in self: self['shape'] = tuple([None] * len(dims)) if self.shape is not None and self.dims is not None: if len(self['dims']) != len(self['shape']): raise ValueError("'dims' and 'shape' must be the same length") if dtype is not None: if isinstance(dtype, list): # Dtype is a compound data type for _i, col in enumerate(dtype): if not isinstance(col, DtypeSpec): msg = 'must use DtypeSpec if defining compound dtype - found %s at element %d' % \ (type(col), _i) raise ValueError(msg) self['dtype'] = dtype elif isinstance(dtype, RefSpec): # Dtype is a reference self['dtype'] = dtype else: # Dtype is a string self['dtype'] = dtype if self['dtype'] not in DtypeHelper.valid_primary_dtypes: raise ValueError('dtype %s not a valid primary data type %s' % (self['dtype'], str(DtypeHelper.valid_primary_dtypes))) super().__init__(doc, **kwargs) if default_value is not None: self['default_value'] = default_value if self.name is not None: valid_quant_vals = [1, 'zero_or_one', ZERO_OR_ONE] if self.quantity not in valid_quant_vals: raise ValueError("quantity %s invalid for spec with fixed name. Valid values are: %s" % (self.quantity, str(valid_quant_vals))) @classmethod def __get_prec_level(cls, dtype): m = re.search('[0-9]+', dtype) if m is not None: prec = int(m.group()) else: prec = 32 return (dtype[0], prec) @classmethod def __is_sub_dtype(cls, orig, new): if isinstance(orig, RefSpec): if not isinstance(new, RefSpec): return False return orig == new else: orig_prec = cls.__get_prec_level(orig) new_prec = cls.__get_prec_level(new) if orig_prec[0] != new_prec[0]: # cannot extend int to float and vice-versa return False return new_prec >= orig_prec @docval({'name': 'inc_spec', 'type': 'DatasetSpec', 'doc': 'the data type this specification represents'}) def resolve_spec(self, **kwargs): inc_spec = getargs('inc_spec', kwargs) if isinstance(self.dtype, list): # merge the new types inc_dtype = inc_spec.dtype if isinstance(inc_dtype, str): msg = 'Cannot extend simple data type to compound data type' raise ValueError(msg) order = OrderedDict() if inc_dtype is not None: for dt in inc_dtype: order[dt['name']] = dt for dt in self.dtype: name = dt['name'] if name in order: # verify that the exension has supplied # a valid subtyping of existing type orig = order[name].dtype new = dt.dtype if not self.__is_sub_dtype(orig, new): msg = 'Cannot extend %s to %s' % (str(orig), str(new)) raise ValueError(msg) order[name] = dt self['dtype'] = list(order.values()) super().resolve_spec(inc_spec) @property def dims(self): ''' The dimensions of this Dataset ''' return self.get('dims', None) @property def dtype(self): ''' The data type of the Dataset ''' return self.get('dtype', None) @property def shape(self): ''' The shape of the dataset ''' return self.get('shape', None) @property def default_value(self): '''The default value of the dataset or None if not specified''' return self.get('default_value', None) @classmethod def __check_dim(cls, dim, data): return True @classmethod def dtype_spec_cls(cls): ''' The class to use when constructing DtypeSpec objects Override this if extending to use a class other than DtypeSpec to build dataset specifications ''' return DtypeSpec @classmethod def build_const_args(cls, spec_dict): ''' Build constructor arguments for this Spec 
class from a dictionary ''' ret = super().build_const_args(spec_dict) if 'dtype' in ret: if isinstance(ret['dtype'], list): ret['dtype'] = list(map(cls.dtype_spec_cls().build_spec, ret['dtype'])) elif isinstance(ret['dtype'], dict): ret['dtype'] = RefSpec.build_spec(ret['dtype']) return ret _link_args = [ {'name': 'doc', 'type': str, 'doc': 'a description about what this link represents'}, {'name': _target_type_key, 'type': str, 'doc': 'the target type GroupSpec or DatasetSpec'}, {'name': 'quantity', 'type': (str, int), 'doc': 'the required number of allowed instance', 'default': 1}, {'name': 'name', 'type': str, 'doc': 'the name of this link', 'default': None} ] class LinkSpec(Spec): @docval(*_link_args) def __init__(self, **kwargs): doc, target_type, name, quantity = popargs('doc', _target_type_key, 'name', 'quantity', kwargs) super().__init__(doc, name, **kwargs) self[_target_type_key] = target_type if quantity != 1: self['quantity'] = quantity @property def target_type(self): ''' The data type of target specification ''' return self.get(_target_type_key) @property def data_type_inc(self): ''' The data type of target specification ''' return self.get(_target_type_key) def is_many(self): return self.quantity not in (1, ZERO_OR_ONE) @property def quantity(self): ''' The number of times the object being specified should be present ''' return self.get('quantity', DEF_QUANTITY) @property def required(self): ''' Whether or not the this spec represents a required field ''' return self.quantity not in (ZERO_OR_ONE, ZERO_OR_MANY) _group_args = [ {'name': 'doc', 'type': str, 'doc': 'a description about what this specification represents'}, {'name': 'name', 'type': str, 'doc': 'the name of this group', 'default': None}, {'name': 'default_name', 'type': str, 'doc': 'The default name of this group', 'default': None}, {'name': 'groups', 'type': list, 'doc': 'the subgroups in this group', 'default': list()}, {'name': 'datasets', 'type': list, 'doc': 'the datasets in this group', 'default': list()}, {'name': 'attributes', 'type': list, 'doc': 'the attributes on this group', 'default': list()}, {'name': 'links', 'type': list, 'doc': 'the links in this group', 'default': list()}, {'name': 'linkable', 'type': bool, 'doc': 'whether or not this group can be linked', 'default': True}, {'name': 'quantity', 'type': (str, int), 'doc': 'the required number of allowed instance', 'default': 1}, {'name': 'data_type_def', 'type': str, 'doc': 'the data type this specification represents', 'default': None}, {'name': 'data_type_inc', 'type': (str, 'GroupSpec'), 'doc': 'the data type this specification data_type_inc', 'default': None}, ] class GroupSpec(BaseStorageSpec): ''' Specification for groups ''' @docval(*_group_args) def __init__(self, **kwargs): doc, groups, datasets, links = popargs('doc', 'groups', 'datasets', 'links', kwargs) self.__data_types = dict() self.__groups = dict() for group in groups: self.set_group(group) self.__datasets = dict() for dataset in datasets: self.set_dataset(dataset) self.__links = dict() for link in links: self.set_link(link) self.__new_data_types = set(self.__data_types.keys()) self.__new_datasets = set(self.__datasets.keys()) self.__overridden_datasets = set() self.__new_links = set(self.__links.keys()) self.__overridden_links = set() self.__new_groups = set(self.__groups.keys()) self.__overridden_groups = set() super().__init__(doc, **kwargs) @docval({'name': 'inc_spec', 'type': 'GroupSpec', 'doc': 'the data type this specification represents'}) def resolve_spec(self, **kwargs): 
inc_spec = getargs('inc_spec', kwargs) data_types = list() # resolve inherited datasets for dataset in inc_spec.datasets: # if not (dataset.data_type_def is None and dataset.data_type_inc is None): if dataset.name is None: data_types.append(dataset) continue self.__new_datasets.discard(dataset.name) if dataset.name in self.__datasets: self.__datasets[dataset.name].resolve_spec(dataset) self.__overridden_datasets.add(dataset.name) else: self.set_dataset(dataset) # resolve inherited groups for group in inc_spec.groups: # if not (group.data_type_def is None and group.data_type_inc is None): if group.name is None: data_types.append(group) continue self.__new_groups.discard(group.name) if group.name in self.__groups: self.__groups[group.name].resolve_spec(group) self.__overridden_groups.add(group.name) else: self.set_group(group) # resolve inherited links for link in inc_spec.links: if link.name is None: data_types.append(link) self.__new_links.discard(link.name) if link.name in self.__links: self.__overridden_links.add(link.name) continue self.set_link(link) # resolve inherited data_types for dt_spec in data_types: if isinstance(dt_spec, LinkSpec): dt = dt_spec.target_type else: dt = dt_spec.data_type_def if dt is None: dt = dt_spec.data_type_inc self.__new_data_types.discard(dt) existing_dt_spec = self.get_data_type(dt) if existing_dt_spec is None or \ ((isinstance(existing_dt_spec, list) or existing_dt_spec.name is not None)) and \ dt_spec.name is None: if isinstance(dt_spec, DatasetSpec): self.set_dataset(dt_spec) elif isinstance(dt_spec, GroupSpec): self.set_group(dt_spec) else: self.set_link(dt_spec) super().resolve_spec(inc_spec) @docval({'name': 'name', 'type': str, 'doc': 'the name of the dataset'}, raises="ValueError, if 'name' is not part of this spec") def is_inherited_dataset(self, **kwargs): '''Return true if a dataset with the given name was inherited''' name = getargs('name', kwargs) if name not in self.__datasets: raise ValueError("Dataset '%s' not found in spec" % name) return name not in self.__new_datasets @docval({'name': 'name', 'type': str, 'doc': 'the name of the dataset'}, raises="ValueError, if 'name' is not part of this spec") def is_overridden_dataset(self, **kwargs): '''Return true if a dataset with the given name overrides a specification from the parent type''' name = getargs('name', kwargs) if name not in self.__datasets: raise ValueError("Dataset '%s' not found in spec" % name) return name in self.__overridden_datasets @docval({'name': 'name', 'type': str, 'doc': 'the name of the group'}, raises="ValueError, if 'name' is not part of this spec") def is_inherited_group(self, **kwargs): '''Return true if a group with the given name was inherited''' name = getargs('name', kwargs) if name not in self.__groups: raise ValueError("Group '%s' not found in spec" % name) return name not in self.__new_groups @docval({'name': 'name', 'type': str, 'doc': 'the name of the group'}, raises="ValueError, if 'name' is not part of this spec") def is_overridden_group(self, **kwargs): '''Return true if a group with the given name overrides a specification from the parent type''' name = getargs('name', kwargs) if name not in self.__groups: raise ValueError("Group '%s' not found in spec" % name) return name in self.__overridden_groups @docval({'name': 'name', 'type': str, 'doc': 'the name of the link'}, raises="ValueError, if 'name' is not part of this spec") def is_inherited_link(self, **kwargs): '''Return true if a link with the given name was inherited''' name = getargs('name', 
kwargs) if name not in self.__links: raise ValueError("Link '%s' not found in spec" % name) return name not in self.__new_links @docval({'name': 'name', 'type': str, 'doc': 'the name of the link'}, raises="ValueError, if 'name' is not part of this spec") def is_overridden_link(self, **kwargs): '''Return true if a link with the given name overrides a specification from the parent type''' name = getargs('name', kwargs) if name not in self.__links: raise ValueError("Link '%s' not found in spec" % name) return name in self.__overridden_links @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to check'}) def is_inherited_spec(self, **kwargs): ''' Returns 'True' if specification was inherited from a parent type ''' spec = getargs('spec', kwargs) if isinstance(spec, Spec): name = spec.name if name is None: name = spec.data_type_def if name is None: name = spec.data_type_inc if name is None: raise ValueError('received Spec with wildcard name but no data_type_inc or data_type_def') spec = name if spec in self.__links: return self.is_inherited_link(spec) elif spec in self.__groups: return self.is_inherited_group(spec) elif spec in self.__datasets: return self.is_inherited_dataset(spec) elif spec in self.__data_types: return self.is_inherited_type(spec) else: if super().is_inherited_spec(spec): return True else: for s in self.__datasets: if self.is_inherited_dataset(s): if self.__datasets[s].get_attribute(spec) is not None: return True for s in self.__groups: if self.is_inherited_group(s): if self.__groups[s].get_attribute(spec) is not None: return True return False @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to check'}) def is_overridden_spec(self, **kwargs): ''' Returns 'True' if specification was inherited from a parent type ''' spec = getargs('spec', kwargs) if isinstance(spec, Spec): name = spec.name if name is None: if spec.is_many(): # this is a wildcard spec, so it cannot be overridden return False name = spec.data_type_def if name is None: name = spec.data_type_inc if name is None: raise ValueError('received Spec with wildcard name but no data_type_inc or data_type_def') spec = name if spec in self.__links: return self.is_overridden_link(spec) elif spec in self.__groups: return self.is_overridden_group(spec) elif spec in self.__datasets: return self.is_overridden_dataset(spec) elif spec in self.__data_types: return self.is_overridden_type(spec) else: if super().is_overridden_spec(spec): # check if overridden attribute return True else: for s in self.__datasets: if self.is_overridden_dataset(s): if self.__datasets[s].is_overridden_spec(spec): return True for s in self.__groups: if self.is_overridden_group(s): if self.__groups[s].is_overridden_spec(spec): return True return False @docval({'name': 'spec', 'type': (BaseStorageSpec, str), 'doc': 'the specification to check'}) def is_inherited_type(self, **kwargs): ''' Returns True if `spec` represents a spec that was inherited from an included data_type ''' spec = getargs('spec', kwargs) if isinstance(spec, BaseStorageSpec): if spec.data_type_def is None: raise ValueError('cannot check if something was inherited if it does not have a %s' % self.def_key()) spec = spec.data_type_def return spec not in self.__new_data_types @docval({'name': 'spec', 'type': (BaseStorageSpec, str), 'doc': 'the specification to check'}, raises="ValueError, if 'name' is not part of this spec") def is_overridden_type(self, **kwargs): ''' Returns True if `spec` represents a spec that was overriden by the subtype''' spec = 
getargs('spec', kwargs) if isinstance(spec, BaseStorageSpec): if spec.data_type_def is None: raise ValueError('cannot check if something was inherited if it does not have a %s' % self.def_key()) spec = spec.data_type_def return spec not in self.__new_data_types def __add_data_type_inc(self, spec): dt = None if hasattr(spec, 'data_type_def') and spec.data_type_def is not None: dt = spec.data_type_def elif hasattr(spec, 'data_type_inc') and spec.data_type_inc is not None: dt = spec.data_type_inc if not dt: raise TypeError("spec does not have '%s' or '%s' defined" % (self.def_key(), self.inc_key())) if dt in self.__data_types: curr = self.__data_types[dt] if curr is spec: return if spec.name is None: if isinstance(curr, list): self.__data_types[dt] = spec else: if curr.name is None: raise TypeError('Cannot have multiple data types of the same type without specifying name') else: # unnamed data types will be stored as data_types self.__data_types[dt] = spec else: if isinstance(curr, list): self.__data_types[dt].append(spec) else: if curr.name is None: # leave the existing data type as is, since the new one can be retrieved by name return else: # store both specific instances of a data type self.__data_types[dt] = [curr, spec] else: self.__data_types[dt] = spec @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to retrieve'}) def get_data_type(self, **kwargs): ''' Get a specification by "data_type" ''' ndt = getargs('data_type', kwargs) return self.__data_types.get(ndt, None) @property def groups(self): ''' The groups specificed in this GroupSpec ''' return tuple(self.get('groups', tuple())) @property def datasets(self): ''' The datasets specificed in this GroupSpec ''' return tuple(self.get('datasets', tuple())) @property def links(self): ''' The links specificed in this GroupSpec ''' return tuple(self.get('links', tuple())) @docval(*_group_args) def add_group(self, **kwargs): ''' Add a new specification for a subgroup to this group specification ''' doc = kwargs.pop('doc') spec = self.__class__(doc, **kwargs) self.set_group(spec) return spec @docval({'name': 'spec', 'type': ('GroupSpec'), 'doc': 'the specification for the subgroup'}) def set_group(self, **kwargs): ''' Add the given specification for a subgroup to this group specification ''' spec = getargs('spec', kwargs) if spec.parent is not None: spec = self.build_spec(spec) if spec.name == NAME_WILDCARD: if spec.data_type_inc is not None or spec.data_type_def is not None: self.__add_data_type_inc(spec) else: raise TypeError("must specify 'name' or 'data_type_inc' in Group spec") else: if spec.data_type_inc is not None or spec.data_type_def is not None: self.__add_data_type_inc(spec) self.__groups[spec.name] = spec self.setdefault('groups', list()).append(spec) spec.parent = self @docval({'name': 'name', 'type': str, 'doc': 'the name of the group to the Spec for'}) def get_group(self, **kwargs): ''' Get a specification for a subgroup to this group specification ''' name = getargs('name', kwargs) return self.__groups.get(name, self.__links.get(name)) @docval(*_dataset_args) def add_dataset(self, **kwargs): ''' Add a new specification for a dataset to this group specification ''' doc = kwargs.pop('doc') spec = self.dataset_spec_cls()(doc, **kwargs) self.set_dataset(spec) return spec @docval({'name': 'spec', 'type': 'DatasetSpec', 'doc': 'the specification for the dataset'}) def set_dataset(self, **kwargs): ''' Add the given specification for a dataset to this group specification ''' spec = getargs('spec', kwargs) if spec.parent 
is not None: spec = self.dataset_spec_cls().build_spec(spec) if spec.name == NAME_WILDCARD: if spec.data_type_inc is not None or spec.data_type_def is not None: self.__add_data_type_inc(spec) else: raise TypeError("must specify 'name' or 'data_type_inc' in Dataset spec") else: if spec.data_type_inc is not None or spec.data_type_def is not None: self.__add_data_type_inc(spec) self.__datasets[spec.name] = spec self.setdefault('datasets', list()).append(spec) spec.parent = self @docval({'name': 'name', 'type': str, 'doc': 'the name of the dataset to the Spec for'}) def get_dataset(self, **kwargs): ''' Get a specification for a dataset to this group specification ''' name = getargs('name', kwargs) return self.__datasets.get(name, self.__links.get(name)) @docval(*_link_args) def add_link(self, **kwargs): ''' Add a new specification for a link to this group specification ''' doc, target_type = popargs('doc', _target_type_key, kwargs) spec = self.link_spec_cls()(doc, target_type, **kwargs) self.set_link(spec) return spec @docval({'name': 'spec', 'type': 'LinkSpec', 'doc': 'the specification for the object to link to'}) def set_link(self, **kwargs): ''' Add a given specification for a link to this group specification ''' spec = getargs('spec', kwargs) if spec.parent is not None: spec = self.link_spec_cls().build_spec(spec) if spec.name == NAME_WILDCARD: if spec.data_type_inc is not None or spec.data_type_def is not None: self.__add_data_type_inc(spec) else: raise TypeError("must specify 'name' or 'data_type_inc' in Dataset spec") else: self.__links[spec.name] = spec self.setdefault('links', list()).append(spec) spec.parent = self @docval({'name': 'name', 'type': str, 'doc': 'the name of the link to the Spec for'}) def get_link(self, **kwargs): ''' Get a specification for a link to this group specification ''' name = getargs('name', kwargs) return self.__links.get(name) @classmethod def dataset_spec_cls(cls): ''' The class to use when constructing DatasetSpec objects Override this if extending to use a class other than DatasetSpec to build dataset specifications ''' return DatasetSpec @classmethod def link_spec_cls(cls): ''' The class to use when constructing LinkSpec objects Override this if extending to use a class other than LinkSpec to build link specifications ''' return LinkSpec @classmethod def build_const_args(cls, spec_dict): ''' Build constructor arguments for this Spec class from a dictionary ''' ret = super().build_const_args(spec_dict) if 'datasets' in ret: ret['datasets'] = list(map(cls.dataset_spec_cls().build_spec, ret['datasets'])) if 'groups' in ret: ret['groups'] = list(map(cls.build_spec, ret['groups'])) if 'links' in ret: ret['links'] = list(map(cls.link_spec_cls().build_spec, ret['links'])) return ret ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/spec/write.py0000644000655200065520000002446100000000000020200 0ustar00circlecicircleci00000000000000import copy import json import ruamel.yaml as yaml import os.path import warnings from collections import OrderedDict from abc import ABCMeta, abstractmethod from datetime import datetime from .namespace import SpecNamespace from .spec import GroupSpec, DatasetSpec from .catalog import SpecCatalog from ..utils import docval, getargs, popargs class SpecWriter(metaclass=ABCMeta): @abstractmethod def write_spec(self, spec_file_dict, path): pass @abstractmethod def write_namespace(self, namespace, path): pass class YAMLSpecWriter(SpecWriter): @docval({'name': 
'outdir', 'type': str, 'doc': 'the path to write the directory to output the namespace and specs too', 'default': '.'}) def __init__(self, **kwargs): self.__outdir = getargs('outdir', kwargs) def __dump_spec(self, specs, stream): specs_plain_dict = json.loads(json.dumps(specs)) yaml.main.safe_dump(specs_plain_dict, stream, default_flow_style=False) def write_spec(self, spec_file_dict, path): out_fullpath = os.path.join(self.__outdir, path) spec_plain_dict = json.loads(json.dumps(spec_file_dict)) sorted_data = self.sort_keys(spec_plain_dict) with open(out_fullpath, 'w') as fd_write: yaml.dump(sorted_data, fd_write, Dumper=yaml.dumper.RoundTripDumper) def write_namespace(self, namespace, path): with open(os.path.join(self.__outdir, path), 'w') as stream: ns = namespace # Convert the date to a string if necessary if 'date' in namespace and isinstance(namespace['date'], datetime): ns = copy.copy(ns) # copy the namespace to avoid side-effects ns['date'] = ns['date'].isoformat() self.__dump_spec({'namespaces': [ns]}, stream) def reorder_yaml(self, path): """ Open a YAML file, load it as python data, sort the data, and write it back out to the same path. """ with open(path, 'rb') as fd_read: data = yaml.load(fd_read, Loader=yaml.loader.RoundTripLoader, preserve_quotes=True) self.write_spec(data, path) def sort_keys(self, obj): # Represent None as null def my_represent_none(self, data): return self.represent_scalar(u'tag:yaml.org,2002:null', u'null') yaml.representer.RoundTripRepresenter.add_representer(type(None), my_represent_none) order = ['neurodata_type_def', 'neurodata_type_inc', 'data_type_def', 'data_type_inc', 'name', 'default_name', 'dtype', 'target_type', 'dims', 'shape', 'default_value', 'value', 'doc', 'required', 'quantity', 'attributes', 'datasets', 'groups', 'links'] if isinstance(obj, dict): keys = list(obj.keys()) for k in order[::-1]: if k in keys: keys.remove(k) keys.insert(0, k) if 'neurodata_type_def' not in keys and 'name' in keys: keys.remove('name') keys.insert(0, 'name') return yaml.comments.CommentedMap( yaml.compat.ordereddict([(k, self.sort_keys(obj[k])) for k in keys]) ) elif isinstance(obj, list): return [self.sort_keys(v) for v in obj] elif isinstance(obj, tuple): return (self.sort_keys(v) for v in obj) else: return obj class NamespaceBuilder: ''' A class for building namespace and spec files ''' @docval({'name': 'doc', 'type': str, 'doc': 'Description about what the namespace represents'}, {'name': 'name', 'type': str, 'doc': 'Name of the namespace'}, {'name': 'full_name', 'type': str, 'doc': 'Extended full name of the namespace', 'default': None}, {'name': 'version', 'type': (str, tuple, list), 'doc': 'Version number of the namespace', 'default': None}, {'name': 'author', 'type': (str, list), 'doc': 'Author or list of authors.', 'default': None}, {'name': 'contact', 'type': (str, list), 'doc': 'List of emails. Ordering should be the same as for author', 'default': None}, {'name': 'date', 'type': (datetime, str), 'doc': "Date last modified or released. 
Formatting is %Y-%m-%d %H:%M:%S, e.g, 2017-04-25 17:14:13", 'default': None}, {'name': 'namespace_cls', 'type': type, 'doc': 'the SpecNamespace type', 'default': SpecNamespace}) def __init__(self, **kwargs): ns_cls = popargs('namespace_cls', kwargs) self.__ns_args = copy.deepcopy(kwargs) self.__namespaces = OrderedDict() self.__sources = OrderedDict() self.__catalog = SpecCatalog() self.__dt_key = ns_cls.types_key() @docval({'name': 'source', 'type': str, 'doc': 'the path to write the spec to'}, {'name': 'spec', 'type': (GroupSpec, DatasetSpec), 'doc': 'the Spec to add'}) def add_spec(self, **kwargs): ''' Add a Spec to the namespace ''' source, spec = getargs('source', 'spec', kwargs) self.__catalog.auto_register(spec, source) self.add_source(source) self.__sources[source].setdefault(self.__dt_key, list()).append(spec) @docval({'name': 'source', 'type': str, 'doc': 'the path to write the spec to'}, {'name': 'doc', 'type': str, 'doc': 'additional documentation for the source file', 'default': None}, {'name': 'title', 'type': str, 'doc': 'optional heading to be used for the source', 'default': None}) def add_source(self, **kwargs): ''' Add a source file to the namespace ''' source, doc, title = getargs('source', 'doc', 'title', kwargs) if '/' in source or source[0] == '.': raise ValueError('source must be a base file') source_dict = {'source': source} self.__sources.setdefault(source, source_dict) # Update the doc and title if given if doc is not None: self.__sources[source]['doc'] = doc if title is not None: self.__sources[source]['title'] = doc @docval({'name': 'data_type', 'type': str, 'doc': 'the data type to include'}, {'name': 'source', 'type': str, 'doc': 'the source file to include the type from', 'default': None}, {'name': 'namespace', 'type': str, 'doc': 'the namespace from which to include the data type', 'default': None}) def include_type(self, **kwargs): ''' Include a data type from an existing namespace or source ''' dt, src, ns = getargs('data_type', 'source', 'namespace', kwargs) if src is not None: self.add_source(src) self.__sources[src].setdefault(self.__dt_key, list()).append(dt) elif ns is not None: self.include_namespace(ns) self.__namespaces[ns].setdefault(self.__dt_key, list()).append(dt) else: raise ValueError("must specify 'source' or 'namespace' when including type") @docval({'name': 'namespace', 'type': str, 'doc': 'the namespace to include'}) def include_namespace(self, **kwargs): ''' Include an entire namespace ''' namespace = getargs('namespace', kwargs) self.__namespaces.setdefault(namespace, {'namespace': namespace}) @docval({'name': 'path', 'type': str, 'doc': 'the path to write the spec to'}, {'name': 'outdir', 'type': str, 'doc': 'the path to write the directory to output the namespace and specs too', 'default': '.'}, {'name': 'writer', 'type': SpecWriter, 'doc': 'the SpecWriter to use to write the namespace', 'default': None}) def export(self, **kwargs): ''' Export the namespace to the given path. All new specification source files will be written in the same directory as the given path. 
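        A typical build-and-export sequence looks roughly like this (the file
        names, the namespace name, and 'my_group_spec' are placeholders)::

            ns_builder = NamespaceBuilder(doc='my extension types',
                                          name='my-namespace',
                                          version='0.1.0')
            ns_builder.add_spec('my-namespace.extensions.yaml', my_group_spec)
            ns_builder.export('my-namespace.namespace.yaml', outdir='.')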
''' ns_path, writer = getargs('path', 'writer', kwargs) if writer is None: writer = YAMLSpecWriter(outdir=getargs('outdir', kwargs)) ns_args = copy.copy(self.__ns_args) ns_args['schema'] = list() for ns, info in self.__namespaces.items(): ns_args['schema'].append(info) for path, info in self.__sources.items(): out = SpecFileBuilder() dts = list() for spec in info[self.__dt_key]: if isinstance(spec, str): dts.append(spec) else: out.add_spec(spec) item = {'source': path} if 'doc' in info: item['doc'] = info['doc'] if 'title' in info: item['title'] = info['title'] if out and dts: raise ValueError('cannot include from source if writing to source') elif dts: item[self.__dt_key] = dts elif out: writer.write_spec(out, path) ns_args['schema'].append(item) namespace = SpecNamespace.build_namespace(**ns_args) writer.write_namespace(namespace, ns_path) @property def name(self): return self.__ns_args['name'] class SpecFileBuilder(dict): @docval({'name': 'spec', 'type': (GroupSpec, DatasetSpec), 'doc': 'the Spec to add'}) def add_spec(self, **kwargs): spec = getargs('spec', kwargs) if isinstance(spec, GroupSpec): self.setdefault('groups', list()).append(spec) elif isinstance(spec, DatasetSpec): self.setdefault('datasets', list()).append(spec) def export_spec(ns_builder, new_data_types, output_dir): """ Create YAML specification files for a new namespace and extensions with the given data type specs. Args: ns_builder - NamespaceBuilder instance used to build the namespace and extension new_data_types - Iterable of specs that represent new data types to be added """ if len(new_data_types) == 0: warnings.warn('No data types specified. Exiting.') return if not ns_builder.name: raise RuntimeError('Namespace name is required to export specs') ns_path = ns_builder.name + '.namespace.yaml' ext_path = ns_builder.name + '.extensions.yaml' for data_type in new_data_types: ns_builder.add_spec(ext_path, data_type) ns_builder.export(ns_path, outdir=output_dir) ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9201882 hdmf-1.5.4/src/hdmf/testing/0000755000655200065520000000000000000000000017210 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/testing/__init__.py0000644000655200065520000000007700000000000021325 0ustar00circlecicircleci00000000000000from .testcase import TestCase, H5RoundTripMixin # noqa: F401 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/testing/testcase.py0000644000655200065520000002065700000000000021407 0ustar00circlecicircleci00000000000000import re import unittest import h5py import numpy as np import os from abc import ABCMeta, abstractmethod from ..container import Container, Data from ..query import HDMFDataset from ..common import validate as common_validate, get_manager from ..backends.hdf5 import HDF5IO class TestCase(unittest.TestCase): """ Extension of unittest's TestCase to add useful functions for unit testing in HDMF. """ def assertRaisesWith(self, exc_type, exc_msg, *args, **kwargs): """ Asserts the given invocation raises the expected exception. This is similar to unittest's assertRaises and assertRaisesRegex, but checks for an exact match. 
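        Typical usage follows the context-manager form of assertRaises (the
        callable inside the block is a placeholder)::

            with self.assertRaisesWith(ValueError, 'the exact expected message'):
                do_something_that_raises()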
""" return self.assertRaisesRegex(exc_type, '^%s$' % re.escape(exc_msg), *args, **kwargs) def assertWarnsWith(self, warn_type, exc_msg, *args, **kwargs): """ Asserts the given invocation raises the expected warning. This is similar to unittest's assertWarns and assertWarnsRegex, but checks for an exact match. """ return self.assertWarnsRegex(warn_type, '^%s$' % re.escape(exc_msg), *args, **kwargs) def assertContainerEqual(self, container1, container2, ignore_name=False, ignore_hdmf_attrs=False): """ Asserts that the two containers have equal contents. ignore_name - whether to ignore testing equality of name ignore_hdmf_attrs - whether to ignore testing equality of HDMF container attributes, such as container_source and object_id """ type1 = type(container1) type2 = type(container2) self.assertEqual(type1, type2) if not ignore_name: self.assertEqual(container1.name, container2.name) if not ignore_hdmf_attrs: self.assertEqual(container1.container_source, container2.container_source) self.assertEqual(container1.object_id, container2.object_id) # NOTE: parent is not tested because it can lead to infinite loops self.assertEqual(len(container1.children), len(container2.children)) # do not actually check the children values here. all children *should* also be fields, which is checked below. # this is in case non-field children are added to one and not the other for field in getattr(container1, type1._fieldsname): with self.subTest(field=field, container_type=type1.__name__): f1 = getattr(container1, field) f2 = getattr(container2, field) self._assert_field_equal(f1, f2, ignore_hdmf_attrs) def _assert_field_equal(self, f1, f2, ignore_hdmf_attrs=False): if (isinstance(f1, (tuple, list, np.ndarray, h5py.Dataset)) or isinstance(f2, (tuple, list, np.ndarray, h5py.Dataset))): self._assert_array_equal(f1, f2, ignore_hdmf_attrs) elif isinstance(f1, dict) and len(f1) and isinstance(f1.values()[0], Container): self.assertIsInstance(f2, dict) f1_keys = set(f1.keys()) f2_keys = set(f2.keys()) self.assertSetEqual(f1_keys, f2_keys) for k in f1_keys: with self.subTest(module_name=k): self.assertContainerEqual(f1[k], f2[k], ignore_hdmf_attrs) elif isinstance(f1, Container): self.assertContainerEqual(f1, f2, ignore_hdmf_attrs) elif isinstance(f1, Data): self._assert_data_equal(f1, f2, ignore_hdmf_attrs) elif isinstance(f1, (float, np.floating)): np.testing.assert_equal(f1, f2) else: self.assertEqual(f1, f2) def _assert_data_equal(self, data1, data2, ignore_hdmf_attrs=False): self.assertEqual(type(data1), type(data2)) self.assertEqual(len(data1), len(data2)) self._assert_array_equal(data1.data, data2.data, ignore_hdmf_attrs) def _assert_array_equal(self, arr1, arr2, ignore_hdmf_attrs=False): if isinstance(arr1, (h5py.Dataset, HDMFDataset)): arr1 = arr1[()] if isinstance(arr2, (h5py.Dataset, HDMFDataset)): arr2 = arr2[()] if not isinstance(arr1, (tuple, list, np.ndarray)) and not isinstance(arr2, (tuple, list, np.ndarray)): if isinstance(arr1, (float, np.floating)): np.testing.assert_equal(arr1, arr2) else: self.assertEqual(arr1, arr2) # scalar else: self.assertEqual(len(arr1), len(arr2)) if isinstance(arr1, np.ndarray) and len(arr1.dtype) > 1: # compound type arr1 = arr1.tolist() if isinstance(arr2, np.ndarray) and len(arr2.dtype) > 1: # compound type arr2 = arr2.tolist() if isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray): np.testing.assert_array_equal(arr1, arr2) else: for sub1, sub2 in zip(arr1, arr2): if isinstance(sub1, Container): self.assertContainerEqual(sub1, sub2, ignore_hdmf_attrs) elif 
isinstance(sub1, Data): self._assert_data_equal(sub1, sub2, ignore_hdmf_attrs) else: self._assert_array_equal(sub1, sub2, ignore_hdmf_attrs) class H5RoundTripMixin(metaclass=ABCMeta): """ Mixin class for methods to run a roundtrip test writing a container to and reading the container from an HDF5 file. The setUp, test_roundtrip, and tearDown methods will be run by unittest. The abstract method setUpContainer needs to be implemented by classes that include this mixin. Example:: class TestMyContainerRoundTrip(H5RoundTripMixin, TestCase): def setUpContainer(self): # return the Container to read/write NOTE: This class is a mix-in and not a subclass of TestCase so that unittest does not discover it, try to run it, and skip it. """ def setUp(self): self.__manager = get_manager() self.container = self.setUpContainer() self.container_type = self.container.__class__.__name__ self.filename = 'test_%s.h5' % self.container_type self.writer = None self.reader = None def tearDown(self): if self.writer is not None: self.writer.close() if self.reader is not None: self.reader.close() if os.path.exists(self.filename) and os.getenv("CLEAN_HDMF", '1') not in ('0', 'false', 'FALSE', 'False'): os.remove(self.filename) @property def manager(self): """ The build manager """ return self.__manager @abstractmethod def setUpContainer(self): """ Should return the Container to read/write """ raise NotImplementedError('Cannot run test unless setUpContainer is implemented') def test_roundtrip(self): """ Test whether the test Container read from file has the same contents the original Container and validate the file """ self.read_container = self.roundtripContainer() self.assertIsNotNone(str(self.container)) # added as a test to make sure printing works self.assertIsNotNone(str(self.read_container)) # make sure we get a completely new object self.assertNotEqual(id(self.container), id(self.read_container)) # the name of the root container of a file is always 'root' (see h5tools.py ROOT_NAME) # thus, ignore the name of the container when comparing original container vs read container self.assertContainerEqual(self.read_container, self.container, ignore_name=True) self.reader.close() self.validate() def roundtripContainer(self, cache_spec=False): """ Write just the Container to an HDF5 file, read the container from the file, and return it """ self.writer = HDF5IO(self.filename, manager=self.manager, mode='w') self.writer.write(self.container, cache_spec=cache_spec) self.writer.close() self.reader = HDF5IO(self.filename, manager=self.manager, mode='r') try: return self.reader.read() except Exception as e: self.reader.close() self.reader = None raise e def validate(self): """ Validate the created file """ if os.path.exists(self.filename): with HDF5IO(self.filename, manager=self.manager, mode='r') as io: errors = common_validate(io) if errors: for err in errors: raise Exception(err) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/utils.py0000644000655200065520000007461600000000000017263 0ustar00circlecicircleci00000000000000import copy as _copy from abc import ABCMeta import collections import h5py import numpy as np __macros = { 'array_data': [np.ndarray, list, tuple, h5py.Dataset], 'scalar_data': [str, int, float], } def docval_macro(macro): """Class decorator to add the class to a list of types associated with the key macro in the __macros dict """ def _dec(cls): if macro not in __macros: __macros[macro] = list() __macros[macro].append(cls) 
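        # Illustrative, commented-out usage of this decorator ('MyArrayLike' is a
        # hypothetical class). Registering it under the 'array_data' macro makes
        # docval accept instances of it wherever an argument's 'type' names the
        # 'array_data' macro:
        #
        #     @docval_macro('array_data')
        #     class MyArrayLike:
        #         def __len__(self):
        #             return 0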
return cls return _dec def __type_okay(value, argtype, allow_none=False): """Check a value against a type The difference between this function and :py:func:`isinstance` is that it allows specifying a type as a string. Furthermore, strings allow for specifying more general types, such as a simple numeric type (i.e. ``argtype``="num"). Args: value (any): the value to check argtype (type, str): the type to check for allow_none (bool): whether or not to allow None as a valid value Returns: bool: True if value is a valid instance of argtype """ if value is None: return allow_none if isinstance(argtype, str): if argtype in __macros: return __type_okay(value, __macros[argtype], allow_none=allow_none) elif argtype == 'int': return __is_int(value) elif argtype == 'float': return __is_float(value) elif argtype == 'bool': return __is_bool(value) return argtype in [cls.__name__ for cls in value.__class__.__mro__] elif isinstance(argtype, type): if argtype is int: return __is_int(value) elif argtype is float: return __is_float(value) elif argtype is bool: return __is_bool(value) return isinstance(value, argtype) elif isinstance(argtype, tuple) or isinstance(argtype, list): return any(__type_okay(value, i) for i in argtype) else: # argtype is None return True def __shape_okay_multi(value, argshape): if type(argshape[0]) in (tuple, list): # if multiple shapes are present return any(__shape_okay(value, a) for a in argshape) else: return __shape_okay(value, argshape) def __shape_okay(value, argshape): valshape = get_data_shape(value) if not len(valshape) == len(argshape): return False for a, b in zip(valshape, argshape): if b not in (a, None): return False return True def __is_int(value): return any(isinstance(value, i) for i in (int, np.int8, np.int16, np.int32, np.int64)) def __is_float(value): SUPPORTED_FLOAT_TYPES = [float, np.float16, np.float32, np.float64] if hasattr(np, "float128"): SUPPORTED_FLOAT_TYPES.append(np.float128) if hasattr(np, "longdouble"): # on windows python<=3.5, h5py floats resolve float64s as either np.float64 or np.longdouble # non-deterministically. a future version of h5py will fix this. see #112 SUPPORTED_FLOAT_TYPES.append(np.longdouble) return any(isinstance(value, i) for i in SUPPORTED_FLOAT_TYPES) def __is_bool(value): return isinstance(value, bool) or isinstance(value, np.bool_) def __format_type(argtype): if isinstance(argtype, str): return argtype elif isinstance(argtype, type): return argtype.__name__ elif isinstance(argtype, tuple) or isinstance(argtype, list): types = [__format_type(i) for i in argtype] if len(types) > 1: return "%s or %s" % (", ".join(types[:-1]), types[-1]) else: return types[0] elif argtype is None: return "any type" else: raise ValueError("argtype must be a type, str, list, or tuple") def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, allow_extra=False): # noqa: C901 """ Internal helper function used by the docval decorator to parse and validate function arguments :param validator: List of dicts from docval with the description of the arguments :param args: List of the values of positional arguments supplied by the caller :param kwargs: Dict keyword arguments supplied by the caller where keys are the argument name and values are the argument value. :param enforce_type: Boolean indicating whether the type of arguments should be enforced :param enforce_shape: Boolean indicating whether the dimensions of array arguments should be enforced if possible. 
:param allow_extra: Boolean indicating whether extra keyword arguments are allowed (if False and extra keyword arguments are specified, then an error is raised). :return: Dict with: * 'args' : Dict all arguments where keys are the names and values are the values of the arguments. * 'errors' : List of string with error messages """ ret = dict() type_errors = list() value_errors = list() argsi = 0 extras = dict() # has to be initialized to empty here, to avoid spurious errors reported upon early raises try: # check for duplicates in docval names = [x['name'] for x in validator] duplicated = [item for item, count in collections.Counter(names).items() if count > 1] if duplicated: raise ValueError( 'The following names are duplicated: {}'.format(duplicated)) if allow_extra: # extra keyword arguments are allowed so do not consider them when checking number of args if len(args) > len(validator): raise TypeError( 'Expected at most %d arguments %r, got %d positional' % (len(validator), names, len(args)) ) else: # allow for keyword args if len(args) + len(kwargs) > len(validator): raise TypeError( 'Expected at most %d arguments %r, got %d: %d positional and %d keyword %s' % (len(validator), names, len(args) + len(kwargs), len(args), len(kwargs), sorted(kwargs)) ) # iterate through the docval specification and find a matching value in args / kwargs it = iter(validator) arg = next(it) # catch unsupported keys allowable_terms = ('name', 'doc', 'type', 'shape', 'default', 'help') unsupported_terms = set(arg.keys()) - set(allowable_terms) if unsupported_terms: raise ValueError('docval for {}: {} are not supported by docval'.format(arg['name'], sorted(unsupported_terms))) # process positional arguments of the docval specification (no default value) extras = dict(kwargs) while True: if 'default' in arg: break argname = arg['name'] argval_set = False if argname in kwargs: # if this positional arg is specified by a keyword arg and there are remaining positional args that # have not yet been matched, then it is undetermined what those positional args match to. thus, raise # an error if argsi < len(args): type_errors.append("got multiple values for argument '%s'" % argname) argval = kwargs.get(argname) extras.pop(argname, None) argval_set = True elif argsi < len(args): argval = args[argsi] argval_set = True if not argval_set: type_errors.append("missing argument '%s'" % argname) else: if enforce_type: if not __type_okay(argval, arg['type']): if argval is None: fmt_val = (argname, __format_type(arg['type'])) type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val) else: fmt_val = (argname, type(argval).__name__, __format_type(arg['type'])) type_errors.append("incorrect type for '%s' (got '%s', expected '%s')" % fmt_val) if enforce_shape and 'shape' in arg: valshape = get_data_shape(argval) while valshape is None: if argval is None: break if not hasattr(argval, argname): fmt_val = (argval, argname, arg['shape']) value_errors.append("cannot check shape of object '%s' for argument '%s' " "(expected shape '%s')" % fmt_val) break # unpack, e.g. 
if TimeSeries is passed for arg 'data', then TimeSeries.data is checked argval = getattr(argval, argname) valshape = get_data_shape(argval) if valshape is not None and not __shape_okay_multi(argval, arg['shape']): fmt_val = (argname, valshape, arg['shape']) value_errors.append("incorrect shape for '%s' (got '%s', expected '%s')" % fmt_val) ret[argname] = argval argsi += 1 arg = next(it) # process arguments of the docval specification with a default value while True: argname = arg['name'] if argname in kwargs: ret[argname] = kwargs.get(argname) extras.pop(argname, None) elif len(args) > argsi: ret[argname] = args[argsi] argsi += 1 else: ret[argname] = _copy.deepcopy(arg['default']) argval = ret[argname] if enforce_type: if not __type_okay(argval, arg['type'], arg['default'] is None): if argval is None and arg['default'] is None: fmt_val = (argname, __format_type(arg['type'])) type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val) else: fmt_val = (argname, type(argval).__name__, __format_type(arg['type'])) type_errors.append("incorrect type for '%s' (got '%s', expected '%s')" % fmt_val) if enforce_shape and 'shape' in arg and argval is not None: valshape = get_data_shape(argval) while valshape is None: if argval is None: break if not hasattr(argval, argname): fmt_val = (argval, argname, arg['shape']) value_errors.append("cannot check shape of object '%s' for argument '%s' (expected shape '%s')" % fmt_val) break # unpack, e.g. if TimeSeries is passed for arg 'data', then TimeSeries.data is checked argval = getattr(argval, argname) valshape = get_data_shape(argval) if valshape is not None and not __shape_okay_multi(argval, arg['shape']): fmt_val = (argname, valshape, arg['shape']) value_errors.append("incorrect shape for '%s' (got '%s', expected '%s')" % fmt_val) arg = next(it) except StopIteration: pass except TypeError as e: type_errors.append(str(e)) except ValueError as e: value_errors.append(str(e)) if not allow_extra: for key in extras.keys(): type_errors.append("unrecognized argument: '%s'" % key) else: # TODO: Extras get stripped out if function arguments are composed with fmt_docval_args. # allow_extra needs to be tracked on a function so that fmt_docval_args doesn't strip them out for key in extras.keys(): ret[key] = extras[key] return {'args': ret, 'type_errors': type_errors, 'value_errors': value_errors} docval_idx_name = '__dv_idx__' docval_attr_name = '__docval__' __docval_args_loc = 'args' def get_docval(func, *args): '''Get a copy of docval arguments for a function. 
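# Sketch of reusing argument specifications with get_docval (make_base and make_scaled are
# hypothetical functions used only for illustration):
from hdmf.utils import docval, get_docval, getargs

@docval({'name': 'name', 'type': str, 'doc': 'the name of this object'},
        {'name': 'data', 'type': 'array_data', 'doc': 'the values', 'default': None},
        is_method=False)
def make_base(**kwargs):
    return getargs('name', 'data', kwargs)

# get_docval(make_base) returns all argument dicts; get_docval(make_base, 'name') returns
# only the spec for 'name', so another function can re-declare it without copying the dict:
@docval(*get_docval(make_base, 'name'),
        {'name': 'scale', 'type': (int, float), 'doc': 'a scale factor', 'default': 1},
        is_method=False)
def make_scaled(**kwargs):
    return getargs('name', 'scale', kwargs)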
If args are supplied, return only docval arguments with value for 'name' key equal to the args ''' func_docval = getattr(func, docval_attr_name, None) if func_docval: if args: docval_idx = getattr(func, docval_idx_name, None) try: return tuple(docval_idx[name] for name in args) except KeyError as ke: raise ValueError('Function %s does not have docval argument %s' % (func.__name__, str(ke))) return tuple(func_docval[__docval_args_loc]) else: if args: raise ValueError('Function %s has no docval arguments' % func.__name__) return tuple() # def docval_wrap(func, is_method=True): # if is_method: # @docval(*get_docval(func)) # def method(self, **kwargs): # # return call_docval_args(func, kwargs) # return method # else: # @docval(*get_docval(func)) # def static_method(**kwargs): # return call_docval_args(func, kwargs) # return method def fmt_docval_args(func, kwargs): ''' Separate positional and keyword arguments Useful for methods that wrap other methods ''' func_docval = getattr(func, docval_attr_name, None) ret_args = list() ret_kwargs = dict() kwargs_copy = _copy.copy(kwargs) if func_docval: for arg in func_docval[__docval_args_loc]: val = kwargs_copy.pop(arg['name'], None) if 'default' in arg: if val is not None: ret_kwargs[arg['name']] = val else: ret_args.append(val) if func_docval['allow_extra']: ret_kwargs.update(kwargs_copy) else: raise ValueError('no docval found on %s' % str(func)) return ret_args, ret_kwargs def call_docval_func(func, kwargs): fargs, fkwargs = fmt_docval_args(func, kwargs) return func(*fargs, **fkwargs) def __resolve_type(t): if t is None: return t if isinstance(t, str): if t in __macros: return tuple(__macros[t]) else: return t elif isinstance(t, type): return t elif isinstance(t, (list, tuple)): ret = list() for i in t: resolved = __resolve_type(i) if isinstance(resolved, tuple): ret.extend(resolved) else: ret.append(resolved) return tuple(ret) else: msg = "argtype must be a type, a str, a list, a tuple, or None - got %s" % type(t) raise ValueError(msg) def docval(*validator, **options): '''A decorator for documenting and enforcing type for instance method arguments. This decorator takes a list of dictionaries that specify the method parameters. These dictionaries are used for enforcing type and building a Sphinx docstring. The first arguments are dictionaries that specify the positional arguments and keyword arguments of the decorated function. These dictionaries must contain the following keys: ``'name'``, ``'type'``, and ``'doc'``. This will define a positional argument. To define a keyword argument, specify a default value using the key ``'default'``. To validate the dimensions of an input array add the optional ``'shape'`` parameter. The decorated method must take ``self`` and ``**kwargs`` as arguments. When using this decorator, the functions :py:func:`getargs` and :py:func:`popargs` can be used for easily extracting arguments from kwargs. The following code example demonstrates the use of this decorator: .. code-block:: python @docval({'name': 'arg1':, 'type': str, 'doc': 'this is the first positional argument'}, {'name': 'arg2':, 'type': int, 'doc': 'this is the second positional argument'}, {'name': 'kwarg1':, 'type': (list, tuple), 'doc': 'this is a keyword argument', 'default': list()}, returns='foo object', rtype='Foo')) def foo(self, **kwargs): arg1, arg2, kwarg1 = getargs('arg1', 'arg2', 'kwarg1', **kwargs) ... 
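# A runnable version of the pattern illustrated above (Foo is a stand-in class):
from hdmf.utils import docval, getargs

class Foo:
    @docval({'name': 'arg1', 'type': str, 'doc': 'this is the first positional argument'},
            {'name': 'arg2', 'type': int, 'doc': 'this is the second positional argument'},
            {'name': 'kwarg1', 'type': (list, tuple), 'doc': 'this is a keyword argument',
             'default': list()})
    def foo(self, **kwargs):
        arg1, arg2, kwarg1 = getargs('arg1', 'arg2', 'kwarg1', kwargs)
        return arg1, arg2, kwarg1

# Foo().foo('a', 2)   returns ('a', 2, [])
# Foo().foo('a', 'b') raises TypeError because arg2 must be an int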
:param enforce_type: Enforce types of input parameters (Default=True) :param returns: String describing the return values :param rtype: String describing the data type of the return values :param is_method: True if this is decorating an instance or class method, False otherwise (Default=True) :param enforce_shape: Enforce the dimensions of input arrays (Default=True) :param validator: :py:func:`dict` objects specifying the method parameters :param options: additional options for documenting and validating method parameters ''' enforce_type = options.pop('enforce_type', True) enforce_shape = options.pop('enforce_shape', True) returns = options.pop('returns', None) rtype = options.pop('rtype', None) is_method = options.pop('is_method', True) allow_extra = options.pop('allow_extra', False) def dec(func): _docval = _copy.copy(options) _docval['allow_extra'] = allow_extra func.__name__ = _docval.get('func_name', func.__name__) func.__doc__ = _docval.get('doc', func.__doc__) pos = list() kw = list() for a in validator: try: a['type'] = __resolve_type(a['type']) except Exception as e: msg = "error parsing '%s' argument' : %s" % (a['name'], e.args[0]) raise Exception(msg) if 'default' in a: kw.append(a) else: pos.append(a) loc_val = pos+kw _docval[__docval_args_loc] = loc_val def func_call(*args, **kwargs): parsed = __parse_args( loc_val, args[1:] if is_method else args, kwargs, enforce_type=enforce_type, enforce_shape=enforce_shape, allow_extra=allow_extra) for error_type, ExceptionType in (('type_errors', TypeError), ('value_errors', ValueError)): parse_err = parsed.get(error_type) if parse_err: msg = '%s: %s' % (func.__qualname__, ', '.join(parse_err)) raise ExceptionType(msg) if is_method: return func(args[0], **parsed['args']) else: return func(**parsed['args']) _rtype = rtype if isinstance(rtype, type): _rtype = rtype.__name__ docstring = __googledoc(func, _docval[__docval_args_loc], returns=returns, rtype=_rtype) docval_idx = {a['name']: a for a in _docval[__docval_args_loc]} # cache a name-indexed dictionary of args setattr(func_call, '__doc__', docstring) setattr(func_call, '__name__', func.__name__) setattr(func_call, docval_attr_name, _docval) setattr(func_call, docval_idx_name, docval_idx) setattr(func_call, '__module__', func.__module__) return func_call return dec def __sig_arg(argval): if 'default' in argval: default = argval['default'] if isinstance(default, str): default = "'%s'" % default else: default = str(default) return "%s=%s" % (argval['name'], default) else: return argval['name'] def __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=None, returns=None, rtype=None): '''Generate a Spinxy docstring''' def to_str(argtype): if isinstance(argtype, type): module = argtype.__module__ name = argtype.__name__ if module.startswith("h5py") or module.startswith("pandas") or module.startswith("builtins"): return ":py:class:`~{name}`".format(name=name) else: return ":py:class:`~{module}.{name}`".format(name=name, module=module) return argtype def __sphinx_arg(arg): fmt = dict() fmt['name'] = arg.get('name') fmt['doc'] = arg.get('doc') if isinstance(arg['type'], tuple) or isinstance(arg['type'], list): fmt['type'] = " or ".join(map(to_str, arg['type'])) else: fmt['type'] = to_str(arg['type']) return arg_fmt.format(**fmt) sig = "%s(%s)\n\n" % (func.__name__, ", ".join(map(__sig_arg, validator))) desc = func.__doc__.strip() if func.__doc__ is not None else "" sig += docstring_fmt.format(description=desc, args="\n".join(map(__sphinx_arg, validator))) if not (ret_fmt is None or 
returns is None or rtype is None): sig += ret_fmt.format(returns=returns, rtype=rtype) return sig def __sphinxdoc(func, validator, returns=None, rtype=None): arg_fmt = (":param {name}: {doc}\n" ":type {name}: {type}") docstring_fmt = ("{description}\n\n" "{args}\n") ret_fmt = (":returns: {returns}\n" ":rtype: {rtype}") return __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=ret_fmt, returns=returns, rtype=rtype) def __googledoc(func, validator, returns=None, rtype=None): arg_fmt = " {name} ({type}): {doc}" docstring_fmt = "{description}\n\n" if len(validator) > 0: docstring_fmt += "Args:\n{args}\n" ret_fmt = ("\nReturns:\n" " {rtype}: {returns}") return __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=ret_fmt, returns=returns, rtype=rtype) def getargs(*argnames): '''getargs(*argnames, argdict) Convenience function to retrieve arguments from a dictionary in batch ''' if len(argnames) < 2: raise ValueError('Must supply at least one key and a dict') if not isinstance(argnames[-1], dict): raise ValueError('last argument must be dict') kwargs = argnames[-1] if len(argnames) == 2: return kwargs.get(argnames[0]) return [kwargs.get(arg) for arg in argnames[:-1]] def popargs(*argnames): '''popargs(*argnames, argdict) Convenience function to retrieve and remove arguments from a dictionary in batch ''' if len(argnames) < 2: raise ValueError('Must supply at least one key and a dict') if not isinstance(argnames[-1], dict): raise ValueError('last argument must be dict') kwargs = argnames[-1] if len(argnames) == 2: return kwargs.pop(argnames[0]) return [kwargs.pop(arg) for arg in argnames[:-1]] class ExtenderMeta(ABCMeta): """A metaclass that will extend the base class initialization routine by executing additional functions defined in classes that use this metaclass In general, this class should only be used by core developers. """ __preinit = '__preinit' @classmethod def pre_init(cls, func): setattr(func, cls.__preinit, True) return classmethod(func) __postinit = '__postinit' @classmethod def post_init(cls, func): '''A decorator for defining a routine to run after creation of a type object. An example use of this method would be to define a classmethod that gathers any defined methods or attributes after the base Python type construction (i.e. after :py:func:`type` has been called) ''' setattr(func, cls.__postinit, True) return classmethod(func) def __init__(cls, name, bases, classdict): it = (getattr(cls, n) for n in dir(cls)) it = (a for a in it if hasattr(a, cls.__preinit)) for func in it: func(name, bases, classdict) super().__init__(name, bases, classdict) it = (getattr(cls, n) for n in dir(cls)) it = (a for a in it if hasattr(a, cls.__postinit)) for func in it: func(name, bases, classdict) def get_data_shape(data, strict_no_data_load=False): """ Helper function used to determine the shape of the given array. :param data: Array for which we should determine the shape. :type data: List, numpy.ndarray, DataChunkIterator, any object that support __len__ or .shape. :param strict_no_data_load: In order to determine the shape of nested tuples and lists, this function recursively inspects elements along the dimensions, assuming that the data has a regular, rectangular shape. In the case of out-of-core iterators this means that the first item along each dimensions would potentially be loaded into memory. By setting this option we enforce that this does not happen, at the cost that we may not be able to determine the shape of the array. 
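# Illustrative calls to get_data_shape (behavior follows the implementation below; the
# numpy import is only for the example):
import numpy as np
from hdmf.utils import get_data_shape

get_data_shape([[1, 2, 3], [4, 5, 6]])   # (2, 3): nested lists are inspected recursively
get_data_shape(np.zeros((4, 5)))         # (4, 5): taken from the .shape attribute
get_data_shape('abc')                    # None: strings are not treated as arrays
# For an h5py.Dataset, the .maxshape attribute is used, so unlimited (extendable)
# dimensions are reported as None.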
:return: Tuple of ints indicating the size of known dimensions. Dimensions for which the size is unknown will be set to None. """ def __get_shape_helper(local_data): shape = list() if hasattr(local_data, '__len__'): shape.append(len(local_data)) if len(local_data) and not isinstance(local_data[0], (str, bytes)): shape.extend(__get_shape_helper(local_data[0])) return tuple(shape) if hasattr(data, 'maxshape'): return data.maxshape elif hasattr(data, 'shape'): return data.shape elif isinstance(data, dict): return None elif hasattr(data, '__len__') and not isinstance(data, (str, bytes)): if not strict_no_data_load or (isinstance(data, list) or isinstance(data, tuple) or isinstance(data, set)): return __get_shape_helper(data) else: return None else: return None def pystr(s): """ Convert a string of characters to Python str object """ if isinstance(s, bytes): return s.decode('utf-8') else: return s class LabelledDict(dict): """A dict wrapper class with a label and which allows retrieval of values based on an attribute of the values For example, if the key attribute is set as 'name' in __init__, then all objects added to the LabelledDict must have a 'name' attribute and a particular object in the LabelledDict can be accessed using the syntax ['object_name'] if the object.name == 'object_name'. In this way, LabelledDict acts like a set where values can be retrieved using square brackets around the value of the key attribute. An 'add' method makes clear the association between the key attribute of the LabelledDict and the values of the LabelledDict. LabelledDict also supports retrieval of values with the syntax my_dict['attr == val'], which returns a set of objects in the LabelledDict which have an attribute 'attr' with a string value 'val'. If no objects match that condition, a KeyError is raised. Note that if 'attr' equals the key attribute, then the single matching value is returned, not a set. Usage: LabelledDict(label='my_objects', def_key_name = 'name') my_dict[obj.name] = obj my_dict.add(obj) # simpler syntax Example: # MyTestClass is a class with attributes 'prop1' and 'prop2'. MyTestClass.__init__ sets those attributes. ld = LabelledDict(label='all_objects', key_attr='prop1') obj1 = MyTestClass('a', 'b') obj2 = MyTestClass('d', 'b') ld[obj1.prop1] = obj1 # obj1 is added to the LabelledDict with the key obj1.prop1. Any other key is not allowed. ld.add(obj2) # Simpler 'add' syntax enforces the required relationship ld['a'] # Returns obj1 ld['prop1 == a'] # Also returns obj1 ld['prop2 == b'] # Returns set([obj1, obj2]) - the set of all values v in ld where v.prop2 == 'b' """ @docval({'name': 'label', 'type': str, 'doc': 'the label on this dictionary'}, {'name': 'key_attr', 'type': str, 'doc': 'the attribute name to use as the key', 'default': 'name'}) def __init__(self, **kwargs): label, key_attr = getargs('label', 'key_attr', kwargs) self.__label = label self.__key_attr = key_attr @property def label(self): """Return the label of this LabelledDict""" return self.__label @property def key_attr(self): """Return the attribute used as the key for values in this LabelledDict""" return self.__key_attr def __getitem__(self, args): """Get a value from the LabelledDict with the given key. Supports syntax my_dict['attr == val'], which returns a set of objects in the LabelledDict which have an attribute 'attr' with a string value 'val'. If no objects match that condition, a KeyError is raised. Note that if 'attr' equals the key attribute, then the single matching value is returned, not a set. 
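# Runnable sketch of the lookup behavior described above (Channel is a hypothetical value
# class; LabelledDict is the class defined here):
from hdmf.utils import LabelledDict

class Channel:
    def __init__(self, name, group):
        self.name = name
        self.group = group

ld = LabelledDict(label='channels', key_attr='name')
ld.add(Channel('ch0', 'shank1'))      # stored under key 'ch0', the value of .name
ld.add(Channel('ch1', 'shank1'))
ld['ch0']                             # the single Channel named 'ch0'
ld['name == ch0']                     # same single object, since 'name' is the key attribute
ld['group == shank1']                 # set of both Channels, matched on the 'group' attribute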
""" key = args if '==' in args: key, val = args.split("==") key = key.strip() val = val.strip() # val is a string if not key: raise ValueError("An attribute name is required before '=='.") if not val: raise ValueError("A value is required after '=='.") if key != self.key_attr: ret = set() for item in self.values(): if getattr(item, key, None) == val: ret.add(item) if len(ret): return ret else: raise KeyError(val) # if key == self.key_attr, then call __getitem__ normally on val key = val return super().__getitem__(key) def __setitem__(self, key, value): """Set a value in the LabelledDict with the given key. The key must equal value.key_attr. See LabelledDict.add for simpler syntax. Raises ValueError if value does not have attribute key_attr. """ self.__check_value(value) if key != getattr(value, self.key_attr): raise KeyError("Key '%s' must equal attribute '%s' of '%s'." % (key, self.key_attr, value)) super().__setitem__(key, value) def add(self, value): """Add a value to the dict with the key value.key_attr. Raises ValueError if value does not have attribute key_attr. """ self.__check_value(value) self.__setitem__(getattr(value, self.key_attr), value) def __check_value(self, value): if not hasattr(value, self.key_attr): raise ValueError("Cannot set value '%s' in LabelledDict. Value must have key '%s'." % (value, self.key_attr)) ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9201882 hdmf-1.5.4/src/hdmf/validate/0000755000655200065520000000000000000000000017324 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/validate/__init__.py0000644000655200065520000000023700000000000021437 0ustar00circlecicircleci00000000000000from . 
import errors from .validator import ValidatorMap, Validator, AttributeValidator, DatasetValidator, GroupValidator from .errors import * # noqa: F403 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/validate/errors.py0000644000655200065520000001427500000000000021223 0ustar00circlecicircleci00000000000000from numpy import dtype from ..utils import docval, getargs from ..spec.spec import DtypeHelper __all__ = [ "Error", "DtypeError", "MissingError", "ExpectedArrayError", "ShapeError", "MissingDataType", "IllegalLinkError", "IncorrectDataType" ] class Error: @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, {'name': 'reason', 'type': str, 'doc': 'the reason for the error'}, {'name': 'location', 'type': str, 'doc': 'the location of the error', 'default': None}) def __init__(self, **kwargs): self.__name = getargs('name', kwargs) self.__reason = getargs('reason', kwargs) self.__location = getargs('location', kwargs) if self.__location is not None: self.__str = "%s (%s): %s" % (self.__name, self.__location, self.__reason) else: self.__str = "%s: %s" % (self.name, self.reason) @property def name(self): return self.__name @property def reason(self): return self.__reason @property def location(self): return self.__location @location.setter def location(self, loc): self.__location = loc self.__str = "%s (%s): %s" % (self.__name, self.__location, self.__reason) def __str__(self): return self.__str def __repr__(self): return self.__str__() class DtypeError(Error): @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, {'name': 'expected', 'type': (dtype, type, str, list), 'doc': 'the expected dtype'}, {'name': 'received', 'type': (dtype, type, str, list), 'doc': 'the received dtype'}, {'name': 'location', 'type': str, 'doc': 'the location of the error', 'default': None}) def __init__(self, **kwargs): name = getargs('name', kwargs) expected = getargs('expected', kwargs) received = getargs('received', kwargs) if isinstance(expected, list): expected = DtypeHelper.simplify_cpd_type(expected) reason = "incorrect type - expected '%s', got '%s'" % (expected, received) loc = getargs('location', kwargs) super().__init__(name, reason, location=loc) class MissingError(Error): @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, {'name': 'location', 'type': str, 'doc': 'the location of the error', 'default': None}) def __init__(self, **kwargs): name = getargs('name', kwargs) reason = "argument missing" loc = getargs('location', kwargs) super().__init__(name, reason, location=loc) class MissingDataType(Error): @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, {'name': 'data_type', 'type': str, 'doc': 'the missing data type'}, {'name': 'location', 'type': str, 'doc': 'the location of the error', 'default': None}) def __init__(self, **kwargs): name, data_type = getargs('name', 'data_type', kwargs) self.__data_type = data_type reason = "missing data type %s" % self.__data_type loc = getargs('location', kwargs) super().__init__(name, reason, location=loc) @property def data_type(self): return self.__data_type class ExpectedArrayError(Error): @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, {'name': 'expected', 'type': (tuple, list), 'doc': 'the expected shape'}, {'name': 'received', 'type': str, 'doc': 'the received data'}, {'name': 
'location', 'type': str, 'doc': 'the location of the error', 'default': None}) def __init__(self, **kwargs): name = getargs('name', kwargs) expected = getargs('expected', kwargs) received = getargs('received', kwargs) reason = "incorrect shape - expected an array of shape '%s', got non-array data '%s'" % (expected, received) loc = getargs('location', kwargs) super().__init__(name, reason, location=loc) class ShapeError(Error): @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, {'name': 'expected', 'type': (tuple, list), 'doc': 'the expected shape'}, {'name': 'received', 'type': (tuple, list), 'doc': 'the received shape'}, {'name': 'location', 'type': str, 'doc': 'the location of the error', 'default': None}) def __init__(self, **kwargs): name = getargs('name', kwargs) expected = getargs('expected', kwargs) received = getargs('received', kwargs) reason = "incorrect shape - expected '%s', got '%s'" % (expected, received) loc = getargs('location', kwargs) super().__init__(name, reason, location=loc) class IllegalLinkError(Error): """ A validation error for indicating that a link was used where an actual object (i.e. a dataset or a group) must be used """ @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, {'name': 'location', 'type': str, 'doc': 'the location of the error', 'default': None}) def __init__(self, **kwargs): name = getargs('name', kwargs) reason = "illegal use of link" loc = getargs('location', kwargs) super().__init__(name, reason, location=loc) class IncorrectDataType(Error): """ A validation error for indicating that the incorrect data_type (not dtype) was used. """ @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, {'name': 'expected', 'type': str, 'doc': 'the expected data_type'}, {'name': 'received', 'type': str, 'doc': 'the received data_type'}, {'name': 'location', 'type': str, 'doc': 'the location of the error', 'default': None}) def __init__(self, **kwargs): name = getargs('name', kwargs) expected = getargs('expected', kwargs) received = getargs('received', kwargs) reason = "incorrect data_type - expected '%s', got '%s'" % (expected, received) loc = getargs('location', kwargs) super().__init__(name, reason, location=loc) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/src/hdmf/validate/validator.py0000644000655200065520000004572100000000000021674 0ustar00circlecicircleci00000000000000import numpy as np from abc import ABCMeta, abstractmethod from copy import copy import re from itertools import chain from ..utils import docval, getargs, call_docval_func, pystr, get_data_shape from ..spec import Spec, AttributeSpec, GroupSpec, DatasetSpec, RefSpec from ..spec.spec import BaseStorageSpec, DtypeHelper from ..spec import SpecNamespace from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder from ..build.builders import BaseBuilder from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType from .errors import ExpectedArrayError __synonyms = DtypeHelper.primary_dtype_synonyms __additional = { 'float': ['double'], 'int8': ['short', 'int', 'long'], 'short': ['int', 'long'], 'int': ['long'], 'uint8': ['uint16', 'uint32', 'uint64'], 'uint16': ['uint32', 'uint64'], 'uint32': ['uint64'], } __allowable = dict() for dt, dt_syn in __synonyms.items(): allow = copy(dt_syn) if dt in __additional: for 
addl in __additional[dt]: allow.extend(__synonyms[addl]) for syn in dt_syn: __allowable[syn] = allow __allowable['numeric'] = set(chain.from_iterable(__allowable[k] for k in __allowable if 'int' in k or 'float' in k)) def check_type(expected, received): ''' *expected* should come from the spec *received* should come from the data ''' if isinstance(expected, list): if len(expected) > len(received): raise ValueError('compound type shorter than expected') for i, exp in enumerate(DtypeHelper.simplify_cpd_type(expected)): rec = received[i] if rec not in __allowable[exp]: return False return True else: if isinstance(received, np.dtype): if received.char == 'O': if 'vlen' in received.metadata: received = received.metadata['vlen'] else: raise ValueError("Unrecognized type: '%s'" % received) received = 'utf' if received is str else 'ascii' elif received.char == 'U': received = 'utf' elif received.char == 'S': received = 'ascii' else: received = received.name elif isinstance(received, type): received = received.__name__ if isinstance(expected, RefSpec): expected = expected.reftype elif isinstance(expected, type): expected = expected.__name__ return received in __allowable[expected] def get_iso8601_regex(): isodate_re = (r'^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):' r'([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?$') return re.compile(isodate_re) _iso_re = get_iso8601_regex() def _check_isodatetime(s, default=None): try: if _iso_re.match(pystr(s)) is not None: return 'isodatetime' except Exception: pass return default def get_type(data): if isinstance(data, str): return _check_isodatetime(data, 'utf') elif isinstance(data, bytes): return _check_isodatetime(data, 'ascii') elif isinstance(data, RegionBuilder): return 'region' elif isinstance(data, ReferenceBuilder): return 'object' elif isinstance(data, np.ndarray): return get_type(data[0]) if not hasattr(data, '__len__'): return type(data).__name__ else: if hasattr(data, 'dtype'): if data.dtype.metadata is not None and data.dtype.metadata.get('vlen') is not None: return get_type(data[0]) return data.dtype if len(data) == 0: raise ValueError('cannot determine type for empty data') return get_type(data[0]) def check_shape(expected, received): ret = False if expected is None: ret = True else: if isinstance(expected, (list, tuple)): if isinstance(expected[0], (list, tuple)): for sub in expected: if check_shape(sub, received): ret = True break else: if len(expected) > 0 and received is None: ret = False elif len(expected) == len(received): ret = True for e, r in zip(expected, received): if not check_shape(e, r): ret = False break elif isinstance(expected, int): ret = expected == received return ret class ValidatorMap: """A class for keeping track of Validator objects for all data types in a namespace""" @docval({'name': 'namespace', 'type': SpecNamespace, 'doc': 'the namespace to builder map for'}) def __init__(self, **kwargs): ns = getargs('namespace', kwargs) self.__ns = ns tree = dict() types = ns.get_registered_types() self.__type_key = ns.get_spec(types[0]).type_key() for dt in types: spec = ns.get_spec(dt) parent = spec.data_type_inc child = spec.data_type_def tree.setdefault(child, list()) if parent is not None: tree.setdefault(parent, list()).append(child) for t in tree: self.__rec(tree, t) self.__valid_types = dict() self.__validators = dict() for dt, children in tree.items(): _list = list() for t in children: spec = self.__ns.get_spec(t) if isinstance(spec, GroupSpec): 
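# Sketch of the module-level helpers in this file and of how validation errors render
# (imports are from the submodules where these names are defined):
from hdmf.validate.validator import check_shape, get_type
from hdmf.validate.errors import DtypeError

check_shape((None, 3), (10, 3))    # True: None in the spec matches any length
check_shape((None, 3), (10, 4))    # False: the second dimension must be 3
get_type('2020-01-21T10:30:00Z')   # 'isodatetime': matches the ISO 8601 regex above
get_type(b'some bytes')            # 'ascii'

str(DtypeError('MyType/attr1', 'int', 'str', location='/root/thing'))
# "MyType/attr1 (/root/thing): incorrect type - expected 'int', got 'str'"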
val = GroupValidator(spec, self) else: val = DatasetValidator(spec, self) if t == dt: self.__validators[t] = val _list.append(val) self.__valid_types[dt] = tuple(_list) def __rec(self, tree, node): if isinstance(tree[node], tuple): return tree[node] sub_types = {node} for child in tree[node]: sub_types.update(self.__rec(tree, child)) tree[node] = tuple(sub_types) return tree[node] @property def namespace(self): return self.__ns @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to use to validate'}, returns='all valid sub data types for the given spec', rtype=tuple) def valid_types(self, **kwargs): '''Get all valid types for a given data type''' spec = getargs('spec', kwargs) if isinstance(spec, Spec): spec = spec.data_type_def try: return self.__valid_types[spec] except KeyError: raise ValueError("no children for '%s'" % spec) @docval({'name': 'data_type', 'type': (BaseStorageSpec, str), 'doc': 'the data type to get the validator for'}, returns='the validator ``data_type``') def get_validator(self, **kwargs): """Return the validator for a given data type""" dt = getargs('data_type', kwargs) if isinstance(dt, BaseStorageSpec): dt_tmp = dt.data_type_def if dt_tmp is None: dt_tmp = dt.data_type_inc dt = dt_tmp try: return self.__validators[dt] except KeyError: msg = "data type '%s' not found in namespace %s" % (dt, self.__ns.name) raise ValueError(msg) @docval({'name': 'builder', 'type': BaseBuilder, 'doc': 'the builder to validate'}, returns="a list of errors found", rtype=list) def validate(self, **kwargs): """Validate a builder against a Spec ``builder`` must have the attribute used to specifying data type by the namespace used to construct this ValidatorMap. """ builder = getargs('builder', kwargs) dt = builder.attributes.get(self.__type_key) if dt is None: msg = "builder must have data type defined with attribute '%s'" % self.__type_key raise ValueError(msg) validator = self.get_validator(dt) return validator.validate(builder) class Validator(metaclass=ABCMeta): '''A base class for classes that will be used to validate against Spec subclasses''' @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to use to validate'}, {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'}) def __init__(self, **kwargs): self.__spec = getargs('spec', kwargs) self.__vmap = getargs('validator_map', kwargs) @property def spec(self): return self.__spec @property def vmap(self): return self.__vmap @abstractmethod @docval({'name': 'value', 'type': None, 'doc': 'either in the form of a value or a Builder'}, returns='a list of Errors', rtype=list) def validate(self, **kwargs): pass @classmethod def get_spec_loc(cls, spec): stack = list() tmp = spec while tmp is not None: name = tmp.name if name is None: name = tmp.data_type_def if name is None: name = tmp.data_type_inc stack.append(name) tmp = tmp.parent return "/".join(reversed(stack)) @classmethod def get_builder_loc(cls, builder): stack = list() tmp = builder while tmp is not None and tmp.name != 'root': stack.append(tmp.name) tmp = tmp.parent return "/".join(reversed(stack)) class AttributeValidator(Validator): '''A class for validating values against AttributeSpecs''' @docval({'name': 'spec', 'type': AttributeSpec, 'doc': 'the specification to use to validate'}, {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'}) def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) @docval({'name': 'value', 'type': None, 'doc': 
'the value to validate'}, returns='a list of Errors', rtype=list) def validate(self, **kwargs): value = getargs('value', kwargs) ret = list() spec = self.spec if spec.required and value is None: ret.append(MissingError(self.get_spec_loc(spec))) else: if spec.dtype is None: ret.append(Error(self.get_spec_loc(spec))) elif isinstance(spec.dtype, RefSpec): if not isinstance(value, BaseBuilder): expected = '%s reference' % spec.dtype.reftype ret.append(DtypeError(self.get_spec_loc(spec), expected, get_type(value))) else: target_spec = self.vmap.namespace.catalog.get_spec(spec.dtype.target_type) data_type = value.attributes.get(target_spec.type_key()) hierarchy = self.vmap.namespace.catalog.get_hierarchy(data_type) if spec.dtype.target_type not in hierarchy: ret.append(IncorrectDataType(self.get_spec_loc(spec), spec.dtype.target_type, data_type)) else: dtype = get_type(value) if not check_type(spec.dtype, dtype): ret.append(DtypeError(self.get_spec_loc(spec), spec.dtype, dtype)) shape = get_data_shape(value) if not check_shape(spec.shape, shape): ret.append(ShapeError(self.get_spec_loc(spec), spec.shape, shape)) return ret class BaseStorageValidator(Validator): '''A base class for validating against Spec objects that have attributes i.e. BaseStorageSpec''' @docval({'name': 'spec', 'type': BaseStorageSpec, 'doc': 'the specification to use to validate'}, {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'}) def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) self.__attribute_validators = dict() for attr in self.spec.attributes: self.__attribute_validators[attr.name] = AttributeValidator(attr, self.vmap) @docval({"name": "builder", "type": BaseBuilder, "doc": "the builder to validate"}, returns='a list of Errors', rtype=list) def validate(self, **kwargs): builder = getargs('builder', kwargs) attributes = builder.attributes ret = list() for attr, validator in self.__attribute_validators.items(): attr_val = attributes.get(attr) if attr_val is None: if validator.spec.required: ret.append(MissingError(self.get_spec_loc(validator.spec), location=self.get_builder_loc(builder))) else: errors = validator.validate(attr_val) for err in errors: err.location = self.get_builder_loc(builder) + ".%s" % validator.spec.name ret.extend(errors) return ret class DatasetValidator(BaseStorageValidator): '''A class for validating DatasetBuilders against DatasetSpecs''' @docval({'name': 'spec', 'type': DatasetSpec, 'doc': 'the specification to use to validate'}, {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'}) def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) @docval({"name": "builder", "type": DatasetBuilder, "doc": "the builder to validate"}, returns='a list of Errors', rtype=list) def validate(self, **kwargs): builder = getargs('builder', kwargs) ret = super().validate(builder) data = builder.data if self.spec.dtype is not None: dtype = get_type(data) if not check_type(self.spec.dtype, dtype): ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype, location=self.get_builder_loc(builder))) shape = get_data_shape(data) if not check_shape(self.spec.shape, shape): if shape is None: ret.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(data), location=self.get_builder_loc(builder))) else: ret.append(ShapeError(self.get_spec_loc(self.spec), self.spec.shape, shape, location=self.get_builder_loc(builder))) return ret class 
GroupValidator(BaseStorageValidator): '''A class for validating GroupBuilders against GroupSpecs''' @docval({'name': 'spec', 'type': GroupSpec, 'doc': 'the specification to use to validate'}, {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'}) def __init__(self, **kwargs): call_docval_func(super().__init__, kwargs) self.__include_dts = dict() self.__dataset_validators = dict() self.__group_validators = dict() it = chain(self.spec.datasets, self.spec.groups) for spec in it: if spec.data_type_def is None: if spec.data_type_inc is None: if isinstance(spec, GroupSpec): self.__group_validators[spec.name] = GroupValidator(spec, self.vmap) else: self.__dataset_validators[spec.name] = DatasetValidator(spec, self.vmap) else: self.__include_dts[spec.data_type_inc] = spec else: self.__include_dts[spec.data_type_def] = spec @docval({"name": "builder", "type": GroupBuilder, "doc": "the builder to validate"}, returns='a list of Errors', rtype=list) def validate(self, **kwargs): # noqa: C901 builder = getargs('builder', kwargs) ret = super().validate(builder) # get the data_types data_types = dict() for key, value in builder.items(): v_builder = value if isinstance(v_builder, LinkBuilder): v_builder = v_builder.builder if isinstance(v_builder, BaseBuilder): dt = v_builder.attributes.get(self.spec.type_key()) if dt is not None: data_types.setdefault(dt, list()).append(value) for dt, inc_spec in self.__include_dts.items(): found = False inc_name = inc_spec.name for sub_val in self.vmap.valid_types(dt): spec = sub_val.spec sub_dt = spec.data_type_def dt_builders = data_types.get(sub_dt) if dt_builders is not None: if inc_name is not None: dt_builders = filter(lambda x: x.name == inc_name, dt_builders) for bldr in dt_builders: tmp = bldr if isinstance(bldr, LinkBuilder): if inc_spec.linkable: tmp = bldr.builder else: ret.append(IllegalLinkError(self.get_spec_loc(inc_spec), location=self.get_builder_loc(tmp))) ret.extend(sub_val.validate(tmp)) found = True if not found and self.__include_dts[dt].required: ret.append(MissingDataType(self.get_spec_loc(self.spec), dt, location=self.get_builder_loc(builder))) it = chain(self.__dataset_validators.items(), self.__group_validators.items()) for name, validator in it: sub_builder = builder.get(name) if isinstance(validator, BaseStorageSpec): inc_spec = validator validator = self.vmap.get_validator(inc_spec) def_spec = validator.spec if sub_builder is None: if inc_spec.required: ret.append(MissingDataType(self.get_spec_loc(def_spec), def_spec.data_type_def, location=self.get_builder_loc(builder))) else: ret.extend(validator.validate(sub_builder)) else: spec = validator.spec if isinstance(sub_builder, LinkBuilder): if spec.linkable: sub_builder = sub_builder.builder else: ret.append(IllegalLinkError(self.get_spec_loc(spec), location=self.get_builder_loc(builder))) continue if sub_builder is None: if spec.required: ret.append(MissingError(self.get_spec_loc(spec), location=self.get_builder_loc(builder))) else: ret.extend(validator.validate(sub_builder)) return ret ././@PaxHeader0000000000000000000000000000003300000000000011451 xustar000000000000000027 mtime=1579654747.916188 hdmf-1.5.4/src/hdmf.egg-info/0000755000655200065520000000000000000000000017225 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654747.0 hdmf-1.5.4/src/hdmf.egg-info/PKG-INFO0000644000655200065520000001744100000000000020331 
0ustar00circlecicircleci00000000000000Metadata-Version: 2.1 Name: hdmf Version: 1.5.4 Summary: A package for standardizing hierarchical object data Home-page: https://github.com/hdmf-dev/hdmf Author: Andrew Tritt Author-email: ajtritt@lbl.gov License: BSD Description: ======================================== The Hierarchical Data Modeling Framework ======================================== The Hierarchical Data Modeling Framework, or *HDMF*, is a Python package for working with hierarchical data. It provides APIs for specifying data models, reading and writing data to different storage backends, and representing data with Python object. Documentation of HDMF can be found at https://hdmf.readthedocs.io Latest Release ============== .. image:: https://badge.fury.io/py/hdmf.svg :target: https://badge.fury.io/py/hdmf .. image:: https://anaconda.org/conda-forge/hdmf/badges/version.svg :target: https://anaconda.org/conda-forge/hdmf Build Status ============ .. table:: +---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+ | Linux | Windows and macOS | +=====================================================================+==================================================================================================+ | .. image:: https://circleci.com/gh/hdmf-dev/hdmf.svg?style=shield | .. image:: https://dev.azure.com/hdmf-dev/hdmf/_apis/build/status/hdmf-dev.hdmf?branchName=dev | | :target: https://circleci.com/gh/hdmf-dev/hdmf | :target: https://dev.azure.com/hdmf-dev/hdmf/_build/latest?definitionId=1&branchName=dev | +---------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+ **Conda** .. image:: https://circleci.com/gh/conda-forge/hdmf-feedstock.svg?style=shield :target: https://circleci.com/gh/conda-forge/hdmf-feedstock Overall Health ============== .. image:: https://codecov.io/gh/hdmf-dev/hdmf/branch/dev/graph/badge.svg :target: https://codecov.io/gh/hdmf-dev/hdmf .. image:: https://requires.io/github/hdmf-dev/hdmf/requirements.svg?branch=dev :target: https://requires.io/github/hdmf-dev/hdmf/requirements/?branch=dev :alt: Requirements Status .. image:: https://readthedocs.org/projects/hdmf/badge/?version=latest :target: https://hdmf.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status Installation ============ See the HDMF documentation for details http://hdmf.readthedocs.io/en/latest/getting_started.html#installation Code of Conduct =============== This project and everyone participating in it is governed by our `code of conduct guidelines <.github/CODE_OF_CONDUCT.md>`_. By participating, you are expected to uphold this code. Contributing ============ For details on how to contribute to HDMF see our `contribution guidelines `_. LICENSE ======= "hdmf" Copyright (c) 2017-2020, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
(2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form. COPYRIGHT ========= "hdmf" Copyright (c) 2017-2020, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit other to do so. Keywords: python HDF HDF5 cross-platform open-data data-format open-source open-science reproducible-research Platform: UNKNOWN Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: License :: OSI Approved :: BSD License Classifier: Development Status :: 2 - Pre-Alpha Classifier: Intended Audience :: Developers Classifier: Intended Audience :: Science/Research Classifier: Operating System :: Microsoft :: Windows Classifier: Operating System :: MacOS Classifier: Operating System :: Unix Classifier: Topic :: Scientific/Engineering :: Medical Science Apps. 
Description-Content-Type: text/x-rst; charset=UTF-8 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654747.0 hdmf-1.5.4/src/hdmf.egg-info/SOURCES.txt0000644000655200065520000000553600000000000021122 0ustar00circlecicircleci00000000000000Legal.txt MANIFEST.in README.rst license.txt requirements-dev.txt requirements-doc.txt requirements-min.txt requirements.txt setup.cfg setup.py test.py tox.ini versioneer.py src/hdmf/__init__.py src/hdmf/_version.py src/hdmf/array.py src/hdmf/container.py src/hdmf/data_utils.py src/hdmf/monitor.py src/hdmf/query.py src/hdmf/region.py src/hdmf/utils.py src/hdmf.egg-info/PKG-INFO src/hdmf.egg-info/SOURCES.txt src/hdmf.egg-info/dependency_links.txt src/hdmf.egg-info/not-zip-safe src/hdmf.egg-info/requires.txt src/hdmf.egg-info/top_level.txt src/hdmf/backends/__init__.py src/hdmf/backends/io.py src/hdmf/backends/warnings.py src/hdmf/backends/hdf5/__init__.py src/hdmf/backends/hdf5/h5_utils.py src/hdmf/backends/hdf5/h5tools.py src/hdmf/build/__init__.py src/hdmf/build/builders.py src/hdmf/build/manager.py src/hdmf/build/map.py src/hdmf/build/objectmapper.py src/hdmf/build/warnings.py src/hdmf/common/__init__.py src/hdmf/common/sparse.py src/hdmf/common/table.py src/hdmf/common/hdmf-common-schema/common/namespace.yaml src/hdmf/common/hdmf-common-schema/common/sparse.yaml src/hdmf/common/hdmf-common-schema/common/table.yaml src/hdmf/common/io/__init__.py src/hdmf/common/io/table.py src/hdmf/spec/__init__.py src/hdmf/spec/catalog.py src/hdmf/spec/namespace.py src/hdmf/spec/spec.py src/hdmf/spec/write.py src/hdmf/testing/__init__.py src/hdmf/testing/testcase.py src/hdmf/validate/__init__.py src/hdmf/validate/errors.py src/hdmf/validate/validator.py tests/__init__.py tests/coloredtestrunner.py tests/coverage/runCoverage tests/unit/__init__.py tests/unit/test_container.py tests/unit/test_io_hdf5.py tests/unit/test_io_hdf5_h5tools.py tests/unit/test_query.py tests/unit/utils.py tests/unit/back_compat_tests/1.0.5.h5 tests/unit/back_compat_tests/__init__.py tests/unit/back_compat_tests/test_1_1_0.py tests/unit/build_tests/__init__.py tests/unit/build_tests/test_io_build_builders.py tests/unit/build_tests/test_io_manager.py tests/unit/build_tests/test_io_map.py tests/unit/build_tests/test_io_map_data.py tests/unit/common/__init__.py tests/unit/common/test_common.py tests/unit/common/test_sparse.py tests/unit/common/test_table.py tests/unit/spec_tests/__init__.py tests/unit/spec_tests/test_attribute_spec.py tests/unit/spec_tests/test_dataset_spec.py tests/unit/spec_tests/test_dtype_spec.py tests/unit/spec_tests/test_group_spec.py tests/unit/spec_tests/test_load_namespace.py tests/unit/spec_tests/test_ref_spec.py tests/unit/spec_tests/test_spec_catalog.py tests/unit/spec_tests/test_spec_write.py tests/unit/utils_test/__init__.py tests/unit/utils_test/test_core_DataChunk.py tests/unit/utils_test/test_core_DataChunkIterator.py tests/unit/utils_test/test_core_DataIO.py tests/unit/utils_test/test_core_ShapeValidator.py tests/unit/utils_test/test_docval.py tests/unit/utils_test/test_labelleddict.py tests/unit/validator_tests/__init__.py tests/unit/validator_tests/test_validate.py././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654747.0 hdmf-1.5.4/src/hdmf.egg-info/dependency_links.txt0000644000655200065520000000000100000000000023273 0ustar00circlecicircleci00000000000000 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654691.0 
hdmf-1.5.4/src/hdmf.egg-info/not-zip-safe0000644000655200065520000000000100000000000021453 0ustar00circlecicircleci00000000000000 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654747.0 hdmf-1.5.4/src/hdmf.egg-info/requires.txt0000644000655200065520000000010000000000000021614 0ustar00circlecicircleci00000000000000h5py>=2.9 numpy>=1.16 scipy>=1.1 pandas>=0.23 ruamel.yaml>=0.15 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654747.0 hdmf-1.5.4/src/hdmf.egg-info/top_level.txt0000644000655200065520000000000500000000000021752 0ustar00circlecicircleci00000000000000hdmf ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/test.py0000755000655200065520000001123100000000000015360 0ustar00circlecicircleci00000000000000#!/usr/bin/env python from __future__ import print_function import warnings import re import argparse import logging import os.path import os import sys import traceback import unittest from tests.coloredtestrunner import ColoredTestRunner, ColoredTestResult flags = {'hdmf': 1, 'integration': 3, 'example': 4} TOTAL = 0 FAILURES = 0 ERRORS = 0 def run_test_suite(directory, description="", verbose=True): global TOTAL, FAILURES, ERRORS logging.info("running %s" % description) directory = os.path.join(os.path.dirname(__file__), directory) if verbose > 1: runner = ColoredTestRunner(verbosity=verbose) else: runner = unittest.TextTestRunner(verbosity=verbose, resultclass=ColoredTestResult) test_result = runner.run(unittest.TestLoader().discover(directory)) TOTAL += test_result.testsRun FAILURES += len(test_result.failures) ERRORS += len(test_result.errors) return test_result def _import_from_file(script): import imp return imp.load_source(os.path.basename(script), script) warning_re = re.compile("Parent module '[a-zA-Z0-9]+' not found while handling absolute import") def run_example_tests(): global TOTAL, FAILURES, ERRORS logging.info('running example tests') examples_scripts = list() for root, dirs, files in os.walk(os.path.join(os.path.dirname(__file__), "docs", "gallery")): for f in files: if f.endswith(".py"): examples_scripts.append(os.path.join(root, f)) TOTAL += len(examples_scripts) for script in examples_scripts: try: logging.info("Executing %s" % script) ws = list() with warnings.catch_warnings(record=True) as tmp: _import_from_file(script) for w in tmp: # ignore RunTimeWarnings about importing if isinstance(w.message, RuntimeWarning) and not warning_re.match(str(w.message)): ws.append(w) for w in ws: warnings.showwarning(w.message, w.category, w.filename, w.lineno, w.line) except Exception: print(traceback.format_exc()) FAILURES += 1 ERRORS += 1 def main(): # setup and parse arguments parser = argparse.ArgumentParser('python test.py [options]') parser.set_defaults(verbosity=1, suites=[]) parser.add_argument('-v', '--verbose', const=2, dest='verbosity', action='store_const', help='run in verbose mode') parser.add_argument('-q', '--quiet', const=0, dest='verbosity', action='store_const', help='run disabling output') parser.add_argument('-u', '--unit', action='append_const', const=flags['hdmf'], dest='suites', help='run unit tests for hdmf package') parser.add_argument('-e', '--example', action='append_const', const=flags['example'], dest='suites', help='run example tests') args = parser.parse_args() if not args.suites: args.suites = list(flags.values()) args.suites.pop(args.suites.index(flags['example'])) # remove 
example as a suite run by default # set up logger root = logging.getLogger() root.setLevel(logging.DEBUG) ch = logging.StreamHandler(sys.stdout) ch.setLevel(logging.DEBUG) formatter = logging.Formatter('======================================================================\n' '%(asctime)s - %(levelname)s - %(message)s') ch.setFormatter(formatter) root.addHandler(ch) warnings.simplefilter('always') # many tests use NamespaceCatalog.add_namespace, which is deprecated, to set up tests. # ignore these warnings for now. warnings.filterwarnings("ignore", category=DeprecationWarning, module="hdmf.spec.namespace", message=("NamespaceCatalog.add_namespace has been deprecated. " "SpecNamespaces should be added with load_namespaces.")) # Run unit tests for hdmf package if flags['hdmf'] in args.suites: run_test_suite("tests/unit", "hdmf unit tests", verbose=args.verbosity) # Run example tests if flags['example'] in args.suites: run_example_tests() final_message = 'Ran %s tests' % TOTAL exitcode = 0 if ERRORS > 0 or FAILURES > 0: exitcode = 1 _list = list() if ERRORS > 0: _list.append('errors=%d' % ERRORS) if FAILURES > 0: _list.append('failures=%d' % FAILURES) final_message = '%s - FAILED (%s)' % (final_message, ','.join(_list)) else: final_message = '%s - OK' % final_message logging.info(final_message) return exitcode if __name__ == "__main__": sys.exit(main()) ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9201882 hdmf-1.5.4/tests/0000755000655200065520000000000000000000000015170 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/__init__.py0000644000655200065520000000000000000000000017267 0ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/coloredtestrunner.py0000644000655200065520000004346300000000000021335 0ustar00circlecicircleci00000000000000# -*- coding: utf-8 -*- """ A ColoredTestRunner for use with the Python unit testing framework. It generates a tabular report to show the result at a glance, with COLORS. coloredtestrunner.py was modified from code written by Vinícius Dantas and posted as a gist: https://gist.github.com/viniciusd/73e6eccd39dea5e714b1464e3c47e067 and demonstrated here: https://stackoverflow.com/questions/17162682/display-python-unittest-results-in-nice-tabular-form/31665827#31665827 The code linked above is based on HTMLTestRunner written by Wai Yip Tung. The BSD-3-Clause license covering HTMLTestRunner is below. ------------------------------------------------------------------------ Copyright (c) 2004-2007, Wai Yip Tung All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name Wai Yip Tung nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ import datetime try: from StringIO import StringIO except ImportError: from io import StringIO import sys import re import unittest import textwrap # ------------------------------------------------------------------------ # The redirectors below are used to capture output during testing. Output # sent to sys.stdout and sys.stderr are automatically captured. However # in some cases sys.stdout is already cached before HTMLTestRunner is # invoked (e.g. calling logging.basicConfig). In order to capture those # output, use the redirectors for the cached stream. # # e.g. # >>> logging.basicConfig(stream=HTMLTestRunner.stdout_redirector) # >>> class OutputRedirector(object): """ Wrapper to redirect stdout or stderr """ def __init__(self, fp): self.fp = fp def write(self, s): self.fp.write(s) def writelines(self, lines): self.fp.writelines(lines) def flush(self): self.fp.flush() class Table(object): def __init__(self, padding='', allow_newlines=False): self.__columnSize__ = [] self.__rows__ = [] self.__titles__ = None self.padding = padding self.allow_newlines = allow_newlines def __len__(self, x): return len(re.sub(r"\033\[[0-9];[0-9];[0-9]{1,2}m", "", x)) def addRow(self, row): rows = [[''] for l in range(len(row))] maxrows = 0 for i, x in enumerate(row): for j, y in enumerate(x.split("\n")): if len(y) == 0 and not self.allow_newlines: continue try: self.__columnSize__[i] = max(self.__columnSize__[i], self.__len__(y)) except IndexError: self.__columnSize__.append(self.__len__(y)) rows[i].append(y) maxrows = max(j, maxrows) for i in range(len(rows)): rows[i] += (maxrows-len(rows[i])+2)*[''] for i in range(maxrows + 1): self.__rows__.append([rows[j][i+1] for j in range(len(row))]) def addTitles(self, titles): for i, x in enumerate(titles): try: self.__columnSize__[i] = max(self.__columnSize__[i], self.__len__(x)) except IndexError: self.__columnSize__.append(self.__len__(x)) self.__titles__ = titles def __repr__(self): hline = self.padding+"+" for x in self.__columnSize__: hline += (x+2)*'-'+'+' rows = [] if self.__titles__ is None: title = "" else: if len(self.__titles__) < len(self.__columnSize__): self.__titles__ += ((len(self.__columnSize__)-len(self.__titles__))*['']) for i, x in enumerate(self.__titles__): self.__titles__[i] = x.center(self.__columnSize__[i]) title = self.padding+"| "+" | ".join(self.__titles__)+" |\n"+hline+"\n" for x in self.__rows__: if len(x) < len(self.__columnSize__): x += ((len(self.__columnSize__)-len(x))*['']) for i, c in enumerate(x): x[i] = c.ljust(self.__columnSize__[i])+(len(c)-self.__len__(c)-3)*' ' rows.append(self.padding+"| "+" | ".join(x)+" |") return hline+"\n"+title+"\n".join(rows)+"\n"+hline+"\n" class bcolors(object): FORMAT = { 'Regular': '0', 'Bold': '1', 'Underline': '4', 'High Intensity': '0', # +60 on 
color 'BoldHighIntensity': '1', # +60 on color } START = "\033[" COLOR = { 'black': "0;30m", 'red': "0;31m", 'green': "0;32m", 'yellow': "0;33m", 'blue': "0;34m", 'purple': "0;35m", 'cyan': "0;36m", 'white': "0;37m", 'end': "0m", } def __getattr__(self, name): def handlerFunction(*args, **kwargs): return self.START+self.FORMAT['Regular']+";"+self.COLOR[name.lower()] return handlerFunction(name=name) # ---------------------------------------------------------------------- # Template class Template_mixin(object): bc = bcolors() STATUS = { 0: bc.GREEN+'pass'+bc.END, 1: bc.PURPLE+'fail'+bc.END, 2: bc.RED+'error'+bc.END, } # ------------------------------------------------------------------------ # Report REPORT_TEST_WITH_OUTPUT_TMPL = r""" %(desc)s %(status)s %(script)s """ # variables: (tid, Class, style, desc, status) REPORT_TEST_NO_OUTPUT_TMPL = r""" %(desc)s %(status)s """ # variables: (tid, Class, style, desc, status) REPORT_TEST_OUTPUT_TMPL = r""" %(output)s """ # variables: (id, output) class ColoredTestResult(unittest.TextTestResult): stdout_redirector = OutputRedirector(sys.stdout) stderr_redirector = OutputRedirector(sys.stderr) def __init__(self, stream, descriptions, verbosity=1): super().__init__(stream, descriptions, verbosity) self.stdout0 = None self.stderr0 = None self.success_count = 0 self.failure_count = 0 self.error_count = 0 self.skip_count = 0 self.verbosity = verbosity # deny TextTestResult showAll functionality self.showAll = False # result is a list of result in 4 tuple # ( # result code (0: success; 1: fail; 2: error), # TestCase object, # Test output (byte string), # stack trace, # ) self.result = [] def startTest(self, test): super().startTest(test) # just one buffer for both stdout and stderr self.outputBuffer = StringIO() self.stdout_redirector.fp = self.outputBuffer self.stderr_redirector.fp = self.outputBuffer self.stdout0 = sys.stdout self.stderr0 = sys.stderr sys.stdout = self.stdout_redirector sys.stderr = self.stderr_redirector def complete_output(self): """ Disconnect output redirection and return buffer. Safe to call multiple times. """ if self.stdout0: sys.stdout = self.stdout0 sys.stderr = self.stderr0 self.stdout0 = None self.stderr0 = None return self.outputBuffer.getvalue() def stopTest(self, test): # Usually one of addSuccess, addError or addFailure would have been called. # But there are some path in unittest that would bypass this. # We must disconnect stdout in stopTest(), which is guaranteed to be called. 
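# --- Illustrative aside, not part of hdmf ------------------------------------
# startTest() above swaps sys.stdout/sys.stderr for StringIO-backed redirectors
# and complete_output() restores the cached streams. The same capture-and-restore
# idea, reduced to a self-contained sketch; run_captured is an illustrative name,
# not an hdmf or unittest API.
import sys
from io import StringIO

def run_captured(fn):
    """Run fn() while capturing anything it prints; return (result, captured_text)."""
    buffer = StringIO()
    saved_stdout = sys.stdout
    sys.stdout = buffer            # like startTest(): point stdout at an in-memory buffer
    try:
        result = fn()
    finally:
        sys.stdout = saved_stdout  # like complete_output(): always restore the real stream
    return result, buffer.getvalue()

# Example: run_captured(lambda: print("hello")) returns (None, "hello\n").
# ------------------------------------------------------------------------------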
self.complete_output() def addSuccess(self, test): self.success_count += 1 super().addSuccess(test) output = self.complete_output() self.result.append((0, test, output, '')) sys.stdout.write('.') sys.stdout.flush() if not hasattr(self, 'successes'): self.successes = [test] else: self.successes.append(test) def addError(self, test, err): self.error_count += 1 super().addError(test, err) output = self.complete_output() _, _exc_str = self.errors[-1] self.result.append((2, test, output, _exc_str)) sys.stdout.write('E') sys.stdout.flush() def addFailure(self, test, err): self.failure_count += 1 super().addFailure(test, err) output = self.complete_output() _, _exc_str = self.failures[-1] self.result.append((1, test, output, _exc_str)) sys.stdout.write('F') sys.stdout.flush() def addSubTest(self, test, subtest, err): if err is not None: if issubclass(err[0], test.failureException): self.addFailure(subtest, err) else: self.addError(subtest, err) def addSkip(self, test, reason): self.skip_count += 1 super().addSkip(test, reason) self.complete_output() sys.stdout.write('s') sys.stdout.flush() def get_all_cases_run(self): '''Return a list of each test case which failed or succeeded ''' cases = [] if hasattr(self, 'successes'): cases.extend(self.successes) cases.extend([failure[0] for failure in self.failures]) return cases class ColoredTestRunner(Template_mixin): def __init__(self, stream=sys.stdout, verbosity=1, title=None, description=None): self.stream = stream self.verbosity = verbosity if title is None: self.title = '' else: self.title = title if description is None: self.description = '' else: self.description = description self.startTime = datetime.datetime.now() self.bc = bcolors() self.desc_width = 40 self.output_width = 60 def run(self, test): "Run the given test case or test suite." result = ColoredTestResult(stream=self.stream, descriptions=True, verbosity=self.verbosity) test(result) self.stopTime = datetime.datetime.now() self.generateReport(test, result) return result def sortResult(self, result_list): # unittest does not seems to run in any particular order. # Here at least we want to group them together by class. rmap = {} classes = [] for n, test, output, error in result_list: testClass = test.__class__ if testClass not in rmap: rmap[testClass] = [] classes.append(testClass) rmap[testClass].append((n, test, output, error)) r = [(testClass, rmap[testClass]) for testClass in classes] return r def getReportAttributes(self, result): """ Return report attributes as a list of (name, value). Override this to add custom attributes. 
""" startTime = str(self.startTime)[:19] duration = str(self.stopTime - self.startTime) status = [] padding = 4 * ' ' if result.success_count: status.append(padding + self.bc.GREEN + 'Pass:' + self.bc.END + ' %s\n' % result.success_count) if result.failure_count: status.append(padding + self.bc.PURPLE + 'Failure:' + self.bc.END + ' %s\n' % result.failure_count) if result.error_count: status.append(padding + self.bc.RED + 'Error:' + self.bc.END + ' %s\n' % result.error_count) if status: status = '\n'+''.join(status) else: status = 'none' return [ ('Start Time', startTime), ('Duration', duration), ('Status', status), ] def generateReport(self, test, result): report_attrs = self.getReportAttributes(result) heading = self._generate_heading(report_attrs) report = self._generate_report(result) output = "\n" + self.title.rjust(30) + "\n" + heading + report try: self.stream.write(output.encode('utf8')) except TypeError: self.stream.write(output) def _generate_heading(self, report_attrs): a_lines = [] for name, value in report_attrs: line = self.bc.CYAN + name + ": " + self.bc.END + value + "\n" a_lines.append(line) heading = ''.join(a_lines) + self.bc.CYAN + "Description:" + self.bc.END + self.description + "\n" return heading def _generate_report(self, result): sortedResult = self.sortResult(result.result) padding = 4 * ' ' table = Table(padding=padding, allow_newlines=True) table.addTitles(["Test group/Test case", "Count", "Pass", "Fail", "Error"]) tests = '' for cid, (testClass, classResults) in enumerate(sortedResult): # Iterate over the test cases classTable = Table(padding=2*padding) classTable.addTitles(["Test name", "Output", "Status"]) # subtotal for a class np = nf = ne = 0 for n, test, output, error in classResults: if n == 0: np += 1 elif n == 1: nf += 1 else: ne += 1 # format class description if testClass.__module__ == "__main__": name = testClass.__name__ else: name = "%s.%s" % (testClass.__module__, testClass.__name__) tests += padding + name + "\n" doc = testClass.__doc__ and testClass.__doc__.split("\n")[0] or "" if doc: doc = self._indent(self._wrap_text(doc, width=self.output_width - 4), 4) desc = doc and ('%s:\n%s' % (name, doc)) or name table.addRow([self._wrap_text(desc, width=self.desc_width), str(np + nf + ne), str(np), str(nf), str(ne)]) for tid, (n, test, output, error) in enumerate(classResults): # Iterate over the unit tests classTable.addRow(self._generate_report_test(cid, tid, n, test, output, error)) tests += str(classTable) for tid, (n, test, output, error) in enumerate(classResults): # Iterate over the unit tests if error: tests += self._indent(self.bc.RED + "ERROR in test %s:" % test + self.bc.END, 2) tests += "\n" + self._indent(error, 2) + "\n" table.addRow([self.desc_width * '-', '----', '----', '----', '----']) table.addRow(["Total", str(result.success_count + result.failure_count + result.error_count), str(result.success_count), str(result.failure_count), str(result.error_count)]) report = self.bc.CYAN + "Summary: " + self.bc.END + "\n" + str(table) + tests return report def _generate_report_test(self, cid, tid, n, test, output, error): name = test.id().split('.')[-1] doc = test.shortDescription() or "" if doc: doc = self._indent(self._wrap_text(doc, width=self.output_width - 4), 4) desc = doc and ('%s:\n%s' % (name, doc)) or name # o and e should be byte string because they are collected from stdout and stderr? 
if isinstance(output, str): # TODO: some problem with 'string_escape': it escape \n and mess up formating # uo = unicode(o.encode('string_escape')) try: uo = output.decode('latin-1') except AttributeError: uo = output else: uo = output if isinstance(error, str): # TODO: some problem with 'string_escape': it escape \n and mess up formating # ue = unicode(e.encode('string_escape')) try: ue = error.decode('latin-1') except AttributeError: ue = error else: ue = error # just print the last line of any error if "\n" in ue: ue = ue.splitlines()[-1] script = self.REPORT_TEST_OUTPUT_TMPL % dict( output=self._wrap_text(uo, width=self.output_width) + self._wrap_text(ue, width=self.output_width), ) row = [desc, script, self.STATUS[n]] return row @staticmethod def _wrap_text(text, width): """Wrap text to a given width but preserve line breaks """ # https://stackoverflow.com/a/26538082 return '\n'.join(['\n'.join(textwrap.wrap(line, width, break_long_words=False, replace_whitespace=False)) for line in text.splitlines() if line.strip() != '']) @staticmethod def _indent(text, amount): """Indent text by a particular number of spaces on each line """ try: return textwrap.indent(text, amount * ' ') except AttributeError: # undefined function (indent wasn't added until Python 3.3) return ''.join((amount * ' ') + line for line in text.splitlines(True)) ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9201882 hdmf-1.5.4/tests/coverage/0000755000655200065520000000000000000000000016763 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/coverage/runCoverage0000755000655200065520000000044600000000000021175 0ustar00circlecicircleci00000000000000#!/bin/ksh # use default coverage name COV=coverage3 cd ../.. echo "" echo "Running Tests with Coverage:" ${COV} run --source=. test.py echo "" echo "Creating HTML output:" ${COV} html -d tests/coverage/htmlcov echo "" echo "Open /coverage/htmlcov/index.html to see results." echo "" ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9201882 hdmf-1.5.4/tests/unit/0000755000655200065520000000000000000000000016147 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/__init__.py0000644000655200065520000000000000000000000020246 0ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9201882 hdmf-1.5.4/tests/unit/back_compat_tests/0000755000655200065520000000000000000000000021634 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/back_compat_tests/1.0.5.h50000644000655200065520000004312000000000000022533 0ustar00circlecicircleci00000000000000HDF  PF`h6TREEHEAPX bucketsspecifications8HhTREE HEAPXtest_bucket@SNOD Hh`%87X95TREE HEAPX foo_holder@SNOD(Pp X x TREEHEAPX foo1foo2@SNOD0 X x XTREEPHEAPXmy_dataHSNOD8`8! 79] 0 attr2 HSNOD@ ]% [% ` @attr1 %+ 8attr3 ?@4 4Q @ H data_type%, H namespace %-$XTREE"HEAPXX!my_dataH 79] 0 attr2 "HSNOD!8! @attr1 %. 
8attr3 ?@4 4Q@ H data_type%/ H namespace %087X9GCOL I am foo1 test_coreFoo I am foo2 test_coreFoo test_core FooBucket test_core FooFile ]{"groups":[{"datasets":[{"dtype":"int","name":"my_data","doc":"an example dataset","attributes":[{"name":"attr2","doc":"an example integer attribute","dtype":"int"}]}],"doc":"A test group specification with a data type","data_type_def":"Foo","attributes":[{"name":"attr1","doc":"an example string attribute","dtype":"text"},{"name":"attr3","doc":"an example float attribute","dtype":"float"}]},{"groups":[{"groups":[{"doc":"the Foos in this bucket","quantity":"*","data_type_inc":"Foo"}],"name":"foo_holder","doc":"A subgroup for Foos"}],"doc":"A test group specification for a data type containing data type","data_type_def":"FooBucket"},{"groups":[{"groups":[{"doc":"One or more FooBuckets","quantity":"+","data_type_inc":"FooBucket"}],"name":"buckets","doc":"Holds the FooBuckets"}],"doc":"A file of Foos contained in FooBuckets","data_type_def":"FooFile"}]} [{"namespaces":[{"doc":"a test namespace","schema":[{"source":"test"}],"name":"test_core"}]} I am foo1Foo test_core I am foo2Foo test_core FooBucket test_coreFooFile test_core I am foo1Foo test_core I am foo2Foo test_core FooBucket test_coreFooFile test_core! I am foo1"Foo# test_core$ I am foo2%Foo& test_core' FooBucket( test_core)FooFile* test_core+ I am foo1,Foo- test_core. I am foo2/Foo0 test_core1 FooBucket2 test_core3FooFile4 test_coreHPp H data_type %1 H namespace %2 0 .specloc`%E H data_type%3TREE<HEAPXx9test_core@ 0 .specloc`%H:h<TREE@HEAPX<unversioned@SNOD :H:h<P>p@TREE@CHEAPX @testnamespace8SNOD(>P>p@79]SNODD0B79] H namespace %4`././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/back_compat_tests/__init__.py0000644000655200065520000000000000000000000023733 0ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/back_compat_tests/test_1_1_0.py0000644000655200065520000000432600000000000024051 0ustar00circlecicircleci00000000000000from shutil import copyfile import os from hdmf.backends.hdf5.h5tools import HDF5IO from tests.unit.test_io_hdf5_h5tools import _get_manager from tests.unit.utils import Foo, FooBucket from hdmf.testing import TestCase class Test1_1_0(TestCase): def setUp(self): # created using manager in test_io_hdf5_h5tools self.orig_1_0_5 = 'tests/unit/back_compat_tests/1.0.5.h5' self.path_1_0_5 = 'test_1.0.5.h5' copyfile(self.orig_1_0_5, self.path_1_0_5) # note: this may break if the current manager is different from the old manager # better to save a spec file self.manager = _get_manager() def tearDown(self): if os.path.exists(self.path_1_0_5): os.remove(self.path_1_0_5) def test_read_1_0_5(self): '''Test whether we can read files made by hdmf version 1.0.5''' with HDF5IO(self.path_1_0_5, manager=self.manager, mode='r') as io: read_foofile = io.read() self.assertTrue(len(read_foofile.buckets) == 1) # workaround for the fact that order of foos is not maintained for foo in read_foofile.buckets[0].foos: if foo.name == 'foo1': self.assertListEqual(foo.my_data[:].tolist(), [0, 1, 2, 3, 4]) if foo.name == 'foo2': self.assertListEqual(foo.my_data[:].tolist(), [5, 6, 7, 8, 9]) def test_append_1_0_5(self): '''Test whether we can append to files made by hdmf version 1.0.5''' foo = Foo('foo3', [10, 20, 30, 40, 50], "I am foo3", 17, 3.14) foobucket = FooBucket('foobucket2', [foo]) with HDF5IO(self.path_1_0_5, 
manager=self.manager, mode='a') as io: read_foofile = io.read() read_foofile.buckets.append(foobucket) foobucket.parent = read_foofile io.write(read_foofile) with HDF5IO(self.path_1_0_5, manager=self.manager, mode='r') as io: read_foofile = io.read() # workaround for the fact that order of buckets is not maintained for bucket in read_foofile.buckets: if bucket.name == 'foobucket2': self.assertListEqual(bucket.foos[0].my_data[:].tolist(), foo.my_data) ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9241881 hdmf-1.5.4/tests/unit/build_tests/0000755000655200065520000000000000000000000020470 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/build_tests/__init__.py0000644000655200065520000000000000000000000022567 0ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/build_tests/test_io_build_builders.py0000644000655200065520000002673500000000000025575 0ustar00circlecicircleci00000000000000from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder from hdmf.testing import TestCase class GroupBuilderSetterTests(TestCase): """Tests for setter functions in GroupBuilder class""" def setUp(self): self.gb = GroupBuilder('gb') self.gb2 = GroupBuilder('gb2', source='file1') def tearDown(self): pass def test_setitem_disabled(self): """Test __set_item__ is disabled""" with self.assertRaises(NotImplementedError): self.gb['key'] = 'value' def test_add_dataset(self): ds = self.gb.add_dataset('my_dataset', list(range(10))) self.assertIsInstance(ds, DatasetBuilder) self.assertIs(self.gb, ds.parent) def test_add_group(self): gp = self.gb.add_group('my_subgroup') self.assertIsInstance(gp, GroupBuilder) self.assertIs(self.gb['my_subgroup'], gp) self.assertIs(self.gb, gp.parent) def test_add_link(self): gp = self.gb.add_group('my_subgroup') sl = self.gb.add_link(gp, 'my_link') self.assertIsInstance(sl, LinkBuilder) self.assertIs(self.gb['my_link'], sl) self.assertIs(self.gb, sl.parent) def test_add_external_link(self): gp = self.gb2.add_group('my_subgroup') el = self.gb.add_link(gp, 'my_externallink') self.assertIsInstance(el, LinkBuilder) self.assertIs(self.gb['my_externallink'], el) self.assertIs(self.gb, el.parent) self.assertIs(self.gb2, gp.parent) # @unittest.expectedFailure def test_set_attribute(self): self.gb.set_attribute('key', 'value') self.assertIn('key', self.gb.obj_type) # self.assertEqual(dict.__getitem__(self.gb, 'attributes')['key'], 'value') self.assertEqual(self.gb['key'], 'value') def test_parent_constructor(self): gb2 = GroupBuilder('gb2', parent=self.gb) self.assertIs(gb2.parent, self.gb) def test_set_group(self): self.gb.set_group(self.gb2) self.assertIs(self.gb2.parent, self.gb) class GroupBuilderGetterTests(TestCase): def setUp(self): self.subgroup1 = GroupBuilder('subgroup1') self.dataset1 = DatasetBuilder('dataset1', list(range(10))) self.soft_link1 = LinkBuilder(self.subgroup1, 'soft_link1') self.int_attr = 1 self.str_attr = "my_str" self.group1 = GroupBuilder('group1', {'subgroup1': self.subgroup1}) self.gb = GroupBuilder('gb', {'group1': self.group1}, {'dataset1': self.dataset1}, {'int_attr': self.int_attr, 'str_attr': self.str_attr}, {'soft_link1': self.soft_link1}) # {'soft_link1': self.soft_link1, # 'external_link1': self.external_link1})) def tearDown(self): pass def test_path(self): 
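# The assertions below exercise Builder.path, which joins the names of a builder's
# ancestors with '/': nesting subgroup1 inside group1 inside gb (as built in setUp)
# yields 'gb/group1/subgroup1', while a direct child such as dataset1 yields 'gb/dataset1'.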
self.assertEqual(self.subgroup1.path, 'gb/group1/subgroup1') self.assertEqual(self.dataset1.path, 'gb/dataset1') self.assertEqual(self.soft_link1.path, 'gb/soft_link1') self.assertEqual(self.group1.path, 'gb/group1') self.assertEqual(self.gb.path, 'gb') def test_get_item_group(self): """Test __get_item__ for groups""" self.assertIs(self.gb['group1'], self.group1) def test_get_item_group_subgroup1(self): """Test __get_item__ for groups deeper in hierarchy""" self.assertIs(self.gb['group1/subgroup1'], self.subgroup1) def test_get_item_dataset(self): """Test __get_item__ for datasets""" self.assertIs(self.gb['dataset1'], self.dataset1) def test_get_item_attr1(self): """Test __get_item__ for attributes""" self.assertEqual(self.gb['int_attr'], self.int_attr) def test_get_item_attr2(self): """Test __get_item__ for attributes""" self.assertEqual(self.gb['str_attr'], self.str_attr) def test_get_item_invalid_key(self): """Test __get_item__ for invalid key""" with self.assertRaises(KeyError): self.gb['invalid_key'] def test_get_item_soft_link(self): """Test __get_item__ for soft links""" self.assertIs(self.gb['soft_link1'], self.soft_link1) def test_get_group(self): """Test get() for groups""" self.assertIs(self.gb.get('group1'), self.group1) def test_get_group_subgroup1(self): """Test get() for groups deeper in hierarchy""" self.assertIs(self.gb.get('group1/subgroup1'), self.subgroup1) def test_get_dataset(self): """Test get() for datasets""" self.assertIs(self.gb.get('dataset1'), self.dataset1) def test_get_attr1(self): """Test get() for attributes""" self.assertEqual(self.gb.get('int_attr'), self.int_attr) def test_get_attr2(self): """Test get() for attributes""" self.assertEqual(self.gb.get('str_attr'), self.str_attr) def test_get_item_fcn_soft_link(self): """Test get() for soft links""" self.assertIs(self.gb.get('soft_link1'), self.soft_link1) def test_get_invalid_key(self): """Test get() for invalid key""" self.assertIs(self.gb.get('invalid_key'), None) def test_items(self): """Test items()""" items = ( ('group1', self.group1), ('dataset1', self.dataset1), ('int_attr', self.int_attr), ('str_attr', self.str_attr), ('soft_link1', self.soft_link1), # ('external_link1', self.external_link1) ) # self.assertSetEqual(items, set(self.gb.items())) try: self.assertCountEqual(items, self.gb.items()) except AttributeError: self.assertItemsEqual(items, self.gb.items()) def test_keys(self): """Test keys()""" keys = ( 'group1', 'dataset1', 'int_attr', 'str_attr', 'soft_link1', # 'external_link1', ) try: self.assertCountEqual(keys, self.gb.keys()) except AttributeError: self.assertItemsEqual(keys, self.gb.keys()) def test_values(self): """Test values()""" values = ( self.group1, self.dataset1, self.int_attr, self.str_attr, self.soft_link1, # self.external_link1, ) try: self.assertCountEqual(values, self.gb.values()) except AttributeError: self.assertItemsEqual(values, self.gb.values()) class GroupBuilderIsEmptyTests(TestCase): def test_is_empty_true(self): """Test empty when group has nothing in it""" gb = GroupBuilder('gb') self.assertEqual(gb.is_empty(), True) def test_is_empty_true_group(self): """Test is_empty() when group has an empty subgroup""" gb = GroupBuilder('gb', {'my_subgroup': GroupBuilder('my_subgroup')}) self.assertEqual(gb.is_empty(), True) def test_is_empty_false_dataset(self): """Test is_empty() when group has a dataset""" gb = GroupBuilder('gb', datasets={'my_dataset': DatasetBuilder('my_dataset')}) self.assertEqual(gb.is_empty(), False) def test_is_empty_false_group_dataset(self): """Test 
is_empty() when group has a subgroup with a dataset""" gb = GroupBuilder( 'gb', {'my_subgroup': GroupBuilder( 'my_subgroup', datasets={'my_dataset': DatasetBuilder('my_dataset')})}) self.assertEqual(gb.is_empty(), False) def test_is_empty_false_attribute(self): """Test is_empty() when group has an attribute""" gb = GroupBuilder('gb', attributes={'my_attr': 'attr_value'}) self.assertEqual(gb.is_empty(), False) def test_is_empty_false_group_attribute(self): """Test is_empty() when group has subgroup with an attribute""" gb = GroupBuilder('gb', {'my_subgroup': GroupBuilder('my_subgroup', attributes={'my_attr': 'attr_value'})}) self.assertEqual(gb.is_empty(), False) class GroupBuilderDeepUpdateTests(TestCase): def test_mutually_exclusive_subgroups(self): gb1 = GroupBuilder('gb1', {'subgroup1': GroupBuilder('subgroup1')}) gb2 = GroupBuilder('gb2', {'subgroup2': GroupBuilder('subgroup2')}) gb1.deep_update(gb2) self.assertIn('subgroup2', gb1) gb1sg = gb1['subgroup2'] gb2sg = gb2['subgroup2'] self.assertIs(gb1sg, gb2sg) def test_mutually_exclusive_datasets(self): gb1 = GroupBuilder('gb1', datasets={'dataset1': DatasetBuilder('dataset1', [1, 2, 3])}) gb2 = GroupBuilder('gb2', datasets={'dataset2': DatasetBuilder('dataset2', [4, 5, 6])}) gb1.deep_update(gb2) self.assertIn('dataset2', gb1) # self.assertIs(gb1['dataset2'], gb2['dataset2']) self.assertListEqual(gb1['dataset2'].data, gb2['dataset2'].data) def test_mutually_exclusive_attributes(self): gb1 = GroupBuilder('gb1', attributes={'attr1': 'my_attribute1'}) gb2 = GroupBuilder('gb2', attributes={'attr2': 'my_attribute2'}) gb1.deep_update(gb2) self.assertIn('attr2', gb2) self.assertEqual(gb2['attr2'], 'my_attribute2') def test_mutually_exclusive_links(self): gb1 = GroupBuilder('gb1', links={'link1': LinkBuilder(GroupBuilder('target1'), 'link1')}) gb2 = GroupBuilder('gb2', links={'link2': LinkBuilder(GroupBuilder('target2'), 'link2')}) gb1.deep_update(gb2) self.assertIn('link2', gb2) self.assertEqual(gb1['link2'], gb2['link2']) def test_intersecting_subgroups(self): subgroup2 = GroupBuilder('subgroup2') gb1 = GroupBuilder('gb1', {'subgroup1': GroupBuilder('subgroup1'), 'subgroup2': subgroup2}) gb2 = GroupBuilder('gb2', {'subgroup2': GroupBuilder('subgroup2'), 'subgroup3': GroupBuilder('subgroup3')}) gb1.deep_update(gb2) self.assertIn('subgroup3', gb1) self.assertIs(gb1['subgroup3'], gb2['subgroup3']) self.assertIs(gb1['subgroup2'], subgroup2) def test_intersecting_datasets(self): gb1 = GroupBuilder('gb1', datasets={'dataset2': DatasetBuilder('dataset2', [1, 2, 3])}) gb2 = GroupBuilder('gb2', datasets={'dataset2': DatasetBuilder('dataset2', [4, 5, 6])}) gb1.deep_update(gb2) self.assertIn('dataset2', gb1) self.assertListEqual(gb1['dataset2'].data, gb2['dataset2'].data) def test_intersecting_attributes(self): gb1 = GroupBuilder('gb1', attributes={'attr2': 'my_attribute1'}) gb2 = GroupBuilder('gb2', attributes={'attr2': 'my_attribute2'}) gb1.deep_update(gb2) self.assertIn('attr2', gb2) self.assertEqual(gb2['attr2'], 'my_attribute2') def test_intersecting_links(self): gb1 = GroupBuilder('gb1', links={'link2': LinkBuilder(GroupBuilder('target1'), 'link2')}) gb2 = GroupBuilder('gb2', links={'link2': LinkBuilder(GroupBuilder('target2'), 'link2')}) gb1.deep_update(gb2) self.assertIn('link2', gb2) self.assertEqual(gb1['link2'], gb2['link2']) class DatasetBuilderDeepUpdateTests(TestCase): def test_overwrite(self): db1 = DatasetBuilder('db1', [1, 2, 3]) db2 = DatasetBuilder('db2', [4, 5, 6]) db1.deep_update(db2) self.assertListEqual(db1.data, db2.data) def 
test_no_overwrite(self): db1 = DatasetBuilder('db1', [1, 2, 3]) db2 = DatasetBuilder('db2', [4, 5, 6], attributes={'attr1': 'va1'}) db1.deep_update(db2) self.assertListEqual(db1.data, db2.data) self.assertIn('attr1', db1.attributes) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/build_tests/test_io_manager.py0000644000655200065520000003024500000000000024206 0ustar00circlecicircleci00000000000000from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog from hdmf.spec.spec import ZERO_OR_MANY from hdmf.build import GroupBuilder, DatasetBuilder from hdmf.build import ObjectMapper, BuildManager, TypeMap from hdmf.testing import TestCase from abc import ABCMeta, abstractmethod from tests.unit.utils import Foo, FooBucket, CORE_NAMESPACE class FooMapper(ObjectMapper): """Maps nested 'attr2' attribute on dataset 'my_data' to Foo.attr2 in constructor and attribute map """ def __init__(self, spec): super().__init__(spec) my_data_spec = spec.get_dataset('my_data') self.map_spec('attr2', my_data_spec.get_attribute('attr2')) class TestBase(TestCase): def setUp(self): self.foo_spec = GroupSpec('A test group specification with a data type', data_type_def='Foo', datasets=[DatasetSpec( 'an example dataset', 'int', name='my_data', attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) self.spec_catalog = SpecCatalog() self.spec_catalog.register_spec(self.foo_spec, 'test.yaml') self.namespace = SpecNamespace( 'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=self.spec_catalog) self.namespace_catalog = NamespaceCatalog() self.namespace_catalog.add_namespace(CORE_NAMESPACE, self.namespace) self.type_map = TypeMap(self.namespace_catalog) self.type_map.register_container_type(CORE_NAMESPACE, 'Foo', Foo) self.type_map.register_map(Foo, FooMapper) self.manager = BuildManager(self.type_map) class TestBuildManager(TestBase): def test_build(self): container_inst = Foo('my_foo', list(range(10)), 'value1', 10) expected = GroupBuilder( 'my_foo', datasets={ 'my_data': DatasetBuilder( 'my_data', list(range(10)), attributes={'attr2': 10})}, attributes={'attr1': 'value1', 'namespace': CORE_NAMESPACE, 'data_type': 'Foo', 'object_id': container_inst.object_id}) builder1 = self.manager.build(container_inst) self.assertDictEqual(builder1, expected) def test_build_memoization(self): container_inst = Foo('my_foo', list(range(10)), 'value1', 10) expected = GroupBuilder( 'my_foo', datasets={ 'my_data': DatasetBuilder( 'my_data', list(range(10)), attributes={'attr2': 10})}, attributes={'attr1': 'value1', 'namespace': CORE_NAMESPACE, 'data_type': 'Foo', 'object_id': container_inst.object_id}) builder1 = self.manager.build(container_inst) builder2 = self.manager.build(container_inst) self.assertDictEqual(builder1, expected) self.assertIs(builder1, builder2) def test_construct(self): builder = GroupBuilder( 'my_foo', datasets={ 'my_data': DatasetBuilder( 'my_data', list(range(10)), attributes={'attr2': 10})}, attributes={'attr1': 'value1', 'namespace': CORE_NAMESPACE, 'data_type': 'Foo', 'object_id': -1}) container = self.manager.construct(builder) self.assertListEqual(container.my_data, list(range(10))) self.assertEqual(container.attr1, 'value1') self.assertEqual(container.attr2, 10) def test_construct_memoization(self): builder = GroupBuilder( 'my_foo', 
datasets={'my_data': DatasetBuilder( 'my_data', list(range(10)), attributes={'attr2': 10})}, attributes={'attr1': 'value1', 'namespace': CORE_NAMESPACE, 'data_type': 'Foo', 'object_id': -1}) container1 = self.manager.construct(builder) container2 = self.manager.construct(builder) self.assertIs(container1, container2) class NestedBaseMixin(metaclass=ABCMeta): def setUp(self): super().setUp() self.foo_bucket = FooBucket('test_foo_bucket', [ Foo('my_foo1', list(range(10)), 'value1', 10), Foo('my_foo2', list(range(10, 20)), 'value2', 20)]) self.foo_builders = { 'my_foo1': GroupBuilder('my_foo1', datasets={'my_data': DatasetBuilder( 'my_data', list(range(10)), attributes={'attr2': 10})}, attributes={'attr1': 'value1', 'namespace': CORE_NAMESPACE, 'data_type': 'Foo', 'object_id': self.foo_bucket.foos[0].object_id}), 'my_foo2': GroupBuilder('my_foo2', datasets={'my_data': DatasetBuilder( 'my_data', list(range(10, 20)), attributes={'attr2': 20})}, attributes={'attr1': 'value2', 'namespace': CORE_NAMESPACE, 'data_type': 'Foo', 'object_id': self.foo_bucket.foos[1].object_id}) } self.setUpBucketBuilder() self.setUpBucketSpec() self.spec_catalog.register_spec(self.bucket_spec, 'test.yaml') self.type_map.register_container_type(CORE_NAMESPACE, 'FooBucket', FooBucket) self.type_map.register_map(FooBucket, self.setUpBucketMapper()) self.manager = BuildManager(self.type_map) @abstractmethod def setUpBucketBuilder(self): raise NotImplementedError('Cannot run test unless setUpBucketBuilder is implemented') @abstractmethod def setUpBucketSpec(self): raise NotImplementedError('Cannot run test unless setUpBucketSpec is implemented') @abstractmethod def setUpBucketMapper(self): raise NotImplementedError('Cannot run test unless setUpBucketMapper is implemented') def test_build(self): ''' Test default mapping for an Container that has an Container as an attribute value ''' builder = self.manager.build(self.foo_bucket) self.assertDictEqual(builder, self.bucket_builder) def test_construct(self): container = self.manager.construct(self.bucket_builder) self.assertEqual(container, self.foo_bucket) class TestNestedContainersNoSubgroups(NestedBaseMixin, TestBase): ''' Test BuildManager.build and BuildManager.construct when the Container contains other Containers, but does not keep them in additional subgroups ''' def setUpBucketBuilder(self): self.bucket_builder = GroupBuilder( 'test_foo_bucket', groups=self.foo_builders, attributes={'namespace': CORE_NAMESPACE, 'data_type': 'FooBucket', 'object_id': self.foo_bucket.object_id}) def setUpBucketSpec(self): self.bucket_spec = GroupSpec('A test group specification for a data type containing data type', name="test_foo_bucket", data_type_def='FooBucket', groups=[GroupSpec( 'the Foos in this bucket', data_type_inc='Foo', quantity=ZERO_OR_MANY)]) def setUpBucketMapper(self): return ObjectMapper class TestNestedContainersSubgroup(NestedBaseMixin, TestBase): ''' Test BuildManager.build and BuildManager.construct when the Container contains other Containers that are stored in a subgroup ''' def setUpBucketBuilder(self): tmp_builder = GroupBuilder('foo_holder', groups=self.foo_builders) self.bucket_builder = GroupBuilder( 'test_foo_bucket', groups={'foos': tmp_builder}, attributes={'namespace': CORE_NAMESPACE, 'data_type': 'FooBucket', 'object_id': self.foo_bucket.object_id}) def setUpBucketSpec(self): tmp_spec = GroupSpec( 'A subgroup for Foos', name='foo_holder', groups=[GroupSpec('the Foos in this bucket', data_type_inc='Foo', quantity=ZERO_OR_MANY)]) self.bucket_spec = 
GroupSpec('A test group specification for a data type containing data type', name="test_foo_bucket", data_type_def='FooBucket', groups=[tmp_spec]) def setUpBucketMapper(self): class BucketMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) self.unmap(spec.get_group('foo_holder')) self.map_spec('foos', spec.get_group('foo_holder').get_data_type('Foo')) return BucketMapper class TestNestedContainersSubgroupSubgroup(NestedBaseMixin, TestBase): ''' Test BuildManager.build and BuildManager.construct when the Container contains other Containers that are stored in a subgroup in a subgroup ''' def setUpBucketBuilder(self): tmp_builder = GroupBuilder('foo_holder', groups=self.foo_builders) tmp_builder = GroupBuilder('foo_holder_holder', groups={'foo_holder': tmp_builder}) self.bucket_builder = GroupBuilder( 'test_foo_bucket', groups={'foo_holder': tmp_builder}, attributes={'namespace': CORE_NAMESPACE, 'data_type': 'FooBucket', 'object_id': self.foo_bucket.object_id}) def setUpBucketSpec(self): tmp_spec = GroupSpec('A subgroup for Foos', name='foo_holder', groups=[GroupSpec('the Foos in this bucket', data_type_inc='Foo', quantity=ZERO_OR_MANY)]) tmp_spec = GroupSpec('A subgroup to hold the subgroup', name='foo_holder_holder', groups=[tmp_spec]) self.bucket_spec = GroupSpec('A test group specification for a data type containing data type', name="test_foo_bucket", data_type_def='FooBucket', groups=[tmp_spec]) def setUpBucketMapper(self): class BucketMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) self.unmap(spec.get_group('foo_holder_holder')) self.unmap(spec.get_group('foo_holder_holder').get_group('foo_holder')) self.map_spec('foos', spec.get_group('foo_holder_holder').get_group('foo_holder').get_data_type('Foo')) return BucketMapper class TestTypeMap(TestBase): def test_get_ns_dt_missing(self): bldr = GroupBuilder('my_foo', attributes={'attr1': 'value1'}) dt = self.type_map.get_builder_dt(bldr) ns = self.type_map.get_builder_ns(bldr) self.assertIsNone(dt) self.assertIsNone(ns) def test_get_ns_dt(self): bldr = GroupBuilder('my_foo', attributes={'attr1': 'value1', 'namespace': 'CORE', 'data_type': 'Foo', 'object_id': -1}) dt = self.type_map.get_builder_dt(bldr) ns = self.type_map.get_builder_ns(bldr) self.assertEqual(dt, 'Foo') self.assertEqual(ns, 'CORE') # TODO: class TestWildCardNamedSpecs(TestCase): pass ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/build_tests/test_io_map.py0000644000655200065520000010632700000000000023356 0ustar00circlecicircleci00000000000000from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, RefSpec from hdmf.build import GroupBuilder, DatasetBuilder, ObjectMapper, BuildManager, TypeMap, LinkBuilder from hdmf import Container from hdmf.utils import docval, getargs, get_docval from hdmf.data_utils import DataChunkIterator from hdmf.backends.hdf5 import H5DataIO from hdmf.testing import TestCase from abc import ABCMeta, abstractmethod import numpy as np from tests.unit.utils import CORE_NAMESPACE class Bar(Container): @docval({'name': 'name', 'type': str, 'doc': 'the name of this Bar'}, {'name': 'data', 'type': ('data', 'array_data'), 'doc': 'some data'}, {'name': 'attr1', 'type': str, 'doc': 'an attribute'}, {'name': 'attr2', 'type': int, 'doc': 'another attribute'}, {'name': 'attr3', 'type': float, 'doc': 'a third attribute', 'default': 3.14}, {'name': 'foo', 'type': 'Foo', 'doc': 'a group', 
'default': None}) def __init__(self, **kwargs): name, data, attr1, attr2, attr3, foo = getargs('name', 'data', 'attr1', 'attr2', 'attr3', 'foo', kwargs) super().__init__(name=name) self.__data = data self.__attr1 = attr1 self.__attr2 = attr2 self.__attr3 = attr3 self.__foo = foo if self.__foo is not None and self.__foo.parent is None: self.__foo.parent = self def __eq__(self, other): attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') return all(getattr(self, a) == getattr(other, a) for a in attrs) def __str__(self): attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') return ','.join('%s=%s' % (a, getattr(self, a)) for a in attrs) @property def data_type(self): return 'Bar' @property def data(self): return self.__data @property def attr1(self): return self.__attr1 @property def attr2(self): return self.__attr2 @property def attr3(self): return self.__attr3 @property def foo(self): return self.__foo class Foo(Container): @property def data_type(self): return 'Foo' class TestGetSubSpec(TestCase): def setUp(self): self.bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar') spec_catalog = SpecCatalog() spec_catalog.register_spec(self.bar_spec, 'test.yaml') namespace = SpecNamespace('a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) self.type_map = TypeMap(namespace_catalog) self.type_map.register_container_type(CORE_NAMESPACE, 'Bar', Bar) def test_get_subspec_data_type_noname(self): parent_spec = GroupSpec('Something to hold a Bar', 'bar_bucket', groups=[self.bar_spec]) sub_builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': -1}) GroupBuilder('bar_bucket', groups={'my_bar': sub_builder}) result = self.type_map.get_subspec(parent_spec, sub_builder) self.assertIs(result, self.bar_spec) def test_get_subspec_named(self): child_spec = GroupSpec('A test group specification with a data type', 'my_subgroup') parent_spec = GroupSpec('Something to hold a Bar', 'my_group', groups=[child_spec]) sub_builder = GroupBuilder('my_subgroup', attributes={'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': -1}) GroupBuilder('my_group', groups={'my_bar': sub_builder}) result = self.type_map.get_subspec(parent_spec, sub_builder) self.assertIs(result, child_spec) class TestTypeMap(TestCase): def setUp(self): self.bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar') self.foo_spec = GroupSpec('A test group specification with data type Foo', data_type_def='Foo') self.spec_catalog = SpecCatalog() self.spec_catalog.register_spec(self.bar_spec, 'test.yaml') self.spec_catalog.register_spec(self.foo_spec, 'test.yaml') self.namespace = SpecNamespace('a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=self.spec_catalog) self.namespace_catalog = NamespaceCatalog() self.namespace_catalog.add_namespace(CORE_NAMESPACE, self.namespace) self.type_map = TypeMap(self.namespace_catalog) self.type_map.register_container_type(CORE_NAMESPACE, 'Bar', Bar) self.type_map.register_container_type(CORE_NAMESPACE, 'Foo', Foo) # self.build_manager = BuildManager(self.type_map) def test_get_map_unique_mappers(self): self.type_map.register_map(Bar, ObjectMapper) self.type_map.register_map(Foo, ObjectMapper) bar_inst = Bar('my_bar', list(range(10)), 'value1', 10) foo_inst = Foo(name='my_foo') bar_mapper = 
self.type_map.get_map(bar_inst) foo_mapper = self.type_map.get_map(foo_inst) self.assertIsNot(bar_mapper, foo_mapper) def test_get_map(self): self.type_map.register_map(Bar, ObjectMapper) container_inst = Bar('my_bar', list(range(10)), 'value1', 10) mapper = self.type_map.get_map(container_inst) self.assertIsInstance(mapper, ObjectMapper) self.assertIs(mapper.spec, self.bar_spec) mapper2 = self.type_map.get_map(container_inst) self.assertIs(mapper, mapper2) def test_get_map_register(self): class MyMap(ObjectMapper): pass self.type_map.register_map(Bar, MyMap) container_inst = Bar('my_bar', list(range(10)), 'value1', 10) mapper = self.type_map.get_map(container_inst) self.assertIs(mapper.spec, self.bar_spec) self.assertIsInstance(mapper, MyMap) class BarMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) data_spec = spec.get_dataset('data') self.map_spec('attr2', data_spec.get_attribute('attr2')) class TestMapStrings(TestCase): def customSetUp(self, bar_spec): spec_catalog = SpecCatalog() spec_catalog.register_spec(bar_spec, 'test.yaml') namespace = SpecNamespace('a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) type_map = TypeMap(namespace_catalog) type_map.register_container_type(CORE_NAMESPACE, 'Bar', Bar) return type_map def test_build_1d(self): bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', datasets=[DatasetSpec('an example dataset', 'text', name='data', shape=(None,), attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) type_map = self.customSetUp(bar_spec) type_map.register_map(Bar, BarMapper) bar_inst = Bar('my_bar', ['a', 'b', 'c', 'd'], 'value1', 10) builder = type_map.build(bar_inst) self.assertEqual(builder.get('data').data, ['a', 'b', 'c', 'd']) def test_build_scalar(self): bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', datasets=[DatasetSpec('an example dataset', 'text', name='data', attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) type_map = self.customSetUp(bar_spec) type_map.register_map(Bar, BarMapper) bar_inst = Bar('my_bar', ['a', 'b', 'c', 'd'], 'value1', 10) builder = type_map.build(bar_inst) self.assertEqual(builder.get('data').data, "['a', 'b', 'c', 'd']") def test_build_dataio(self): bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', datasets=[DatasetSpec('an example dataset', 'text', name='data', shape=(None,), attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) type_map = self.customSetUp(bar_spec) type_map.register_map(Bar, BarMapper) bar_inst = Bar('my_bar', H5DataIO(['a', 'b', 'c', 'd'], chunks=True), 'value1', 10) builder = type_map.build(bar_inst) self.assertIsInstance(builder.get('data').data, H5DataIO) class TestDynamicContainer(TestCase): def setUp(self): self.bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', datasets=[DatasetSpec('an example dataset', 'int', name='data', attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) 
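# --- Illustrative aside, not part of hdmf ------------------------------------
# The Bar spec above mirrors the docval signature of the Bar container defined
# earlier in this module: 'data' maps to the DatasetSpec, 'attr1' to the
# group-level AttributeSpec, and 'attr2' to the attribute nested on the 'data'
# dataset. A reduced spec built from the same constructors (already imported at
# the top of this module), showing how sub-specs are looked up afterwards;
# example_spec is illustrative and not used by these tests.
example_spec = GroupSpec('a reduced Bar-like spec',
                         data_type_def='Bar',
                         datasets=[DatasetSpec('an example dataset', 'int', name='data')],
                         attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')])
example_dataset_spec = example_spec.get_dataset('data')        # the DatasetSpec named 'data'
example_attribute_spec = example_spec.get_attribute('attr1')   # the group-level AttributeSpec
# ------------------------------------------------------------------------------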
self.spec_catalog = SpecCatalog() self.spec_catalog.register_spec(self.bar_spec, 'test.yaml') self.namespace = SpecNamespace('a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=self.spec_catalog) self.namespace_catalog = NamespaceCatalog() self.namespace_catalog.add_namespace(CORE_NAMESPACE, self.namespace) self.type_map = TypeMap(self.namespace_catalog) self.type_map.register_container_type(CORE_NAMESPACE, 'Bar', Bar) self.type_map.register_map(Bar, ObjectMapper) self.manager = BuildManager(self.type_map) self.mapper = ObjectMapper(self.bar_spec) def test_dynamic_container_creation(self): baz_spec = GroupSpec('A test extension with no Container class', data_type_def='Baz', data_type_inc=self.bar_spec, attributes=[AttributeSpec('attr3', 'an example float attribute', 'float'), AttributeSpec('attr4', 'another example float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_container_cls(CORE_NAMESPACE, 'Baz') expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4'} received_args = set() for x in get_docval(cls.__init__): if x['name'] != 'foo': received_args.add(x['name']) with self.subTest(name=x['name']): self.assertNotIn('default', x) self.assertSetEqual(expected_args, received_args) self.assertEqual(cls.__name__, 'Baz') self.assertTrue(issubclass(cls, Bar)) def test_dynamic_container_default_name(self): baz_spec = GroupSpec('doc', default_name='bingo', data_type_def='Baz') self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_container_cls(CORE_NAMESPACE, 'Baz') inst = cls() self.assertEqual(inst.name, 'bingo') def test_dynamic_container_creation_defaults(self): baz_spec = GroupSpec('A test extension with no Container class', data_type_def='Baz', data_type_inc=self.bar_spec, attributes=[AttributeSpec('attr3', 'an example float attribute', 'float'), AttributeSpec('attr4', 'another example float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_container_cls(CORE_NAMESPACE, 'Baz') expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'foo'} received_args = set(map(lambda x: x['name'], get_docval(cls.__init__))) self.assertSetEqual(expected_args, received_args) self.assertEqual(cls.__name__, 'Baz') self.assertTrue(issubclass(cls, Bar)) def test_dynamic_container_constructor(self): baz_spec = GroupSpec('A test extension with no Container class', data_type_def='Baz', data_type_inc=self.bar_spec, attributes=[AttributeSpec('attr3', 'an example float attribute', 'float'), AttributeSpec('attr4', 'another example float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_container_cls(CORE_NAMESPACE, 'Baz') # TODO: test that constructor works! 
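# The generated Baz class accepts the spec-derived arguments (name, data, attr1, attr2,
# attr3, attr4) through a docval-decorated __init__; get_docval(cls.__init__), used in
# the tests above, returns the tuple of docval argument dicts, which is how the expected
# signature is checked by name. The call below exercises that generated constructor directly.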
inst = cls('My Baz', [1, 2, 3, 4], 'string attribute', 1000, attr3=98.6, attr4=1.0) self.assertEqual(inst.name, 'My Baz') self.assertEqual(inst.data, [1, 2, 3, 4]) self.assertEqual(inst.attr1, 'string attribute') self.assertEqual(inst.attr2, 1000) self.assertEqual(inst.attr3, 98.6) self.assertEqual(inst.attr4, 1.0) def test_dynamic_container_constructor_name(self): # name is specified in spec and cannot be changed baz_spec = GroupSpec('A test extension with no Container class', data_type_def='Baz', data_type_inc=self.bar_spec, name='A fixed name', attributes=[AttributeSpec('attr3', 'an example float attribute', 'float'), AttributeSpec('attr4', 'another example float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_container_cls(CORE_NAMESPACE, 'Baz') with self.assertRaises(TypeError): inst = cls('My Baz', [1, 2, 3, 4], 'string attribute', 1000, attr3=98.6, attr4=1.0) inst = cls([1, 2, 3, 4], 'string attribute', 1000, attr3=98.6, attr4=1.0) self.assertEqual(inst.name, 'A fixed name') self.assertEqual(inst.data, [1, 2, 3, 4]) self.assertEqual(inst.attr1, 'string attribute') self.assertEqual(inst.attr2, 1000) self.assertEqual(inst.attr3, 98.6) self.assertEqual(inst.attr4, 1.0) def test_dynamic_container_constructor_name_default_name(self): # if both name and default_name are specified, name should be used with self.assertWarns(Warning): baz_spec = GroupSpec('A test extension with no Container class', data_type_def='Baz', data_type_inc=self.bar_spec, name='A fixed name', default_name='A default name', attributes=[AttributeSpec('attr3', 'an example float attribute', 'float'), AttributeSpec('attr4', 'another example float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') cls = self.type_map.get_container_cls(CORE_NAMESPACE, 'Baz') inst = cls([1, 2, 3, 4], 'string attribute', 1000, attr3=98.6, attr4=1.0) self.assertEqual(inst.name, 'A fixed name') def test_dynamic_container_composition(self): baz_spec2 = GroupSpec('A composition inside', data_type_def='Baz2', data_type_inc=self.bar_spec, attributes=[ AttributeSpec('attr3', 'an example float attribute', 'float'), AttributeSpec('attr4', 'another example float attribute', 'float')]) baz_spec1 = GroupSpec('A composition test outside', data_type_def='Baz1', data_type_inc=self.bar_spec, attributes=[AttributeSpec('attr3', 'an example float attribute', 'float'), AttributeSpec('attr4', 'another example float attribute', 'float')], groups=[GroupSpec('A composition inside', data_type_inc='Baz2')]) self.spec_catalog.register_spec(baz_spec1, 'extension.yaml') self.spec_catalog.register_spec(baz_spec2, 'extension.yaml') Baz2 = self.type_map.get_container_cls(CORE_NAMESPACE, 'Baz2') Baz1 = self.type_map.get_container_cls(CORE_NAMESPACE, 'Baz1') Baz1('My Baz', [1, 2, 3, 4], 'string attribute', 1000, attr3=98.6, attr4=1.0, baz2=Baz2('My Baz', [1, 2, 3, 4], 'string attribute', 1000, attr3=98.6, attr4=1.0)) Bar = self.type_map.get_container_cls(CORE_NAMESPACE, 'Bar') bar = Bar('My Bar', [1, 2, 3, 4], 'string attribute', 1000) with self.assertRaises(TypeError): Baz1('My Baz', [1, 2, 3, 4], 'string attribute', 1000, attr3=98.6, attr4=1.0, baz2=bar) def test_dynamic_container_composition_wrong_order(self): baz_spec2 = GroupSpec('A composition inside', data_type_def='Baz2', data_type_inc=self.bar_spec, attributes=[ AttributeSpec('attr3', 'an example float attribute', 'float'), AttributeSpec('attr4', 'another example float attribute', 'float')]) baz_spec1 = GroupSpec('A 
composition test outside', data_type_def='Baz1', data_type_inc=self.bar_spec, attributes=[AttributeSpec('attr3', 'an example float attribute', 'float'), AttributeSpec('attr4', 'another example float attribute', 'float')], groups=[GroupSpec('A composition inside', data_type_inc='Baz2')]) self.spec_catalog.register_spec(baz_spec1, 'extension.yaml') self.spec_catalog.register_spec(baz_spec2, 'extension.yaml') # Setup all the data we need msg = ("Cannot dynamically generate class for type 'Baz1'. Type 'Baz2' does not exist. " "Please define that type before defining 'Baz1'.") with self.assertRaisesWith(ValueError, msg): self.manager.type_map.get_container_cls(CORE_NAMESPACE, 'Baz1') class ObjectMapperMixin(metaclass=ABCMeta): def setUp(self): self.setUpBarSpec() self.spec_catalog = SpecCatalog() self.spec_catalog.register_spec(self.bar_spec, 'test.yaml') self.namespace = SpecNamespace('a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=self.spec_catalog) self.namespace_catalog = NamespaceCatalog() self.namespace_catalog.add_namespace(CORE_NAMESPACE, self.namespace) self.type_map = TypeMap(self.namespace_catalog) self.type_map.register_container_type(CORE_NAMESPACE, 'Bar', Bar) self.type_map.register_map(Bar, ObjectMapper) self.manager = BuildManager(self.type_map) self.mapper = ObjectMapper(self.bar_spec) @abstractmethod def setUpBarSpec(self): raise NotImplementedError('Cannot run test unless setUpBarSpec is implemented') def test_default_mapping(self): attr_map = self.mapper.get_attr_names(self.bar_spec) keys = set(attr_map.keys()) for key in keys: with self.subTest(key=key): self.assertIs(attr_map[key], self.mapper.get_attr_spec(key)) self.assertIs(attr_map[key], self.mapper.get_carg_spec(key)) class TestObjectMapperNested(ObjectMapperMixin, TestCase): def setUpBarSpec(self): self.bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', datasets=[DatasetSpec('an example dataset', 'int', name='data', attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) def test_build(self): ''' Test default mapping functionality when object attributes map to an attribute deeper than top-level Builder ''' container_inst = Bar('my_bar', list(range(10)), 'value1', 10) expected = GroupBuilder('my_bar', datasets={'data': DatasetBuilder( 'data', list(range(10)), attributes={'attr2': 10})}, attributes={'attr1': 'value1'}) self._remap_nested_attr() builder = self.mapper.build(container_inst, self.manager) self.assertDictEqual(builder, expected) def test_construct(self): ''' Test default mapping functionality when object attributes map to an attribute deeper than top-level Builder ''' expected = Bar('my_bar', list(range(10)), 'value1', 10) builder = GroupBuilder('my_bar', datasets={'data': DatasetBuilder( 'data', list(range(10)), attributes={'attr2': 10})}, attributes={'attr1': 'value1', 'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': expected.object_id}) self._remap_nested_attr() container = self.mapper.construct(builder, self.manager) self.assertEqual(container, expected) def test_default_mapping_keys(self): attr_map = self.mapper.get_attr_names(self.bar_spec) keys = set(attr_map.keys()) expected = {'attr1', 'data', 'data__attr2'} self.assertSetEqual(keys, expected) def test_remap_keys(self): self._remap_nested_attr() self.assertEqual(self.mapper.get_attr_spec('attr2'), self.mapper.spec.get_dataset('data').get_attribute('attr2')) 
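# Without the remap, the nested dataset attribute is exposed under the combined key
# 'data__attr2' (see test_default_mapping_keys above); _remap_nested_attr() re-binds it
# to the plain 'attr2' constructor argument via map_spec, which is what these
# get_attr_spec assertions verify.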
self.assertEqual(self.mapper.get_attr_spec('attr1'), self.mapper.spec.get_attribute('attr1')) self.assertEqual(self.mapper.get_attr_spec('data'), self.mapper.spec.get_dataset('data')) def _remap_nested_attr(self): data_spec = self.mapper.spec.get_dataset('data') self.mapper.map_spec('attr2', data_spec.get_attribute('attr2')) class TestObjectMapperNoNesting(ObjectMapperMixin, TestCase): def setUpBarSpec(self): self.bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', datasets=[DatasetSpec('an example dataset', 'int', name='data')], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), AttributeSpec('attr2', 'an example integer attribute', 'int')]) def test_build(self): ''' Test default mapping functionality when no attributes are nested ''' container = Bar('my_bar', list(range(10)), 'value1', 10) builder = self.mapper.build(container, self.manager) expected = GroupBuilder('my_bar', datasets={'data': DatasetBuilder('data', list(range(10)))}, attributes={'attr1': 'value1', 'attr2': 10}) self.assertDictEqual(builder, expected) def test_build_empty(self): ''' Test default mapping functionality when no attributes are nested ''' container = Bar('my_bar', [], 'value1', 10) builder = self.mapper.build(container, self.manager) expected = GroupBuilder('my_bar', datasets={'data': DatasetBuilder('data', [])}, attributes={'attr1': 'value1', 'attr2': 10}) self.assertDictEqual(builder, expected) def test_construct(self): expected = Bar('my_bar', list(range(10)), 'value1', 10) builder = GroupBuilder('my_bar', datasets={'data': DatasetBuilder('data', list(range(10)))}, attributes={'attr1': 'value1', 'attr2': 10, 'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': expected.object_id}) container = self.mapper.construct(builder, self.manager) self.assertEqual(container, expected) def test_default_mapping_keys(self): attr_map = self.mapper.get_attr_names(self.bar_spec) keys = set(attr_map.keys()) expected = {'attr1', 'data', 'attr2'} self.assertSetEqual(keys, expected) class TestObjectMapperContainer(ObjectMapperMixin, TestCase): def setUpBarSpec(self): self.bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', groups=[GroupSpec('an example group', data_type_def='Foo')], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), AttributeSpec('attr2', 'an example integer attribute', 'int')]) def test_default_mapping_keys(self): attr_map = self.mapper.get_attr_names(self.bar_spec) keys = set(attr_map.keys()) expected = {'attr1', 'foo', 'attr2'} self.assertSetEqual(keys, expected) class TestLinkedContainer(TestCase): def setUp(self): self.foo_spec = GroupSpec('A test group specification with data type Foo', data_type_def='Foo') self.bar_spec = GroupSpec('A test group specification with a data type Bar', data_type_def='Bar', groups=[self.foo_spec], datasets=[DatasetSpec('an example dataset', 'int', name='data')], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), AttributeSpec('attr2', 'an example integer attribute', 'int')]) self.spec_catalog = SpecCatalog() self.spec_catalog.register_spec(self.foo_spec, 'test.yaml') self.spec_catalog.register_spec(self.bar_spec, 'test.yaml') self.namespace = SpecNamespace('a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=self.spec_catalog) self.namespace_catalog = NamespaceCatalog() self.namespace_catalog.add_namespace(CORE_NAMESPACE, self.namespace) self.type_map = TypeMap(self.namespace_catalog) 
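# register both container classes and their ObjectMappers so the BuildManager can resolve Foo and Bar during build/construct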
self.type_map.register_container_type(CORE_NAMESPACE, 'Foo', Foo) self.type_map.register_container_type(CORE_NAMESPACE, 'Bar', Bar) self.type_map.register_map(Foo, ObjectMapper) self.type_map.register_map(Bar, ObjectMapper) self.manager = BuildManager(self.type_map) self.foo_mapper = ObjectMapper(self.foo_spec) self.bar_mapper = ObjectMapper(self.bar_spec) def test_build_child_link(self): ''' Test default mapping functionality when one container contains a child link to another container ''' foo_inst = Foo('my_foo') bar_inst1 = Bar('my_bar1', list(range(10)), 'value1', 10, foo=foo_inst) # bar_inst2.foo should link to bar_inst1.foo bar_inst2 = Bar('my_bar2', list(range(10)), 'value1', 10, foo=foo_inst) foo_builder = self.foo_mapper.build(foo_inst, self.manager) bar1_builder = self.bar_mapper.build(bar_inst1, self.manager) bar2_builder = self.bar_mapper.build(bar_inst2, self.manager) foo_expected = GroupBuilder('my_foo') inner_foo_builder = GroupBuilder('my_foo', attributes={'data_type': 'Foo', 'namespace': CORE_NAMESPACE, 'object_id': foo_inst.object_id}) bar1_expected = GroupBuilder('n/a', # name doesn't matter datasets={'data': DatasetBuilder('data', list(range(10)))}, groups={'foo': inner_foo_builder}, attributes={'attr1': 'value1', 'attr2': 10}) link_foo_builder = LinkBuilder(builder=inner_foo_builder) bar2_expected = GroupBuilder('n/a', datasets={'data': DatasetBuilder('data', list(range(10)))}, links={'foo': link_foo_builder}, attributes={'attr1': 'value1', 'attr2': 10}) self.assertDictEqual(foo_builder, foo_expected) self.assertDictEqual(bar1_builder, bar1_expected) self.assertDictEqual(bar2_builder, bar2_expected) class TestConvertDtype(TestCase): def test_value_none(self): spec = DatasetSpec('an example dataset', 'int', name='data') self.assertTupleEqual(ObjectMapper.convert_dtype(spec, None), (None, 'int')) spec = DatasetSpec('an example dataset', RefSpec(reftype='object', target_type='int'), name='data') self.assertTupleEqual(ObjectMapper.convert_dtype(spec, None), (None, 'object')) def test_convert_higher_precision(self): """Test that passing a data type with a precision <= specified returns the higher precision type""" spec_type = 'float64' value_types = ['float', 'float32', 'double', 'float64'] self.convert_higher_precision_helper(spec_type, value_types) spec_type = 'int64' value_types = ['long', 'int64', 'uint64', 'int', 'int32', 'int16', 'int8'] self.convert_higher_precision_helper(spec_type, value_types) spec_type = 'int32' value_types = ['int32', 'int16', 'int8'] self.convert_higher_precision_helper(spec_type, value_types) spec_type = 'int16' value_types = ['int16', 'int8'] self.convert_higher_precision_helper(spec_type, value_types) spec_type = 'uint32' value_types = ['uint32', 'uint16', 'uint8'] self.convert_higher_precision_helper(spec_type, value_types) def convert_higher_precision_helper(self, spec_type, value_types): data = 2 spec = DatasetSpec('an example dataset', spec_type, name='data') match = (np.dtype(spec_type).type(data), np.dtype(spec_type)) for dtype in value_types: value = np.dtype(dtype).type(data) with self.subTest(dtype=dtype): ret = ObjectMapper.convert_dtype(spec, value) self.assertTupleEqual(ret, match) self.assertIs(ret[0].dtype, match[1]) def test_keep_higher_precision(self): """Test that passing a data type with a precision >= specified return the given type""" spec_type = 'float' value_types = ['double', 'float64'] self.keep_higher_precision_helper(spec_type, value_types) spec_type = 'int' value_types = ['int64'] 
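# an int64 value supplied for an 'int' spec keeps its 64-bit precision rather than being downcast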
self.keep_higher_precision_helper(spec_type, value_types) spec_type = 'int8' value_types = ['long', 'int64', 'int', 'int32', 'int16'] self.keep_higher_precision_helper(spec_type, value_types) spec_type = 'uint' value_types = ['uint64'] self.keep_higher_precision_helper(spec_type, value_types) spec_type = 'uint8' value_types = ['uint64', 'uint32', 'uint', 'uint16'] self.keep_higher_precision_helper(spec_type, value_types) def keep_higher_precision_helper(self, spec_type, value_types): data = 2 spec = DatasetSpec('an example dataset', spec_type, name='data') for dtype in value_types: value = np.dtype(dtype).type(data) match = (value, np.dtype(dtype)) with self.subTest(dtype=dtype): ret = ObjectMapper.convert_dtype(spec, value) self.assertTupleEqual(ret, match) self.assertIs(ret[0].dtype, match[1]) def test_no_spec(self): spec_type = None spec = DatasetSpec('an example dataset', spec_type, name='data') value = [1, 2, 3] ret = ObjectMapper.convert_dtype(spec, value) match = (value, int) self.assertTupleEqual(ret, match) self.assertIs(type(ret[0][0]), match[1]) value = np.uint64(4) ret = ObjectMapper.convert_dtype(spec, value) match = (value, np.uint64) self.assertTupleEqual(ret, match) self.assertIs(type(ret[0]), match[1]) value = 'hello' ret = ObjectMapper.convert_dtype(spec, value) match = (value, 'utf8') self.assertTupleEqual(ret, match) self.assertIs(type(ret[0]), str) value = bytes('hello', encoding='utf-8') ret = ObjectMapper.convert_dtype(spec, value) match = (value, 'ascii') self.assertTupleEqual(ret, match) self.assertIs(type(ret[0]), bytes) value = DataChunkIterator(data=[1, 2, 3]) ret = ObjectMapper.convert_dtype(spec, value) match = (value, np.dtype(int).type) self.assertTupleEqual(ret, match) self.assertIs(ret[0].dtype.type, match[1]) value = DataChunkIterator(data=[1., 2., 3.]) ret = ObjectMapper.convert_dtype(spec, value) match = (value, np.dtype(float).type) self.assertTupleEqual(ret, match) self.assertIs(ret[0].dtype.type, match[1]) value = H5DataIO(np.arange(30).reshape(5, 2, 3)) ret = ObjectMapper.convert_dtype(spec, value) match = (value, np.dtype(int).type) self.assertTupleEqual(ret, match) self.assertIs(ret[0].dtype.type, match[1]) value = H5DataIO(['foo' 'bar']) ret = ObjectMapper.convert_dtype(spec, value) match = (value, 'utf8') self.assertTupleEqual(ret, match) self.assertIs(type(ret[0].data[0]), str) def test_numeric_spec(self): spec_type = 'numeric' spec = DatasetSpec('an example dataset', spec_type, name='data') value = np.uint64(4) ret = ObjectMapper.convert_dtype(spec, value) match = (value, np.uint64) self.assertTupleEqual(ret, match) self.assertIs(type(ret[0]), match[1]) value = DataChunkIterator(data=[1, 2, 3]) ret = ObjectMapper.convert_dtype(spec, value) match = (value, np.dtype(int).type) self.assertTupleEqual(ret, match) self.assertIs(ret[0].dtype.type, match[1]) def test_bool_spec(self): spec_type = 'bool' spec = DatasetSpec('an example dataset', spec_type, name='data') value = np.bool_(True) ret = ObjectMapper.convert_dtype(spec, value) match = (value, np.bool_) self.assertTupleEqual(ret, match) self.assertIs(type(ret[0]), match[1]) value = True ret = ObjectMapper.convert_dtype(spec, value) match = (value, np.bool_) self.assertTupleEqual(ret, match) self.assertIs(type(ret[0]), match[1]) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/build_tests/test_io_map_data.py0000644000655200065520000000605100000000000024340 0ustar00circlecicircleci00000000000000from hdmf.spec import 
AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog from hdmf.build import DatasetBuilder, ObjectMapper, BuildManager, TypeMap from hdmf import Data from hdmf.utils import docval, getargs from hdmf.testing import TestCase import h5py import numpy as np import os from tests.unit.utils import CORE_NAMESPACE class Baz(Data): @docval({'name': 'name', 'type': str, 'doc': 'the name of this Baz'}, {'name': 'data', 'type': (list, h5py.Dataset), 'doc': 'some data'}, {'name': 'baz_attr', 'type': str, 'doc': 'an attribute'}) def __init__(self, **kwargs): name, data, baz_attr = getargs('name', 'data', 'baz_attr', kwargs) super().__init__(name=name, data=data) self.__baz_attr = baz_attr @property def baz_attr(self): return self.__baz_attr class TestDataMap(TestCase): def setUp(self): self.setUpBazSpec() self.spec_catalog = SpecCatalog() self.spec_catalog.register_spec(self.baz_spec, 'test.yaml') self.namespace = SpecNamespace('a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=self.spec_catalog) self.namespace_catalog = NamespaceCatalog() self.namespace_catalog.add_namespace(CORE_NAMESPACE, self.namespace) self.type_map = TypeMap(self.namespace_catalog) self.type_map.register_container_type(CORE_NAMESPACE, 'Baz', Baz) self.type_map.register_map(Baz, ObjectMapper) self.manager = BuildManager(self.type_map) self.mapper = ObjectMapper(self.baz_spec) def setUpBazSpec(self): self.baz_spec = DatasetSpec('an Baz type', 'int', name='MyBaz', data_type_def='Baz', attributes=[AttributeSpec('baz_attr', 'an example string attribute', 'text')]) def test_build(self): ''' Test default mapping functionality when no attributes are nested ''' container = Baz('my_baz', list(range(10)), 'abcdefghijklmnopqrstuvwxyz') builder = self.mapper.build(container, self.manager) expected = DatasetBuilder('my_baz', list(range(10)), attributes={'baz_attr': 'abcdefghijklmnopqrstuvwxyz'}) self.assertDictEqual(builder, expected) def test_append(self): with h5py.File('test.h5', 'w') as file: test_ds = file.create_dataset('test_ds', data=[1, 2, 3], chunks=True, maxshape=(None,)) container = Baz('my_baz', test_ds, 'abcdefghijklmnopqrstuvwxyz') container.append(4) np.testing.assert_array_equal(container[:], [1, 2, 3, 4]) os.remove('test.h5') def test_extend(self): with h5py.File('test.h5', 'w') as file: test_ds = file.create_dataset('test_ds', data=[1, 2, 3], chunks=True, maxshape=(None,)) container = Baz('my_baz', test_ds, 'abcdefghijklmnopqrstuvwxyz') container.extend([4, 5]) np.testing.assert_array_equal(container[:], [1, 2, 3, 4, 5]) os.remove('test.h5') ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9241881 hdmf-1.5.4/tests/unit/common/0000755000655200065520000000000000000000000017437 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/common/__init__.py0000644000655200065520000000000000000000000021536 0ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/common/test_common.py0000644000655200065520000000061100000000000022336 0ustar00circlecicircleci00000000000000from hdmf import Data, Container from hdmf.common import get_type_map from hdmf.testing import TestCase class TestCommonTypeMap(TestCase): def test_base_types(self): tm = get_type_map() cls = tm.get_container_cls('hdmf-common', 'Container') 
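# the built-in hdmf-common namespace should resolve 'Container' and 'Data' directly to the core hdmf classes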
self.assertIs(cls, Container) cls = tm.get_container_cls('hdmf-common', 'Data') self.assertIs(cls, Data) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/common/test_sparse.py0000644000655200065520000000244300000000000022350 0ustar00circlecicircleci00000000000000from hdmf.common import CSRMatrix from hdmf.testing import TestCase, H5RoundTripMixin import scipy.sparse as sps import numpy as np class TestCSRMatrix(TestCase): def test_from_sparse_matrix(self): data = np.array([1, 2, 3, 4, 5, 6]) indices = np.array([0, 2, 2, 0, 1, 2]) indptr = np.array([0, 2, 3, 6]) expected = CSRMatrix(data, indices, indptr, (3, 3)) sps_mat = sps.csr_matrix((data, indices, indptr), shape=(3, 3)) received = CSRMatrix(sps_mat) self.assertContainerEqual(received, expected, ignore_hdmf_attrs=True) def test_to_spmat(self): data = np.array([1, 2, 3, 4, 5, 6]) indices = np.array([0, 2, 2, 0, 1, 2]) indptr = np.array([0, 2, 3, 6]) csr_mat = CSRMatrix(data, indices, indptr, (3, 3)) spmat_array = csr_mat.to_spmat().toarray() expected = np.asarray([[1, 0, 2], [0, 0, 3], [4, 5, 6]]) np.testing.assert_array_equal(spmat_array, expected) # TODO more unit tests are needed for CSRMatrix class TestCSRMatrixRoundTrip(H5RoundTripMixin, TestCase): def setUpContainer(self): data = np.array([1, 2, 3, 4, 5, 6]) indices = np.array([0, 2, 2, 0, 1, 2]) indptr = np.array([0, 2, 3, 6]) return CSRMatrix(data, indices, indptr, (3, 3)) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/common/test_table.py0000644000655200065520000003557300000000000022154 0ustar00circlecicircleci00000000000000from hdmf.common import DynamicTable, VectorData, ElementIdentifiers, DynamicTableRegion from hdmf.testing import TestCase, H5RoundTripMixin import pandas as pd import numpy as np from collections import OrderedDict class TestDynamicTable(TestCase): def setUp(self): self.spec = [ {'name': 'foo', 'description': 'foo column'}, {'name': 'bar', 'description': 'bar column'}, {'name': 'baz', 'description': 'baz column'}, ] self.data = [ [1, 2, 3, 4, 5], [10.0, 20.0, 30.0, 40.0, 50.0], ['cat', 'dog', 'bird', 'fish', 'lizard'] ] def with_table_columns(self): cols = [VectorData(**d) for d in self.spec] table = DynamicTable("with_table_columns", 'a test table', columns=cols) return table def with_columns_and_data(self): columns = [ VectorData(name=s['name'], description=s['description'], data=d) for s, d in zip(self.spec, self.data) ] return DynamicTable("with_columns_and_data", 'a test table', columns=columns) def with_spec(self): table = DynamicTable("with_spec", 'a test table', columns=self.spec) return table def check_empty_table(self, table): self.assertIsInstance(table.columns[0], VectorData) self.assertEqual(len(table.columns), 3) self.assertEqual(table.colnames, ('foo', 'bar', 'baz')) def test_constructor_table_columns(self): table = self.with_table_columns() self.assertEqual(table.name, 'with_table_columns') self.check_empty_table(table) def test_constructor_spec(self): table = self.with_spec() self.assertEqual(table.name, 'with_spec') self.check_empty_table(table) def check_table(self, table): self.assertEqual(len(table), 5) self.assertEqual(table.columns[0].data, [1, 2, 3, 4, 5]) self.assertEqual(table.columns[1].data, [10.0, 20.0, 30.0, 40.0, 50.0]) self.assertEqual(table.columns[2].data, ['cat', 'dog', 'bird', 'fish', 'lizard']) self.assertEqual(table.id.data, [0, 1, 2, 3, 4]) 
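# column names should also be accessible as attributes on the table (here, 'baz')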
self.assertTrue(hasattr(table, 'baz')) def test_constructor_ids_default(self): columns = [VectorData(name=s['name'], description=s['description'], data=d) for s, d in zip(self.spec, self.data)] table = DynamicTable("with_spec", 'a test table', columns=columns) self.check_table(table) def test_constructor_ids(self): columns = [VectorData(name=s['name'], description=s['description'], data=d) for s, d in zip(self.spec, self.data)] table = DynamicTable("with_columns", 'a test table', id=[0, 1, 2, 3, 4], columns=columns) self.check_table(table) def test_constructor_ElementIdentifier_ids(self): columns = [VectorData(name=s['name'], description=s['description'], data=d) for s, d in zip(self.spec, self.data)] ids = ElementIdentifiers('ids', [0, 1, 2, 3, 4]) table = DynamicTable("with_columns", 'a test table', id=ids, columns=columns) self.check_table(table) def test_constructor_ids_bad_ids(self): columns = [VectorData(name=s['name'], description=s['description'], data=d) for s, d in zip(self.spec, self.data)] msg = "must provide same number of ids as length of columns" with self.assertRaisesWith(ValueError, msg): DynamicTable("with_columns", 'a test table', id=[0, 1], columns=columns) def add_rows(self, table): table.add_row({'foo': 1, 'bar': 10.0, 'baz': 'cat'}) table.add_row({'foo': 2, 'bar': 20.0, 'baz': 'dog'}) table.add_row({'foo': 3, 'bar': 30.0, 'baz': 'bird'}) table.add_row({'foo': 4, 'bar': 40.0, 'baz': 'fish'}) table.add_row({'foo': 5, 'bar': 50.0, 'baz': 'lizard'}) def test_add_row(self): table = self.with_spec() self.add_rows(table) self.check_table(table) def test_get_item(self): table = self.with_spec() self.add_rows(table) self.check_table(table) def test_add_column(self): table = self.with_spec() table.add_column(name='qux', description='qux column') self.assertEqual(table.colnames, ('foo', 'bar', 'baz', 'qux')) self.assertTrue(hasattr(table, 'qux')) def test_getitem_row_num(self): table = self.with_spec() self.add_rows(table) row = table[2] self.assertTupleEqual(row.shape, (1, 3)) self.assertTupleEqual(tuple(row.iloc[0]), (3, 30.0, 'bird')) def test_getitem_row_slice(self): table = self.with_spec() self.add_rows(table) rows = table[1:3] self.assertIsInstance(rows, pd.DataFrame) self.assertTupleEqual(rows.shape, (2, 3)) self.assertTupleEqual(tuple(rows.iloc[1]), (3, 30.0, 'bird')) def test_getitem_row_slice_with_step(self): table = self.with_spec() self.add_rows(table) rows = table[0:5:2] self.assertIsInstance(rows, pd.DataFrame) self.assertTupleEqual(rows.shape, (3, 3)) self.assertEqual(rows.iloc[2][0], 5) self.assertEqual(rows.iloc[2][1], 50.0) self.assertEqual(rows.iloc[2][2], 'lizard') def test_getitem_invalid_keytype(self): table = self.with_spec() self.add_rows(table) with self.assertRaises(KeyError): _ = table[0.1] def test_getitem_col_select_and_row_slice(self): table = self.with_spec() self.add_rows(table) col = table[1:3, 'bar'] self.assertEqual(len(col), 2) self.assertEqual(col[0], 20.0) self.assertEqual(col[1], 30.0) def test_getitem_column(self): table = self.with_spec() self.add_rows(table) col = table['bar'] self.assertEqual(col[0], 10.0) self.assertEqual(col[1], 20.0) self.assertEqual(col[2], 30.0) self.assertEqual(col[3], 40.0) self.assertEqual(col[4], 50.0) def test_getitem_list_idx(self): table = self.with_spec() self.add_rows(table) row = table[[0, 2, 4]] self.assertEqual(len(row), 3) self.assertTupleEqual(tuple(row.iloc[0]), (1, 10.0, 'cat')) self.assertTupleEqual(tuple(row.iloc[1]), (3, 30.0, 'bird')) self.assertTupleEqual(tuple(row.iloc[2]), (5, 50.0, 
'lizard')) def test_getitem_point_idx_colname(self): table = self.with_spec() self.add_rows(table) val = table[2, 'bar'] self.assertEqual(val, 30.0) def test_getitem_point_idx(self): table = self.with_spec() self.add_rows(table) row = table[2] self.assertTupleEqual(tuple(row.iloc[0]), (3, 30.0, 'bird')) def test_getitem_point_idx_colidx(self): table = self.with_spec() self.add_rows(table) val = table[2, 2] self.assertEqual(val, 30.0) def test_pandas_roundtrip(self): df = pd.DataFrame({ 'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', '4'] }, index=pd.Index(name='an_index', data=[2, 4, 6, 8])) table = DynamicTable.from_dataframe(df, 'foo') obtained = table.to_dataframe() self.assertTrue(df.equals(obtained)) def test_to_dataframe(self): table = self.with_columns_and_data() data = OrderedDict() for name in table.colnames: if name == 'foo': data[name] = [1, 2, 3, 4, 5] elif name == 'bar': data[name] = [10.0, 20.0, 30.0, 40.0, 50.0] elif name == 'baz': data[name] = ['cat', 'dog', 'bird', 'fish', 'lizard'] expected_df = pd.DataFrame(data) obtained_df = table.to_dataframe() self.assertTrue(expected_df.equals(obtained_df)) def test_from_dataframe(self): df = pd.DataFrame({ 'foo': [1, 2, 3, 4, 5], 'bar': [10.0, 20.0, 30.0, 40.0, 50.0], 'baz': ['cat', 'dog', 'bird', 'fish', 'lizard'] }).loc[:, ('foo', 'bar', 'baz')] obtained_table = DynamicTable.from_dataframe(df, 'test') self.check_table(obtained_table) def test_from_dataframe_eq(self): expected = DynamicTable('test_table', 'the expected table') expected.add_column('a', '2d column') expected.add_column('b', '1d column') expected.add_row(a=[1, 2, 3], b='4') expected.add_row(a=[1, 2, 3], b='5') expected.add_row(a=[1, 2, 3], b='6') df = pd.DataFrame({ 'a': [[1, 2, 3], [1, 2, 3], [1, 2, 3]], 'b': ['4', '5', '6'] }) coldesc = {'a': '2d column', 'b': '1d column'} received = DynamicTable.from_dataframe(df, 'test_table', table_description='the expected table', column_descriptions=coldesc) self.assertContainerEqual(expected, received, ignore_hdmf_attrs=True) def test_from_dataframe_dup_attr(self): df = pd.DataFrame({ 'foo': [1, 2, 3, 4, 5], 'bar': [10.0, 20.0, 30.0, 40.0, 50.0], 'description': ['cat', 'dog', 'bird', 'fish', 'lizard'] }).loc[:, ('foo', 'bar', 'description')] msg = "Column name 'description' is not allowed because it is already an attribute" with self.assertRaisesWith(ValueError, msg): DynamicTable.from_dataframe(df, 'test') def test_missing_columns(self): table = self.with_spec() with self.assertRaises(ValueError): table.add_row({'bar': 60.0, 'foo': [6]}, None) def test_enforce_unique_id_error(self): table = self.with_spec() table.add_row(id=10, data={'foo': 1, 'bar': 10.0, 'baz': 'cat'}, enforce_unique_id=True) with self.assertRaises(ValueError): table.add_row(id=10, data={'foo': 1, 'bar': 10.0, 'baz': 'cat'}, enforce_unique_id=True) def test_not_enforce_unique_id_error(self): table = self.with_spec() table.add_row(id=10, data={'foo': 1, 'bar': 10.0, 'baz': 'cat'}, enforce_unique_id=False) try: table.add_row(id=10, data={'foo': 1, 'bar': 10.0, 'baz': 'cat'}, enforce_unique_id=False) except ValueError as e: self.fail("add row with non unique id raised error %s" % str(e)) def test_bad_id_type_error(self): table = self.with_spec() with self.assertRaises(TypeError): table.add_row(id=10.1, data={'foo': 1, 'bar': 10.0, 'baz': 'cat'}, enforce_unique_id=True) with self.assertRaises(TypeError): table.add_row(id='str', data={'foo': 1, 'bar': 10.0, 'baz': 'cat'}, enforce_unique_id=True) def test_extra_columns(self): table = self.with_spec() with 
self.assertRaises(ValueError): table.add_row({'bar': 60.0, 'foo': 6, 'baz': 'oryx', 'qax': -1}, None) def test_indexed_dynamic_table_region(self): table = self.with_columns_and_data() dynamic_table_region = DynamicTableRegion('dtr', [1, 2, 2], 'desc', table=table) fetch_ids = dynamic_table_region[:3].index.values self.assertListEqual(fetch_ids.tolist(), [1, 2, 2]) def test_dynamic_table_iteration(self): table = self.with_columns_and_data() dynamic_table_region = DynamicTableRegion('dtr', [0, 1, 2, 3, 4], 'desc', table=table) for ii, item in enumerate(dynamic_table_region): self.assertTrue(table[ii].equals(item)) def test_dynamic_table_region_shape(self): table = self.with_columns_and_data() dynamic_table_region = DynamicTableRegion('dtr', [0, 1, 2, 3, 4], 'desc', table=table) self.assertTupleEqual(dynamic_table_region.shape, (5, 3)) def test_nd_array_to_df(self): data = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]) col = VectorData(name='data', description='desc', data=data) df = DynamicTable('test', 'desc', np.arange(3, dtype='int'), (col, )).to_dataframe() df2 = pd.DataFrame({'data': [x for x in data]}, index=pd.Index(name='id', data=[0, 1, 2])) pd.testing.assert_frame_equal(df, df2) def test_id_search(self): table = self.with_spec() data = [{'foo': 1, 'bar': 10.0, 'baz': 'cat'}, {'foo': 2, 'bar': 20.0, 'baz': 'dog'}, {'foo': 3, 'bar': 30.0, 'baz': 'bird'}, # id=2 {'foo': 4, 'bar': 40.0, 'baz': 'fish'}, {'foo': 5, 'bar': 50.0, 'baz': 'lizard'} # id=4 ] for i in data: table.add_row(i) res = table[table.id == [2, 4]] self.assertEqual(len(res), 2) self.assertTupleEqual(tuple(res.iloc[0]), (3, 30.0, 'bird')) self.assertTupleEqual(tuple(res.iloc[1]), (5, 50.0, 'lizard')) class TestDynamicTableRoundTrip(H5RoundTripMixin, TestCase): def setUpContainer(self): table = DynamicTable('table0', 'an example table') table.add_column('foo', 'an int column') table.add_column('bar', 'a float column') table.add_column('baz', 'a string column') table.add_column('qux', 'a boolean column') table.add_row(foo=27, bar=28.0, baz="cat", qux=True) table.add_row(foo=37, bar=38.0, baz="dog", qux=False) return table class TestElementIdentifiers(TestCase): def test_identifier_search_single_list(self): e = ElementIdentifiers('ids', [0, 1, 2, 3, 4]) a = (e == [1]) np.testing.assert_array_equal(a, [1]) def test_identifier_search_single_int(self): e = ElementIdentifiers('ids', [0, 1, 2, 3, 4]) a = (e == 2) np.testing.assert_array_equal(a, [2]) def test_identifier_search_single_list_not_found(self): e = ElementIdentifiers('ids', [0, 1, 2, 3, 4]) a = (e == [10]) np.testing.assert_array_equal(a, []) def test_identifier_search_single_int_not_found(self): e = ElementIdentifiers('ids', [0, 1, 2, 3, 4]) a = (e == 10) np.testing.assert_array_equal(a, []) def test_identifier_search_single_list_all_match(self): e = ElementIdentifiers('ids', [0, 1, 2, 3, 4]) a = (e == [1, 2, 3]) np.testing.assert_array_equal(a, [1, 2, 3]) def test_identifier_search_single_list_partial_match(self): e = ElementIdentifiers('ids', [0, 1, 2, 3, 4]) a = (e == [1, 2, 10]) np.testing.assert_array_equal(a, [1, 2]) a = (e == [-1, 2, 10]) np.testing.assert_array_equal(a, [2, ]) def test_identifier_search_with_element_identifier(self): e = ElementIdentifiers('ids', [0, 1, 2, 3, 4]) a = (e == ElementIdentifiers('ids', [1, 2, 10])) np.testing.assert_array_equal(a, [1, 2]) def test_identifier_search_with_bad_ids(self): e = ElementIdentifiers('ids', [0, 1, 2, 3, 4]) with self.assertRaises(TypeError): _ = (e == 0.1) with self.assertRaises(TypeError): _ = (e == 'test') 
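# --- Illustrative sketch (editorial addition, not part of the original test suite) ---
# The tests above exercise DynamicTable construction, row addition, and pandas
# round-tripping. The helper below is a minimal, hypothetical example of that same
# workflow, using only the public API already imported at the top of this module
# (DynamicTable); the function name is an assumption for illustration only.
def _example_dynamic_table_roundtrip():
    table = DynamicTable('example_table', 'a small example table')
    table.add_column('foo', 'an int column')
    table.add_column('bar', 'a float column')
    table.add_row(foo=1, bar=10.0)
    table.add_row(foo=2, bar=20.0)
    df = table.to_dataframe()                                    # convert to pandas
    rebuilt = DynamicTable.from_dataframe(df, 'example_table')   # and build a table back from the DataFrame
    return rebuilt.colnames == ('foo', 'bar') and len(rebuilt) == 2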
././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9241881 hdmf-1.5.4/tests/unit/spec_tests/0000755000655200065520000000000000000000000020323 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/spec_tests/__init__.py0000644000655200065520000000000000000000000022422 0ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/spec_tests/test_attribute_spec.py0000644000655200065520000000505000000000000024751 0ustar00circlecicircleci00000000000000import json from hdmf.spec import AttributeSpec from hdmf.testing import TestCase class AttributeSpecTests(TestCase): def test_constructor(self): spec = AttributeSpec('attribute1', 'my first attribute', 'text') self.assertEqual(spec['name'], 'attribute1') self.assertEqual(spec['dtype'], 'text') self.assertEqual(spec['doc'], 'my first attribute') self.assertIsNone(spec.parent) json.dumps(spec) # to ensure there are no circular links def test_invalid_dtype(self): with self.assertRaises(ValueError): AttributeSpec(name='attribute1', doc='my first attribute', dtype='invalid' # <-- Invalid dtype must raise a ValueError ) def test_both_value_and_default_value_set(self): with self.assertRaises(ValueError): AttributeSpec(name='attribute1', doc='my first attribute', dtype='int', value=5, default_value=10 # <-- Default_value and value can't be set at the same time ) def test_colliding_shape_and_dims(self): with self.assertRaises(ValueError): AttributeSpec(name='attribute1', doc='my first attribute', dtype='int', dims=['test'], shape=[None, 2] # <-- Length of shape and dims do not match must raise a ValueError ) def test_default_value(self): spec = AttributeSpec('attribute1', 'my first attribute', 'text', default_value='some text') self.assertEqual(spec['default_value'], 'some text') self.assertEqual(spec.default_value, 'some text') def test_shape(self): shape = [None, 2] spec = AttributeSpec('attribute1', 'my first attribute', 'text', shape=shape) self.assertEqual(spec['shape'], shape) self.assertEqual(spec.shape, shape) def test_dims_without_shape(self): spec = AttributeSpec('attribute1', 'my first attribute', 'text', dims=['test']) self.assertEqual(spec.shape, (None, )) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/spec_tests/test_dataset_spec.py0000644000655200065520000002477100000000000024406 0ustar00circlecicircleci00000000000000import json from hdmf.spec import GroupSpec, DatasetSpec, AttributeSpec, DtypeSpec, RefSpec from hdmf.testing import TestCase class DatasetSpecTests(TestCase): def setUp(self): self.attributes = [ AttributeSpec('attribute1', 'my first attribute', 'text'), AttributeSpec('attribute2', 'my second attribute', 'text') ] def test_constructor(self): spec = DatasetSpec('my first dataset', 'int', name='dataset1', attributes=self.attributes) self.assertEqual(spec['dtype'], 'int') self.assertEqual(spec['name'], 'dataset1') self.assertEqual(spec['doc'], 'my first dataset') self.assertNotIn('linkable', spec) self.assertNotIn('data_type_def', spec) self.assertListEqual(spec['attributes'], self.attributes) self.assertIs(spec, self.attributes[0].parent) self.assertIs(spec, self.attributes[1].parent) json.dumps(spec) def test_constructor_datatype(self): spec = DatasetSpec('my first dataset', 
'int', name='dataset1', attributes=self.attributes, linkable=False, data_type_def='EphysData') self.assertEqual(spec['dtype'], 'int') self.assertEqual(spec['name'], 'dataset1') self.assertEqual(spec['doc'], 'my first dataset') self.assertEqual(spec['data_type_def'], 'EphysData') self.assertFalse(spec['linkable']) self.assertListEqual(spec['attributes'], self.attributes) self.assertIs(spec, self.attributes[0].parent) self.assertIs(spec, self.attributes[1].parent) def test_constructor_shape(self): shape = [None, 2] spec = DatasetSpec('my first dataset', 'int', name='dataset1', shape=shape, attributes=self.attributes) self.assertEqual(spec['shape'], shape) self.assertEqual(spec.shape, shape) def test_constructor_invalidate_dtype(self): with self.assertRaises(ValueError): DatasetSpec(doc='my first dataset', dtype='my bad dtype', # <-- Expect AssertionError due to bad type name='dataset1', dims=(None, None), attributes=self.attributes, linkable=False, data_type_def='EphysData') def test_constructor_ref_spec(self): dtype = RefSpec('TimeSeries', 'object') spec = DatasetSpec(doc='my first dataset', dtype=dtype, name='dataset1', dims=(None, None), attributes=self.attributes, linkable=False, data_type_def='EphysData') self.assertDictEqual(spec['dtype'], dtype) def test_datatype_extension(self): base = DatasetSpec('my first dataset', 'int', name='dataset1', attributes=self.attributes, linkable=False, data_type_def='EphysData') attributes = [AttributeSpec('attribute3', 'my first extending attribute', 'float')] ext = DatasetSpec('my first dataset extension', 'int', name='dataset1', attributes=attributes, linkable=False, data_type_inc=base, data_type_def='SpikeData') self.assertDictEqual(ext['attributes'][0], attributes[0]) self.assertDictEqual(ext['attributes'][1], self.attributes[0]) self.assertDictEqual(ext['attributes'][2], self.attributes[1]) ext_attrs = ext.attributes self.assertIs(ext, ext_attrs[0].parent) self.assertIs(ext, ext_attrs[1].parent) self.assertIs(ext, ext_attrs[2].parent) def test_datatype_extension_groupspec(self): '''Test to make sure DatasetSpec catches when a GroupSpec used as data_type_inc''' base = GroupSpec('a fake grop', data_type_def='EphysData') with self.assertRaises(TypeError): DatasetSpec('my first dataset extension', 'int', name='dataset1', data_type_inc=base, data_type_def='SpikeData') def test_constructor_table(self): dtype1 = DtypeSpec('column1', 'the first column', 'int') dtype2 = DtypeSpec('column2', 'the second column', 'float') spec = DatasetSpec('my first table', [dtype1, dtype2], name='table1', attributes=self.attributes) self.assertEqual(spec['dtype'], [dtype1, dtype2]) self.assertEqual(spec['name'], 'table1') self.assertEqual(spec['doc'], 'my first table') self.assertNotIn('linkable', spec) self.assertNotIn('data_type_def', spec) self.assertListEqual(spec['attributes'], self.attributes) self.assertIs(spec, self.attributes[0].parent) self.assertIs(spec, self.attributes[1].parent) json.dumps(spec) def test_constructor_invalid_table(self): with self.assertRaises(ValueError): DatasetSpec('my first table', [DtypeSpec('column1', 'the first column', 'int'), {} # <--- Bad compound type spec must raise an error ], name='table1', attributes=self.attributes) def test_constructor_default_value(self): spec = DatasetSpec(doc='test', default_value=5, dtype='int', data_type_def='test') self.assertEqual(spec.default_value, 5) def test_name_with_incompatible_quantity(self): # Check that we raise an error when the quantity allows more than one instance with a fixed name with 
self.assertRaises(ValueError): DatasetSpec(doc='my first dataset', dtype='int', name='ds1', quantity='zero_or_many') with self.assertRaises(ValueError): DatasetSpec(doc='my first dataset', dtype='int', name='ds1', quantity='one_or_many') def test_name_with_compatible_quantity(self): # Make sure compatible quantity flags pass when name is fixed DatasetSpec(doc='my first dataset', dtype='int', name='ds1', quantity='zero_or_one') DatasetSpec(doc='my first dataset', dtype='int', name='ds1', quantity=1) def test_datatype_table_extension(self): dtype1 = DtypeSpec('column1', 'the first column', 'int') dtype2 = DtypeSpec('column2', 'the second column', 'float') base = DatasetSpec('my first table', [dtype1, dtype2], attributes=self.attributes, data_type_def='SimpleTable') self.assertEqual(base['dtype'], [dtype1, dtype2]) self.assertEqual(base['doc'], 'my first table') dtype3 = DtypeSpec('column3', 'the third column', 'text') ext = DatasetSpec('my first table extension', [dtype3], data_type_inc=base, data_type_def='ExtendedTable') self.assertEqual(ext['dtype'], [dtype1, dtype2, dtype3]) self.assertEqual(ext['doc'], 'my first table extension') def test_datatype_table_extension_higher_precision(self): dtype1 = DtypeSpec('column1', 'the first column', 'int') dtype2 = DtypeSpec('column2', 'the second column', 'float32') base = DatasetSpec('my first table', [dtype1, dtype2], attributes=self.attributes, data_type_def='SimpleTable') self.assertEqual(base['dtype'], [dtype1, dtype2]) self.assertEqual(base['doc'], 'my first table') dtype3 = DtypeSpec('column2', 'the second column, with greater precision', 'float64') ext = DatasetSpec('my first table extension', [dtype3], data_type_inc=base, data_type_def='ExtendedTable') self.assertEqual(ext['dtype'], [dtype1, dtype3]) self.assertEqual(ext['doc'], 'my first table extension') def test_datatype_table_extension_lower_precision(self): dtype1 = DtypeSpec('column1', 'the first column', 'int') dtype2 = DtypeSpec('column2', 'the second column', 'float64') base = DatasetSpec('my first table', [dtype1, dtype2], attributes=self.attributes, data_type_def='SimpleTable') self.assertEqual(base['dtype'], [dtype1, dtype2]) self.assertEqual(base['doc'], 'my first table') dtype3 = DtypeSpec('column2', 'the second column, with greater precision', 'float32') with self.assertRaisesWith(ValueError, 'Cannot extend float64 to float32'): DatasetSpec('my first table extension', [dtype3], data_type_inc=base, data_type_def='ExtendedTable') def test_datatype_table_extension_diff_format(self): dtype1 = DtypeSpec('column1', 'the first column', 'int') dtype2 = DtypeSpec('column2', 'the second column', 'float64') base = DatasetSpec('my first table', [dtype1, dtype2], attributes=self.attributes, data_type_def='SimpleTable') self.assertEqual(base['dtype'], [dtype1, dtype2]) self.assertEqual(base['doc'], 'my first table') dtype3 = DtypeSpec('column2', 'the second column, with greater precision', 'int32') with self.assertRaisesWith(ValueError, 'Cannot extend float64 to int32'): DatasetSpec('my first table extension', [dtype3], data_type_inc=base, data_type_def='ExtendedTable') ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/spec_tests/test_dtype_spec.py0000644000655200065520000000472400000000000024102 0ustar00circlecicircleci00000000000000from hdmf.spec import DtypeSpec, DtypeHelper, RefSpec from hdmf.testing import TestCase class DtypeSpecHelper(TestCase): def setUp(self): pass def test_recommended_dtypes(self): 
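# the recommended primary dtypes should match the keys of the primary dtype synonym map, in order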
self.assertListEqual(DtypeHelper.recommended_primary_dtypes, list(DtypeHelper.primary_dtype_synonyms.keys())) def test_valid_primary_dtypes(self): a = set(list(DtypeHelper.primary_dtype_synonyms.keys()) + [vi for v in DtypeHelper.primary_dtype_synonyms.values() for vi in v]) self.assertSetEqual(a, DtypeHelper.valid_primary_dtypes) def test_simplify_cpd_type(self): compound_type = [DtypeSpec('test', 'test field', 'float'), DtypeSpec('test2', 'test field2', 'int')] expected_result = ['float', 'int'] result = DtypeHelper.simplify_cpd_type(compound_type) self.assertListEqual(result, expected_result) class DtypeSpecTests(TestCase): def setUp(self): pass def test_constructor(self): spec = DtypeSpec('column1', 'an example column', 'int') self.assertEqual(spec.doc, 'an example column') self.assertEqual(spec.name, 'column1') self.assertEqual(spec.dtype, 'int') def test_build_spec(self): spec = DtypeSpec.build_spec({'doc': 'an example column', 'name': 'column1', 'dtype': 'int'}) self.assertEqual(spec.doc, 'an example column') self.assertEqual(spec.name, 'column1') self.assertEqual(spec.dtype, 'int') def test_invalid_refspec_dict(self): with self.assertRaises(AssertionError): DtypeSpec.assertValidDtype({'no target': 'test', # <-- missing or here bad target key for RefSpec 'reftype': 'object'}) def test_refspec_dtype(self): # just making sure this does not cause an error DtypeSpec('column1', 'an example column', RefSpec('TimeSeries', 'object')) def test_invalid_dtype(self): with self.assertRaises(AssertionError): DtypeSpec('column1', 'an example column', dtype='bad dtype' # <-- make sure a bad type string raises an error ) def test_is_ref(self): spec = DtypeSpec('column1', 'an example column', RefSpec('TimeSeries', 'object')) self.assertTrue(DtypeSpec.is_ref(spec)) spec = DtypeSpec('column1', 'an example column', 'int') self.assertFalse(DtypeSpec.is_ref(spec)) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/spec_tests/test_group_spec.py0000644000655200065520000002160200000000000024103 0ustar00circlecicircleci00000000000000import json from hdmf.spec import GroupSpec, DatasetSpec, AttributeSpec from hdmf.testing import TestCase class GroupSpecTests(TestCase): def setUp(self): self.attributes = [ AttributeSpec('attribute1', 'my first attribute', 'text'), AttributeSpec('attribute2', 'my second attribute', 'text') ] self.dset1_attributes = [ AttributeSpec('attribute3', 'my third attribute', 'text'), AttributeSpec('attribute4', 'my fourth attribute', 'text') ] self.dset2_attributes = [ AttributeSpec('attribute5', 'my fifth attribute', 'text'), AttributeSpec('attribute6', 'my sixth attribute', 'text') ] self.datasets = [ DatasetSpec('my first dataset', 'int', name='dataset1', attributes=self.dset1_attributes, linkable=True), DatasetSpec('my second dataset', 'int', name='dataset2', attributes=self.dset2_attributes, linkable=True, data_type_def='VoltageArray') ] self.subgroups = [ GroupSpec('A test subgroup', name='subgroup1', linkable=False), GroupSpec('A test subgroup', name='subgroup2', linkable=False) ] self.ndt_attr_spec = AttributeSpec('data_type', 'the data type of this object', 'text', value='EphysData') self.ns_attr_spec = AttributeSpec('namespace', 'the namespace for the data type of this object', 'text', required=False) def test_constructor(self): spec = GroupSpec('A test group', name='root_constructor', groups=self.subgroups, datasets=self.datasets, attributes=self.attributes, linkable=False) 
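# constructor keyword arguments should be reflected in the spec dict, and child specs should be re-parented to this group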
self.assertFalse(spec['linkable']) self.assertListEqual(spec['attributes'], self.attributes) self.assertListEqual(spec['datasets'], self.datasets) self.assertNotIn('data_type_def', spec) self.assertIs(spec, self.subgroups[0].parent) self.assertIs(spec, self.subgroups[1].parent) self.assertIs(spec, self.attributes[0].parent) self.assertIs(spec, self.attributes[1].parent) self.assertIs(spec, self.datasets[0].parent) self.assertIs(spec, self.datasets[1].parent) json.dumps(spec) def test_constructor_datatype(self): spec = GroupSpec('A test group', name='root_constructor_datatype', datasets=self.datasets, attributes=self.attributes, linkable=False, data_type_def='EphysData') self.assertFalse(spec['linkable']) self.assertListEqual(spec['attributes'], self.attributes) self.assertListEqual(spec['datasets'], self.datasets) self.assertEqual(spec['data_type_def'], 'EphysData') self.assertIs(spec, self.attributes[0].parent) self.assertIs(spec, self.attributes[1].parent) self.assertIs(spec, self.datasets[0].parent) self.assertIs(spec, self.datasets[1].parent) self.assertEqual(spec.data_type_def, 'EphysData') self.assertIsNone(spec.data_type_inc) json.dumps(spec) def test_set_dataset(self): spec = GroupSpec('A test group', name='root_test_set_dataset', linkable=False, data_type_def='EphysData') spec.set_dataset(self.datasets[0]) self.assertIs(spec, self.datasets[0].parent) def test_set_group(self): spec = GroupSpec('A test group', name='root_test_set_group', linkable=False, data_type_def='EphysData') spec.set_group(self.subgroups[0]) spec.set_group(self.subgroups[1]) self.assertListEqual(spec['groups'], self.subgroups) self.assertIs(spec, self.subgroups[0].parent) self.assertIs(spec, self.subgroups[1].parent) json.dumps(spec) def test_type_extension(self): spec = GroupSpec('A test group', name='parent_type', datasets=self.datasets, attributes=self.attributes, linkable=False, data_type_def='EphysData') dset1_attributes_ext = [ AttributeSpec('dset1_extra_attribute', 'an extra attribute for the first dataset', 'text') ] ext_datasets = [ DatasetSpec('my first dataset extension', 'int', name='dataset1', attributes=dset1_attributes_ext, linkable=True), ] ext_attributes = [ AttributeSpec('ext_extra_attribute', 'an extra attribute for the group', 'text'), ] ext = GroupSpec('A test group extension', name='child_type', datasets=ext_datasets, attributes=ext_attributes, linkable=False, data_type_inc=spec, data_type_def='SpikeData') ext_dset1 = ext.get_dataset('dataset1') ext_dset1_attrs = ext_dset1.attributes self.assertDictEqual(ext_dset1_attrs[0], dset1_attributes_ext[0]) self.assertDictEqual(ext_dset1_attrs[1], self.dset1_attributes[0]) self.assertDictEqual(ext_dset1_attrs[2], self.dset1_attributes[1]) self.assertEqual(ext.data_type_def, 'SpikeData') self.assertEqual(ext.data_type_inc, 'EphysData') ext_dset2 = ext.get_dataset('dataset2') self.maxDiff = None # this will suffice for now, assertDictEqual doesn't do deep equality checks self.assertEqual(str(ext_dset2), str(self.datasets[1])) self.assertAttributesEqual(ext_dset2, self.datasets[1]) # self.ns_attr_spec ndt_attr_spec = AttributeSpec('data_type', 'the data type of this object', # noqa: F841 'text', value='SpikeData') res_attrs = ext.attributes self.assertDictEqual(res_attrs[0], ext_attributes[0]) self.assertDictEqual(res_attrs[1], self.attributes[0]) self.assertDictEqual(res_attrs[2], self.attributes[1]) # test that inherited specs are tracked appropriate for d in self.datasets: with self.subTest(dataset=d.name): self.assertTrue(ext.is_inherited_spec(d)) 
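# the extension reports the parent type's datasets as inherited, while the parent type itself does not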
self.assertFalse(spec.is_inherited_spec(d)) json.dumps(spec) def assertDatasetsEqual(self, spec1, spec2): spec1_dsets = spec1.datasets spec2_dsets = spec2.datasets if len(spec1_dsets) != len(spec2_dsets): raise AssertionError('different number of AttributeSpecs') else: for i in range(len(spec1_dsets)): self.assertAttributesEqual(spec1_dsets[i], spec2_dsets[i]) def assertAttributesEqual(self, spec1, spec2): spec1_attr = spec1.attributes spec2_attr = spec2.attributes if len(spec1_attr) != len(spec2_attr): raise AssertionError('different number of AttributeSpecs') else: for i in range(len(spec1_attr)): self.assertDictEqual(spec1_attr[i], spec2_attr[i]) def test_add_attribute(self): spec = GroupSpec('A test group', name='root_constructor', groups=self.subgroups, datasets=self.datasets, linkable=False) for attrspec in self.attributes: spec.add_attribute(**attrspec) self.assertListEqual(spec['attributes'], self.attributes) self.assertListEqual(spec['datasets'], self.datasets) self.assertNotIn('data_type_def', spec) self.assertIs(spec, self.subgroups[0].parent) self.assertIs(spec, self.subgroups[1].parent) self.assertIs(spec, spec.attributes[0].parent) self.assertIs(spec, spec.attributes[1].parent) self.assertIs(spec, self.datasets[0].parent) self.assertIs(spec, self.datasets[1].parent) json.dumps(spec) def test_update_attribute_spec(self): spec = GroupSpec('A test group', name='root_constructor', attributes=[AttributeSpec('attribute1', 'my first attribute', 'text'), ]) spec.set_attribute(AttributeSpec('attribute1', 'my first attribute', 'int', value=5)) res = spec.get_attribute('attribute1') self.assertEqual(res.value, 5) self.assertEqual(res.dtype, 'int') ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/spec_tests/test_load_namespace.py0000644000655200065520000001167000000000000024674 0ustar00circlecicircleci00000000000000import ruamel.yaml as yaml import json import os from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecNamespace, NamespaceCatalog from hdmf.testing import TestCase class TestSpecLoad(TestCase): NS_NAME = 'test_ns' def setUp(self): self.attributes = [ AttributeSpec('attribute1', 'my first attribute', 'text'), AttributeSpec('attribute2', 'my second attribute', 'text') ] self.dset1_attributes = [ AttributeSpec('attribute3', 'my third attribute', 'text'), AttributeSpec('attribute4', 'my fourth attribute', 'text') ] self.dset2_attributes = [ AttributeSpec('attribute5', 'my fifth attribute', 'text'), AttributeSpec('attribute6', 'my sixth attribute', 'text') ] self.datasets = [ DatasetSpec('my first dataset', 'int', name='dataset1', attributes=self.dset1_attributes, linkable=True), DatasetSpec('my second dataset', 'int', name='dataset2', dims=(None, None), attributes=self.dset2_attributes, linkable=True, data_type_def='VoltageArray') ] self.spec = GroupSpec('A test group', name='root_constructor_datatype', datasets=self.datasets, attributes=self.attributes, linkable=False, data_type_def='EphysData') dset1_attributes_ext = [ AttributeSpec('dset1_extra_attribute', 'an extra attribute for the first dataset', 'text') ] self.ext_datasets = [ DatasetSpec('my first dataset extension', 'int', name='dataset1', attributes=dset1_attributes_ext, linkable=True), ] self.ext_attributes = [ AttributeSpec('ext_extra_attribute', 'an extra attribute for the group', 'text'), ] self.ext_spec = GroupSpec('A test group extension', name='root_constructor_datatype', datasets=self.ext_datasets, 
attributes=self.ext_attributes, linkable=False, data_type_inc='EphysData', data_type_def='SpikeData') to_dump = {'groups': [self.spec, self.ext_spec]} self.specs_path = 'test_load_namespace.specs.yaml' self.namespace_path = 'test_load_namespace.namespace.yaml' with open(self.specs_path, 'w') as tmp: yaml.safe_dump(json.loads(json.dumps(to_dump)), tmp, default_flow_style=False) ns_dict = { 'doc': 'a test namespace', 'name': self.NS_NAME, 'schema': [ {'source': self.specs_path} ], 'version': '0.1.0' } self.namespace = SpecNamespace.build_namespace(**ns_dict) to_dump = {'namespaces': [self.namespace]} with open(self.namespace_path, 'w') as tmp: yaml.safe_dump(json.loads(json.dumps(to_dump)), tmp, default_flow_style=False) self.ns_catalog = NamespaceCatalog() def tearDown(self): if os.path.exists(self.namespace_path): os.remove(self.namespace_path) if os.path.exists(self.specs_path): os.remove(self.specs_path) def test_inherited_attributes(self): self.ns_catalog.load_namespaces(self.namespace_path, resolve=True) ts_spec = self.ns_catalog.get_spec(self.NS_NAME, 'EphysData') es_spec = self.ns_catalog.get_spec(self.NS_NAME, 'SpikeData') ts_attrs = {s.name for s in ts_spec.attributes} es_attrs = {s.name for s in es_spec.attributes} for attr in ts_attrs: with self.subTest(attr=attr): self.assertIn(attr, es_attrs) # self.assertSetEqual(ts_attrs, es_attrs) ts_dsets = {s.name for s in ts_spec.datasets} es_dsets = {s.name for s in es_spec.datasets} for dset in ts_dsets: with self.subTest(dset=dset): self.assertIn(dset, es_dsets) # self.assertSetEqual(ts_dsets, es_dsets) def test_inherited_attributes_not_resolved(self): self.ns_catalog.load_namespaces(self.namespace_path, resolve=False) es_spec = self.ns_catalog.get_spec(self.NS_NAME, 'SpikeData') src_attrs = {s.name for s in self.ext_attributes} ext_attrs = {s.name for s in es_spec.attributes} self.assertSetEqual(src_attrs, ext_attrs) src_dsets = {s.name for s in self.ext_datasets} ext_dsets = {s.name for s in es_spec.datasets} self.assertSetEqual(src_dsets, ext_dsets) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/spec_tests/test_ref_spec.py0000644000655200065520000000127600000000000023530 0ustar00circlecicircleci00000000000000import json from hdmf.spec import RefSpec from hdmf.testing import TestCase class RefSpecTests(TestCase): def test_constructor(self): spec = RefSpec('TimeSeries', 'object') self.assertEqual(spec.target_type, 'TimeSeries') self.assertEqual(spec.reftype, 'object') json.dumps(spec) # to ensure there are no circular links def test_wrong_reference_type(self): with self.assertRaises(ValueError): RefSpec('TimeSeries', 'unknownreftype') def test_isregion(self): spec = RefSpec('TimeSeries', 'object') self.assertFalse(spec.is_region()) spec = RefSpec('Data', 'region') self.assertTrue(spec.is_region()) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/spec_tests/test_spec_catalog.py0000644000655200065520000002070100000000000024360 0ustar00circlecicircleci00000000000000import copy from hdmf.spec import GroupSpec, DatasetSpec, AttributeSpec, SpecCatalog from hdmf.testing import TestCase class SpecCatalogTest(TestCase): def setUp(self): self.catalog = SpecCatalog() self.attributes = [ AttributeSpec('attribute1', 'my first attribute', 'text'), AttributeSpec('attribute2', 'my second attribute', 'text') ] self.spec = DatasetSpec('my first dataset', 'int', name='dataset1', dims=(None, 
None), attributes=self.attributes, linkable=False, data_type_def='EphysData') def test_register_spec(self): self.catalog.register_spec(self.spec, 'test.yaml') result = self.catalog.get_spec('EphysData') self.assertIs(result, self.spec) def test_hierarchy(self): spikes_spec = DatasetSpec('my extending dataset', 'int', data_type_inc='EphysData', data_type_def='SpikeData') lfp_spec = DatasetSpec('my second extending dataset', 'int', data_type_inc='EphysData', data_type_def='LFPData') self.catalog.register_spec(self.spec, 'test.yaml') self.catalog.register_spec(spikes_spec, 'test.yaml') self.catalog.register_spec(lfp_spec, 'test.yaml') spike_hierarchy = self.catalog.get_hierarchy('SpikeData') lfp_hierarchy = self.catalog.get_hierarchy('LFPData') ephys_hierarchy = self.catalog.get_hierarchy('EphysData') self.assertTupleEqual(spike_hierarchy, ('SpikeData', 'EphysData')) self.assertTupleEqual(lfp_hierarchy, ('LFPData', 'EphysData')) self.assertTupleEqual(ephys_hierarchy, ('EphysData',)) def test_subtypes(self): """ -BaseContainer--+-->AContainer--->ADContainer | +-->BContainer """ base_spec = GroupSpec(doc='Base container', data_type_def='BaseContainer') acontainer = GroupSpec(doc='AContainer', data_type_inc='BaseContainer', data_type_def='AContainer') adcontainer = GroupSpec(doc='ADContainer', data_type_inc='AContainer', data_type_def='ADContainer') bcontainer = GroupSpec(doc='BContainer', data_type_inc='BaseContainer', data_type_def='BContainer') self.catalog.register_spec(base_spec, 'test.yaml') self.catalog.register_spec(acontainer, 'test.yaml') self.catalog.register_spec(adcontainer, 'test.yaml') self.catalog.register_spec(bcontainer, 'test.yaml') base_spec_subtypes = self.catalog.get_subtypes('BaseContainer') base_spec_subtypes = tuple(sorted(base_spec_subtypes)) # Sort so we have a guaranteed order for comparison acontainer_subtypes = self.catalog.get_subtypes('AContainer') bcontainer_substypes = self.catalog.get_subtypes('BContainer') adcontainer_subtypes = self.catalog.get_subtypes('ADContainer') self.assertTupleEqual(adcontainer_subtypes, ()) self.assertTupleEqual(bcontainer_substypes, ()) self.assertTupleEqual(acontainer_subtypes, ('ADContainer',)) self.assertTupleEqual(base_spec_subtypes, ('AContainer', 'ADContainer', 'BContainer')) def test_subtypes_norecursion(self): """ -BaseContainer--+-->AContainer--->ADContainer | +-->BContainer """ base_spec = GroupSpec(doc='Base container', data_type_def='BaseContainer') acontainer = GroupSpec(doc='AContainer', data_type_inc='BaseContainer', data_type_def='AContainer') adcontainer = GroupSpec(doc='ADContainer', data_type_inc='AContainer', data_type_def='ADContainer') bcontainer = GroupSpec(doc='BContainer', data_type_inc='BaseContainer', data_type_def='BContainer') self.catalog.register_spec(base_spec, 'test.yaml') self.catalog.register_spec(acontainer, 'test.yaml') self.catalog.register_spec(adcontainer, 'test.yaml') self.catalog.register_spec(bcontainer, 'test.yaml') base_spec_subtypes = self.catalog.get_subtypes('BaseContainer', recursive=False) base_spec_subtypes = tuple(sorted(base_spec_subtypes)) # Sort so we have a guaranteed order for comparison acontainer_subtypes = self.catalog.get_subtypes('AContainer', recursive=False) bcontainer_substypes = self.catalog.get_subtypes('BContainer', recursive=False) adcontainer_subtypes = self.catalog.get_subtypes('ADContainer', recursive=False) self.assertTupleEqual(adcontainer_subtypes, ()) self.assertTupleEqual(bcontainer_substypes, ()) self.assertTupleEqual(acontainer_subtypes, ('ADContainer',)) 
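# with recursive=False, only direct subtypes of BaseContainer are returned, so ADContainer is excluded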
self.assertTupleEqual(base_spec_subtypes, ('AContainer', 'BContainer')) def test_subtypes_unknown_type(self): subtypes_of_bad_type = self.catalog.get_subtypes('UnknownType') self.assertTupleEqual(subtypes_of_bad_type, ()) def test_get_spec_source_file(self): spikes_spec = GroupSpec('test group', data_type_def='SpikeData') source_file_path = '/test/myt/test.yaml' self.catalog.auto_register(spikes_spec, source_file_path) recorded_source_file_path = self.catalog.get_spec_source_file('SpikeData') self.assertEqual(recorded_source_file_path, source_file_path) def test_get_full_hierarchy(self): """ BaseContainer--+-->AContainer--->ADContainer | +-->BContainer Expected output: >> print(json.dumps(full_hierarchy, indent=4)) >> { >> "BaseContainer": { >> "AContainer": { >> "ADContainer": {} >> }, >> "BContainer": {} >> } """ base_spec = GroupSpec(doc='Base container', data_type_def='BaseContainer') acontainer = GroupSpec(doc='AContainer', data_type_inc='BaseContainer', data_type_def='AContainer') adcontainer = GroupSpec(doc='ADContainer', data_type_inc='AContainer', data_type_def='ADContainer') bcontainer = GroupSpec(doc='BContainer', data_type_inc='BaseContainer', data_type_def='BContainer') self.catalog.register_spec(base_spec, 'test.yaml') self.catalog.register_spec(acontainer, 'test.yaml') self.catalog.register_spec(adcontainer, 'test.yaml') self.catalog.register_spec(bcontainer, 'test.yaml') full_hierarchy = self.catalog.get_full_hierarchy() expected_hierarchy = { "BaseContainer": { "AContainer": { "ADContainer": {} }, "BContainer": {} } } self.assertDictEqual(full_hierarchy, expected_hierarchy) def test_copy_spec_catalog(self): # Register the spec first self.catalog.register_spec(self.spec, 'test.yaml') result = self.catalog.get_spec('EphysData') self.assertIs(result, self.spec) # Now test the copy re = copy.copy(self.catalog) self.assertTupleEqual(self.catalog.get_registered_types(), re.get_registered_types()) def test_deepcopy_spec_catalog(self): # Register the spec first self.catalog.register_spec(self.spec, 'test.yaml') result = self.catalog.get_spec('EphysData') self.assertIs(result, self.spec) # Now test the copy re = copy.deepcopy(self.catalog) self.assertTupleEqual(self.catalog.get_registered_types(), re.get_registered_types()) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/spec_tests/test_spec_write.py0000644000655200065520000001743200000000000024107 0ustar00circlecicircleci00000000000000import os import datetime from hdmf.spec.write import NamespaceBuilder, YAMLSpecWriter, export_spec from hdmf.spec.namespace import SpecNamespace, NamespaceCatalog from hdmf.spec.spec import GroupSpec from hdmf.testing import TestCase class TestSpec(TestCase): def setUp(self): # create a builder for the namespace self.ns_name = "mylab" self.date = datetime.datetime.now() self.ns_builder = NamespaceBuilder(doc="mydoc", name=self.ns_name, full_name="My Laboratory", version="0.0.1", author="foo", contact="foo@bar.com", namespace_cls=SpecNamespace, date=self.date) # create extensions ext1 = GroupSpec('A custom DataSeries interface', attributes=[], datasets=[], groups=[], data_type_inc=None, data_type_def='MyDataSeries') ext2 = GroupSpec('An extension of a DataSeries interface', attributes=[], datasets=[], groups=[], data_type_inc='MyDataSeries', data_type_def='MyExtendedMyDataSeries') ext2.add_dataset(doc='test', dtype='float', name='testdata') self.data_types = [ext1, ext2] # add the extension self.ext_source_path = 
'mylab.extensions.yaml' self.namespace_path = 'mylab.namespace.yaml' def _test_extensions_file(self): with open(self.ext_source_path, 'r') as file: match_str = \ """groups: - data_type_def: MyDataSeries doc: A custom DataSeries interface - data_type_def: MyExtendedMyDataSeries data_type_inc: MyDataSeries doc: An extension of a DataSeries interface datasets: - name: testdata dtype: float doc: test """ # noqa: E128 nsstr = file.read() self.assertEqual(nsstr, match_str) class TestNamespaceBuilder(TestSpec): NS_NAME = 'test_ns' def setUp(self): super().setUp() for data_type in self.data_types: self.ns_builder.add_spec(source=self.ext_source_path, spec=data_type) self.ns_builder.add_source(source=self.ext_source_path, doc='Extensions for my lab', title='My lab extensions') self.ns_builder.export(self.namespace_path) def tearDown(self): if os.path.exists(self.ext_source_path): os.remove(self.ext_source_path) if os.path.exists(self.namespace_path): os.remove(self.namespace_path) def test_export_namespace(self): self._test_namespace_file() self._test_extensions_file() def _test_namespace_file(self): with open(self.namespace_path, 'r') as file: match_str = \ """namespaces: - author: foo contact: foo@bar.com date: '%s' doc: mydoc full_name: My Laboratory name: mylab schema: - doc: Extensions for my lab source: mylab.extensions.yaml title: Extensions for my lab version: 0.0.1 """ % self.date.isoformat() # noqa: E128 nsstr = file.read() self.assertEqual(nsstr, match_str) def test_read_namespace(self): ns_catalog = NamespaceCatalog() ns_catalog.load_namespaces(self.namespace_path, resolve=True) loaded_ns = ns_catalog.get_namespace(self.ns_name) self.assertEqual(loaded_ns.doc, "mydoc") self.assertEqual(loaded_ns.author, "foo") self.assertEqual(loaded_ns.contact, "foo@bar.com") self.assertEqual(loaded_ns.full_name, "My Laboratory") self.assertEqual(loaded_ns.name, "mylab") self.assertEqual(loaded_ns.date, self.date.isoformat()) self.assertDictEqual(loaded_ns.schema[0], {'doc': 'Extensions for my lab', 'source': 'mylab.extensions.yaml', 'title': 'Extensions for my lab'}) self.assertEqual(loaded_ns.version, "0.0.1") def test_get_source_files(self): ns_catalog = NamespaceCatalog() ns_catalog.load_namespaces(self.namespace_path, resolve=True) loaded_ns = ns_catalog.get_namespace(self.ns_name) self.assertListEqual(loaded_ns.get_source_files(), ['mylab.extensions.yaml']) def test_get_source_description(self): ns_catalog = NamespaceCatalog() ns_catalog.load_namespaces(self.namespace_path, resolve=True) loaded_ns = ns_catalog.get_namespace(self.ns_name) descr = loaded_ns.get_source_description('mylab.extensions.yaml') self.assertDictEqual(descr, {'doc': 'Extensions for my lab', 'source': 'mylab.extensions.yaml', 'title': 'Extensions for my lab'}) class TestYAMLSpecWrite(TestSpec): def setUp(self): super().setUp() for data_type in self.data_types: self.ns_builder.add_spec(source=self.ext_source_path, spec=data_type) self.ns_builder.add_source(source=self.ext_source_path, doc='Extensions for my lab', title='My lab extensions') def tearDown(self): if os.path.exists(self.ext_source_path): os.remove(self.ext_source_path) if os.path.exists(self.namespace_path): os.remove(self.namespace_path) def test_init(self): temp = YAMLSpecWriter('.') self.assertEqual(temp._YAMLSpecWriter__outdir, '.') def test_write_namespace(self): temp = YAMLSpecWriter() self.ns_builder.export(self.namespace_path, writer=temp) self._test_namespace_file() self._test_extensions_file() def test_get_name(self): self.assertEqual(self.ns_name, 
self.ns_builder.name) def _test_namespace_file(self): with open(self.namespace_path, 'r') as file: match_str = \ """namespaces: - author: foo contact: foo@bar.com date: '%s' doc: mydoc full_name: My Laboratory name: mylab schema: - doc: Extensions for my lab source: mylab.extensions.yaml title: Extensions for my lab version: 0.0.1 """ % self.date.isoformat() # noqa: E128 nsstr = file.read() self.assertEqual(nsstr, match_str) class TestExportSpec(TestSpec): def test_export(self): export_spec(self.ns_builder, self.data_types, '.') self._test_namespace_file() self._test_extensions_file() def tearDown(self): if os.path.exists(self.ext_source_path): os.remove(self.ext_source_path) if os.path.exists(self.namespace_path): os.remove(self.namespace_path) def _test_namespace_file(self): with open(self.namespace_path, 'r') as nsfile: nsstr = nsfile.read() match_str = \ """namespaces: - author: foo contact: foo@bar.com date: '%s' doc: mydoc full_name: My Laboratory name: mylab schema: - source: mylab.extensions.yaml version: 0.0.1 """ % self.date.isoformat() # noqa: E128 self.assertEqual(nsstr, match_str) def test_missing_data_types(self): with self.assertWarnsWith(UserWarning, 'No data types specified. Exiting.'): export_spec(self.ns_builder, [], '.') def test_missing_name(self): self.ns_builder._NamespaceBuilder__ns_args['name'] = None with self.assertRaisesWith(RuntimeError, 'Namespace name is required to export specs'): export_spec(self.ns_builder, self.data_types, '.') ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/test_container.py0000644000655200065520000001304400000000000021544 0ustar00circlecicircleci00000000000000import numpy as np from hdmf.container import AbstractContainer, Container, Data from hdmf.testing import TestCase class Subcontainer(Container): pass class TestContainer(TestCase): def test_constructor(self): """Test that constructor properly sets parent and both child and parent have an object_id """ parent_obj = Container('obj1') child_obj = Container.__new__(Container, parent=parent_obj) self.assertIs(child_obj.parent, parent_obj) self.assertIs(parent_obj.children[0], child_obj) self.assertIsNotNone(parent_obj.object_id) self.assertIsNotNone(child_obj.object_id) def test_constructor_object_id_none(self): """Test that setting object_id to None in __new__ is OK and the object ID is set on get """ parent_obj = Container('obj1') child_obj = Container.__new__(Container, parent=parent_obj, object_id=None) self.assertIsNotNone(child_obj.object_id) def test_set_parent(self): """Test that parent setter properly sets parent """ parent_obj = Container('obj1') child_obj = Container('obj2') child_obj.parent = parent_obj self.assertIs(child_obj.parent, parent_obj) self.assertIs(parent_obj.children[0], child_obj) def test_set_parent_overwrite(self): """Test that parent setter properly blocks overwriting """ parent_obj = Container('obj1') child_obj = Container('obj2') child_obj.parent = parent_obj self.assertIs(parent_obj.children[0], child_obj) another_obj = Container('obj3') with self.assertRaisesWith(ValueError, 'Cannot reassign parent to Container: %s. Parent is already: %s.' 
% (repr(child_obj), repr(child_obj.parent))): child_obj.parent = another_obj self.assertIs(child_obj.parent, parent_obj) self.assertIs(parent_obj.children[0], child_obj) def test_set_parent_overwrite_proxy(self): """Test that parent setter properly blocks overwriting with proxy/object """ child_obj = Container('obj2') child_obj.parent = object() with self.assertRaisesRegex(ValueError, r"Got None for parent of '[^/]+' - cannot overwrite Proxy with NoneType"): child_obj.parent = None def test_slash_restriction(self): self.assertRaises(ValueError, Container, 'bad/name') def test_set_modified_parent(self): """Test that set modified properly sets parent modified """ parent_obj = Container('obj1') child_obj = Container('obj2') child_obj.parent = parent_obj parent_obj.set_modified(False) child_obj.set_modified(False) self.assertFalse(child_obj.parent.modified) child_obj.set_modified() self.assertTrue(child_obj.parent.modified) def test_add_child(self): """Test that add child creates deprecation warning and also properly sets child's parent and modified """ parent_obj = Container('obj1') child_obj = Container('obj2') parent_obj.set_modified(False) with self.assertWarnsWith(DeprecationWarning, 'add_child is deprecated. Set the parent attribute instead.'): parent_obj.add_child(child_obj) self.assertIs(child_obj.parent, parent_obj) self.assertTrue(parent_obj.modified) self.assertIs(parent_obj.children[0], child_obj) def test_set_parent_exists(self): """Test that setting a parent a second time does nothing """ parent_obj = Container('obj1') child_obj = Container('obj2') child_obj3 = Container('obj3') child_obj.parent = parent_obj child_obj.parent = parent_obj child_obj3.parent = parent_obj self.assertEqual(len(parent_obj.children), 2) self.assertIs(parent_obj.children[0], child_obj) self.assertIs(parent_obj.children[1], child_obj3) def test_reassign_container_source(self): """Test that reassign container source throws error """ parent_obj = Container('obj1') parent_obj.container_source = 'a source' with self.assertRaisesWith(Exception, 'cannot reassign container_source'): parent_obj.container_source = 'some other source' def test_repr(self): parent_obj = Container('obj1') self.assertRegex(str(parent_obj), r"obj1 hdmf.container.Container at 0x\d+") def test_type_hierarchy(self): self.assertEqual(Container.type_hierarchy(), (Container, AbstractContainer, object)) self.assertEqual(Subcontainer.type_hierarchy(), (Subcontainer, Container, AbstractContainer, object)) class TestData(TestCase): def test_bool_true(self): """Test that __bool__ method works correctly on data with len """ data_obj = Data('my_data', [1, 2, 3, 4, 5]) self.assertTrue(data_obj) def test_bool_false(self): """Test that __bool__ method works correctly on empty data """ data_obj = Data('my_data', []) self.assertFalse(data_obj) def test_shape_nparray(self): """ Test that shape works for np.array """ data_obj = Data('my_data', np.arange(10).reshape(2, 5)) self.assertTupleEqual(data_obj.shape, (2, 5)) def test_shape_list(self): """ Test that shape works for np.array """ data_obj = Data('my_data', [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]) self.assertTupleEqual(data_obj.shape, (2, 5)) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/test_io_hdf5.py0000644000655200065520000002216000000000000021076 0ustar00circlecicircleci00000000000000import os from h5py import File, Dataset, Reference from numbers import Number import json import numpy as np from hdmf.backends.hdf5 
import HDF5IO from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder from hdmf.utils import get_data_shape from hdmf.testing import TestCase from tests.unit.utils import Foo from tests.unit.test_io_hdf5_h5tools import _get_manager class HDF5Encoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, Dataset): ret = None for t in (list, str): try: ret = t(obj) break except: # noqa: F722 pass if ret is None: return obj else: return ret elif isinstance(obj, np.int64): return int(obj) elif isinstance(obj, bytes): return str(obj) return json.JSONEncoder.default(self, obj) class GroupBuilderTestCase(TestCase): ''' A TestCase class for comparing GroupBuilders. ''' def __is_scalar(self, obj): if hasattr(obj, 'shape'): return len(obj.shape) == 0 else: if any(isinstance(obj, t) for t in (int, str, float, bytes, str)): return True return False def __convert_h5_scalar(self, obj): if isinstance(obj, Dataset): return obj[...] return obj def __compare_attr_dicts(self, a, b): reasons = list() b_keys = set(b.keys()) for k in a: if k not in b: reasons.append("'%s' attribute missing from second dataset" % k) else: if a[k] != b[k]: reasons.append("'%s' attribute on datasets not equal" % k) b_keys.remove(k) for k in b_keys: reasons.append("'%s' attribute missing from first dataset" % k) return reasons def __compare_dataset(self, a, b): reasons = self.__compare_attr_dicts(a.attributes, b.attributes) if not self.__compare_data(a.data, b.data): reasons.append("dataset '%s' not equal" % a.name) return reasons def __compare_data(self, a, b): if isinstance(a, Number) and isinstance(b, Number): return a == b elif isinstance(a, Number) != isinstance(b, Number): return False else: a_scalar = self.__is_scalar(a) b_scalar = self.__is_scalar(b) if a_scalar and b_scalar: return self.__convert_h5_scalar(a_scalar) == self.__convert_h5_scalar(b_scalar) elif a_scalar != b_scalar: return False if len(a) == len(b): for i in range(len(a)): if not self.__compare_data(a[i], b[i]): return False else: return False return True def __fmt(self, val): return "%s (%s)" % (val, type(val)) def __assert_helper(self, a, b): reasons = list() b_keys = set(b.keys()) for k, a_sub in a.items(): if k in b: b_sub = b[k] b_keys.remove(k) if isinstance(a_sub, LinkBuilder) and isinstance(a_sub, LinkBuilder): a_sub = a_sub['builder'] b_sub = b_sub['builder'] elif isinstance(a_sub, LinkBuilder) != isinstance(a_sub, LinkBuilder): reasons.append('%s != %s' % (a_sub, b_sub)) if isinstance(a_sub, DatasetBuilder) and isinstance(a_sub, DatasetBuilder): # if not self.__compare_dataset(a_sub, b_sub): # reasons.append('%s != %s' % (a_sub, b_sub)) reasons.extend(self.__compare_dataset(a_sub, b_sub)) elif isinstance(a_sub, GroupBuilder) and isinstance(a_sub, GroupBuilder): reasons.extend(self.__assert_helper(a_sub, b_sub)) else: equal = None a_array = isinstance(a_sub, np.ndarray) b_array = isinstance(b_sub, np.ndarray) if a_array and b_array: equal = np.array_equal(a_sub, b_sub) elif a_array or b_array: # if strings, convert before comparing if b_array: if b_sub.dtype.char in ('S', 'U'): a_sub = [np.string_(s) for s in a_sub] else: if a_sub.dtype.char in ('S', 'U'): b_sub = [np.string_(s) for s in b_sub] equal = np.array_equal(a_sub, b_sub) else: equal = a_sub == b_sub if not equal: reasons.append('%s != %s' % (self.__fmt(a_sub), self.__fmt(b_sub))) else: reasons.append("'%s' not in both" % k) for k in b_keys: reasons.append("'%s' not in both" % k) return reasons def assertBuilderEqual(self, a, b): ''' Tests that two GroupBuilders are equal ''' 
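# --- Illustrative sketch (not part of the original test suite) ----------------
# assertBuilderEqual (defined here) walks two builder trees and collects every
# mismatch before failing. For context, two equivalent ways of describing the
# same group/dataset layout with hdmf builders are sketched below (hdmf 1.5.x;
# the names and values are illustrative only).
from hdmf.build import GroupBuilder, DatasetBuilder

built_all_at_once = GroupBuilder(
    'root',
    datasets={'my_data': DatasetBuilder('my_data', [1, 2, 3], attributes={'unit': 'seconds'})},
    attributes={'description': 'sketch'})

built_incrementally = GroupBuilder('root')
built_incrementally.add_dataset('my_data', [1, 2, 3], attributes={'unit': 'seconds'})
built_incrementally.set_attribute('description', 'sketch')
# Both builders describe the same layout, which is the kind of pair a comparison
# helper like assertBuilderEqual is meant to accept.
# -------------------------------------------------------------------------------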
reasons = self.__assert_helper(a, b) if len(reasons): raise AssertionError(', '.join(reasons)) return True class TestHDF5Writer(GroupBuilderTestCase): def setUp(self): self.manager = _get_manager() self.path = "test_io_hdf5.h5" self.foo_builder = GroupBuilder('foo1', attributes={'data_type': 'Foo', 'namespace': 'test_core', 'attr1': "bar", 'object_id': -1}, datasets={'my_data': DatasetBuilder('my_data', list(range(100, 200, 10)), attributes={'attr2': 17})}) self.foo = Foo('foo1', list(range(100, 200, 10)), attr1="bar", attr2=17, attr3=3.14) self.manager.prebuilt(self.foo, self.foo_builder) self.builder = GroupBuilder( 'root', source=self.path, groups={'test_bucket': GroupBuilder('test_bucket', groups={'foo_holder': GroupBuilder('foo_holder', groups={'foo1': self.foo_builder})})}, attributes={'data_type': 'FooFile'}) def tearDown(self): if os.path.exists(self.path): os.remove(self.path) def check_fields(self): f = File(self.path, 'r') self.assertIn('test_bucket', f) bucket = f.get('test_bucket') self.assertIn('foo_holder', bucket) holder = bucket.get('foo_holder') self.assertIn('foo1', holder) return f def test_write_builder(self): writer = HDF5IO(self.path, manager=self.manager, mode='a') writer.write_builder(self.builder) writer.close() self.check_fields() def test_write_attribute_reference_container(self): writer = HDF5IO(self.path, manager=self.manager, mode='a') self.builder.set_attribute('ref_attribute', self.foo) writer.write_builder(self.builder) writer.close() f = self.check_fields() self.assertIsInstance(f.attrs['ref_attribute'], Reference) self.assertEqual(f['test_bucket/foo_holder/foo1'], f[f.attrs['ref_attribute']]) def test_write_attribute_reference_builder(self): writer = HDF5IO(self.path, manager=self.manager, mode='a') self.builder.set_attribute('ref_attribute', self.foo_builder) writer.write_builder(self.builder) writer.close() f = self.check_fields() self.assertIsInstance(f.attrs['ref_attribute'], Reference) self.assertEqual(f['test_bucket/foo_holder/foo1'], f[f.attrs['ref_attribute']]) def test_write_context_manager(self): with HDF5IO(self.path, manager=self.manager, mode='a') as writer: writer.write_builder(self.builder) self.check_fields() def test_read_builder(self): self.maxDiff = None io = HDF5IO(self.path, manager=self.manager, mode='a') io.write_builder(self.builder) builder = io.read_builder() self.assertBuilderEqual(builder, self.builder) io.close() def test_overwrite_written(self): self.maxDiff = None io = HDF5IO(self.path, manager=self.manager, mode='a') io.write_builder(self.builder) builder = io.read_builder() with self.assertRaisesWith(ValueError, "cannot change written to not written"): builder.written = False io.close() def test_dataset_shape(self): self.maxDiff = None io = HDF5IO(self.path, manager=self.manager, mode='a') io.write_builder(self.builder) builder = io.read_builder() dset = builder['test_bucket']['foo_holder']['foo1']['my_data'].data self.assertEqual(get_data_shape(dset), (10,)) io.close() ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/test_io_hdf5_h5tools.py0000644000655200065520000016362700000000000022571 0ustar00circlecicircleci00000000000000import os import unittest import tempfile import warnings import numpy as np from hdmf.utils import docval, getargs from hdmf.data_utils import DataChunkIterator, InvalidDataIOError from hdmf.backends.hdf5.h5tools import HDF5IO, ROOT_NAME from hdmf.backends.hdf5 import H5DataIO from hdmf.backends.io import 
UnsupportedOperation from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, ObjectMapper from hdmf.spec.namespace import NamespaceCatalog from hdmf.spec.spec import AttributeSpec, DatasetSpec, GroupSpec, ZERO_OR_MANY, ONE_OR_MANY from hdmf.spec.namespace import SpecNamespace from hdmf.spec.catalog import SpecCatalog from hdmf.container import Container from hdmf.testing import TestCase from h5py import SoftLink, HardLink, ExternalLink, File from h5py import filters as h5py_filters from tests.unit.utils import Foo, FooBucket, CORE_NAMESPACE class FooFile(Container): @docval({'name': 'buckets', 'type': list, 'doc': 'the FooBuckets in this file', 'default': list()}) def __init__(self, **kwargs): buckets = getargs('buckets', kwargs) super().__init__(name=ROOT_NAME) # name is not used - FooFile should be the root container self.__buckets = buckets for f in self.__buckets: f.parent = self def __eq__(self, other): return set(self.buckets) == set(other.buckets) def __str__(self): foo_str = "[" + ",".join(str(f) for f in self.buckets) + "]" return 'buckets=%s' % foo_str @property def buckets(self): return self.__buckets def get_temp_filepath(): # On Windows, h5py cannot truncate an open file in write mode. # The temp file will be closed before h5py truncates it and will be removed during the tearDown step. temp_file = tempfile.NamedTemporaryFile() temp_file.close() return temp_file.name class H5IOTest(TestCase): """Tests for h5tools IO tools""" def setUp(self): self.path = get_temp_filepath() self.io = HDF5IO(self.path, mode='a') self.f = self.io._file def tearDown(self): self.io.close() os.remove(self.path) ########################################## # __chunked_iter_fill__(...) tests ########################################## def test__chunked_iter_fill(self): """Matrix test of HDF5IO.__chunked_iter_fill__ using a DataChunkIterator with different parameters""" data_opts = {'iterator': range(10), 'numpy': np.arange(30).reshape(5, 2, 3), 'list': np.arange(30).reshape(5, 2, 3).tolist(), 'sparselist1': [1, 2, 3, None, None, None, None, 8, 9, 10], 'sparselist2': [None, None, 3], 'sparselist3': [1, 2, 3, None, None], # note: cannot process None in ndarray 'nanlist': [[[1, 2, 3, np.nan, np.nan, 6], [np.nan, np.nan, 3, 4, np.nan, np.nan]], [[10, 20, 30, 40, np.nan, np.nan], [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]]]} buffer_size_opts = [1, 2, 3, 4] # data is divisible by some of these, some not for data_type, data in data_opts.items(): iter_axis_opts = [0, 1, 2] if data_type == 'iterator' or data_type.startswith('sparselist'): iter_axis_opts = [0] # only one dimension for iter_axis in iter_axis_opts: for buffer_size in buffer_size_opts: with self.subTest(data_type=data_type, iter_axis=iter_axis, buffer_size=buffer_size): with warnings.catch_warnings(record=True) as w: dci = DataChunkIterator(data=data, buffer_size=buffer_size, iter_axis=iter_axis) if len(w) <= 1: # init may throw UserWarning for iterating over not-first dim of a list. 
ignore here pass dset_name = '%s, %d, %d' % (data_type, iter_axis, buffer_size) my_dset = HDF5IO.__chunked_iter_fill__(self.f, dset_name, dci) if data_type == 'iterator': self.assertListEqual(my_dset[:].tolist(), list(data)) elif data_type == 'numpy': self.assertTrue(np.all(my_dset[:] == data)) self.assertTupleEqual(my_dset.shape, data.shape) elif data_type == 'list' or data_type == 'nanlist': data_np = np.array(data) np.testing.assert_array_equal(my_dset[:], data_np) self.assertTupleEqual(my_dset.shape, data_np.shape) elif data_type.startswith('sparselist'): # replace None in original data with default hdf5 fillvalue 0 data_zeros = np.where(np.equal(np.array(data), None), 0, data) np.testing.assert_array_equal(my_dset[:], data_zeros) self.assertTupleEqual(my_dset.shape, data_zeros.shape) ########################################## # write_dataset tests: scalars ########################################## def test_write_dataset_scalar(self): a = 10 self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] self.assertTupleEqual(dset.shape, ()) self.assertEqual(dset[()], a) def test_write_dataset_string(self): a = 'test string' self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] self.assertTupleEqual(dset.shape, ()) # self.assertEqual(dset[()].decode('utf-8'), a) self.assertEqual(dset[()], a) ########################################## # write_dataset tests: lists ########################################## def test_write_dataset_list(self): a = np.arange(30).reshape(5, 2, 3) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a.tolist(), attributes={})) dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a)) def test_write_dataset_list_compress_gzip(self): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression='gzip', compression_opts=5, shuffle=True, fletcher32=True) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertEqual(dset.compression, 'gzip') self.assertEqual(dset.compression_opts, 5) self.assertEqual(dset.shuffle, True) self.assertEqual(dset.fletcher32, True) @unittest.skipIf("lzf" not in h5py_filters.encode, "LZF compression not supported in this h5py library install") def test_write_dataset_list_compress_lzf(self): warn_msg = ("lzf compression may not be available on all installations of HDF5. Use of gzip is " "recommended to ensure portability of the generated HDF5 files.") with self.assertWarnsWith(UserWarning, warn_msg): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression='lzf', shuffle=True, fletcher32=True) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertEqual(dset.compression, 'lzf') self.assertEqual(dset.shuffle, True) self.assertEqual(dset.fletcher32, True) @unittest.skipIf("szip" not in h5py_filters.encode, "SZIP compression not supported in this h5py library install") def test_write_dataset_list_compress_szip(self): warn_msg = ("szip compression may not be available on all installations of HDF5. 
Use of gzip is " "recommended to ensure portability of the generated HDF5 files.") with self.assertWarnsWith(UserWarning, warn_msg): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression='szip', compression_opts=('ec', 16), shuffle=True, fletcher32=True) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertEqual(dset.compression, 'szip') self.assertEqual(dset.shuffle, True) self.assertEqual(dset.fletcher32, True) def test_write_dataset_list_compress_available_int_filters(self): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression=1, shuffle=True, fletcher32=True, allow_plugin_filters=True) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertEqual(dset.compression, 'gzip') self.assertEqual(dset.shuffle, True) self.assertEqual(dset.fletcher32, True) def test_write_dataset_list_enable_default_compress(self): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression=True) self.assertEqual(a.io_settings['compression'], 'gzip') self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertEqual(dset.compression, 'gzip') def test_write_dataset_list_disable_default_compress(self): with warnings.catch_warnings(record=True) as w: a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression=False, compression_opts=5) self.assertEqual(len(w), 1) # We expect a warning that compression options are being ignored self.assertFalse('compression_ops' in a.io_settings) self.assertFalse('compression' in a.io_settings) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertEqual(dset.compression, None) def test_write_dataset_list_chunked(self): a = H5DataIO(np.arange(30).reshape(5, 2, 3), chunks=(1, 1, 3)) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertEqual(dset.chunks, (1, 1, 3)) def test_write_dataset_list_fillvalue(self): a = H5DataIO(np.arange(20).reshape(5, 4), fillvalue=-1) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertEqual(dset.fillvalue, -1) ########################################## # write_dataset tests: tables ########################################## def test_write_table(self): cmpd_dt = np.dtype([('a', np.int32), ('b', np.float64)]) data = np.zeros(10, dtype=cmpd_dt) data['a'][1] = 101 data['b'][1] = 0.1 dt = [{'name': 'a', 'dtype': 'int32', 'doc': 'a column'}, {'name': 'b', 'dtype': 'float64', 'doc': 'b column'}] self.io.write_dataset(self.f, DatasetBuilder('test_dataset', data, attributes={}, dtype=dt)) dset = self.f['test_dataset'] self.assertEqual(dset['a'].tolist(), data['a'].tolist()) self.assertEqual(dset['b'].tolist(), data['b'].tolist()) def test_write_table_nested(self): b_cmpd_dt = np.dtype([('c', np.int32), ('d', np.float64)]) cmpd_dt = np.dtype([('a', np.int32), ('b', b_cmpd_dt)]) data = np.zeros(10, dtype=cmpd_dt) data['a'][1] = 101 data['b']['c'] = 202 data['b']['d'] = 10.1 b_dt = [{'name': 'c', 'dtype': 'int32', 'doc': 'c column'}, {'name': 'd', 'dtype': 'float64', 'doc': 'd column'}] dt = [{'name': 'a', 'dtype': 'int32', 
'doc': 'a column'}, {'name': 'b', 'dtype': b_dt, 'doc': 'b column'}] self.io.write_dataset(self.f, DatasetBuilder('test_dataset', data, attributes={}, dtype=dt)) dset = self.f['test_dataset'] self.assertEqual(dset['a'].tolist(), data['a'].tolist()) self.assertEqual(dset['b'].tolist(), data['b'].tolist()) ########################################## # write_dataset tests: Iterable ########################################## def test_write_dataset_iterable(self): self.io.write_dataset(self.f, DatasetBuilder('test_dataset', range(10), attributes={})) dset = self.f['test_dataset'] self.assertListEqual(dset[:].tolist(), list(range(10))) def test_write_dataset_iterable_multidimensional_array(self): a = np.arange(30).reshape(5, 2, 3) aiter = iter(a) daiter = DataChunkIterator.from_iterable(aiter, buffer_size=2) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', daiter, attributes={})) dset = self.f['test_dataset'] self.assertListEqual(dset[:].tolist(), a.tolist()) def test_write_multi_dci_oaat(self): """ Test writing multiple DataChunkIterators, one at a time """ a = np.arange(30).reshape(5, 2, 3) b = np.arange(30, 60).reshape(5, 2, 3) aiter = iter(a) biter = iter(b) daiter1 = DataChunkIterator.from_iterable(aiter, buffer_size=2) daiter2 = DataChunkIterator.from_iterable(biter, buffer_size=2) builder = GroupBuilder("root") builder.add_dataset('test_dataset1', daiter1, attributes={}) builder.add_dataset('test_dataset2', daiter2, attributes={}) self.io.write_builder(builder) dset1 = self.f['test_dataset1'] self.assertListEqual(dset1[:].tolist(), a.tolist()) dset2 = self.f['test_dataset2'] self.assertListEqual(dset2[:].tolist(), b.tolist()) def test_write_multi_dci_conc(self): """ Test writing multiple DataChunkIterators, concurrently """ a = np.arange(30).reshape(5, 2, 3) b = np.arange(30, 60).reshape(5, 2, 3) aiter = iter(a) biter = iter(b) daiter1 = DataChunkIterator.from_iterable(aiter, buffer_size=2) daiter2 = DataChunkIterator.from_iterable(biter, buffer_size=2) builder = GroupBuilder("root") builder.add_dataset('test_dataset1', daiter1, attributes={}) builder.add_dataset('test_dataset2', daiter2, attributes={}) self.io.write_builder(builder) dset1 = self.f['test_dataset1'] self.assertListEqual(dset1[:].tolist(), a.tolist()) dset2 = self.f['test_dataset2'] self.assertListEqual(dset2[:].tolist(), b.tolist()) def test_write_dataset_iterable_multidimensional_array_compression(self): a = np.arange(30).reshape(5, 2, 3) aiter = iter(a) daiter = DataChunkIterator.from_iterable(aiter, buffer_size=2) wrapped_daiter = H5DataIO(data=daiter, compression='gzip', compression_opts=5, shuffle=True, fletcher32=True) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_daiter, attributes={})) dset = self.f['test_dataset'] self.assertEqual(dset.shape, a.shape) self.assertListEqual(dset[:].tolist(), a.tolist()) self.assertEqual(dset.compression, 'gzip') self.assertEqual(dset.compression_opts, 5) self.assertEqual(dset.shuffle, True) self.assertEqual(dset.fletcher32, True) ############################################# # write_dataset tests: data chunk iterator ############################################# def test_write_dataset_data_chunk_iterator(self): dci = DataChunkIterator(data=np.arange(10), buffer_size=2) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', dci, attributes={}, dtype=dci.dtype)) dset = self.f['test_dataset'] self.assertListEqual(dset[:].tolist(), list(range(10))) self.assertEqual(dset[:].dtype, dci.dtype) def 
test_write_dataset_data_chunk_iterator_with_compression(self): dci = DataChunkIterator(data=np.arange(10), buffer_size=2) wrapped_dci = H5DataIO(data=dci, compression='gzip', compression_opts=5, shuffle=True, fletcher32=True, chunks=(2,)) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_dci, attributes={})) dset = self.f['test_dataset'] self.assertListEqual(dset[:].tolist(), list(range(10))) self.assertEqual(dset.compression, 'gzip') self.assertEqual(dset.compression_opts, 5) self.assertEqual(dset.shuffle, True) self.assertEqual(dset.fletcher32, True) self.assertEqual(dset.chunks, (2,)) def test_pass_through_of_recommended_chunks(self): class DC(DataChunkIterator): def recommended_chunk_shape(self): return (5, 1, 1) dci = DC(data=np.arange(30).reshape(5, 2, 3)) wrapped_dci = H5DataIO(data=dci, compression='gzip', compression_opts=5, shuffle=True, fletcher32=True) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_dci, attributes={})) dset = self.f['test_dataset'] self.assertEqual(dset.chunks, (5, 1, 1)) self.assertEqual(dset.compression, 'gzip') self.assertEqual(dset.compression_opts, 5) self.assertEqual(dset.shuffle, True) self.assertEqual(dset.fletcher32, True) def test_dci_h5dataset(self): data = np.arange(30).reshape(5, 2, 3) dci1 = DataChunkIterator(data=data, buffer_size=1, iter_axis=0) HDF5IO.__chunked_iter_fill__(self.f, 'test_dataset', dci1) dset = self.f['test_dataset'] dci2 = DataChunkIterator(data=dset, buffer_size=2, iter_axis=2) chunk = dci2.next() self.assertTupleEqual(chunk.shape, (5, 2, 2)) chunk = dci2.next() self.assertTupleEqual(chunk.shape, (5, 2, 1)) # TODO test chunk data, shape, selection self.assertTupleEqual(dci2.recommended_data_shape(), data.shape) self.assertIsNone(dci2.recommended_chunk_shape()) def test_dci_h5dataset_sparse_matched(self): data = [1, 2, 3, None, None, None, None, 8, 9, 10] dci1 = DataChunkIterator(data=data, buffer_size=3) HDF5IO.__chunked_iter_fill__(self.f, 'test_dataset', dci1) dset = self.f['test_dataset'] dci2 = DataChunkIterator(data=dset, buffer_size=2) # dataset is read such that Nones in original data were not written, but are read as 0s self.assertTupleEqual(dci2.maxshape, (10,)) self.assertEqual(dci2.dtype, np.dtype(int)) count = 0 for chunk in dci2: self.assertEqual(len(chunk.selection), 1) if count == 0: self.assertListEqual(chunk.data.tolist(), [1, 2]) self.assertEqual(chunk.selection[0], slice(0, 2)) elif count == 1: self.assertListEqual(chunk.data.tolist(), [3, 0]) self.assertEqual(chunk.selection[0], slice(2, 4)) elif count == 2: self.assertListEqual(chunk.data.tolist(), [0, 0]) self.assertEqual(chunk.selection[0], slice(4, 6)) elif count == 3: self.assertListEqual(chunk.data.tolist(), [0, 8]) self.assertEqual(chunk.selection[0], slice(6, 8)) elif count == 4: self.assertListEqual(chunk.data.tolist(), [9, 10]) self.assertEqual(chunk.selection[0], slice(8, 10)) count += 1 self.assertEqual(count, 5) self.assertTupleEqual(dci2.recommended_data_shape(), (10,)) self.assertIsNone(dci2.recommended_chunk_shape()) def test_dci_h5dataset_sparse_unmatched(self): data = [1, 2, 3, None, None, None, None, 8, 9, 10] dci1 = DataChunkIterator(data=data, buffer_size=3) HDF5IO.__chunked_iter_fill__(self.f, 'test_dataset', dci1) dset = self.f['test_dataset'] dci2 = DataChunkIterator(data=dset, buffer_size=4) # dataset is read such that Nones in original data were not written, but are read as 0s self.assertTupleEqual(dci2.maxshape, (10,)) self.assertEqual(dci2.dtype, np.dtype(int)) count = 0 for chunk in dci2: 
self.assertEqual(len(chunk.selection), 1) if count == 0: self.assertListEqual(chunk.data.tolist(), [1, 2, 3, 0]) self.assertEqual(chunk.selection[0], slice(0, 4)) elif count == 1: self.assertListEqual(chunk.data.tolist(), [0, 0, 0, 8]) self.assertEqual(chunk.selection[0], slice(4, 8)) elif count == 2: self.assertListEqual(chunk.data.tolist(), [9, 10]) self.assertEqual(chunk.selection[0], slice(8, 10)) count += 1 self.assertEqual(count, 3) self.assertTupleEqual(dci2.recommended_data_shape(), (10,)) self.assertIsNone(dci2.recommended_chunk_shape()) def test_dci_h5dataset_scalar(self): data = [1] dci1 = DataChunkIterator(data=data, buffer_size=3) HDF5IO.__chunked_iter_fill__(self.f, 'test_dataset', dci1) dset = self.f['test_dataset'] dci2 = DataChunkIterator(data=dset, buffer_size=4) # dataset is read such that Nones in original data were not written, but are read as 0s self.assertTupleEqual(dci2.maxshape, (1,)) self.assertEqual(dci2.dtype, np.dtype(int)) count = 0 for chunk in dci2: self.assertEqual(len(chunk.selection), 1) if count == 0: self.assertListEqual(chunk.data.tolist(), [1]) self.assertEqual(chunk.selection[0], slice(0, 1)) count += 1 self.assertEqual(count, 1) self.assertTupleEqual(dci2.recommended_data_shape(), (1,)) self.assertIsNone(dci2.recommended_chunk_shape()) ############################################# # H5DataIO general ############################################# def test_warning_on_non_gzip_compression(self): # Make sure no warning is issued when using gzip with warnings.catch_warnings(record=True) as w: dset = H5DataIO(np.arange(30), compression='gzip') self.assertEqual(len(w), 0) self.assertEqual(dset.io_settings['compression'], 'gzip') # Make sure a warning is issued when using szip (even if installed) if "szip" in h5py_filters.encode: with warnings.catch_warnings(record=True) as w: dset = H5DataIO(np.arange(30), compression='szip', compression_opts=('ec', 16)) self.assertEqual(len(w), 1) self.assertEqual(dset.io_settings['compression'], 'szip') else: with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='szip', compression_opts=('ec', 16)) # Make sure a warning is issued when using lzf compression with warnings.catch_warnings(record=True) as w: dset = H5DataIO(np.arange(30), compression='lzf') self.assertEqual(len(w), 1) self.assertEqual(dset.io_settings['compression'], 'lzf') def test_error_on_unsupported_compression_filter(self): # Make sure gzip does not raise an error try: H5DataIO(np.arange(30), compression='gzip', compression_opts=5) except ValueError: self.fail("Using gzip compression raised a ValueError when it should not") # Make sure szip raises an error if not installed (or does not raise an error if installed) warn_msg = ("szip compression may not be available on all installations of HDF5. Use of gzip is " "recommended to ensure portability of the generated HDF5 files.") if "szip" not in h5py_filters.encode: with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='szip', compression_opts=('ec', 16)) else: try: with self.assertWarnsWith(UserWarning, warn_msg): H5DataIO(np.arange(30), compression='szip', compression_opts=('ec', 16)) except ValueError: self.fail("SZIP is installed but H5DataIO still raises an error") # Test error on illegal (i.e., a made-up compressor) with self.assertRaises(ValueError): warn_msg = ("unknown compression may not be available on all installations of HDF5. 
Use of gzip is " "recommended to ensure portability of the generated HDF5 files.") with self.assertWarnsWith(UserWarning, warn_msg): H5DataIO(np.arange(30), compression="unknown") # Make sure passing int compression filter raise an error if not installed if not h5py_filters.h5z.filter_avail(h5py_filters.h5z.FILTER_MAX): with self.assertRaises(ValueError): warn_msg = ("%i compression may not be available on all installations of HDF5. Use of gzip is " "recommended to ensure portability of the generated HDF5 files." % h5py_filters.h5z.FILTER_MAX) with self.assertWarnsWith(UserWarning, warn_msg): H5DataIO(np.arange(30), compression=h5py_filters.h5z.FILTER_MAX, allow_plugin_filters=True) # Make sure available int compression filters raise an error without passing allow_plugin_filters=True with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression=h5py_filters.h5z.FILTER_DEFLATE) def test_value_error_on_incompatible_compression_opts(self): # Make sure we warn when gzip with szip compression options is used with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='gzip', compression_opts=('ec', 16)) # Make sure we warn if gzip with a too high agression is used with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='gzip', compression_opts=100) # Make sure we warn if lzf with gzip compression option is used with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='lzf', compression_opts=5) # Make sure we warn if lzf with szip compression option is used with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='lzf', compression_opts=('ec', 16)) # Make sure we warn if szip with gzip compression option is used with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='szip', compression_opts=4) # Make sure szip raises a ValueError if bad options are used (odd compression option) with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='szip', compression_opts=('ec', 3)) # Make sure szip raises a ValueError if bad options are used (bad methos) with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='szip', compression_opts=('bad_method', 16)) def test_warning_on_linking_of_regular_array(self): with warnings.catch_warnings(record=True) as w: dset = H5DataIO(np.arange(30), link_data=True) self.assertEqual(len(w), 1) self.assertEqual(dset.link_data, False) def test_warning_on_setting_io_options_on_h5dataset_input(self): self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={})) with warnings.catch_warnings(record=True) as w: H5DataIO(self.f['test_dataset'], compression='gzip', compression_opts=4, fletcher32=True, shuffle=True, maxshape=(10, 20), chunks=(10,), fillvalue=100) self.assertEqual(len(w), 7) def test_h5dataio_array_conversion_numpy(self): # Test that H5DataIO.__array__ is working when wrapping an ndarray test_speed = np.array([10., 20.]) data = H5DataIO((test_speed)) self.assertTrue(np.all(np.isfinite(data))) # Force call of H5DataIO.__array__ def test_h5dataio_array_conversion_list(self): # Test that H5DataIO.__array__ is working when wrapping a python list test_speed = [10., 20.] 
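# --- Illustrative sketch (not part of the original test suite) ----------------
# The tests in this block check that H5DataIO validates compression/chunking
# options up front and still behaves like an array. A minimal sketch of the
# wrapper itself (hdmf 1.5.x; the settings shown are illustrative):
import numpy as np
from hdmf.backends.hdf5 import H5DataIO

wrapped = H5DataIO(np.arange(30).reshape(5, 2, 3),
                   compression='gzip',        # gzip is the portable filter; lzf/szip trigger a warning
                   compression_opts=4,
                   shuffle=True,
                   fletcher32=True,
                   chunks=(1, 2, 3))
assert wrapped.io_settings['compression'] == 'gzip'   # settings are recorded for the eventual write
assert np.all(np.isfinite(wrapped))                   # __array__ lets numpy see the wrapped data
# -------------------------------------------------------------------------------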
data = H5DataIO(test_speed) self.assertTrue(np.all(np.isfinite(data))) # Force call of H5DataIO.__array__ def test_h5dataio_array_conversion_datachunkiterator(self): # Test that H5DataIO.__array__ is working when wrapping a python list test_speed = DataChunkIterator(data=[10., 20.]) data = H5DataIO(test_speed) with self.assertRaises(NotImplementedError): np.isfinite(data) # Force call of H5DataIO.__array__ ############################################# # Copy/Link h5py.Dataset object ############################################# def test_link_h5py_dataset_input(self): self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={})) self.io.write_dataset(self.f, DatasetBuilder('test_softlink', self.f['test_dataset'], attributes={})) self.assertTrue(isinstance(self.f.get('test_softlink', getlink=True), SoftLink)) def test_copy_h5py_dataset_input(self): self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={})) self.io.write_dataset(self.f, DatasetBuilder('test_copy', self.f['test_dataset'], attributes={}), link_data=False) self.assertTrue(isinstance(self.f.get('test_copy', getlink=True), HardLink)) self.assertListEqual(self.f['test_dataset'][:].tolist(), self.f['test_copy'][:].tolist()) def test_link_h5py_dataset_h5dataio_input(self): self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={})) self.io.write_dataset(self.f, DatasetBuilder('test_softlink', H5DataIO(data=self.f['test_dataset'], link_data=True), attributes={})) self.assertTrue(isinstance(self.f.get('test_softlink', getlink=True), SoftLink)) def test_copy_h5py_dataset_h5dataio_input(self): self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={})) self.io.write_dataset(self.f, DatasetBuilder('test_copy', H5DataIO(data=self.f['test_dataset'], link_data=False), # Force dataset copy attributes={}), link_data=True) # Make sure the default behavior is set to link the data self.assertTrue(isinstance(self.f.get('test_copy', getlink=True), HardLink)) self.assertListEqual(self.f['test_dataset'][:].tolist(), self.f['test_copy'][:].tolist()) def test_list_fill_empty(self): dset = self.io.__list_fill__(self.f, 'empty_dataset', [], options={'dtype': int, 'io_settings': {}}) self.assertTupleEqual(dset.shape, (0,)) def test_list_fill_empty_no_dtype(self): with self.assertRaisesRegex(Exception, r"cannot add \S+ to [/\S]+ - could not determine type"): self.io.__list_fill__(self.f, 'empty_dataset', []) def _get_manager(): foo_spec = GroupSpec('A test group specification with a data type', data_type_def='Foo', datasets=[DatasetSpec('an example dataset', 'int', name='my_data', attributes=[AttributeSpec('attr2', 'an example integer attribute', 'int')])], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), AttributeSpec('attr3', 'an example float attribute', 'float')]) tmp_spec = GroupSpec('A subgroup for Foos', name='foo_holder', groups=[GroupSpec('the Foos in this bucket', data_type_inc='Foo', quantity=ZERO_OR_MANY)]) bucket_spec = GroupSpec('A test group specification for a data type containing data type', data_type_def='FooBucket', groups=[tmp_spec]) class FooMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) my_data_spec = spec.get_dataset('my_data') self.map_spec('attr2', my_data_spec.get_attribute('attr2')) class BucketMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) foo_holder_spec = spec.get_group('foo_holder') self.unmap(foo_holder_spec) foo_spec = 
foo_holder_spec.get_data_type('Foo') self.map_spec('foos', foo_spec) file_spec = GroupSpec("A file of Foos contained in FooBuckets", data_type_def='FooFile', groups=[GroupSpec('Holds the FooBuckets', name='buckets', groups=[GroupSpec("One or more FooBuckets", data_type_inc='FooBucket', quantity=ONE_OR_MANY)])]) class FileMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) bucket_spec = spec.get_group('buckets').get_data_type('FooBucket') self.map_spec('buckets', bucket_spec) spec_catalog = SpecCatalog() spec_catalog.register_spec(foo_spec, 'test.yaml') spec_catalog.register_spec(bucket_spec, 'test.yaml') spec_catalog.register_spec(file_spec, 'test.yaml') namespace = SpecNamespace( 'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) type_map = TypeMap(namespace_catalog) type_map.register_container_type(CORE_NAMESPACE, 'Foo', Foo) type_map.register_container_type(CORE_NAMESPACE, 'FooBucket', FooBucket) type_map.register_container_type(CORE_NAMESPACE, 'FooFile', FooFile) type_map.register_map(Foo, FooMapper) type_map.register_map(FooBucket, BucketMapper) type_map.register_map(FooFile, FileMapper) manager = BuildManager(type_map) return manager class TestRoundTrip(TestCase): def setUp(self): self.manager = _get_manager() self.path = get_temp_filepath() def tearDown(self): if os.path.exists(self.path): os.remove(self.path) def test_roundtrip_basic(self): # Setup all the data we need foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('test_bucket', [foo1]) foofile = FooFile([foobucket]) with HDF5IO(self.path, manager=self.manager, mode='w') as io: io.write(foofile) with HDF5IO(self.path, manager=self.manager, mode='r') as io: read_foofile = io.read() self.assertListEqual(foofile.buckets[0].foos[0].my_data, read_foofile.buckets[0].foos[0].my_data[:].tolist()) def test_roundtrip_empty_dataset(self): foo1 = Foo('foo1', [], "I am foo1", 17, 3.14) foobucket = FooBucket('test_bucket', [foo1]) foofile = FooFile([foobucket]) with HDF5IO(self.path, manager=self.manager, mode='w') as io: io.write(foofile) with HDF5IO(self.path, manager=self.manager, mode='r') as io: read_foofile = io.read() self.assertListEqual([], read_foofile.buckets[0].foos[0].my_data[:].tolist()) def test_roundtrip_empty_group(self): foobucket = FooBucket('test_bucket', []) foofile = FooFile([foobucket]) with HDF5IO(self.path, manager=self.manager, mode='w') as io: io.write(foofile) with HDF5IO(self.path, manager=self.manager, mode='r') as io: read_foofile = io.read() self.assertListEqual([], read_foofile.buckets[0].foos) class TestHDF5IO(TestCase): def setUp(self): self.manager = _get_manager() self.path = get_temp_filepath() foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('test_bucket', [foo1]) self.foofile = FooFile([foobucket]) self.file_obj = None def tearDown(self): if os.path.exists(self.path): os.remove(self.path) if self.file_obj is not None: fn = self.file_obj.filename self.file_obj.close() if os.path.exists(fn): os.remove(fn) def test_constructor(self): with HDF5IO(self.path, manager=self.manager, mode='w') as io: self.assertEqual(io.manager, self.manager) self.assertEqual(io.source, self.path) def test_set_file_mismatch(self): self.file_obj = File(get_temp_filepath(), 'w') err_msg = ("You argued %s as this object's path, but supplied a file with filename: %s" % (self.path, self.file_obj.filename)) 
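# --- Illustrative sketch (not part of the original test suite) ----------------
# The round-trip tests above boil down to three steps: build containers, write
# them through a BuildManager-backed HDF5IO, and read them back. A condensed
# sketch using the fixtures defined in this module (_get_manager, Foo,
# FooBucket, FooFile); the file name is illustrative:
sketch_path = 'roundtrip_sketch.h5'
sketch_file = FooFile([FooBucket('sketch_bucket', [Foo('foo1', [1, 2, 3], 'I am foo1', 17, 3.14)])])

with HDF5IO(sketch_path, manager=_get_manager(), mode='w') as sketch_io:
    sketch_io.write(sketch_file)                 # containers -> builders -> HDF5 groups/datasets
with HDF5IO(sketch_path, manager=_get_manager(), mode='r') as sketch_io:
    sketch_read = sketch_io.read()               # data comes back as h5py datasets, hence [:] below
assert sketch_read.buckets[0].foos[0].my_data[:].tolist() == [1, 2, 3]
# -------------------------------------------------------------------------------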
with self.assertRaisesWith(ValueError, err_msg): HDF5IO(self.path, manager=self.manager, mode='w', file=self.file_obj) class TestCacheSpec(TestCase): def setUp(self): self.manager = _get_manager() self.path = get_temp_filepath() def tearDown(self): if os.path.exists(self.path): os.remove(self.path) def test_cache_spec(self): foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) foo2 = Foo('foo2', [5, 6, 7, 8, 9], "I am foo2", 34, 6.28) foobucket = FooBucket('test_bucket', [foo1, foo2]) foofile = FooFile([foobucket]) with HDF5IO(self.path, manager=self.manager, mode='w') as io: io.write(foofile) ns_catalog = NamespaceCatalog() HDF5IO.load_namespaces(ns_catalog, self.path) self.assertEqual(ns_catalog.namespaces, (CORE_NAMESPACE,)) source_types = self.__get_types(io.manager.namespace_catalog) read_types = self.__get_types(ns_catalog) self.assertSetEqual(source_types, read_types) def test_double_cache_spec(self): # Setup all the data we need foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) foo2 = Foo('foo2', [5, 6, 7, 8, 9], "I am foo2", 34, 6.28) foobucket = FooBucket('test_bucket', [foo1, foo2]) foofile = FooFile([foobucket]) with HDF5IO(self.path, manager=self.manager, mode='w') as io: io.write(foofile) with HDF5IO(self.path, manager=self.manager, mode='a') as io: io.write(foofile) def __get_types(self, catalog): types = set() for ns_name in catalog.namespaces: ns = catalog.get_namespace(ns_name) for source in ns['schema']: types.update(catalog.get_types(source['source'])) return types class TestNoCacheSpec(TestCase): def setUp(self): self.manager = _get_manager() self.path = get_temp_filepath() def tearDown(self): if os.path.exists(self.path): os.remove(self.path) def test_no_cache_spec(self): # Setup all the data we need foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) foo2 = Foo('foo2', [5, 6, 7, 8, 9], "I am foo2", 34, 6.28) foobucket = FooBucket('test_bucket', [foo1, foo2]) foofile = FooFile([foobucket]) with HDF5IO(self.path, manager=self.manager, mode='w') as io: io.write(foofile, cache_spec=False) with File(self.path, 'r') as f: self.assertNotIn('specifications', f) class HDF5IOMultiFileTest(TestCase): """Tests for h5tools IO tools""" def setUp(self): numfiles = 3 base_name = "test_multifile_hdf5_%d.h5" self.test_temp_files = [base_name % i for i in range(numfiles)] # On Windows h5py cannot truncate an open file in write mode. # The temp file will be closed before h5py truncates it # and will be removed during the tearDown step. 
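# --- Illustrative sketch (not part of the original test suite) ----------------
# The spec-caching tests above rely on write() storing the namespace specs under
# a 'specifications' group (cache_spec defaults to True) and on
# HDF5IO.load_namespaces() reading them back without a BuildManager. A condensed
# sketch using this module's fixtures; the file name is illustrative:
cache_sketch_path = 'cache_sketch.h5'
with HDF5IO(cache_sketch_path, manager=_get_manager(), mode='w') as cache_io:
    cache_io.write(FooFile([FooBucket('b', [Foo('foo1', [0, 1], 'I am foo1', 17, 3.14)])]))

sketch_ns_catalog = NamespaceCatalog()
HDF5IO.load_namespaces(sketch_ns_catalog, cache_sketch_path)   # reads the cached specs from the file
assert CORE_NAMESPACE in sketch_ns_catalog.namespaces
# -------------------------------------------------------------------------------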
self.io = [HDF5IO(i, mode='a', manager=_get_manager()) for i in self.test_temp_files] self.f = [i._file for i in self.io] def tearDown(self): # Close all the files for i in self.io: i.close() del(i) self.io = None self.f = None # Make sure the files have been deleted for tf in self.test_temp_files: try: os.remove(tf) except OSError: pass self.test_temp_files = None def test_copy_file_with_external_links(self): # Create the first file foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) bucket1 = FooBucket('test_bucket1', [foo1]) foofile1 = FooFile(buckets=[bucket1]) # Write the first file self.io[0].write(foofile1) # Create the second file bucket1_read = self.io[0].read() foo2 = Foo('foo2', bucket1_read.buckets[0].foos[0].my_data, "I am foo2", 34, 6.28) bucket2 = FooBucket('test_bucket2', [foo2]) foofile2 = FooFile(buckets=[bucket2]) # Write the second file self.io[1].write(foofile2) self.io[1].close() self.io[0].close() # Don't forget to close the first file too # Copy the file self.io[2].close() HDF5IO.copy_file(source_filename=self.test_temp_files[1], dest_filename=self.test_temp_files[2], expand_external=True, expand_soft=False, expand_refs=False) # Test that everything is working as expected # Confirm that our original data file is correct f1 = File(self.test_temp_files[0], 'r') self.assertIsInstance(f1.get('/buckets/test_bucket1/foo_holder/foo1/my_data', getlink=True), HardLink) # Confirm that we successfully created and External Link in our second file f2 = File(self.test_temp_files[1], 'r') self.assertIsInstance(f2.get('/buckets/test_bucket2/foo_holder/foo2/my_data', getlink=True), ExternalLink) # Confirm that we successfully resolved the External Link when we copied our second file f3 = File(self.test_temp_files[2], 'r') self.assertIsInstance(f3.get('/buckets/test_bucket2/foo_holder/foo2/my_data', getlink=True), HardLink) class HDF5IOInitNoFileTest(TestCase): """ Test if file does not exist, init with mode (r, r+) throws error, all others succeed """ def test_init_no_file_r(self): self.path = "test_init_nofile_r.h5" with self.assertRaisesWith(UnsupportedOperation, "Unable to open file %s in 'r' mode. File does not exist." % self.path): HDF5IO(self.path, mode='r') def test_init_no_file_rplus(self): self.path = "test_init_nofile_rplus.h5" with self.assertRaisesWith(UnsupportedOperation, "Unable to open file %s in 'r+' mode. File does not exist." % self.path): HDF5IO(self.path, mode='r+') def test_init_no_file_ok(self): # test that no errors are thrown modes = ('w', 'w-', 'x', 'a') for m in modes: self.path = "test_init_nofile.h5" with HDF5IO(self.path, mode=m): pass if os.path.exists(self.path): os.remove(self.path) class HDF5IOInitFileExistsTest(TestCase): """ Test if file exists, init with mode w-/x throws error, all others succeed """ def setUp(self): self.path = get_temp_filepath() temp_io = HDF5IO(self.path, mode='w') temp_io.close() self.io = None def tearDown(self): if self.io is not None: self.io.close() del(self.io) if os.path.exists(self.path): os.remove(self.path) def test_init_wminus_file_exists(self): with self.assertRaisesWith(UnsupportedOperation, "Unable to open file %s in 'w-' mode. File already exists." % self.path): self.io = HDF5IO(self.path, mode='w-') def test_init_x_file_exists(self): with self.assertRaisesWith(UnsupportedOperation, "Unable to open file %s in 'x' mode. File already exists." 
% self.path): self.io = HDF5IO(self.path, mode='x') def test_init_file_exists_ok(self): # test that no errors are thrown modes = ('r', 'r+', 'w', 'a') for m in modes: with HDF5IO(self.path, mode=m): pass class HDF5IOReadNoDataTest(TestCase): """ Test if file exists and there is no data, read with mode (r, r+, a) throws error """ def setUp(self): self.path = get_temp_filepath() temp_io = HDF5IO(self.path, mode='w') temp_io.close() self.io = None def tearDown(self): if self.io is not None: self.io.close() del(self.io) if os.path.exists(self.path): os.remove(self.path) def test_read_no_data_r(self): self.io = HDF5IO(self.path, mode='r') with self.assertRaisesWith(UnsupportedOperation, "Cannot read data from file %s in mode 'r'. There are no values." % self.path): self.io.read() def test_read_no_data_rplus(self): self.io = HDF5IO(self.path, mode='r+') with self.assertRaisesWith(UnsupportedOperation, "Cannot read data from file %s in mode 'r+'. There are no values." % self.path): self.io.read() def test_read_no_data_a(self): self.io = HDF5IO(self.path, mode='a') with self.assertRaisesWith(UnsupportedOperation, "Cannot read data from file %s in mode 'a'. There are no values." % self.path): self.io.read() class HDF5IOReadData(TestCase): """ Test if file exists and there is no data, read in mode (r, r+, a) is ok and read in mode w throws error """ def setUp(self): self.path = get_temp_filepath() foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) bucket1 = FooBucket('test_bucket1', [foo1]) self.foofile1 = FooFile(buckets=[bucket1]) with HDF5IO(self.path, manager=_get_manager(), mode='w') as temp_io: temp_io.write(self.foofile1) self.io = None def tearDown(self): if self.io is not None: self.io.close() del(self.io) if os.path.exists(self.path): os.remove(self.path) def test_read_file_ok(self): modes = ('r', 'r+', 'a') for m in modes: with HDF5IO(self.path, manager=_get_manager(), mode=m) as io: io.read() def test_read_file_w(self): with HDF5IO(self.path, manager=_get_manager(), mode='w') as io: with self.assertRaisesWith(UnsupportedOperation, "Cannot read from file %s in mode 'w'. Please use mode 'r', 'r+', or 'a'." 
% self.path): read_foofile1 = io.read() self.assertListEqual(self.foofile1.buckets[0].foos[0].my_data, read_foofile1.buckets[0].foos[0].my_data[:].tolist()) class HDF5IOWriteNoFile(TestCase): """ Test if file does not exist, write in mode (w, w-, x, a) is ok """ def setUp(self): foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) bucket1 = FooBucket('test_bucket1', [foo1]) self.foofile1 = FooFile(buckets=[bucket1]) self.path = 'test_write_nofile.h5' def tearDown(self): if os.path.exists(self.path): os.remove(self.path) def test_write_no_file_w_ok(self): self.__write_file('w') def test_write_no_file_wminus_ok(self): self.__write_file('w-') def test_write_no_file_x_ok(self): self.__write_file('x') def test_write_no_file_a_ok(self): self.__write_file('a') def __write_file(self, mode): with HDF5IO(self.path, manager=_get_manager(), mode=mode) as io: io.write(self.foofile1) with HDF5IO(self.path, manager=_get_manager(), mode='r') as io: read_foofile = io.read() self.assertListEqual(self.foofile1.buckets[0].foos[0].my_data, read_foofile.buckets[0].foos[0].my_data[:].tolist()) class HDF5IOWriteFileExists(TestCase): """ Test if file exists, write in mode (r+, w, a) is ok and write in mode r throws error """ def setUp(self): self.path = get_temp_filepath() foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) bucket1 = FooBucket('test_bucket1', [foo1]) self.foofile1 = FooFile(buckets=[bucket1]) foo2 = Foo('foo2', [0, 1, 2, 3, 4], "I am foo2", 17, 3.14) bucket2 = FooBucket('test_bucket2', [foo2]) self.foofile2 = FooFile(buckets=[bucket2]) with HDF5IO(self.path, manager=_get_manager(), mode='w') as io: io.write(self.foofile1) self.io = None def tearDown(self): if self.io is not None: self.io.close() del(self.io) if os.path.exists(self.path): os.remove(self.path) def test_write_rplus(self): with HDF5IO(self.path, manager=_get_manager(), mode='r+') as io: # even though foofile1 and foofile2 have different names, writing a # root object into a file that already has a root object, in r+ mode # should throw an error with self.assertRaisesWith(ValueError, "Unable to create group (name already exists)"): io.write(self.foofile2) def test_write_a(self): with HDF5IO(self.path, manager=_get_manager(), mode='a') as io: # even though foofile1 and foofile2 have different names, writing a # root object into a file that already has a root object, in r+ mode # should throw an error with self.assertRaisesWith(ValueError, "Unable to create group (name already exists)"): io.write(self.foofile2) def test_write_w(self): # mode 'w' should overwrite contents of file with HDF5IO(self.path, manager=_get_manager(), mode='w') as io: io.write(self.foofile2) with HDF5IO(self.path, manager=_get_manager(), mode='r') as io: read_foofile = io.read() self.assertListEqual(self.foofile2.buckets[0].foos[0].my_data, read_foofile.buckets[0].foos[0].my_data[:].tolist()) def test_write_r(self): with HDF5IO(self.path, manager=_get_manager(), mode='r') as io: with self.assertRaisesWith(UnsupportedOperation, ("Cannot write to file %s in mode 'r'. 
" "Please use mode 'r+', 'w', 'w-', 'x', or 'a'") % self.path): io.write(self.foofile2) class H5DataIOValid(TestCase): def setUp(self): self.paths = [get_temp_filepath(), ] self.foo1 = Foo('foo1', H5DataIO([1, 2, 3, 4, 5]), "I am foo1", 17, 3.14) bucket1 = FooBucket('test_bucket1', [self.foo1]) foofile1 = FooFile(buckets=[bucket1]) with HDF5IO(self.paths[0], manager=_get_manager(), mode='w') as io: io.write(foofile1) def tearDown(self): for path in self.paths: if os.path.exists(path): os.remove(path) def test_valid(self): self.assertTrue(self.foo1.my_data.valid) def test_read_valid(self): """Test that h5py.H5Dataset.id.valid works as expected""" with HDF5IO(self.paths[0], manager=_get_manager(), mode='r') as io: read_foofile1 = io.read() self.assertTrue(read_foofile1.buckets[0].foos[0].my_data.id.valid) self.assertFalse(read_foofile1.buckets[0].foos[0].my_data.id.valid) def test_link(self): """Test that wrapping of linked data within H5DataIO """ with HDF5IO(self.paths[0], manager=_get_manager(), mode='r') as io: read_foofile1 = io.read() self.foo2 = Foo('foo2', H5DataIO(data=read_foofile1.buckets[0].foos[0].my_data), "I am foo2", 17, 3.14) bucket2 = FooBucket('test_bucket2', [self.foo2]) foofile2 = FooFile(buckets=[bucket2]) self.paths.append(get_temp_filepath()) with HDF5IO(self.paths[1], manager=_get_manager(), mode='w') as io: io.write(foofile2) self.assertTrue(self.foo2.my_data.valid) # test valid self.assertEqual(len(self.foo2.my_data), 5) # test len self.assertEqual(self.foo2.my_data.shape, (5,)) # test getattr with shape self.assertTrue(np.array_equal(np.array(self.foo2.my_data), [1, 2, 3, 4, 5])) # test array conversion # test loop through iterable match = [1, 2, 3, 4, 5] for (i, j) in zip(self.foo2.my_data, match): self.assertEqual(i, j) # test iterator my_iter = iter(self.foo2.my_data) self.assertEqual(next(my_iter), 1) # foo2.my_data dataset is now closed self.assertFalse(self.foo2.my_data.valid) with self.assertRaisesWith(InvalidDataIOError, "Cannot get length of data. Data is not valid."): len(self.foo2.my_data) with self.assertRaisesWith(InvalidDataIOError, "Cannot get attribute 'shape' of data. Data is not valid."): self.foo2.my_data.shape with self.assertRaisesWith(InvalidDataIOError, "Cannot convert data to array. Data is not valid."): np.array(self.foo2.my_data) with self.assertRaisesWith(InvalidDataIOError, "Cannot iterate on data. Data is not valid."): for i in self.foo2.my_data: pass with self.assertRaisesWith(InvalidDataIOError, "Cannot iterate on data. 
Data is not valid."): iter(self.foo2.my_data) # re-open the file with the data linking to other file (still closed) with HDF5IO(self.paths[1], manager=_get_manager(), mode='r') as io: read_foofile2 = io.read() read_foo2 = read_foofile2.buckets[0].foos[0] # note that read_foo2 dataset does not have an attribute 'valid' self.assertEqual(len(read_foo2.my_data), 5) # test len self.assertEqual(read_foo2.my_data.shape, (5,)) # test getattr with shape self.assertTrue(np.array_equal(np.array(read_foo2.my_data), [1, 2, 3, 4, 5])) # test array conversion # test loop through iterable match = [1, 2, 3, 4, 5] for (i, j) in zip(read_foo2.my_data, match): self.assertEqual(i, j) # test iterator my_iter = iter(read_foo2.my_data) self.assertEqual(next(my_iter), 1) class TestReadLink(TestCase): def setUp(self): self.target_path = get_temp_filepath() self.link_path = get_temp_filepath() self.root1 = GroupBuilder(name='root') self.subgroup = self.root1.add_group('test_group') self.dataset = self.subgroup.add_dataset('test_dataset', data=[1, 2, 3, 4]) self.root2 = GroupBuilder(name='root') self.group_link = self.root2.add_link(self.subgroup, 'link_to_test_group') self.dataset_link = self.root2.add_link(self.dataset, 'link_to_test_dataset') with HDF5IO(self.target_path, manager=_get_manager(), mode='w') as io: io.write_builder(self.root1) self.root1.source = self.target_path with HDF5IO(self.link_path, manager=_get_manager(), mode='w') as io: io.write_builder(self.root2) self.root2.source = self.link_path def test_set_link_loc(self): """ Test that Builder location is set when it is read as a link """ read_io = HDF5IO(self.link_path, manager=_get_manager(), mode='r') bldr = read_io.read_builder() self.assertEqual(bldr['link_to_test_group'].builder.location, '/') self.assertEqual(bldr['link_to_test_dataset'].builder.location, '/test_group') read_io.close() def test_link_to_link(self): """ Test that link to link gets written and read properly """ link_to_link_path = get_temp_filepath() read_io1 = HDF5IO(self.link_path, manager=_get_manager(), mode='r') bldr1 = read_io1.read_builder() root3 = GroupBuilder(name='root') root3.add_link(bldr1['link_to_test_group'].builder, 'link_to_link') with HDF5IO(link_to_link_path, manager=_get_manager(), mode='w') as io: io.write_builder(root3) read_io1.close() read_io2 = HDF5IO(link_to_link_path, manager=_get_manager(), mode='r') bldr2 = read_io2.read_builder() self.assertEqual(bldr2['link_to_link'].builder.source, self.target_path) read_io2.close() ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/test_query.py0000644000655200065520000001121000000000000020720 0ustar00circlecicircleci00000000000000import os from h5py import File import numpy as np from abc import ABCMeta, abstractmethod from hdmf.query import HDMFDataset, Query from hdmf.array import SortedArray, LinSpace from hdmf.testing import TestCase class AbstractQueryMixin(metaclass=ABCMeta): @abstractmethod def getDataset(self): raise NotImplementedError('Cannot run test unless getDataset is implemented') def setUp(self): self.dset = self.getDataset() self.wrapper = HDMFDataset(self.dset) def test_get_dataset(self): array = self.wrapper.dataset self.assertIsInstance(array, SortedArray) def test___gt__(self): ''' Test wrapper greater than magic method ''' q = self.wrapper > 5 self.assertIsInstance(q, Query) result = q.evaluate() expected = [False, False, False, False, False, False, True, True, True, True] expected = slice(6, 10) 
self.assertEqual(result, expected) def test___ge__(self): ''' Test wrapper greater than or equal magic method ''' q = self.wrapper >= 5 self.assertIsInstance(q, Query) result = q.evaluate() expected = [False, False, False, False, False, True, True, True, True, True] expected = slice(5, 10) self.assertEqual(result, expected) def test___lt__(self): ''' Test wrapper less than magic method ''' q = self.wrapper < 5 self.assertIsInstance(q, Query) result = q.evaluate() expected = [True, True, True, True, True, False, False, False, False, False] expected = slice(0, 5) self.assertEqual(result, expected) def test___le__(self): ''' Test wrapper less than or equal magic method ''' q = self.wrapper <= 5 self.assertIsInstance(q, Query) result = q.evaluate() expected = [True, True, True, True, True, True, False, False, False, False] expected = slice(0, 6) self.assertEqual(result, expected) def test___eq__(self): ''' Test wrapper equals magic method ''' q = self.wrapper == 5 self.assertIsInstance(q, Query) result = q.evaluate() expected = [False, False, False, False, False, True, False, False, False, False] expected = 5 self.assertTrue(np.array_equal(result, expected)) def test___ne__(self): ''' Test wrapper not equal magic method ''' q = self.wrapper != 5 self.assertIsInstance(q, Query) result = q.evaluate() expected = [True, True, True, True, True, False, True, True, True, True] expected = [slice(0, 5), slice(6, 10)] self.assertTrue(np.array_equal(result, expected)) def test___getitem__(self): ''' Test wrapper getitem using slice ''' result = self.wrapper[0:5] expected = [0, 1, 2, 3, 4] self.assertTrue(np.array_equal(result, expected)) def test___getitem__query(self): ''' Test wrapper getitem using query ''' q = self.wrapper < 5 result = self.wrapper[q] expected = [0, 1, 2, 3, 4] self.assertTrue(np.array_equal(result, expected)) class SortedQueryTest(AbstractQueryMixin, TestCase): path = 'SortedQueryTest.h5' def getDataset(self): self.f = File(self.path, 'w') self.input = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] self.d = self.f.create_dataset('dset', data=self.input) return SortedArray(self.d) def tearDown(self): self.f.close() if os.path.exists(self.path): os.remove(self.path) class LinspaceQueryTest(AbstractQueryMixin, TestCase): path = 'LinspaceQueryTest.h5' def getDataset(self): return LinSpace(0, 10, 1) class CompoundQueryTest(TestCase): def getM(self): return SortedArray(np.arange(10, 20, 1)) def getN(self): return SortedArray(np.arange(10.0, 20.0, 0.5)) def setUp(self): self.m = HDMFDataset(self.getM()) self.n = HDMFDataset(self.getN()) # TODO: test not completed # def test_map(self): # q = self.m == (12, 16) # IN operation # q.evaluate() # [2,3,4,5] # q.evaluate(False) # RangeResult(2,6) # r = self.m[q] # noqa: F841 # r = self.m[q.evaluate()] # noqa: F841 # r = self.m[q.evaluate(False)] # noqa: F841 def tearDown(self): pass ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/utils.py0000644000655200065520000000423100000000000017661 0ustar00circlecicircleci00000000000000from hdmf.utils import docval, getargs from hdmf.container import Container CORE_NAMESPACE = 'test_core' class Foo(Container): @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'some data'}, {'name': 'attr1', 'type': str, 'doc': 'an attribute'}, {'name': 'attr2', 'type': int, 'doc': 'another attribute'}, {'name': 'attr3', 'type': float, 'doc': 'a third attribute', 'default': 3.14}) def 
__init__(self, **kwargs): name, my_data, attr1, attr2, attr3 = getargs('name', 'my_data', 'attr1', 'attr2', 'attr3', kwargs) super().__init__(name=name) self.__data = my_data self.__attr1 = attr1 self.__attr2 = attr2 self.__attr3 = attr3 def __eq__(self, other): attrs = ('name', 'my_data', 'attr1', 'attr2', 'attr3') return all(getattr(self, a) == getattr(other, a) for a in attrs) def __str__(self): attrs = ('name', 'my_data', 'attr1', 'attr2', 'attr3') return '<' + ','.join('%s=%s' % (a, getattr(self, a)) for a in attrs) + '>' @property def my_data(self): return self.__data @property def attr1(self): return self.__attr1 @property def attr2(self): return self.__attr2 @property def attr3(self): return self.__attr3 def __hash__(self): return hash(self.name) class FooBucket(Container): @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket'}, {'name': 'foos', 'type': list, 'doc': 'the Foo objects in this bucket', 'default': list()}) def __init__(self, **kwargs): name, foos = getargs('name', 'foos', kwargs) super().__init__(name=name) self.__foos = foos for f in self.__foos: f.parent = self def __eq__(self, other): return self.name == other.name and set(self.foos) == set(other.foos) def __str__(self): foo_str = "[" + ",".join(str(f) for f in self.foos) + "]" return 'name=%s, foos=%s' % (self.name, foo_str) @property def foos(self): return self.__foos ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9241881 hdmf-1.5.4/tests/unit/utils_test/0000755000655200065520000000000000000000000020346 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/utils_test/__init__.py0000644000655200065520000000000000000000000022445 0ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/utils_test/test_core_DataChunk.py0000644000655200065520000000220200000000000024625 0ustar00circlecicircleci00000000000000import numpy as np from copy import copy, deepcopy from hdmf.data_utils import DataChunk from hdmf.testing import TestCase class DataChunkTests(TestCase): def setUp(self): pass def tearDown(self): pass def test_datachunk_copy(self): obj = DataChunk(data=np.arange(3), selection=np.s_[0:3]) obj_copy = copy(obj) self.assertNotEqual(id(obj), id(obj_copy)) self.assertEqual(id(obj.data), id(obj_copy.data)) self.assertEqual(id(obj.selection), id(obj_copy.selection)) def test_datachunk_deepcopy(self): obj = DataChunk(data=np.arange(3), selection=np.s_[0:3]) obj_copy = deepcopy(obj) self.assertNotEqual(id(obj), id(obj_copy)) self.assertNotEqual(id(obj.data), id(obj_copy.data)) self.assertNotEqual(id(obj.selection), id(obj_copy.selection)) def test_datachunk_astype(self): obj = DataChunk(data=np.arange(3), selection=np.s_[0:3]) newtype = np.dtype('int16') obj_astype = obj.astype(newtype) self.assertNotEqual(id(obj), id(obj_astype)) self.assertEqual(obj_astype.dtype, np.dtype(newtype)) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/utils_test/test_core_DataChunkIterator.py0000644000655200065520000004476100000000000026357 0ustar00circlecicircleci00000000000000import numpy as np from hdmf.data_utils import DataChunkIterator, DataChunk from hdmf.testing import TestCase class DataChunkIteratorTests(TestCase): def setUp(self): pass def tearDown(self): pass 
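    # Hedged usage sketch, added for illustration; not part of the original hdmf test suite.
    # It shows the pattern the tests below exercise: wrap an array or iterable in a
    # DataChunkIterator and stream DataChunk objects whose .shape and .selection describe
    # where each buffered chunk belongs. The helper name `_example_stream_chunk_shapes` is
    # ours; the leading underscore keeps unittest from collecting it as a test.
    def _example_stream_chunk_shapes(self):
        a = np.arange(30).reshape(5, 2, 3)
        dci = DataChunkIterator(data=a, buffer_size=2)  # iterate over axis 0, two slices at a time
        shapes = [chunk.shape for chunk in dci]         # each `chunk` is a DataChunk
        # With 5 rows and buffer_size=2 this is expected to yield (2, 2, 3), (2, 2, 3), (1, 2, 3),
        # matching test_numpy_iter_buffered_first_axis below.
        return shapes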
def test_none_iter(self): """Test that DataChunkIterator __init__ sets defaults correctly and all chunks and recommended shapes are None. """ dci = DataChunkIterator(dtype=np.dtype('int')) self.assertIsNone(dci.maxshape) self.assertEqual(dci.dtype, np.dtype('int')) self.assertEqual(dci.buffer_size, 1) self.assertEqual(dci.iter_axis, 0) count = 0 for chunk in dci: pass self.assertEqual(count, 0) self.assertIsNone(dci.recommended_data_shape()) self.assertIsNone(dci.recommended_chunk_shape()) def test_list_none(self): """Test that DataChunkIterator has no dtype or chunks when given a list of None. """ a = [None, None, None] with self.assertRaisesWith(Exception, 'Data type could not be determined. Please specify dtype in ' 'DataChunkIterator init.'): DataChunkIterator(a) def test_list_none_dtype(self): """Test that DataChunkIterator has the passed-in dtype and no chunks when given a list of None. """ a = [None, None, None] dci = DataChunkIterator(a, dtype=np.dtype('int')) self.assertTupleEqual(dci.maxshape, (3,)) self.assertEqual(dci.dtype, np.dtype('int')) count = 0 for chunk in dci: pass self.assertEqual(count, 0) self.assertTupleEqual(dci.recommended_data_shape(), (3,)) self.assertIsNone(dci.recommended_chunk_shape()) def test_numpy_iter_unbuffered_first_axis(self): """Test DataChunkIterator with numpy data, no buffering, and iterating on the first dimension. """ a = np.arange(30).reshape(5, 2, 3) dci = DataChunkIterator(data=a, buffer_size=1) count = 0 for chunk in dci: self.assertTupleEqual(chunk.shape, (1, 2, 3)) count += 1 self.assertEqual(count, 5) self.assertTupleEqual(dci.recommended_data_shape(), a.shape) self.assertIsNone(dci.recommended_chunk_shape()) def test_numpy_iter_unbuffered_middle_axis(self): """Test DataChunkIterator with numpy data, no buffering, and iterating on a middle dimension. """ a = np.arange(30).reshape(5, 2, 3) dci = DataChunkIterator(data=a, buffer_size=1, iter_axis=1) count = 0 for chunk in dci: self.assertTupleEqual(chunk.shape, (5, 1, 3)) count += 1 self.assertEqual(count, 2) self.assertTupleEqual(dci.recommended_data_shape(), a.shape) self.assertIsNone(dci.recommended_chunk_shape()) def test_numpy_iter_unbuffered_last_axis(self): """Test DataChunkIterator with numpy data, no buffering, and iterating on the last dimension. """ a = np.arange(30).reshape(5, 2, 3) dci = DataChunkIterator(data=a, buffer_size=1, iter_axis=2) count = 0 for chunk in dci: self.assertTupleEqual(chunk.shape, (5, 2, 1)) count += 1 self.assertEqual(count, 3) self.assertTupleEqual(dci.recommended_data_shape(), a.shape) self.assertIsNone(dci.recommended_chunk_shape()) def test_numpy_iter_buffered_first_axis(self): """Test DataChunkIterator with numpy data, buffering, and iterating on the first dimension. """ a = np.arange(30).reshape(5, 2, 3) dci = DataChunkIterator(data=a, buffer_size=2) count = 0 for chunk in dci: if count < 2: self.assertTupleEqual(chunk.shape, (2, 2, 3)) else: self.assertTupleEqual(chunk.shape, (1, 2, 3)) count += 1 self.assertEqual(count, 3) self.assertTupleEqual(dci.recommended_data_shape(), a.shape) self.assertIsNone(dci.recommended_chunk_shape()) def test_numpy_iter_buffered_middle_axis(self): """Test DataChunkIterator with numpy data, buffering, and iterating on a middle dimension. 
""" a = np.arange(45).reshape(5, 3, 3) dci = DataChunkIterator(data=a, buffer_size=2, iter_axis=1) count = 0 for chunk in dci: if count < 1: self.assertTupleEqual(chunk.shape, (5, 2, 3)) else: self.assertTupleEqual(chunk.shape, (5, 1, 3)) count += 1 self.assertEqual(count, 2) self.assertTupleEqual(dci.recommended_data_shape(), a.shape) self.assertIsNone(dci.recommended_chunk_shape()) def test_numpy_iter_buffered_last_axis(self): """Test DataChunkIterator with numpy data, buffering, and iterating on the last dimension. """ a = np.arange(30).reshape(5, 2, 3) dci = DataChunkIterator(data=a, buffer_size=2, iter_axis=2) count = 0 for chunk in dci: if count < 1: self.assertTupleEqual(chunk.shape, (5, 2, 2)) else: self.assertTupleEqual(chunk.shape, (5, 2, 1)) count += 1 self.assertEqual(count, 2) self.assertTupleEqual(dci.recommended_data_shape(), a.shape) self.assertIsNone(dci.recommended_chunk_shape()) def test_numpy_iter_unmatched_buffer_size(self): a = np.arange(10) dci = DataChunkIterator(data=a, buffer_size=3) self.assertTupleEqual(dci.maxshape, a.shape) self.assertEqual(dci.dtype, a.dtype) count = 0 for chunk in dci: if count < 3: self.assertTupleEqual(chunk.data.shape, (3,)) else: self.assertTupleEqual(chunk.data.shape, (1,)) count += 1 self.assertEqual(count, 4) self.assertTupleEqual(dci.recommended_data_shape(), a.shape) self.assertIsNone(dci.recommended_chunk_shape()) def test_standard_iterator_unbuffered(self): dci = DataChunkIterator(data=range(10), buffer_size=1) self.assertEqual(dci.dtype, np.dtype(int)) self.assertTupleEqual(dci.maxshape, (10,)) self.assertTupleEqual(dci.recommended_data_shape(), (10,)) # Test before and after iteration count = 0 for chunk in dci: self.assertTupleEqual(chunk.data.shape, (1,)) count += 1 self.assertEqual(count, 10) self.assertTupleEqual(dci.recommended_data_shape(), (10,)) # Test before and after iteration self.assertIsNone(dci.recommended_chunk_shape()) def test_standard_iterator_unmatched_buffersized(self): dci = DataChunkIterator(data=range(10), buffer_size=3) self.assertEqual(dci.dtype, np.dtype(int)) self.assertTupleEqual(dci.maxshape, (10,)) self.assertIsNone(dci.recommended_chunk_shape()) self.assertTupleEqual(dci.recommended_data_shape(), (10,)) # Test before and after iteration count = 0 for chunk in dci: if count < 3: self.assertTupleEqual(chunk.data.shape, (3,)) else: self.assertTupleEqual(chunk.data.shape, (1,)) count += 1 self.assertEqual(count, 4) self.assertTupleEqual(dci.recommended_data_shape(), (10,)) # Test before and after iteration def test_multidimensional_list_first_axis(self): """Test DataChunkIterator with multidimensional list data, no buffering, and iterating on the first dimension. """ a = np.arange(30).reshape(5, 2, 3).tolist() dci = DataChunkIterator(a) self.assertTupleEqual(dci.maxshape, (5, 2, 3)) self.assertEqual(dci.dtype, np.dtype(int)) count = 0 for chunk in dci: self.assertTupleEqual(chunk.data.shape, (1, 2, 3)) count += 1 self.assertEqual(count, 5) self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 3)) self.assertIsNone(dci.recommended_chunk_shape()) def test_multidimensional_list_middle_axis(self): """Test DataChunkIterator with multidimensional list data, no buffering, and iterating on a middle dimension. 
""" a = np.arange(30).reshape(5, 2, 3).tolist() warn_msg = ('Iterating over an axis other than the first dimension of list or tuple data ' 'involves converting the data object to a numpy ndarray, which may incur a computational ' 'cost.') with self.assertWarnsWith(UserWarning, warn_msg): dci = DataChunkIterator(a, iter_axis=1) self.assertTupleEqual(dci.maxshape, (5, 2, 3)) self.assertEqual(dci.dtype, np.dtype(int)) count = 0 for chunk in dci: self.assertTupleEqual(chunk.data.shape, (5, 1, 3)) count += 1 self.assertEqual(count, 2) self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 3)) self.assertIsNone(dci.recommended_chunk_shape()) def test_multidimensional_list_last_axis(self): """Test DataChunkIterator with multidimensional list data, no buffering, and iterating on the last dimension. """ a = np.arange(30).reshape(5, 2, 3).tolist() warn_msg = ('Iterating over an axis other than the first dimension of list or tuple data ' 'involves converting the data object to a numpy ndarray, which may incur a computational ' 'cost.') with self.assertWarnsWith(UserWarning, warn_msg): dci = DataChunkIterator(a, iter_axis=2) self.assertTupleEqual(dci.maxshape, (5, 2, 3)) self.assertEqual(dci.dtype, np.dtype(int)) count = 0 for chunk in dci: self.assertTupleEqual(chunk.data.shape, (5, 2, 1)) count += 1 self.assertEqual(count, 3) self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 3)) self.assertIsNone(dci.recommended_chunk_shape()) def test_maxshape(self): a = np.arange(30).reshape(5, 2, 3) aiter = iter(a) daiter = DataChunkIterator.from_iterable(aiter, buffer_size=2) self.assertEqual(daiter.maxshape, (None, 2, 3)) def test_dtype(self): a = np.arange(30, dtype='int32').reshape(5, 2, 3) aiter = iter(a) daiter = DataChunkIterator.from_iterable(aiter, buffer_size=2) self.assertEqual(daiter.dtype, a.dtype) def test_sparse_data_buffer_aligned(self): a = [1, 2, 3, 4, None, None, 7, 8, None, None] dci = DataChunkIterator(a, buffer_size=2) self.assertTupleEqual(dci.maxshape, (10,)) self.assertEqual(dci.dtype, np.dtype(int)) count = 0 for chunk in dci: self.assertTupleEqual(chunk.data.shape, (2,)) self.assertEqual(len(chunk.selection), 1) self.assertEqual(chunk.selection[0], slice(chunk.data[0] - 1, chunk.data[1])) count += 1 self.assertEqual(count, 3) self.assertTupleEqual(dci.recommended_data_shape(), (10,)) self.assertIsNone(dci.recommended_chunk_shape()) def test_sparse_data_buffer_notaligned(self): a = [1, 2, 3, None, None, None, None, 8, 9, 10] dci = DataChunkIterator(a, buffer_size=2) self.assertTupleEqual(dci.maxshape, (10,)) self.assertEqual(dci.dtype, np.dtype(int)) count = 0 for chunk in dci: self.assertEqual(len(chunk.selection), 1) if count == 0: # [1, 2] self.assertListEqual(chunk.data.tolist(), [1, 2]) self.assertEqual(chunk.selection[0], slice(chunk.data[0] - 1, chunk.data[1])) elif count == 1: # [3, None] self.assertListEqual(chunk.data.tolist(), [3, ]) self.assertEqual(chunk.selection[0], slice(chunk.data[0] - 1, chunk.data[0])) elif count == 2: # [8, 9] self.assertListEqual(chunk.data.tolist(), [8, 9]) self.assertEqual(chunk.selection[0], slice(chunk.data[0] - 1, chunk.data[1])) else: # count == 3, [10] self.assertListEqual(chunk.data.tolist(), [10, ]) self.assertEqual(chunk.selection[0], slice(chunk.data[0] - 1, chunk.data[0])) count += 1 self.assertEqual(count, 4) self.assertTupleEqual(dci.recommended_data_shape(), (10,)) self.assertIsNone(dci.recommended_chunk_shape()) def test_start_with_none(self): a = [None, None, 3] dci = DataChunkIterator(a, buffer_size=2) 
self.assertTupleEqual(dci.maxshape, (3,)) self.assertEqual(dci.dtype, np.dtype(int)) count = 0 for chunk in dci: self.assertListEqual(chunk.data.tolist(), [3]) self.assertEqual(len(chunk.selection), 1) self.assertEqual(chunk.selection[0], slice(2, 3)) count += 1 self.assertEqual(count, 1) self.assertTupleEqual(dci.recommended_data_shape(), (3,)) self.assertIsNone(dci.recommended_chunk_shape()) def test_list_scalar(self): a = [3] dci = DataChunkIterator(a, buffer_size=2) self.assertTupleEqual(dci.maxshape, (1,)) self.assertEqual(dci.dtype, np.dtype(int)) count = 0 for chunk in dci: self.assertListEqual(chunk.data.tolist(), [3]) self.assertEqual(len(chunk.selection), 1) self.assertEqual(chunk.selection[0], slice(0, 1)) count += 1 self.assertEqual(count, 1) self.assertTupleEqual(dci.recommended_data_shape(), (1,)) self.assertIsNone(dci.recommended_chunk_shape()) def test_list_numpy_scalar(self): a = np.array([3]) dci = DataChunkIterator(a, buffer_size=2) self.assertTupleEqual(dci.maxshape, (1,)) self.assertEqual(dci.dtype, np.dtype(int)) count = 0 for chunk in dci: self.assertListEqual(chunk.data.tolist(), [3]) self.assertEqual(len(chunk.selection), 1) self.assertEqual(chunk.selection[0], slice(0, 1)) count += 1 self.assertEqual(count, 1) self.assertTupleEqual(dci.recommended_data_shape(), (1,)) self.assertIsNone(dci.recommended_chunk_shape()) def test_set_maxshape(self): a = np.array([3]) dci = DataChunkIterator(a, maxshape=(5, 2, 3), buffer_size=2) self.assertTupleEqual(dci.maxshape, (5, 2, 3)) self.assertEqual(dci.dtype, np.dtype(int)) count = 0 for chunk in dci: self.assertListEqual(chunk.data.tolist(), [3]) self.assertTupleEqual(chunk.selection, (slice(0, 1), slice(None), slice(None))) count += 1 self.assertEqual(count, 1) self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 3)) self.assertIsNone(dci.recommended_chunk_shape()) def test_custom_iter_first_axis(self): def my_iter(): count = 0 a = np.arange(30).reshape(5, 2, 3) while count < a.shape[0]: val = a[count, :, :] count = count + 1 yield val return dci = DataChunkIterator(data=my_iter(), buffer_size=2) count = 0 for chunk in dci: if count < 2: self.assertTupleEqual(chunk.shape, (2, 2, 3)) else: self.assertTupleEqual(chunk.shape, (1, 2, 3)) count += 1 self.assertEqual(count, 3) # self.assertTupleEqual(dci.recommended_data_shape(), (2, 2, 3)) self.assertIsNone(dci.recommended_chunk_shape()) def test_custom_iter_middle_axis(self): def my_iter(): count = 0 a = np.arange(45).reshape(5, 3, 3) while count < a.shape[1]: val = a[:, count, :] count = count + 1 yield val return dci = DataChunkIterator(data=my_iter(), buffer_size=2, iter_axis=1) count = 0 for chunk in dci: if count < 1: self.assertTupleEqual(chunk.shape, (5, 2, 3)) else: self.assertTupleEqual(chunk.shape, (5, 1, 3)) count += 1 self.assertEqual(count, 2) # self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 3)) self.assertIsNone(dci.recommended_chunk_shape()) def test_custom_iter_last_axis(self): def my_iter(): count = 0 a = np.arange(30).reshape(5, 2, 3) while count < a.shape[2]: val = a[:, :, count] count = count + 1 yield val return dci = DataChunkIterator(data=my_iter(), buffer_size=2, iter_axis=2) count = 0 for chunk in dci: if count < 1: self.assertTupleEqual(chunk.shape, (5, 2, 2)) else: self.assertTupleEqual(chunk.shape, (5, 2, 1)) count += 1 self.assertEqual(count, 2) # self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 2)) self.assertIsNone(dci.recommended_chunk_shape()) def test_custom_iter_mismatched_axis(self): def my_iter(): count = 0 a = 
np.arange(30).reshape(5, 2, 3) while count < a.shape[2]: val = a[:, :, count] count = count + 1 yield val return # iterator returns slices of size (5, 2) # because iter_axis is by default 0, these chunks will be placed along the first dimension dci = DataChunkIterator(data=my_iter(), buffer_size=2) count = 0 for chunk in dci: if count < 1: self.assertTupleEqual(chunk.shape, (2, 5, 2)) else: self.assertTupleEqual(chunk.shape, (1, 5, 2)) count += 1 self.assertEqual(count, 2) # self.assertTupleEqual(dci.recommended_data_shape(), (5, 2, 2)) self.assertIsNone(dci.recommended_chunk_shape()) class DataChunkTests(TestCase): def setUp(self): pass def tearDown(self): pass def test_len_operator_no_data(self): temp = DataChunk() self.assertEqual(len(temp), 0) def test_len_operator_with_data(self): temp = DataChunk(np.arange(10).reshape(5, 2)) self.assertEqual(len(temp), 5) def test_dtype(self): temp = DataChunk(np.arange(10).astype('int')) temp_dtype = temp.dtype self.assertEqual(temp_dtype, np.dtype('int')) def test_astype(self): temp1 = DataChunk(np.arange(10).reshape(5, 2)) temp2 = temp1.astype('float32') self.assertEqual(temp2.dtype, np.dtype('float32')) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/utils_test/test_core_DataIO.py0000644000655200065520000000331700000000000024074 0ustar00circlecicircleci00000000000000import numpy as np from copy import copy, deepcopy from hdmf.data_utils import DataIO from hdmf.container import Data from hdmf.testing import TestCase class DataIOTests(TestCase): def setUp(self): pass def tearDown(self): pass def test_copy(self): obj = DataIO(data=[1., 2., 3.]) obj_copy = copy(obj) self.assertNotEqual(id(obj), id(obj_copy)) self.assertEqual(id(obj.data), id(obj_copy.data)) def test_deepcopy(self): obj = DataIO(data=[1., 2., 3.]) obj_copy = deepcopy(obj) self.assertNotEqual(id(obj), id(obj_copy)) self.assertNotEqual(id(obj.data), id(obj_copy.data)) def test_dataio_slice_delegation(self): indata = np.arange(30) dset = DataIO(indata) self.assertTrue(np.all(dset[2:15] == indata[2:15])) indata = np.arange(50).reshape(5, 10) dset = DataIO(indata) self.assertTrue(np.all(dset[1:3, 5:8] == indata[1:3, 5:8])) def test_set_dataio(self): """ Test that Data.set_dataio works as intended """ dataio = DataIO() data = np.arange(30).reshape(5, 2, 3) container = Data('wrapped_data', data) container.set_dataio(dataio) self.assertIs(dataio.data, data) self.assertIs(dataio, container.data) def test_set_dataio_data_already_set(self): """ Test that Data.set_dataio works as intended """ dataio = DataIO(data=np.arange(30).reshape(5, 2, 3)) data = np.arange(30).reshape(5, 2, 3) container = Data('wrapped_data', data) with self.assertRaisesWith(ValueError, "cannot overwrite 'data' on DataIO"): container.set_dataio(dataio) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/utils_test/test_core_ShapeValidator.py0000644000655200065520000002227200000000000025702 0ustar00circlecicircleci00000000000000import numpy as np from hdmf.data_utils import ShapeValidatorResult, DataChunkIterator, assertEqualShape from hdmf.common.table import DynamicTable, DynamicTableRegion, VectorData from hdmf.testing import TestCase class ShapeValidatorTests(TestCase): def setUp(self): pass def tearDown(self): pass def test_array_all_dimensions_match(self): # Test match d1 = np.arange(10).reshape(2, 5) d2 = np.arange(10).reshape(2, 5) res = assertEqualShape(d1, 
d2) self.assertTrue(res.result) self.assertIsNone(res.error) self.assertTupleEqual(res.ignored, ()) self.assertTupleEqual(res.unmatched, ()) self.assertTupleEqual(res.shape1, (2, 5)) self.assertTupleEqual(res.shape2, (2, 5)) self.assertTupleEqual(res.axes1, (0, 1)) self.assertTupleEqual(res.axes2, (0, 1)) def test_array_dimensions_mismatch(self): # Test unmatched d1 = np.arange(10).reshape(2, 5) d2 = np.arange(10).reshape(5, 2) res = assertEqualShape(d1, d2) self.assertFalse(res.result) self.assertEqual(res.error, 'AXIS_LEN_ERROR') self.assertTupleEqual(res.ignored, ()) self.assertTupleEqual(res.unmatched, ((0, 0), (1, 1))) self.assertTupleEqual(res.shape1, (2, 5)) self.assertTupleEqual(res.shape2, (5, 2)) self.assertTupleEqual(res.axes1, (0, 1)) self.assertTupleEqual(res.axes2, (0, 1)) def test_array_unequal_number_of_dimensions(self): # Test unequal num dims d1 = np.arange(10).reshape(2, 5) d2 = np.arange(20).reshape(5, 2, 2) res = assertEqualShape(d1, d2) self.assertFalse(res.result) self.assertEqual(res.error, 'NUM_AXES_ERROR') self.assertTupleEqual(res.ignored, ()) self.assertTupleEqual(res.unmatched, ()) self.assertTupleEqual(res.shape1, (2, 5)) self.assertTupleEqual(res.shape2, (5, 2, 2)) self.assertTupleEqual(res.axes1, (0, 1)) self.assertTupleEqual(res.axes2, (0, 1, 2)) def test_array_unequal_number_of_dimensions_check_one_axis_only(self): # Test unequal num dims compare one axis d1 = np.arange(10).reshape(2, 5) d2 = np.arange(20).reshape(2, 5, 2) res = assertEqualShape(d1, d2, 0, 0) self.assertTrue(res.result) self.assertIsNone(res.error) self.assertTupleEqual(res.ignored, ()) self.assertTupleEqual(res.unmatched, ()) self.assertTupleEqual(res.shape1, (2, 5)) self.assertTupleEqual(res.shape2, (2, 5, 2)) self.assertTupleEqual(res.axes1, (0,)) self.assertTupleEqual(res.axes2, (0,)) def test_array_unequal_number_of_dimensions_check_multiple_axesy(self): # Test unequal num dims compare multiple axes d1 = np.arange(10).reshape(2, 5) d2 = np.arange(20).reshape(5, 2, 2) res = assertEqualShape(d1, d2, [0, 1], [1, 0]) self.assertTrue(res.result) self.assertIsNone(res.error) self.assertTupleEqual(res.ignored, ()) self.assertTupleEqual(res.unmatched, ()) self.assertTupleEqual(res.shape1, (2, 5)) self.assertTupleEqual(res.shape2, (5, 2, 2)) self.assertTupleEqual(res.axes1, (0, 1)) self.assertTupleEqual(res.axes2, (1, 0)) def test_array_unequal_number_of_axes_for_comparison(self): # Test unequal num axes for comparison d1 = np.arange(10).reshape(2, 5) d2 = np.arange(20).reshape(5, 2, 2) res = assertEqualShape(d1, d2, [0, 1], 1) self.assertFalse(res.result) self.assertEqual(res.error, "NUM_AXES_ERROR") self.assertTupleEqual(res.ignored, ()) self.assertTupleEqual(res.unmatched, ()) self.assertTupleEqual(res.shape1, (2, 5)) self.assertTupleEqual(res.shape2, (5, 2, 2)) self.assertTupleEqual(res.axes1, (0, 1)) self.assertTupleEqual(res.axes2, (1,)) def test_array_axis_index_out_of_bounds_single_axis(self): # Test too large frist axis d1 = np.arange(10).reshape(2, 5) d2 = np.arange(20).reshape(5, 2, 2) res = assertEqualShape(d1, d2, 4, 1) self.assertFalse(res.result) self.assertEqual(res.error, 'AXIS_OUT_OF_BOUNDS') self.assertTupleEqual(res.ignored, ()) self.assertTupleEqual(res.unmatched, ()) self.assertTupleEqual(res.shape1, (2, 5)) self.assertTupleEqual(res.shape2, (5, 2, 2)) self.assertTupleEqual(res.axes1, (4,)) self.assertTupleEqual(res.axes2, (1,)) def test_array_axis_index_out_of_bounds_mutilple_axis(self): # Test too large second axis d1 = np.arange(10).reshape(2, 5) d2 = 
np.arange(20).reshape(5, 2, 2) res = assertEqualShape(d1, d2, [0, 1], [5, 0]) self.assertFalse(res.result) self.assertEqual(res.error, 'AXIS_OUT_OF_BOUNDS') self.assertTupleEqual(res.ignored, ()) self.assertTupleEqual(res.unmatched, ()) self.assertTupleEqual(res.shape1, (2, 5)) self.assertTupleEqual(res.shape2, (5, 2, 2)) self.assertTupleEqual(res.axes1, (0, 1)) self.assertTupleEqual(res.axes2, (5, 0)) def test_DataChunkIterators_match(self): # Compare data chunk iterators d1 = DataChunkIterator(data=np.arange(10).reshape(2, 5)) d2 = DataChunkIterator(data=np.arange(10).reshape(2, 5)) res = assertEqualShape(d1, d2) self.assertTrue(res.result) self.assertIsNone(res.error) self.assertTupleEqual(res.ignored, ()) self.assertTupleEqual(res.unmatched, ()) self.assertTupleEqual(res.shape1, (2, 5)) self.assertTupleEqual(res.shape2, (2, 5)) self.assertTupleEqual(res.axes1, (0, 1)) self.assertTupleEqual(res.axes2, (0, 1)) def test_DataChunkIterator_ignore_undetermined_axis(self): # Compare data chunk iterators with undetermined axis (ignore axis) d1 = DataChunkIterator(data=np.arange(10).reshape(2, 5), maxshape=(None, 5)) d2 = DataChunkIterator(data=np.arange(10).reshape(2, 5)) res = assertEqualShape(d1, d2, ignore_undetermined=True) self.assertTrue(res.result) self.assertIsNone(res.error) self.assertTupleEqual(res.ignored, ((0, 0),)) self.assertTupleEqual(res.unmatched, ()) self.assertTupleEqual(res.shape1, (None, 5)) self.assertTupleEqual(res.shape2, (2, 5)) self.assertTupleEqual(res.axes1, (0, 1)) self.assertTupleEqual(res.axes2, (0, 1)) def test_DataChunkIterator_error_on_undetermined_axis(self): # Compare data chunk iterators with undetermined axis (error on undetermined axis) d1 = DataChunkIterator(data=np.arange(10).reshape(2, 5), maxshape=(None, 5)) d2 = DataChunkIterator(data=np.arange(10).reshape(2, 5)) res = assertEqualShape(d1, d2, ignore_undetermined=False) self.assertFalse(res.result) self.assertEqual(res.error, 'AXIS_LEN_ERROR') self.assertTupleEqual(res.ignored, ()) self.assertTupleEqual(res.unmatched, ((0, 0),)) self.assertTupleEqual(res.shape1, (None, 5)) self.assertTupleEqual(res.shape2, (2, 5)) self.assertTupleEqual(res.axes1, (0, 1)) self.assertTupleEqual(res.axes2, (0, 1)) def test_DynamicTableRegion_shape_validation(self): # Create a test DynamicTable dt_spec = [ {'name': 'foo', 'description': 'foo column'}, {'name': 'bar', 'description': 'bar column'}, {'name': 'baz', 'description': 'baz column'}, ] dt_data = [ [1, 2, 3, 4, 5], [10.0, 20.0, 30.0, 40.0, 50.0], ['cat', 'dog', 'bird', 'fish', 'lizard'] ] columns = [ VectorData(name=s['name'], description=s['description'], data=d) for s, d in zip(dt_spec, dt_data) ] dt = DynamicTable("with_columns_and_data", "a test table", columns=columns) # Create test DynamicTableRegion dtr = DynamicTableRegion('dtr', [1, 2, 2], 'desc', table=dt) # Confirm that the shapes match res = assertEqualShape(dtr, np.arange(9).reshape(3, 3)) self.assertTrue(res.result) def with_table_columns(self): cols = [VectorData(**d) for d in self.spec] table = DynamicTable("with_table_columns", 'a test table', columns=cols) return table def with_columns_and_data(self): return class ShapeValidatorResultTests(TestCase): def setUp(self): pass def tearDown(self): pass def test_default_message(self): temp = ShapeValidatorResult() temp.error = 'AXIS_LEN_ERROR' self.assertEqual(temp.default_message, ShapeValidatorResult.SHAPE_ERROR[temp.error]) def test_set_error_to_illegal_type(self): temp = ShapeValidatorResult() with self.assertRaises(ValueError): temp.error = 
'MY_ILLEGAL_ERROR_TYPE' def test_ensure_use_of_tuples_during_asignment(self): temp = ShapeValidatorResult() temp_d = [1, 2] temp_cases = ['shape1', 'shape2', 'axes1', 'axes2', 'ignored', 'unmatched'] for var in temp_cases: setattr(temp, var, temp_d) self.assertIsInstance(getattr(temp, var), tuple, var) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/utils_test/test_docval.py0000644000655200065520000006755700000000000023253 0ustar00circlecicircleci00000000000000import numpy as np from hdmf.utils import docval, fmt_docval_args, get_docval, popargs from hdmf.testing import TestCase class MyTestClass(object): @docval({'name': 'arg1', 'type': str, 'doc': 'argument1 is a str'}) def basic_add(self, **kwargs): return kwargs @docval({'name': 'arg1', 'type': str, 'doc': 'argument1 is a str'}, {'name': 'arg2', 'type': int, 'doc': 'argument2 is a int'}) def basic_add2(self, **kwargs): return kwargs @docval({'name': 'arg1', 'type': str, 'doc': 'argument1 is a str'}, {'name': 'arg2', 'type': 'int', 'doc': 'argument2 is a int'}, {'name': 'arg3', 'type': bool, 'doc': 'argument3 is a bool. it defaults to False', 'default': False}) def basic_add2_kw(self, **kwargs): return kwargs @docval({'name': 'arg1', 'type': str, 'doc': 'argument1 is a str', 'default': 'a'}, {'name': 'arg2', 'type': int, 'doc': 'argument2 is a int', 'default': 1}) def basic_only_kw(self, **kwargs): return kwargs @docval({'name': 'arg1', 'type': str, 'doc': 'argument1 is a str'}, {'name': 'arg2', 'type': 'int', 'doc': 'argument2 is a int'}, {'name': 'arg3', 'type': bool, 'doc': 'argument3 is a bool. it defaults to False', 'default': False}, allow_extra=True) def basic_add2_kw_allow_extra(self, **kwargs): return kwargs class MyTestSubclass(MyTestClass): @docval({'name': 'arg1', 'type': str, 'doc': 'argument1 is a str'}, {'name': 'arg2', 'type': int, 'doc': 'argument2 is a int'}) def basic_add(self, **kwargs): return kwargs @docval({'name': 'arg1', 'type': str, 'doc': 'argument1 is a str'}, {'name': 'arg2', 'type': int, 'doc': 'argument2 is a int'}, {'name': 'arg3', 'type': bool, 'doc': 'argument3 is a bool. it defaults to False', 'default': False}, {'name': 'arg4', 'type': str, 'doc': 'argument4 is a str'}, {'name': 'arg5', 'type': 'float', 'doc': 'argument5 is a float'}, {'name': 'arg6', 'type': bool, 'doc': 'argument6 is a bool. it defaults to False', 'default': None}) def basic_add2_kw(self, **kwargs): return kwargs class MyChainClass(MyTestClass): @docval({'name': 'arg1', 'type': (str, 'MyChainClass'), 'doc': 'arg1 is a string or MyChainClass'}, {'name': 'arg2', 'type': ('array_data', 'MyChainClass'), 'doc': 'arg2 is array data or MyChainClass. it defaults to None', 'default': None}, {'name': 'arg3', 'type': ('array_data', 'MyChainClass'), 'doc': 'arg3 is array data or MyChainClass', 'shape': (None, 2)}, {'name': 'arg4', 'type': ('array_data', 'MyChainClass'), 'doc': 'arg3 is array data or MyChainClass. 
it defaults to None.', 'shape': (None, 2), 'default': None}) def __init__(self, **kwargs): self._arg1, self._arg2, self._arg3, self._arg4 = popargs('arg1', 'arg2', 'arg3', 'arg4', kwargs) @property def arg1(self): if isinstance(self._arg1, MyChainClass): return self._arg1.arg1 else: return self._arg1 @property def arg2(self): if isinstance(self._arg2, MyChainClass): return self._arg2.arg2 else: return self._arg2 @property def arg3(self): if isinstance(self._arg3, MyChainClass): return self._arg3.arg3 else: return self._arg3 @arg3.setter def arg3(self, val): self._arg3 = val @property def arg4(self): if isinstance(self._arg4, MyChainClass): return self._arg4.arg4 else: return self._arg4 @arg4.setter def arg4(self, val): self._arg4 = val class TestDocValidator(TestCase): def setUp(self): self.test_obj = MyTestClass() self.test_obj_sub = MyTestSubclass() def test_bad_type(self): exp_msg = (r"error parsing 'arg1' argument' : argtype must be a type, " r"a str, a list, a tuple, or None - got ") with self.assertRaisesRegex(Exception, exp_msg): @docval({'name': 'arg1', 'type': {'a': 1}, 'doc': 'this is a bad type'}) def method(self, **kwargs): pass method(self, arg1=1234560) def test_bad_shape(self): @docval({'name': 'arg1', 'type': 'array_data', 'doc': 'this is a bad shape', 'shape': (None, 2)}) def method(self, **kwargs): pass with self.assertRaises(ValueError): method(self, arg1=[[1]]) with self.assertRaises(ValueError): method(self, arg1=[1]) # this should work method(self, arg1=[[1, 1]]) def test_multi_shape(self): @docval({'name': 'arg1', 'type': 'array_data', 'doc': 'this is a bad shape', 'shape': ((None,), (None, 2))}) def method1(self, **kwargs): pass method1(self, arg1=[[1, 1]]) method1(self, arg1=[1, 2]) with self.assertRaises(ValueError): method1(self, arg1=[[1, 1, 1]]) def test_fmt_docval_args(self): """ Test that fmt_docval_args works """ test_kwargs = { 'arg1': 'a string', 'arg2': 1, 'arg3': True, } rec_args, rec_kwargs = fmt_docval_args(self.test_obj.basic_add2_kw, test_kwargs) exp_args = ['a string', 1] self.assertListEqual(rec_args, exp_args) exp_kwargs = {'arg3': True} self.assertDictEqual(rec_kwargs, exp_kwargs) def test_fmt_docval_args_no_docval(self): """ Test that fmt_docval_args raises an error when run on function without docval """ def method1(self, **kwargs): pass with self.assertRaisesRegex(ValueError, r"no docval found on .*method1.*"): fmt_docval_args(method1, {}) def test_fmt_docval_args_allow_extra(self): """ Test that fmt_docval_args works """ test_kwargs = { 'arg1': 'a string', 'arg2': 1, 'arg3': True, 'hello': 'abc', 'list': ['abc', 1, 2, 3] } rec_args, rec_kwargs = fmt_docval_args(self.test_obj.basic_add2_kw_allow_extra, test_kwargs) exp_args = ['a string', 1] self.assertListEqual(rec_args, exp_args) exp_kwargs = {'arg3': True, 'hello': 'abc', 'list': ['abc', 1, 2, 3]} self.assertDictEqual(rec_kwargs, exp_kwargs) def test_docval_add(self): """Test that docval works with a single positional argument """ kwargs = self.test_obj.basic_add('a string') self.assertDictEqual(kwargs, {'arg1': 'a string'}) def test_docval_add_kw(self): """Test that docval works with a single positional argument passed as key-value """ kwargs = self.test_obj.basic_add(arg1='a string') self.assertDictEqual(kwargs, {'arg1': 'a string'}) def test_docval_add_missing_args(self): """Test that docval catches missing argument with a single positional argument """ with self.assertRaisesWith(TypeError, "MyTestClass.basic_add: missing argument 'arg1'"): self.test_obj.basic_add() def 
test_docval_add2(self): """Test that docval works with two positional arguments """ kwargs = self.test_obj.basic_add2('a string', 100) self.assertDictEqual(kwargs, {'arg1': 'a string', 'arg2': 100}) def test_docval_add2_w_unicode(self): """Test that docval works with two positional arguments """ kwargs = self.test_obj.basic_add2(u'a string', 100) self.assertDictEqual(kwargs, {'arg1': u'a string', 'arg2': 100}) def test_docval_add2_kw_default(self): """Test that docval works with two positional arguments and a keyword argument when using default keyword argument value """ kwargs = self.test_obj.basic_add2_kw('a string', 100) self.assertDictEqual(kwargs, {'arg1': 'a string', 'arg2': 100, 'arg3': False}) def test_docval_add2_pos_as_kw(self): """Test that docval works with two positional arguments and a keyword argument when using default keyword argument value, but pass positional arguments by key-value """ kwargs = self.test_obj.basic_add2_kw(arg1='a string', arg2=100) self.assertDictEqual(kwargs, {'arg1': 'a string', 'arg2': 100, 'arg3': False}) def test_docval_add2_kw_kw_syntax(self): """Test that docval works with two positional arguments and a keyword argument when specifying keyword argument value with keyword syntax """ kwargs = self.test_obj.basic_add2_kw('a string', 100, arg3=True) self.assertDictEqual(kwargs, {'arg1': 'a string', 'arg2': 100, 'arg3': True}) def test_docval_add2_kw_all_kw_syntax(self): """Test that docval works with two positional arguments and a keyword argument when specifying all arguments by key-value """ kwargs = self.test_obj.basic_add2_kw(arg1='a string', arg2=100, arg3=True) self.assertDictEqual(kwargs, {'arg1': 'a string', 'arg2': 100, 'arg3': True}) def test_docval_add2_kw_pos_syntax(self): """Test that docval works with two positional arguments and a keyword argument when specifying keyword argument value with positional syntax """ kwargs = self.test_obj.basic_add2_kw('a string', 100, True) self.assertDictEqual(kwargs, {'arg1': 'a string', 'arg2': 100, 'arg3': True}) def test_docval_add2_kw_pos_syntax_missing_args(self): """Test that docval catches incorrect type with two positional arguments and a keyword argument when specifying keyword argument value with positional syntax """ msg = "MyTestClass.basic_add2_kw: incorrect type for 'arg2' (got 'str', expected 'int')" with self.assertRaisesWith(TypeError, msg): self.test_obj.basic_add2_kw('a string', 'bad string') def test_docval_add_sub(self): """Test that docval works with a two positional arguments, where the second is specified by the subclass implementation """ kwargs = self.test_obj_sub.basic_add('a string', 100) expected = {'arg1': 'a string', 'arg2': 100} self.assertDictEqual(kwargs, expected) def test_docval_add2_kw_default_sub(self): """Test that docval works with a four positional arguments and two keyword arguments, where two positional and one keyword argument is specified in both the parent and sublcass implementations """ kwargs = self.test_obj_sub.basic_add2_kw('a string', 100, 'another string', 200.0) expected = {'arg1': 'a string', 'arg2': 100, 'arg4': 'another string', 'arg5': 200.0, 'arg3': False, 'arg6': None} self.assertDictEqual(kwargs, expected) def test_docval_add2_kw_default_sub_missing_args(self): """Test that docval catches missing arguments with a four positional arguments and two keyword arguments, where two positional and one keyword argument is specified in both the parent and sublcass implementations, when using default values for keyword arguments """ with 
self.assertRaisesWith(TypeError, "MyTestSubclass.basic_add2_kw: missing argument 'arg5'"): self.test_obj_sub.basic_add2_kw('a string', 100, 'another string') def test_docval_add2_kw_kwsyntax_sub(self): """Test that docval works when called with a four positional arguments and two keyword arguments, where two positional and one keyword argument is specified in both the parent and sublcass implementations """ kwargs = self.test_obj_sub.basic_add2_kw('a string', 100, 'another string', 200.0, arg6=True) expected = {'arg1': 'a string', 'arg2': 100, 'arg4': 'another string', 'arg5': 200.0, 'arg3': False, 'arg6': True} self.assertDictEqual(kwargs, expected) def test_docval_add2_kw_kwsyntax_sub_missing_args(self): """Test that docval catches missing arguments when called with a four positional arguments and two keyword arguments, where two positional and one keyword argument is specified in both the parent and sublcass implementations """ with self.assertRaisesWith(TypeError, "MyTestSubclass.basic_add2_kw: missing argument 'arg5'"): self.test_obj_sub.basic_add2_kw('a string', 100, 'another string', arg6=True) def test_docval_add2_kw_kwsyntax_sub_nonetype_arg(self): """Test that docval catches NoneType when called with a four positional arguments and two keyword arguments, where two positional and one keyword argument is specified in both the parent and sublcass implementations """ msg = "MyTestSubclass.basic_add2_kw: None is not allowed for 'arg5' (expected 'float', not None)" with self.assertRaisesWith(TypeError, msg): self.test_obj_sub.basic_add2_kw('a string', 100, 'another string', None, arg6=True) def test_only_kw_no_args(self): """Test that docval parses arguments when only keyword arguments exist, and no arguments are specified """ kwargs = self.test_obj.basic_only_kw() self.assertDictEqual(kwargs, {'arg1': 'a', 'arg2': 1}) def test_only_kw_arg1_no_arg2(self): """Test that docval parses arguments when only keyword arguments exist, and only first argument is specified as key-value """ kwargs = self.test_obj.basic_only_kw(arg1='b') self.assertDictEqual(kwargs, {'arg1': 'b', 'arg2': 1}) def test_only_kw_arg1_pos_no_arg2(self): """Test that docval parses arguments when only keyword arguments exist, and only first argument is specified as positional argument """ kwargs = self.test_obj.basic_only_kw('b') self.assertDictEqual(kwargs, {'arg1': 'b', 'arg2': 1}) def test_only_kw_arg2_no_arg1(self): """Test that docval parses arguments when only keyword arguments exist, and only second argument is specified as key-value """ kwargs = self.test_obj.basic_only_kw(arg2=2) self.assertDictEqual(kwargs, {'arg1': 'a', 'arg2': 2}) def test_only_kw_arg1_arg2(self): """Test that docval parses arguments when only keyword arguments exist, and both arguments are specified as key-value """ kwargs = self.test_obj.basic_only_kw(arg1='b', arg2=2) self.assertDictEqual(kwargs, {'arg1': 'b', 'arg2': 2}) def test_only_kw_arg1_arg2_pos(self): """Test that docval parses arguments when only keyword arguments exist, and both arguments are specified as positional arguments """ kwargs = self.test_obj.basic_only_kw('b', 2) self.assertDictEqual(kwargs, {'arg1': 'b', 'arg2': 2}) def test_extra_kwarg(self): """Test that docval parses arguments when only keyword arguments exist, and both arguments are specified as positional arguments """ with self.assertRaises(TypeError): self.test_obj.basic_add2_kw('a string', 100, bar=1000) def test_extra_args_pos_only(self): """Test that docval raises an error if too many positional arguments are 
specified """ msg = ("MyTestClass.basic_add2_kw: Expected at most 3 arguments ['arg1', 'arg2', 'arg3'], got 4: 4 positional " "and 0 keyword []") with self.assertRaisesWith(TypeError, msg): self.test_obj.basic_add2_kw('a string', 100, True, 'extra') def test_extra_args_pos_kw(self): """Test that docval raises an error if too many positional arguments are specified and a keyword arg is specified """ msg = ("MyTestClass.basic_add2_kw: Expected at most 3 arguments ['arg1', 'arg2', 'arg3'], got 4: 3 positional " "and 1 keyword ['arg3']") with self.assertRaisesWith(TypeError, msg): self.test_obj.basic_add2_kw('a string', 'extra', 100, arg3=True) def test_extra_kwargs_pos_kw(self): """Test that docval raises an error if extra keyword arguments are specified """ msg = ("MyTestClass.basic_add2_kw: Expected at most 3 arguments ['arg1', 'arg2', 'arg3'], got 4: 2 positional " "and 2 keyword ['arg3', 'extra']") with self.assertRaisesWith(TypeError, msg): self.test_obj.basic_add2_kw('a string', 100, extra='extra', arg3=True) def test_extra_args_pos_only_ok(self): """Test that docval raises an error if too many positional arguments are specified even if allow_extra is True """ msg = ("MyTestClass.basic_add2_kw_allow_extra: Expected at most 3 arguments ['arg1', 'arg2', 'arg3'], got " "4 positional") with self.assertRaisesWith(TypeError, msg): self.test_obj.basic_add2_kw_allow_extra('a string', 100, True, 'extra', extra='extra') def test_extra_args_pos_kw_ok(self): """Test that docval does not raise an error if too many keyword arguments are specified and allow_extra is True """ kwargs = self.test_obj.basic_add2_kw_allow_extra('a string', 100, True, extra='extra') self.assertDictEqual(kwargs, {'arg1': 'a string', 'arg2': 100, 'arg3': True, 'extra': 'extra'}) def test_dup_kw(self): """Test that docval raises an error if a keyword argument captures a positional argument before all positional arguments have been resolved """ with self.assertRaisesWith(TypeError, "MyTestClass.basic_add2_kw: got multiple values for argument 'arg1'"): self.test_obj.basic_add2_kw('a string', 100, arg1='extra') def test_extra_args_dup_kw(self): """Test that docval raises an error if a keyword argument captures a positional argument before all positional arguments have been resolved and allow_extra is True """ msg = "MyTestClass.basic_add2_kw_allow_extra: got multiple values for argument 'arg1'" with self.assertRaisesWith(TypeError, msg): self.test_obj.basic_add2_kw_allow_extra('a string', 100, True, arg1='extra') def test_unsupported_docval_term(self): """Test that docval does not allow setting of arguments marked as unsupported """ @docval({'name': 'arg1', 'type': 'array_data', 'doc': 'this is a bad shape', 'unsupported': 'hi!'}) def method(self, **kwargs): pass with self.assertRaises(ValueError): method(self, arg1=[[1, 1]]) def test_catch_dup_names(self): """Test that docval does not allow duplicate argument names """ @docval({'name': 'arg1', 'type': 'array_data', 'doc': 'this is a bad shape'}, {'name': 'arg1', 'type': 'array_data', 'doc': 'this is a bad shape2'}) def method(self, **kwargs): pass msg = "TestDocValidator.test_catch_dup_names..method: The following names are duplicated: ['arg1']" with self.assertRaisesWith(ValueError, msg): method(self, arg1=[1]) def test_get_docval_all(self): """Test that get_docval returns a tuple of the docval arguments """ args = get_docval(self.test_obj.basic_add2) self.assertTupleEqual(args, ({'name': 'arg1', 'type': str, 'doc': 'argument1 is a str'}, {'name': 'arg2', 'type': int, 'doc': 
'argument2 is a int'})) def test_get_docval_one_arg(self): """Test that get_docval returns the matching docval argument """ arg = get_docval(self.test_obj.basic_add2, 'arg2') self.assertTupleEqual(arg, ({'name': 'arg2', 'type': int, 'doc': 'argument2 is a int'},)) def test_get_docval_two_args(self): """Test that get_docval returns the matching docval arguments in order """ args = get_docval(self.test_obj.basic_add2, 'arg2', 'arg1') self.assertTupleEqual(args, ({'name': 'arg2', 'type': int, 'doc': 'argument2 is a int'}, {'name': 'arg1', 'type': str, 'doc': 'argument1 is a str'})) def test_get_docval_missing_arg(self): """Test that get_docval throws error if the matching docval argument is not found """ with self.assertRaisesWith(ValueError, "Function basic_add2 does not have docval argument 'arg3'"): get_docval(self.test_obj.basic_add2, 'arg3') def test_get_docval_missing_args(self): """Test that get_docval throws error if the matching docval arguments is not found """ with self.assertRaisesWith(ValueError, "Function basic_add2 does not have docval argument 'arg3'"): get_docval(self.test_obj.basic_add2, 'arg3', 'arg4') def test_get_docval_missing_arg_of_many_ok(self): """Test that get_docval throws error if the matching docval arguments is not found """ with self.assertRaisesWith(ValueError, "Function basic_add2 does not have docval argument 'arg3'"): get_docval(self.test_obj.basic_add2, 'arg2', 'arg3') def test_get_docval_none(self): """Test that get_docval returns an empty tuple if there is no docval """ args = get_docval(self.test_obj.__init__) self.assertTupleEqual(args, tuple()) def test_get_docval_none_arg(self): """Test that get_docval throws error if there is no docval and an argument name is passed """ with self.assertRaisesWith(ValueError, 'Function __init__ has no docval arguments'): get_docval(self.test_obj.__init__, 'arg3') def test_bool_type(self): @docval({'name': 'arg1', 'type': bool, 'doc': 'this is a bool'}) def method(self, **kwargs): return popargs('arg1', kwargs) res = method(self, arg1=True) self.assertEqual(res, True) self.assertIsInstance(res, bool) res = method(self, arg1=np.bool_(True)) self.assertEqual(res, np.bool_(True)) self.assertIsInstance(res, np.bool_) def test_bool_string_type(self): @docval({'name': 'arg1', 'type': 'bool', 'doc': 'this is a bool'}) def method(self, **kwargs): return popargs('arg1', kwargs) res = method(self, arg1=True) self.assertEqual(res, True) self.assertIsInstance(res, bool) res = method(self, arg1=np.bool_(True)) self.assertEqual(res, np.bool_(True)) self.assertIsInstance(res, np.bool_) class TestDocValidatorChain(TestCase): def setUp(self): self.obj1 = MyChainClass('base', [[1, 2], [3, 4], [5, 6]], [[10, 20]]) # note that self.obj1.arg3 == [[1, 2], [3, 4], [5, 6]] def test_type_arg(self): """Test that passing an object for an argument that allows a specific type works""" obj2 = MyChainClass(self.obj1, [[10, 20], [30, 40], [50, 60]], [[10, 20]]) self.assertEqual(obj2.arg1, 'base') def test_type_arg_wrong_type(self): """Test that passing an object for an argument that does not match a specific type raises an error""" err_msg = "MyChainClass.__init__: incorrect type for 'arg1' (got 'object', expected 'str or MyChainClass')" with self.assertRaisesWith(TypeError, err_msg): MyChainClass(object(), [[10, 20], [30, 40], [50, 60]], [[10, 20]]) def test_shape_valid_unpack(self): """Test that passing an object for an argument with required shape tests the shape of object.argument""" obj2 = MyChainClass(self.obj1, [[10, 20], [30, 40], [50, 60]], 
[[10, 20]]) obj3 = MyChainClass(self.obj1, obj2, [[100, 200]]) self.assertListEqual(obj3.arg3, obj2.arg3) def test_shape_invalid_unpack(self): """Test that passing an object for an argument with required shape and object.argument has an invalid shape raises an error""" obj2 = MyChainClass(self.obj1, [[10, 20], [30, 40], [50, 60]], [[10, 20]]) # change arg3 of obj2 to fail the required shape - contrived, but could happen because datasets can change # shape after an object is initialized obj2.arg3 = [10, 20, 30] err_msg = "MyChainClass.__init__: incorrect shape for 'arg3' (got '(3,)', expected '(None, 2)')" with self.assertRaisesWith(ValueError, err_msg): MyChainClass(self.obj1, obj2, [[100, 200]]) def test_shape_none_unpack(self): """Test that passing an object for an argument with required shape and object.argument is None is OK""" obj2 = MyChainClass(self.obj1, [[10, 20], [30, 40], [50, 60]], [[10, 20]]) obj2.arg3 = None obj3 = MyChainClass(self.obj1, obj2, [[100, 200]]) self.assertIsNone(obj3.arg3) def test_shape_other_unpack(self): """Test that passing an object for an argument with required shape and object.argument is an object without an argument attribute raises an error""" obj2 = MyChainClass(self.obj1, [[10, 20], [30, 40], [50, 60]], [[10, 20]]) obj2.arg3 = object() err_msg = (r"cannot check shape of object '' for argument 'arg3' " r"\(expected shape '\(None, 2\)'\)") with self.assertRaisesRegex(ValueError, err_msg): MyChainClass(self.obj1, obj2, [[100, 200]]) def test_shape_valid_unpack_default(self): """Test that passing an object for an argument with required shape and a default value tests the shape of object.argument""" obj2 = MyChainClass(self.obj1, [[10, 20], [30, 40], [50, 60]], arg4=[[10, 20]]) obj3 = MyChainClass(self.obj1, [[100, 200], [300, 400], [500, 600]], arg4=obj2) self.assertListEqual(obj3.arg4, obj2.arg4) def test_shape_invalid_unpack_default(self): """Test that passing an object for an argument with required shape and a default value and object.argument has an invalid shape raises an error""" obj2 = MyChainClass(self.obj1, [[10, 20], [30, 40], [50, 60]], arg4=[[10, 20]]) # change arg4 of obj2 to fail the required shape - contrived, but could happen because datasets can change # shape after an object is initialized obj2.arg4 = [10, 20, 30] err_msg = "MyChainClass.__init__: incorrect shape for 'arg4' (got '(3,)', expected '(None, 2)')" with self.assertRaisesWith(ValueError, err_msg): MyChainClass(self.obj1, [[100, 200], [300, 400], [500, 600]], arg4=obj2) def test_shape_none_unpack_default(self): """Test that passing an object for an argument with required shape and a default value and object.argument is None is OK""" obj2 = MyChainClass(self.obj1, [[10, 20], [30, 40], [50, 60]], arg4=[[10, 20]]) # change arg4 of obj2 to None - contrived, but could happen because datasets can change # shape after an object is initialized obj2.arg4 = None obj3 = MyChainClass(self.obj1, [[100, 200], [300, 400], [500, 600]], arg4=obj2) self.assertIsNone(obj3.arg4) def test_shape_other_unpack_default(self): """Test that passing an object for an argument with required shape and a default value and object.argument is an object without an argument attribute raises an error""" obj2 = MyChainClass(self.obj1, [[10, 20], [30, 40], [50, 60]], arg4=[[10, 20]]) # change arg4 of obj2 to fail the required shape - contrived, but could happen because datasets can change # shape after an object is initialized obj2.arg4 = object() err_msg = (r"cannot check shape of object '' for
argument 'arg4' " r"\(expected shape '\(None, 2\)'\)") with self.assertRaisesRegex(ValueError, err_msg): MyChainClass(self.obj1, [[100, 200], [300, 400], [500, 600]], arg4=obj2) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/utils_test/test_labelleddict.py0000644000655200065520000001603500000000000024374 0ustar00circlecicircleci00000000000000from hdmf.utils import LabelledDict from hdmf.testing import TestCase class MyTestClass: def __init__(self, prop1, prop2): self._prop1 = prop1 self._prop2 = prop2 @property def prop1(self): return self._prop1 @property def prop2(self): return self._prop2 class TestLabelledDict(TestCase): def test_constructor(self): """Test that constructor sets arguments properly""" ld = LabelledDict(label='all_objects', key_attr='prop1') self.assertEqual(ld.label, 'all_objects') self.assertEqual(ld.key_attr, 'prop1') def test_constructor_default(self): """Test that constructor sets default key attribute""" ld = LabelledDict(label='all_objects') self.assertEqual(ld.key_attr, 'name') def test_set_key_attr(self): """Test that the key attribute cannot be set after initialization""" ld = LabelledDict(label='all_objects') with self.assertRaisesWith(AttributeError, "can't set attribute"): ld.key_attr = 'another_name' def test_getitem_unknown_val(self): """Test that dict[val] raises an error if there are no matches for val""" ld = LabelledDict(label='all_objects', key_attr='prop1') with self.assertRaisesWith(KeyError, "'unknown_val'"): ld['unknown_val'] def test_getitem_eqeq_unknown_val(self): """Test that dict[key_attr == val] raises an error if there are no matches for val""" ld = LabelledDict(label='all_objects', key_attr='prop1') with self.assertRaisesWith(KeyError, "'unknown_val'"): ld['prop1 == unknown_val'] def test_getitem_eqeq_other_key_attr(self): """Test that dict[key_attr == val] raises an error if there are no matches for other_attr == val""" ld = LabelledDict(label='all_objects', key_attr='prop1') with self.assertRaisesWith(KeyError, "'unknown_val'"): ld['other_attr == unknown_val'] def test_getitem_eqeq_no_key_attr(self): """Test that dict[key_attr == val] raises an error if key_attr is not given""" ld = LabelledDict(label='all_objects', key_attr='prop1') with self.assertRaisesWith(ValueError, "An attribute name is required before '=='."): ld[' == unknown_key'] def test_getitem_eqeq_no_val(self): """Test that dict[key_attr == val] raises an error if val is not given""" ld = LabelledDict(label='all_objects', key_attr='prop1') with self.assertRaisesWith(ValueError, "A value is required after '=='."): ld['prop1 == '] def test_getitem_eqeq_no_key_attr_no_val(self): """Test that dict[key_attr == val] raises an error if key_attr is not given and val is not given""" ld = LabelledDict(label='all_objects', key_attr='prop1') with self.assertRaisesWith(ValueError, "An attribute name is required before '=='."): ld[' == '] def test_add_basic(self): """Test add function on object with correct key_attr""" ld = LabelledDict(label='all_objects', key_attr='prop1') obj1 = MyTestClass('a', 'b') ld.add(obj1) self.assertIs(ld['a'], obj1) def test_add_value_missing_key(self): """Test that add raises an error if the value being set does not have the attribute key_attr""" ld = LabelledDict(label='all_objects', key_attr='prop3') obj1 = MyTestClass('a', 'b') err_msg = r"Cannot set value '<.*>' in LabelledDict\. Value must have key 'prop3'\." 
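        # Editor's sketch: a minimal illustration of the LabelledDict API exercised by these tests.
        # The names ``demo`` and ``demo_obj`` are hypothetical and not part of the original test.
        # Values are stored under the attribute named by ``key_attr``, and other attributes can be
        # queried with the "attr == value" string syntax, which returns the matches as a set.
        demo = LabelledDict(label='demo', key_attr='prop1')
        demo_obj = MyTestClass('x', 'y')
        demo.add(demo_obj)                          # stored under demo_obj.prop1, i.e. 'x'
        assert demo['x'] is demo_obj
        assert demo['prop2 == y'] == {demo_obj}     # non-key attribute lookup returns a set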
with self.assertRaisesRegex(ValueError, err_msg): ld.add(obj1) def test_setitem_getitem_basic(self): """Test that setitem and getitem properly set and get the object""" ld = LabelledDict(label='all_objects', key_attr='prop1') obj1 = MyTestClass('a', 'b') ld.add(obj1) self.assertIs(ld['a'], obj1) def test_setitem_value_missing_key(self): """Test that setitem raises an error if the value being set does not have the attribute key_attr""" ld = LabelledDict(label='all_objects', key_attr='prop3') obj1 = MyTestClass('a', 'b') err_msg = r"Cannot set value '<.*>' in LabelledDict\. Value must have key 'prop3'\." with self.assertRaisesRegex(ValueError, err_msg): ld['a'] = obj1 def test_setitem_value_wrong_value(self): """Test that setitem raises an error if the value being set has a different value for attribute key_attr than the given key""" ld = LabelledDict(label='all_objects', key_attr='prop1') obj1 = MyTestClass('a', 'b') err_msg = r"Key 'b' must equal attribute 'prop1' of '<.*>'\." with self.assertRaisesRegex(KeyError, err_msg): ld['b'] = obj1 def test_addval_getitem_eqeq(self): """Test that dict[key_attr == val] returns the single matching object""" ld = LabelledDict(label='all_objects', key_attr='prop1') obj1 = MyTestClass('a', 'b') ld.add(obj1) self.assertIs(ld['prop1 == a'], obj1) def test_addval_getitem_eqeq_unknown_val(self): """Test that dict[key_attr == val] with an unknown val raises an error even if there are other objects in dict""" ld = LabelledDict(label='all_objects', key_attr='prop1') obj1 = MyTestClass('a', 'b') ld.add(obj1) with self.assertRaisesWith(KeyError, "'unknown_val'"): ld['prop1 == unknown_val'] def test_addval_getitem_eqeq_unknown_key_val(self): """Test that dict[key_attr == val] where prop3 does not match any objects in the dict raises an error""" ld = LabelledDict(label='all_objects', key_attr='prop1') obj1 = MyTestClass('a', 'b') ld['a'] = obj1 with self.assertRaisesWith(KeyError, "'unknown_val'"): ld['prop3 == unknown_val'] def test_addval_getitem_other_key(self): """Test that dict[other_key == val] returns a list of matching objects""" ld = LabelledDict(label='all_objects', key_attr='prop1') obj1 = MyTestClass('a', 'b') ld.add(obj1) self.assertSetEqual(ld['prop2 == b'], {obj1}) def test_addval_getitem_other_key_multi(self): """Test that dict[other_key == val] returns a list of matching objects""" ld = LabelledDict(label='all_objects', key_attr='prop1') obj1 = MyTestClass('a', 'b') obj2 = MyTestClass('d', 'b') obj3 = MyTestClass('f', 'e') ld.add(obj1) ld.add(obj2) ld.add(obj3) self.assertSetEqual(ld['prop2 == b'], set([obj1, obj2])) def test_addval_getitem_other_key_none(self): """Test that dict[other_key == val] raises an error if val does not equal any of the other_key attribute values in the dict, even when the other_key attribute exists""" ld = LabelledDict(label='all_objects', key_attr='prop1') obj1 = MyTestClass('a', 'b') obj2 = MyTestClass('d', 'b') obj3 = MyTestClass('f', 'e') ld.add(obj1) ld.add(obj2) ld.add(obj3) with self.assertRaisesWith(KeyError, "'c'"): ld['prop2 == c'] ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1579654747.9241881 hdmf-1.5.4/tests/unit/validator_tests/0000755000655200065520000000000000000000000021356 5ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/validator_tests/__init__.py0000644000655200065520000000000000000000000023455 
0ustar00circlecicircleci00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tests/unit/validator_tests/test_validate.py0000644000655200065520000002451600000000000024570 0ustar00circlecicircleci00000000000000from abc import ABCMeta, abstractmethod from datetime import datetime from dateutil.tz import tzlocal from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace from hdmf.build import GroupBuilder, DatasetBuilder from hdmf.validate import ValidatorMap from hdmf.validate.errors import * # noqa: F403 from hdmf.testing import TestCase CORE_NAMESPACE = 'test_core' class ValidatorTestBase(TestCase, metaclass=ABCMeta): def setUp(self): spec_catalog = SpecCatalog() for spec in self.getSpecs(): spec_catalog.register_spec(spec, 'test.yaml') self.namespace = SpecNamespace( 'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog) self.vmap = ValidatorMap(self.namespace) @abstractmethod def getSpecs(self): pass class TestEmptySpec(ValidatorTestBase): def getSpecs(self): return (GroupSpec('A test group specification with a data type', data_type_def='Bar'),) def test_valid(self): builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar'}) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 0) def test_invalid_missing_req_type(self): builder = GroupBuilder('my_bar') err_msg = r"builder must have data type defined with attribute '[A-Za-z_]+'" with self.assertRaisesRegex(ValueError, err_msg): self.vmap.validate(builder) class TestBasicSpec(ValidatorTestBase): def getSpecs(self): ret = GroupSpec('A test group specification with a data type', data_type_def='Bar', datasets=[DatasetSpec('an example dataset', 'int', name='data', attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')])], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) return (ret,) def test_invalid_missing(self): builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar'}) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 2) self.assertIsInstance(result[0], MissingError) # noqa: F405 self.assertEqual(result[0].name, 'Bar/attr1') self.assertIsInstance(result[1], MissingError) # noqa: F405 self.assertEqual(result[1].name, 'Bar/data') def test_invalid_incorrect_type_get_validator(self): builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'attr1': 10}) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 2) self.assertIsInstance(result[0], DtypeError) # noqa: F405 self.assertEqual(result[0].name, 'Bar/attr1') self.assertIsInstance(result[1], MissingError) # noqa: F405 self.assertEqual(result[1].name, 'Bar/data') def test_invalid_incorrect_type_validate(self): builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'attr1': 10}) result = self.vmap.validate(builder) self.assertEqual(len(result), 2) self.assertIsInstance(result[0], DtypeError) # noqa: F405 self.assertEqual(result[0].name, 'Bar/attr1') self.assertIsInstance(result[1], MissingError) # noqa: F405 self.assertEqual(result[1].name, 'Bar/data') def test_valid(self): builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10})]) validator = self.vmap.get_validator('Bar') result = 
validator.validate(builder) self.assertEqual(len(result), 0) class TestDateTimeInSpec(ValidatorTestBase): def getSpecs(self): ret = GroupSpec('A test group specification with a data type', data_type_def='Bar', datasets=[DatasetSpec('an example dataset', 'int', name='data', attributes=[AttributeSpec( 'attr2', 'an example integer attribute', 'int')]), DatasetSpec('an example time dataset', 'isodatetime', name='time'), DatasetSpec('an array of times', 'isodatetime', name='time_array', dims=('num_times',), shape=(None,))], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) return (ret,) def test_valid_isodatetime(self): builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}), DatasetBuilder('time', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())), DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())])]) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 0) def test_invalid_isodatetime(self): builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}), DatasetBuilder('time', 100), DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())])]) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 1) self.assertIsInstance(result[0], DtypeError) # noqa: F405 self.assertEqual(result[0].name, 'Bar/time') def test_invalid_isodatetime_array(self): builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}), DatasetBuilder('time', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())), DatasetBuilder('time_array', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()))]) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 1) self.assertIsInstance(result[0], ExpectedArrayError) # noqa: F405 self.assertEqual(result[0].name, 'Bar/time_array') class TestNestedTypes(ValidatorTestBase): def getSpecs(self): bar = GroupSpec('A test group specification with a data type', data_type_def='Bar', datasets=[DatasetSpec('an example dataset', 'int', name='data', attributes=[AttributeSpec('attr2', 'an example integer attribute', 'int')])], attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) foo = GroupSpec('A test group that contains a data type', data_type_def='Foo', groups=[GroupSpec('A Bar group for Foos', name='my_bar', data_type_inc='Bar')], attributes=[AttributeSpec('foo_attr', 'a string attribute specified as text', 'text', required=False)]) return (bar, foo) def test_invalid_missing_req_group(self): foo_builder = GroupBuilder('my_foo', attributes={'data_type': 'Foo', 'foo_attr': 'example Foo object'}) results = self.vmap.validate(foo_builder) self.assertIsInstance(results[0], MissingDataType) # noqa: F405 self.assertEqual(results[0].name, 'Foo') self.assertEqual(results[0].reason, 'missing data type Bar') def test_invalid_wrong_name_req_type(self): bar_builder = GroupBuilder('bad_bar_name', attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10})]) foo_builder = GroupBuilder('my_foo', attributes={'data_type': 'Foo', 'foo_attr': 'example Foo object'}, groups=[bar_builder]) results = 
self.vmap.validate(foo_builder) self.assertEqual(len(results), 1) self.assertIsInstance(results[0], MissingDataType) # noqa: F405 self.assertEqual(results[0].data_type, 'Bar') def test_valid(self): bar_builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10})]) foo_builder = GroupBuilder('my_foo', attributes={'data_type': 'Foo', 'foo_attr': 'example Foo object'}, groups=[bar_builder]) results = self.vmap.validate(foo_builder) self.assertEqual(len(results), 0) def test_valid_wo_opt_attr(self): bar_builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10})]) foo_builder = GroupBuilder('my_foo', attributes={'data_type': 'Foo'}, groups=[bar_builder]) results = self.vmap.validate(foo_builder) self.assertEqual(len(results), 0) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/tox.ini0000644000655200065520000000604000000000000015341 0ustar00circlecicircleci00000000000000# Tox (https://tox.readthedocs.io/) is a tool for running tests # in multiple virtualenvs. This configuration file will run the # test suite on all supported python versions. To use it, "pip install tox" # and then run "tox" from this directory. [tox] envlist = py35, py36, py37, py38 [testenv] usedevelop = True setenv = PYTHONDONTWRITEBYTECODE = 1 install_command = pip install -U {opts} {packages} deps = -rrequirements-dev.txt -rrequirements.txt commands = pip check # Check for conflicting packages python test.py -v # Env to create coverage report locally [testenv:localcoverage] basepython = python3.8 commands = python -m coverage run test.py -u coverage html -d tests/coverage/htmlcov # Test with python 3.5, pinned dev reqs, and minimum run requirements [testenv:py35-min-req] basepython = python3.5 deps = -rrequirements-dev.txt -rrequirements-min.txt commands = {[testenv]commands} # Envs that builds wheels and source distribution [testenv:build] commands = python setup.py sdist python setup.py bdist_wheel [testenv:build-py35] basepython = python3.5 commands = {[testenv:build]commands} [testenv:build-py36] basepython = python3.6 commands = {[testenv:build]commands} [testenv:build-py37] basepython = python3.7 commands = {[testenv:build]commands} [testenv:build-py38] basepython = python3.8 commands = {[testenv:build]commands} [testenv:build-py35-min-req] basepython = python3.5 deps = -rrequirements-dev.txt -rrequirements-min.txt commands = {[testenv:build]commands} # Envs that will only be executed on CI that does coverage reporting [testenv:coverage] passenv = CODECOV_TOKEN basepython = python3.8 commands = python -m coverage run test.py -u coverage report -m codecov -X fix # Envs that will test installation from a wheel [testenv:wheelinstall-py35] deps = null commands = python -c "import hdmf" [testenv:wheelinstall-py36] deps = null commands = python -c "import hdmf" [testenv:wheelinstall-py37] deps = null commands = python -c "import hdmf" [testenv:wheelinstall-py38] deps = null commands = python -c "import hdmf" [testenv:wheelinstall-py35-min-req] deps = null commands = python -c "import hdmf" # Envs that will execute gallery tests [testenv:gallery] install_command = pip install -U {opts} {packages} deps = -rrequirements-dev.txt -rrequirements.txt -rrequirements-doc.txt commands = python test.py --example [testenv:gallery-py35] basepython = python3.5 deps = 
{[testenv:gallery]deps} commands = {[testenv:gallery]commands} [testenv:gallery-py36] basepython = python3.6 deps = {[testenv:gallery]deps} commands = {[testenv:gallery]commands} [testenv:gallery-py37] basepython = python3.7 deps = {[testenv:gallery]deps} commands = {[testenv:gallery]commands} [testenv:gallery-py38] basepython = python3.8 deps = {[testenv:gallery]deps} commands = {[testenv:gallery]commands} [testenv:gallery-py35-min-req] basepython = python3.5 deps = -rrequirements-dev.txt -rrequirements-min.txt -rrequirements-doc.txt commands = {[testenv:gallery]commands} ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1579654657.0 hdmf-1.5.4/versioneer.py0000644000655200065520000021321100000000000016561 0ustar00circlecicircleci00000000000000# flake8: noqa: C901 # Version: 0.18 """The Versioneer - like a rocketeer, but for versions. The Versioneer ============== * like a rocketeer, but for versions! * https://github.com/warner/python-versioneer * Brian Warner * License: Public Domain * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy * [![Latest Version] (https://pypip.in/version/versioneer/badge.svg?style=flat) ](https://pypi.python.org/pypi/versioneer/) * [![Build Status] (https://travis-ci.org/warner/python-versioneer.png?branch=master) ](https://travis-ci.org/warner/python-versioneer) This is a tool for managing a recorded version number in distutils-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control system, and maybe making new tarballs. ## Quick Install * `pip install versioneer` to somewhere to your $PATH * add a `[versioneer]` section to your setup.cfg (see below) * run `versioneer install` in your source tree, commit the results ## Version Identifiers Source trees come from a variety of places: * a version-control system checkout (mostly used by developers) * a nightly tarball, produced by build automation * a snapshot tarball, produced by a web-based VCS browser, like github's "tarball from tag" feature * a release tarball, produced by "setup.py sdist", distributed through PyPI Within each source tree, the version identifier (either a string or a number, this tool is format-agnostic) can come from a variety of places: * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows about recent "tags" and an absolute revision-id * the name of the directory into which the tarball was unpacked * an expanded VCS keyword ($Id$, etc) * a `_version.py` created by some earlier build step For released software, the version identifier is closely related to a VCS tag. Some projects use tag names that include more than just the version string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool needs to strip the tag prefix to extract the version identifier. For unreleased software (between tags), the version identifier should provide enough information to help developers recreate the same tree, while also giving them an idea of roughly how old the tree is (after version 1.2, before version 1.3). Many VCS systems can report a description that captures this, for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has uncommitted changes. 
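(Editorial sketch, not part of Versioneer itself: a rough illustration of how such a `git describe`
string decomposes, using the same TAG-NUM-gHEX[-dirty] pattern that the code further below parses.)

    import re
    describe = "0.7-1-g574ab98-dirty"    # example output of `git describe --tags --dirty --always --long`
    dirty = describe.endswith("-dirty")  # True: the checkout has uncommitted changes
    if dirty:
        describe = describe[:describe.rindex("-dirty")]
    tag, distance, short = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', describe).groups()
    # tag == "0.7", distance == "1" commit past the tag, short == "574ab98"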
The version identifier is used for multiple purposes: * to allow the module to self-identify its version: `myproject.__version__` * to choose a name and prefix for a 'setup.py sdist' tarball ## Theory of Operation Versioneer works by adding a special `_version.py` file into your source tree, where your `__init__.py` can import it. This `_version.py` knows how to dynamically ask the VCS tool for version information at import time. `_version.py` also contains `$Revision$` markers, and the installation process marks `_version.py` to have this marker rewritten with a tag name during the `git archive` command. As a result, generated tarballs will contain enough information to get the proper version. To allow `setup.py` to compute a version too, a `versioneer.py` is added to the top level of your source tree, next to `setup.py` and the `setup.cfg` that configures it. This overrides several distutils/setuptools commands to compute the version when invoked, and changes `setup.py build` and `setup.py sdist` to replace `_version.py` with a small static file that contains just the generated version data. ## Installation See [INSTALL.md](./INSTALL.md) for detailed installation instructions. ## Version-String Flavors Code which uses Versioneer can learn about its version string at runtime by importing `_version` from your main `__init__.py` file and running the `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can import the top-level `versioneer.py` and run `get_versions()`. Both functions return a dictionary with different flavors of version information: * `['version']`: A condensed version string, rendered using the selected style. This is the most commonly used value for the project's version string. The default "pep440" style yields strings like `0.11`, `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section below for alternative styles. * `['full-revisionid']`: detailed revision identifier. For Git, this is the full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the commit date in ISO 8601 format. This will be None if the date is not available. * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that this is only accurate if run in a VCS checkout, otherwise it is likely to be False or None * `['error']`: if the version string could not be computed, this will be set to a string describing the problem, otherwise it will be None. It may be useful to throw an exception in setup.py if this is set, to avoid e.g. creating tarballs with a version string of "unknown". Some variants are more useful than others. Including `full-revisionid` in a bug report should allow developers to reconstruct the exact code being tested (or indicate the presence of local changes that should be shared with the developers). `version` is suitable for display in an "about" box or a CLI `--version` output: it can be easily compared against release notes and lists of bugs fixed in various releases. The installer adds the following text to your `__init__.py` to place a basic version in `YOURPROJECT.__version__`: from ._version import get_versions __version__ = get_versions()['version'] del get_versions ## Styles The setup.cfg `style=` configuration controls how the VCS information is rendered into a version string. 
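(Editorial sketch: illustrative strings pairing each supported style name with the kind of version it
renders for a checkout that is two commits past tag 0.11 and has uncommitted changes. The values are
derived from the render_* helpers later in this file, not captured from a real repository.)

    STYLE_EXAMPLES = {
        "pep440": "0.11+2.g1076c97.dirty",
        "pep440-pre": "0.11.post.dev2",
        "pep440-post": "0.11.post2.dev0+g1076c97",
        "pep440-old": "0.11.post2.dev0",
        "git-describe": "0.11-2-g1076c97-dirty",
        "git-describe-long": "0.11-2-g1076c97-dirty",
    }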
The default style, "pep440", produces a PEP440-compliant string, equal to the un-prefixed tag name for actual releases, and containing an additional "local version" section with more detail for in-between builds. For Git, this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" tag. For released software (exactly equal to a known tag), the identifier will only contain the stripped tag, e.g. "0.11". Other styles are available. See [details.md](details.md) in the Versioneer source tree for descriptions. ## Debugging Versioneer tries to avoid fatal errors: if something goes wrong, it will tend to return a version of "0+unknown". To investigate the problem, run `setup.py version`, which will run the version-lookup code in a verbose mode, and will display the full contents of `get_versions()` (including the `error` string, which may help identify what went wrong). ## Known Limitations Some situations are known to cause problems for Versioneer. This details the most significant ones. More can be found on Github [issues page](https://github.com/warner/python-versioneer/issues). ### Subprojects Versioneer has limited support for source trees in which `setup.py` is not in the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are two common reasons why `setup.py` might not be in the root: * Source trees which contain multiple subprojects, such as [Buildbot](https://github.com/buildbot/buildbot), which contains both "master" and "slave" subprojects, each with their own `setup.py`, `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI distributions (and upload multiple independently-installable tarballs). * Source trees whose main purpose is to contain a C library, but which also provide bindings to Python (and perhaps other languages) in subdirectories. Versioneer will look for `.git` in parent directories, and most operations should get the right version string. However `pip` and `setuptools` have bugs and implementation details which frequently cause `pip install .` from a subproject directory to fail to find a correct version string (so it usually defaults to `0+unknown`). `pip install --editable .` should work correctly. `setup.py install` might work too. Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in some later version. [Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking this issue. The discussion in [PR #61](https://github.com/warner/python-versioneer/pull/61) describes the issue from the Versioneer side in more detail. [pip PR#3176](https://github.com/pypa/pip/pull/3176) and [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve pip to let Versioneer work correctly. Versioneer-0.16 and earlier only looked for a `.git` directory next to the `setup.cfg`, so subprojects were completely unsupported with those releases. ### Editable installs with setuptools <= 18.5 `setup.py develop` and `pip install --editable .` allow you to install a project into a virtualenv once, then continue editing the source code (and test) without re-installing after every change. "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a convenient way to specify executable scripts that should be installed along with the python package. 
These both work as expected when using modern setuptools. When using setuptools-18.5 or earlier, however, certain operations will cause `pkg_resources.DistributionNotFound` errors when running the entrypoint script, which must be resolved by re-installing the package. This happens when the install happens with one version, then the egg_info data is regenerated while a different version is checked out. Many setup.py commands cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into a different virtualenv), so this can be surprising. [Bug #83](https://github.com/warner/python-versioneer/issues/83) describes this one, but upgrading to a newer version of setuptools should probably resolve it. ### Unicode version strings While Versioneer works (and is continually tested) with both Python 2 and Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. Newer releases probably generate unicode version strings on py2. It's not clear that this is wrong, but it may be surprising for applications when then write these strings to a network connection or include them in bytes-oriented APIs like cryptographic checksums. [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates this question. ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) * edit `setup.cfg`, if necessary, to include any new configuration settings indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. * re-run `versioneer install` in your source tree, to replace `SRC/_version.py` * commit any changed files ## Future Directions This tool is designed to make it easily extended to other version-control systems: all VCS-specific components are in separate directories like src/git/ . The top-level `versioneer.py` script is assembled from these components by running make-versioneer.py . In the future, make-versioneer.py will take a VCS name as an argument, and will construct a version of `versioneer.py` that is specific to the given VCS. It might also take the configuration arguments that are currently provided manually during installation by editing setup.py . Alternatively, it might go the other direction and include code from all supported VCS systems, reducing the number of intermediate scripts. ## License To make Versioneer easier to embed, all its code is dedicated to the public domain. The `_version.py` that it creates is also in the public domain. Specifically, both are released under the Creative Commons "Public Domain Dedication" license (CC0-1.0), as described in https://creativecommons.org/publicdomain/zero/1.0/ . """ from __future__ import print_function try: import configparser except ImportError: import ConfigParser as configparser import errno import fnmatch # HDMF import json import os import re import subprocess import sys class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_root(): """Get the project root directory. We require that all commands are run from the project root, i.e. the directory that contains setup.py, setup.cfg, and versioneer.py . 
""" root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): err = ("Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " "or in a way that lets it use sys.argv[0] to find the root " "(like 'python path/to/setup.py COMMAND').") raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools # tree) execute all dependencies in a single python process, so # "versioneer" may be imported multiple times, and python's shared # module-import table will cache the first one. So we can't use # os.path.dirname(__file__), as that will find whichever # versioneer.py was first imported, even in later projects. me = os.path.realpath(os.path.abspath(__file__)) me_dir = os.path.normcase(os.path.splitext(me)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir: print("Warning: build in %s is using versioneer.py from %s" % (os.path.dirname(me), versioneer_py)) except NameError: pass return root def get_config_from_root(root): """Read the project setup.cfg file to determine Versioneer config.""" # This might raise EnvironmentError (if setup.cfg is missing), or # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . 
setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() with open(setup_cfg, "r") as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory def get(parser, name): if parser.has_option("versioneer", name): return parser.get("versioneer", name) return None cfg = VersioneerConfig() cfg.VCS = VCS cfg.style = get(parser, "style") or "" cfg.versionfile_source = get(parser, "versionfile_source") cfg.versionfile_build = get(parser, "versionfile_build") cfg.tag_prefix = get(parser, "tag_prefix") if cfg.tag_prefix in ("''", '""'): cfg.tag_prefix = "" cfg.parentdir_prefix = get(parser, "parentdir_prefix") cfg.verbose = get(parser, "verbose") return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" # these dictionaries contain VCS-specific tools LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, p.returncode return stdout, p.returncode LONG_VERSION_PY['git'] = ''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.18 (https://github.com/warner/python-versioneer) """Git implementation of _version.py.""" import errno import fnmatch # HDMF import os import re import subprocess import sys def get_keywords(): """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_config(): """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "%(STYLE)s" cfg.tag_prefix = "%(TAG_PREFIX)s" cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %%s" %% dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) print("stdout was %%s" %% stdout) return None, p.returncode return stdout, p.returncode def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %%s but none started with prefix %%s" %% (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%%s', no digits" %% ",".join(refs - tags)) if verbose: print("likely tags: %%s" %% ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" # HDMF: Support tag_prefix specified as a glob pattern tag_is_glob_pattern = "*" in tag_prefix if tag_is_glob_pattern: if fnmatch.fnmatch(ref, tag_prefix): r = ref if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} else: if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. 
This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %%s not under git control" %% root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # HDMF: Support tag_prefix specified as a glob pattern tag_is_glob_pattern = "*" in tag_prefix match_argument = tag_prefix if not tag_is_glob_pattern: match_argument = tag_prefix + "*" # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s" % match_argument], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%%s'" %% describe_out) return pieces # tag full_tag = mo.group(1) # HDMF: Support tag_prefix specified as a glob pattern if tag_is_glob_pattern: if not fnmatch.fnmatch(full_tag, tag_prefix): if verbose: fmt = "tag '%%s' doesn't match glob pattern '%%s'" print(fmt %% (full_tag, tag_prefix)) pieces["error"] = ("tag '%%s' doesn't match glob pattern '%%s'" %% (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag else: if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%%s' doesn't start with prefix '%%s'" print(fmt %% (full_tag, tag_prefix)) pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" %% (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . 
Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post.dev%%d" %% pieces["distance"] else: # exception #1 rendered = "0.post.dev%%d" %% pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%%s" %% pieces["short"] else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%%s" %% pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Eexceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%%s'" %% style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions(): """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for i in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} ''' @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
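    # Editor's note (illustrative values, not from the original source): after `git archive` expands
    # the export-subst keywords, _version.py contains lines such as
    #     git_refnames = " (HEAD -> master, tag: 1.5.4)"
    #     git_full = "1076c978a8d3cfc70f408fe5974aa6c092c949ac"
    #     git_date = "2020-01-21 16:57:37 -0800"
    # and the regular expressions below simply pull those quoted values back out.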
keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" # HDMF: Support tag_prefix specified as a glob pattern tag_is_glob_pattern = "*" in tag_prefix if tag_is_glob_pattern: if fnmatch.fnmatch(ref, tag_prefix): r = ref if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} else: if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. 
    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
                          hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # HDMF: Support tag_prefix specified as a glob pattern
    tag_is_glob_pattern = "*" in tag_prefix
    match_argument = tag_prefix
    if not tag_is_glob_pattern:
        match_argument = tag_prefix + "*"

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
                                          "--always", "--long",
                                          "--match", "%s" % match_argument],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        # HDMF: Support tag_prefix specified as a glob pattern
        if tag_is_glob_pattern:
            if not fnmatch.fnmatch(full_tag, tag_prefix):
                if verbose:
                    fmt = "tag '%s' doesn't match glob pattern '%s'"
                    print(fmt % (full_tag, tag_prefix))
                pieces["error"] = ("tag '%s' doesn't match glob pattern '%s'"
                                   % (full_tag, tag_prefix))
                return pieces
            pieces["closest-tag"] = full_tag
        else:
            if not full_tag.startswith(tag_prefix):
                if verbose:
                    fmt = "tag '%s' doesn't start with prefix '%s'"
                    print(fmt % (full_tag, tag_prefix))
                pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                                   % (full_tag, tag_prefix))
                return pieces
            pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
                       cwd=root)[0].strip()
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces


def do_vcs_install(manifest_in, versionfile_source, ipy):
    """Git-specific installation logic for Versioneer.

    For Git, this means creating/changing .gitattributes to mark _version.py
    for export-subst keyword substitution.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]
    files = [manifest_in, versionfile_source]
    if ipy:
        files.append(ipy)
    try:
        me = __file__
        if me.endswith(".pyc") or me.endswith(".pyo"):
            me = os.path.splitext(me)[0] + ".py"
        versioneer_file = os.path.relpath(me)
    except NameError:
        versioneer_file = "versioneer.py"
    files.append(versioneer_file)
    present = False
    try:
        f = open(".gitattributes", "r")
        for line in f.readlines():
            if line.strip().startswith(versionfile_source):
                if "export-subst" in line.strip().split()[1:]:
                    present = True
        f.close()
    except EnvironmentError:
        pass
    if not present:
        f = open(".gitattributes", "a+")
        f.write("%s export-subst\n" % versionfile_source)
        f.close()
        files.append(".gitattributes")
    run_command(GITS, ["add", "--"] + files)


def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory that includes both
    the project name and a version string. We will also support searching up
    two directory levels for an appropriately named parent directory
    """
    rootdirs = []

    for i in range(3):
        dirname = os.path.basename(root)
        if dirname.startswith(parentdir_prefix):
            return {"version": dirname[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        else:
            rootdirs.append(root)
            root = os.path.dirname(root)  # up a level

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(rootdirs), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")


SHORT_VERSION_PY = """
# This file was generated by 'versioneer.py' (0.18) from
# revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file.

import json

version_json = '''
%s
'''  # END VERSION_JSON


def get_versions():
    return json.loads(version_json)
"""


def versions_from_file(filename):
    """Try to determine the version from _version.py if present."""
    try:
        with open(filename) as f:
            contents = f.read()
    except EnvironmentError:
        raise NotThisMethod("unable to read _version.py")
    mo = re.search(r"version_json = '''\n(.*)'''  # END VERSION_JSON",
                   contents, re.M | re.S)
    if not mo:
        mo = re.search(r"version_json = '''\r\n(.*)'''  # END VERSION_JSON",
                       contents, re.M | re.S)
    if not mo:
        raise NotThisMethod("no version_json in _version.py")
    return json.loads(mo.group(1))


def write_to_version_file(filename, versions):
    """Write the given version number to the given _version.py file."""
    os.unlink(filename)
    contents = json.dumps(versions, sort_keys=True,
                          indent=1, separators=(",", ": "))
    with open(filename, "w") as f:
        f.write(SHORT_VERSION_PY % contents)

    print("set %s to '%s'" % (filename, versions["version"]))


def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a ."""
    if "+" in pieces.get("closest-tag", ""):
        return "."
    return "+"


def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX.
       0+untagged.DISTANCE.gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += plus_or_dot(pieces)
            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
                                          pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def render_pep440_pre(pieces):
    """TAG[.post.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post.devDISTANCE
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += ".post.dev%d" % pieces["distance"]
    else:
        # exception #1
        rendered = "0.post.dev%d" % pieces["distance"]
    return rendered


def render_pep440_post(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "g%s" % pieces["short"]
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += "+g%s" % pieces["short"]
    return rendered


def render_pep440_old(pieces):
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
    return rendered


def render_git_describe(pieces):
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render_git_describe_long(pieces):
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always --long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags.
       HEX[-dirty] (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render(pieces, style):
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    if style == "pep440":
        rendered = render_pep440(pieces)
    elif style == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif style == "pep440-post":
        rendered = render_pep440_post(pieces)
    elif style == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif style == "git-describe":
        rendered = render_git_describe(pieces)
    elif style == "git-describe-long":
        rendered = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%s'" % style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}


class VersioneerBadRootError(Exception):
    """The project root directory is unknown or missing key files."""


def get_versions(verbose=False):
    """Get the project version from whatever source is available.

    Returns dict with two keys: 'version' and 'full'.
    """
    if "versioneer" in sys.modules:
        # see the discussion in cmdclass.py:get_cmdclass()
        del sys.modules["versioneer"]

    root = get_root()
    cfg = get_config_from_root(root)

    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
    handlers = HANDLERS.get(cfg.VCS)
    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
    verbose = verbose or cfg.verbose
    assert cfg.versionfile_source is not None, \
        "please set versioneer.versionfile_source"
    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"

    versionfile_abs = os.path.join(root, cfg.versionfile_source)

    # extract version from first of: _version.py, VCS command (e.g. 'git
    # describe'), parentdir. This is meant to work for developers using a
    # source checkout, for users of a tarball created by 'setup.py sdist',
    # and for users of a tarball/zipball created by 'git archive' or github's
    # download-from-tag feature or the equivalent in other VCSes.
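    # Rough sketch of the fallback order implemented below (version strings
    # are hypothetical examples, assuming the default "pep440" style):
    #   1. expanded git-archive keywords    -> e.g. {"version": "1.5.4", ...}
    #   2. an already-rewritten _version.py (what sdist/build wrote out)
    #   3. 'git describe' on a checkout     -> e.g. "1.5.4+2.gabc1234.dirty"
    #   4. parent directory name            -> e.g. "hdmf-1.5.4/" gives "1.5.4"
    # If every method raises NotThisMethod, "0+unknown" is returned.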
    get_keywords_f = handlers.get("get_keywords")
    from_keywords_f = handlers.get("keywords")
    if get_keywords_f and from_keywords_f:
        try:
            keywords = get_keywords_f(versionfile_abs)
            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
            if verbose:
                print("got version from expanded keyword %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        ver = versions_from_file(versionfile_abs)
        if verbose:
            print("got version from file %s %s" % (versionfile_abs, ver))
        return ver
    except NotThisMethod:
        pass

    from_vcs_f = handlers.get("pieces_from_vcs")
    if from_vcs_f:
        try:
            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
            ver = render(pieces, cfg.style)
            if verbose:
                print("got version from VCS %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        if cfg.parentdir_prefix:
            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
            if verbose:
                print("got version from parentdir %s" % ver)
            return ver
    except NotThisMethod:
        pass

    if verbose:
        print("unable to compute version")

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None, "error": "unable to compute version",
            "date": None}


def get_version():
    """Get the short version string for this project."""
    return get_versions()["version"]


def get_cmdclass():
    """Get the custom setuptools/distutils subclasses used by Versioneer."""
    if "versioneer" in sys.modules:
        del sys.modules["versioneer"]
        # this fixes the "python setup.py develop" case (also 'install' and
        # 'easy_install .'), in which subdependencies of the main project are
        # built (using setup.py bdist_egg) in the same python process. Assume
        # a main project A and a dependency B, which use different versions
        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
        # sys.modules by the time B's setup.py is executed, causing B to run
        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
        # sandbox that restores sys.modules to its pre-build state, so the
        # parent is protected against the child's "import versioneer". By
        # removing ourselves from sys.modules here, before the child build
        # happens, we protect the child from the parent's versioneer too.
        # Also see https://github.com/warner/python-versioneer/issues/52

    cmds = {}

    # we add "version" to both distutils and setuptools
    from distutils.core import Command

    class cmd_version(Command):
        description = "report generated version string"
        user_options = []
        boolean_options = []

        def initialize_options(self):
            pass

        def finalize_options(self):
            pass

        def run(self):
            vers = get_versions(verbose=True)
            print("Version: %s" % vers["version"])
            print(" full-revisionid: %s" % vers.get("full-revisionid"))
            print(" dirty: %s" % vers.get("dirty"))
            print(" date: %s" % vers.get("date"))
            if vers["error"]:
                print(" error: %s" % vers["error"])
    cmds["version"] = cmd_version

    # we override "build_py" in both distutils and setuptools
    #
    # most invocation pathways end up running build_py:
    #  distutils/build -> build_py
    #  distutils/install -> distutils/build ->..
    #  setuptools/bdist_wheel -> distutils/install ->..
    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
    #  setuptools/install -> bdist_egg ->..
    #  setuptools/develop -> ?
    #  pip install:
    #   copies source tree to a tempdir before running egg_info/etc
    #   if .git isn't copied too, 'git describe' will fail
    #   then does setup.py bdist_wheel, or sometimes setup.py install
    #  setup.py egg_info -> ?
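    # For reference, the cmdclass dict built below is meant to be consumed
    # from a project's setup.py roughly like this (see CONFIG_ERROR further
    # down for the canonical snippet):
    #
    #     import versioneer
    #     setup(version=versioneer.get_version(),
    #           cmdclass=versioneer.get_cmdclass(), ...)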
    # we override different "build_py" commands for both environments
    if "setuptools" in sys.modules:
        from setuptools.command.build_py import build_py as _build_py
    else:
        from distutils.command.build_py import build_py as _build_py

    class cmd_build_py(_build_py):
        def run(self):
            root = get_root()
            cfg = get_config_from_root(root)
            versions = get_versions()
            _build_py.run(self)
            # now locate _version.py in the new build/ directory and replace
            # it with an updated value
            if cfg.versionfile_build:
                target_versionfile = os.path.join(self.build_lib,
                                                  cfg.versionfile_build)
                print("UPDATING %s" % target_versionfile)
                write_to_version_file(target_versionfile, versions)
    cmds["build_py"] = cmd_build_py

    if "cx_Freeze" in sys.modules:  # cx_freeze enabled?
        from cx_Freeze.dist import build_exe as _build_exe
        # nczeczulin reports that py2exe won't like the pep440-style string
        # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.
        # setup(console=[{
        #   "version": versioneer.get_version().split("+", 1)[0],  # FILEVERSION
        #   "product_version": versioneer.get_version(),
        #   ...

        class cmd_build_exe(_build_exe):
            def run(self):
                root = get_root()
                cfg = get_config_from_root(root)
                versions = get_versions()
                target_versionfile = cfg.versionfile_source
                print("UPDATING %s" % target_versionfile)
                write_to_version_file(target_versionfile, versions)

                _build_exe.run(self)
                os.unlink(target_versionfile)
                with open(cfg.versionfile_source, "w") as f:
                    LONG = LONG_VERSION_PY[cfg.VCS]
                    f.write(LONG %
                            {"DOLLAR": "$",
                             "STYLE": cfg.style,
                             "TAG_PREFIX": cfg.tag_prefix,
                             "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                             "VERSIONFILE_SOURCE": cfg.versionfile_source,
                             })
        cmds["build_exe"] = cmd_build_exe
        del cmds["build_py"]

    if 'py2exe' in sys.modules:  # py2exe enabled?
        try:
            from py2exe.distutils_buildexe import py2exe as _py2exe  # py3
        except ImportError:
            from py2exe.build_exe import py2exe as _py2exe  # py2

        class cmd_py2exe(_py2exe):
            def run(self):
                root = get_root()
                cfg = get_config_from_root(root)
                versions = get_versions()
                target_versionfile = cfg.versionfile_source
                print("UPDATING %s" % target_versionfile)
                write_to_version_file(target_versionfile, versions)

                _py2exe.run(self)
                os.unlink(target_versionfile)
                with open(cfg.versionfile_source, "w") as f:
                    LONG = LONG_VERSION_PY[cfg.VCS]
                    f.write(LONG %
                            {"DOLLAR": "$",
                             "STYLE": cfg.style,
                             "TAG_PREFIX": cfg.tag_prefix,
                             "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                             "VERSIONFILE_SOURCE": cfg.versionfile_source,
                             })
        cmds["py2exe"] = cmd_py2exe

    # we override different "sdist" commands for both environments
    if "setuptools" in sys.modules:
        from setuptools.command.sdist import sdist as _sdist
    else:
        from distutils.command.sdist import sdist as _sdist

    class cmd_sdist(_sdist):
        def run(self):
            versions = get_versions()
            self._versioneer_generated_versions = versions
            # unless we update this, the command will keep using the old
            # version
            self.distribution.metadata.version = versions["version"]
            return _sdist.run(self)

        def make_release_tree(self, base_dir, files):
            root = get_root()
            cfg = get_config_from_root(root)
            _sdist.make_release_tree(self, base_dir, files)
            # now locate _version.py in the new base_dir directory
            # (remembering that it may be a hardlink) and replace it with an
            # updated value
            target_versionfile = os.path.join(base_dir,
                                              cfg.versionfile_source)
            print("UPDATING %s" % target_versionfile)
            write_to_version_file(target_versionfile,
                                  self._versioneer_generated_versions)
    cmds["sdist"] = cmd_sdist

    return cmds


CONFIG_ERROR = """
setup.cfg is missing the necessary Versioneer configuration.
You need a section like:

 [versioneer]
 VCS = git
 style = pep440
 versionfile_source = src/myproject/_version.py
 versionfile_build = myproject/_version.py
 tag_prefix =
 parentdir_prefix = myproject-

You will also need to edit your setup.py to use the results:

 import versioneer
 setup(version=versioneer.get_version(),
       cmdclass=versioneer.get_cmdclass(), ...)

Please read the docstring in ./versioneer.py for configuration instructions,
edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
"""

SAMPLE_CONFIG = """
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.

[versioneer]
#VCS = git
#style = pep440
#versionfile_source =
#versionfile_build =
#tag_prefix =
#parentdir_prefix =

"""

INIT_PY_SNIPPET = """
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
"""


def do_setup():
    """Main VCS-independent setup function for installing Versioneer."""
    root = get_root()
    try:
        cfg = get_config_from_root(root)
    except (EnvironmentError, configparser.NoSectionError,
            configparser.NoOptionError) as e:
        if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
            print("Adding sample versioneer config to setup.cfg",
                  file=sys.stderr)
            with open(os.path.join(root, "setup.cfg"), "a") as f:
                f.write(SAMPLE_CONFIG)
        print(CONFIG_ERROR, file=sys.stderr)
        return 1

    print(" creating %s" % cfg.versionfile_source)
    with open(cfg.versionfile_source, "w") as f:
        LONG = LONG_VERSION_PY[cfg.VCS]
        f.write(LONG % {"DOLLAR": "$",
                        "STYLE": cfg.style,
                        "TAG_PREFIX": cfg.tag_prefix,
                        "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                        "VERSIONFILE_SOURCE": cfg.versionfile_source,
                        })

    ipy = os.path.join(os.path.dirname(cfg.versionfile_source),
                       "__init__.py")
    if os.path.exists(ipy):
        try:
            with open(ipy, "r") as f:
                old = f.read()
        except EnvironmentError:
            old = ""
        if INIT_PY_SNIPPET not in old:
            print(" appending to %s" % ipy)
            with open(ipy, "a") as f:
                f.write(INIT_PY_SNIPPET)
        else:
            print(" %s unmodified" % ipy)
    else:
        print(" %s doesn't exist, ok" % ipy)
        ipy = None

    # Make sure both the top-level "versioneer.py" and versionfile_source
    # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
    # they'll be copied into source distributions. Pip won't be able to
    # install the package without this.
    manifest_in = os.path.join(root, "MANIFEST.in")
    simple_includes = set()
    try:
        with open(manifest_in, "r") as f:
            for line in f:
                if line.startswith("include "):
                    for include in line.split()[1:]:
                        simple_includes.add(include)
    except EnvironmentError:
        pass
    # That doesn't cover everything MANIFEST.in can do
    # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
    # it might give some false negatives. Appending redundant 'include'
    # lines is safe, though.
    if "versioneer.py" not in simple_includes:
        print(" appending 'versioneer.py' to MANIFEST.in")
        with open(manifest_in, "a") as f:
            f.write("include versioneer.py\n")
    else:
        print(" 'versioneer.py' already in MANIFEST.in")
    if cfg.versionfile_source not in simple_includes:
        print(" appending versionfile_source ('%s') to MANIFEST.in" %
              cfg.versionfile_source)
        with open(manifest_in, "a") as f:
            f.write("include %s\n" % cfg.versionfile_source)
    else:
        print(" versionfile_source already in MANIFEST.in")

    # Make VCS-specific changes. For git, this means creating/changing
    # .gitattributes to mark _version.py for export-subst keyword
    # substitution.
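    # To summarize (illustrative, using the sample paths from CONFIG_ERROR):
    # a successful 'python versioneer.py setup' run will by this point have
    #   - written src/myproject/_version.py from LONG_VERSION_PY,
    #   - appended INIT_PY_SNIPPET to src/myproject/__init__.py,
    #   - added 'include versioneer.py' and the versionfile to MANIFEST.in,
    # and the call below marks _version.py for export-subst in .gitattributes.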
    do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
    return 0


def scan_setup_py():
    """Validate the contents of setup.py against Versioneer's expectations."""
    found = set()
    setters = False
    errors = 0
    with open("setup.py", "r") as f:
        for line in f.readlines():
            if "import versioneer" in line:
                found.add("import")
            if "versioneer.get_cmdclass()" in line:
                found.add("cmdclass")
            if "versioneer.get_version()" in line:
                found.add("get_version")
            if "versioneer.VCS" in line:
                setters = True
            if "versioneer.versionfile_source" in line:
                setters = True
    if len(found) != 3:
        print("")
        print("Your setup.py appears to be missing some important items")
        print("(but I might be wrong). Please make sure it has something")
        print("roughly like the following:")
        print("")
        print(" import versioneer")
        print(" setup( version=versioneer.get_version(),")
        print("        cmdclass=versioneer.get_cmdclass(),  ...)")
        print("")
        errors += 1
    if setters:
        print("You should remove lines like 'versioneer.VCS = ' and")
        print("'versioneer.versionfile_source = ' . This configuration")
        print("now lives in setup.cfg, and should be removed from setup.py")
        print("")
        errors += 1
    return errors


if __name__ == "__main__":
    cmd = sys.argv[1]
    if cmd == "setup":
        errors = do_setup()
        errors += scan_setup_py()
        if errors:
            sys.exit(1)
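
# Typical end-to-end usage (illustrative; commands assume a git checkout with
# setup.cfg already configured as described in CONFIG_ERROR above):
#
#     $ python versioneer.py setup      # runs do_setup() and scan_setup_py()
#     $ python setup.py version         # the cmd_version command prints e.g.
#     Version: 1.5.4+2.gabc1234
#      full-revisionid: abc1234...
#      dirty: False
#
# The printed values above are hypothetical; the exact string depends on the
# configured style and on the state of the working tree.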