pax_global_header00006660000000000000000000000064146256467430014533gustar00rootroot0000000000000052 comment=d20e8de5f825ca2e7a38e0438dbeed84bd4ec616 gsd-3.3.0/000077500000000000000000000000001462564674300123135ustar00rootroot00000000000000gsd-3.3.0/.bumpversion.cfg000066400000000000000000000004651462564674300154300ustar00rootroot00000000000000[bumpversion] current_version = 3.3.0 commit = False tag = False [bumpversion:file:pyproject.toml] [bumpversion:file:gsd/version.py] [bumpversion:file:gsd/pygsd.py] [bumpversion:file:Doxyfile] [bumpversion:file:.github/ISSUE_TEMPLATE/bug_report.yml] [bumpversion:file:.github/ISSUE_TEMPLATE/release.md] gsd-3.3.0/.clang-format000066400000000000000000000016661462564674300146770ustar00rootroot00000000000000--- BasedOnStyle: WebKit AccessModifierOffset: 0 AlignAfterOpenBracket: Align AlignEscapedNewlines: 'Left' AlignOperands: 'true' AlignTrailingComments: 'true' AllowAllArgumentsOnNextLine: 'false' AllowAllParametersOfDeclarationOnNextLine: 'false' AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: 'false' AllowShortLoopsOnASingleLine: 'false' BinPackArguments: 'false' BinPackParameters: 'false' BreakBeforeBraces: Whitesmiths BreakConstructorInitializers: BeforeColon ColumnLimit: '100' CompactNamespaces: 'true' Cpp11BracedListStyle: 'true' FixNamespaceComments: 'true' IndentWidth: '4' KeepEmptyLinesAtTheStartOfBlocks: false Language: Cpp NamespaceIndentation: None PointerAlignment: Left SortIncludes: 'true' SpaceAfterCStyleCast: 'false' SpaceAfterTemplateKeyword: 'false' SpaceBeforeParens: ControlStatements SpacesInAngles: 'false' SpaceInEmptyParentheses: false Standard: Cpp11 TabWidth: '4' UseTab: Never ... gsd-3.3.0/.clang-tidy000066400000000000000000000011051462564674300143440ustar00rootroot00000000000000Checks: 'bugprone-*, cert-*, clang-analyzer-*, cppcoreguidlines-*, misc-*, modernize-*, performance-*, readability-*, -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, -modernize-use-trailing-return-type, -readability-identifier-length, -bugprone-reserved-identifier, -bugprone-easily-swappable-parameters, -readability-function-cognitive-complexity, -cert-dcl37-c, -cert-dcl51-cpp, -bugprone-narrowing-conversions' gsd-3.3.0/.github/000077500000000000000000000000001462564674300136535ustar00rootroot00000000000000gsd-3.3.0/.github/CODEOWNERS000066400000000000000000000005461462564674300152530ustar00rootroot00000000000000# by default, assign a maintainer and a contributor to review a pull request * @glotzerlab/hoomd-maintainers @glotzerlab/hoomd-reviewers # For changes to github settings, workflows, and CI, only assign maintainers .github @glotzerlab/hoomd-maintainers .pre-commit-config.yaml @glotzerlab/hoomd-maintainers doc/requirements.txt @glotzerlab/hoomd-maintainers gsd-3.3.0/.github/ISSUE_TEMPLATE/000077500000000000000000000000001462564674300160365ustar00rootroot00000000000000gsd-3.3.0/.github/ISSUE_TEMPLATE/bug_report.yml000066400000000000000000000041041462564674300207300ustar00rootroot00000000000000name: Bug report description: Report a problem with GSD. labels: ['bug'] body: - type: textarea attributes: label: Description description: Describe the problem. validations: required: true - type: textarea attributes: label: Script description: | The *minimal* script that demonstrates the problem. The script should depend only on GSD and its dependencies so that developers can run it. placeholder: | import gsd ... 
render: python validations: required: true - type: textarea attributes: label: Input files description: Attach any input files needed to run the script. - type: textarea attributes: label: Output description: | What output did you get? render: shell validations: required: true - type: textarea attributes: label: Expected output description: What output do you expect? validations: required: true - type: dropdown attributes: label: Platform description: What platform are you using? Select all that apply. multiple: true options: - Linux - macOS - Windows validations: required: true - type: dropdown attributes: label: Installation method description: How did you install GSD? options: - Compiled from source - Conda-forge package - PyPI package validations: required: true - type: input attributes: label: GSD description: | What version of GSD are you using? placeholder: 3.3.0 validations: required: true - type: markdown attributes: value: | Run `python3 -c "import gsd; print(gsd.version.version)"` to find your GSD version. - type: input attributes: label: Python version description: What version of Python are you using? placeholder: 3.11.3 validations: required: true - type: markdown attributes: value: | Run `python3 --version` to find your Python version. gsd-3.3.0/.github/ISSUE_TEMPLATE/config.yml000066400000000000000000000004211462564674300200230ustar00rootroot00000000000000blank_issues_enabled: true contact_links: - name: GSD discussion board url: https://github.com/glotzerlab/gsd/discussions/ about: Ask the GSD user community for help. - name: GSD documentation url: https://gsd.readthedocs.io/ about: User documentation. gsd-3.3.0/.github/ISSUE_TEMPLATE/feature_request.yml000066400000000000000000000015311462564674300217640ustar00rootroot00000000000000name: Feature request description: Suggest a new GSD feature labels: ['enhancement'] body: - type: textarea attributes: label: Description description: What new capability would you like in GSD? validations: required: true - type: textarea attributes: label: Proposed solution description: How should this capability be implemented? What might the user API look like? validations: required: true - type: textarea attributes: label: Additional context description: What additional information is helpful to understand this request? - type: markdown attributes: value: | We would welcome your contribution! If you plan to implement this functionality, the GSD developers will discuss the proposed design and API with you **before** you begin work. gsd-3.3.0/.github/ISSUE_TEMPLATE/release.md000066400000000000000000000013011462564674300177730ustar00rootroot00000000000000--- name: Release checklist about: '[for maintainer use]' title: 'Release gsd 3.3.0' labels: '' assignees: 'joaander' --- Release checklist: - [ ] Run *bumpversion*. - [ ] Review the change log. - [ ] Check for new or duplicate contributors since the last release: `comm -13 (git log $(git describe --tags --abbrev=0) --format="%aN <%aE>" | sort | uniq | psub) (git log --format="%aN <%aE>" | sort | uniq | psub)`. Add entries to `.mailmap` to remove duplicates. - [ ] Check readthedocs build, especially change log formatting. - [Build status](https://readthedocs.org/projects/gsd/builds/) - [Output](https://gsd.readthedocs.io/en/latest/) - [ ] Tag and push. - [ ] Update conda-forge recipe. gsd-3.3.0/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000022001462564674300174460ustar00rootroot00000000000000 ## Description ## Motivation and Context Resolves #??? ## How Has This Been Tested? 
## Checklist: - [ ] I have reviewed the [**Contributor Guidelines**](https://github.com/glotzerlab/gsd/blob/trunk-patch/CONTRIBUTING.rst). - [ ] I agree with the terms of the [**GSD Contributor Agreement**](https://github.com/glotzerlab/gsd/blob/trunk-patch/ContributorAgreement.md). - [ ] My name is on the list of contributors (`doc/credits.rst`) in the pull request source branch. - [ ] I have added a change log entry to ``CHANGELOG.rst``. gsd-3.3.0/.github/dependabot.yml000066400000000000000000000017271462564674300165120ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: "github-actions" directory: "/" target-branch: trunk-patch schedule: interval: "monthly" time: "07:00" timezone: "EST5EDT" pull-request-branch-name: separator: "-" open-pull-requests-limit: 2 reviewers: - joaander groups: actions-version: applies-to: version-updates patterns: - '*' actions-security: applies-to: security-updates patterns: - '*' - package-ecosystem: "pip" directory: "/" target-branch: trunk-patch schedule: interval: "monthly" time: "07:00" timezone: "EST5EDT" pull-request-branch-name: separator: "-" open-pull-requests-limit: 2 reviewers: - joaander groups: pip-version: applies-to: version-updates patterns: - '*' update-types: - minor - patch pip-security: applies-to: security-updates patterns: - '*' update-types: - minor - patch gsd-3.3.0/.github/requirements-build-sdist.in000066400000000000000000000000171462564674300211450ustar00rootroot00000000000000build == 1.1.1 gsd-3.3.0/.github/requirements-build-sdist.txt000066400000000000000000000004111462564674300213540ustar00rootroot00000000000000# # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements-build-sdist.in # build==1.1.1 # via -r requirements-build-sdist.in packaging==24.0 # via build pyproject-hooks==1.1.0 # via build gsd-3.3.0/.github/requirements-test.in000066400000000000000000000000601462564674300176770ustar00rootroot00000000000000cython == 3.0.8 numpy == 1.26.4 pytest == 8.2.0 gsd-3.3.0/.github/requirements-test.txt000066400000000000000000000005671462564674300201240ustar00rootroot00000000000000# # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements-test.in # cython==3.0.8 # via -r requirements-test.in iniconfig==2.0.0 # via pytest numpy==1.26.4 # via -r requirements-test.in packaging==24.0 # via pytest pluggy==1.5.0 # via pytest pytest==8.2.0 # via -r requirements-test.in gsd-3.3.0/.github/workflows/000077500000000000000000000000001462564674300157105ustar00rootroot00000000000000gsd-3.3.0/.github/workflows/build_wheels.yaml000066400000000000000000000064631462564674300212530ustar00rootroot00000000000000name: PyPI concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true on: pull_request: push: branches: - "trunk-*" tags: - "v*" workflow_dispatch: env: UV_VERSION: 0.2.2 jobs: build_wheels: name: Build wheels [${{ matrix.python.version }}, ${{ matrix.os.base }}-${{ matrix.os.arch }}] runs-on: ${{ matrix.os.base }}-${{ matrix.os.version }} strategy: fail-fast: false matrix: os: - base: ubuntu version: latest arch: 'x86_64' - base: windows version: latest arch: 'amd64' - base: macos version: 14 arch: 'arm64' - base: macos version: 13 arch: 'x86_64' python: - version: 'cp39' oldest_numpy: '1.21.6' - version: 'cp310' oldest_numpy: '1.21.6' - version: 'cp311' oldest_numpy: '1.23.2' - version: 'cp312' oldest_numpy: '1.26.2' steps: - uses: actions/checkout@v4.1.6 - name: Build wheels uses: 
pypa/cibuildwheel@v2.18.1 env: CIBW_BUILD: "${{ matrix.python.version }}-*" CIBW_TEST_REQUIRES: pytest==8.2.1 numpy==${{ matrix.python.oldest_numpy }} - uses: actions/upload-artifact@v4.3.3 with: name: dist-python-${{ matrix.python.version }}-${{ matrix.os.base }}-${{ matrix.os.arch }} path: ./wheelhouse/*.whl build_sdist: name: Build source distribution runs-on: ubuntu-latest steps: - uses: actions/checkout@v4.1.6 - uses: actions/setup-python@v5.1.0 name: Install Python with: python-version: '3.12' - name: Install uv run: curl --proto '=https' --tlsv1.2 -LsSf https://github.com/astral-sh/uv/releases/download/${{ env.UV_VERSION }}/uv-installer.sh | bash - name: Install build run: 'uv pip install -r .github/requirements-build-sdist.txt --only-binary :all: --system --reinstall' - name: Build sdist run: python -m build --sdist --outdir dist/ . - uses: actions/upload-artifact@v4.3.3 with: name: dist-sdist path: dist/*.tar.gz upload_pypi: name: Publish [PyPI] needs: [build_wheels, build_sdist] runs-on: ubuntu-latest steps: - name: Download artifacts uses: actions/download-artifact@v4.1.7 with: merge-multiple: 'true' pattern: dist-* path: dist - name: Check files run: ls -lR dist - name: Upload to PyPI # upload to PyPI on every tag starting with 'v' if: startsWith(github.ref, 'refs/tags/v') uses: pypa/gh-action-pypi-publish@v1.8.14 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} - name: Upload to TestPyPI # otherwise, upload to TestPyPi if: ${{ !startsWith(github.ref, 'refs/tags/v') && (github.event_name != 'pull_request' || (github.event.pull_request.head.repo.full_name == github.repository && github.actor != 'dependabot[bot]')) }} uses: pypa/gh-action-pypi-publish@v1.8.14 with: user: __token__ password: ${{ secrets.TEST_PYPI_API_TOKEN }} repository-url: https://test.pypi.org/legacy/ skip-existing: true gsd-3.3.0/.github/workflows/make-changelog-md.sh000077500000000000000000000003541462564674300215110ustar00rootroot00000000000000#!/bin/bash set -euo pipefail tag=$(echo "$1" | sed -e 's/\./\\\./g') pcregrep -M "^${tag}.*\n\^\^\^\^+.*\n(.*\n)+?(\^\^\^\^+|^---+)$" CHANGELOG.rst \ | tail -n +3 \ | head -n -2 \ | pandoc --from=rst --to=markdown --wrap=none gsd-3.3.0/.github/workflows/release.yaml000066400000000000000000000042771462564674300202260ustar00rootroot00000000000000name: GitHub Release concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true on: pull_request: push: branches: - "trunk-*" tags: - "v*" workflow_dispatch: env: name: gsd defaults: run: shell: bash jobs: release: name: Build release tarball runs-on: ubuntu-latest steps: - uses: actions/checkout@v4.1.6 with: fetch-depth: 0 submodules: true path: code - name: Install tools run: sudo apt-get install pcregrep pandoc - name: Determine last tag via git describe if: ${{ ! 
startsWith(github.ref, 'refs/tags/v') }} run: echo tag="$(git describe --abbrev=0)" >> "$GITHUB_ENV" working-directory: code # git describe does not return the current tag in tag pushes on GitHub Actions, use GITHUB_REF instead - name: Determine tag from GITHUB_REF if: startsWith(github.ref, 'refs/tags/v') run: echo tag="$(echo "${GITHUB_REF}" | sed -e 's/refs\/tags\///g')" >> "$GITHUB_ENV" - name: Write version change log run: .github/workflows/make-changelog-md.sh "${tag:1}" | tee "${GITHUB_WORKSPACE}/changelog.md" working-directory: code - name: Copy source run: cp -R code "${name}-${tag:1}" - name: Remove .git run: rm -rf "${name}-${tag:1}/.git" && ls -laR "${name}-${tag:1}" - name: Tar source run: tar -cvzf "${name}-${tag:1}.tar.gz" "${name}-${tag:1}" - name: Tar source run: tar -cvJf "${name}-${tag:1}.tar.xz" "${name}-${tag:1}" - uses: actions/upload-artifact@v4.3.3 with: name: release path: | *.tar.* changelog.md publish: name: Publish [GitHub] needs: [release] runs-on: ubuntu-latest steps: - name: Download artifacts uses: actions/download-artifact@v4.1.7 with: name: release - name: Create release uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/v') with: files: "*.tar.*" body_path: changelog.md env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} gsd-3.3.0/.github/workflows/stale.yaml000066400000000000000000000021001462564674300176750ustar00rootroot00000000000000name: Close stale issues and PRs on: schedule: - cron: '0 19 * * *' workflow_dispatch: jobs: stale: runs-on: ubuntu-latest steps: - uses: actions/stale@v9.0.0 with: days-before-close: 10 stale-issue-label: stale stale-pr-label: stale exempt-issue-labels: essential exempt-pr-labels: essential days-before-issue-stale: 170 stale-issue-message: > This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. close-issue-message: > This issue has been automatically closed because it has not had recent activity. days-before-pr-stale: 20 stale-pr-message: > This pull request has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. close-pr-message: > This pull request has been automatically closed because it has not had recent activity. gsd-3.3.0/.github/workflows/style_check.yaml000066400000000000000000000023611462564674300210730ustar00rootroot00000000000000name: Style check concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true on: pull_request: push: branches: - "trunk-*" workflow_dispatch: env: UV_VERSION: 0.2.2 jobs: clang-tidy: name: Run clang-tidy runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4.1.6 - name: Set up Python uses: actions/setup-python@v5.1.0 with: python-version: 3.12 - name: Install uv run: curl --proto '=https' --tlsv1.2 -LsSf https://github.com/astral-sh/uv/releases/download/${{ env.UV_VERSION }}/uv-installer.sh | bash - name: Install prereqs run: 'uv pip install -r .github/requirements-test.txt --only-binary :all: --system --reinstall' - name: Configure run: cmake -B build - name: Execute clang-tidy run: clang-tidy-14 -p=build gsd/*.c gsd/*.h scripts/*.cc --quiet --warnings-as-errors="*" # This job is used to provide a single requirement for branch merge conditions. 
checks_complete: name: Style check if: always() needs: [clang-tidy] runs-on: ubuntu-latest steps: - run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' - name: Done run: exit 0 gsd-3.3.0/.github/workflows/unit_test.yaml000066400000000000000000000077231462564674300206230ustar00rootroot00000000000000name: Unit test concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true on: pull_request: push: branches: - "trunk-*" workflow_dispatch: defaults: run: shell: bash env: UV_VERSION: 0.2.2 jobs: unit_test: name: Unit test [py${{ matrix.python }} ${{ matrix.c_compiler }} ${{ matrix.os }}] runs-on: ${{ matrix.os }} strategy: matrix: include: ############## # Windows - os: windows-2019 python: '3.12' - os: windows-2022 python: '3.12' ############## # Mac # macos-x86_64 - os: macos-12 python: '3.12' # macos-arm64 - os: macos-14 python: '3.12' ############## # Ubuntu 24.04 - os: ubuntu-24.04 python: '3.9' c_compiler: gcc-13 cxx_compiler: g++-14 - os: ubuntu-24.04 python: '3.10' c_compiler: gcc-14 cxx_compiler: g++-14 - os: ubuntu-24.04 python: '3.11' c_compiler: clang-16 cxx_compiler: clang++-16 - os: ubuntu-24.04 python: '3.12' c_compiler: clang-17 cxx_compiler: clang++-17 - os: ubuntu-24.04 python: '3.12' c_compiler: clang-18 cxx_compiler: clang++-18 ############## # Ubuntu 22.04 - os: ubuntu-22.04 python: '3.9' c_compiler: gcc-11 cxx_compiler: g++-11 - os: ubuntu-22.04 python: '3.10' c_compiler: gcc-12 cxx_compiler: g++-12 - os: ubuntu-22.04 python: '3.11' c_compiler: clang-13 cxx_compiler: clang++-13 - os: ubuntu-22.04 python: '3.12' c_compiler: clang-14 cxx_compiler: clang++-14 - os: ubuntu-22.04 python: '3.12' c_compiler: clang-15 cxx_compiler: clang++-15 ############## # Ubuntu 20.04 - os: ubuntu-20.04 python: '3.9' c_compiler: gcc-10 cxx_compiler: g++-10 - os: ubuntu-20.04 python: '3.10' c_compiler: clang-10 cxx_compiler: clang++-10 - os: ubuntu-20.04 python: '3.11' c_compiler: clang-11 cxx_compiler: clang++-11 - os: ubuntu-20.04 python: '3.12' c_compiler: clang-12 cxx_compiler: clang++-12 steps: - uses: actions/checkout@v4.1.6 with: path: code - name: Set up Python uses: actions/setup-python@v5.1.0 with: python-version: ${{ matrix.python }} - name: Install uv run: curl --proto '=https' --tlsv1.2 -LsSf https://github.com/astral-sh/uv/releases/download/${{ env.UV_VERSION }}/uv-installer.sh | bash - name: Install prereqs run: 'uv pip install -r code/.github/requirements-test.txt --only-binary :all: --system --reinstall' - name: Set Linux compiler if: ${{ startsWith(matrix.os, 'ubuntu') }} run: | echo "CC=${{ matrix.c_compiler }}" >> "$GITHUB_ENV" echo "CXX=${{ matrix.cxx_compiler }}" >> "$GITHUB_ENV" - name: Configure run: cmake -S code -B build - name: Build run: cmake --build build -j 6 --config Release - if: runner.os == 'Windows' name: Copy pyd files run: cp ./gsd/Release/*.pyd gsd/ working-directory: build - name: Run tests run: python3 -m pytest --pyargs gsd -p gsd.pytest_plugin_validate --validate -v --log-level=DEBUG env: PYTHONPATH: ${{ github.workspace }}/build # This job is used to provide a single requirement for branch merge conditions. 
tests_complete: name: Unit test if: always() needs: [unit_test] runs-on: ubuntu-latest steps: - run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}' - name: Done run: exit 0 gsd-3.3.0/.gitignore000066400000000000000000000001501462564674300142770ustar00rootroot00000000000000*.sublime-workspace *.DS_Store *.pyc *.pyo *.so fl.c build/* dist gsd.egg-info __pycache__ devdoc build gsd-3.3.0/.mailmap000066400000000000000000000006111462564674300137320ustar00rootroot00000000000000Joshua A. Anderson Joshua Anderson Bradley Dice Bradley Dice Vyas Ramasubramani Vyas Jenny Fothergill jennyfothergill <39961845+jennyfothergill@users.noreply.github.com> Jens Glaser jglaser gsd-3.3.0/.pre-commit-config.yaml000066400000000000000000000030101462564674300165660ustar00rootroot00000000000000ci: autoupdate_schedule: quarterly autoupdate_branch: 'trunk-patch' autofix_prs: false repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: 'v4.5.0' hooks: - id: end-of-file-fixer - id: trailing-whitespace - id: check-json - id: check-toml - id: check-yaml - id: check-case-conflict - id: fix-encoding-pragma args: - --remove - id: mixed-line-ending # actionlint fails to build on mac... # - repo: https://github.com/rhysd/actionlint # rev: v1.6.27 # hooks: # - id: actionlint - repo: https://github.com/glotzerlab/fix-license-header rev: v0.3.2 hooks: - id: fix-license-header name: Fix license headers (Python) types_or: [python, cython] args: - --license-file=LICENSE - --add=Part of GSD, released under the BSD 2-Clause License. - --keep-before=#! - id: fix-license-header name: Fix license headers (C) types_or: [c] args: - --license-file=LICENSE - --add=Part of GSD, released under the BSD 2-Clause License. - --comment-prefix=// - id: fix-license-header name: Fix license headers (reStructuredText) types_or: [rst] args: - --license-file=LICENSE - --add=Part of GSD, released under the BSD 2-Clause License. - --keep-after=.. include - --comment-prefix=.. - repo: https://github.com/astral-sh/ruff-pre-commit rev: 'v0.3.4' hooks: - id: ruff-format - id: ruff - repo: https://github.com/pre-commit/mirrors-clang-format rev: v18.1.2 hooks: - id: clang-format types_or: [c, c++] gsd-3.3.0/.readthedocs.yaml000066400000000000000000000003451462564674300155440ustar00rootroot00000000000000version: 2 build: os: ubuntu-22.04 tools: python: "3.12" jobs: pre_build: - doxygen sphinx: fail_on_warning: true python: install: - requirements: doc/requirements.txt - method: pip path: . gsd-3.3.0/.ruff.toml000066400000000000000000000025451462564674300142360ustar00rootroot00000000000000target-version = "py38" lint.extend-select = [ "A", "B", "D", "E501", "EM", "I", "ICN", "ISC", "N", "NPY", "PL", "PT", "RET", "RUF", "UP", "W", ] lint.ignore = [ "N806", "N803", # Allow occasional use of uppercase variable and argument names (e.g. N). "D107", # Do not document __init__ separately from the class. "PLR09", # Allow "too many" statements/arguments/etc... "N816", # Allow mixed case names like kT. "PT011", # PT011 insists that specific pytest.raises checks should impossibly more specific "RUF012", # gsd does not use typing hints ] [lint.per-file-ignores] "__init__.py" = ["F401", # __init__.py import submodules for use by the package importer. 
] "gsd/test/*.py" = ["PLR2004", # unit test value comparisons are not magic values ] "doc/conf.py" = ["A001", # Allow copyright variable name "D", # conf.py does not need documentation ] [lint.pydocstyle] convention = "google" [format] quote-style = "single" [lint.flake8-import-conventions] # Prefer no import aliases aliases = {} # Always import hoomd and gsd without 'from' banned-from = ["hoomd", "gsd"] # Ban standard import conventions and force common packages to be imported by their actual name. [lint.flake8-import-conventions.banned-aliases] "numpy" = ["np"] "pandas" = ["pd"] "matplotlib" = ["mpl"] gsd-3.3.0/.yapfignore000066400000000000000000000000511462564674300144530ustar00rootroot00000000000000run-clang-format.py setup.py doc/conf.py gsd-3.3.0/CHANGELOG.rst000066400000000000000000000303651462564674300143430ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. Change Log ========== `GSD `_ releases follow `semantic versioning `_. 3.x --- 3.3.0 (2024-05-29) ^^^^^^^^^^^^^^^^^^ *Added:* * Support numpy 2.0 (`#367 `__). *Changed:* * Navigate the documentation with arrow keys (`#365 `__). * Add button to copy code snippets from the documentation (`#365 `__). * Remove singularity reference from the documentation (`#365 `__). *Removed:* * Support for Python 3.8 (`#367 `__). 3.2.1 (2024-01-22) ^^^^^^^^^^^^^^^^^^ *Fixed:* * Write all pending index entries to the file when ``gsd_flush()`` is called after ``gsd_write_chunk()`` and before ``gsd_end_frame()`` (`#319 `__). * Readthedocs builds with pandas 2.2.0 (`#322 `__). * Import without seg fault when built with CMake on macOS (`#323 `__). * Internal cached data remains valid when users modify frames obtained by indexing trajectories (`#324 `__). *Changed:* * Provide support via GitHub discussions (`#308 `__). * Use ruff (`#317 `__). * Perform fewer implicit flushes when using the ``gsd.hoomd`` python API (`#325 `__). 3.2.0 (2023-09-27) ^^^^^^^^^^^^^^^^^^ *Added:* * Support Python 3.12 (`#283 `__). 3.1.1 (2023-08-03) ^^^^^^^^^^^^^^^^^^ *Fixed:* * Raise a ``FileExistsError`` when opening a file that already exists with ``mode = 'x'`` (`#270 `__). 3.1.0 (2023-07-28) ^^^^^^^^^^^^^^^^^^ *Fixed:* * ``hoomd.read_log`` no longer triggers a numpy deprecation warning (`#267 `__). *Added:* * ``HOOMDTrajectory.flush`` - flush buffered writes on an open ``HOOMDTrajectory`` (`#266 `__). 3.0.1 (2023-06-20) ^^^^^^^^^^^^^^^^^^ *Fixed:* * Prevent ``ValueError: signal only works in main thread of the main interpreter`` when importing gsd in a non-main thread (`#257 `__). 3.0.0 (2023-06-16) ^^^^^^^^^^^^^^^^^^ *Added:* * ``gsd.version.version`` - version string identifier. PEP8 compliant name replaces ``__version__``. * ``GSDFile.flush`` - flush write buffers (C API ``gsd_flush``) (`#237 `__). * ``GSDFile.maximum_write_buffer_size`` - get/set the write buffer size (C API ``gsd_get_maximum_write_buffer_size`` / ``gsd_set_maximum_write_buffer_size``) (`#237 `__). * ``GSDFile.index_entries_to_buffer`` - get/set the write buffer size (C API ``index_entries_to_buffer`` / ``index_entries_to_buffer``) (`#237 `__). * On importing `gsd`, install a ``SIGTERM`` handler that calls ``sys.exit(1)`` (`#237 `__). *Changed:* * ``write_chunk`` buffers writes across frames to increase performance (`#237 `__). * Use *Doxygen* and *breathe* to generate C API documentation in Sphinx (`#237 `__). *Removed:* * ``gsd.__version__`` - use ``gsd.version.version``. 
* ``gsd.hoomd.Snapshot`` - use ``gsd.hoomd.Frame`` (`#249 `__). * ``gsd.hoomd.HOOMDTrajectory.read_frame`` - use ``gsd.hoomd.HOOMDTrajectory.__getitem__`` (`#249 `__). * The file modes ``'wb'``, ``'wb+'``, ``'rb'``, ``'rb+'``, ``'ab'``, ``'xb'``, and ``'xb+'``. Use ``'r'``, ``'r+'``, ``'w'``, ``'x'``, or ``'a'`` (`#249 `__). 2.x --- 2.9.0 (2023-05-19) ^^^^^^^^^^^^^^^^^^ *Added:* * File modes ``'r'``, ``'r+'``, ``'w'``, ``'x'``, and ``'a'`` (`#238 `__). *Changed:* * Test on gcc9, clang10, and newer (`#235 `__). * Test and provide binary wheels on Python 3.8 and newer (`#235 `__). *Deprecated:* * File modes ``'wb'``, ``'wb+'``, ``'rb'``, ``'rb+'``, ``'ab'``, ``'xb'``, and ``'xb+'`` (`#238 `__). * [C API] ``GSD_APPEND`` file open mode (`#238 `__). v2.8.1 (2023-03-13) ^^^^^^^^^^^^^^^^^^^ *Fixed:* * Reduce memory usage in most use cases. * Reduce likelihood of data corruption when writing GSD files. v2.8.0 (2023-02-24) ^^^^^^^^^^^^^^^^^^^ *Added:* * ``gsd.hoomd.read_log`` - Read log quantities from a GSD file. * ``gsd.hoomd.Frame`` class to replace ``gsd.hoomd.Snapshot``. *Changed:* * Improved documentation. *Deprecated:* * ``gsd.hoomd.Snapshot``. v2.7.0 (2022-11-30) ^^^^^^^^^^^^^^^^^^^ *Added* * Support Python 3.11. v2.6.1 (2022-11-04) ^^^^^^^^^^^^^^^^^^^ *Fixed:* * Default values are now written to frame N (N != 0) when non-default values exist in frame 0. * Data chunks can now be read from files opened in 'wb', 'xb', and 'ab' modes. v2.6.0 (2022-08-19) ^^^^^^^^^^^^^^^^^^^ *Changed:* * Raise an error when writing a frame with duplicate types. v2.5.3 (2022-06-22) ^^^^^^^^^^^^^^^^^^^ *Fixed* * Support Python >=3.6. v2.5.2 (2022-04-15) ^^^^^^^^^^^^^^^^^^^ *Fixed* * Correctly handle non-ASCII characters on Windows. * Document that the ``fname`` argument to ``gsd_`` C API functions is UTF-8 encoded. v2.5.1 (2021-11-17) ^^^^^^^^^^^^^^^^^^^ *Added* * Support for Python 3.10. * Support for clang 13. v2.5.0 (2021-10-13) ^^^^^^^^^^^^^^^^^^^ *Changed* * Improved documentation. *Deprecated* - ``HOOMDTrajectory.read_frame`` - use indexing (``trajectory[index]``) to access frames from a trajectory. v2.4.2 (2021-04-14) ^^^^^^^^^^^^^^^^^^^ *Added* * MacOS and Windows wheels on PyPI. *Fixed* - Documented array shapes for angles, dihedrals, and impropers. v2.4.1 (2021-03-11) ^^^^^^^^^^^^^^^^^^^ *Added* * Support macos-arm64. *Changed* * Stop testing with clang 4-5, gcc 4.8-6. v2.4.0 (2020-11-11) ^^^^^^^^^^^^^^^^^^^ *Changed* * Set ``gsd.hoomd.ConfigurationData.dimensions`` default based on ``box``'s :math:`L_z` value. *Fixed* * Failure in ``test_fl.py`` when run by a user and GSD was installed by root. v2.3.0 (2020-10-30) ^^^^^^^^^^^^^^^^^^^ *Added* * Support clang 11. * Support Python 3.9. *Changed* * Install unit tests with the Python package. *Fixed* * Compile error on macOS 10.15. v2.2.0 (2020-08-05) ^^^^^^^^^^^^^^^^^^^ *Added* * Command line convenience interface for opening a GSD file. v2.1.2 (2020-06-26) ^^^^^^^^^^^^^^^^^^^ *Fixed* * Adding missing ``close`` method to ``HOOMDTrajectory``. * Documentation improvements. v2.1.1 (2020-04-20) ^^^^^^^^^^^^^^^^^^^ *Fixed* * List defaults in ``gsd.fl.open`` documentation. v2.1.0 (2020-02-27) ^^^^^^^^^^^^^^^^^^^ *Added* * Shape specification for sphere unions. v2.0.0 (2020-02-03) ^^^^^^^^^^^^^^^^^^^ *Note* * This release introduces a new file storage format. * GSD >= 2.0 can read and write to files created by GSD 1.x. * Files created or upgraded by GSD >= 2.0 can not be opened by GSD < 1.x. 
*Added* * The ``upgrade`` method converts a GSD 1.0 file to a GSD 2.0 file in place. * Support arbitrarily long chunk names (only in GSD 2.0 files). *Changed* * ``gsd.fl.open`` accepts ``None`` for ``application``, ``schema``, and ``schema_version`` when opening files for reading. * Improve read latency when accessing files with thousands of chunk names in a frame (only for GSD 2.0 files). * Buffer small writes to improve write performance. * Improve performance and reduce memory usage in read/write modes ('rb+', 'wb+' and ('xb+'). * **C API**: functions return error codes from the ``gsd_error`` enum. v2.x integer error codes differ from v1.x, use the enum to check. For example: ``if (retval == GSD_ERROR_IO)``. * Python, Cython, and C code must follow strict style guidelines. *Removed* * ``gsd.fl.create`` - use ``gsd.fl.open``. * ``gsd.hoomd.create`` - use ``gsd.hoomd.open``. * ``GSDFile`` v1.0 compatibility mode - use ``gsd.fl.open``. * ``hoomdxml2gsd.py``. *Fixed* * Allow more than 127 data chunk names in a single GSD file. v1.x ---- v1.10.0 (2019-11-26) ^^^^^^^^^^^^^^^^^^^^ * Improve performance of first frame write. * Allow pickling of GSD file handles opened in read only mode. * Removed Cython-generated code from repository. ``fl.pyx`` will be cythonized during installation. v1.9.3 (2019-10-04) ^^^^^^^^^^^^^^^^^^^ * Fixed preprocessor directive affecting Windows builds using setup.py. * Documentation updates v1.9.2 (2019-10-01) ^^^^^^^^^^^^^^^^^^^ * Support chunk sizes larger than 2GiB v1.9.1 (2019-09-23) ^^^^^^^^^^^^^^^^^^^ * Support writing chunks wider than 255 from Python. v1.9.0 (2019-09-18) ^^^^^^^^^^^^^^^^^^^ * File API: Add ``find_matching_chunk_names()`` * ``HOOMD`` schema 1.4: Add user defined logged data. * ``HOOMD`` schema 1.4: Add ``type_shapes`` specification. * pytest >= 3.9.0 is required to run unit tests. * ``gsd.fl.open`` and ``gsd.hoomd.open`` accept objects implementing ``os.PathLike``. * Report an error when attempting to write a chunk that fails to allocate a name. * Reduce virtual memory usage in ``rb`` and ``wb`` open modes. * Additional checks for corrupt GSD files on open. * Synchronize after expanding file index. v1.8.1 (2019-08-19) ^^^^^^^^^^^^^^^^^^^ * Correctly raise ``IndexError`` when attempting to read frames before the first frame. * Raise ``RuntimeError`` when importing ``gsd`` in unsupported Python versions. v1.8.0 (2019-08-05) ^^^^^^^^^^^^^^^^^^^ * Slicing a HOOMDTrajectory object returns a view that can be used to directly select frames from a subset or sliced again. * raise ``IndexError`` when attempting to read frames before the first frame. * Dropped support for Python 2. v1.7.0 (2019-04-30) ^^^^^^^^^^^^^^^^^^^ * Add ``hpmc/sphere/orientable`` to HOOMD schema. * HOOMD schema 1.3 v1.6.2 (2019-04-16) ^^^^^^^^^^^^^^^^^^^ * PyPI binary wheels now support numpy>=1.9.3,<2 v1.6.1 (2019-03-05) ^^^^^^^^^^^^^^^^^^^ * Documentation updates v1.6.0 (2018-12-20) ^^^^^^^^^^^^^^^^^^^ * The length of sliced HOOMDTrajectory objects can be determined with the built-in ``len()`` function. v1.5.5 (2018-11-28) ^^^^^^^^^^^^^^^^^^^ * Silence numpy deprecation warnings v1.5.4 (2018-10-04) ^^^^^^^^^^^^^^^^^^^ * Add ``pyproject.toml`` file that defines ``numpy`` as a proper build dependency (requires pip >= 10) * Reorganize documentation v1.5.3 (2018-05-22) ^^^^^^^^^^^^^^^^^^^ * Revert ``setup.py`` changes in v1.5.2 - these do not work in most circumstances. * Include ``sys/stat.h`` on all architectures. 
v1.5.2 (2018-04-04) ^^^^^^^^^^^^^^^^^^^ * Close file handle on errors in ``gsd_open``. * Always close file handle in ``gsd_close``. * ``setup.py`` now correctly pulls in the numpy dependency. v1.5.1 (2018-02-26) ^^^^^^^^^^^^^^^^^^^ * Documentation fixes. v1.5.0 (2018-01-18) ^^^^^^^^^^^^^^^^^^^ * Read and write HPMC shape state data. v1.4.0 (2017-12-04) ^^^^^^^^^^^^^^^^^^^ * Support reading and writing chunks with 0 length. No schema changes are necessary to support this. v1.3.0 (2017-11-17) ^^^^^^^^^^^^^^^^^^^ * Document ``state`` entries in the HOOMD schema. * No changes to the gsd format or reader code in v1.3. v1.2.0 (2017-02-21) ^^^^^^^^^^^^^^^^^^^ * Add ``gsd.hoomd.open()`` method which can create and open hoomd gsd files. * Add ``gsd.fl.open()`` method which can create and open gsd files. * The previous create/class ``GSDFile`` instantiation is still supported for backward compatibility. v1.1.0 (2016-10-04) ^^^^^^^^^^^^^^^^^^^ * Add special pairs section pairs/ to HOOMD schema. * HOOMD schema version is now 1.1. v1.0.1 (2016-06-15) ^^^^^^^^^^^^^^^^^^^ * Fix compile error on more strict POSIX systems. v1.0.0 (2016-05-24) ^^^^^^^^^^^^^^^^^^^ Initial release. gsd-3.3.0/CMake/000077500000000000000000000000001462564674300132735ustar00rootroot00000000000000gsd-3.3.0/CMake/CFlagsSetup.cmake000066400000000000000000000013101462564674300164500ustar00rootroot00000000000000# Set a default build type if none was specified if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "Setting build type to 'Release' as none was specified.") set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() # Enable compiler warnings on gcc and clang (common compilers used by developers) if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unknown-pragmas -Wno-deprecated-declarations") set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unknown-pragmas") endif() gsd-3.3.0/CMake/CMakeLists.txt000066400000000000000000000001521462564674300160310ustar00rootroot00000000000000set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_MODULE_PATH}" PARENT_SCOPE ) gsd-3.3.0/CMake/LinterSetup.cmake000066400000000000000000000007011462564674300165510ustar00rootroot00000000000000include(FindPackageMessage) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # http://www.mariobadr.com/using-clang-tidy-with-cmake-36.html find_program( CLANG_TIDY_EXE NAMES "clang-tidy" DOC "Path to clang-tidy executable" ) if(CLANG_TIDY_EXE) find_package_message(CLANG_TIDY "Found clang-tidy: ${CLANG_TIDY_EXE}" "[${CLANG_TIDY_EXE}]") set(DO_CLANG_TIDY "${CLANG_TIDY_EXE}" "-p=${CMAKE_BINARY_DIR}" "-header-filter=^${CMAKE_SOURCE_DIR}/.*") endif() gsd-3.3.0/CMake/PythonSetup.cmake000066400000000000000000000062731462564674300166070ustar00rootroot00000000000000######################################################################################### ### Find Python and set PYTHON_SITEDIR, the location to install python modules # macro for running python and getting output macro(run_python code result) execute_process( COMMAND ${PYTHON_EXECUTABLE} -c ${code} OUTPUT_VARIABLE ${result} RESULT_VARIABLE PY_ERR OUTPUT_STRIP_TRAILING_WHITESPACE ) if(PY_ERR) message(STATUS "Error while querying python for information") endif(PY_ERR) endmacro(run_python) # find the python interpreter find_program(PYTHON_EXECUTABLE NAMES python3 python) # get the python 
installation prefix and version run_python("import sys\; print('%d' % (sys.version_info[0]))" PYTHON_VERSION_MAJOR) run_python("import sys\; print('%d' % (sys.version_info[1]))" PYTHON_VERSION_MINOR) set(PYTHON_VERSION "${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}") string(REPLACE "." "" _python_version_no_dots ${PYTHON_VERSION}) # determine the include directory run_python("import sysconfig\; print(sysconfig.get_path('include'))" _python_include_hint) run_python("import sysconfig\; print(sysconfig.get_config_var('LIBDIR'))" _python_lib_hint) run_python("import sysconfig\; print(sysconfig.get_config_var('BINDIR'))" _python_prefix_hint) find_path(PYTHON_INCLUDE_DIR Python.h HINTS ${_python_include_hint} NO_DEFAULT_PATH) # find the python library # add a blank suffix to the beginning to find the Python framework set(_old_suffixes ${CMAKE_FIND_LIBRARY_SUFFIXES}) set(CMAKE_FIND_LIBRARY_SUFFIXES ";${CMAKE_FIND_LIBRARY_SUFFIXES}") find_library(PYTHON_LIBRARY NAMES python${PYTHON_VERSION} python${PYTHON_VERSION}m python${_python_version_no_dots} HINTS ${_python_lib_hint} ${_python_prefix_hint} ${_python_prefix_hint}/DLLs PATH_SUFFIXES lib64 lib libs NO_DEFAULT_PATH ) set(${CMAKE_FIND_LIBRARY_SUFFIXES} _old_suffixes) INCLUDE(FindPackageHandleStandardArgs) message(STATUS "Python library: ${PYTHON_LIBRARY}") find_package_message(python_library "Found Python library: ${PYTHON_LIBRARY}" "[${PYTHON_LIBRARY}]") FIND_PACKAGE_HANDLE_STANDARD_ARGS(Python DEFAULT_MSG PYTHON_EXECUTABLE PYTHON_LIBRARY PYTHON_INCLUDE_DIR) #### Setup numpy if (PYTHON_VERSION VERSION_GREATER 3) run_python("import numpy\; print(numpy.get_include())" NUMPY_INCLUDE_GUESS) else() run_python("import numpy\; print numpy.get_include()" NUMPY_INCLUDE_GUESS) endif() # We use the full path name (including numpy on the end), but # Double-check that all is well with that choice. find_path( NUMPY_INCLUDE_DIR numpy/arrayobject.h HINTS ${NUMPY_INCLUDE_GUESS} ) FIND_PACKAGE_HANDLE_STANDARD_ARGS(numpy DEFAULT_MSG NUMPY_INCLUDE_DIR) if (NUMPY_INCLUDE_DIR) mark_as_advanced(NUMPY_INCLUDE_DIR) endif (NUMPY_INCLUDE_DIR) include_directories(${NUMPY_INCLUDE_DIR}) # add_definitions(-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) ############################################################################################# # Find cython find_program(CYTHON_EXECUTABLE NAMES cython cython3) FIND_PACKAGE_HANDLE_STANDARD_ARGS(cython DEFAULT_MSG CYTHON_EXECUTABLE) if (NOT CYTHON_EXECUTABLE) message(ERROR "cython not found") endif() gsd-3.3.0/CMakeLists.txt000066400000000000000000000011421462564674300150510ustar00rootroot00000000000000cmake_minimum_required(VERSION 2.8.10...3.13 FATAL_ERROR) project(gsd) set(PYTHON_MODULE_BASE_DIR "gsd") add_subdirectory(CMake) include(CFlagsSetup) include(LinterSetup) ############################################################################################# # Find libraries include(PythonSetup) include_directories(${PYTHON_INCLUDE_DIR}) if (WIN32) add_compile_definitions(_CRT_SECURE_NO_WARNINGS) endif() ############################################################################################# ### Add project subdirs include_directories(gsd) add_subdirectory(gsd) add_subdirectory(scripts) gsd-3.3.0/CONTRIBUTING.rst000066400000000000000000000063341462564674300147620ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. Contributing ============ Contributions are welcomed via `pull requests on GitHub `__. 
Contact the **GSD** developers before starting work to ensure it meshes well with the planned development direction and standards set for the project. Features -------- Implement functionality in a general and flexible fashion _________________________________________________________ New features should be applicable to a variety of use-cases. The **GSD** developers can assist you in designing flexible interfaces. Maintain performance of existing code paths ___________________________________________ Expensive code paths should only execute when requested. Version control --------------- Base your work off the correct branch _____________________________________ - Base backwards compatible bug fixes on ``trunk-patch``. - Base additional functionality on ``trunk-minor``. - Base API incompatible changes on ``trunk-major``. Propose a minimal set of related changes ________________________________________ All changes in a pull request should be closely related. Multiple change sets that are loosely coupled should be proposed in separate pull requests. Agree to the Contributor Agreement __________________________________ All contributors must agree to the Contributor Agreement before their pull request can be merged. Set your git identity _____________________ Git identifies every commit you make with your name and e-mail. `Set your identity`_ to correctly identify your work and set it identically on all systems and accounts where you make commits. .. _Set your identity: http://www.git-scm.com/book/en/v2/Getting-Started-First-Time-Git-Setup Source code ----------- Use a consistent style ______________________ The **Code style** section of the documentation sets the style guidelines for **GSD** code. Document code with comments ___________________________ Use doxygen header comments for classes, functions, etc. Also comment complex sections of code so that other developers can understand them. Compile without warnings ________________________ Your changes should compile without warnings. Tests ----- Write unit tests ________________ Add unit tests for all new functionality. Validity tests ______________ The developer should run research-scale simulations using the new functionality and ensure that it behaves as intended. User documentation ------------------ Write user documentation ________________________ Document public-facing API with Python docstrings in Google style. Example notebooks _________________ Demonstrate new functionality in the documentation examples pages. Document version status _______________________ Each user-facing Python class, method, etc. with a docstring should have [``versionadded``, ``versionchanged``, and ``deprecated`` Sphinx directives] Add developer to the credits ____________________________ Update the credits documentation to name each developer that has contributed to the code. Propose a change log entry __________________________ Propose a short concise entry describing the change in the pull request description. gsd-3.3.0/ContributorAgreement.md000066400000000000000000000076771462564674300170200ustar00rootroot00000000000000# GSD Contributor Agreement These terms apply to your contribution to the GSD Open Source Project ("Project") owned or managed by the Regents of the University of Michigan ("Michigan"), and set out the intellectual property rights you grant to Michigan in the contributed materials. If this contribution is on behalf of a company, the term "you" will also mean the company you identify below. 
If you agree to be bound by these terms, fill in the information requested below and provide your signature. 1. The term "contribution" means any source code, object code, patch, tool, sample, graphic, specification, manual, documentation, or any other material posted or submitted by you to a project. 2. With respect to any worldwide copyrights, or copyright applications and registrations, in your contribution: * you hereby assign to Michigan joint ownership, and to the extent that such assignment is or becomes invalid, ineffective or unenforceable, you hereby grant to Michigan a perpetual, irrevocable, non-exclusive, worldwide, no-charge, royalty-free, unrestricted license to exercise all rights under those copyrights. This includes, at Michigan's option, the right to sublicense these same rights to third parties through multiple levels of sublicensees or other licensing arrangements; * you agree that both Michigan and you can do all things in relation to your contribution as if each of us were the sole owners, and if one of us makes a derivative work of your contribution, the one who makes the derivative work (or has it made) will be the sole owner of that derivative work; * you agree that you will not assert any moral rights in your contribution against us, our licensees or transferees; * you agree that we may register a copyright in your contribution and exercise all ownership rights associated with it; and * you agree that neither of us has any duty to consult with, obtain the consent of, pay or render an accounting to the other for any use or distribution of your contribution. 3. With respect to any patents you own, or that you can license without payment to any third party, you hereby grant to Michigan a perpetual, irrevocable, non-exclusive, worldwide, no-charge, royalty-free license to: * make, have made, use, sell, offer to sell, import, and otherwise transfer your contribution in whole or in part, alone or in combination with or included in any product, work or materials arising out of the project to which your contribution was submitted; and * at Michigan's option, to sublicense these same rights to third parties through multiple levels of sublicensees or other licensing arrangements. 4. Except as set out above, you keep all right, title, and interest in your contribution. The rights that you grant to Michigan under these terms are effective on the date you first submitted a contribution to Michigan, even if your submission took place before the date you sign these terms. Any contribution Michigan makes available under any license will also be made available under a suitable Free Software Foundation or Open Source Initiative approved license. 5. With respect to your contribution, you represent that: * it is an original work and that you can legally grant the rights set out in these terms; * it does not to the best of your knowledge violate any third party's copyrights, trademarks, patents, or other intellectual property rights; and you are authorized to sign this contract on behalf of your company (if identified below). 6. The terms will be governed by the laws of the State of Michigan and applicable U.S. Federal Law. Any choice of law rules will not apply. **By making contribution, you electronically sign and agree to the terms of the GSD Contributor Agreement.** ![by-sa.png](https://licensebuttons.net/l/by-sa/3.0/88x31.png) Based on the Sun Contributor Agreement - version 1.5. 
This document is licensed under a Creative Commons Attribution-Share Alike 3.0 Unported License http://creativecommons.org/licenses/by-sa/3.0/ gsd-3.3.0/Doxyfile000066400000000000000000000266101462564674300140260ustar00rootroot00000000000000# Doxyfile 1.8.16 #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- DOXYFILE_ENCODING = UTF-8 PROJECT_NAME = "GSD" PROJECT_NUMBER = v3.3.0 PROJECT_BRIEF = "General simulation data" PROJECT_LOGO = OUTPUT_DIRECTORY = devdoc CREATE_SUBDIRS = NO ALLOW_UNICODE_NAMES = NO OUTPUT_LANGUAGE = English OUTPUT_TEXT_DIRECTION = None BRIEF_MEMBER_DESC = YES REPEAT_BRIEF = YES ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the ALWAYS_DETAILED_SEC = NO INLINE_INHERITED_MEMB = NO FULL_PATH_NAMES = YES STRIP_FROM_PATH = STRIP_FROM_INC_PATH = SHORT_NAMES = NO JAVADOC_AUTOBRIEF = YES JAVADOC_BANNER = NO QT_AUTOBRIEF = NO MULTILINE_CPP_IS_BRIEF = NO INHERIT_DOCS = YES SEPARATE_MEMBER_PAGES = NO TAB_SIZE = 4 ALIASES = TCL_SUBST = OPTIMIZE_OUTPUT_FOR_C = YES OPTIMIZE_OUTPUT_JAVA = NO OPTIMIZE_FOR_FORTRAN = NO OPTIMIZE_OUTPUT_VHDL = NO OPTIMIZE_OUTPUT_SLICE = NO EXTENSION_MAPPING = MARKDOWN_SUPPORT = YES TOC_INCLUDE_HEADINGS = 5 AUTOLINK_SUPPORT = YES BUILTIN_STL_SUPPORT = NO CPP_CLI_SUPPORT = NO SIP_SUPPORT = NO IDL_PROPERTY_SUPPORT = YES DISTRIBUTE_GROUP_DOC = NO GROUP_NESTED_COMPOUNDS = NO SUBGROUPING = YES INLINE_GROUPED_CLASSES = NO INLINE_SIMPLE_STRUCTS = NO TYPEDEF_HIDES_STRUCT = NO LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- EXTRACT_ALL = YES EXTRACT_PRIVATE = YES EXTRACT_PRIV_VIRTUAL = NO EXTRACT_PACKAGE = NO EXTRACT_STATIC = YES EXTRACT_LOCAL_CLASSES = YES EXTRACT_LOCAL_METHODS = NO EXTRACT_ANON_NSPACES = NO HIDE_UNDOC_MEMBERS = NO HIDE_UNDOC_CLASSES = NO HIDE_FRIEND_COMPOUNDS = NO HIDE_IN_BODY_DOCS = NO INTERNAL_DOCS = NO CASE_SENSE_NAMES = YES HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO SHOW_INCLUDE_FILES = YES SHOW_GROUPED_MEMB_INC = NO FORCE_LOCAL_INCLUDES = NO INLINE_INFO = YES SORT_MEMBER_DOCS = YES SORT_BRIEF_DOCS = NO SORT_MEMBERS_CTORS_1ST = NO SORT_GROUP_NAMES = NO SORT_BY_SCOPE_NAME = NO STRICT_PROTO_MATCHING = NO GENERATE_TODOLIST = YES GENERATE_TESTLIST = YES GENERATE_BUGLIST = YES GENERATE_DEPRECATEDLIST= YES ENABLED_SECTIONS = MAX_INITIALIZER_LINES = 30 SHOW_USED_FILES = YES SHOW_FILES = YES SHOW_NAMESPACES = YES FILE_VERSION_FILTER = LAYOUT_FILE = CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- QUIET = NO WARNINGS = YES WARN_IF_UNDOCUMENTED = YES WARN_IF_DOC_ERROR = YES WARN_NO_PARAMDOC = NO WARN_AS_ERROR = NO WARN_FORMAT = "$file:$line: $text" WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- INPUT = gsd INPUT_ENCODING = UTF-8 FILE_PATTERNS = *.h RECURSIVE = YES EXCLUDE = EXCLUDE_SYMLINKS = NO EXCLUDE_PATTERNS = EXCLUDE_SYMBOLS = EXAMPLE_PATH = EXAMPLE_PATTERNS = * EXAMPLE_RECURSIVE = NO 
IMAGE_PATH = INPUT_FILTER = FILTER_PATTERNS = FILTER_SOURCE_FILES = NO FILTER_SOURCE_PATTERNS = USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- SOURCE_BROWSER = NO INLINE_SOURCES = NO STRIP_CODE_COMMENTS = YES REFERENCED_BY_RELATION = NO REFERENCES_RELATION = NO REFERENCES_LINK_SOURCE = YES SOURCE_TOOLTIPS = YES USE_HTAGS = NO VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- ALPHABETICAL_INDEX = YES COLS_IN_ALPHA_INDEX = 5 IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- GENERATE_HTML = NO HTML_OUTPUT = html HTML_FILE_EXTENSION = .html HTML_HEADER = HTML_FOOTER = HTML_STYLESHEET = HTML_EXTRA_STYLESHEET = HTML_EXTRA_FILES = HTML_COLORSTYLE_HUE = 210 HTML_COLORSTYLE_SAT = 100 HTML_COLORSTYLE_GAMMA = 80 HTML_TIMESTAMP = YES HTML_DYNAMIC_MENUS = YES HTML_DYNAMIC_SECTIONS = NO HTML_INDEX_NUM_ENTRIES = 100 GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" DOCSET_BUNDLE_ID = org.doxygen.Project DOCSET_PUBLISHER_ID = org.doxygen.Publisher DOCSET_PUBLISHER_NAME = Publisher GENERATE_HTMLHELP = NO CHM_FILE = HHC_LOCATION = GENERATE_CHI = NO CHM_INDEX_ENCODING = BINARY_TOC = NO TOC_EXPAND = NO GENERATE_QHP = NO QCH_FILE = QHP_NAMESPACE = org.doxygen.Project QHP_VIRTUAL_FOLDER = doc QHP_CUST_FILTER_NAME = QHP_CUST_FILTER_ATTRS = QHP_SECT_FILTER_ATTRS = QHG_LOCATION = GENERATE_ECLIPSEHELP = NO ECLIPSE_DOC_ID = org.doxygen.Project DISABLE_INDEX = NO GENERATE_TREEVIEW = NO ENUM_VALUES_PER_LINE = 4 TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO FORMULA_FONTSIZE = 10 FORMULA_TRANSPARENT = YES USE_MATHJAX = YES MATHJAX_FORMAT = HTML-CSS MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/ MATHJAX_EXTENSIONS = MATHJAX_CODEFILE = SEARCHENGINE = YES SERVER_BASED_SEARCH = NO EXTERNAL_SEARCH = NO SEARCHENGINE_URL = SEARCHDATA_FILE = searchdata.xml EXTERNAL_SEARCH_ID = EXTRA_SEARCH_MAPPINGS = #--------------------------------------------------------------------------- # Configuration options related to the LaTeX output #--------------------------------------------------------------------------- GENERATE_LATEX = NO LATEX_OUTPUT = latex LATEX_CMD_NAME = MAKEINDEX_CMD_NAME = makeindex LATEX_MAKEINDEX_CMD = makeindex COMPACT_LATEX = NO PAPER_TYPE = a4 EXTRA_PACKAGES = LATEX_HEADER = LATEX_FOOTER = LATEX_EXTRA_STYLESHEET = LATEX_EXTRA_FILES = PDF_HYPERLINKS = YES USE_PDFLATEX = YES LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = NO LATEX_SOURCE_CODE = NO LATEX_BIB_STYLE = plain LATEX_TIMESTAMP = NO LATEX_EMOJI_DIRECTORY = #--------------------------------------------------------------------------- # Configuration options related to the RTF output #--------------------------------------------------------------------------- GENERATE_RTF = NO RTF_OUTPUT = rtf COMPACT_RTF = NO RTF_HYPERLINKS = NO RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = RTF_SOURCE_CODE = NO #--------------------------------------------------------------------------- # Configuration options related to the man page output 
#--------------------------------------------------------------------------- GENERATE_MAN = NO MAN_OUTPUT = man MAN_EXTENSION = .3 MAN_SUBDIR = MAN_LINKS = NO #--------------------------------------------------------------------------- # Configuration options related to the XML output #--------------------------------------------------------------------------- GENERATE_XML = YES XML_OUTPUT = xml XML_PROGRAMLISTING = YES XML_NS_MEMB_FILE_SCOPE = NO #--------------------------------------------------------------------------- # Configuration options related to the DOCBOOK output #--------------------------------------------------------------------------- GENERATE_DOCBOOK = NO DOCBOOK_OUTPUT = docbook DOCBOOK_PROGRAMLISTING = NO #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # Configuration options related to the Perl module output #--------------------------------------------------------------------------- GENERATE_PERLMOD = NO PERLMOD_LATEX = NO PERLMOD_PRETTY = YES PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- ENABLE_PREPROCESSING = YES MACRO_EXPANSION = NO EXPAND_ONLY_PREDEF = NO SEARCH_INCLUDES = YES INCLUDE_PATH = INCLUDE_FILE_PATTERNS = PREDEFINED = EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration options related to external references #--------------------------------------------------------------------------- TAGFILES = GENERATE_TAGFILE = ALLEXTERNALS = NO EXTERNAL_GROUPS = YES EXTERNAL_PAGES = YES #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- CLASS_DIAGRAMS = YES DIA_PATH = HIDE_UNDOC_RELATIONS = YES HAVE_DOT = NO DOT_NUM_THREADS = 0 DOT_FONTNAME = Helvetica DOT_FONTSIZE = 10 DOT_FONTPATH = CLASS_GRAPH = YES COLLABORATION_GRAPH = YES GROUP_GRAPHS = YES UML_LOOK = NO UML_LIMIT_NUM_FIELDS = 10 TEMPLATE_RELATIONS = NO INCLUDE_GRAPH = YES INCLUDED_BY_GRAPH = YES CALL_GRAPH = NO CALLER_GRAPH = NO GRAPHICAL_HIERARCHY = YES DIRECTORY_GRAPH = YES DOT_IMAGE_FORMAT = png INTERACTIVE_SVG = NO DOT_PATH = DOTFILE_DIRS = MSCFILE_DIRS = DIAFILE_DIRS = PLANTUML_JAR_PATH = PLANTUML_CFG_FILE = PLANTUML_INCLUDE_PATH = DOT_GRAPH_MAX_NODES = 50 MAX_DOT_GRAPH_DEPTH = 0 DOT_TRANSPARENT = NO DOT_MULTI_TARGETS = NO GENERATE_LEGEND = YES DOT_CLEANUP = YES gsd-3.3.0/INSTALLING.rst000066400000000000000000000151031462564674300145110ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. Installation ============ **gsd** binaries are available on conda-forge_ and PyPI_. You can also compile **gsd** from source, embed ``gsd.c`` in your code, or read gsd files with a pure Python reader ``pygsd.py``. .. _conda-forge: https://conda-forge.org/ .. 
_PyPI: https://pypi.org/ Binaries -------- Conda package ^^^^^^^^^^^^^ **gsd** is available on conda-forge_ for the *linux-64*, *linux-aarch64*, *linux-ppc64le*, *osx-64*, *osx-arm64* and *win-64* architectures. Install with: .. code-block:: bash $ mamba install gsd PyPI ^^^^ Use **pip** to install **gsd** binaries: .. code-block:: bash $ python3 -m pip install gsd Compile from source ------------------- To build the **gsd** Python package from source: 1. `Install prerequisites`_:: $ install cmake cython git numpy python pytest 2. `Obtain the source`_:: $ git clone https://github.com/glotzerlab/gsd 3. `Install with setuptools`_:: $ python3 -m pip install -e gsd **OR** `Build with CMake for development`_:: $ cmake -B build/gsd -S gsd $ cmake --build build/gsd To run the tests (optional): 1. `Run tests`_:: $ pytest --pyargs gsd To build the documentation from source (optional): 1. `Install prerequisites`_:: $ install breathe doxygen sphinx furo ipython sphinx-copybutton 2. `Build the documentation`_:: $ cd gsd && doxygen && cd .. $ sphinx-build -b html gsd/doc build/gsd-documentation The sections below provide details on each of these steps. .. _Install prerequisites: Install prerequisites ^^^^^^^^^^^^^^^^^^^^^ **gsd** requires a number of tools and libraries to build. .. note:: This documentation is generic. Replace ```` with your package or module manager. You may need to adjust package names and/or install additional packages, such as ``-dev`` packages that provide headers needed to build **gsd**. .. tip:: Create or use an existing `virtual environment`_, one place where you can install dependencies and **gsd**:: $ python3 -m venv gsd-venv You will need to activate your environment before installing or configuring **gsd**:: $ source gsd-venv/bin/activate **General requirements:** * **C compiler** (tested with gcc 10-14, clang 10-18, Visual Studio 2019-2022) * **Python** >= 3.9 * **numpy** >= 1.19.0 * **Cython** >= 0.22 **To build the documentation**: * **breathe** * **Doxygen** * **furo** * **IPython** * **Sphinx** * **sphinx-copybutton** * an internet connection **To execute unit tests:** * **pytest** >= 3.9.0 .. _virtual environment: https://docs.python.org/3/library/venv.html .. _Obtain the source: Obtain the source ^^^^^^^^^^^^^^^^^ Clone using Git_:: $ git clone https://github.com/glotzerlab/gsd Release tarballs are also available on the `GitHub release pages`_. .. seealso:: See the `git book`_ to learn how to work with `Git`_ repositories. .. _GitHub release pages: https://github.com/glotzerlab/gsd/releases/ .. _git book: https://git-scm.com/book .. _Git: https://git-scm.com/ .. _Install with setuptools: Install with setuptools ^^^^^^^^^^^^^^^^^^^^^^^ Use **pip** to install the Python module into your virtual environment: .. code-block:: bash $ python3 -m pip install -e gsd .. Build with CMake for development: Build with CMake for development ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In addition to the setuptools build system. GSD also provides a `CMake`_ configuration for development and testing. You can assemble a functional Python module in the given build directory. First, configure the build with ``cmake``. .. code-block:: bash $ cmake -B build/gsd -S gsd Then, build the code: .. code-block:: bash $ cmake --build build/gsd When modifying code, you only need to repeat the build step to update your build - it will automatically reconfigure as needed. .. tip:: Use Ninja_ to perform incremental builds in less time:: $ cmake -B build/gsd -S gsd -GNinja .. 
tip:: Place your build directory in ``/tmp`` or ``/scratch`` for faster builds. CMake_ performs out-of-source builds, so the build directory can be anywhere on the filesystem. .. tip:: Pass the following options to ``cmake`` to optimize the build for your processor: ``-DCMAKE_CXX_FLAGS=-march=native -DCMAKE_C_FLAGS=-march=native``. .. important:: When using a virtual environment, activate the environment and set the cmake prefix path before running CMake_: ``$ export CMAKE_PREFIX_PATH=``. .. _CMake: https://cmake.org/ .. _Ninja: https://ninja-build.org/ .. _Run tests: Run tests ^^^^^^^^^ Use `pytest`_ to execute unit tests: .. code-block:: bash $ python3 -m pytest --pyargs gsd Add the ``--validate`` option to include longer-running validation tests: .. code-block:: bash $ python3 -m pytest --pyargs gsd -p gsd.pytest_plugin_validate --validate .. tip:: When using CMake builds, change to the build directory before running ``pytest``:: $ cd build/gsd .. _pytest: https://docs.pytest.org/ .. _Build the documentation: Build the documentation ^^^^^^^^^^^^^^^^^^^^^^^ Run `Doxygen`_ to generate the C documentation: .. code-block:: bash $ cd gsd $ doxygen $ cd .. Run `Sphinx`_ to build the HTML documentation: .. code-block:: bash $ sphinx-build -b html gsd/doc build/gsd-documentation Open the file :file:`build/gsd-documentation/index.html` in your web browser to view the documentation. .. tip:: When iteratively modifying the documentation, the sphinx options ``-a -n -W -T --keep-going`` are helpful to produce docs with consistent links in the side panel and to see more useful error messages:: $ sphinx-build -a -n -W -T --keep-going -b html gsd/doc build/gsd-documentation .. tip:: When using CMake builds, set PYTHONPATH to the build directory before running ``sphinx-build``:: $ PYTHONPATH=build/gsd sphinx-build -b html gsd/doc build/gsd-documentation .. _Sphinx: https://www.sphinx-doc.org/ .. _Doxygen: https://www.doxygen.nl/ Embedding GSD in your project ----------------------------- Using the C library ^^^^^^^^^^^^^^^^^^^ **gsd** is implemented in a single C file. Copy ``gsd/gsd.h`` and ``gsd/gsd.c`` into your project. Using the pure Python reader ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you only need to read files, you can skip installing and just extract the module modules ``gsd/pygsd.py`` and ``gsd/hoomd.py``. Together, these implement a pure Python reader for **gsd** and **HOOMD** files - no C compiler required. gsd-3.3.0/LICENSE000066400000000000000000000024651462564674300133270ustar00rootroot00000000000000Copyright (c) 2016-2024 The Regents of the University of Michigan All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. gsd-3.3.0/MANIFEST.in000066400000000000000000000003131462564674300140460ustar00rootroot00000000000000global-include *.pyx global-include *.pxd global-include *.c global-include *.h global-include *.ini include README.md include CHANGELOG.rst include LICENSE include pyproject.toml include gsd/test/*.gsd gsd-3.3.0/README.md000066400000000000000000000063351462564674300136010ustar00rootroot00000000000000# GSD The **GSD** file format is the native file format for [HOOMD-blue](https://glotzerlab.engin.umich.edu/hoomd-blue/). **GSD** files store trajectories of the **HOOMD-blue** system state in a binary file with efficient random access to frames. **GSD** allows all particle and topology properties to vary from one frame to the next. Use the **GSD** Python API to specify the initial condition for a **HOOMD-blue** simulation or analyze trajectory output with a script. Read a **GSD** trajectory with a visualization tool to explore the behavior of the simulation. ## Resources * [GSD documentation](http://gsd.readthedocs.io): Tutorials, Python API, C API, usage information, and format specification. * [Installation Guide](INSTALLING.rst): Instructions for installing and compiling **GSD**. * [HOOMD-blue](https://glotzerlab.engin.umich.edu/hoomd-blue/): Simulation engine that reads and writes **GSD** files. * [GSD discussion board](https://github.com/glotzerlab/gsd/discussions/): Ask the **GSD** community for help. * [freud](https://freud.readthedocs.io): A powerful set of tools for analyzing trajectories. * [OVITO](https://www.ovito.org/): The Open Visualization Tool works with **GSD** files. * [gsd-vmd plugin](https://github.com/mphoward/gsd-vmd): VMD plugin to support **GSD** files. ## HOOMD examples Create a hoomd gsd file. ```python >>> s = gsd.hoomd.Frame() >>> s.particles.N = 4 >>> s.particles.types = ['A', 'B'] >>> s.particles.typeid = [0,0,1,1] >>> s.particles.position = [[0,0,0],[1,1,1], [-1,-1,-1], [1,-1,-1]] >>> s.configuration.box = [3, 3, 3, 0, 0, 0] >>> traj = gsd.hoomd.open(name='test.gsd', mode='w') >>> traj.append(s) ``` Append frames to a gsd file: ```python >>> def create_frame(i): ... s = gsd.hoomd.Frame(); ... s.configuration.step = i; ... s.particles.N = 4+i; ... s.particles.position = numpy.random.random(size=(4+i,3)) ... return s; >>> with gsd.hoomd.open('test.gsd', 'a') as t: ... t.extend( (create_frame(i) for i in range(10)) ) ... print(len(t)) 11 ``` Randomly index frames: ```python >>> with gsd.hoomd.open('test.gsd', 'r') as t: ... frame = t[5] ... print(frame.configuration.step) 4 ... print(frame.particles.N) 8 ... print(frame.particles.position) [[ 0.56993282 0.42243481 0.5502916 ] [ 0.36892486 0.38167036 0.27310368] [ 0.04739023 0.13603486 0.196539 ] [ 0.120232 0.91591144 0.99463677] [ 0.79806316 0.16991436 0.15228257] [ 0.13724308 0.14253527 0.02505 ] [ 0.39287439 0.82519054 0.01613089] [ 0.23150323 0.95167434 0.7715748 ]] ``` Slice frames: ```python >>> with gsd.hoomd.open('test.gsd', 'r') as t: ... for s in t[5:-2]: ... 
print(s.configuration.step, end=' ') 4 5 6 7 ``` ## File layer examples ```python with gsd.fl.open(name='file.gsd', mode='w') as f: f.write_chunk(name='position', data=numpy.array([[1,2,3],[4,5,6]], dtype=numpy.float32)); f.write_chunk(name='angle', data=numpy.array([0, 1], dtype=numpy.float32)); f.write_chunk(name='box', data=numpy.array([10, 10, 10], dtype=numpy.float32)); f.end_frame() ``` ```python with gsd.fl.open(name='file.gsd', mode='r') as f: for i in range(1,f.nframes): position = f.read_chunk(frame=i, name='position'); do_something(position); ``` gsd-3.3.0/doc/000077500000000000000000000000001462564674300130605ustar00rootroot00000000000000gsd-3.3.0/doc/.gitignore000066400000000000000000000000151462564674300150440ustar00rootroot00000000000000_build *.gsd gsd-3.3.0/doc/_static/000077500000000000000000000000001462564674300145065ustar00rootroot00000000000000gsd-3.3.0/doc/_static/.keep000066400000000000000000000000001462564674300154210ustar00rootroot00000000000000gsd-3.3.0/doc/_static/umich-block-M.svg000066400000000000000000000201111462564674300176110ustar00rootroot00000000000000 gsd-3.3.0/doc/_templates/000077500000000000000000000000001462564674300152155ustar00rootroot00000000000000gsd-3.3.0/doc/_templates/page.html000066400000000000000000000140261462564674300170220ustar00rootroot00000000000000{% extends "furo/page.html" %} {% block footer %}

Development of {{ project }} is led by the Glotzer Group at the University of Michigan.

{%- if show_copyright %} {%- endif %} {% trans %}Made with {% endtrans -%} {%- if show_sphinx -%} {% trans %}Sphinx and {% endtrans -%} @pradyunsg's {% endif -%} {% trans %} Furo {% endtrans %} {%- if last_updated -%}
{% trans last_updated=last_updated|e -%} Last updated on {{ last_updated }} {%- endtrans -%}
{%- endif %}
University of Michigan logo {% if theme_footer_icons or READTHEDOCS -%}
{% if theme_footer_icons -%} {% for icon_dict in theme_footer_icons -%} {{- icon_dict.html -}} {% endfor %} {%- else -%} {#- Show Read the Docs project -#} {%- if READTHEDOCS and slug -%} {%- endif -%} {#- Show GitHub repository home -#} {%- if READTHEDOCS and display_github and github_user != "None" and github_repo != "None" -%} {%- endif -%} {%- endif %}
{%- endif %}
{% endblock footer %} gsd-3.3.0/doc/c-api.rst000066400000000000000000000013501462564674300146020ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. .. _c_api_: C API ===== The GSD C API consists of a single header and source file. Developers can drop the implementation into any package that needs it. .. doxygenindex:: .. c:type:: uint8_t 8-bit unsigned integer (defined by C compiler). .. c:type:: uint16_t 16-bit unsigned integer (defined by C compiler). .. c:type:: uint32_t 32-bit unsigned integer (defined by C compiler). .. c:type:: uint64_t 64-bit unsigned integer (defined by C compiler). .. c:type:: int64_t 64-bit signed integer (defined by C compiler). .. c:type:: size_t unsigned integer (defined by C compiler). gsd-3.3.0/doc/changes.rst000066400000000000000000000002351462564674300152220ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. .. include:: ../CHANGELOG.rst gsd-3.3.0/doc/cli.rst000066400000000000000000000005251462564674300143630ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. gsd command line interface ========================== **GSD** provides a command line interface for rapid inspection of files from the command line. .. automodule:: gsd.__main__ :synopsis: GSD CLI. gsd-3.3.0/doc/community.rst000066400000000000000000000007351462564674300156430ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. User community ============== GSD discussion board -------------------------- Use the `GSD discussion board `_ to post questions, ask for support, and discuss potential new features. Issue tracker ------------- File bug reports on `GSD's issue tracker `_. gsd-3.3.0/doc/conf.py000066400000000000000000000046151462564674300143650ustar00rootroot00000000000000#!/usr/bin/env python3 # Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. 
import datetime import os import tempfile import gsd extensions = [ 'breathe', 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting', 'IPython.sphinxext.ipython_directive', 'sphinx_copybutton', ] if os.getenv('READTHEDOCS'): extensions.append('sphinxcontrib.googleanalytics') googleanalytics_id = 'G-25TF48HJ76' napoleon_include_special_with_doc = True intersphinx_mapping = { 'python': ('https://docs.python.org/3', None), 'numpy': ('https://numpy.org/doc/stable', None), 'hoomd': ('https://hoomd-blue.readthedocs.io/en/latest/', None), } autodoc_docstring_signature = True templates_path = ['_templates'] source_suffix = '.rst' master_doc = 'index' project = 'GSD' year = datetime.date.today().year copyright = f'2016-{ year } The Regents of the University of Michigan' version = gsd.version.version release = version exclude_patterns = ['_build'] default_role = 'any' pygments_style = 'friendly' pygments_dark_style = 'native' html_theme = 'furo' html_static_path = ['_static'] html_theme_options = { 'navigation_with_keys': True, 'top_of_page_buttons': [], 'dark_css_variables': { 'color-brand-primary': '#5187b2', 'color-brand-content': '#5187b2', }, 'light_css_variables': { 'color-brand-primary': '#406a8c', 'color-brand-content': '#406a8c', }, } copybutton_prompt_text = r'>>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: ' copybutton_prompt_is_regexp = True copybutton_remove_prompts = True copybutton_line_continuation_character = '\\' ### Add custom directives def setup(app): app.add_object_type( 'chunk', 'chunk', objname='Data chunk', indextemplate='single: %s (data chunk)' ) tmpdir = tempfile.TemporaryDirectory() ###### IPython directive settings ipython_mplbackend = '' ipython_execlines = [ 'import gsd.fl', 'import gsd.hoomd', 'import gsd.pygsd', 'import numpy', 'import os', f'os.chdir("{tmpdir.name}")', ] dirname = os.path.abspath(os.path.dirname(__file__)) breathe_projects = {'gsd': os.path.join(dirname, '..', 'devdoc', 'xml')} breathe_default_project = 'gsd' breathe_domain_by_extension = { 'h': 'c', } gsd-3.3.0/doc/contributing.rst000066400000000000000000000002401462564674300163150ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. .. include:: ../CONTRIBUTING.rst gsd-3.3.0/doc/credits.rst000066400000000000000000000012471462564674300152530ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. Credits ======= The following people contributed to GSD. * Joshua A. Anderson, University of Michigan * Carl Simon Adorf, University of Michigan * Bradley Dice, University of Michigan * Jenny W. Fothergill, Boise State University * Jens Glaser, University of Michigan * Vyas Ramasubramani, University of Michigan * Luis Y. Rivera-Rivera, University of Michigan * Brandon Butler, University of Michigan * Arthur Zamarin, Gentoo Linux * Alexander Stukowski, OVITO GmbH * Charlotte Shiqi Zhao, University of Michigan * Tim Moore, University of Michigan gsd-3.3.0/doc/file-layer.rst000066400000000000000000000205221462564674300156440ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. File layer ========== .. highlight:: c **Version: 2.0** General simulation data (GSD) **file layer** design and rationale. 
These use cases and design specifications define the low level GSD file format. Differences from the 1.0 specification are noted. Use-cases --------- * capabilities * efficiently store many frames of data from simulation runs * high performance file read and write * support arbitrary chunks of data in each frame (position, orientation, type, etc...) * variable number of named chunks in each frame * variable size of chunks in each frame * each chunk identifies data type * common use cases: NxM arrays in double, float, int, char types. * generic use case: binary blob of N bytes * can be integrated into other tools * append frames to an existing file with a monotonically increasing frame number * resilient to job kills * queries * number of frames * is named chunk present in frame *i* * type and size of named chunk in frame *i* * read data for named chunk in frame *i* * read only a portion of a chunk * list chunk names in the file * writes * write data to named chunk in the current frame * end frame and commit to disk These capabilities enable a simple and rich higher level schema for storing particle and other types of data. The schema determine which named chunks exist in a given file and what they mean. Non use-cases ------------- These capabilities are use-cases that GSD does **not** support, by design. #. Modify data in the file: GSD is designed to capture simulation data. #. Add chunks to frames in the middle of a file: See (1). #. Transparent conversion between float and double: Callers must take care of this. #. Transparent compression: this gets in the way of parallel I/O. Disk space is cheap. Dependencies ------------ The file layer is implemented in C (*not C++*) with no dependencies to enable trivial installation and incorporation into existing projects. A single header and C file completely implement the entire file layer. Python based projects that need only read access can use :py:mod:`gsd.pygsd`, a pure Python gsd reader implementation. A Python interface to the file layer allows reference implementations and convenience methods for schemas. Most non-technical users of GSD will probably use these reference implementations directly in their scripts. The low level C library is wrapped with cython. A Python ``pyproject.toml`` file will provide simple installation on as many systems as possible. Cython c++ output is checked in to the repository so users do not even need cython as a dependency. Specifications -------------- Support: * Files as large as the underlying filesystem allows (up to 64-bit address limits) * Data chunk names of arbitrary length (v1.0 limits chunk names to 63 bytes) * Reference up to 65535 different chunk names within a file * Application and schema names up to 63 characters * Store as many frames as can fit in a file up to file size limits * Data chunks up to (64-bit) x (32-bit) elements The limits on only 16-bit name indices and 32-bit column indices are to keep the size of each index entry as small as possible to avoid wasting space in the file index. The primary use cases in mind for column indices are Nx3 and Nx4 arrays for position and quaternion values. Schemas that wish to store larger truly n-dimensional arrays can store their dimensionality in metadata in another chunk and store as an Nx1 index entry. Or use a file format more suited to N-dimensional arrays such as HDF5. File format ----------- There are four types of data blocks in a GSD file. #. 
Header block

   * Overall header for the entire file: contains the magic cookie, a format version, the name of the generating application, the schema name, and its version. Some bytes in the header are reserved for future use. Header size: 256 bytes. The header block also includes a pointer to the index, the number of allocated entries in the index, a pointer to the name list, and the size of the name list block.
   * The header is the first 256 bytes in the file.

#. Index block

   * Indexes the frame data: size information, location, name id, etc...
   * The index contains space for any number of ``index_entry`` structs.
   * The first index in the list with a location of 0 marks the end of the list.
   * When the index fills up, a new index block is allocated at the end of the file with more space and all current index entries are rewritten there.
   * Index entry size: 32 bytes.

#. Name list

   * List of string names used by index entries.
   * v1.0 files: Each name is a 64-byte character string.
   * v2.0 files: Names may have any length and are separated by 0 terminators.
   * The first name that starts with the 0 byte marks the end of the list.
   * The header stores the total size of the name list block.

#. Data chunk

   * Raw binary data stored for the named frame data blocks.

Header, index, and name blocks are stored in memory as C structs (or arrays of C structs) and written to disk in whole chunks.

Header block
^^^^^^^^^^^^

This is the header block::

    struct gsd_header
        {
        uint64_t magic;
        uint64_t index_location;
        uint64_t index_allocated_entries;
        uint64_t namelist_location;
        uint64_t namelist_allocated_entries;
        uint32_t schema_version;
        uint32_t gsd_version;
        char application[64];
        char schema[64];
        char reserved[80];
        };

* ``magic`` is the magic number identifying this as a GSD file (``0x65DF65DF65DF65DF``).
* ``gsd_version`` is the version number of the gsd file layer (``0xaaaabbbb => aaaa.bbbb``).
* ``application`` is the name of the generating application.
* ``schema`` is the name of the schema for data in this gsd file.
* ``schema_version`` is the version of the schema (``0xaaaabbbb => aaaa.bbbb``).
* ``index_location`` is the file location of the index block.
* ``index_allocated_entries`` is the number of entries allocated in the index block.
* ``namelist_location`` is the file location of the namelist block.
* ``namelist_allocated_entries`` is the number of 64-byte segments available in the namelist block.
* ``reserved`` are bytes saved for future use.

This structure is ordered so that all known compilers at the time of writing produced a tightly packed 256-byte header. Some compilers may require non-standard packing attributes or pragmas to enforce this.

Index block
^^^^^^^^^^^

An index block is made of a number of line items that each store a pointer to a single data chunk::

    struct gsd_index_entry
        {
        uint64_t frame;
        uint64_t N;
        int64_t location;
        uint32_t M;
        uint16_t id;
        uint8_t type;
        uint8_t flags;
        };

* ``frame`` is the index of the frame this chunk belongs to.
* ``N`` and ``M`` define the dimensions of the data matrix (NxM in C ordering with M as the fast index).
* ``location`` is the location of the data chunk in the file.
* ``id`` is the index of the name of this entry in the namelist.
* ``type`` is the type of the data (char, int, float, double) indicated by index values.
* ``flags`` is reserved for future use.

Many ``gsd_index_entry_t`` structs are combined into one index block. They are stored densely packed and in the same order as the corresponding data chunks are written to the file.
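As an illustration (not part of the GSD API), the header and index entry layouts above can be decoded with Python's ``struct`` module. This sketch assumes the file was written with the machine's native byte order, as the C structs above are; the helper name ``read_header`` and the returned dictionary layout are arbitrary choices for this example:

.. code-block:: python

    import struct

    # Field order follows struct gsd_header: magic, index_location,
    # index_allocated_entries, namelist_location, namelist_allocated_entries,
    # schema_version, gsd_version, application, schema, reserved.
    HEADER_FORMAT = '=5Q2I64s64s80s'
    INDEX_ENTRY_FORMAT = '=QQqIHBB'  # frame, N, location, M, id, type, flags
    GSD_MAGIC = 0x65DF65DF65DF65DF

    # The specification requires a 256-byte header and 32-byte index entries.
    assert struct.calcsize(HEADER_FORMAT) == 256
    assert struct.calcsize(INDEX_ENTRY_FORMAT) == 32

    def read_header(path):
        """Read the 256-byte header of a GSD file and return selected fields."""
        with open(path, 'rb') as f:
            fields = struct.unpack(HEADER_FORMAT, f.read(256))
        (magic, index_location, index_allocated_entries, namelist_location,
         namelist_allocated_entries, schema_version, gsd_version,
         application, schema, _reserved) = fields
        if magic != GSD_MAGIC:
            raise RuntimeError('not a GSD file')
        return {
            'application': application.split(b'\x00')[0].decode('utf-8'),
            'schema': schema.split(b'\x00')[0].decode('utf-8'),
            'schema_version': (schema_version >> 16, schema_version & 0xFFFF),
            'gsd_version': (gsd_version >> 16, gsd_version & 0xFFFF),
            'index_location': index_location,
        }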
This structure is ordered so that all known compilers at the time of writing produced a tightly packed 32-byte entry. Some compilers may required non-standard packing attributes or pragmas to enforce this. In v1.0 files, the frame index must monotonically increase from one index entry to the next. The GSD API ensures this. In v2.0 files, the entire index block is stored sorted first by frame, then by *id*. Namelist block ^^^^^^^^^^^^^^ In v2.0 files, the namelist block stores a list of strings separated by 0 terminators. In v1.0 files, the namelist block stores a list of 0-terminated strings in 64-byte segments. The first string that starts with 0 marks the end of the list. Data block ^^^^^^^^^^ A data block stores raw data bytes on the disk. For a given index entry ``entry``, the data starts at location ``entry.location`` and is the next ``entry.N * entry.M * gsd_sizeof_type(entry.type)`` bytes. gsd-3.3.0/doc/fl-examples.rst000066400000000000000000000143521462564674300160340ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. .. _fl-examples: File layer examples ------------------- The file layer python module `gsd.fl` allows direct low level access to read and write **GSD** files of any schema. The **HOOMD** reader (`gsd.hoomd`) provides higher level access to **HOOMD** schema files, see :ref:`hoomd-examples`. View the page source to find unformatted example code. Import the module ^^^^^^^^^^^^^^^^^ .. ipython:: python import gsd.fl Open a gsd file ^^^^^^^^^^^^^^^ .. ipython:: python f = gsd.fl.open(name="file.gsd", mode='w', application="My application", schema="My Schema", schema_version=[1,0]) Use `gsd.fl.open` to open a gsd file. .. note:: When creating a new file, you must specify the application name, schema name, and schema version. .. warning:: Opening a gsd file with a 'w' or 'x' mode overwrites any existing file with the given name. Close a gsd file ^^^^^^^^^^^^^^^^ .. ipython:: python f.close() Call the `close ` method to close the file. Write data ^^^^^^^^^^ .. ipython:: python f = gsd.fl.open(name="file.gsd", mode='w', application="My application", schema="My Schema", schema_version=[1,0]); f.write_chunk(name='chunk1', data=numpy.array([1,2,3,4], dtype=numpy.float32)) f.write_chunk(name='chunk2', data=numpy.array([[5,6],[7,8]], dtype=numpy.float32)) f.end_frame() f.write_chunk(name='chunk1', data=numpy.array([9,10,11,12], dtype=numpy.float32)) f.write_chunk(name='chunk2', data=numpy.array([[13,14],[15,16]], dtype=numpy.float32)) f.end_frame() f.close() Add any number of named data chunks to each frame in the file with `write_chunk `. The data must be a 1 or 2 dimensional numpy array of a simple numeric type (or a data type that will automatically convert when passed to ``numpy.array(data)``. Call `end_frame ` to end the frame and start the next one. .. note:: While supported, implicit conversion to numpy arrays creates a copy of the data in memory and adds conversion overhead. .. warning:: Call `end_frame ` to write the last frame before closing the file. Read data ^^^^^^^^^ .. ipython:: python f = gsd.fl.open(name="file.gsd", mode='r') f.read_chunk(frame=0, name='chunk1') f.read_chunk(frame=1, name='chunk2') f.close() `read_chunk ` reads the named chunk at the given frame index in the file and returns it as a numpy array. Test if a chunk exists ^^^^^^^^^^^^^^^^^^^^^^ .. 
ipython:: python f = gsd.fl.open(name="file.gsd", mode='r') f.chunk_exists(frame=0, name='chunk1') f.chunk_exists(frame=1, name='chunk2') f.chunk_exists(frame=2, name='chunk1') f.close() `chunk_exists ` tests to see if a chunk by the given name exists in the file at the given frame. Discover chunk names ^^^^^^^^^^^^^^^^^^^^ .. ipython:: python f = gsd.fl.open(name="file.gsd", mode='r') f.find_matching_chunk_names('') f.find_matching_chunk_names('chunk') f.find_matching_chunk_names('chunk1') f.find_matching_chunk_names('other') `find_matching_chunk_names ` finds all chunk names present in a GSD file that start with the given string. Read-only access ^^^^^^^^^^^^^^^^ .. ipython:: python :okexcept: f = gsd.fl.open(name="file.gsd", mode='r') if f.chunk_exists(frame=0, name='chunk1'): data = f.read_chunk(frame=0, name='chunk1') data # Fails because the file is open read only f.write_chunk(name='error', data=numpy.array([1])) f.close() Writes fail when a file is opened in a read only mode. Access file metadata ^^^^^^^^^^^^^^^^^^^^ .. ipython:: python f = gsd.fl.open(name="file.gsd", mode='r') f.name f.mode f.gsd_version f.application f.schema f.schema_version f.nframes f.close() Read file metadata from properties of the file object. Open a file in read/write mode ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. ipython:: python f = gsd.fl.open(name="file.gsd", mode='w', application="My application", schema="My Schema", schema_version=[1,0]) f.write_chunk(name='double', data=numpy.array([1,2,3,4], dtype=numpy.float64)); f.end_frame() f.nframes f.read_chunk(frame=0, name='double') Open a file in read/write mode to allow both reading and writing. Use as a context manager ^^^^^^^^^^^^^^^^^^^^^^^^ .. ipython:: python with gsd.fl.open(name="file.gsd", mode='r') as f: data = f.read_chunk(frame=0, name='double'); data Use `gsd.fl.GSDFile` as a context manager for guaranteed file closure and cleanup when exceptions occur. Store string chunks ^^^^^^^^^^^^^^^^^^^ .. ipython:: python f = gsd.fl.open(name="file.gsd", mode='w', application="My application", schema="My Schema", schema_version=[1,0]) f.mode s = "This is a string" b = numpy.array([s], dtype=numpy.dtype((bytes, len(s)+1))) b = b.view(dtype=numpy.int8) b f.write_chunk(name='string', data=b) f.end_frame() r = f.read_chunk(frame=0, name='string') r r = r.view(dtype=numpy.dtype((bytes, r.shape[0]))); r[0].decode('UTF-8') f.close() To store a string in a gsd file, convert it to a numpy array of bytes and store that data in the file. Decode the byte sequence to get back a string. Truncate ^^^^^^^^ .. ipython:: python f = gsd.fl.open(name="file.gsd", mode='r+') f.nframes f.schema, f.schema_version, f.application f.truncate() f.nframes f.schema, f.schema_version, f.application f.close() Truncating a gsd file removes all data chunks from it, but retains the same schema, schema version, and application name. The file is not closed during this process. This is useful when writing restart files on a Lustre file system when file open operations need to be kept to a minimum. gsd-3.3.0/doc/hoomd-examples.rst000066400000000000000000000153571462564674300165470ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. .. _hoomd-examples: HOOMD examples -------------- `gsd.hoomd` provides high-level access to **HOOMD** schema **GSD** files. View the page source to find unformatted example code. Import the module ^^^^^^^^^^^^^^^^^ .. 
ipython:: python import gsd.hoomd Define a frame ^^^^^^^^^^^^^^^^^ .. ipython:: python frame = gsd.hoomd.Frame() frame.particles.N = 4 frame.particles.types = ['A', 'B'] frame.particles.typeid = [0,0,1,1] frame.particles.position = [[0,0,0],[1,1,1], [-1,-1,-1], [1,-1,-1]] frame.configuration.box = [3, 3, 3, 0, 0, 0] `gsd.hoomd.Frame` stores the state of a single system configuration, or frame, in the file. Instantiate this class to create a system configuration. All fields default to `None`. Each field is written to the file when not `None` **and** when the data does not match the data in the first frame or defaults specified in the schema. Create a hoomd gsd file ^^^^^^^^^^^^^^^^^^^^^^^ .. ipython:: python f = gsd.hoomd.open(name='file.gsd', mode='w') @suppress f.close() Use `gsd.hoomd.open` to open a **GSD** file as a `gsd.hoomd.HOOMDTrajectory` instance. Write frames to a gsd file ^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. ipython:: python def create_frame(i): frame = gsd.hoomd.Frame() frame.configuration.step = i frame.particles.N = 4+i frame.particles.position = numpy.random.random(size=(4+i,3)) return frame f = gsd.hoomd.open(name='example.gsd', mode='w') f.extend( (create_frame(i) for i in range(10)) ) f.append( create_frame(10) ) len(f) @suppress f.close() `gsd.hoomd.HOOMDTrajectory` is similar to a sequence of `gsd.hoomd.Frame` objects. The `append ` and `extend ` methods add frames to the trajectory. .. tip:: When using `extend `, pass in a generator or generator expression to avoid storing the entire trajectory in memory before writing it out. Randomly index frames ^^^^^^^^^^^^^^^^^^^^^ .. ipython:: python f = gsd.hoomd.open(name='example.gsd', mode='r') frame = f[5] frame.configuration.step frame.particles.N frame.particles.position @suppress f.close() `gsd.hoomd.HOOMDTrajectory` supports random indexing of frames in the file. Indexing into a trajectory returns a `gsd.hoomd.Frame`. Slicing and selection ^^^^^^^^^^^^^^^^^^^^^ Use the slicing operator to select individual frames or a subset of a trajectory. .. ipython:: python f = gsd.hoomd.open(name='example.gsd', mode='r') for frame in f[5:-2]: print(frame.configuration.step, end=' ') every_2nd_frame = f[::2] # create a view of a trajectory subset for frame in every_2nd_frame[:4]: print(frame.configuration.step, end=' ') @suppress f.close() Slicing a trajectory creates a trajectory view, which can then be queried for length or sliced again. Pure python reader ^^^^^^^^^^^^^^^^^^ .. ipython:: python f = gsd.pygsd.GSDFile(open('example.gsd', 'rb')) trajectory = gsd.hoomd.HOOMDTrajectory(f); trajectory[3].particles.position @suppress f.close() You can use **GSD** without needing to compile C code to read **GSD** files using `gsd.pygsd.GSDFile` in combination with `gsd.hoomd.HOOMDTrajectory`. It only supports the ``rb`` mode and does not read files as fast as the C implementation. It takes in a python file-like object, so it can be used with in-memory IO classes, and grid file classes that access data over the internet. .. warning:: `gsd.pygsd` is **slow**. Use `gsd.hoomd.open` whenever possible. Access logged data ^^^^^^^^^^^^^^^^^^ .. ipython:: python with gsd.hoomd.open(name='log-example.gsd', mode='w') as f: frame = gsd.hoomd.Frame() frame.particles.N = 4 for i in range(10): frame.configuration.step = i*100 frame.log['particles/net_force'] = numpy.array([[-1,2,-3+i], [0,2,-4], [-3,2,1], [1,2,3]], dtype=numpy.float32) frame.log['value/potential_energy'] = 1.5+i f.append(frame) Logged data is stored in the ``log`` dictionary as numpy arrays. 
Place data into this dictionary directly without the ``'log/'`` prefix and gsd will include it in the output. Store per-particle quantities with the prefix ``particles/``. Choose another prefix for other quantities. .. ipython:: python log = gsd.hoomd.read_log(name='log-example.gsd', scalar_only=True) list(log.keys()) log['log/value/potential_energy'] log['configuration/step'] Read logged data from the ``log`` dictionary. .. note:: Logged data must be a convertible to a numpy array of a supported type. .. ipython:: python :okexcept: with gsd.hoomd.open(name='example.gsd', mode='w') as f: frame = gsd.hoomd.Frame() frame.particles.N = 4 frame.log['invalid'] = dict(a=1, b=5) f.append(frame) Use multiprocessing ^^^^^^^^^^^^^^^^^^^ .. code:: python import multiprocessing def count_particles(args): t, frame_idx = args return len(t[frame_idx].particles.position) with gsd.hoomd.open(name='example.gsd', mode='r') as t: with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool: result = pool.map(count_particles, [(t, frame_idx) for frame_idx in range(len(t))]) result `gsd.hoomd.HOOMDTrajectory` can be pickled when in read mode to allow for multiprocessing through Python's :py:mod:`multiprocessing` library. Here ``count_particles`` finds the number of particles in each frame and appends it to a list. Using the command line ^^^^^^^^^^^^^^^^^^^^^^ The GSD library provides a command line interface for reading files with first-class support for reading HOOMD GSD files. The CLI opens a Python interpreter with a file opened in a specified mode. .. code-block:: console $ gsd read -s hoomd 'example.gsd' ... File: example.gsd Number of frames: 11 The GSD file handle is available via the "handle" variable. For supported schema, you may access the trajectory using the "traj" variable. Type "help(handle)" or "help(traj)" for more information. The gsd and gsd.fl packages are always loaded. Schema-specific modules (e.g. gsd.hoomd) are loaded if available. >>> len(traj) 11 >>> traj[0].particles.position.shape == (4, 3) True >>> handle.read_chunk(0, 'particles/N') array([4], dtype=uint32) gsd-3.3.0/doc/index.rst000066400000000000000000000034361462564674300147270ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. GSD documentation +++++++++++++++++ The **GSD** file format is the native file format for `HOOMD-blue `_. **GSD** files store trajectories of the HOOMD-blue system state in a binary file with efficient random access to frames. **GSD** allows all particle and topology properties to vary from one frame to the next. Use the **GSD** Python API to specify the initial condition for a HOOMD-blue simulation or analyze trajectory output with a script. Read a **GSD** trajectory with a visualization tool to explore the behavior of the simulation. * `GitHub Repository `_: **GSD** source code and issue tracker. * `HOOMD-blue `_: Simulation engine that reads and writes GSD files. * `GSD discussion board `_: Ask the **GSD** user community for help. * `freud `_: A powerful set of tools for analyzing trajectories. * `OVITO `_: The Open Visualization Tool works with GSD files. * `gsd-vmd plugin `_: VMD plugin to support GSD files. .. toctree:: :maxdepth: 1 :caption: Getting started installation changes community .. toctree:: :maxdepth: 1 :caption: Tutorials hoomd-examples fl-examples .. toctree:: :maxdepth: 1 :caption: Reference python-api cli c-api specification .. 
toctree:: :maxdepth: 1 :caption: Contributing contributing style .. toctree:: :maxdepth: 1 :caption: Additional information credits license indices gsd-3.3.0/doc/indices.rst000066400000000000000000000002701462564674300152270ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. Index ===== * :ref:`genindex` * :ref:`modindex` gsd-3.3.0/doc/installation.rst000066400000000000000000000002361462564674300163140ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. .. include:: ../INSTALLING.rst gsd-3.3.0/doc/license.rst000066400000000000000000000003361462564674300152360ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. License ======= GSD is available under the following license. .. literalinclude:: ../LICENSE gsd-3.3.0/doc/python-api.rst000066400000000000000000000023011462564674300156760ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. gsd Python package ================== **GSD** provides a **Python** API. Use the `gsd.hoomd` module to read and write files for HOOMD-blue. Submodules ---------- .. toctree:: :maxdepth: 1 python-module-gsd.fl python-module-gsd.hoomd python-module-gsd.pygsd python-module-gsd.version Package contents ---------------- .. automodule:: gsd :synopsis: GSD main module. :members: Logging ------- All Python modules in **GSD** use the Python standard library module :py:mod:`logging` to log events. Use this module to control the verbosity and output destination:: import logging logging.basicConfig(level=logging.INFO) .. seealso:: Module :py:mod:`logging` Documentation of the :py:mod:`logging` standard module. Signal handling --------------- On import, `gsd` installs a ``SIGTERM`` signal handler that calls `sys.exit` so that open gsd files have a chance to flush write buffers (`GSDFile.flush`) when a user's process is terminated. Use `signal.signal` to adjust this behavior as needed. gsd-3.3.0/doc/python-module-gsd.fl.rst000066400000000000000000000004431462564674300175720ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. gsd.fl module ^^^^^^^^^^^^^ .. automodule:: gsd.fl :synopsis: The file layer provides a low level API for directly accessing gsd files. :members: gsd-3.3.0/doc/python-module-gsd.hoomd.rst000066400000000000000000000004651462564674300203030ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. gsd.hoomd module ^^^^^^^^^^^^^^^^ .. automodule:: gsd.hoomd :synopsis: Reference implementation for reading/writing hoomd schema GSD files. :members: :show-inheritance: gsd-3.3.0/doc/python-module-gsd.pygsd.rst000066400000000000000000000004361462564674300203210ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. gsd.pygsd module ^^^^^^^^^^^^^^^^ .. automodule:: gsd.pygsd :synopsis: pygsd provides a GSD reader written in pure Python. 
:members: GSDFile gsd-3.3.0/doc/python-module-gsd.version.rst000066400000000000000000000003751462564674300206620ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. gsd.version module ^^^^^^^^^^^^^^^^^^ .. automodule:: gsd.version :synopsis: GSD version. :members: version gsd-3.3.0/doc/requirements.in000066400000000000000000000003041462564674300161300ustar00rootroot00000000000000breathe == 4.35.0 cython == 3.0.10 furo == 2024.5.6 ipython == 8.24.0 numpy == 1.26.4 pandas == 2.2.2 pyarrow == 16.0.0 sphinx == 7.3.7 sphinxcontrib-googleanalytics==0.4 sphinx-copybutton==0.5.2 gsd-3.3.0/doc/requirements.txt000066400000000000000000000046051462564674300163510ustar00rootroot00000000000000# # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile doc/requirements.in # alabaster==0.7.16 # via sphinx asttokens==2.4.1 # via stack-data babel==2.14.0 # via sphinx beautifulsoup4==4.12.3 # via furo breathe==4.35.0 # via -r doc/requirements.in certifi==2024.2.2 # via requests charset-normalizer==3.3.2 # via requests cython==3.0.10 # via -r doc/requirements.in decorator==5.1.1 # via ipython docutils==0.21.2 # via # breathe # sphinx executing==2.0.1 # via stack-data furo==2024.5.6 # via -r doc/requirements.in idna==3.7 # via requests imagesize==1.4.1 # via sphinx ipython==8.24.0 # via -r doc/requirements.in jedi==0.19.1 # via ipython jinja2==3.1.4 # via sphinx markupsafe==2.1.5 # via jinja2 matplotlib-inline==0.1.7 # via ipython numpy==1.26.4 # via # -r doc/requirements.in # pandas # pyarrow packaging==24.0 # via sphinx pandas==2.2.2 # via -r doc/requirements.in parso==0.8.4 # via jedi pexpect==4.9.0 # via ipython prompt-toolkit==3.0.43 # via ipython ptyprocess==0.7.0 # via pexpect pure-eval==0.2.2 # via stack-data pyarrow==16.0.0 # via -r doc/requirements.in pygments==2.17.2 # via # furo # ipython # sphinx python-dateutil==2.9.0.post0 # via pandas pytz==2024.1 # via pandas requests==2.32.0 # via sphinx six==1.16.0 # via # asttokens # python-dateutil snowballstemmer==2.2.0 # via sphinx soupsieve==2.5 # via beautifulsoup4 sphinx==7.3.7 # via # -r doc/requirements.in # breathe # furo # sphinx-basic-ng # sphinx-copybutton # sphinxcontrib-googleanalytics sphinx-basic-ng==1.0.0b2 # via furo sphinx-copybutton==0.5.2 # via -r doc/requirements.in sphinxcontrib-applehelp==1.0.8 # via sphinx sphinxcontrib-devhelp==1.0.6 # via sphinx sphinxcontrib-googleanalytics==0.4 # via -r doc/requirements.in sphinxcontrib-htmlhelp==2.0.5 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx sphinxcontrib-qthelp==1.0.7 # via sphinx sphinxcontrib-serializinghtml==1.1.10 # via sphinx stack-data==0.6.3 # via ipython traitlets==5.14.3 # via # ipython # matplotlib-inline tzdata==2024.1 # via pandas urllib3==2.2.1 # via requests wcwidth==0.2.13 # via prompt-toolkit gsd-3.3.0/doc/schema-hoomd.rst000066400000000000000000000643701462564674300161700ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. HOOMD Schema ============ HOOMD-blue supports a wide variety of per particle attributes and properties. Particles, bonds, and types can be dynamically added and removed during simulation runs. The ``hoomd`` schema can handle all of these situations in a reasonably space efficient and high performance manner. 
It is also backwards compatible with previous versions of itself: new versions only add additional data chunks and do not change the interpretation of existing data chunks. Any newer reader will initialize new data chunks with default values when they are not present in an older version file.

:Schema name: ``hoomd``
:Schema version: 1.4

.. seealso::

    `hoomd.State` for a full description of how HOOMD interprets this data.

Use-cases
---------

The GSD schema ``hoomd`` provides:

#. Every frame of GSD output is viable to restart a simulation
#. Support varying numbers of particles, bonds, etc...
#. Support varying attributes (type, mass, etc...)
#. Support orientation, angular momentum, and other fields.
#. Binary format on disk
#. High performance file read and write
#. Support logging computed quantities

Data chunks
-----------

Each frame in the ``hoomd`` schema may contain one or more data chunks. The layout and names of the chunks match those of the binary frame API in HOOMD-blue itself. Data chunks are organized in categories. These categories have no meaning in the ``hoomd`` schema specification, and are simply an organizational tool. Some file writers may implement options that act on categories (i.e. write **attributes** out to every frame, or just frame 0).

Values are well defined for all fields at all frames. When a data chunk is present in frame *i*, it defines the values for the frame. When it is not present, the data chunk of the same name at frame 0 defines the values for frame *i* (when *N* is equal between the frames). If the data chunk is not present in frame 0, or *N* differs between frames, values are default. Default values allow file sizes to remain small. For example, a simulation with point particles where orientation is always (1,0,0,0) would not write any orientation chunk to the file.

*N* may be zero. When *N* is zero, an index entry may be written for a data chunk with no actual data written to the file for that chunk.
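To make the default and frame-0 fallback rules concrete, here is a short sketch using the reference implementation in `gsd.hoomd`, which (as described in the examples documentation) only writes chunks that differ from frame 0 or from the schema defaults. The file name is arbitrary:

.. code-block:: python

    import gsd.fl
    import gsd.hoomd

    with gsd.hoomd.open(name='defaults.gsd', mode='w') as traj:
        frame = gsd.hoomd.Frame()
        frame.particles.N = 2
        frame.particles.types = ['A', 'B']
        frame.particles.typeid = [0, 1]
        frame.particles.position = [[0, 0, 0], [1, 0, 0]]
        frame.configuration.box = [10, 10, 10, 0, 0, 0]
        traj.append(frame)

        # Second frame: only the positions change.
        frame.particles.position = [[0, 0, 1], [1, 0, 1]]
        traj.append(frame)

    with gsd.fl.open(name='defaults.gsd', mode='r') as f:
        # typeid is stored once, in frame 0 ...
        print(f.chunk_exists(frame=0, name='particles/typeid'))  # True
        print(f.chunk_exists(frame=1, name='particles/typeid'))  # False
        # ... but position is stored in both frames because it changed.
        print(f.chunk_exists(frame=1, name='particles/position'))  # True
        # orientation is never written: every particle has the default (1,0,0,0).
        print(f.chunk_exists(frame=0, name='particles/orientation'))  # False

    # Readers fall back to frame 0 (or the default) for missing chunks:
    with gsd.hoomd.open(name='defaults.gsd', mode='r') as traj:
        print(traj[1].particles.typeid)       # [0 1], provided from frame 0
        print(traj[1].particles.orientation)  # default unit quaternions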
================================= ========= ====== ==== ======= ================ Name Category Type Size Default Units ================================= ========= ====== ==== ======= ================ **Configuration** :chunk:`configuration/step` uint64 1x1 0 number :chunk:`configuration/dimensions` uint8 1x1 3 number :chunk:`configuration/box` float 6x1 *varies* **Particle data** :chunk:`particles/N` attribute uint32 1x1 0 number :chunk:`particles/types` attribute int8 NTxM ['A'] UTF-8 :chunk:`particles/typeid` attribute uint32 Nx1 0 number :chunk:`particles/type_shapes` attribute int8 NTxM UTF-8 :chunk:`particles/mass` attribute float Nx1 1.0 mass :chunk:`particles/charge` attribute float Nx1 0.0 charge :chunk:`particles/diameter` attribute float Nx1 1.0 length :chunk:`particles/body` attribute int32 Nx1 -1 number :chunk:`particles/moment_inertia` attribute float Nx3 0,0,0 mass * length^2 :chunk:`particles/position` property float Nx3 0,0,0 length :chunk:`particles/orientation` property float Nx4 1,0,0,0 unit quaternion :chunk:`particles/velocity` momentum float Nx3 0,0,0 length/time :chunk:`particles/angmom` momentum float Nx4 0,0,0,0 quaternion :chunk:`particles/image` momentum int32 Nx3 0,0,0 number **Bond data** :chunk:`bonds/N` topology uint32 1x1 0 number :chunk:`bonds/types` topology int8 NTxM UTF-8 :chunk:`bonds/typeid` topology uint32 Nx1 0 number :chunk:`bonds/group` topology uint32 Nx2 0,0 number **Angle data** :chunk:`angles/N` topology uint32 1x1 0 number :chunk:`angles/types` topology int8 NTxM UTF-8 :chunk:`angles/typeid` topology uint32 Nx1 0 number :chunk:`angles/group` topology uint32 Nx3 0,0,0 number **Dihedral data** :chunk:`dihedrals/N` topology uint32 1x1 0 number :chunk:`dihedrals/types` topology int8 NTxM UTF-8 :chunk:`dihedrals/typeid` topology uint32 Nx1 0 number :chunk:`dihedrals/group` topology uint32 Nx4 0,0,0,0 number **Improper data** :chunk:`impropers/N` topology uint32 1x1 0 number :chunk:`impropers/types` topology int8 NTxM UTF-8 :chunk:`impropers/typeid` topology uint32 Nx1 0 number :chunk:`impropers/group` topology uint32 Nx4 0,0,0,0 number **Constraint data** :chunk:`constraints/N` topology uint32 1x1 0 number :chunk:`constraints/value` topology float Nx1 0 length :chunk:`constraints/group` topology uint32 Nx2 0,0 number **Special pairs data** :chunk:`pairs/N` topology uint32 1x1 0 number :chunk:`pairs/types` topology int8 NTxM utf-8 :chunk:`pairs/typeid` topology uint32 Nx1 0 number :chunk:`pairs/group` topology uint32 Nx2 0,0 number ================================= ========= ====== ==== ======= ================ Configuration ------------- .. chunk:: configuration/step :Type: uint64 :Size: 1x1 :Default: 0 :Units: number Simulation time step. .. chunk:: configuration/dimensions :Type: uint8 :Size: 1x1 :Default: 3 :Units: number Number of dimensions in the simulation. Must be 2 or 3. .. note:: When using `gsd.hoomd.Frame`, the object will try to intelligently default to a dimension. When setting a box with :math:`L_z = 0`, ``dimensions`` will default to 2 otherwise 3. Explicit setting of this value by users always takes precedence. .. chunk:: configuration/box :Type: float :Size: 6x1 :Default: [1,1,1,0,0,0] :Units: *varies* Simulation box. Each array element defines a different box property. See the hoomd documentation for a full description on how these box parameters map to a triclinic geometry. 
* ``box[0:3]``: :math:`(l_x, l_y, l_z)` the box length in each direction, in length units * ``box[3:]``: :math:`(xy, xz, yz)` the tilt factors, dimensionless values Particle data ------------- Within a single frame, the number of particles *N* and *NT* are fixed for all chunks. *N* and *NT* may vary from one frame to the next. All values are stored in hoomd native units. Attributes ^^^^^^^^^^ .. chunk:: particles/N :Type: uint32 :Size: 1x1 :Default: 0 :Units: number Define *N*, the number of particles, for all data chunks ``particles/*``. .. chunk:: particles/types :Type: int8 :Size: NTxM :Default: ['A'] :Units: UTF-8 Implicitly define *NT*, the number of particle types, for all data chunks ``particles/*``. *M* must be large enough to accommodate each type name as a null terminated UTF-8 character string. Row *i* of the 2D matrix is the type name for particle type *i*. .. chunk:: particles/typeid :Type: uint32 :Size: Nx1 :Default: 0 :Units: number Store the type id of each particle. All id's must be less than *NT*. A particle with type *id* has a type name matching the corresponding row in :chunk:`particles/types`. .. chunk:: particles/type_shapes :Type: int8 :Size: NTxM :Default: *empty* :Units: UTF-8 Store a per-type shape definition for visualization. A dictionary is stored for each of the *NT* types, corresponding to a shape for visualization of that type. *M* must be large enough to accommodate the shape definition as a null-terminated UTF-8 JSON-encoded string. See: :ref:`shapes` for examples. .. chunk:: particles/mass :Type: float (32-bit) :Size: Nx1 :Default: 1.0 :Units: mass Store the mass of each particle. .. chunk:: particles/charge :Type: float (32-bit) :Size: Nx1 :Default: 0.0 :Units: charge Store the charge of each particle. .. chunk:: particles/diameter :Type: float (32-bit) :Size: Nx1 :Default: 1.0 :Units: length Store the diameter of each particle. .. chunk:: particles/body :Type: int32 :Size: Nx1 :Default: -1 :Units: number Store the composite body associated with each particle. The value -1 indicates no body. The body field may be left out of input files, as hoomd will create the needed constituent particles. .. chunk:: particles/moment_inertia :Type: float (32-bit) :Size: Nx3 :Default: 0,0,0 :Units: mass * length^2 Store the moment_inertia of each particle :math:`(I_{xx}, I_{yy}, I_{zz})`. This inertia tensor is diagonal in the body frame of the particle. The default value is for point particles. Properties ^^^^^^^^^^ .. chunk:: particles/position :Type: float (32-bit) :Size: Nx3 :Default: 0,0,0 :Units: length Store the position of each particle (*x*, *y*, *z*). All particles in the simulation are referenced by a tag. The position data chunk (and all other per particle data chunks) list particles in tag order. The first particle listed has tag 0, the second has tag 1, ..., and the last has tag N-1 where N is the number of particles in the simulation. All particles must be inside the box: * :math:`-l_x/2 + (xz-xy \cdot yz) \cdot z + xy \cdot y \le x < l_x/2 + (xz-xy \cdot yz) \cdot z + xy \cdot y` * :math:`-l_y/2 + yz \cdot z \le y < l_y/2 + yz \cdot z` * :math:`-l_z/2 \le z < l_z/2` Where :math:`l_x`, :math:`l_y`, :math:`l_z`, :math:`xy`, :math:`xz`, and :math:`yz` are the simulation box parameters (:chunk:`configuration/box`). .. chunk:: particles/orientation :Type: float (32-bit) :Size: Nx4 :Default: 1,0,0,0 :Units: unit quaternion Store the orientation of each particle. 
In scalar + vector notation, this is :math:`(r, a_x, a_y, a_z)`, where the quaternion is :math:`q = r + a_xi + a_yj + a_zk`. A unit quaternion has the property: :math:`\sqrt{r^2 + a_x^2 + a_y^2 + a_z^2} = 1`. Momenta ^^^^^^^^ .. chunk:: particles/velocity :Type: float (32-bit) :Size: Nx3 :Default: 0,0,0 :Units: length/time Store the velocity of each particle :math:`(v_x, v_y, v_z)`. .. chunk:: particles/angmom :Type: float (32-bit) :Size: Nx4 :Default: 0,0,0,0 :Units: quaternion Store the angular momentum of each particle as a quaternion. See the HOOMD documentation for information on how to convert to a vector representation. .. chunk:: particles/image :Type: int32 :Size: Nx3 :Default: 0,0,0 :Units: number Store the number of times each particle has wrapped around the box :math:`(i_x, i_y, i_z)`. In constant volume simulations, the unwrapped position in the particle's full trajectory is * :math:`x_u = x + i_x \cdot l_x + xy \cdot i_y \cdot l_y + xz \cdot i_z \cdot l_z` * :math:`y_u = y + i_y \cdot l_y + yz \cdot i_z \cdot l_z` * :math:`z_u = z + i_z \cdot l_z` Topology -------- .. chunk:: bonds/N :Type: uint32 :Size: 1x1 :Default: 0 :Units: number Define *N*, the number of bonds, for all data chunks ``bonds/*``. .. chunk:: bonds/types :Type: int8 :Size: NTxM :Default: *empty* :Units: UTF-8 Implicitly define *NT*, the number of bond types, for all data chunks ``bonds/*``. *M* must be large enough to accommodate each type name as a null terminated UTF-8 character string. Row *i* of the 2D matrix is the type name for bond type *i*. By default, there are 0 bond types. .. chunk:: bonds/typeid :Type: uint32 :Size: Nx1 :Default: 0 :Units: number Store the type id of each bond. All id's must be less than *NT*. A bond with type *id* has a type name matching the corresponding row in :chunk:`bonds/types`. .. chunk:: bonds/group :Type: uint32 :Size: Nx2 :Default: 0,0 :Units: number Store the particle tags in each bond. .. chunk:: angles/N :Type: uint32 :Size: 1x1 :Default: 0 :Units: number Define *N*, the number of angles, for all data chunks ``angles/*``. .. chunk:: angles/types :Type: int8 :Size: NTxM :Default: *empty* :Units: UTF-8 Implicitly define *NT*, the number of angle types, for all data chunks ``angles/*``. *M* must be large enough to accommodate each type name as a null terminated UTF-8 character string. Row *i* of the 2D matrix is the type name for angle type *i*. By default, there are 0 angle types. .. chunk:: angles/typeid :Type: uint32 :Size: Nx1 :Default: 0 :Units: number Store the type id of each angle. All id's must be less than *NT*. A angle with type *id* has a type name matching the corresponding row in :chunk:`angles/types`. .. chunk:: angles/group :Type: uint32 :Size: Nx3 :Default: 0,0,0 :Units: number Store the particle tags in each angle. .. chunk:: dihedrals/N :Type: uint32 :Size: 1x1 :Default: 0 :Units: number Define *N*, the number of dihedrals, for all data chunks ``dihedrals/*``. .. chunk:: dihedrals/types :Type: int8 :Size: NTxM :Default: *empty* :Units: UTF-8 Implicitly define *NT*, the number of dihedral types, for all data chunks ``dihedrals/*``. *M* must be large enough to accommodate each type name as a null terminated UTF-8 character string. Row *i* of the 2D matrix is the type name for dihedral type *i*. By default, there are 0 dihedral types. .. chunk:: dihedrals/typeid :Type: uint32 :Size: Nx1 :Default: 0 :Units: number Store the type id of each dihedral. All id's must be less than *NT*. 
A dihedral with type *id* has a type name matching the corresponding row in :chunk:`dihedrals/types`. .. chunk:: dihedrals/group :Type: uint32 :Size: Nx4 :Default: 0,0,0,0 :Units: number Store the particle tags in each dihedral. .. chunk:: impropers/N :Type: uint32 :Size: 1x1 :Default: 0 :Units: number Define *N*, the number of impropers, for all data chunks ``impropers/*``. .. chunk:: impropers/types :Type: int8 :Size: NTxM :Default: *empty* :Units: UTF-8 Implicitly define *NT*, the number of improper types, for all data chunks ``impropers/*``. *M* must be large enough to accommodate each type name as a null terminated UTF-8 character string. Row *i* of the 2D matrix is the type name for improper type *i*. By default, there are 0 improper types. .. chunk:: impropers/typeid :Type: uint32 :Size: Nx1 :Default: 0 :Units: number Store the type id of each improper. All id's must be less than *NT*. An improper with type *id* has a type name matching the corresponding row in :chunk:`impropers/types`. .. chunk:: impropers/group :Type: uint32 :Size: Nx4 :Default: 0,0,0,0 :Units: number Store the particle tags in each improper. .. chunk:: constraints/N :Type: uint32 :Size: 1x1 :Default: 0 :Units: number Define *N*, the number of constraints, for all data chunks ``constraints/*``. .. chunk:: constraints/value :Type: float :Size: Nx1 :Default: 0 :Units: length Store the distance of each constraint. Each constraint defines a fixed distance between two particles. .. chunk:: constraints/group :Type: uint32 :Size: Nx2 :Default: 0,0 :Units: number Store the particle tags in each constraint. .. chunk:: pairs/N :Type: uint32 :Size: 1x1 :Default: 0 :Units: number Define *N*, the number of special pair interactions, for all data chunks ``pairs/*``. .. versionadded:: 1.1 .. chunk:: pairs/types :Type: int8 :Size: NTxM :Default: *empty* :Units: UTF-8 Implicitly define *NT*, the number of special pair types, for all data chunks ``pairs/*``. *M* must be large enough to accommodate each type name as a null terminated UTF-8 character string. Row *i* of the 2D matrix is the type name for special pair type *i*. By default, there are 0 special pair types. .. versionadded:: 1.1 .. chunk:: pairs/typeid :Type: uint32 :Size: Nx1 :Default: 0 :Units: number Store the type id of each special pair interaction. All id's must be less than *NT*. A pair with type *id* has a type name matching the corresponding row in :chunk:`pairs/types`. .. versionadded:: 1.1 .. chunk:: pairs/group :Type: uint32 :Size: Nx2 :Default: 0,0 :Units: number Store the particle tags in each special pair interaction. .. versionadded:: 1.1 Logged data ------------ Users may store logged data in ``log/*`` data chunks. Logged data encompasses values computed at simulation time that are too expensive or cumbersome to re-compute in post-processing. This specification does not define specific chunk names or define logged data. Users may select any valid name for logged data chunks as appropriate for their workflow. For any named logged data chunk present in any frame of the file: if a chunk is not present in a given frame i != 0, the implementation should provide the quantity as read from frame 0 for that frame. GSD files that include a logged data chunk only in some frames i != 0 and not in frame 0 are invalid. By convention, per-particle and per-bond logged data should have a chunk name starting with ``log/particles/`` and ``log/bonds/``, respectively. Scalar, vector, and string values may be stored under a different prefix starting with ``log/``.
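For example, the following minimal sketch writes one user-defined per-particle logged chunk and one scalar logged chunk with the file layer API in :py:mod:`gsd.fl`. The file name, chunk names, data values, and schema version shown here are illustrative only; a complete file would normally also contain the configuration, particle, and topology chunks described above::

    import gsd.fl
    import numpy

    with gsd.fl.open(name='example.gsd', mode='w',
                     application='My application', schema='hoomd',
                     schema_version=[1, 4]) as f:
        # A per-particle quantity for a 4-particle frame (N=4, M=1).
        f.write_chunk(name='log/particles/example_value',
                      data=numpy.array([1.0, 2.0, 3.0, 4.0],
                                       dtype=numpy.float32))
        # A scalar quantity stored as a 1x1 chunk.
        f.write_chunk(name='log/example_scalar',
                      data=numpy.array([42.0], dtype=numpy.float64))
        f.end_frame()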
This specification may recognize additional conventions in later versions without invalidating existing files. ========================================================== ====== ========= ================ Name Type Size Units ========================================================== ====== ========= ================ :chunk:`log/particles/user_defined` n/a NxM user-defined :chunk:`log/bonds/user_defined` n/a NxM user-defined :chunk:`log/user_defined` n/a NxM user-defined ========================================================== ====== ========= ================ .. chunk:: log/particles/user_defined :Type: user-defined :Size: NxM :Units: user-defined This chunk is a placeholder for any number of user-defined per-particle quantities. *N* is the number of particles in this frame. *M*, the data type, the units, and the chunk name (after the prefix ``log/particles/``) are user-defined. .. versionadded:: 1.4 .. chunk:: log/bonds/user_defined :Type: user-defined :Size: NxM :Units: user-defined This chunk is a placeholder for any number of user-defined per-bond quantities. *N* is the number of bonds in this frame. *M*, the data type, the units, and the chunk name (after the prefix ``log/bonds/``) are user-defined. .. versionadded:: 1.4 .. chunk:: log/user_defined :Type: user-defined :Size: NxM :Units: user-defined This chunk is a placeholder for any number of user-defined quantities. *N*, *M*, the data type, the units, and the chunk name (after the prefix ``log/``) are user-defined. .. versionadded:: 1.4 State data ------------ HOOMD stores auxiliary state information in ``state/*`` data chunks. Auxiliary state encompasses internal state to any integrator, updater, or other class that is not part of the particle system state but is also not a fixed parameter. For example, the internal degrees of freedom of an integrator. Auxiliary state is useful when restarting simulations. HOOMD only stores state in GSD files when requested explicitly by the user. Only a few of the documented state data chunks will be present in any given GSD file, and not all state chunks are valid. Thus, state data chunks do not have default values. If a chunk is not present in the file, that state does not have a well-defined value. .. note:: HOOMD-blue >= v3.0.0 does not write state data.
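Because state chunks have no default values, a reader should test whether a state chunk is present before reading it. The following minimal sketch uses the :py:mod:`gsd.fl` API; the file name is hypothetical and assumes a file written by an older application that stored HPMC sphere state::

    import gsd.fl

    # 'old_run.gsd' is a hypothetical file that may contain HPMC state.
    with gsd.fl.open(name='old_run.gsd', mode='r') as f:
        if f.chunk_exists(frame=0, name='state/hpmc/sphere/radius'):
            radius = f.read_chunk(frame=0, name='state/hpmc/sphere/radius')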
========================================================== ====== ========= ================ Name Type Size Units ========================================================== ====== ========= ================ **HPMC integrator state** :chunk:`state/hpmc/integrate/d` double 1x1 length :chunk:`state/hpmc/integrate/a` double 1x1 number :chunk:`state/hpmc/sphere/radius` float NTx1 length :chunk:`state/hpmc/sphere/orientable` uint8 NTx1 boolean :chunk:`state/hpmc/ellipsoid/a` float NTx1 length :chunk:`state/hpmc/ellipsoid/b` float NTx1 length :chunk:`state/hpmc/ellipsoid/c` float NTx1 length :chunk:`state/hpmc/convex_polyhedron/N` uint32 NTx1 number :chunk:`state/hpmc/convex_polyhedron/vertices` float sum(N)x3 length :chunk:`state/hpmc/convex_spheropolyhedron/N` uint32 NTx1 number :chunk:`state/hpmc/convex_spheropolyhedron/vertices` float sum(N)x3 length :chunk:`state/hpmc/convex_spheropolyhedron/sweep_radius` float NTx1 length :chunk:`state/hpmc/convex_polygon/N` uint32 NTx1 number :chunk:`state/hpmc/convex_polygon/vertices` float sum(N)x2 length :chunk:`state/hpmc/convex_spheropolygon/N` uint32 NTx1 number :chunk:`state/hpmc/convex_spheropolygon/vertices` float sum(N)x2 length :chunk:`state/hpmc/convex_spheropolygon/sweep_radius` float NTx1 length :chunk:`state/hpmc/simple_polygon/N` uint32 NTx1 number :chunk:`state/hpmc/simple_polygon/vertices` float sum(N)x2 length ========================================================== ====== ========= ================ HPMC integrator state ^^^^^^^^^^^^^^^^^^^^^ *NT* is the number of particle types. .. chunk:: state/hpmc/integrate/d :Type: double :Size: 1x1 :Units: length *d* is the maximum trial move displacement. .. versionadded:: 1.2 .. chunk:: state/hpmc/integrate/a :Type: double :Size: 1x1 :Units: number *a* is the size of the maximum rotation move. .. versionadded:: 1.2 .. chunk:: state/hpmc/sphere/radius :Type: float :Size: NTx1 :Units: length Sphere radius for each particle type. .. versionadded:: 1.2 .. chunk:: state/hpmc/sphere/orientable :Type: uint8 :Size: NTx1 :Units: boolean Orientable flag for each particle type. .. versionadded:: 1.3 .. chunk:: state/hpmc/ellipsoid/a :Type: float :Size: NTx1 :Units: length Size of the first ellipsoid semi-axis for each particle type. .. versionadded:: 1.2 .. chunk:: state/hpmc/ellipsoid/b :Type: float :Size: NTx1 :Units: length Size of the second ellipsoid semi-axis for each particle type. .. versionadded:: 1.2 .. chunk:: state/hpmc/ellipsoid/c :Type: float :Size: NTx1 :Units: length Size of the third ellipsoid semi-axis for each particle type. .. versionadded:: 1.2 .. chunk:: state/hpmc/convex_polyhedron/N :Type: uint32 :Size: NTx1 :Units: number Number of vertices defined for each type. .. versionadded:: 1.2 .. chunk:: state/hpmc/convex_polyhedron/vertices :Type: float :Size: sum(N)x3 :Units: length Position of the vertices in the shape for all types. The shape for type 0 is the first N[0] vertices, the shape for type 1 is the next N[1] vertices, and so on... .. versionadded:: 1.2 .. chunk:: state/hpmc/convex_spheropolyhedron/N :Type: uint32 :Size: NTx1 :Units: number Number of vertices defined for each type. .. versionadded:: 1.2 .. chunk:: state/hpmc/convex_spheropolyhedron/vertices :Type: float :Size: sum(N)x3 :Units: length Position of the vertices in the shape for all types. The shape for type 0 is the first N[0] vertices, the shape for type 1 is the next N[1] vertices, and so on... .. versionadded:: 1.2 .. 
chunk:: state/hpmc/convex_spheropolyhedron/sweep_radius :Type: float :Size: NTx1 :Units: length Sweep radius for each type. .. versionadded:: 1.2 .. chunk:: state/hpmc/convex_polygon/N :Type: uint32 :Size: NTx1 :Units: number Number of vertices defined for each type. .. versionadded:: 1.2 .. chunk:: state/hpmc/convex_polygon/vertices :Type: float :Size: sum(N)x2 :Units: length Position of the vertices in the shape for all types. The shape for type 0 is the first N[0] vertices, the shape for type 1 is the next N[1] vertices, and so on... .. versionadded:: 1.2 .. chunk:: state/hpmc/convex_spheropolygon/N :Type: uint32 :Size: NTx1 :Units: number Number of vertices defined for each type. .. versionadded:: 1.2 .. chunk:: state/hpmc/convex_spheropolygon/vertices :Type: float :Size: sum(N)x2 :Units: length Position of the vertices in the shape for all types. The shape for type 0 is the first N[0] vertices, the shape for type 1 is the next N[1] vertices, and so on... .. versionadded:: 1.2 .. chunk:: state/hpmc/convex_spheropolygon/sweep_radius :Type: float :Size: NTx1 :Units: length Sweep radius for each type. .. versionadded:: 1.2 .. chunk:: state/hpmc/simple_polygon/N :Type: uint32 :Size: NTx1 :Units: number Number of vertices defined for each type. .. versionadded:: 1.2 .. chunk:: state/hpmc/simple_polygon/vertices :Type: float :Size: sum(N)x2 :Units: length Position of the vertices in the shape for all types. The shape for type 0 is the first N[0] vertices, the shape for type 1 is the next N[1] vertices, and so on... .. versionadded:: 1.2 gsd-3.3.0/doc/shapes.rst000066400000000000000000000115371462564674300151040ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. .. _shapes: Shape Visualization =================== The chunk :chunk:`particles/type_shapes` stores information about shapes corresponding to particle types. Shape definitions are stored for each type as a UTF-8 encoded JSON string containing key-value pairs. The class of a shape is defined by the ``type`` key. All other keys define properties of that shape. Keys without a default value are required for a valid shape specification. Empty (Undefined) Shape ----------------------- An empty dictionary can be used for undefined shapes. A visualization application may choose how to interpret this, e.g. by drawing nothing or drawing spheres. Example:: {} Spheres ------- Type: ``Sphere`` Spheres' dimensionality (2D circles or 3D spheres) can be inferred from the system box dimensionality. =============== =============== ====== ==== ======= ====== Key Description Type Size Default Units =============== =============== ====== ==== ======= ====== diameter Sphere diameter float 1x1 length =============== =============== ====== ==== ======= ====== Example:: { "type": "Sphere", "diameter": 2.0 } Ellipsoids ---------- Type: ``Ellipsoid`` The ellipsoid class has principal axes a, b, c corresponding to its radii in the x, y, and z directions. 
=============== ===================== ====== ==== ======= ====== Key Description Type Size Default Units =============== ===================== ====== ==== ======= ====== a Radius in x direction float 1x1 length b Radius in y direction float 1x1 length c Radius in z direction float 1x1 length =============== ===================== ====== ==== ======= ====== Example:: { "type": "Ellipsoid", "a": 7.0, "b": 5.0, "c": 3.0 } Polygons -------- Type: ``Polygon`` A simple polygon with its vertices specified in a counterclockwise order. Spheropolygons can be represented using this shape type, through the ``rounding_radius`` key. =============== =============== ===== ==== ======= ====== Key Description Type Size Default Units =============== =============== ===== ==== ======= ====== rounding_radius Rounding radius float 1x1 0.0 length vertices Shape vertices float Nx2 length =============== =============== ===== ==== ======= ====== Example:: { "type": "Polygon", "rounding_radius": 0.1, "vertices": [[-0.5, -0.5], [0.5, -0.5], [0.5, 0.5]] } Convex Polyhedra ---------------- Type: ``ConvexPolyhedron`` A convex polyhedron with vertices specifying the convex hull of the shape. Spheropolyhedra can be represented using this shape type, through the ``rounding_radius`` key. =============== =============== ===== ==== ======= ====== Key Description Type Size Default Units =============== =============== ===== ==== ======= ====== rounding_radius Rounding radius float 1x1 0.0 length vertices Shape vertices float Nx3 length =============== =============== ===== ==== ======= ====== Example:: { "type": "ConvexPolyhedron", "rounding_radius": 0.1, "vertices": [[0.5, 0.5, 0.5], [0.5, -0.5, -0.5], [-0.5, 0.5, -0.5], [-0.5, -0.5, 0.5]] } General 3D Meshes ----------------- Type: ``Mesh`` A list of lists of indices are used to specify faces. Faces must contain 3 or more vertex indices. The vertex indices must be zero-based. Faces must be defined with a counterclockwise winding order (to produce an "outward" normal). =============== ================ ====== ==== ======= ====== Key Description Type Size Default Units =============== ================ ====== ==== ======= ====== vertices Shape vertices float Nx3 length indices Vertices indices uint32 number =============== ================ ====== ==== ======= ====== Example:: { "type": "Mesh", "vertices": [[0.5, 0.5, 0.5], [0.5, -0.5, -0.5], [-0.5, 0.5, -0.5], [-0.5, -0.5, 0.5]], "indices": [[0, 1, 2], [0, 3, 1], [0, 2, 3], [1, 3, 2]] } Sphere Unions ------------- Type: ``SphereUnion`` A collection of spheres, defined by their diameters and centers. =============== ================ ===== ==== ======= ====== Key Description Type Size Default Units =============== ================ ===== ==== ======= ====== diameters Sphere diameters float Nx1 length centers Sphere centers float Nx3 length =============== ================ ===== ==== ======= ====== Example:: { "type": "SphereUnion", "centers": [[0, 0, 1.0], [0, 1.0, 0], [1.0, 0, 0]], "diameters": [0.5, 0.5, 0.5] } gsd-3.3.0/doc/specification.rst000066400000000000000000000003251462564674300164320ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. Part of GSD, released under the BSD 2-Clause License. Specification ============= .. toctree:: schema-hoomd shapes file-layer gsd-3.3.0/doc/style.rst000066400000000000000000000067451462564674300147660ustar00rootroot00000000000000.. Copyright (c) 2016-2024 The Regents of the University of Michigan .. 
Part of GSD, released under the BSD 2-Clause License. Code style ========== All code in GSD must follow a consistent style to ensure readability. We provide configuration files for linters (specified below) so that developers can automatically validate and format files. These tools are configured for use with `pre-commit`_ in ``.pre-commit-config.yaml``. You can install pre-commit hooks to validate your code. Checks will run on pull requests. Run checks manually with:: pre-commit run --all-files .. _pre-commit: https://pre-commit.com/ Python ------ Python code in GSD should follow `PEP8`_ with the formatting performed by `yapf`_ (configuration in ``setup.cfg``). Code should pass all **flake8** tests and formatted by **yapf**. .. _PEP8: https://www.python.org/dev/peps/pep-0008 .. _yapf: https://github.com/google/yapf Tools ^^^^^ * Linter: `flake8 `_ * With these plugins: * `pep8-naming `_ * `flake8-docstrings `_ * `flake8-rst-docstrings `_ * Configure flake8 in your editor to see violations on save. * Autoformatter: `yapf `_ * Run: ``pre-commit run --all-files`` to apply style changes to the whole repository. Documentation ^^^^^^^^^^^^^ Python code should be documented with docstrings and added to the Sphinx documentation index in ``doc/``. Docstrings should follow `Google style`_ formatting for use in `Napoleon`_. .. _Google Style: https://www.sphinx-doc.org/en/master/usage/extensions/example_google.html#example-google .. _Napoleon: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html C --- * Style is set by clang-format=11 * Whitesmith's indentation style. * 100 character line width. * Indent only with spaces. * 4 spaces per indent level. * See :file:`.clang-format` for the full **clang-format** configuration. * Naming conventions: * Functions: lowercase with words separated by underscores ``function_name``. * Structures: lowercase with words separated by underscores ``struct_name``. * Constants: all upper-case with words separated by underscores ``SOME_CONSTANT``. Tools ^^^^^ * Autoformatter: `clang-format `_. * Linter: `clang-tidy `_ * Compile **GSD** with **CMake** to see **clang-tidy** output. Documentation ^^^^^^^^^^^^^ Documentation comments should be in Javadoc format and precede the item they document for compatibility with Doxygen and most source code editors. Multi-line documentation comment blocks start with ``/**`` and single line ones start with ``///``. See :file:`gsd.h` for an example. Restructured Text/Markdown files -------------------------------- * 80 character line width. * Use spaces to indent. * Indentation levels are set by the respective formats. Other file types ---------------- Use your best judgment and follow existing patterns when styling CMake and other files types. The following general guidelines apply: * 100 character line width. * 4 spaces per indent level. * 4 space indent. Editor configuration -------------------- `Visual Studio Code `_ users: Open the provided workspace file (``gsd.code-workspace``) which provides configuration settings for these style guidelines. 
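Docstring example
-----------------

As a quick reference for the Google style docstring conventions described above, here is a minimal sketch; the function and its arguments are hypothetical::

    def example_area(width, height):
        """Compute the area of a rectangle.

        Args:
            width (float): Width of the rectangle.
            height (float): Height of the rectangle.

        Returns:
            float: The product of ``width`` and ``height``.
        """
        return width * height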
gsd-3.3.0/gsd/000077500000000000000000000000001462564674300130705ustar00rootroot00000000000000gsd-3.3.0/gsd/CMakeLists.txt000066400000000000000000000040641462564674300156340ustar00rootroot00000000000000############### cython based modules add_custom_command ( OUTPUT fl.c DEPENDS fl.pyx libgsd.pxd COMMAND ${CYTHON_EXECUTABLE} ARGS -${PYTHON_VERSION_MAJOR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/fl.pyx -o ${CMAKE_CURRENT_BINARY_DIR}/fl.c COMMENT "Cythonizing fl.pyx" ) set_source_files_properties(gsd.c PROPERTIES COMPILE_DEFINITIONS NO_IMPORT_ARRAY) add_library(gsd_objects OBJECT gsd.c) set_target_properties(gsd_objects PROPERTIES POSITION_INDEPENDENT_CODE TRUE) if (CLANG_TIDY_EXE) set_target_properties(gsd_objects PROPERTIES C_CLANG_TIDY "${DO_CLANG_TIDY}") endif() add_library(fl SHARED fl.c gsd.c) target_compile_definitions(fl PRIVATE NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) set_target_properties(fl PROPERTIES PREFIX "" OUTPUT_NAME "fl" MACOSX_RPATH "On") if(APPLE) set_target_properties(fl PROPERTIES SUFFIX ".so") endif(APPLE) if (WIN32) set_target_properties(fl PROPERTIES SUFFIX ".pyd") endif() if(WIN32) # Link to the Python libraries on windows target_link_libraries(fl ${PYTHON_LIBRARY}) else() # Do not link to the Python libraries on Mac/Linux - symbols are provided by # the `python` executable. "-undefined dynamic_lookup" is needed on Mac target_link_options( fl PRIVATE "$<$:LINKER:-undefined,dynamic_lookup>") endif() ################ Python only modules # copy python modules to the build directory to make it a working python package MACRO(copy_file file) add_custom_command ( OUTPUT ${file} DEPENDS ${file} POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${file} ${CMAKE_CURRENT_BINARY_DIR}/${file} COMMENT "Copy gsd/${file}" ) ENDMACRO(copy_file) set(files __init__.py __main__.py hoomd.py pygsd.py version.py conftest.py pytest_plugin_validate.py) foreach(file ${files}) copy_file(${file}) endforeach() add_custom_target(copy_gsd ALL DEPENDS ${files}) add_subdirectory(test) gsd-3.3.0/gsd/__init__.py000066400000000000000000000014311462564674300152000ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. """The GSD main module. The main package :py:mod:`gsd` is the root package. It holds the submodules :py:mod:`gsd.fl` and :py:mod:`gsd.hoomd`, but does not import them by default. You must explicitly import these modules before use:: import gsd.fl import gsd.hoomd """ import signal import sys from . import version # Install a SIGTERM handler that gracefully exits, allowing open files to flush # buffered writes and close. Catch ValueError and pass as there is no way to # determine if this is the main interpreter running the main thread prior to # the call. try: signal.signal(signal.SIGTERM, lambda n, f: sys.exit(1)) except ValueError: pass gsd-3.3.0/gsd/__main__.py000066400000000000000000000113351462564674300151650ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. """The GSD command line interface. To simplify ad hoc usage of :py:mod:`gsd`, this module provides a command line interface for interacting with GSD files. The primary entry point is a single command for starting a Python interpreter with a GSD file pre-loaded:: $ gsd read trajectory.gsd The following options are available for the ``read`` subcommand: .. program:: read .. 
option:: -s schema, --schema schema The schema of the GSD file. Supported values for ``schema`` are "hoomd" and "none". .. option:: -m mode, --mode mode The mode in which to open the file. Valid modes are identical to those accepted by :func:`gsd.fl.open`. """ import argparse import code import sys from . import fl, version from .hoomd import open as hoomd_open def _print_err(msg=None, *args): print(msg, *args, file=sys.stderr) SHELL_BANNER = """Python {python_version} gsd {gsd_version} File: {fn} {extras} The GSD file handle is available via the "handle" variable. For supported schema, you may access the trajectory using the "traj" variable. Type "help(handle)" or "help(traj)" for more information. The gsd and gsd.fl packages are always loaded. Schema-specific modules (e.g. gsd.hoomd) are loaded if available.""" def main_read(args): """Main function to launch a Python interpreter with an open GSD file.""" # Default to a new line for well-formatted printing. local_ns = { 'gsd': sys.modules['gsd'], 'gsd.hoomd': sys.modules['gsd.hoomd'], 'gsd.fl': sys.modules['gsd.fl'], } attributes = {} if args.schema == 'hoomd': traj = hoomd_open(args.file, mode=args.mode) handle = traj.file local_ns.update( { 'handle': handle, 'traj': traj, } ) attributes.update({'Number of frames': len(traj)}) else: if args.mode not in ['rb', 'rb+', 'ab', 'a', 'r', 'r+']: msg = 'Unsupported schema for creating a file.' raise ValueError(msg) handle = fl.open(args.file, args.mode) local_ns.update( { 'handle': handle, } ) extras = '\n'.join(f'{key}: {val}' for key, val in attributes.items()) code.interact( local=local_ns, banner=SHELL_BANNER.format( python_version=sys.version, gsd_version=version.version, fn=args.file, extras=extras + '\n', ), ) def main(): """Entry point to the GSD command-line interface. This function handles parsing command-line arguments and launching the appropriate subcommand based on the first argument to ``gsd`` on the command line. At present the following commands are supported: * read """ parser = argparse.ArgumentParser( description='The gsd package encodes canonical readers and writers ' 'for the gsd file format.' ) parser.add_argument( '--version', action='store_true', help='Display the version number and exit.' ) parser.add_argument( '--debug', action='store_true', help='Show traceback on error for debugging.' ) subparsers = parser.add_subparsers() parser_read = subparsers.add_parser('read') parser_read.add_argument('file', type=str, nargs='?', help='GSD file to read.') parser_read.add_argument( '-s', '--schema', type=str, default='hoomd', choices=['hoomd', 'none'], help='The file schema.', ) parser_read.add_argument( '-m', '--mode', type=str, default='r', choices=[ 'w', 'r', 'r+', 'x', 'a', ], help='The file mode.', ) parser_read.set_defaults(func=main_read) # This is a hack, as argparse itself does not # allow to parse only --version without any # of the other required arguments. 
if '--version' in sys.argv: print('gsd', version.version) sys.exit(0) args = parser.parse_args() if not hasattr(args, 'func'): parser.print_usage() sys.exit(2) try: args.func(args) except KeyboardInterrupt: _print_err() _print_err('Interrupted.') if args.debug: raise sys.exit(1) except RuntimeWarning as warning: _print_err(f'Warning: {warning}') if args.debug: raise sys.exit(1) except Exception as error: _print_err(f'Error: {error}') if args.debug: raise sys.exit(1) else: sys.exit(0) if __name__ == '__main__': main() gsd-3.3.0/gsd/conftest.py000066400000000000000000000003141462564674300152650ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. """Global pytest options.""" pytest_plugins = ('gsd.pytest_plugin_validate',) gsd-3.3.0/gsd/fl.pyx000066400000000000000000001103561462564674300142410ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. """GSD file layer API. Low level access to gsd files. :py:mod:`gsd.fl` allows direct access to create, read, and write ``gsd`` files. The module is implemented in C and is optimized. See :ref:`fl-examples` for detailed example code. * :py:class:`GSDFile` - Class interface to read and write gsd files. * :py:func:`open` - Open a gsd file. """ import logging import numpy import os from pickle import PickleError import warnings from libc.stdint cimport uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t,\ uint64_t, int64_t from libc.errno cimport errno cimport gsd.libgsd as libgsd cimport numpy logger = logging.getLogger('gsd.fl') #################### # Helper functions # cdef __format_errno(fname): """Return a tuple for constructing an IOError.""" return (errno, os.strerror(errno), fname) cdef __raise_on_error(retval, extra): """Raise the appropriate error type. 
Args: retval: Return value from a gsd C API call extra: Extra string to pass along with the exception """ if retval == libgsd.GSD_ERROR_IO: raise IOError(*__format_errno(extra)) elif retval == libgsd.GSD_ERROR_NOT_A_GSD_FILE: raise RuntimeError("Not a GSD file: " + extra) elif retval == libgsd.GSD_ERROR_INVALID_GSD_FILE_VERSION: raise RuntimeError("Unsupported GSD file version: " + extra) elif retval == libgsd.GSD_ERROR_FILE_CORRUPT: raise RuntimeError("Corrupt GSD file: " + extra) elif retval == libgsd.GSD_ERROR_MEMORY_ALLOCATION_FAILED: raise MemoryError("Memory allocation failed: " + extra) elif retval == libgsd.GSD_ERROR_NAMELIST_FULL: raise RuntimeError("GSD namelist is full: " + extra) elif retval == libgsd.GSD_ERROR_FILE_MUST_BE_WRITABLE: raise RuntimeError("File must be writable: " + extra) elif retval == libgsd.GSD_ERROR_FILE_MUST_BE_READABLE: raise RuntimeError("File must be readable: " + extra) elif retval == libgsd.GSD_ERROR_INVALID_ARGUMENT: raise RuntimeError("Invalid gsd argument: " + extra) elif retval != 0: raise RuntimeError("Unknown error: " + extra) # Getter methods for 2D numpy arrays of all supported types # cython needs strongly typed numpy arrays to get a void * # to the data, so we implement each by hand here and dispacth # from the read and write routines cdef void * __get_ptr_uint8(data): cdef numpy.ndarray[uint8_t, ndim=2, mode="c"] data_array_uint8 data_array_uint8 = data if (data.size == 0): return NULL else: return &data_array_uint8[0, 0] cdef void * __get_ptr_uint16(data): cdef numpy.ndarray[uint16_t, ndim=2, mode="c"] data_array_uint16 data_array_uint16 = data if (data.size == 0): return NULL else: return &data_array_uint16[0, 0] cdef void * __get_ptr_uint32(data): cdef numpy.ndarray[uint32_t, ndim=2, mode="c"] data_array_uint32 data_array_uint32 = data if (data.size == 0): return NULL else: return &data_array_uint32[0, 0] cdef void * __get_ptr_uint64(data): cdef numpy.ndarray[uint64_t, ndim=2, mode="c"] data_array_uint64 data_array_uint64 = data if (data.size == 0): return NULL else: return &data_array_uint64[0, 0] cdef void * __get_ptr_int8(data): cdef numpy.ndarray[int8_t, ndim=2, mode="c"] data_array_int8 data_array_int8 = data if (data.size == 0): return NULL else: return &data_array_int8[0, 0] cdef void * __get_ptr_int16(data): cdef numpy.ndarray[int16_t, ndim=2, mode="c"] data_array_int16 data_array_int16 = data if (data.size == 0): return NULL else: return &data_array_int16[0, 0] cdef void * __get_ptr_int32(data): cdef numpy.ndarray[int32_t, ndim=2, mode="c"] data_array_int32 data_array_int32 = data if (data.size == 0): return NULL else: return &data_array_int32[0, 0] cdef void * __get_ptr_int64(data): cdef numpy.ndarray[int64_t, ndim=2, mode="c"] data_array_int64 data_array_int64 = data if (data.size == 0): return NULL else: return &data_array_int64[0, 0] cdef void * __get_ptr_float32(data): cdef numpy.ndarray[float, ndim=2, mode="c"] data_array_float32 data_array_float32 = data if (data.size == 0): return NULL else: return &data_array_float32[0, 0] cdef void * __get_ptr_float64(data): cdef numpy.ndarray[double, ndim=2, mode="c"] data_array_float64 data_array_float64 = data if (data.size == 0): return NULL else: return &data_array_float64[0, 0] def open(name, mode, application=None, schema=None, schema_version=None): """open(name, mode, application=None, schema=None, schema_version=None) :py:func:`open` opens a GSD file and returns a :py:class:`GSDFile` instance. The return value of :py:func:`open` can be used as a context manager. 
Args: name (str): File name to open. mode (str): File access mode. application (str): Name of the application creating the file. schema (str): Name of the data schema. schema_version (tuple[int, int]): Schema version number (major, minor). Valid values for ``mode``: +------------------+---------------------------------------------+ | mode | description | +==================+=============================================+ | ``'r'`` | Open an existing file for reading. | +------------------+---------------------------------------------+ | ``'r+'`` | Open an existing file for reading and | | | writing. | +------------------+---------------------------------------------+ | ``'w'`` | Open a file for reading and writing. | | | Creates the file if needed, or overwrites | | | an existing file. | +------------------+---------------------------------------------+ | ``'x'`` | Create a gsd file exclusively and opens it | | | for reading and writing. | | | Raise :py:exc:`FileExistsError` | | | if it already exists. | +------------------+---------------------------------------------+ | ``'a'`` | Open a file for reading and writing. | | | Creates the file if it doesn't exist. | +------------------+---------------------------------------------+ When opening a file for reading (``'r'`` and ``'r+'`` modes): ``application`` and ``schema_version`` are ignored and may be ``None``. When ``schema`` is not ``None``, :py:func:`open` throws an exception if the file's schema does not match ``schema``. When opening a file for writing (``'w'``, ``'x'``, or ``'a'`` modes): The given ``application``, ``schema``, and ``schema_version`` must not be None. Example: .. ipython:: python with gsd.fl.open(name='file.gsd', mode='w', application="My application", schema="My Schema", schema_version=[1,0]) as f: f.write_chunk(name='chunk1', data=numpy.array([1,2,3,4], dtype=numpy.float32)) f.write_chunk(name='chunk2', data=numpy.array([[5,6],[7,8]], dtype=numpy.float32)) f.end_frame() f.write_chunk(name='chunk1', data=numpy.array([9,10,11,12], dtype=numpy.float32)) f.write_chunk(name='chunk2', data=numpy.array([[13,14],[15,16]], dtype=numpy.float32)) f.end_frame() f = gsd.fl.open(name='file.gsd', mode='r') if f.chunk_exists(frame=0, name='chunk1'): data = f.read_chunk(frame=0, name='chunk1') data f.close() """ return GSDFile(str(name), mode, application, schema, schema_version) cdef class GSDFile: """GSDFile GSD file access interface. Args: name (str): Name of the open file. mode (str): Mode of the open file. gsd_version (tuple[int, int]): GSD file layer version number (major, minor). application (str): Name of the generating application. schema (str): Name of the data schema. schema_version (tuple[int, int]): Schema version number (major, minor). nframes (int): Number of frames. :py:class:`GSDFile` implements an object oriented class interface to the GSD file layer. Use :py:func:`open` to open a GSD file and obtain a :py:class:`GSDFile` instance. :py:class:`GSDFile` can be used as a context manager. Attributes: name (str): Name of the open file. mode (str): Mode of the open file. gsd_version (tuple[int, int]): GSD file layer version number (major, minor). application (str): Name of the generating application. schema (str): Name of the data schema. schema_version (tuple[int, int]): Schema version number (major, minor). nframes (int): Number of frames. maximum_write_buffer_size (int): The Maximum write buffer size (bytes). index_entries_to_buffer (int): Number of index entries to buffer before flushing. 
""" cdef libgsd.gsd_handle __handle cdef bint __is_open cdef str mode cdef str name def __init__(self, name, mode, application, schema, schema_version): cdef libgsd.gsd_open_flag c_flags cdef int exclusive_create = 0 cdef int overwrite = 0 self.mode = mode if mode == 'w': c_flags = libgsd.GSD_OPEN_READWRITE overwrite = 1 elif mode == 'r': c_flags = libgsd.GSD_OPEN_READONLY elif mode == 'r+': c_flags = libgsd.GSD_OPEN_READWRITE elif mode == 'x': c_flags = libgsd.GSD_OPEN_READWRITE overwrite = 1 exclusive_create = 1 elif mode == 'a': c_flags = libgsd.GSD_OPEN_READWRITE if not os.path.exists(name): overwrite = 1 else: raise ValueError("Invalid mode: " + mode) self.name = name cdef char * c_name cdef char * c_application cdef char * c_schema cdef int _c_schema_version cdef str schema_truncated if overwrite: if application is None: raise ValueError("Provide application when creating a file") if schema is None: raise ValueError("Provide schema when creating a file") if schema_version is None: raise ValueError("Provide schema_version when creating a file") # create a new file or overwrite an existing one logger.info('overwriting file: ' + name + ' with mode: ' + mode + ', application: ' + application + ', schema: ' + schema + ', and schema_version: ' + str(schema_version)) name_e = name.encode('utf-8') c_name = name_e application_e = application.encode('utf-8') c_application = application_e schema_e = schema.encode('utf-8') c_schema = schema_e _c_schema_version = libgsd.gsd_make_version(schema_version[0], schema_version[1]) with nogil: retval = libgsd.gsd_create_and_open(&self.__handle, c_name, c_application, c_schema, _c_schema_version, c_flags, exclusive_create) else: # open an existing file logger.info('opening file: ' + name + ' with mode: ' + mode) name_e = name.encode('utf-8') c_name = name_e with nogil: retval = libgsd.gsd_open(&self.__handle, c_name, c_flags) __raise_on_error(retval, name) # validate schema if schema is not None: schema_truncated = schema if len(schema_truncated) > 64: schema_truncated = schema_truncated[0:63] if self.schema != schema_truncated: raise RuntimeError('file ' + name + ' has incorrect schema: ' + self.schema) self.__is_open = True def close(self): """close() Close the file. Once closed, any other operation on the file object will result in a `ValueError`. :py:meth:`close()` may be called more than once. The file is automatically closed when garbage collected or when the context manager exits. Example: .. ipython:: python :okexcept: f = gsd.fl.open(name='file.gsd', mode='w', application="My application", schema="My Schema", schema_version=[1,0]) f.write_chunk(name='chunk1', data=numpy.array([1,2,3,4], dtype=numpy.float32)) f.end_frame() data = f.read_chunk(frame=0, name='chunk1') f.close() # Read fails because the file is closed data = f.read_chunk(frame=0, name='chunk1') """ if self.__is_open: logger.info('closing file: ' + self.name) with nogil: retval = libgsd.gsd_close(&self.__handle) self.__is_open = False __raise_on_error(retval, self.name) def truncate(self): """truncate() Truncate all data from the file. After truncation, the file has no frames and no data chunks. The application, schema, and schema version remain the same. Example: .. 
ipython:: python with gsd.fl.open(name='file.gsd', mode='w', application="My application", schema="My Schema", schema_version=[1,0]) as f: for i in range(10): f.write_chunk(name='chunk1', data=numpy.array([1,2,3,4], dtype=numpy.float32)) f.end_frame() f = gsd.fl.open(name='file.gsd', mode='r+', application="My application", schema="My Schema", schema_version=[1,0]) f.nframes f.schema, f.schema_version, f.application f.truncate() f.nframes f.schema, f.schema_version, f.application f.close() """ if not self.__is_open: raise ValueError("File is not open") logger.info('truncating file: ' + self.name) with nogil: retval = libgsd.gsd_truncate(&self.__handle) __raise_on_error(retval, self.name) def end_frame(self): """end_frame() Complete writing the current frame. After calling :py:meth:`end_frame()` future calls to :py:meth:`write_chunk()` will write to the **next** frame in the file. .. danger:: Call :py:meth:`end_frame()` to complete the current frame **before** closing the file. If you fail to call :py:meth:`end_frame()`, the last frame will not be written to disk. Example: .. ipython:: python f = gsd.fl.open(name='file.gsd', mode='w', application="My application", schema="My Schema", schema_version=[1,0]) f.write_chunk(name='chunk1', data=numpy.array([1,2,3,4], dtype=numpy.float32)) f.end_frame() f.write_chunk(name='chunk1', data=numpy.array([9,10,11,12], dtype=numpy.float32)) f.end_frame() f.write_chunk(name='chunk1', data=numpy.array([13,14], dtype=numpy.float32)) f.end_frame() f.nframes f.close() """ if not self.__is_open: raise ValueError("File is not open") logger.debug('end frame: ' + self.name) with nogil: retval = libgsd.gsd_end_frame(&self.__handle) __raise_on_error(retval, self.name) def flush(self): """flush() Flush all buffered frames to the file. """ if not self.__is_open: raise ValueError("File is not open") logger.debug('flush: ' + self.name) with nogil: retval = libgsd.gsd_flush(&self.__handle) __raise_on_error(retval, self.name) def write_chunk(self, name, data): """write_chunk(name, data) Write a data chunk to the file. After writing all chunks in the current frame, call :py:meth:`end_frame()`. Args: name (str): Name of the chunk data: Data to write into the chunk. Must be a numpy array, or array-like, with 2 or fewer dimensions. Warning: :py:meth:`write_chunk()` will implicitly converts array-like and non-contiguous numpy arrays to contiguous numpy arrays with ``numpy.ascontiguousarray(data)``. This may or may not produce desired data types in the output file and incurs overhead. Example: .. 
ipython:: python f = gsd.fl.open(name='file.gsd', mode='w', application="My application", schema="My Schema", schema_version=[1,0]) f.write_chunk(name='float1d', data=numpy.array([1,2,3,4], dtype=numpy.float32)) f.write_chunk(name='float2d', data=numpy.array([[13,14],[15,16],[17,19]], dtype=numpy.float32)) f.write_chunk(name='double2d', data=numpy.array([[1,4],[5,6],[7,9]], dtype=numpy.float64)) f.write_chunk(name='int1d', data=numpy.array([70,80,90], dtype=numpy.int64)) f.end_frame() f.nframes f.close() """ if not self.__is_open: raise ValueError("File is not open") data_array = numpy.ascontiguousarray(data) if data_array is not data: logger.warning('implicit data copy when writing chunk: ' + name) data_array = data_array.view() cdef uint64_t N cdef uint32_t M if len(data_array.shape) > 2: raise ValueError("GSD can only write 1 or 2 dimensional arrays: " + name) if len(data_array.shape) == 1: data_array = data_array.reshape([data_array.shape[0], 1]) N = data_array.shape[0] M = data_array.shape[1] cdef libgsd.gsd_type gsd_type cdef void *data_ptr if data_array.dtype == numpy.uint8: gsd_type = libgsd.GSD_TYPE_UINT8 data_ptr = __get_ptr_uint8(data_array) elif data_array.dtype == numpy.uint16: gsd_type = libgsd.GSD_TYPE_UINT16 data_ptr = __get_ptr_uint16(data_array) elif data_array.dtype == numpy.uint32: gsd_type = libgsd.GSD_TYPE_UINT32 data_ptr = __get_ptr_uint32(data_array) elif data_array.dtype == numpy.uint64: gsd_type = libgsd.GSD_TYPE_UINT64 data_ptr = __get_ptr_uint64(data_array) elif data_array.dtype == numpy.int8: gsd_type = libgsd.GSD_TYPE_INT8 data_ptr = __get_ptr_int8(data_array) elif data_array.dtype == numpy.int16: gsd_type = libgsd.GSD_TYPE_INT16 data_ptr = __get_ptr_int16(data_array) elif data_array.dtype == numpy.int32: gsd_type = libgsd.GSD_TYPE_INT32 data_ptr = __get_ptr_int32(data_array) elif data_array.dtype == numpy.int64: gsd_type = libgsd.GSD_TYPE_INT64 data_ptr = __get_ptr_int64(data_array) elif data_array.dtype == numpy.float32: gsd_type = libgsd.GSD_TYPE_FLOAT data_ptr = __get_ptr_float32(data_array) elif data_array.dtype == numpy.float64: gsd_type = libgsd.GSD_TYPE_DOUBLE data_ptr = __get_ptr_float64(data_array) else: raise ValueError("invalid type for chunk: " + name) logger.debug('write chunk: ' + self.name + ' - ' + name) cdef char * c_name name_e = name.encode('utf-8') c_name = name_e with nogil: retval = libgsd.gsd_write_chunk(&self.__handle, c_name, gsd_type, N, M, 0, data_ptr) __raise_on_error(retval, self.name) def chunk_exists(self, frame, name): """chunk_exists(frame, name) Test if a chunk exists. Args: frame (int): Index of the frame to check name (str): Name of the chunk Returns: bool: ``True`` if the chunk exists in the file at the given frame.\ ``False`` if it does not. Example: .. 
ipython:: python with gsd.fl.open(name='file.gsd', mode='w', application="My application", schema="My Schema", schema_version=[1,0]) as f: f.write_chunk(name='chunk1', data=numpy.array([1,2,3,4], dtype=numpy.float32)) f.write_chunk(name='chunk2', data=numpy.array([[5,6],[7,8]], dtype=numpy.float32)) f.end_frame() f.write_chunk(name='chunk1', data=numpy.array([9,10,11,12], dtype=numpy.float32)) f.write_chunk(name='chunk2', data=numpy.array([[13,14],[15,16]], dtype=numpy.float32)) f.end_frame() f = gsd.fl.open(name='file.gsd', mode='r', application="My application", schema="My Schema", schema_version=[1,0]) f.chunk_exists(frame=0, name='chunk1') f.chunk_exists(frame=0, name='chunk2') f.chunk_exists(frame=0, name='chunk3') f.chunk_exists(frame=10, name='chunk1') f.close() """ cdef const libgsd.gsd_index_entry* index_entry cdef char * c_name name_e = name.encode('utf-8') c_name = name_e cdef int64_t c_frame c_frame = frame logger.debug('chunk exists: ' + self.name + ' - ' + name) with nogil: index_entry = libgsd.gsd_find_chunk(&self.__handle, c_frame, c_name) return index_entry != NULL def read_chunk(self, frame, name): """read_chunk(frame, name) Read a data chunk from the file and return it as a numpy array. Args: frame (int): Index of the frame to read name (str): Name of the chunk Returns: ``(N,M)`` or ``(N,)`` `numpy.ndarray` of ``type``: Data read from file. ``N``, ``M``, and ``type`` are determined by the chunk metadata. If the data is NxM in the file and M > 1, return a 2D array. If the data is Nx1, return a 1D array. .. tip:: Each call invokes a disk read and allocation of a new numpy array for storage. To avoid overhead, call :py:meth:`read_chunk()` on the same chunk only once. Example: .. ipython:: python :okexcept: with gsd.fl.open(name='file.gsd', mode='w', application="My application", schema="My Schema", schema_version=[1,0]) as f: f.write_chunk(name='chunk1', data=numpy.array([1,2,3,4], dtype=numpy.float32)) f.write_chunk(name='chunk2', data=numpy.array([[5,6],[7,8]], dtype=numpy.float32)) f.end_frame() f.write_chunk(name='chunk1', data=numpy.array([9,10,11,12], dtype=numpy.float32)) f.write_chunk(name='chunk2', data=numpy.array([[13,14],[15,16]], dtype=numpy.float32)) f.end_frame() f = gsd.fl.open(name='file.gsd', mode='r', application="My application", schema="My Schema", schema_version=[1,0]) f.read_chunk(frame=0, name='chunk1') f.read_chunk(frame=1, name='chunk1') f.read_chunk(frame=2, name='chunk1') f.close() """ if not self.__is_open: raise ValueError("File is not open") cdef const libgsd.gsd_index_entry* index_entry cdef char * c_name name_e = name.encode('utf-8') c_name = name_e cdef int64_t c_frame c_frame = frame with nogil: index_entry = libgsd.gsd_find_chunk(&self.__handle, c_frame, c_name) if index_entry == NULL: raise KeyError("frame " + str(frame) + " / chunk " + name + " not found in: " + self.name) cdef libgsd.gsd_type gsd_type gsd_type = index_entry.type cdef void *data_ptr if gsd_type == libgsd.GSD_TYPE_UINT8: data_array = numpy.empty(dtype=numpy.uint8, shape=[index_entry.N, index_entry.M]) elif gsd_type == libgsd.GSD_TYPE_UINT16: data_array = numpy.empty(dtype=numpy.uint16, shape=[index_entry.N, index_entry.M]) elif gsd_type == libgsd.GSD_TYPE_UINT32: data_array = numpy.empty(dtype=numpy.uint32, shape=[index_entry.N, index_entry.M]) elif gsd_type == libgsd.GSD_TYPE_UINT64: data_array = numpy.empty(dtype=numpy.uint64, shape=[index_entry.N, index_entry.M]) elif gsd_type == libgsd.GSD_TYPE_INT8: data_array = numpy.empty(dtype=numpy.int8, shape=[index_entry.N, 
index_entry.M]) elif gsd_type == libgsd.GSD_TYPE_INT16: data_array = numpy.empty(dtype=numpy.int16, shape=[index_entry.N, index_entry.M]) elif gsd_type == libgsd.GSD_TYPE_INT32: data_array = numpy.empty(dtype=numpy.int32, shape=[index_entry.N, index_entry.M]) elif gsd_type == libgsd.GSD_TYPE_INT64: data_array = numpy.empty(dtype=numpy.int64, shape=[index_entry.N, index_entry.M]) elif gsd_type == libgsd.GSD_TYPE_FLOAT: data_array = numpy.empty(dtype=numpy.float32, shape=[index_entry.N, index_entry.M]) elif gsd_type == libgsd.GSD_TYPE_DOUBLE: data_array = numpy.empty(dtype=numpy.float64, shape=[index_entry.N, index_entry.M]) else: raise ValueError("invalid type for chunk: " + name) logger.debug('read chunk: ' + self.name + ' - ' + str(frame) + ' - ' + name) # only read chunk if we have data if index_entry.N != 0 and index_entry.M != 0: if gsd_type == libgsd.GSD_TYPE_UINT8: data_ptr = __get_ptr_uint8(data_array) elif gsd_type == libgsd.GSD_TYPE_UINT16: data_ptr = __get_ptr_uint16(data_array) elif gsd_type == libgsd.GSD_TYPE_UINT32: data_ptr = __get_ptr_uint32(data_array) elif gsd_type == libgsd.GSD_TYPE_UINT64: data_ptr = __get_ptr_uint64(data_array) elif gsd_type == libgsd.GSD_TYPE_INT8: data_ptr = __get_ptr_int8(data_array) elif gsd_type == libgsd.GSD_TYPE_INT16: data_ptr = __get_ptr_int16(data_array) elif gsd_type == libgsd.GSD_TYPE_INT32: data_ptr = __get_ptr_int32(data_array) elif gsd_type == libgsd.GSD_TYPE_INT64: data_ptr = __get_ptr_int64(data_array) elif gsd_type == libgsd.GSD_TYPE_FLOAT: data_ptr = __get_ptr_float32(data_array) elif gsd_type == libgsd.GSD_TYPE_DOUBLE: data_ptr = __get_ptr_float64(data_array) else: raise ValueError("invalid type for chunk: " + name) with nogil: retval = libgsd.gsd_read_chunk(&self.__handle, data_ptr, index_entry) __raise_on_error(retval, self.name) if index_entry.M == 1: return data_array.reshape([index_entry.N]) else: return data_array def find_matching_chunk_names(self, match): """find_matching_chunk_names(match) Find all the chunk names in the file that start with the string *match*. Args: match (str): Start of the chunk name to match Returns: list[str]: Matching chunk names Example: .. ipython:: python with gsd.fl.open(name='file.gsd', mode='w', application="My application", schema="My Schema", schema_version=[1,0]) as f: f.write_chunk(name='data/chunk1', data=numpy.array([1,2,3,4], dtype=numpy.float32)) f.write_chunk(name='data/chunk2', data=numpy.array([[5,6],[7,8]], dtype=numpy.float32)) f.write_chunk(name='input/chunk3', data=numpy.array([9, 10], dtype=numpy.float32)) f.end_frame() f.write_chunk(name='input/chunk4', data=numpy.array([11, 12, 13, 14], dtype=numpy.float32)) f.end_frame() f = gsd.fl.open(name='file.gsd', mode='r', application="My application", schema="My Schema", schema_version=[1,0]) f.find_matching_chunk_names('') f.find_matching_chunk_names('data') f.find_matching_chunk_names('input') f.find_matching_chunk_names('other') f.close() """ if not self.__is_open: raise ValueError("File is not open") cdef const char * c_found cdef char * c_match match_e = match.encode('utf-8') c_match = match_e retval = [] with nogil: c_found = libgsd.gsd_find_matching_chunk_name(&self.__handle, c_match, NULL) while c_found != NULL: retval.append(c_found.decode('utf-8')) with nogil: c_found = libgsd.gsd_find_matching_chunk_name(&self.__handle, c_match, c_found) return retval def upgrade(self): """upgrade() Upgrade a GSD file to the v2 specification in place. The file must be open in a writable mode. 
""" if not self.__is_open: raise ValueError("File is not open") logger.info('upgrading file: ' + self.name) with nogil: retval = libgsd.gsd_upgrade(&self.__handle) __raise_on_error(retval, self.name) def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): self.close() def __reduce__(self): """Allows filehandles to be pickled when in read only mode.""" if self.mode not in ['rb', 'r']: raise PickleError("Only read only GSDFiles can be pickled.") return (GSDFile, (self.name, self.mode, self.application, self.schema, self.schema_version), ) property name: def __get__(self): return self.name property mode: def __get__(self): return self.mode property gsd_version: def __get__(self): cdef uint32_t v = self.__handle.header.gsd_version return (v >> 16, v & 0xffff) property schema_version: def __get__(self): cdef uint32_t v = self.__handle.header.schema_version return (v >> 16, v & 0xffff) property schema: def __get__(self): return self.__handle.header.schema.decode('utf-8') property application: def __get__(self): return self.__handle.header.application.decode('utf-8') property nframes: def __get__(self): if not self.__is_open: raise ValueError("File is not open") return libgsd.gsd_get_nframes(&self.__handle) property maximum_write_buffer_size: def __get__(self): if not self.__is_open: raise ValueError("File is not open") return libgsd.gsd_get_maximum_write_buffer_size(&self.__handle) def __set__(self, size): if not self.__is_open: raise ValueError("File is not open") retval = libgsd.gsd_set_maximum_write_buffer_size(&self.__handle, size) __raise_on_error(retval, self.name) property index_entries_to_buffer: def __get__(self): if not self.__is_open: raise ValueError("File is not open") return libgsd.gsd_get_index_entries_to_buffer(&self.__handle) def __set__(self, number): if not self.__is_open: raise ValueError("File is not open") retval = libgsd.gsd_set_index_entries_to_buffer(&self.__handle, number) __raise_on_error(retval, self.name) def __dealloc__(self): if self.__is_open: logger.info('closing file: ' + self.name) libgsd.gsd_close(&self.__handle) self.__is_open = False gsd-3.3.0/gsd/gsd.c000066400000000000000000002204161462564674300140160ustar00rootroot00000000000000// Copyright (c) 2016-2024 The Regents of the University of Michigan // Part of GSD, released under the BSD 2-Clause License. 
#include #ifdef _WIN32 #pragma warning(push) #pragma warning(disable : 4996) #define GSD_USE_MMAP 0 #define WIN32_LEAN_AND_MEAN #include #include #else // linux / mac #define _XOPEN_SOURCE 500 #include #include #define GSD_USE_MMAP 1 #endif #ifdef __APPLE__ #include #endif #include #include #include #include #include #include "gsd.h" /** @file gsd.c @brief Implements the GSD C API */ /// Magic value identifying a GSD file const uint64_t GSD_MAGIC_ID = 0x65DF65DF65DF65DF; /// Initial index size enum { GSD_INITIAL_INDEX_SIZE = 128 }; /// Initial namelist size enum { GSD_INITIAL_NAME_BUFFER_SIZE = 1024 }; /// Size of initial frame index enum { GSD_INITIAL_FRAME_INDEX_SIZE = 16 }; /// Initial size of write buffer enum { GSD_INITIAL_WRITE_BUFFER_SIZE = 1024 }; /// Default maximum size of write buffer enum { GSD_DEFAULT_MAXIMUM_WRITE_BUFFER_SIZE = 64 * 1024 * 1024 }; /// Default number of index entries to buffer enum { GSD_DEFAULT_INDEX_ENTRIES_TO_BUFFER = 256 * 1024 }; /// Size of hash map enum { GSD_NAME_MAP_SIZE = 57557 }; /// Current GSD file specification enum { GSD_CURRENT_FILE_VERSION = 2 }; // define windows wrapper functions #ifdef _WIN32 #define lseek _lseeki64 #define ftruncate _chsize #define fsync _commit typedef int64_t ssize_t; int S_IRUSR = _S_IREAD; int S_IWUSR = _S_IWRITE; int S_IRGRP = _S_IREAD; int S_IWGRP = _S_IWRITE; inline ssize_t pread(int fd, void* buf, size_t count, int64_t offset) { // Note: _read only accepts unsigned int values if (count > UINT_MAX) return GSD_ERROR_IO; int64_t oldpos = _telli64(fd); _lseeki64(fd, offset, SEEK_SET); ssize_t result = _read(fd, buf, (unsigned int)count); _lseeki64(fd, oldpos, SEEK_SET); return result; } inline ssize_t pwrite(int fd, const void* buf, size_t count, int64_t offset) { // Note: _write only accepts unsigned int values if (count > UINT_MAX) return GSD_ERROR_IO; int64_t oldpos = _telli64(fd); _lseeki64(fd, offset, SEEK_SET); ssize_t result = _write(fd, buf, (unsigned int)count); _lseeki64(fd, oldpos, SEEK_SET); return result; } #endif /** Zero memory @param d pointer to memory region @param size_to_zero size of the area to zero in bytes */ inline static void gsd_util_zero_memory(void* d, size_t size_to_zero) { memset(d, 0, size_to_zero); } /** @internal @brief Write large data buffer to file The system call pwrite() fails to write very large data buffers. This method calls pwrite() as many times as necessary to completely write a large buffer. @param fd File descriptor. @param buf Data buffer. @param count Number of bytes to write. @param offset Location in the file to start writing. @returns The total number of bytes written or a negative value on error. */ inline static ssize_t gsd_io_pwrite_retry(int fd, const void* buf, size_t count, int64_t offset) { size_t total_bytes_written = 0; const char* ptr = (char*)buf; // perform multiple pwrite calls to complete a large write successfully while (total_bytes_written < count) { size_t to_write = count - total_bytes_written; #if defined(_WIN32) || defined(__APPLE__) // win32 and apple raise an error for writes greater than INT_MAX if (to_write > INT_MAX / 2) to_write = INT_MAX / 2; #endif errno = 0; ssize_t bytes_written = pwrite(fd, ptr + total_bytes_written, to_write, offset + total_bytes_written); if (bytes_written == -1 || (bytes_written == 0 && errno != 0)) { return GSD_ERROR_IO; } total_bytes_written += bytes_written; } return total_bytes_written; } /** @internal @brief Read large data buffer to file The system call pread() fails to read very large data buffers. 
This method calls pread() as many times as necessary to completely read a large buffer. @param fd File descriptor. @param buf Data buffer. @param count Number of bytes to read. @param offset Location in the file to start reading. @returns The total number of bytes read or a negative value on error. */ inline static ssize_t gsd_io_pread_retry(int fd, void* buf, size_t count, int64_t offset) { size_t total_bytes_read = 0; char* ptr = (char*)buf; // perform multiple pread calls to complete a large write successfully while (total_bytes_read < count) { size_t to_read = count - total_bytes_read; #if defined(_WIN32) || defined(__APPLE__) // win32 and apple raise errors for reads greater than INT_MAX if (to_read > INT_MAX / 2) to_read = INT_MAX / 2; #endif errno = 0; ssize_t bytes_read = pread(fd, ptr + total_bytes_read, to_read, offset + total_bytes_read); if (bytes_read == -1 || (bytes_read == 0 && errno != 0)) { return GSD_ERROR_IO; } total_bytes_read += bytes_read; // handle end of file if (bytes_read == 0) { return total_bytes_read; } } return total_bytes_read; } /** @internal @brief Allocate a name/id map @param map Map to allocate. @param size Number of entries in the map. @returns GSD_SUCCESS on success, GSD_* error codes on error. */ inline static int gsd_name_id_map_allocate(struct gsd_name_id_map* map, size_t size) { if (map == NULL || map->v || size == 0 || map->size != 0) { return GSD_ERROR_INVALID_ARGUMENT; } map->v = calloc(size, sizeof(struct gsd_name_id_pair)); if (map->v == NULL) { return GSD_ERROR_MEMORY_ALLOCATION_FAILED; } map->size = size; return GSD_SUCCESS; } /** @internal @brief Free a name/id map @param map Map to free. @returns GSD_SUCCESS on success, GSD_* error codes on error. */ inline static int gsd_name_id_map_free(struct gsd_name_id_map* map) { if (map == NULL || map->v == NULL || map->size == 0) { return GSD_ERROR_INVALID_ARGUMENT; } // free all of the linked lists size_t i; for (i = 0; i < map->size; i++) { free(map->v[i].name); struct gsd_name_id_pair* cur = map->v[i].next; while (cur != NULL) { struct gsd_name_id_pair* prev = cur; cur = cur->next; free(prev->name); free(prev); } } // free the main map free(map->v); map->v = 0; map->size = 0; return GSD_SUCCESS; } /** @internal @brief Hash a string @param str String to hash @returns Hashed value of the string. */ inline static unsigned long gsd_hash_str(const unsigned char* str) { unsigned long hash = 5381; // NOLINT int c; while ((c = *str++)) { hash = ((hash << 5) + hash) + c; /* hash * 33 + c NOLINT */ } return hash; } /** @internal @brief Insert a string into a name/id map @param map Map to insert into. @param str String to insert. @param id ID to associate with the string. @returns GSD_SUCCESS on success, GSD_* error codes on error. 
*/ inline static int gsd_name_id_map_insert(struct gsd_name_id_map* map, const char* str, uint16_t id) { if (map == NULL || map->v == NULL || map->size == 0) { return GSD_ERROR_INVALID_ARGUMENT; } size_t hash = gsd_hash_str((const unsigned char*)str) % map->size; // base case: no conflict if (map->v[hash].name == NULL) { map->v[hash].name = calloc(strlen(str) + 1, sizeof(char)); if (map->v[hash].name == NULL) { return GSD_ERROR_MEMORY_ALLOCATION_FAILED; } memcpy(map->v[hash].name, str, strlen(str) + 1); map->v[hash].id = id; map->v[hash].next = NULL; } else { // go to the end of the conflict list struct gsd_name_id_pair* insert_point = map->v + hash; while (insert_point->next != NULL) { insert_point = insert_point->next; } // allocate and insert a new entry insert_point->next = malloc(sizeof(struct gsd_name_id_pair)); if (insert_point->next == NULL) { return GSD_ERROR_MEMORY_ALLOCATION_FAILED; } insert_point->next->name = calloc(strlen(str) + 1, sizeof(char)); if (insert_point->next->name == NULL) { return GSD_ERROR_MEMORY_ALLOCATION_FAILED; } memcpy(insert_point->next->name, str, strlen(str) + 1); insert_point->next->id = id; insert_point->next->next = NULL; } return GSD_SUCCESS; } /** @internal @brief Find an ID in a name/id mapping @param map Map to search. @param str String to search. @returns The ID if found, or UINT16_MAX if not found. */ inline static uint16_t gsd_name_id_map_find(struct gsd_name_id_map* map, const char* str) { if (map == NULL || map->v == NULL || map->size == 0) { return UINT16_MAX; } size_t hash = gsd_hash_str((const unsigned char*)str) % map->size; struct gsd_name_id_pair* cur = map->v + hash; while (cur != NULL) { if (cur->name == NULL) { // not found return UINT16_MAX; } if (strcmp(str, cur->name) == 0) { // found return cur->id; } // keep looking cur = cur->next; } // not found in any conflict return UINT16_MAX; } /** @internal @brief Utility function to validate index entry @param handle handle to the open gsd file @param idx index of entry to validate @returns 1 if the entry is valid, 0 if it is not */ inline static int gsd_is_entry_valid(struct gsd_handle* handle, size_t idx) { const struct gsd_index_entry entry = handle->file_index.data[idx]; // check for valid type if (gsd_sizeof_type((enum gsd_type)entry.type) == 0) { return 0; } // validate that we don't read past the end of the file size_t size = entry.N * entry.M * gsd_sizeof_type((enum gsd_type)entry.type); if ((entry.location + size) > (uint64_t)handle->file_size) { return 0; } // check for valid frame (frame cannot be more than the number of index entries) if (entry.frame >= handle->header.index_allocated_entries) { return 0; } // check for valid id if (entry.id >= (handle->file_names.n_names + handle->frame_names.n_names)) { return 0; } // check for valid flags if (entry.flags != 0) { return 0; } return 1; } /** @internal @brief Allocate a write buffer @param buf Buffer to allocate. @param reserve Number of bytes to allocate. @returns GSD_SUCCESS on success, GSD_* error codes on error. */ inline static int gsd_byte_buffer_allocate(struct gsd_byte_buffer* buf, size_t reserve) { if (buf == NULL || buf->data || reserve == 0 || buf->reserved != 0 || buf->size != 0) { return GSD_ERROR_INVALID_ARGUMENT; } buf->data = calloc(reserve, sizeof(char)); if (buf->data == NULL) { return GSD_ERROR_MEMORY_ALLOCATION_FAILED; } buf->size = 0; buf->reserved = reserve; return GSD_SUCCESS; } /** @internal @brief Append bytes to a byte buffer @param buf Buffer to append to. @param data Data to append. 
@param size Number of bytes in *data*. @returns GSD_SUCCESS on success, GSD_* error codes on error. */ inline static int gsd_byte_buffer_append(struct gsd_byte_buffer* buf, const char* data, size_t size) { if (buf == NULL || buf->data == NULL || size == 0 || buf->reserved == 0) { return GSD_ERROR_INVALID_ARGUMENT; } if (buf->size + size > buf->reserved) { // reallocate by doubling size_t new_reserved = buf->reserved * 2; while (buf->size + size >= new_reserved) { new_reserved = new_reserved * 2; } char* old_data = buf->data; buf->data = realloc(buf->data, sizeof(char) * new_reserved); if (buf->data == NULL) { // this free should not be necessary, but clang-tidy disagrees free(old_data); return GSD_ERROR_MEMORY_ALLOCATION_FAILED; } // zero the new memory, but only the portion after the end of the new section to be appended gsd_util_zero_memory(buf->data + (buf->size + size), sizeof(char) * (new_reserved - (buf->size + size))); buf->reserved = new_reserved; } memcpy(buf->data + buf->size, data, size); buf->size += size; return GSD_SUCCESS; } /** @internal @brief Free the memory allocated by the write buffer or unmap the mapped memory. @param buf Buffer to free. @returns GSD_SUCCESS on success, GSD_* error codes on error. */ inline static int gsd_byte_buffer_free(struct gsd_byte_buffer* buf) { if (buf == NULL || buf->data == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } free(buf->data); gsd_util_zero_memory(buf, sizeof(struct gsd_byte_buffer)); return GSD_SUCCESS; } /** @internal @brief Allocate a buffer of index entries @param buf Buffer to allocate. @param reserve Number of entries to allocate. @post The buffer's data element has *reserve* elements allocated in memory. @returns GSD_SUCCESS on success, GSD_* error codes on error. */ inline static int gsd_index_buffer_allocate(struct gsd_index_buffer* buf, size_t reserve) { if (buf == NULL || buf->mapped_data || buf->data || reserve == 0 || buf->reserved != 0 || buf->size != 0) { return GSD_ERROR_INVALID_ARGUMENT; } buf->data = calloc(reserve, sizeof(struct gsd_index_entry)); if (buf->data == NULL) { return GSD_ERROR_MEMORY_ALLOCATION_FAILED; } buf->size = 0; buf->reserved = reserve; buf->mapped_data = NULL; buf->mapped_len = 0; return GSD_SUCCESS; } /** @internal @brief Map index entries from the file @param buf Buffer to map. @param handle GSD file handle to map. @post The buffer's data element contains the index data from the file. On some systems, this will use mmap to efficiently access the file. On others, it may result in an allocation and read of the entire index from the file. @returns GSD_SUCCESS on success, GSD_* error codes on error. 
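    Example (illustrative sketch; assumes *handle* refers to an open file and abbreviates error
    handling):

        struct gsd_index_buffer index = {0};
        if (gsd_index_buffer_map(&index, handle) == GSD_SUCCESS)
            {
            // index.data[0] .. index.data[index.size - 1] hold the entries read from the file
            gsd_index_buffer_free(&index);
            }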
*/ inline static int gsd_index_buffer_map(struct gsd_index_buffer* buf, struct gsd_handle* handle) { if (buf == NULL || buf->mapped_data || buf->data || buf->reserved != 0 || buf->size != 0) { return GSD_ERROR_INVALID_ARGUMENT; } // validate that the index block exists inside the file if (handle->header.index_location + sizeof(struct gsd_index_entry) * handle->header.index_allocated_entries > (uint64_t)handle->file_size) { return GSD_ERROR_FILE_CORRUPT; } #if GSD_USE_MMAP // map the index in read only mode size_t page_size = getpagesize(); size_t index_size = sizeof(struct gsd_index_entry) * handle->header.index_allocated_entries; size_t offset = (handle->header.index_location / page_size) * page_size; buf->mapped_data = mmap(NULL, index_size + (handle->header.index_location - offset), PROT_READ, MAP_SHARED, handle->fd, offset); if (buf->mapped_data == MAP_FAILED) { return GSD_ERROR_IO; } buf->data = (struct gsd_index_entry*)(((char*)buf->mapped_data) + (handle->header.index_location - offset)); buf->mapped_len = index_size + (handle->header.index_location - offset); buf->reserved = handle->header.index_allocated_entries; #else // mmap not supported, read the data from the disk int retval = gsd_index_buffer_allocate(buf, handle->header.index_allocated_entries); if (retval != GSD_SUCCESS) { return retval; } ssize_t bytes_read = gsd_io_pread_retry(handle->fd, buf->data, sizeof(struct gsd_index_entry) * handle->header.index_allocated_entries, handle->header.index_location); if (bytes_read == -1 || bytes_read != sizeof(struct gsd_index_entry) * handle->header.index_allocated_entries) { return GSD_ERROR_IO; } #endif // determine the number of index entries in the list // file is corrupt if first index entry is invalid if (buf->data[0].location != 0 && !gsd_is_entry_valid(handle, 0)) { return GSD_ERROR_FILE_CORRUPT; } if (buf->data[0].location == 0) { buf->size = 0; } else { // determine the number of index entries (marked by location = 0) // binary search for the first index entry with location 0 size_t L = 0; size_t R = buf->reserved; // progressively narrow the search window by halves do { size_t m = (L + R) / 2; // file is corrupt if any index entry is invalid or frame does not increase // monotonically if (buf->data[m].location != 0 && (!gsd_is_entry_valid(handle, m) || buf->data[m].frame < buf->data[L].frame)) { return GSD_ERROR_FILE_CORRUPT; } if (buf->data[m].location != 0) { L = m; } else { R = m; } } while ((R - L) > 1); // this finds R = the first index entry with location = 0 buf->size = R; } return GSD_SUCCESS; } /** @internal @brief Free the memory allocated by the index buffer or unmap the mapped memory. @param buf Buffer to free. @returns GSD_SUCCESS on success, GSD_* error codes on error. */ inline static int gsd_index_buffer_free(struct gsd_index_buffer* buf) { if (buf == NULL || buf->data == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } #if GSD_USE_MMAP if (buf->mapped_data) { int retval = munmap(buf->mapped_data, buf->mapped_len); if (retval != 0) { return GSD_ERROR_IO; } } else #endif { free(buf->data); } gsd_util_zero_memory(buf, sizeof(struct gsd_index_buffer)); return GSD_SUCCESS; } /** @internal @brief Add a new index entry and provide a pointer to it. @param buf Buffer to add too. @param entry [out] Pointer to set to the new entry. Double the size of the reserved space as needed to hold the new entry. Does not accept mapped indices. @returns GSD_SUCCESS on success, GSD_* error codes on error. 
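    Example (illustrative sketch; assumes *buf* points to a buffer created with
    gsd_index_buffer_allocate(), and *frame* and *id* are placeholder values):

        struct gsd_index_entry* entry = NULL;
        if (gsd_index_buffer_add(buf, &entry) == GSD_SUCCESS)
            {
            // fill in the new entry in place
            entry->frame = frame;
            entry->id = id;
            }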
*/ inline static int gsd_index_buffer_add(struct gsd_index_buffer* buf, struct gsd_index_entry** entry) { if (buf == NULL || buf->mapped_data || entry == NULL || buf->reserved == 0) { return GSD_ERROR_INVALID_ARGUMENT; } if (buf->size == buf->reserved) { // grow the array size_t new_reserved = buf->reserved * 2; buf->data = realloc(buf->data, sizeof(struct gsd_index_entry) * new_reserved); if (buf->data == NULL) { return GSD_ERROR_MEMORY_ALLOCATION_FAILED; } // zero the new memory gsd_util_zero_memory(buf->data + buf->reserved, sizeof(struct gsd_index_entry) * (new_reserved - buf->reserved)); buf->reserved = new_reserved; } size_t insert_pos = buf->size; buf->size++; *entry = buf->data + insert_pos; return GSD_SUCCESS; } inline static int gsd_cmp_index_entry(const struct gsd_index_entry* a, const struct gsd_index_entry* b) { int result = 0; if (a->frame < b->frame) { result = -1; } if (a->frame > b->frame) { result = 1; } if (a->frame == b->frame) { if (a->id < b->id) { result = -1; } if (a->id > b->id) { result = 1; } if (a->id == b->id) { result = 0; } } return result; } /** @internal @brief Compute heap parent node. @param i Node index. */ inline static size_t gsd_heap_parent(size_t i) { return (i - 1) / 2; } /** @internal @brief Compute heap left child. @param i Node index. */ inline static size_t gsd_heap_left_child(size_t i) { return 2 * i + 1; } /** @internal @brief Swap the nodes *a* and *b* in the buffer @param buf Buffer. @param a First index to swap. @param b Second index to swap. */ inline static void gsd_heap_swap(struct gsd_index_buffer* buf, size_t a, size_t b) { struct gsd_index_entry tmp = buf->data[a]; buf->data[a] = buf->data[b]; buf->data[b] = tmp; } /** @internal @brief Shift heap node downward @param buf Buffer. @param start First index of the valid heap in *buf*. @param end Last index of the valid hep in *buf*. */ inline static void gsd_heap_shift_down(struct gsd_index_buffer* buf, size_t start, size_t end) { size_t root = start; while (gsd_heap_left_child(root) <= end) { size_t child = gsd_heap_left_child(root); size_t swap = root; if (gsd_cmp_index_entry(buf->data + swap, buf->data + child) < 0) { swap = child; } if (child + 1 <= end && gsd_cmp_index_entry(buf->data + swap, buf->data + child + 1) < 0) { swap = child + 1; } if (swap == root) { return; } gsd_heap_swap(buf, root, swap); root = swap; } } /** @internal @brief Convert unordered index buffer to a heap @param buf Buffer. */ inline static void gsd_heapify(struct gsd_index_buffer* buf) { ssize_t start = gsd_heap_parent(buf->size - 1); while (start >= 0) { gsd_heap_shift_down(buf, start, buf->size - 1); start--; } } /** @internal @brief Sort the index buffer. @param buf Buffer to sort. Sorts an in-memory index buffer. Does not accept mapped indices. @returns GSD_SUCCESS on success, GSD_* error codes on error. */ inline static int gsd_index_buffer_sort(struct gsd_index_buffer* buf) { if (buf == NULL || buf->mapped_data || buf->reserved == 0) { return GSD_ERROR_INVALID_ARGUMENT; } // arrays of size 0 or 1 are already sorted if (buf->size <= 1) { return GSD_SUCCESS; } gsd_heapify(buf); size_t end = buf->size - 1; while (end > 0) { gsd_heap_swap(buf, end, 0); end = end - 1; gsd_heap_shift_down(buf, 0, end); } return GSD_SUCCESS; } /** @internal @brief Utility function to expand the memory space for the index block in the file. @param handle Handle to the open gsd file. @param size_required The new index must be able to hold at least this many elements. @returns GSD_SUCCESS on success, GSD_* error codes on error. 
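    For example (illustrative): with 128 allocated entries, a request for room for 200 entries
    doubles the allocation once to 256, while a request for 300 entries doubles it twice to 512.
    The existing index entries are copied to the end of the file and the header is updated to
    point to the new, larger block.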
*/ inline static int gsd_expand_file_index(struct gsd_handle* handle, size_t size_required) { if (handle->open_flags == GSD_OPEN_READONLY) { return GSD_ERROR_FILE_MUST_BE_WRITABLE; } // multiply the index size each time it grows // this allows the index to grow rapidly to accommodate new frames const int multiplication_factor = 2; // save the old size and update the new size size_t size_old = handle->header.index_allocated_entries; size_t size_new = size_old * multiplication_factor; while (size_new <= size_required) { size_new *= multiplication_factor; } // Mac systems deadlock when writing from a mapped region into the tail end of that same region // unmap the index first and copy it over by chunks int retval = gsd_index_buffer_free(&handle->file_index); if (retval != 0) { return retval; } // allocate the copy buffer uint64_t copy_buffer_size = GSD_DEFAULT_INDEX_ENTRIES_TO_BUFFER * sizeof(struct gsd_index_entry); if (copy_buffer_size > size_old * sizeof(struct gsd_index_entry)) { copy_buffer_size = size_old * sizeof(struct gsd_index_entry); } char* buf = malloc(copy_buffer_size); // write the current index to the end of the file int64_t new_index_location = lseek(handle->fd, 0, SEEK_END); int64_t old_index_location = handle->header.index_location; size_t total_bytes_written = 0; size_t old_index_bytes = size_old * sizeof(struct gsd_index_entry); while (total_bytes_written < old_index_bytes) { size_t bytes_to_copy = copy_buffer_size; if (old_index_bytes - total_bytes_written < copy_buffer_size) { bytes_to_copy = old_index_bytes - total_bytes_written; } ssize_t bytes_read = gsd_io_pread_retry(handle->fd, buf, bytes_to_copy, old_index_location + total_bytes_written); if (bytes_read == -1 || bytes_read != bytes_to_copy) { free(buf); return GSD_ERROR_IO; } ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, buf, bytes_to_copy, new_index_location + total_bytes_written); if (bytes_written == -1 || bytes_written != bytes_to_copy) { free(buf); return GSD_ERROR_IO; } total_bytes_written += bytes_written; } // fill the new index space with 0s gsd_util_zero_memory(buf, copy_buffer_size); size_t new_index_bytes = size_new * sizeof(struct gsd_index_entry); while (total_bytes_written < new_index_bytes) { size_t bytes_to_copy = copy_buffer_size; if (new_index_bytes - total_bytes_written < copy_buffer_size) { bytes_to_copy = new_index_bytes - total_bytes_written; } ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, buf, bytes_to_copy, new_index_location + total_bytes_written); if (bytes_written == -1 || bytes_written != bytes_to_copy) { free(buf); return GSD_ERROR_IO; } total_bytes_written += bytes_written; } // sync the expanded index retval = fsync(handle->fd); if (retval != 0) { free(buf); return GSD_ERROR_IO; } // free the copy buffer free(buf); // update the header handle->header.index_location = new_index_location; handle->file_size = handle->header.index_location + total_bytes_written; handle->header.index_allocated_entries = size_new; // write the new header out ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, &(handle->header), sizeof(struct gsd_header), 0); if (bytes_written != sizeof(struct gsd_header)) { return GSD_ERROR_IO; } // sync the updated header retval = fsync(handle->fd); if (retval != 0) { return GSD_ERROR_IO; } // remap the file index retval = gsd_index_buffer_map(&handle->file_index, handle); if (retval != 0) { return retval; } return GSD_SUCCESS; } /** @internal @brief Flush the write buffer. gsd_write_frame() writes small data chunks into the write buffer. 
It adds index entries for these chunks to gsd_handle::buffer_index with locations offset from the start of the write buffer. gsd_flush_write_buffer() writes the buffer to the end of the file, moves the index entries to gsd_handle::frame_index and updates the location to reference the beginning of the file. @param handle Handle to flush the write buffer. @returns GSD_SUCCESS on success or GSD_* error codes on error */ inline static int gsd_flush_write_buffer(struct gsd_handle* handle) { if (handle == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->write_buffer.size == 0 && handle->buffer_index.size == 0) { // nothing to do return GSD_SUCCESS; } if (handle->write_buffer.size > 0 && handle->buffer_index.size == 0) { // error: bytes in buffer, but no index for them return GSD_ERROR_INVALID_ARGUMENT; } // write the buffer to the end of the file uint64_t offset = handle->file_size; ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, handle->write_buffer.data, handle->write_buffer.size, offset); if (bytes_written == -1 || bytes_written != handle->write_buffer.size) { return GSD_ERROR_IO; } handle->file_size += handle->write_buffer.size; // reset write_buffer for new data handle->write_buffer.size = 0; // Move buffer_index entries to frame_index. size_t i; for (i = 0; i < handle->buffer_index.size; i++) { struct gsd_index_entry* new_index; int retval = gsd_index_buffer_add(&handle->frame_index, &new_index); if (retval != GSD_SUCCESS) { return retval; } *new_index = handle->buffer_index.data[i]; new_index->location += offset; } // clear the buffer index for new entries handle->buffer_index.size = 0; return GSD_SUCCESS; } /** @internal @brief Flush the name buffer. gsd_write_frame() adds new names to the frame_names buffer. gsd_flush_name_buffer() flushes this buffer at the end of a frame write and commits the new names to the file. If necessary, the namelist is written to a new location in the file. @param handle Handle to flush the write buffer. 
@returns GSD_SUCCESS on success or GSD_* error codes on error */ inline static int gsd_flush_name_buffer(struct gsd_handle* handle) { if (handle == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->frame_names.n_names == 0) { // nothing to do return GSD_SUCCESS; } if (handle->frame_names.data.size == 0) { // error: bytes in buffer, but no names for them return GSD_ERROR_INVALID_ARGUMENT; } size_t old_reserved = handle->file_names.data.reserved; size_t old_size = handle->file_names.data.size; // add the new names to the file name list and zero the frame list int retval = gsd_byte_buffer_append(&handle->file_names.data, handle->frame_names.data.data, handle->frame_names.data.size); if (retval != GSD_SUCCESS) { return retval; } handle->file_names.n_names += handle->frame_names.n_names; handle->frame_names.n_names = 0; handle->frame_names.data.size = 0; gsd_util_zero_memory(handle->frame_names.data.data, handle->frame_names.data.reserved); // reserved space must be a multiple of the GSD name size if (handle->file_names.data.reserved % GSD_NAME_SIZE != 0) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->file_names.data.reserved > old_reserved) { // write the new name list to the end of the file uint64_t offset = handle->file_size; ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, handle->file_names.data.data, handle->file_names.data.reserved, offset); if (bytes_written == -1 || bytes_written != handle->file_names.data.reserved) { return GSD_ERROR_IO; } // sync the updated name list retval = fsync(handle->fd); if (retval != 0) { return GSD_ERROR_IO; } handle->file_size += handle->file_names.data.reserved; handle->header.namelist_location = offset; handle->header.namelist_allocated_entries = handle->file_names.data.reserved / GSD_NAME_SIZE; // write the new header out bytes_written = gsd_io_pwrite_retry(handle->fd, &(handle->header), sizeof(struct gsd_header), 0); if (bytes_written != sizeof(struct gsd_header)) { return GSD_ERROR_IO; } } else { // write the new name list to the old index location uint64_t offset = handle->header.namelist_location; ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, handle->file_names.data.data + old_size, handle->file_names.data.reserved - old_size, offset + old_size); if (bytes_written != (handle->file_names.data.reserved - old_size)) { return GSD_ERROR_IO; } } // sync the updated name list or header retval = fsync(handle->fd); if (retval != 0) { return GSD_ERROR_IO; } return GSD_SUCCESS; } /** @internal @brief utility function to append a name to the namelist @param id [out] ID of the new name @param handle handle to the open gsd file @param name string name Append a name to the names in the current frame. gsd_end_frame() will add this list to the file names. @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). - GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory. - GSD_ERROR_FILE_MUST_BE_WRITABLE: File must not be read only. 
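    Example (illustrative sketch; assumes *handle* is writable and "particles/velocity" is only a
    placeholder name):

        uint16_t id;
        int retval = gsd_append_name(&id, handle, "particles/velocity");
        // on success, *id* identifies this name in index entries written later in the frame

    Names receive sequential ids: the first name stored in a file gets id 0, the next id 1, and so
    on, up to a limit of UINT16_MAX names.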
*/ inline static int gsd_append_name(uint16_t* id, struct gsd_handle* handle, const char* name) { if (handle->open_flags == GSD_OPEN_READONLY) { return GSD_ERROR_FILE_MUST_BE_WRITABLE; } if (handle->file_names.n_names + handle->frame_names.n_names == UINT16_MAX) { // no more names may be added return GSD_ERROR_NAMELIST_FULL; } // Provide the ID of the new name *id = (uint16_t)(handle->file_names.n_names + handle->frame_names.n_names); if (handle->header.gsd_version < gsd_make_version(2, 0)) { // v1 files always allocate GSD_NAME_SIZE bytes for each name and put a NULL terminator // at address 63 char name_v1[GSD_NAME_SIZE]; strncpy(name_v1, name, GSD_NAME_SIZE - 1); name_v1[GSD_NAME_SIZE - 1] = 0; gsd_byte_buffer_append(&handle->frame_names.data, name_v1, GSD_NAME_SIZE); handle->frame_names.n_names++; // update the name/id mapping with the truncated name int retval = gsd_name_id_map_insert(&handle->name_map, name_v1, *id); if (retval != GSD_SUCCESS) { return retval; } } else { gsd_byte_buffer_append(&handle->frame_names.data, name, strlen(name) + 1); handle->frame_names.n_names++; // update the name/id mapping int retval = gsd_name_id_map_insert(&handle->name_map, name, *id); if (retval != GSD_SUCCESS) { return retval; } } return GSD_SUCCESS; } /** @internal @brief Cross-platform wrapper for the POSIX open() system function. @param pathname file path using UTF-8 encoding on all platforms @return file descriptor */ inline static int gsd_open_file(const char* pathname, int flags, int mode) { #ifndef _WIN32 return open(pathname, flags, mode); #else // On Windows, we call the _wopen() function, which requires converting the UTF-8 input path to // UTF-16 wide-character encoding. int count_wchars; wchar_t* wpathname; int fd; // First, determine the number of wide characters needed to represent the input string. count_wchars = MultiByteToWideChar(CP_UTF8, 0, pathname, -1, NULL, 0); // Then allocate temporary wchar_t buffer and perform the string conversion. wpathname = malloc(sizeof(wchar_t) * count_wchars); MultiByteToWideChar(CP_UTF8, 0, pathname, -1, wpathname, count_wchars); fd = _wopen(wpathname, flags, mode); free(wpathname); return fd; #endif } /** @internal @brief Truncate the file and write a new gsd header. 
@param fd file descriptor to initialize @param application Generating application name (truncated to 63 chars) @param schema Schema name for data to be written in this GSD file (truncated to 63 chars) @param schema_version Version of the scheme data to be written (make with gsd_make_version()) */ inline static int gsd_initialize_file(int fd, const char* application, const char* schema, uint32_t schema_version) { // check if the file was created if (fd == -1) { return GSD_ERROR_IO; } int retval = ftruncate(fd, 0); if (retval != 0) { return GSD_ERROR_IO; } // populate header fields struct gsd_header header; gsd_util_zero_memory(&header, sizeof(header)); header.magic = GSD_MAGIC_ID; header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION, 0); strncpy(header.application, application, sizeof(header.application) - 1); header.application[sizeof(header.application) - 1] = 0; strncpy(header.schema, schema, sizeof(header.schema) - 1); header.schema[sizeof(header.schema) - 1] = 0; header.schema_version = schema_version; header.index_location = sizeof(header); header.index_allocated_entries = GSD_INITIAL_INDEX_SIZE; header.namelist_location = header.index_location + sizeof(struct gsd_index_entry) * header.index_allocated_entries; header.namelist_allocated_entries = GSD_INITIAL_NAME_BUFFER_SIZE / GSD_NAME_SIZE; gsd_util_zero_memory(header.reserved, sizeof(header.reserved)); // write the header out ssize_t bytes_written = gsd_io_pwrite_retry(fd, &header, sizeof(header), 0); if (bytes_written != sizeof(header)) { return GSD_ERROR_IO; } // allocate and zero default index memory struct gsd_index_entry index[GSD_INITIAL_INDEX_SIZE]; gsd_util_zero_memory(index, sizeof(index)); // write the empty index out bytes_written = gsd_io_pwrite_retry(fd, index, sizeof(index), sizeof(header)); if (bytes_written != sizeof(index)) { return GSD_ERROR_IO; } // allocate and zero the namelist memory char names[GSD_INITIAL_NAME_BUFFER_SIZE]; gsd_util_zero_memory(names, sizeof(char) * GSD_INITIAL_NAME_BUFFER_SIZE); // write the namelist out bytes_written = gsd_io_pwrite_retry(fd, names, sizeof(names), sizeof(header) + sizeof(index)); if (bytes_written != sizeof(names)) { return GSD_ERROR_IO; } // sync file retval = fsync(fd); if (retval != 0) { return GSD_ERROR_IO; } return GSD_SUCCESS; } /** @internal @brief Read in the file index and initialize the handle. @param handle Handle to read the header @pre handle->fd is an open file. @pre handle->open_flags is set. 
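    Typical call sequence (illustrative sketch, mirroring gsd_open()):

        gsd_util_zero_memory(handle, sizeof(struct gsd_handle));
        handle->fd = gsd_open_file(fname, O_RDONLY, 0); // gsd_open() also adds _O_BINARY on Windows
        handle->open_flags = GSD_OPEN_READONLY;
        int retval = gsd_initialize_handle(handle);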
*/ inline static int gsd_initialize_handle(struct gsd_handle* handle) { // check if the file was created if (handle->fd == -1) { return GSD_ERROR_IO; } // read the header ssize_t bytes_read = gsd_io_pread_retry(handle->fd, &handle->header, sizeof(struct gsd_header), 0); if (bytes_read == -1) { return GSD_ERROR_IO; } if (bytes_read != sizeof(struct gsd_header)) { return GSD_ERROR_NOT_A_GSD_FILE; } // validate the header if (handle->header.magic != GSD_MAGIC_ID) { return GSD_ERROR_NOT_A_GSD_FILE; } if (handle->header.gsd_version < gsd_make_version(1, 0) && handle->header.gsd_version != gsd_make_version(0, 3)) { return GSD_ERROR_INVALID_GSD_FILE_VERSION; } if (handle->header.gsd_version >= gsd_make_version(3, 0)) { return GSD_ERROR_INVALID_GSD_FILE_VERSION; } // determine the file size handle->file_size = lseek(handle->fd, 0, SEEK_END); // validate that the namelist block exists inside the file if (handle->header.namelist_location + (GSD_NAME_SIZE * handle->header.namelist_allocated_entries) > (uint64_t)handle->file_size) { return GSD_ERROR_FILE_CORRUPT; } // allocate the hash map int retval = gsd_name_id_map_allocate(&handle->name_map, GSD_NAME_MAP_SIZE); if (retval != GSD_SUCCESS) { return retval; } // read the namelist block size_t namelist_n_bytes = GSD_NAME_SIZE * handle->header.namelist_allocated_entries; retval = gsd_byte_buffer_allocate(&handle->file_names.data, namelist_n_bytes); if (retval != GSD_SUCCESS) { return retval; } bytes_read = gsd_io_pread_retry(handle->fd, handle->file_names.data.data, namelist_n_bytes, handle->header.namelist_location); if (bytes_read == -1 || bytes_read != namelist_n_bytes) { return GSD_ERROR_IO; } // The name buffer must end in a NULL terminator or else the file is corrupt if (handle->file_names.data.data[handle->file_names.data.reserved - 1] != 0) { return GSD_ERROR_FILE_CORRUPT; } // Add the names to the hash map. Also determine the number of used bytes in the namelist. 
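    // Name storage layout (illustrative): v1 files place each name in a fixed GSD_NAME_SIZE
    // (64 byte) slot, e.g. "position\0<padding>", while v2 files pack names back to back with
    // single NUL separators, e.g. "position\0velocity\0". In both layouts an empty name marks
    // the end of the list, which is how the loop below stops.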
size_t name_start = 0; handle->file_names.n_names = 0; while (name_start < handle->file_names.data.reserved) { char* name = handle->file_names.data.data + name_start; // an empty name notes the end of the list if (name[0] == 0) { break; } retval = gsd_name_id_map_insert(&handle->name_map, name, (uint16_t)handle->file_names.n_names); if (retval != GSD_SUCCESS) { return retval; } handle->file_names.n_names++; if (handle->header.gsd_version < gsd_make_version(2, 0)) { // gsd v1 stores names in fixed 64 byte segments name_start += GSD_NAME_SIZE; } else { size_t len = strnlen(name, handle->file_names.data.reserved - name_start); name_start += len + 1; } } handle->file_names.data.size = name_start; // read in the file index retval = gsd_index_buffer_map(&handle->file_index, handle); if (retval != GSD_SUCCESS) { return retval; } // determine the current frame counter if (handle->file_index.size == 0) { handle->cur_frame = 0; } else { handle->cur_frame = handle->file_index.data[handle->file_index.size - 1].frame + 1; } // if this is a write mode, allocate the initial frame index and the name buffer if (handle->open_flags != GSD_OPEN_READONLY) { retval = gsd_index_buffer_allocate(&handle->frame_index, GSD_INITIAL_FRAME_INDEX_SIZE); if (retval != GSD_SUCCESS) { return retval; } retval = gsd_index_buffer_allocate(&handle->buffer_index, GSD_INITIAL_FRAME_INDEX_SIZE); if (retval != GSD_SUCCESS) { return retval; } retval = gsd_byte_buffer_allocate(&handle->write_buffer, GSD_INITIAL_WRITE_BUFFER_SIZE); if (retval != GSD_SUCCESS) { return retval; } handle->frame_names.n_names = 0; retval = gsd_byte_buffer_allocate(&handle->frame_names.data, GSD_NAME_SIZE); if (retval != GSD_SUCCESS) { return retval; } } handle->pending_index_entries = 0; handle->maximum_write_buffer_size = GSD_DEFAULT_MAXIMUM_WRITE_BUFFER_SIZE; handle->index_entries_to_buffer = GSD_DEFAULT_INDEX_ENTRIES_TO_BUFFER; return GSD_SUCCESS; } uint32_t gsd_make_version(unsigned int major, unsigned int minor) { return major << (sizeof(uint32_t) * 4) | minor; } int gsd_create(const char* fname, const char* application, const char* schema, uint32_t schema_version) { int extra_flags = 0; #ifdef _WIN32 extra_flags = _O_BINARY; #endif // create the file int fd = gsd_open_file(fname, O_RDWR | O_CREAT | O_TRUNC | extra_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); int retval = gsd_initialize_file(fd, application, schema, schema_version); if (fd != -1) { close(fd); } return retval; } int gsd_create_and_open(struct gsd_handle* handle, const char* fname, const char* application, const char* schema, uint32_t schema_version, const enum gsd_open_flag flags, int exclusive_create) { // zero the handle gsd_util_zero_memory(handle, sizeof(struct gsd_handle)); int extra_flags = 0; #ifdef _WIN32 extra_flags = _O_BINARY; #endif // set the open flags in the handle if (flags == GSD_OPEN_READWRITE) { handle->open_flags = GSD_OPEN_READWRITE; } else if (flags == GSD_OPEN_READONLY) { return GSD_ERROR_FILE_MUST_BE_WRITABLE; } else if (flags == GSD_OPEN_APPEND) { handle->open_flags = GSD_OPEN_APPEND; } // set the exclusive create bit if (exclusive_create) { extra_flags |= O_EXCL; } // create the file handle->fd = gsd_open_file(fname, O_RDWR | O_CREAT | O_TRUNC | extra_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); int retval = gsd_initialize_file(handle->fd, application, schema, schema_version); if (retval != 0) { if (handle->fd != -1) { close(handle->fd); } return retval; } retval = gsd_initialize_handle(handle); if (retval != 0) { if (handle->fd != -1) { 
close(handle->fd); } } return retval; } int gsd_open(struct gsd_handle* handle, const char* fname, const enum gsd_open_flag flags) { // zero the handle gsd_util_zero_memory(handle, sizeof(struct gsd_handle)); int extra_flags = 0; #ifdef _WIN32 extra_flags = _O_BINARY; #endif // open the file if (flags == GSD_OPEN_READWRITE) { handle->fd = gsd_open_file(fname, O_RDWR | extra_flags, 0); handle->open_flags = GSD_OPEN_READWRITE; } else if (flags == GSD_OPEN_READONLY) { handle->fd = gsd_open_file(fname, O_RDONLY | extra_flags, 0); handle->open_flags = GSD_OPEN_READONLY; } else if (flags == GSD_OPEN_APPEND) { handle->fd = gsd_open_file(fname, O_RDWR | extra_flags, 0); handle->open_flags = GSD_OPEN_APPEND; } int retval = gsd_initialize_handle(handle); if (retval != 0) { if (handle->fd != -1) { close(handle->fd); } } return retval; } int gsd_truncate(struct gsd_handle* handle) { if (handle == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->open_flags == GSD_OPEN_READONLY) { return GSD_ERROR_FILE_MUST_BE_WRITABLE; } int retval = 0; // deallocate indices if (handle->frame_names.data.reserved > 0) { retval = gsd_byte_buffer_free(&handle->frame_names.data); if (retval != GSD_SUCCESS) { return retval; } } if (handle->file_names.data.reserved > 0) { retval = gsd_byte_buffer_free(&handle->file_names.data); if (retval != GSD_SUCCESS) { return retval; } } retval = gsd_name_id_map_free(&handle->name_map); if (retval != GSD_SUCCESS) { return retval; } retval = gsd_index_buffer_free(&handle->file_index); if (retval != GSD_SUCCESS) { return retval; } if (handle->frame_index.reserved > 0) { retval = gsd_index_buffer_free(&handle->frame_index); if (retval != GSD_SUCCESS) { return retval; } } if (handle->buffer_index.reserved > 0) { retval = gsd_index_buffer_free(&handle->buffer_index); if (retval != GSD_SUCCESS) { return retval; } } if (handle->write_buffer.reserved > 0) { retval = gsd_byte_buffer_free(&handle->write_buffer); if (retval != GSD_SUCCESS) { return retval; } } // keep a copy of the old header struct gsd_header old_header = handle->header; retval = gsd_initialize_file(handle->fd, old_header.application, old_header.schema, old_header.schema_version); if (retval != GSD_SUCCESS) { return retval; } return gsd_initialize_handle(handle); } int gsd_close(struct gsd_handle* handle) { if (handle == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } int retval; if (handle->open_flags != GSD_OPEN_READONLY) { retval = gsd_flush(handle); if (retval != GSD_SUCCESS) { return retval; } } // save the fd so we can use it after freeing the handle int fd = handle->fd; retval = gsd_index_buffer_free(&handle->file_index); if (retval != GSD_SUCCESS) { return retval; } if (handle->frame_index.reserved > 0) { retval = gsd_index_buffer_free(&handle->frame_index); if (retval != GSD_SUCCESS) { return retval; } } if (handle->buffer_index.reserved > 0) { retval = gsd_index_buffer_free(&handle->buffer_index); if (retval != GSD_SUCCESS) { return retval; } } if (handle->write_buffer.reserved > 0) { retval = gsd_byte_buffer_free(&handle->write_buffer); if (retval != GSD_SUCCESS) { return retval; } } retval = gsd_name_id_map_free(&handle->name_map); if (retval != GSD_SUCCESS) { return retval; } if (handle->frame_names.data.reserved > 0) { handle->frame_names.n_names = 0; retval = gsd_byte_buffer_free(&handle->frame_names.data); if (retval != GSD_SUCCESS) { return retval; } } if (handle->file_names.data.reserved > 0) { handle->file_names.n_names = 0; retval = gsd_byte_buffer_free(&handle->file_names.data); if (retval != GSD_SUCCESS) 
{ return retval; } } // close the file retval = close(fd); if (retval != 0) { return GSD_ERROR_IO; } return GSD_SUCCESS; } int gsd_end_frame(struct gsd_handle* handle) { if (handle == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->open_flags == GSD_OPEN_READONLY) { return GSD_ERROR_FILE_MUST_BE_WRITABLE; } handle->cur_frame++; handle->pending_index_entries = 0; if (handle->frame_index.size > 0 || handle->buffer_index.size > handle->index_entries_to_buffer) { return gsd_flush(handle); } return GSD_SUCCESS; } int gsd_flush(struct gsd_handle* handle) { if (handle == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->open_flags == GSD_OPEN_READONLY) { return GSD_ERROR_FILE_MUST_BE_WRITABLE; } // flush the namelist buffer int retval = gsd_flush_name_buffer(handle); if (retval != GSD_SUCCESS) { return retval; } // flush the write buffer retval = gsd_flush_write_buffer(handle); if (retval != GSD_SUCCESS) { return retval; } // sync the data before writing the index retval = fsync(handle->fd); if (retval != 0) { return GSD_ERROR_IO; } // Write the frame index to the file, excluding the index entries that are part of the current // frame. if (handle->pending_index_entries > handle->frame_index.size) { return GSD_ERROR_INVALID_ARGUMENT; } uint64_t index_entries_to_write = handle->frame_index.size - handle->pending_index_entries; if (index_entries_to_write > 0) { // ensure there is enough space in the index if ((handle->file_index.size + index_entries_to_write) > handle->file_index.reserved) { gsd_expand_file_index(handle, handle->file_index.size + index_entries_to_write); } // sort the index before writing retval = gsd_index_buffer_sort(&handle->frame_index); if (retval != 0) { return retval; } // write the frame index entries to the file int64_t write_pos = handle->header.index_location + sizeof(struct gsd_index_entry) * handle->file_index.size; size_t bytes_to_write = sizeof(struct gsd_index_entry) * index_entries_to_write; ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, handle->frame_index.data, bytes_to_write, write_pos); if (bytes_written == -1 || bytes_written != bytes_to_write) { return GSD_ERROR_IO; } #if !GSD_USE_MMAP // add the entries to the file index memcpy(handle->file_index.data + handle->file_index.size, handle->frame_index.data, sizeof(struct gsd_index_entry) * index_entries_to_write); #endif // update size of file index handle->file_index.size += index_entries_to_write; // Clear the frame index, keeping those in the current unfinished frame. 
if (handle->pending_index_entries > 0) { for (uint64_t i = 0; i < handle->pending_index_entries; i++) { handle->frame_index.data[i] = handle->frame_index .data[handle->frame_index.size - handle->pending_index_entries + i]; } } handle->frame_index.size = handle->pending_index_entries; } return GSD_SUCCESS; } int gsd_write_chunk(struct gsd_handle* handle, const char* name, enum gsd_type type, uint64_t N, uint32_t M, uint8_t flags, const void* data) { // validate input if (N > 0 && data == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } if (M == 0) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->open_flags == GSD_OPEN_READONLY) { return GSD_ERROR_FILE_MUST_BE_WRITABLE; } if (flags != 0) { return GSD_ERROR_INVALID_ARGUMENT; } uint16_t id = gsd_name_id_map_find(&handle->name_map, name); if (id == UINT16_MAX) { // not found, append to the index int retval = gsd_append_name(&id, handle, name); if (retval != GSD_SUCCESS) { return retval; } if (id == UINT16_MAX) { // this should never happen return GSD_ERROR_NAMELIST_FULL; } } struct gsd_index_entry entry; // populate fields in the entry's data gsd_util_zero_memory(&entry, sizeof(struct gsd_index_entry)); entry.frame = handle->cur_frame; entry.id = id; entry.type = (uint8_t)type; entry.N = N; entry.M = M; size_t size = N * M * gsd_sizeof_type(type); // decide whether to write this chunk to the buffer or straight to disk if (size < handle->maximum_write_buffer_size) { // flush the buffer if this entry won't fit if (size > (handle->maximum_write_buffer_size - handle->write_buffer.size)) { gsd_flush_write_buffer(handle); } entry.location = handle->write_buffer.size; // add an entry to the buffer index struct gsd_index_entry* index_entry; int retval = gsd_index_buffer_add(&handle->buffer_index, &index_entry); if (retval != GSD_SUCCESS) { return retval; } *index_entry = entry; // add the data to the write buffer if (size > 0) { retval = gsd_byte_buffer_append(&handle->write_buffer, data, size); if (retval != GSD_SUCCESS) { return retval; } } } else { // add an entry to the frame index struct gsd_index_entry* index_entry; int retval = gsd_index_buffer_add(&handle->frame_index, &index_entry); if (retval != GSD_SUCCESS) { return retval; } *index_entry = entry; // find the location at the end of the file for the chunk index_entry->location = handle->file_size; // write the data ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, data, size, index_entry->location); if (bytes_written == -1 || bytes_written != size) { return GSD_ERROR_IO; } // update the file_size in the handle handle->file_size += bytes_written; } handle->pending_index_entries++; return GSD_SUCCESS; } uint64_t gsd_get_nframes(struct gsd_handle* handle) { if (handle == NULL) { return 0; } return handle->cur_frame; } const struct gsd_index_entry* gsd_find_chunk(struct gsd_handle* handle, uint64_t frame, const char* name) { if (handle == NULL) { return NULL; } if (name == NULL) { return NULL; } if (frame >= gsd_get_nframes(handle)) { return NULL; } if (handle->open_flags != GSD_OPEN_READONLY) { int retval = gsd_flush(handle); if (retval != GSD_SUCCESS) { return NULL; } } // find the id for the given name uint16_t match_id = gsd_name_id_map_find(&handle->name_map, name); if (match_id == UINT16_MAX) { return NULL; } if (handle->header.gsd_version >= gsd_make_version(2, 0)) { // gsd 2.0 files sort the entire index // binary search for the index entry ssize_t L = 0; ssize_t R = handle->file_index.size - 1; struct gsd_index_entry T; T.frame = frame; T.id = match_id; while (L <= R) { size_t m = (L 
+ R) / 2; int cmp = gsd_cmp_index_entry(handle->file_index.data + m, &T); if (cmp == -1) { L = m + 1; } else if (cmp == 1) { R = m - 1; } else { return &(handle->file_index.data[m]); } } } else { // gsd 1.0 file: use binary search to find the frame and linear search to find the entry size_t L = 0; size_t R = handle->file_index.size; // progressively narrow the search window by halves do { size_t m = (L + R) / 2; if (frame < handle->file_index.data[m].frame) { R = m; } else { L = m; } } while ((R - L) > 1); // this finds L = the rightmost index with the desired frame int64_t cur_index; // search all index entries with the matching frame for (cur_index = L; (cur_index >= 0) && (handle->file_index.data[cur_index].frame == frame); cur_index--) { // if the frame matches, check the id if (match_id == handle->file_index.data[cur_index].id) { return &(handle->file_index.data[cur_index]); } } } // if we got here, we didn't find the specified chunk return NULL; } int gsd_read_chunk(struct gsd_handle* handle, void* data, const struct gsd_index_entry* chunk) { if (handle == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } if (data == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } if (chunk == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->open_flags != GSD_OPEN_READONLY) { int retval = gsd_flush(handle); if (retval != GSD_SUCCESS) { return retval; } } size_t size = chunk->N * chunk->M * gsd_sizeof_type((enum gsd_type)chunk->type); if (size == 0) { return GSD_ERROR_FILE_CORRUPT; } if (chunk->location == 0) { return GSD_ERROR_FILE_CORRUPT; } // validate that we don't read past the end of the file if ((chunk->location + size) > (uint64_t)handle->file_size) { return GSD_ERROR_FILE_CORRUPT; } ssize_t bytes_read = gsd_io_pread_retry(handle->fd, data, size, chunk->location); if (bytes_read == -1 || bytes_read != size) { return GSD_ERROR_IO; } return GSD_SUCCESS; } size_t gsd_sizeof_type(enum gsd_type type) { size_t val = 0; if (type == GSD_TYPE_UINT8) { val = sizeof(uint8_t); } else if (type == GSD_TYPE_UINT16) { val = sizeof(uint16_t); } else if (type == GSD_TYPE_UINT32) { val = sizeof(uint32_t); } else if (type == GSD_TYPE_UINT64) { val = sizeof(uint64_t); } else if (type == GSD_TYPE_INT8) { val = sizeof(int8_t); } else if (type == GSD_TYPE_INT16) { val = sizeof(int16_t); } else if (type == GSD_TYPE_INT32) { val = sizeof(int32_t); } else if (type == GSD_TYPE_INT64) { val = sizeof(int64_t); } else if (type == GSD_TYPE_FLOAT) { val = sizeof(float); } else if (type == GSD_TYPE_DOUBLE) { val = sizeof(double); } else { return 0; } return val; } const char* gsd_find_matching_chunk_name(struct gsd_handle* handle, const char* match, const char* prev) { if (handle == NULL) { return NULL; } if (match == NULL) { return NULL; } if (handle->file_names.n_names == 0) { return NULL; } if (handle->open_flags != GSD_OPEN_READONLY) { int retval = gsd_flush(handle); if (retval != GSD_SUCCESS) { return NULL; } } // return nothing found if the name buffer is corrupt if (handle->file_names.data.data[handle->file_names.data.reserved - 1] != 0) { return NULL; } // determine search start index const char* search_str; if (prev == NULL) { search_str = handle->file_names.data.data; } else { // return not found if prev is not in range if (prev < handle->file_names.data.data) { return NULL; } if (prev >= (handle->file_names.data.data + handle->file_names.data.reserved)) { return NULL; } if (handle->header.gsd_version < gsd_make_version(2, 0)) { search_str = prev + GSD_NAME_SIZE; } else { search_str = prev + strlen(prev) + 1; } } 
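    // Prefix matching (illustrative): with stored names "particles/N", "particles/position", and
    // "log/energy", match = "particles/" returns "particles/N" on the first call and
    // "particles/position" when that result is passed back in as *prev*; match = "" visits every
    // stored name, which is how gsd_upgrade() iterates the list.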
size_t match_len = strlen(match); while (search_str < (handle->file_names.data.data + handle->file_names.data.reserved)) { if (search_str[0] != 0 && 0 == strncmp(match, search_str, match_len)) { return search_str; } if (handle->header.gsd_version < gsd_make_version(2, 0)) { search_str += GSD_NAME_SIZE; } else { search_str += strlen(search_str) + 1; } } // searched past the end of the list, return NULL return NULL; } int gsd_upgrade(struct gsd_handle* handle) { if (handle == NULL) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->open_flags == GSD_OPEN_READONLY) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->frame_index.size > 0 || handle->frame_names.n_names > 0) { return GSD_ERROR_INVALID_ARGUMENT; } if (handle->header.gsd_version < gsd_make_version(2, 0)) { if (handle->file_index.size > 0) { // make a copy of the file index struct gsd_index_buffer buf; gsd_util_zero_memory(&buf, sizeof(struct gsd_index_buffer)); int retval = gsd_index_buffer_allocate(&buf, handle->file_index.size); if (retval != GSD_SUCCESS) { return retval; } memcpy(buf.data, handle->file_index.data, sizeof(struct gsd_index_entry) * handle->file_index.size); buf.size = handle->file_index.size; // sort the copy and write it back out to the file retval = gsd_index_buffer_sort(&buf); if (retval != GSD_SUCCESS) { gsd_index_buffer_free(&buf); return retval; } ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, buf.data, sizeof(struct gsd_index_entry) * buf.size, handle->header.index_location); if (bytes_written == -1 || bytes_written != sizeof(struct gsd_index_entry) * buf.size) { gsd_index_buffer_free(&buf); return GSD_ERROR_IO; } retval = gsd_index_buffer_free(&buf); if (retval != GSD_SUCCESS) { return retval; } // sync the updated index retval = fsync(handle->fd); if (retval != 0) { return GSD_ERROR_IO; } } if (handle->file_names.n_names > 0) { // compact the name list without changing its size or position on the disk struct gsd_byte_buffer new_name_buf; gsd_util_zero_memory(&new_name_buf, sizeof(struct gsd_byte_buffer)); int retval = gsd_byte_buffer_allocate(&new_name_buf, handle->file_names.data.reserved); if (retval != GSD_SUCCESS) { return retval; } const char* name = gsd_find_matching_chunk_name(handle, "", NULL); while (name != NULL) { retval = gsd_byte_buffer_append(&new_name_buf, name, strlen(name) + 1); if (retval != GSD_SUCCESS) { gsd_byte_buffer_free(&new_name_buf); return retval; } name = gsd_find_matching_chunk_name(handle, "", name); } if (new_name_buf.reserved != handle->file_names.data.reserved) { gsd_byte_buffer_free(&new_name_buf); return GSD_ERROR_FILE_CORRUPT; } // write the new names out to disk ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, new_name_buf.data, new_name_buf.reserved, handle->header.namelist_location); if (bytes_written == -1 || bytes_written != new_name_buf.reserved) { gsd_byte_buffer_free(&new_name_buf); return GSD_ERROR_IO; } // swap in the re-organized name buffer retval = gsd_byte_buffer_free(&handle->file_names.data); if (retval != GSD_SUCCESS) { gsd_byte_buffer_free(&new_name_buf); return retval; } handle->file_names.data = new_name_buf; // sync the updated name list retval = fsync(handle->fd); if (retval != 0) { gsd_byte_buffer_free(&new_name_buf); return GSD_ERROR_IO; } } // label the file as a v2.0 file handle->header.gsd_version = gsd_make_version(GSD_CURRENT_FILE_VERSION, 0); // write the new header out ssize_t bytes_written = gsd_io_pwrite_retry(handle->fd, &(handle->header), sizeof(struct gsd_header), 0); if (bytes_written != sizeof(struct gsd_header)) { 
        return GSD_ERROR_IO;
        }

    // sync the updated header
    int retval = fsync(handle->fd);
    if (retval != 0)
        {
        return GSD_ERROR_IO;
        }

    // remap the file index
    retval = gsd_index_buffer_free(&handle->file_index);
    if (retval != 0)
        {
        return retval;
        }

    retval = gsd_index_buffer_map(&handle->file_index, handle);
    if (retval != 0)
        {
        return retval;
        }
    }

    return GSD_SUCCESS;
    }

uint64_t gsd_get_maximum_write_buffer_size(struct gsd_handle* handle)
    {
    if (handle == NULL)
        {
        return 0;
        }
    return handle->maximum_write_buffer_size;
    }

int gsd_set_maximum_write_buffer_size(struct gsd_handle* handle, uint64_t size)
    {
    if (handle == NULL || size == 0)
        {
        return GSD_ERROR_INVALID_ARGUMENT;
        }
    handle->maximum_write_buffer_size = size;
    return GSD_SUCCESS;
    }

uint64_t gsd_get_index_entries_to_buffer(struct gsd_handle* handle)
    {
    if (handle == NULL)
        {
        return 0;
        }
    return handle->index_entries_to_buffer;
    }

int gsd_set_index_entries_to_buffer(struct gsd_handle* handle, uint64_t number)
    {
    if (handle == NULL || number == 0)
        {
        return GSD_ERROR_INVALID_ARGUMENT;
        }
    handle->index_entries_to_buffer = number;
    return GSD_SUCCESS;
    }

// undefine windows wrapper macros
#ifdef _WIN32
#undef lseek
#undef write
#undef read
#undef open
#undef ftruncate

#pragma warning(pop)
#endif
gsd-3.3.0/gsd/gsd.h000066400000000000000000000535061462564674300140260ustar00rootroot00000000000000// Copyright (c) 2016-2024 The Regents of the University of Michigan
// Part of GSD, released under the BSD 2-Clause License.

#ifndef GSD_H
#define GSD_H

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*! \file gsd.h
    \brief Declare GSD data types and C API
*/

/// Identifiers for the gsd data chunk element types
enum gsd_type
    {
    /// Unsigned 8-bit integer.
    GSD_TYPE_UINT8 = 1,

    /// Unsigned 16-bit integer.
    GSD_TYPE_UINT16,

    /// Unsigned 32-bit integer.
    GSD_TYPE_UINT32,

    /// Unsigned 64-bit integer.
    GSD_TYPE_UINT64,

    /// Signed 8-bit integer.
    GSD_TYPE_INT8,

    /// Signed 16-bit integer.
    GSD_TYPE_INT16,

    /// Signed 32-bit integer.
    GSD_TYPE_INT32,

    /// Signed 64-bit integer.
    GSD_TYPE_INT64,

    /// 32-bit floating point number.
    GSD_TYPE_FLOAT,

    /// 64-bit floating point number.
    GSD_TYPE_DOUBLE
    };

/// Flag for GSD file open options
enum gsd_open_flag
    {
    /// Open for both reading and writing
    GSD_OPEN_READWRITE = 1,

    /// Open only for reading
    GSD_OPEN_READONLY,

    /// Open only for writing
    GSD_OPEN_APPEND
    };

/// Error return values
enum gsd_error
    {
    /// Success.
    GSD_SUCCESS = 0,

    /// IO error. Check ``errno`` for details
    GSD_ERROR_IO = -1,

    /// Invalid argument passed to function.
    GSD_ERROR_INVALID_ARGUMENT = -2,

    /// The file is not a GSD file.
    GSD_ERROR_NOT_A_GSD_FILE = -3,

    /// The GSD file version cannot be read.
    GSD_ERROR_INVALID_GSD_FILE_VERSION = -4,

    /// The GSD file is corrupt.
    GSD_ERROR_FILE_CORRUPT = -5,

    /// GSD failed to allocate memory.
    GSD_ERROR_MEMORY_ALLOCATION_FAILED = -6,

    /// The GSD file cannot store any additional unique data chunk names.
    GSD_ERROR_NAMELIST_FULL = -7,

    /** This API call requires that the GSD file be opened with the mode GSD_OPEN_APPEND or
        GSD_OPEN_READWRITE.
    */
    GSD_ERROR_FILE_MUST_BE_WRITABLE = -8,

    /** This API call requires that the GSD file be opened with the mode GSD_OPEN_READONLY or
        GSD_OPEN_READWRITE.
    */
    GSD_ERROR_FILE_MUST_BE_READABLE = -9,
    };

enum
    {
    /** v1 file: Size of a GSD name in memory.

        v2 file: The name buffer size is a multiple of GSD_NAME_SIZE.
    */
    GSD_NAME_SIZE = 64
    };

enum
    {
    /// Reserved bytes in the header structure
    GSD_RESERVED_BYTES = 80
    };

/** GSD file header

    The in-memory and on-disk storage of the GSD file header.
Stored in the first 256 bytes of the file. @warning All members are **read-only** to the caller. */ struct gsd_header { /// Magic number marking that this is a GSD file. uint64_t magic; /// Location of the chunk index in the file. uint64_t index_location; /// Number of index entries that will fit in the space allocated. uint64_t index_allocated_entries; /// Location of the name list in the file. uint64_t namelist_location; /// Number of bytes in the namelist divided by GSD_NAME_SIZE. uint64_t namelist_allocated_entries; /// Schema version: from gsd_make_version(). uint32_t schema_version; /// GSD file format version from gsd_make_version(). uint32_t gsd_version; /// Name of the application that generated this file. char application[GSD_NAME_SIZE]; /// Name of data schema. char schema[GSD_NAME_SIZE]; /// Reserved for future use. char reserved[GSD_RESERVED_BYTES]; }; /** Index entry An index entry for a single chunk of data. @warning All members are **read-only** to the caller. */ struct gsd_index_entry { /// Frame index of the chunk. uint64_t frame; /// Number of rows in the chunk. uint64_t N; /// Location of the chunk in the file. int64_t location; /// Number of columns in the chunk. uint32_t M; /// Index of the chunk name in the name list. uint16_t id; /// Data type of the chunk: one of gsd_type. uint8_t type; /// Flags (for internal use). uint8_t flags; }; /** Name/id mapping A string name paired with an ID. Used for storing sorted name/id mappings in a hash map. */ struct gsd_name_id_pair { /// Pointer to name (actual name storage is allocated in gsd_handle) char* name; /// Next name/id pair with the same hash struct gsd_name_id_pair* next; /// Entry id uint16_t id; }; /** Name/id hash map A hash map of string names to integer identifiers. */ struct gsd_name_id_map { /// Name/id mappings struct gsd_name_id_pair* v; /// Number of entries in the mapping size_t size; }; /** Array of index entries May point to a mapped location of index entries in the file or an in-memory buffer. */ struct gsd_index_buffer { /// Indices in the buffer struct gsd_index_entry* data; /// Number of entries in the buffer size_t size; /// Number of entries available in the buffer size_t reserved; /// Pointer to mapped data (NULL if not mapped) void* mapped_data; /// Number of bytes mapped size_t mapped_len; }; /** Byte buffer Used to buffer of small data chunks held for a buffered write at the end of a frame. Also used to hold the names. */ struct gsd_byte_buffer { /// Data char* data; /// Number of bytes in the buffer size_t size; /// Number of bytes available in the buffer size_t reserved; }; /** Name buffer Holds a list of string names in order separated by NULL terminators. In v1 files, each name is 64 bytes. In v2 files, only one NULL terminator is placed between each name. */ struct gsd_name_buffer { /// Data struct gsd_byte_buffer data; /// Number of names in the list size_t n_names; }; /** File handle A handle to an open GSD file. This handle is obtained when opening a GSD file and is passed into every method that operates on the file. @warning All members are **read-only** to the caller. 
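    Example (illustrative sketch; "dump.gsd" is a placeholder path and error handling is
    abbreviated):

        struct gsd_handle handle;
        if (gsd_open(&handle, "dump.gsd", GSD_OPEN_READONLY) == GSD_SUCCESS)
            {
            // query the handle through the API rather than by writing to its members
            uint64_t n_frames = gsd_get_nframes(&handle);
            gsd_close(&handle);
            }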
*/ struct gsd_handle { /// File descriptor int fd; /// The file header struct gsd_header header; /// Mapped data chunk index struct gsd_index_buffer file_index; /// Index entries to append to the current frame struct gsd_index_buffer frame_index; /// Buffered index entries to append to the current frame struct gsd_index_buffer buffer_index; /// Buffered write data struct gsd_byte_buffer write_buffer; /// List of names stored in the file struct gsd_name_buffer file_names; /// List of names added in the current frame struct gsd_name_buffer frame_names; /// The index of the last frame in the file uint64_t cur_frame; /// Size of the file (in bytes) int64_t file_size; /// Flags passed to gsd_open() when opening this handle enum gsd_open_flag open_flags; /// Access the names in the namelist struct gsd_name_id_map name_map; /// Number of index entries pending in the current frame. uint64_t pending_index_entries; /// Maximum write buffer size (bytes). uint64_t maximum_write_buffer_size; /// Number of index entries to buffer before flushing. uint64_t index_entries_to_buffer; }; /** Specify a version. @param major major version @param minor minor version @return a packed version number aaaa.bbbb suitable for storing in a gsd file version entry. */ uint32_t gsd_make_version(unsigned int major, unsigned int minor); /** Create a GSD file. @param fname File name (UTF-8 encoded). @param application Generating application name (truncated to 63 chars). @param schema Schema name for data to be written in this GSD file (truncated to 63 chars). @param schema_version Version of the scheme data to be written (make with gsd_make_version()). @post Create an empty gsd file in a file of the given name. Overwrite any existing file at that location. The generated gsd file is not opened. Call gsd_open() to open it for writing. @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). */ int gsd_create(const char* fname, const char* application, const char* schema, uint32_t schema_version); /** Create and open a GSD file. @param handle Handle to open. @param fname File name (UTF-8 encoded). @param application Generating application name (truncated to 63 chars). @param schema Schema name for data to be written in this GSD file (truncated to 63 chars). @param schema_version Version of the scheme data to be written (make with gsd_make_version()). @param flags Either GSD_OPEN_READWRITE, or GSD_OPEN_APPEND. @param exclusive_create Set to non-zero to force exclusive creation of the file. @post Create an empty gsd file with the given name. Overwrite any existing file at that location. Open the generated gsd file in *handle*. The file descriptor is closed if there when an error opening the file. @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). - GSD_ERROR_NOT_A_GSD_FILE: Not a GSD file. - GSD_ERROR_INVALID_GSD_FILE_VERSION: Invalid GSD file version. - GSD_ERROR_FILE_CORRUPT: Corrupt file. - GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory. */ int gsd_create_and_open(struct gsd_handle* handle, const char* fname, const char* application, const char* schema, uint32_t schema_version, enum gsd_open_flag flags, int exclusive_create); /** Open a GSD file. @param handle Handle to open. @param fname File name to open (UTF-8 encoded). @param flags Either GSD_OPEN_READWRITE, GSD_OPEN_READONLY, or GSD_OPEN_APPEND. @pre The file name *fname* is a GSD file. @post Open a GSD file and populates the handle for use by API calls. 
The file descriptor is closed if there is an error opening the file. @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). - GSD_ERROR_NOT_A_GSD_FILE: Not a GSD file. - GSD_ERROR_INVALID_GSD_FILE_VERSION: Invalid GSD file version. - GSD_ERROR_FILE_CORRUPT: Corrupt file. - GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory. */ int gsd_open(struct gsd_handle* handle, const char* fname, enum gsd_open_flag flags); /** Truncate a GSD file. @param handle Open GSD file to truncate. After truncating, a file will have no frames and no data chunks. The file size will be that of a newly created gsd file. The application, schema, and schema version metadata will be kept. Truncate does not close and reopen the file, so it is suitable for writing restart files on Lustre file systems without any metadata access. @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). - GSD_ERROR_NOT_A_GSD_FILE: Not a GSD file. - GSD_ERROR_INVALID_GSD_FILE_VERSION: Invalid GSD file version. - GSD_ERROR_FILE_CORRUPT: Corrupt file. - GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory. */ int gsd_truncate(struct gsd_handle* handle); /** Close a GSD file. @param handle GSD file to close. @pre *handle* was opened by gsd_open(). @post Writable files: All data and index entries buffered before the previous call to gsd_end_frame() is written to the file (see gsd_flush()). @post The file is closed. @post *handle* is freed and can no longer be used. @warning Ensure that all gsd_write_chunk() calls are completed with gsd_end_frame() before closing the file. @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). - GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL. */ int gsd_close(struct gsd_handle* handle); /** Complete the current frame. @param handle Handle to an open GSD file @pre *handle* was opened by gsd_open(). @post The current frame counter is increased by 1. @post Flush the write buffer if it has overflowed. See gsd_flush(). @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). - GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL. - GSD_ERROR_FILE_MUST_BE_WRITABLE: The file was opened read-only. - GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory. */ int gsd_end_frame(struct gsd_handle* handle); /** Flush the write buffer. @param handle Handle to an open GSD file @pre *handle* was opened by gsd_open(). @post All data buffered by gsd_write_chunk() are present in the file. @post All index entries buffered by gsd_write_chunk() prior to the last call to gsd_end_frame() are present in the file. @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). - GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL. - GSD_ERROR_FILE_MUST_BE_WRITABLE: The file was opened read-only. - GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory. */ int gsd_flush(struct gsd_handle* handle); /** Add a data chunk to the current frame. @param handle Handle to an open GSD file. @param name Name of the data chunk. @param type type ID that identifies the type of data in *data*. @param N Number of rows in the data. @param M Number of columns in the data. @param flags set to 0, non-zero values reserved for future use. @param data Data buffer. @pre *handle* was opened by gsd_open(). @pre *name* is a unique name for data chunks in the given frame. 
@pre data is allocated and contains at least `N * M * gsd_sizeof_type(type)` bytes. @post When there is space in the buffer: The given data is present in the write buffer. Otherwise, the data is present at the end of the file. @post The index is present in the buffer. @note If the GSD file is version 1.0, the chunk name is truncated to 63 bytes. GSD version 2.0 files support arbitrarily long names. @note *N* == 0 is allowed. When *N* is 0, *data* may be NULL. @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). - GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL, *N* == 0, *M* == 0, *type* is invalid, or *flags* != 0. - GSD_ERROR_FILE_MUST_BE_WRITABLE: The file was opened read-only. - GSD_ERROR_NAMELIST_FULL: The file cannot store any additional unique chunk names. - GSD_ERROR_MEMORY_ALLOCATION_FAILED: failed to allocate memory. */ int gsd_write_chunk(struct gsd_handle* handle, const char* name, enum gsd_type type, uint64_t N, uint32_t M, uint8_t flags, const void* data); /** Find a chunk in the GSD file. @param handle Handle to an open GSD file @param frame Frame to look for chunk @param name Name of the chunk to find @pre *handle* was opened by gsd_open() in read or readwrite mode. The found entry contains size and type metadata and can be passed to gsd_read_chunk() to read the data. @return A pointer to the found chunk, or NULL if not found. @note gsd_find_chunk() calls gsd_flush() when the file is writable. */ const struct gsd_index_entry* gsd_find_chunk(struct gsd_handle* handle, uint64_t frame, const char* name); /** Read a chunk from the GSD file. @param handle Handle to an open GSD file. @param data Data buffer to read into. @param chunk Chunk to read. @pre *handle* was opened in read or readwrite mode. @pre *chunk* was found by gsd_find_chunk(). @pre *data* points to an allocated buffer with at least `N * M * gsd_sizeof_type(type)` bytes. @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). - GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL, *data* is NULL, or *chunk* is NULL. - GSD_ERROR_FILE_MUST_BE_READABLE: The file was opened in append mode. - GSD_ERROR_FILE_CORRUPT: The GSD file is corrupt. @note gsd_read_chunk() calls gsd_flush() when the file is writable. */ int gsd_read_chunk(struct gsd_handle* handle, void* data, const struct gsd_index_entry* chunk); /** Get the number of frames in the GSD file. @param handle Handle to an open GSD file @pre *handle* was opened by gsd_open(). @return The number of frames in the file, or 0 on error. */ uint64_t gsd_get_nframes(struct gsd_handle* handle); /** Query size of a GSD type ID. @param type Type ID to query. @return Size of the given type in bytes, or 0 for an unknown type ID. */ size_t gsd_sizeof_type(enum gsd_type type); /** Search for chunk names in a gsd file. @param handle Handle to an open GSD file. @param match String to match. @param prev Search starting point. @pre *handle* was opened by gsd_open() @pre *prev* was returned by a previous call to gsd_find_matching_chunk_name() To find the first matching chunk name, pass NULL for prev. Pass in the previous found string to find the next after that, and so on. Chunk names match if they begin with the string in *match*. Chunk names returned by this function may be present in at least one frame. @return Pointer to a string, NULL if no more matching chunks are found, or NULL if *prev* is invalid @note gsd_find_matching_chunk_name() calls gsd_flush() when the file is writable.
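    For illustration, a minimal loop that visits every chunk name beginning with a
    given prefix. The "log/" prefix and the printf() call (which assumes <stdio.h>)
    are placeholders; *handle* is assumed to be a handle opened for reading.

        const char* name = gsd_find_matching_chunk_name(&handle, "log/", NULL);
        while (name != NULL)
            {
            printf("%s\n", name); // process the matching chunk name
            name = gsd_find_matching_chunk_name(&handle, "log/", name);
            }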
*/ const char* gsd_find_matching_chunk_name(struct gsd_handle* handle, const char* match, const char* prev); /** Upgrade a GSD file to the latest specification. @param handle Handle to an open GSD file @pre *handle* was opened by gsd_open() with a writable mode. @pre There are no pending data to write to the file in gsd_end_frame() @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_IO: IO error (check errno). - GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL - GSD_ERROR_FILE_MUST_BE_WRITABLE: The file was opened in read-only mode. */ int gsd_upgrade(struct gsd_handle* handle); /** Get the maximum write buffer size. @param handle Handle to an open GSD file @pre *handle* was opened by gsd_open(). @return The maximum write buffer size in bytes, or 0 on error. */ uint64_t gsd_get_maximum_write_buffer_size(struct gsd_handle* handle); /** Set the maximum write buffer size. @param handle Handle to an open GSD file @param size Maximum number of bytes to allocate in the write buffer (must be greater than 0). @pre *handle* was opened by gsd_open(). @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL - GSD_ERROR_INVALID_ARGUMENT: size == 0 */ int gsd_set_maximum_write_buffer_size(struct gsd_handle* handle, uint64_t size); /** Get the number of index entries to buffer. @param handle Handle to an open GSD file @pre *handle* was opened by gsd_open(). @return The number of index entries to buffer, or 0 on error. */ uint64_t gsd_get_index_entries_to_buffer(struct gsd_handle* handle); /** Set the number of index entries to buffer. @param handle Handle to an open GSD file @param number Number of index entries to buffer before automatically flushing in `gsd_end_frame()` (must be greater than 0). @pre *handle* was opened by gsd_open(). @note GSD may allocate more than this number of entries in the buffer, as needed to store all index entries for the already buffered frames and the current frame. @return - GSD_SUCCESS (0) on success. Negative value on failure: - GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL - GSD_ERROR_INVALID_ARGUMENT: number == 0 */ int gsd_set_index_entries_to_buffer(struct gsd_handle* handle, uint64_t number); #ifdef __cplusplus } #endif #endif // #ifndef GSD_H gsd-3.3.0/gsd/hoomd.py000066400000000000000000001305101462564674300145500ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. """Read and write HOOMD schema GSD files. :py:mod:`gsd.hoomd` reads and writes GSD files with the ``hoomd`` schema. * `HOOMDTrajectory` - Read and write hoomd schema GSD files. * `Frame` - Store the state of a single frame. * `ConfigurationData` - Store configuration data in a frame. * `ParticleData` - Store particle data in a frame. * `BondData` - Store topology data in a frame. * `open` - Open a hoomd schema GSD file. * `read_log` - Read log from a hoomd schema GSD file into a dict of time-series arrays. See Also: See :ref:`hoomd-examples` for full examples. """ import copy import json import logging import warnings from collections import OrderedDict import numpy try: import gsd except ImportError: gsd = None fl_imported = True try: import gsd.fl except ImportError: fl_imported = False logger = logging.getLogger('gsd.hoomd') class ConfigurationData: """Store configuration data. Use the `Frame.configuration` attribute of a to access the configuration. Attributes: step (int): Time step of this frame (:chunk:`configuration/step`). 
dimensions (int): Number of dimensions (:chunk:`configuration/dimensions`). When not set explicitly, dimensions will default to different values based on the value of :math:`L_z` in `box`. When :math:`L_z = 0` dimensions will default to 2, otherwise 3. User set values always take precedence. """ _default_value = OrderedDict() _default_value['step'] = numpy.uint64(0) _default_value['dimensions'] = numpy.uint8(3) _default_value['box'] = numpy.array([1, 1, 1, 0, 0, 0], dtype=numpy.float32) def __init__(self): self.step = None self.dimensions = None self._box = None @property def box(self): """((6, 1) `numpy.ndarray` of ``numpy.float32``): Box dimensions. [lx, ly, lz, xy, xz, yz]. See :chunk:`configuration/box`. """ return self._box @box.setter def box(self, box): self._box = box try: Lz = box[2] except TypeError: return else: if self.dimensions is None: self.dimensions = 2 if Lz == 0 else 3 def validate(self): """Validate all attributes. Convert every array attribute to a `numpy.ndarray` of the proper type and check that all attributes have the correct dimensions. Ignore any attributes that are ``None``. Warning: Array attributes that are not contiguous numpy arrays will be replaced with contiguous numpy arrays of the appropriate type. """ logger.debug('Validating ConfigurationData') if self.box is not None: self.box = numpy.ascontiguousarray(self.box, dtype=numpy.float32) self.box = self.box.reshape([6]) class ParticleData: """Store particle data chunks. Use the `Frame.particles` attribute of a to access the particles. Instances resulting from file read operations will always store array quantities in `numpy.ndarray` objects of the defined types. User created frames may provide input data that can be converted to a `numpy.ndarray`. See Also: `hoomd.State` for a full description of how HOOMD interprets this data. Attributes: N (int): Number of particles in the frame (:chunk:`particles/N`). types (tuple[str]): Names of the particle types (:chunk:`particles/types`). position ((*N*, 3) `numpy.ndarray` of ``numpy.float32``): Particle position (:chunk:`particles/position`). orientation ((*N*, 4) `numpy.ndarray` of ``numpy.float32``): Particle orientation. (:chunk:`particles/orientation`). typeid ((*N*, ) `numpy.ndarray` of ``numpy.uint32``): Particle type id (:chunk:`particles/typeid`). mass ((*N*, ) `numpy.ndarray` of ``numpy.float32``): Particle mass (:chunk:`particles/mass`). charge ((*N*, ) `numpy.ndarray` of ``numpy.float32``): Particle charge (:chunk:`particles/charge`). diameter ((*N*, ) `numpy.ndarray` of ``numpy.float32``): Particle diameter (:chunk:`particles/diameter`). body ((*N*, ) `numpy.ndarray` of ``numpy.int32``): Particle body (:chunk:`particles/body`). moment_inertia ((*N*, 3) `numpy.ndarray` of ``numpy.float32``): Particle moment of inertia (:chunk:`particles/moment_inertia`). velocity ((*N*, 3) `numpy.ndarray` of ``numpy.float32``): Particle velocity (:chunk:`particles/velocity`). angmom ((*N*, 4) `numpy.ndarray` of ``numpy.float32``): Particle angular momentum (:chunk:`particles/angmom`). image ((*N*, 3) `numpy.ndarray` of ``numpy.int32``): Particle image (:chunk:`particles/image`). type_shapes (tuple[dict]): Shape specifications for visualizing particle types (:chunk:`particles/type_shapes`). 
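    Example:
        A minimal sketch of filling a few per-particle arrays by hand before
        writing a frame. The values are arbitrary placeholders; `validate`
        converts the lists to `numpy.ndarray` objects::

            import gsd.hoomd

            frame = gsd.hoomd.Frame()
            frame.particles.N = 2
            frame.particles.types = ['A', 'B']
            frame.particles.typeid = [0, 1]
            frame.particles.position = [[0, 0, 0], [1, 0, 0]]
            frame.particles.validate()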
""" _default_value = OrderedDict() _default_value['N'] = numpy.uint32(0) _default_value['types'] = ['A'] _default_value['typeid'] = numpy.uint32(0) _default_value['mass'] = numpy.float32(1.0) _default_value['charge'] = numpy.float32(0) _default_value['diameter'] = numpy.float32(1.0) _default_value['body'] = numpy.int32(-1) _default_value['moment_inertia'] = numpy.array([0, 0, 0], dtype=numpy.float32) _default_value['position'] = numpy.array([0, 0, 0], dtype=numpy.float32) _default_value['orientation'] = numpy.array([1, 0, 0, 0], dtype=numpy.float32) _default_value['velocity'] = numpy.array([0, 0, 0], dtype=numpy.float32) _default_value['angmom'] = numpy.array([0, 0, 0, 0], dtype=numpy.float32) _default_value['image'] = numpy.array([0, 0, 0], dtype=numpy.int32) _default_value['type_shapes'] = [{}] def __init__(self): self.N = 0 self.position = None self.orientation = None self.types = None self.typeid = None self.mass = None self.charge = None self.diameter = None self.body = None self.moment_inertia = None self.velocity = None self.angmom = None self.image = None self.type_shapes = None def validate(self): """Validate all attributes. Convert every array attribute to a `numpy.ndarray` of the proper type and check that all attributes have the correct dimensions. Ignore any attributes that are ``None``. Warning: Array attributes that are not contiguous numpy arrays will be replaced with contiguous numpy arrays of the appropriate type. """ logger.debug('Validating ParticleData') if self.position is not None: self.position = numpy.ascontiguousarray(self.position, dtype=numpy.float32) self.position = self.position.reshape([self.N, 3]) if self.orientation is not None: self.orientation = numpy.ascontiguousarray( self.orientation, dtype=numpy.float32 ) self.orientation = self.orientation.reshape([self.N, 4]) if self.typeid is not None: self.typeid = numpy.ascontiguousarray(self.typeid, dtype=numpy.uint32) self.typeid = self.typeid.reshape([self.N]) if self.mass is not None: self.mass = numpy.ascontiguousarray(self.mass, dtype=numpy.float32) self.mass = self.mass.reshape([self.N]) if self.charge is not None: self.charge = numpy.ascontiguousarray(self.charge, dtype=numpy.float32) self.charge = self.charge.reshape([self.N]) if self.diameter is not None: self.diameter = numpy.ascontiguousarray(self.diameter, dtype=numpy.float32) self.diameter = self.diameter.reshape([self.N]) if self.body is not None: self.body = numpy.ascontiguousarray(self.body, dtype=numpy.int32) self.body = self.body.reshape([self.N]) if self.moment_inertia is not None: self.moment_inertia = numpy.ascontiguousarray( self.moment_inertia, dtype=numpy.float32 ) self.moment_inertia = self.moment_inertia.reshape([self.N, 3]) if self.velocity is not None: self.velocity = numpy.ascontiguousarray(self.velocity, dtype=numpy.float32) self.velocity = self.velocity.reshape([self.N, 3]) if self.angmom is not None: self.angmom = numpy.ascontiguousarray(self.angmom, dtype=numpy.float32) self.angmom = self.angmom.reshape([self.N, 4]) if self.image is not None: self.image = numpy.ascontiguousarray(self.image, dtype=numpy.int32) self.image = self.image.reshape([self.N, 3]) if self.types is not None and (not len(set(self.types)) == len(self.types)): msg = 'Type names must be unique.' raise ValueError(msg) class BondData: """Store bond data chunks. Use the `Frame.bonds`, `Frame.angles`, `Frame.dihedrals`, `Frame.impropers`, and `Frame.pairs` attributes to access the bond topology. 
Instances resulting from file read operations will always store array quantities in `numpy.ndarray` objects of the defined types. User created frames may provide input data that can be converted to a `numpy.ndarray`. See Also: `hoomd.State` for a full description of how HOOMD interprets this data. Note: *M* varies depending on the type of bond. `BondData` represents all types of topology connections. ======== === Type *M* ======== === Bond 2 Angle 3 Dihedral 4 Improper 4 Pair 2 ======== === Attributes: N (int): Number of bonds/angles/dihedrals/impropers/pairs in the frame (:chunk:`bonds/N`, :chunk:`angles/N`, :chunk:`dihedrals/N`, :chunk:`impropers/N`, :chunk:`pairs/N`). types (list[str]): Names of the particle types (:chunk:`bonds/types`, :chunk:`angles/types`, :chunk:`dihedrals/types`, :chunk:`impropers/types`, :chunk:`pairs/types`). typeid ((*N*,) `numpy.ndarray` of ``numpy.uint32``): Bond type id (:chunk:`bonds/typeid`, :chunk:`angles/typeid`, :chunk:`dihedrals/typeid`, :chunk:`impropers/typeid`, :chunk:`pairs/types`). group ((*N*, *M*) `numpy.ndarray` of ``numpy.uint32``): Tags of the particles in the bond (:chunk:`bonds/group`, :chunk:`angles/group`, :chunk:`dihedrals/group`, :chunk:`impropers/group`, :chunk:`pairs/group`). """ def __init__(self, M): self.M = M self.N = 0 self.types = None self.typeid = None self.group = None self._default_value = OrderedDict() self._default_value['N'] = numpy.uint32(0) self._default_value['types'] = [] self._default_value['typeid'] = numpy.uint32(0) self._default_value['group'] = numpy.array([0] * M, dtype=numpy.int32) def validate(self): """Validate all attributes. Convert every array attribute to a `numpy.ndarray` of the proper type and check that all attributes have the correct dimensions. Ignore any attributes that are ``None``. Warning: Array attributes that are not contiguous numpy arrays will be replaced with contiguous numpy arrays of the appropriate type. """ logger.debug('Validating BondData') if self.typeid is not None: self.typeid = numpy.ascontiguousarray(self.typeid, dtype=numpy.uint32) self.typeid = self.typeid.reshape([self.N]) if self.group is not None: self.group = numpy.ascontiguousarray(self.group, dtype=numpy.int32) self.group = self.group.reshape([self.N, self.M]) if self.types is not None and (not len(set(self.types)) == len(self.types)): msg = 'Type names must be unique.' raise ValueError(msg) class ConstraintData: """Store constraint data. Use the `Frame.constraints` attribute to access the constraints. Instances resulting from file read operations will always store array quantities in `numpy.ndarray` objects of the defined types. User created frames may provide input data that can be converted to a `numpy.ndarray`. See Also: `hoomd.State` for a full description of how HOOMD interprets this data. Attributes: N (int): Number of constraints in the frame (:chunk:`constraints/N`). value ((*N*, ) `numpy.ndarray` of ``numpy.float32``): Constraint length (:chunk:`constraints/value`). group ((*N*, *2*) `numpy.ndarray` of ``numpy.uint32``): Tags of the particles in the constraint (:chunk:`constraints/group`). """ def __init__(self): self.M = 2 self.N = 0 self.value = None self.group = None self._default_value = OrderedDict() self._default_value['N'] = numpy.uint32(0) self._default_value['value'] = numpy.float32(0) self._default_value['group'] = numpy.array([0] * self.M, dtype=numpy.int32) def validate(self): """Validate all attributes. 
Convert every array attribute to a `numpy.ndarray` of the proper type and check that all attributes have the correct dimensions. Ignore any attributes that are ``None``. Warning: Array attributes that are not contiguous numpy arrays will be replaced with contiguous numpy arrays of the appropriate type. """ logger.debug('Validating ConstraintData') if self.value is not None: self.value = numpy.ascontiguousarray(self.value, dtype=numpy.float32) self.value = self.value.reshape([self.N]) if self.group is not None: self.group = numpy.ascontiguousarray(self.group, dtype=numpy.int32) self.group = self.group.reshape([self.N, self.M]) class Frame: """System state at one point in time. Attributes: configuration (`ConfigurationData`): Configuration data. particles (`ParticleData`): Particles. bonds (`BondData`): Bonds. angles (`BondData`): Angles. dihedrals (`BondData`): Dihedrals. impropers (`BondData`): Impropers. pairs (`BondData`): Special pair. constraints (`ConstraintData`): Distance constraints. state (dict): State data. log (dict): Logged data (values must be `numpy.ndarray` or `array_like`) """ def __init__(self): self.configuration = ConfigurationData() self.particles = ParticleData() self.bonds = BondData(2) self.angles = BondData(3) self.dihedrals = BondData(4) self.impropers = BondData(4) self.constraints = ConstraintData() self.pairs = BondData(2) self.state = {} self.log = {} self._valid_state = [ 'hpmc/integrate/d', 'hpmc/integrate/a', 'hpmc/sphere/radius', 'hpmc/sphere/orientable', 'hpmc/ellipsoid/a', 'hpmc/ellipsoid/b', 'hpmc/ellipsoid/c', 'hpmc/convex_polyhedron/N', 'hpmc/convex_polyhedron/vertices', 'hpmc/convex_spheropolyhedron/N', 'hpmc/convex_spheropolyhedron/vertices', 'hpmc/convex_spheropolyhedron/sweep_radius', 'hpmc/convex_polygon/N', 'hpmc/convex_polygon/vertices', 'hpmc/convex_spheropolygon/N', 'hpmc/convex_spheropolygon/vertices', 'hpmc/convex_spheropolygon/sweep_radius', 'hpmc/simple_polygon/N', 'hpmc/simple_polygon/vertices', ] def validate(self): """Validate all contained frame data.""" logger.debug('Validating Frame') self.configuration.validate() self.particles.validate() self.bonds.validate() self.angles.validate() self.dihedrals.validate() self.impropers.validate() self.constraints.validate() self.pairs.validate() # validate HPMC state if self.particles.types is not None: NT = len(self.particles.types) else: NT = 1 if 'hpmc/integrate/d' in self.state: self.state['hpmc/integrate/d'] = numpy.ascontiguousarray( self.state['hpmc/integrate/d'], dtype=numpy.float64 ) self.state['hpmc/integrate/d'] = self.state['hpmc/integrate/d'].reshape([1]) if 'hpmc/integrate/a' in self.state: self.state['hpmc/integrate/a'] = numpy.ascontiguousarray( self.state['hpmc/integrate/a'], dtype=numpy.float64 ) self.state['hpmc/integrate/a'] = self.state['hpmc/integrate/a'].reshape([1]) if 'hpmc/sphere/radius' in self.state: self.state['hpmc/sphere/radius'] = numpy.ascontiguousarray( self.state['hpmc/sphere/radius'], dtype=numpy.float32 ) self.state['hpmc/sphere/radius'] = self.state['hpmc/sphere/radius'].reshape( [NT] ) if 'hpmc/sphere/orientable' in self.state: self.state['hpmc/sphere/orientable'] = numpy.ascontiguousarray( self.state['hpmc/sphere/orientable'], dtype=numpy.uint8 ) self.state['hpmc/sphere/orientable'] = self.state[ 'hpmc/sphere/orientable' ].reshape([NT]) if 'hpmc/ellipsoid/a' in self.state: self.state['hpmc/ellipsoid/a'] = numpy.ascontiguousarray( self.state['hpmc/ellipsoid/a'], dtype=numpy.float32 ) self.state['hpmc/ellipsoid/a'] = self.state['hpmc/ellipsoid/a'].reshape( 
[NT] ) self.state['hpmc/ellipsoid/b'] = numpy.ascontiguousarray( self.state['hpmc/ellipsoid/b'], dtype=numpy.float32 ) self.state['hpmc/ellipsoid/b'] = self.state['hpmc/ellipsoid/b'].reshape( [NT] ) self.state['hpmc/ellipsoid/c'] = numpy.ascontiguousarray( self.state['hpmc/ellipsoid/c'], dtype=numpy.float32 ) self.state['hpmc/ellipsoid/c'] = self.state['hpmc/ellipsoid/c'].reshape( [NT] ) if 'hpmc/convex_polyhedron/N' in self.state: self.state['hpmc/convex_polyhedron/N'] = numpy.ascontiguousarray( self.state['hpmc/convex_polyhedron/N'], dtype=numpy.uint32 ) self.state['hpmc/convex_polyhedron/N'] = self.state[ 'hpmc/convex_polyhedron/N' ].reshape([NT]) sumN = numpy.sum(self.state['hpmc/convex_polyhedron/N']) self.state['hpmc/convex_polyhedron/vertices'] = numpy.ascontiguousarray( self.state['hpmc/convex_polyhedron/vertices'], dtype=numpy.float32 ) self.state['hpmc/convex_polyhedron/vertices'] = self.state[ 'hpmc/convex_polyhedron/vertices' ].reshape([sumN, 3]) if 'hpmc/convex_spheropolyhedron/N' in self.state: self.state['hpmc/convex_spheropolyhedron/N'] = numpy.ascontiguousarray( self.state['hpmc/convex_spheropolyhedron/N'], dtype=numpy.uint32 ) self.state['hpmc/convex_spheropolyhedron/N'] = self.state[ 'hpmc/convex_spheropolyhedron/N' ].reshape([NT]) sumN = numpy.sum(self.state['hpmc/convex_spheropolyhedron/N']) self.state['hpmc/convex_spheropolyhedron/sweep_radius'] = ( numpy.ascontiguousarray( self.state['hpmc/convex_spheropolyhedron/sweep_radius'], dtype=numpy.float32, ) ) self.state['hpmc/convex_spheropolyhedron/sweep_radius'] = self.state[ 'hpmc/convex_spheropolyhedron/sweep_radius' ].reshape([NT]) self.state['hpmc/convex_spheropolyhedron/vertices'] = ( numpy.ascontiguousarray( self.state['hpmc/convex_spheropolyhedron/vertices'], dtype=numpy.float32, ) ) self.state['hpmc/convex_spheropolyhedron/vertices'] = self.state[ 'hpmc/convex_spheropolyhedron/vertices' ].reshape([sumN, 3]) if 'hpmc/convex_polygon/N' in self.state: self.state['hpmc/convex_polygon/N'] = numpy.ascontiguousarray( self.state['hpmc/convex_polygon/N'], dtype=numpy.uint32 ) self.state['hpmc/convex_polygon/N'] = self.state[ 'hpmc/convex_polygon/N' ].reshape([NT]) sumN = numpy.sum(self.state['hpmc/convex_polygon/N']) self.state['hpmc/convex_polygon/vertices'] = numpy.ascontiguousarray( self.state['hpmc/convex_polygon/vertices'], dtype=numpy.float32 ) self.state['hpmc/convex_polygon/vertices'] = self.state[ 'hpmc/convex_polygon/vertices' ].reshape([sumN, 2]) if 'hpmc/convex_spheropolygon/N' in self.state: self.state['hpmc/convex_spheropolygon/N'] = numpy.ascontiguousarray( self.state['hpmc/convex_spheropolygon/N'], dtype=numpy.uint32 ) self.state['hpmc/convex_spheropolygon/N'] = self.state[ 'hpmc/convex_spheropolygon/N' ].reshape([NT]) sumN = numpy.sum(self.state['hpmc/convex_spheropolygon/N']) self.state['hpmc/convex_spheropolygon/sweep_radius'] = ( numpy.ascontiguousarray( self.state['hpmc/convex_spheropolygon/sweep_radius'], dtype=numpy.float32, ) ) self.state['hpmc/convex_spheropolygon/sweep_radius'] = self.state[ 'hpmc/convex_spheropolygon/sweep_radius' ].reshape([NT]) self.state['hpmc/convex_spheropolygon/vertices'] = numpy.ascontiguousarray( self.state['hpmc/convex_spheropolygon/vertices'], dtype=numpy.float32 ) self.state['hpmc/convex_spheropolygon/vertices'] = self.state[ 'hpmc/convex_spheropolygon/vertices' ].reshape([sumN, 2]) if 'hpmc/simple_polygon/N' in self.state: self.state['hpmc/simple_polygon/N'] = numpy.ascontiguousarray( self.state['hpmc/simple_polygon/N'], dtype=numpy.uint32 ) 
self.state['hpmc/simple_polygon/N'] = self.state[ 'hpmc/simple_polygon/N' ].reshape([NT]) sumN = numpy.sum(self.state['hpmc/simple_polygon/N']) self.state['hpmc/simple_polygon/vertices'] = numpy.ascontiguousarray( self.state['hpmc/simple_polygon/vertices'], dtype=numpy.float32 ) self.state['hpmc/simple_polygon/vertices'] = self.state[ 'hpmc/simple_polygon/vertices' ].reshape([sumN, 2]) for k in self.state: if k not in self._valid_state: raise RuntimeError('Not a valid state: ' + k) class _HOOMDTrajectoryIterable: """Iterable over a HOOMDTrajectory object.""" def __init__(self, trajectory, indices): self._trajectory = trajectory self._indices = indices self._indices_iterator = iter(indices) def __next__(self): return self._trajectory[next(self._indices_iterator)] next = __next__ # Python 2.7 compatibility def __iter__(self): return type(self)(self._trajectory, self._indices) def __len__(self): return len(self._indices) class _HOOMDTrajectoryView: """A view of a HOOMDTrajectory object. Enables the slicing and iteration over a subset of a trajectory instance. """ def __init__(self, trajectory, indices): self._trajectory = trajectory self._indices = indices def __iter__(self): return _HOOMDTrajectoryIterable(self._trajectory, self._indices) def __len__(self): return len(self._indices) def __getitem__(self, key): if isinstance(key, slice): return type(self)(self._trajectory, self._indices[key]) return self._trajectory[self._indices[key]] class HOOMDTrajectory: """Read and write hoomd gsd files. Args: file (`gsd.fl.GSDFile`): File to access. Open hoomd GSD files with `open`. """ def __init__(self, file): if file.mode == 'ab': msg = 'Append mode not yet supported' raise ValueError(msg) self._file = file self._initial_frame = None # Used to cache positive results when chunks exist in frame 0. self._chunk_exists_frame_0 = {} logger.info('opening HOOMDTrajectory: ' + str(self.file)) if self.file.schema != 'hoomd': raise RuntimeError('GSD file is not a hoomd schema file: ' + str(self.file)) valid = False version = self.file.schema_version if version < (2, 0) and version >= (1, 0): valid = True if not valid: raise RuntimeError( 'Incompatible hoomd schema version ' + str(version) + ' in: ' + str(self.file) ) logger.info('found ' + str(len(self)) + ' frames') @property def file(self): """:class:`gsd.fl.GSDFile`: The file handle.""" return self._file def __len__(self): """The number of frames in the trajectory.""" return self.file.nframes def append(self, frame): """Append a frame to a hoomd gsd file. Args: frame (:py:class:`Frame`): Frame to append. Write the given frame to the file at the current frame and increase the frame counter. Do not write any fields that are ``None``. For all non-``None`` fields, scan them and see if they match the initial frame or the default value. If the given data differs, write it out to the frame. If it is the same, do not write it out as it can be instantiated either from the value at the initial frame or the default value. 
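        Example:
            A minimal sketch of building frames and appending them to a new
            file. The file name and particle data are placeholders::

                import gsd.hoomd

                with gsd.hoomd.open('example.gsd', mode='w') as traj:
                    for step in range(3):
                        frame = gsd.hoomd.Frame()
                        frame.configuration.step = step
                        frame.particles.N = 1
                        frame.particles.position = [[0, 0, 0]]
                        traj.append(frame)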
""" logger.debug('Appending frame to hoomd trajectory: ' + str(self.file)) frame.validate() # want the initial frame specified as a reference to detect if chunks # need to be written if self._initial_frame is None and len(self) > 0: self._read_frame(0) for path in [ 'configuration', 'particles', 'bonds', 'angles', 'dihedrals', 'impropers', 'constraints', 'pairs', ]: container = getattr(frame, path) for name in container._default_value: if self._should_write(path, name, frame): logger.debug('writing data chunk: ' + path + '/' + name) data = getattr(container, name) if name == 'N': data = numpy.array([data], dtype=numpy.uint32) if name == 'step': data = numpy.array([data], dtype=numpy.uint64) if name == 'dimensions': data = numpy.array([data], dtype=numpy.uint8) if name in ('types', 'type_shapes'): if name == 'type_shapes': data = [json.dumps(shape_dict) for shape_dict in data] wid = max(len(w) for w in data) + 1 b = numpy.array(data, dtype=numpy.dtype((bytes, wid))) data = b.view(dtype=numpy.int8).reshape(len(b), wid) self.file.write_chunk(path + '/' + name, data) # write state data for state, data in frame.state.items(): self.file.write_chunk('state/' + state, data) # write log data for log, data in frame.log.items(): self.file.write_chunk('log/' + log, data) self.file.end_frame() def truncate(self): """Remove all frames from the file.""" self.file.truncate() self._initial_frame = None def close(self): """Close the file.""" self.file.close() del self._initial_frame def _should_write(self, path, name, frame): """Test if we should write a given data chunk. Args: path (str): Path part of the data chunk. name (str): Name part of the data chunk. frame (:py:class:`Frame`): Frame data is from. Returns: False if the data matches that in the initial frame. False if the data matches all default values. True otherwise. """ container = getattr(frame, path) data = getattr(container, name) if data is None: return False if self._initial_frame is not None: initial_container = getattr(self._initial_frame, path) initial_data = getattr(initial_container, name) if numpy.array_equal(initial_data, data): logger.debug( 'skipping data chunk, matches frame 0: ' + path + '/' + name ) return False matches_default_value = False if name == 'types': matches_default_value = data == container._default_value[name] else: matches_default_value = numpy.array_equiv( data, container._default_value[name] ) if matches_default_value and not self._chunk_exists_frame_0.get( path + '/' + name, False ): logger.debug('skipping data chunk, default value: ' + path + '/' + name) return False return True def extend(self, iterable): """Append each item of the iterable to the file. Args: iterable: An iterable object the provides :py:class:`Frame` instances. This could be another HOOMDTrajectory, a generator that modifies frames, or a list of frames. """ for item in iterable: self.append(item) def _read_frame(self, idx): """Read the frame at the given index from the file. Args: idx (int): Frame index to read. Returns: `Frame` with the frame data Replace any data chunks not present in the given frame with either data from frame 0, or initialize from default values if not in frame 0. Cache frame 0 data to avoid file read overhead. Return any default data as non-writeable numpy arrays. 
""" if idx >= len(self): raise IndexError logger.debug('reading frame ' + str(idx) + ' from: ' + str(self.file)) if self._initial_frame is None and idx != 0: self._read_frame(0) frame = Frame() # read configuration first if self.file.chunk_exists(frame=idx, name='configuration/step'): step_arr = self.file.read_chunk(frame=idx, name='configuration/step') frame.configuration.step = step_arr[0] if idx == 0: self._chunk_exists_frame_0['configuration/step'] = True elif self._initial_frame is not None: frame.configuration.step = self._initial_frame.configuration.step else: frame.configuration.step = frame.configuration._default_value['step'] if self.file.chunk_exists(frame=idx, name='configuration/dimensions'): dimensions_arr = self.file.read_chunk( frame=idx, name='configuration/dimensions' ) frame.configuration.dimensions = dimensions_arr[0] if idx == 0: self._chunk_exists_frame_0['configuration/dimensions'] = True elif self._initial_frame is not None: frame.configuration.dimensions = ( self._initial_frame.configuration.dimensions ) else: frame.configuration.dimensions = frame.configuration._default_value[ 'dimensions' ] if self.file.chunk_exists(frame=idx, name='configuration/box'): frame.configuration.box = self.file.read_chunk( frame=idx, name='configuration/box' ) if idx == 0: self._chunk_exists_frame_0['configuration/box'] = True elif self._initial_frame is not None: frame.configuration.box = copy.copy(self._initial_frame.configuration.box) else: frame.configuration.box = copy.copy( frame.configuration._default_value['box'] ) # then read all groups that have N, types, etc... for path in [ 'particles', 'bonds', 'angles', 'dihedrals', 'impropers', 'constraints', 'pairs', ]: container = getattr(frame, path) if self._initial_frame is not None: initial_frame_container = getattr(self._initial_frame, path) container.N = 0 if self.file.chunk_exists(frame=idx, name=path + '/N'): N_arr = self.file.read_chunk(frame=idx, name=path + '/N') container.N = N_arr[0] if idx == 0: self._chunk_exists_frame_0[path + '/N'] = True elif self._initial_frame is not None: container.N = initial_frame_container.N # type names if 'types' in container._default_value: if self.file.chunk_exists(frame=idx, name=path + '/types'): tmp = self.file.read_chunk(frame=idx, name=path + '/types') tmp = tmp.view(dtype=numpy.dtype((bytes, tmp.shape[1]))) tmp = tmp.reshape([tmp.shape[0]]) container.types = list(a.decode('UTF-8') for a in tmp) if idx == 0: self._chunk_exists_frame_0[path + '/types'] = True elif self._initial_frame is not None: container.types = copy.copy(initial_frame_container.types) else: container.types = copy.copy(container._default_value['types']) # type shapes if 'type_shapes' in container._default_value and path == 'particles': if self.file.chunk_exists(frame=idx, name=path + '/type_shapes'): tmp = self.file.read_chunk(frame=idx, name=path + '/type_shapes') tmp = tmp.view(dtype=numpy.dtype((bytes, tmp.shape[1]))) tmp = tmp.reshape([tmp.shape[0]]) container.type_shapes = list( json.loads(json_string.decode('UTF-8')) for json_string in tmp ) if idx == 0: self._chunk_exists_frame_0[path + '/type_shapes'] = True elif self._initial_frame is not None: container.type_shapes = copy.copy( initial_frame_container.type_shapes ) else: container.type_shapes = copy.copy( container._default_value['type_shapes'] ) for name in container._default_value: if name in ('N', 'types', 'type_shapes'): continue # per particle/bond quantities if self.file.chunk_exists(frame=idx, name=path + '/' + name): container.__dict__[name] = 
self.file.read_chunk( frame=idx, name=path + '/' + name ) if idx == 0: self._chunk_exists_frame_0[path + '/' + name] = True else: if ( self._initial_frame is not None and initial_frame_container.N == container.N ): # read default from initial frame container.__dict__[name] = initial_frame_container.__dict__[ name ] else: # initialize from default value tmp = numpy.array([container._default_value[name]]) s = list(tmp.shape) s[0] = container.N container.__dict__[name] = numpy.empty(shape=s, dtype=tmp.dtype) container.__dict__[name][:] = tmp container.__dict__[name].flags.writeable = False # read state data for state in frame._valid_state: if self.file.chunk_exists(frame=idx, name='state/' + state): frame.state[state] = self.file.read_chunk( frame=idx, name='state/' + state ) # read log data logged_data_names = self.file.find_matching_chunk_names('log/') for log in logged_data_names: if self.file.chunk_exists(frame=idx, name=log): frame.log[log[4:]] = self.file.read_chunk(frame=idx, name=log) if idx == 0: self._chunk_exists_frame_0[log] = True elif self._initial_frame is not None: frame.log[log[4:]] = self._initial_frame.log[log[4:]] frame.log[log[4:]].flags.writeable = False # store initial frame if self._initial_frame is None and idx == 0: self._initial_frame = copy.deepcopy(frame) return frame def __getitem__(self, key): """Index trajectory frames. The index can be a positive integer, negative integer, or slice and is interpreted the same as `list` indexing. Warning: As you loop over frames, each frame is read from the file when it is reached in the iteration. Multiple passes may lead to multiple disk reads if the file does not fit in cache. """ if isinstance(key, slice): return _HOOMDTrajectoryView(self, range(*key.indices(len(self)))) if isinstance(key, int): if key < 0: key += len(self) if key >= len(self) or key < 0: raise IndexError() return self._read_frame(key) raise TypeError def __iter__(self): """Iterate over frames in the trajectory.""" return _HOOMDTrajectoryIterable(self, range(len(self))) def __enter__(self): """Enter the context manager.""" return self def __exit__(self, exc_type, exc_value, traceback): """Close the file when the context manager exits.""" self.file.close() def flush(self): """Flush all buffered frames to the file.""" self._file.flush() def open(name, mode='r'): # noqa: A001 - allow shadowing builtin open """Open a hoomd schema GSD file. The return value of `open` can be used as a context manager. Args: name (str): File name to open. mode (str): File open mode. Returns: `HOOMDTrajectory` instance that accesses the file **name** with the given **mode**. Valid values for ``mode``: +------------------+---------------------------------------------+ | mode | description | +==================+=============================================+ | ``'r'`` | Open an existing file for reading. | +------------------+---------------------------------------------+ | ``'r+'`` | Open an existing file for reading and | | | writing. | +------------------+---------------------------------------------+ | ``'w'`` | Open a file for reading and writing. | | | Creates the file if needed, or overwrites | | | an existing file. | +------------------+---------------------------------------------+ | ``'x'`` | Create a gsd file exclusively and opens it | | | for reading and writing. | | | Raise :py:exc:`FileExistsError` | | | if it already exists. | +------------------+---------------------------------------------+ | ``'a'`` | Open a file for reading and writing. 
| | | Creates the file if it doesn't exist. | +------------------+---------------------------------------------+ """ if not fl_imported: msg = 'file layer module is not available' raise RuntimeError(msg) if gsd is None: msg = 'gsd module is not available' raise RuntimeError(msg) gsdfileobj = gsd.fl.open( name=str(name), mode=mode, application='gsd.hoomd ' + gsd.version.version, schema='hoomd', schema_version=[1, 4], ) return HOOMDTrajectory(gsdfileobj) def read_log(name, scalar_only=False): """Read log from a hoomd schema GSD file into a dict of time-series arrays. Args: name (str): File name to open. scalar_only (bool): Set to `True` to include only scalar log values. The log data includes :chunk:`configuration/step` and all matching :chunk:`log/user_defined`, :chunk:`log/bonds/user_defined`, and :chunk:`log/particles/user_defined` quantities in the file. Returns: `dict` Note: `read_log` issues a `RuntimeWarning` when there are no matching ``log/`` quantities in the file. Caution: `read_log` requires that a logged quantity has the same shape in all frames. Use `open` and `Frame.log` to read files where the shape changes from frame to frame. To create a *pandas* ``DataFrame`` with the logged data: .. ipython:: python import pandas df = pandas.DataFrame(gsd.hoomd.read_log('log-example.gsd', scalar_only=True)) df """ if not fl_imported: msg = 'file layer module is not available' raise RuntimeError(msg) if gsd is None: msg = 'gsd module is not available' raise RuntimeError(msg) with gsd.fl.open( name=str(name), mode='r', application='gsd.hoomd ' + gsd.version.version, schema='hoomd', schema_version=[1, 4], ) as gsdfileobj: logged_data_names = gsdfileobj.find_matching_chunk_names('log/') # Always log timestep associated with each log entry logged_data_names.insert(0, 'configuration/step') if len(logged_data_names) == 1: warnings.warn( 'No logged data in file: ' + str(name), RuntimeWarning, stacklevel=2 ) logged_data_dict = dict() for log in logged_data_names: log_exists_frame_0 = gsdfileobj.chunk_exists(frame=0, name=log) is_configuration_step = log == 'configuration/step' if log_exists_frame_0 or is_configuration_step: if is_configuration_step and not log_exists_frame_0: # handle default configuration step on frame 0 tmp = numpy.array([0], dtype=numpy.uint64) else: tmp = gsdfileobj.read_chunk(frame=0, name=log) if scalar_only and not tmp.shape[0] == 1: continue if tmp.shape[0] == 1: logged_data_dict[log] = numpy.full( fill_value=tmp[0], shape=(gsdfileobj.nframes,) ) else: logged_data_dict[log] = numpy.tile( tmp, (gsdfileobj.nframes, *tuple(1 for _ in tmp.shape)) ) for idx in range(1, gsdfileobj.nframes): for key in logged_data_dict.keys(): if not gsdfileobj.chunk_exists(frame=idx, name=key): continue data = gsdfileobj.read_chunk(frame=idx, name=key) if len(logged_data_dict[key][idx].shape) == 0: logged_data_dict[key][idx] = data[0] else: logged_data_dict[key][idx] = data return logged_data_dict gsd-3.3.0/gsd/libgsd.pxd000066400000000000000000000105231462564674300150520ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. 
from libc.stdint cimport uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t,\ uint64_t, int64_t cdef extern from "gsd.h" nogil: cdef enum gsd_type: GSD_TYPE_UINT8=1 GSD_TYPE_UINT16 GSD_TYPE_UINT32 GSD_TYPE_UINT64 GSD_TYPE_INT8 GSD_TYPE_INT16 GSD_TYPE_INT32 GSD_TYPE_INT64 GSD_TYPE_FLOAT GSD_TYPE_DOUBLE cdef enum gsd_open_flag: GSD_OPEN_READWRITE=1 GSD_OPEN_READONLY GSD_OPEN_APPEND cdef enum gsd_error: GSD_SUCCESS = 0 GSD_ERROR_IO = -1 GSD_ERROR_INVALID_ARGUMENT = -2 GSD_ERROR_NOT_A_GSD_FILE = -3 GSD_ERROR_INVALID_GSD_FILE_VERSION = -4 GSD_ERROR_FILE_CORRUPT = -5 GSD_ERROR_MEMORY_ALLOCATION_FAILED = -6 GSD_ERROR_NAMELIST_FULL = -7 GSD_ERROR_FILE_MUST_BE_WRITABLE = -8 GSD_ERROR_FILE_MUST_BE_READABLE = -9 cdef struct gsd_header: uint64_t magic uint32_t gsd_version char application[64] char schema[64] uint32_t schema_version uint64_t index_location uint64_t index_allocated_entries uint64_t namelist_location uint64_t namelist_allocated_entries char reserved[80] cdef struct gsd_index_entry: uint64_t frame uint64_t N int64_t location uint32_t M uint16_t id uint8_t type uint8_t flags cdef struct gsd_namelist_entry: char name[64] cdef struct gsd_index_buffer: gsd_index_entry *data size_t size size_t reserved void *mapped_data size_t mapped_len cdef struct gsd_name_id_map: void *v size_t size cdef struct gsd_write_buffer: char *data size_t size size_t reserved cdef struct gsd_handle: int fd gsd_header header gsd_index_buffer file_index gsd_index_buffer frame_index gsd_index_buffer buffer_index gsd_write_buffer write_buffer gsd_namelist_entry *namelist uint64_t namelist_num_entries uint64_t cur_frame int64_t file_size gsd_open_flag open_flags gsd_name_id_map name_map uint64_t namelist_written_entries uint32_t gsd_make_version(unsigned int major, unsigned int minor) int gsd_create(const char *fname, const char *application, const char *schema, uint32_t schema_version) int gsd_create_and_open(gsd_handle* handle, const char *fname, const char *application, const char *schema, uint32_t schema_version, const gsd_open_flag flags, int exclusive_create) int gsd_open(gsd_handle* handle, const char *fname, const gsd_open_flag flags) int gsd_truncate(gsd_handle* handle) int gsd_close(gsd_handle* handle) int gsd_end_frame(gsd_handle* handle) int gsd_flush(gsd_handle* handle) int gsd_write_chunk(gsd_handle* handle, const char *name, gsd_type type, uint64_t N, uint8_t M, uint8_t flags, const void *data) const gsd_index_entry* gsd_find_chunk(gsd_handle* handle, uint64_t frame, const char *name) int gsd_read_chunk(gsd_handle* handle, void* data, const gsd_index_entry* chunk) uint64_t gsd_get_nframes(gsd_handle* handle) size_t gsd_sizeof_type(gsd_type type) const char *gsd_find_matching_chunk_name(gsd_handle* handle, const char *match, const char *prev) int gsd_upgrade(gsd_handle *handle) uint64_t gsd_get_maximum_write_buffer_size(gsd_handle* handle) int gsd_set_maximum_write_buffer_size(gsd_handle* handle, uint64_t size) uint64_t gsd_get_index_entries_to_buffer(gsd_handle* handle) int gsd_set_index_entries_to_buffer(gsd_handle* handle, uint64_t number) gsd-3.3.0/gsd/pygsd.py000066400000000000000000000325711462564674300146000ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. """GSD reader written in pure Python. :file:`pygsd.py` is a pure Python implementation of a GSD reader. 
If your analysis tool is written in Python and you want to embed a GSD reader without requiring C code compilation or requiring the **gsd** Python package as a dependency, then use the following Python files from the :file:`gsd/` directory to make a pure Python reader. It is not as high-performance as the C reader. * :file:`gsd/` * :file:`__init__.py` * :file:`pygsd.py` * :file:`hoomd.py` The reader reads from file-like Python objects, which may be useful for reading from in-memory buffers and in-database grid files. For regular files on the filesystem, and for writing gsd files, use :py:mod:`gsd.fl`. The :py:class:`GSDFile` in this module can be used with the :py:class:`gsd.hoomd.HOOMDTrajectory` hoomd reader: >>> with gsd.pygsd.GSDFile(open('test.gsd', 'rb')) as f: ... t = gsd.hoomd.HOOMDTrajectory(f) ... pos = t[0].particles.position """ import logging import struct import sys from collections import namedtuple import numpy version = '3.3.0' logger = logging.getLogger('gsd.pygsd') gsd_header = namedtuple( 'gsd_header', 'magic index_location index_allocated_entries ' 'namelist_location namelist_allocated_entries ' 'schema_version gsd_version application ' 'schema reserved', ) gsd_header_struct = struct.Struct('QQQQQII64s64s80s') gsd_index_entry = namedtuple('gsd_index_entry', 'frame N location M id type flags') gsd_index_entry_struct = struct.Struct('QQqIHBB') gsd_type_mapping = { 1: numpy.dtype('uint8'), 2: numpy.dtype('uint16'), 3: numpy.dtype('uint32'), 4: numpy.dtype('uint64'), 5: numpy.dtype('int8'), 6: numpy.dtype('int16'), 7: numpy.dtype('int32'), 8: numpy.dtype('int64'), 9: numpy.dtype('float32'), 10: numpy.dtype('float64'), } class GSDFile: """GSD file access interface. Implemented in pure Python and accepts any Python file-like object. Args: file: File-like object to read. GSDFile implements an object-oriented class interface to the GSD file layer. Use it to open an existing file in a **read-only** mode. For read-write access to files, use the full-featured C implementation in :py:mod:`gsd.fl`. Otherwise, the two implementations can be used interchangeably.
Examples: Open a file in **read-only** mode:: f = GSDFile(open('file.gsd', mode='r')) if f.chunk_exists(frame=0, name='chunk'): data = f.read_chunk(frame=0, name='chunk') Access file **metadata**:: f = GSDFile(open('file.gsd', mode='r')) print(f.name, f.mode, f.gsd_version) print(f.application, f.schema, f.schema_version) print(f.nframes) Use as a **context manager**:: with GSDFile(open('file.gsd', mode='r')) as f: data = f.read_chunk(frame=0, name='chunk') """ def __init__(self, file): self.__file = file logger.info('opening file: ' + str(file)) # read the header self.__file.seek(0) try: header_raw = self.__file.read(gsd_header_struct.size) except UnicodeDecodeError: print('\nDid you open the file in binary mode (rb)?\n', file=sys.stderr) raise if len(header_raw) != gsd_header_struct.size: raise OSError self.__header = gsd_header._make(gsd_header_struct.unpack(header_raw)) # validate the header expected_magic = 0x65DF65DF65DF65DF if self.__header.magic != expected_magic: raise RuntimeError('Not a GSD file: ' + str(self.__file)) if self.__header.gsd_version < (1 << 16) and self.__header.gsd_version != ( 0 << 16 | 3 ): raise RuntimeError('Unsupported GSD file version: ' + str(self.__file)) if self.__header.gsd_version >= (3 << 16): raise RuntimeError('Unsupported GSD file version: ' + str(self.__file)) # determine the file size (only works in Python 3) self.__file.seek(0, 2) # read the namelist block into a dict for easy lookup self.__namelist = {} c = 0 self.__file.seek(self.__header.namelist_location, 0) namelist_raw = self.__file.read(self.__header.namelist_allocated_entries * 64) names = namelist_raw.split(b'\x00') for name in names: sname = name.decode('utf-8') if len(sname) != 0: self.__namelist[sname] = c c = c + 1 # read the index block. Since this is a read-only implementation, only # read in the used entries self.__index = [] self.__file.seek(self.__header.index_location, 0) for i in range(self.__header.index_allocated_entries): index_entry_raw = self.__file.read(gsd_index_entry_struct.size) if len(index_entry_raw) != gsd_index_entry_struct.size: raise OSError idx = gsd_index_entry._make(gsd_index_entry_struct.unpack(index_entry_raw)) # 0 location signifies end of index if idx.location == 0: break if not self.__is_entry_valid(idx): raise RuntimeError('Corrupt GSD file: ' + str(self.__file)) if i > 0 and idx.frame < self.__index[i - 1].frame: raise RuntimeError('Corrupt GSD file: ' + str(self.__file)) self.__index.append(idx) self.__is_open = True def __is_entry_valid(self, entry): """Return True if an entry is valid.""" if entry.type not in gsd_type_mapping: return False if entry.M == 0: return False if entry.frame >= self.__header.index_allocated_entries: return False if entry.id >= len(self.__namelist): return False if entry.flags != 0: return False return True def close(self): """Close the file. Once closed, any other operation on the file object will result in a `ValueError`. :py:meth:`close()` may be called more than once. The file is automatically closed when garbage collected or when the context manager exits. 
""" if self.__is_open: logger.info('closing file: ' + str(self.__file)) self.__handle = None self.__index = None self.__namelist = None self.__is_open = False self.__file.close() def truncate(self): """Not implemented.""" raise NotImplementedError def end_frame(self): """Not implemented.""" raise NotImplementedError def write_chunk(self, name, data): """Not implemented.""" raise NotImplementedError def _find_chunk(self, frame, name): # find the id for the given name if name in self.__namelist: match_id = self.__namelist[name] else: return None # TODO: optimize for v2.0 files # binary search for the first index entry at the requested frame L = 0 R = len(self.__index) # progressively narrow the search window by halves while R - L > 1: m = (L + R) // 2 if frame < self.__index[m].frame: R = m else: L = m # this finds L = the rightmost index with the desired frame # search all index entries with the matching frame cur_index = L while cur_index >= 0 and self.__index[cur_index].frame == frame: if match_id == self.__index[cur_index].id: return self.__index[cur_index] cur_index = cur_index - 1 # if we got here, we didn't find the specified chunk return None def chunk_exists(self, frame, name): """Test if a chunk exists. Args: frame (int): Index of the frame to check name (str): Name of the chunk Returns: bool: True if the chunk exists in the file. False if it does not. Example: Handle non-existent chunks:: with GSDFile(open('file.gsd', mode='r')) as f: if f.chunk_exists(frame=0, name='chunk'): return f.read_chunk(frame=0, name='chunk') else: return None """ if not self.__is_open: msg = 'File is not open' raise ValueError(msg) chunk = self._find_chunk(frame, name) return chunk is not None def read_chunk(self, frame, name): """Read a data chunk from the file and return it as a numpy array. Args: frame (int): Index of the frame to read name (str): Name of the chunk Returns: `numpy.ndarray`: Data read from file. Examples: Read a 1D array:: with GSDFile(name=filename, mode='r') as f: data = f.read_chunk(frame=0, name='chunk1d') # data.shape == [N] Read a 2D array:: with GSDFile(name=filename, mode='r') as f: data = f.read_chunk(frame=0, name='chunk2d') # data.shape == [N,M] Read multiple frames:: with GSDFile(name=filename, mode='r') as f: data0 = f.read_chunk(frame=0, name='chunk') data1 = f.read_chunk(frame=1, name='chunk') data2 = f.read_chunk(frame=2, name='chunk') data3 = f.read_chunk(frame=3, name='chunk') .. tip:: Each call invokes a disk read and allocation of a new numpy array for storage. To avoid overhead, don't call :py:meth:`read_chunk()` on the same chunk repeatedly. Cache the arrays instead. 
""" if not self.__is_open: msg = 'File is not open' raise ValueError(msg) chunk = self._find_chunk(frame, name) if chunk is None: raise KeyError( 'frame ' + str(frame) + ' / chunk ' + name + ' not found in: ' + str(self.__file) ) logger.debug( 'read chunk: ' + str(self.__file) + ' - ' + str(frame) + ' - ' + name ) size = chunk.N * chunk.M * gsd_type_mapping[chunk.type].itemsize if chunk.location == 0: raise RuntimeError( 'Corrupt chunk: ' + str(frame) + ' / ' + name + ' in file' + str(self.__file) ) if size == 0: return numpy.array([], dtype=gsd_type_mapping[chunk.type]) self.__file.seek(chunk.location, 0) data_raw = self.__file.read(size) if len(data_raw) != size: raise OSError data_npy = numpy.frombuffer(data_raw, dtype=gsd_type_mapping[chunk.type]) if chunk.M == 1: return data_npy return data_npy.reshape([chunk.N, chunk.M]) def find_matching_chunk_names(self, match): """Find chunk names in the file that start with the string *match*. Args: match (str): Start of the chunk name to match Returns: list[str]: Matching chunk names """ result = [] for key in self.__namelist.keys(): if key.startswith(match): result.append(key) return result def __getstate__(self): """Implement the pickle protocol.""" return dict(name=self.name) def __setstate__(self, state): """Implement the pickle protocol.""" self.__init__(open(state['name'], 'rb')) def __enter__(self): """Implement the context manager protocol.""" return self def __exit__(self, exc_type, exc_value, traceback): """Implement the context manager protocol.""" self.close() @property def name(self): """(str): file.name.""" return self.__file.name @property def file(self): """File-like object opened.""" return self.__file @property def mode(self): """str: Mode of the open file.""" return 'r' @property def gsd_version(self): """tuple[int, int]: GSD file layer version number. The tuple is in the order (major, minor). """ v = self.__header.gsd_version return (v >> 16, v & 0xFFFF) @property def schema_version(self): """tuple[int, int]: Schema version number. The tuple is in the order (major, minor). """ v = self.__header.schema_version return (v >> 16, v & 0xFFFF) @property def schema(self): """str: Name of the data schema.""" return self.__header.schema.rstrip(b'\x00').decode('utf-8') @property def application(self): """str: Name of the generating application.""" return self.__header.application.rstrip(b'\x00').decode('utf-8') @property def nframes(self): """int: Number of frames in the file.""" if not self.__is_open: msg = 'File is not open' raise ValueError(msg) if len(self.__index) == 0: return 0 return self.__index[-1].frame + 1 gsd-3.3.0/gsd/pytest_plugin_validate.py000066400000000000000000000017511462564674300202250ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. """Command line options for pytest.""" import pytest def pytest_addoption(parser): """Add GSD specific options to the pytest command line. * validate - run validation tests """ parser.addoption( '--validate', action='store_true', default=False, help='Enable long running validation tests.', ) @pytest.fixture(autouse=True) def _skip_validate(request): """Skip validation tests by default. Pass the command line option --validate to enable these tests. 
""" if request.node.get_closest_marker('validate'): if not request.config.getoption('validate'): pytest.skip('Validation tests not requested.') def pytest_configure(config): """Define the ``validate`` marker.""" config.addinivalue_line( 'markers', 'validate: Tests that perform long-running validations.' ) gsd-3.3.0/gsd/test/000077500000000000000000000000001462564674300140475ustar00rootroot00000000000000gsd-3.3.0/gsd/test/CMakeLists.txt000066400000000000000000000011371462564674300166110ustar00rootroot00000000000000# copy Python modules to the build directory to make it a working Python package MACRO(copy_file file) add_custom_command ( OUTPUT ${file} DEPENDS ${file} POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${file} ${CMAKE_CURRENT_BINARY_DIR}/${file} COMMENT "Copy gsd/test/${file}" ) ENDMACRO(copy_file) set(files conftest.py pytest.ini test_fl.py test_gsd_v1.gsd test_hoomd.py test_largefile.py) foreach(file ${files}) copy_file(${file}) endforeach() add_custom_target(copy_gsd_test ALL DEPENDS ${files}) gsd-3.3.0/gsd/test/conftest.py000066400000000000000000000011601462564674300162440ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. """Pytest fixtures common to all tests.""" import collections import pytest Mode = collections.namedtuple('Mode', 'read write') mode_list = [Mode('r', 'w'), Mode('a', 'x'), Mode('r', 'a')] def open_mode_name(mode): """Provide a name for the open mode fixture.""" return '(' + mode.read + ',' + mode.write + ')' @pytest.fixture(params=mode_list, ids=open_mode_name) def open_mode(request): """Pytest fixture parameterized over multiple file open modes.""" return request.param gsd-3.3.0/gsd/test/pytest.ini000066400000000000000000000000351462564674300160760ustar00rootroot00000000000000[pytest] junit_family=xunit2 gsd-3.3.0/gsd/test/test_fl.py000066400000000000000000001076611462564674300160740ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan # Part of GSD, released under the BSD 2-Clause License. 
"""Test gsd.fl.""" import os import pathlib import platform import random import shutil import sys import numpy import pytest import gsd.fl import gsd.pygsd test_path = pathlib.Path(os.path.realpath(__file__)).parent def test_create(tmp_path, open_mode): """Test creation of GSD files.""" gsd.fl.open( mode=open_mode.write, name=tmp_path / 'test_create.gsd', application='test_create', schema='none', schema_version=[1, 2], ) @pytest.mark.parametrize( 'typ', [ numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64, numpy.int8, numpy.int16, numpy.int32, numpy.int64, numpy.float32, numpy.float64, ], ) def test_dtype(tmp_path, typ): """Test all supported data types.""" data1d = numpy.array([1, 2, 3, 4, 5, 127], dtype=typ) data2d = numpy.array([[10, 20], [30, 40], [50, 80]], dtype=typ) data_zero = numpy.array([], dtype=typ) gsd.fl.open( mode='x', name=tmp_path / 'test_dtype.gsd', application='test_dtype', schema='none', schema_version=[1, 2], ) with gsd.fl.open( name=tmp_path / 'test_dtype.gsd', mode='w', application='test_dtype', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='data1d', data=data1d) f.write_chunk(name='data2d', data=data2d) f.write_chunk(name='data_zero', data=data_zero) f.end_frame() with gsd.fl.open( name=tmp_path / 'test_dtype.gsd', mode='r', application='test_dtype', schema='none', schema_version=[1, 2], ) as f: read_data1d = f.read_chunk(frame=0, name='data1d') read_data2d = f.read_chunk(frame=0, name='data2d') read_data_zero = f.read_chunk(frame=0, name='data_zero') assert data1d.dtype == read_data1d.dtype numpy.testing.assert_array_equal(data1d, read_data1d) assert data2d.dtype == read_data2d.dtype numpy.testing.assert_array_equal(data2d, read_data2d) assert data_zero.dtype == read_data_zero.dtype assert data_zero.shape == (0,) # test again with pygsd with gsd.pygsd.GSDFile(file=open(str(tmp_path / 'test_dtype.gsd'), mode='rb')) as f: read_data1d = f.read_chunk(frame=0, name='data1d') read_data2d = f.read_chunk(frame=0, name='data2d') assert data1d.dtype == read_data1d.dtype numpy.testing.assert_array_equal(data1d, read_data1d) assert data2d.dtype == read_data2d.dtype numpy.testing.assert_array_equal(data2d, read_data2d) def test_metadata(tmp_path, open_mode): """Test file metadata.""" data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.int64) with gsd.fl.open( name=tmp_path / 'test_metadata.gsd', mode=open_mode.write, application='test_metadata', schema='none', schema_version=[1, 2], ) as f: assert f.mode == open_mode.write for _i in range(150): f.write_chunk(name='data', data=data) f.end_frame() with gsd.fl.open( name=tmp_path / 'test_metadata.gsd', mode=open_mode.read, application='test_metadata', schema='none', schema_version=[1, 2], ) as f: assert f.name == str(tmp_path / 'test_metadata.gsd') assert f.mode == open_mode.read assert f.application == 'test_metadata' assert f.schema == 'none' assert f.schema_version == (1, 2) assert f.nframes == 150 assert f.gsd_version == (2, 0) # test again with pygsd with gsd.pygsd.GSDFile( file=open(str(tmp_path / 'test_metadata.gsd'), mode='rb') ) as f: assert f.name == str(tmp_path / 'test_metadata.gsd') assert f.mode == 'r' assert f.application == 'test_metadata' assert f.schema == 'none' assert f.schema_version == (1, 2) assert f.nframes == 150 assert f.gsd_version == (2, 0) def test_append(tmp_path, open_mode): """Test that data chunks can be appended to existing files.""" with gsd.fl.open( name=tmp_path / 'test_append.gsd', mode=open_mode.write, application='test_append', schema='none', schema_version=[1, 2], 
): pass data = numpy.array([10], dtype=numpy.int64) nframes = 1024 with gsd.fl.open( name=tmp_path / 'test_append.gsd', mode='a', application='test_append', schema='none', schema_version=[1, 2], ) as f: assert f.mode == 'a' for i in range(nframes): data[0] = i f.write_chunk(name='data1', data=data) data[0] = i * 10 f.write_chunk(name='data10', data=data) f.end_frame() with gsd.fl.open( name=tmp_path / 'test_append.gsd', mode=open_mode.read, application='test_append', schema='none', schema_version=[1, 2], ) as f: assert f.nframes == nframes for i in range(nframes): data1 = f.read_chunk(frame=i, name='data1') data10 = f.read_chunk(frame=i, name='data10') assert data1[0] == i assert data10[0] == i * 10 # test again with pygsd with gsd.pygsd.GSDFile( file=open(str(tmp_path / 'test_append.gsd'), mode='rb') ) as f: assert f.nframes == nframes for i in range(nframes): data1 = f.read_chunk(frame=i, name='data1') data10 = f.read_chunk(frame=i, name='data10') assert data1[0] == i assert data10[0] == i * 10 def test_chunk_exists(tmp_path, open_mode): """Test the chunk_exists API.""" data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.int64) with gsd.fl.open( name=tmp_path / 'test_chunk_exists.gsd', mode=open_mode.write, application='test_chunk_exists', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='chunk1', data=data) f.end_frame() f.write_chunk(name='abcdefg', data=data) f.end_frame() f.write_chunk(name='test', data=data) f.end_frame() with gsd.fl.open( name=tmp_path / 'test_chunk_exists.gsd', mode=open_mode.read, application='test_chunk_exists', schema='none', schema_version=[1, 2], ) as f: assert f.chunk_exists(frame=0, name='chunk1') read_data = f.read_chunk(frame=0, name='chunk1') assert f.chunk_exists(frame=1, name='abcdefg') read_data = f.read_chunk(frame=1, name='abcdefg') assert f.chunk_exists(frame=2, name='test') read_data = f.read_chunk(frame=2, name='test') assert not f.chunk_exists(frame=1, name='chunk1') with pytest.raises(KeyError): read_data = f.read_chunk(frame=1, name='chunk1') assert not f.chunk_exists(frame=2, name='abcdefg') with pytest.raises(KeyError): read_data = f.read_chunk(frame=2, name='abcdefg') assert not f.chunk_exists(frame=0, name='test') with pytest.raises(KeyError): read_data = f.read_chunk(frame=0, name='test') assert not f.chunk_exists(frame=2, name='chunk1') with pytest.raises(KeyError): read_data = f.read_chunk(frame=2, name='chunk1') assert not f.chunk_exists(frame=0, name='abcdefg') with pytest.raises(KeyError): read_data = f.read_chunk(frame=0, name='abcdefg') assert not f.chunk_exists(frame=1, name='test') with pytest.raises(KeyError): read_data = f.read_chunk(frame=1, name='test') # test again with pygsd with gsd.pygsd.GSDFile( file=open(str(tmp_path / 'test_chunk_exists.gsd'), mode='rb') ) as f: assert f.chunk_exists(frame=0, name='chunk1') read_data = f.read_chunk(frame=0, name='chunk1') assert f.chunk_exists(frame=1, name='abcdefg') read_data = f.read_chunk(frame=1, name='abcdefg') assert f.chunk_exists(frame=2, name='test') read_data = f.read_chunk(frame=2, name='test') assert not f.chunk_exists(frame=1, name='chunk1') with pytest.raises(KeyError): read_data = f.read_chunk(frame=1, name='chunk1') assert not f.chunk_exists(frame=2, name='abcdefg') with pytest.raises(KeyError): read_data = f.read_chunk(frame=2, name='abcdefg') assert not f.chunk_exists(frame=0, name='test') with pytest.raises(KeyError): read_data = f.read_chunk(frame=0, name='test') assert not f.chunk_exists(frame=2, name='chunk1') with pytest.raises(KeyError): 
read_data = f.read_chunk(frame=2, name='chunk1') assert not f.chunk_exists(frame=0, name='abcdefg') with pytest.raises(KeyError): read_data = f.read_chunk(frame=0, name='abcdefg') assert not f.chunk_exists(frame=1, name='test') with pytest.raises(KeyError): read_data = f.read_chunk(frame=1, name='test') # noqa def test_readonly_errors(tmp_path, open_mode): """Test that read only files provide the appropriate errors.""" data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.int64) with gsd.fl.open( name=tmp_path / 'test_readonly_errors.gsd', mode=open_mode.write, application='test_readonly_errors', schema='none', schema_version=[1, 2], ) as f: for _i in range(10): f.write_chunk(name='chunk1', data=data) f.end_frame() data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.int64) with gsd.fl.open( name=tmp_path / 'test_readonly_errors.gsd', mode='r', application='test_readonly_errors', schema='none', schema_version=[1, 2], ) as f: with pytest.raises(RuntimeError): f.end_frame() with pytest.raises(RuntimeError): f.write_chunk(name='chunk1', data=data) # test again with pygsd with gsd.pygsd.GSDFile( file=open(str(tmp_path / 'test_readonly_errors.gsd'), mode='rb') ) as f: with pytest.raises(NotImplementedError): f.end_frame() with pytest.raises(NotImplementedError): f.write_chunk(name='chunk1', data=data) def test_fileio_errors(tmp_path, open_mode): """Test that OS file I/O errors pass through.""" # These test cause python to crash on windows.... if platform.system() != 'Windows': with pytest.raises(FileNotFoundError): gsd.fl.open( name='/this/file/does/not/exist', mode='r', application='test_readonly_errors', schema='none', schema_version=[1, 2], ) with open(str(tmp_path / 'test_fileio_errors.gsd'), 'wb') as f: f.write(b'test') with pytest.raises(RuntimeError): f = gsd.fl.open( name=tmp_path / 'test_fileio_errors.gsd', mode=open_mode.read, application='test_readonly_errors', schema='none', schema_version=[1, 2], ) def test_dtype_errors(tmp_path, open_mode): """Test that unsupported data types result in errors.""" data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.bool_) with gsd.fl.open( name=tmp_path / 'test_dtype_errors1.gsd', mode=open_mode.write, application='test_dtype_errors', schema='none', schema_version=[1, 2], ) as f: with pytest.raises(ValueError): f.write_chunk(name='chunk1', data=data) f.end_frame() data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.float16) with gsd.fl.open( name=tmp_path / 'test_dtype_errors2.gsd', mode=open_mode.write, application='test_dtype_errors', schema='none', schema_version=[1, 2], ) as f: with pytest.raises(ValueError): f.write_chunk(name='chunk1', data=data) f.end_frame() data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.complex64) with gsd.fl.open( name=tmp_path / 'test_dtype_errors3.gsd', mode=open_mode.write, application='test_dtype_errors', schema='none', schema_version=[1, 2], ) as f: with pytest.raises(ValueError): f.write_chunk(name='chunk1', data=data) f.end_frame() data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.complex128) with gsd.fl.open( name=tmp_path / 'test_dtype_errors4.gsd', mode=open_mode.write, application='test_dtype_errors', schema='none', schema_version=[1, 2], ) as f: with pytest.raises(ValueError): f.write_chunk(name='chunk1', data=data) f.end_frame() def test_truncate(tmp_path): """Test that the truncate method functions.""" rng = numpy.random.default_rng() data = numpy.ascontiguousarray(rng.random(size=(1000, 3)), dtype=numpy.float32) with gsd.fl.open( name=tmp_path / 'test_truncate.gsd', mode='w', 
application='test_truncate', schema='none', schema_version=[1, 2], ) as f: assert f.mode == 'w' for _i in range(10): f.write_chunk(name='data', data=data) f.end_frame() assert f.nframes == 10 f.truncate() assert f.nframes == 0 assert f.application == 'test_truncate' assert f.schema == 'none' assert f.schema_version == (1, 2) f.write_chunk(name='data', data=data) f.end_frame() with gsd.fl.open( name=tmp_path / 'test_truncate.gsd', mode='r', application='test_truncate', schema='none', schema_version=[1, 2], ) as f: assert f.name == str(tmp_path / 'test_truncate.gsd') assert f.mode == 'r' assert f.application == 'test_truncate' assert f.schema == 'none' assert f.schema_version == (1, 2) assert f.nframes == 1 def test_namelen(tmp_path, open_mode): """Test that long names are truncated as documented.""" app_long = 'abcdefga' * 100 schema_long = 'ijklmnop' * 100 chunk_long = '12345678' * 100 with gsd.fl.open( name=tmp_path / 'test_namelen.gsd', mode=open_mode.write, application=app_long, schema=schema_long, schema_version=[1, 2], ) as f: assert f.application == app_long[0:63] assert f.schema == schema_long[0:63] data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.int64) f.write_chunk(name=chunk_long, data=data) f.end_frame() with gsd.fl.open( name=tmp_path / 'test_namelen.gsd', mode=open_mode.read, application=app_long, schema=schema_long, schema_version=[1, 2], ) as f: data_read = f.read_chunk(0, name=chunk_long) numpy.testing.assert_array_equal(data, data_read) # test again with pygsd with gsd.pygsd.GSDFile( file=open(str(tmp_path / 'test_namelen.gsd'), mode='rb') ) as f: data_read = f.read_chunk(0, name=chunk_long) numpy.testing.assert_array_equal(data, data_read) def test_open(tmp_path): """Test the open() API.""" data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.int64) with gsd.fl.open( name=tmp_path / 'test.gsd', mode='x', application='test_open', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='chunk1', data=data) f.end_frame() with gsd.fl.open( name=tmp_path / 'test_2.gsd', mode='x', application='test_open', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='chunk1', data=data) f.end_frame() f.read_chunk(0, name='chunk1') with gsd.fl.open( name=tmp_path / 'test.gsd', mode='w', application='test_open', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='chunk1', data=data) f.end_frame() with gsd.fl.open( name=tmp_path / 'test.gsd', mode='w', application='test_open', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='chunk1', data=data) f.end_frame() f.read_chunk(0, name='chunk1') with gsd.fl.open( name=tmp_path / 'test.gsd', mode='a', application='test_open', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='chunk1', data=data) f.end_frame() with gsd.fl.open( name=tmp_path / 'test.gsd', mode='r', application='test_open', schema='none', schema_version=[1, 2], ) as f: f.read_chunk(0, name='chunk1') f.read_chunk(1, name='chunk1') with gsd.fl.open( name=tmp_path / 'test.gsd', mode='r+', application='test_open', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='chunk1', data=data) f.end_frame() f.read_chunk(0, name='chunk1') f.read_chunk(1, name='chunk1') f.read_chunk(2, name='chunk1') def test_find_matching_chunk_names(tmp_path, open_mode): """Test the find_matching_chunk_names API.""" data = numpy.array([1, 2, 3, 4, 5], dtype=numpy.float32) with gsd.fl.open( name=tmp_path / 'test.gsd', mode=open_mode.write, application='test_find_matching_chunk_names', schema='none', schema_version=[1, 2], ) 
as f: f.write_chunk(name='log/A', data=data) f.write_chunk(name='log/chunk2', data=data) f.end_frame() f.write_chunk(name='data/B', data=data) f.end_frame() with gsd.fl.open( name=tmp_path / 'test.gsd', mode=open_mode.read, application='test_find_matching_chunk_names', schema='none', schema_version=[1, 2], ) as f: all_chunks = f.find_matching_chunk_names('') assert len(all_chunks) == 3 assert 'log/A' in all_chunks assert 'log/chunk2' in all_chunks assert 'data/B' in all_chunks log_chunks = f.find_matching_chunk_names('log/') assert len(log_chunks) == 2 assert 'log/A' in log_chunks assert 'log/chunk2' in log_chunks data_chunks = f.find_matching_chunk_names('data/') assert len(data_chunks) == 1 assert 'data/B' in data_chunks other_chunks = f.find_matching_chunk_names('other/') assert len(other_chunks) == 0 # test again with pygsd with gsd.pygsd.GSDFile(file=open(str(tmp_path / 'test.gsd'), mode='rb')) as f: all_chunks = f.find_matching_chunk_names('') assert len(all_chunks) == 3 assert 'log/A' in all_chunks assert 'log/chunk2' in all_chunks assert 'data/B' in all_chunks log_chunks = f.find_matching_chunk_names('log/') assert len(log_chunks) == 2 assert 'log/A' in log_chunks assert 'log/chunk2' in log_chunks data_chunks = f.find_matching_chunk_names('data/') assert len(data_chunks) == 1 assert 'data/B' in data_chunks other_chunks = f.find_matching_chunk_names('other/') assert len(other_chunks) == 0 def test_chunk_name_limit(tmp_path, open_mode): """Test that providing more than the maximum allowed chunk names errors.""" with gsd.fl.open( name=tmp_path / 'test.gsd', mode=open_mode.write, application='test_chunk_name_limit', schema='none', schema_version=[1, 2], ) as f: for i in range(65535): f.write_chunk(name=str(i), data=numpy.array([i], dtype=numpy.int32)) # The GSD specification limits to 65535 names: with pytest.raises(RuntimeError): f.write_chunk(name='65536', data=numpy.array([i], dtype=numpy.int32)) def test_many_names(tmp_path, open_mode): """Test that many chunk names can be written to a file.""" values = list(range(1000)) with gsd.fl.open( name=tmp_path / 'test.gsd', mode=open_mode.write, application='test_many_names', schema='none', schema_version=[1, 2], ) as f: for _ in range(5): random.shuffle(values) for value in values: f.write_chunk( name=str(value), data=numpy.array([value * 13], dtype=numpy.int32) ) f.end_frame() with gsd.fl.open( name=tmp_path / 'test.gsd', mode=open_mode.read, application='test_many_names', schema='none', schema_version=[1, 2], ) as f: for frame in range(5): random.shuffle(values) for value in values: data = numpy.array([value * 13], dtype=numpy.int32) data_read = f.read_chunk(frame=frame, name=str(value)) numpy.testing.assert_array_equal(data, data_read) with gsd.pygsd.GSDFile(file=open(str(tmp_path / 'test.gsd'), mode='rb')) as f: for frame in range(5): random.shuffle(values) for value in values: data = numpy.array([value * 13], dtype=numpy.int32) data_read = f.read_chunk(frame=frame, name=str(value)) numpy.testing.assert_array_equal(data, data_read) def test_gsd_v1_read(): """Test that the GSD v2 API can read v1 files.""" values = list(range(127)) values_str = [str(v) for v in values] values_str.sort() # test that we can: # 1) Read chunk values correctly # 2) Iterate through chunk names correctly def check_v1_file_read(f): assert f.gsd_version == (1, 0) for frame in range(5): random.shuffle(values) for value in values: data = numpy.array([value * 13], dtype=numpy.int32) data_read = f.read_chunk(frame=frame, name=str(value)) 
numpy.testing.assert_array_equal(data, data_read) chunk_names = f.find_matching_chunk_names('') chunk_names.sort() assert chunk_names == values_str # test with the C implemantation with gsd.fl.open( name=test_path / 'test_gsd_v1.gsd', mode='r', application='test_gsd_v1', schema='none', schema_version=[1, 2], ) as f: check_v1_file_read(f) # and the pure Python implementation with gsd.pygsd.GSDFile( file=open(str(test_path / 'test_gsd_v1.gsd'), mode='rb') ) as f: assert f.gsd_version == (1, 0) check_v1_file_read(f) def test_gsd_v1_upgrade_read(tmp_path, open_mode): """Test that v1 files can be upgraded to v2.""" values = list(range(127)) values_str = [str(v) for v in values] values_str.sort() # test that we can: # 1) Read chunk values correctly # 2) Iterate through chunk names correctly def check_v1_file_read(f): for frame in range(5): random.shuffle(values) for value in values: data = numpy.array([value * 13], dtype=numpy.int32) data_read = f.read_chunk(frame=frame, name=str(value)) numpy.testing.assert_array_equal(data, data_read) chunk_names = f.find_matching_chunk_names('') chunk_names.sort() assert chunk_names == values_str shutil.copy(test_path / 'test_gsd_v1.gsd', tmp_path / 'test_gsd_v1.gsd') with gsd.fl.open( name=tmp_path / 'test_gsd_v1.gsd', mode='r+', application='test_gsd_v1', schema='none', schema_version=[1, 2], ) as f: assert f.gsd_version == (1, 0) f.upgrade() # check that we can read the file contents after the upgrade in memory check_v1_file_read(f) # and the same tests again after closing and opening the file with gsd.fl.open( name=tmp_path / 'test_gsd_v1.gsd', mode=open_mode.read, application='test_gsd_v1', schema='none', schema_version=[1, 2], ) as f: assert f.gsd_version == (2, 0) check_v1_file_read(f) with gsd.pygsd.GSDFile( file=open(str(tmp_path / 'test_gsd_v1.gsd'), mode='rb') ) as f: assert f.gsd_version == (2, 0) check_v1_file_read(f) def test_gsd_v1_write(tmp_path, open_mode): """Test that v2 can write to v1 files.""" values = list(range(256)) # include a very long chunk name to check that the name is truncated # properly for the v1 format limitations long_name = 'abcdefg' * 1000 values.append(long_name) values_str = [] for v in values: check_v = v if isinstance(v, str) and len(v) > 63: # v1 files truncate names to 63 chars check_v = v[0:63] values_str.append(str(check_v)) values_str.sort() shutil.copy(test_path / 'test_gsd_v1.gsd', tmp_path / 'test_gsd_v1.gsd') # test that we can: # 1) Read chunk values correctly # 2) Iterate through chunk names correctly def check_v1_file_read(f): assert f.gsd_version == (1, 0) chunk_names = f.find_matching_chunk_names('') chunk_names.sort() assert chunk_names == values_str frame = 5 random.shuffle(values) for value in values: check_value = value if isinstance(value, int): data = numpy.array([value * 13], dtype=numpy.int32) else: data = numpy.array([hash(value)], dtype=numpy.int64) # v1 files truncate names to 63 chars if len(value) > 63: check_value = value[0:63] data_read = f.read_chunk(frame=frame, name=str(check_value)) numpy.testing.assert_array_equal(data, data_read) # test that we can write new entries to the file with gsd.fl.open( name=tmp_path / 'test_gsd_v1.gsd', mode='r+', application='test_gsd_v1', schema='none', schema_version=[1, 2], ) as f: assert f.gsd_version == (1, 0) for value in values: if isinstance(value, int): data = numpy.array([value * 13], dtype=numpy.int32) else: data = numpy.array([hash(value)], dtype=numpy.int64) f.write_chunk(name=str(value), data=data) f.end_frame() check_v1_file_read(f) # test 
opening again with the C implemantation with gsd.fl.open( name=tmp_path / 'test_gsd_v1.gsd', mode=open_mode.read, application='test_gsd_v1', schema='none', schema_version=[1, 2], ) as f: check_v1_file_read(f) # and the pure Python implementation with gsd.pygsd.GSDFile( file=open(str(tmp_path / 'test_gsd_v1.gsd'), mode='rb') ) as f: assert f.gsd_version == (1, 0) check_v1_file_read(f) def test_gsd_v1_upgrade_write(tmp_path, open_mode): """Test that upgraded files can be written to after upgraded.""" values = list(range(256)) # include a very long chunk name to check that the name can be written # after the upgrade long_name = 'abcdefg' * 1000 values.append(long_name) values_str = [str(v) for v in values] values_str.sort() shutil.copy(test_path / 'test_gsd_v1.gsd', tmp_path / 'test_gsd_v1.gsd') # test that we can: # 1) Read chunk values correctly # 2) Iterate through chunk names correctly def check_v1_file_read(f): chunk_names = f.find_matching_chunk_names('') chunk_names.sort() assert chunk_names == values_str frame = 5 random.shuffle(values) for value in values: if isinstance(value, int): data = numpy.array([value * 13], dtype=numpy.int32) else: data = numpy.array([hash(value)], dtype=numpy.int64) data_read = f.read_chunk(frame=frame, name=str(value)) numpy.testing.assert_array_equal(data, data_read) # test that we can write new entries to the file with gsd.fl.open( name=tmp_path / 'test_gsd_v1.gsd', mode='r+', application='test_gsd_v1', schema='none', schema_version=[1, 2], ) as f: assert f.gsd_version == (1, 0) f.upgrade() assert f.gsd_version == (2, 0) for value in values: if isinstance(value, int): data = numpy.array([value * 13], dtype=numpy.int32) else: data = numpy.array([hash(value)], dtype=numpy.int64) f.write_chunk(name=str(value), data=data) f.end_frame() check_v1_file_read(f) # test opening again with the C implemantation with gsd.fl.open( name=tmp_path / 'test_gsd_v1.gsd', mode=open_mode.read, application='test_gsd_v1', schema='none', schema_version=[1, 2], ) as f: assert f.gsd_version == (2, 0) check_v1_file_read(f) # and the pure Python implementation with gsd.pygsd.GSDFile( file=open(str(tmp_path / 'test_gsd_v1.gsd'), mode='rb') ) as f: assert f.gsd_version == (2, 0) check_v1_file_read(f) def test_zero_size(tmp_path, open_mode): """Test that zero-size data chunks are allowed.""" data = numpy.array([], dtype=numpy.float32) with gsd.fl.open( name=tmp_path / 'test_zero.gsd', mode=open_mode.write, application='test_zero', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='data', data=data) f.end_frame() with gsd.fl.open( name=tmp_path / 'test_zero.gsd', mode=open_mode.read, application='test_zero', schema='none', schema_version=[1, 2], ) as f: assert f.nframes == 1 data_read = f.read_chunk(frame=0, name='data') assert data_read.shape == (0,) assert data_read.dtype == numpy.float32 # test again with pygsd with gsd.pygsd.GSDFile(file=open(str(tmp_path / 'test_zero.gsd'), mode='rb')) as f: assert f.nframes == 1 data_read = f.read_chunk(frame=0, name='data') assert data_read.shape == (0,) assert data_read.dtype == numpy.float32 @pytest.mark.skipif( sys.version_info < (3, 7), reason='Python 3.6 fails to handle non-ascii characters.' 
) def test_utf8(tmp_path): """Test that the API handles UTF-8 encoding for the filename.""" data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.int64) fname = '中文.gsd' with gsd.fl.open( name=tmp_path / fname, mode='x', application='test_open', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='chunk1', data=data) f.end_frame() dir_list = os.listdir(tmp_path) print(dir_list) assert fname in dir_list with gsd.fl.open( name=tmp_path / fname, mode='w', application='test_open', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='chunk1', data=data) f.end_frame() with gsd.fl.open( name=tmp_path / fname, mode='r', application='test_open', schema='none', schema_version=[1, 2], ) as f: f.read_chunk(0, name='chunk1') @pytest.mark.parametrize('mode', ['w', 'x', 'a', 'r+']) def test_read_write(tmp_path, mode): """Test that data chunks can read from files opened in all write modes.""" if mode[0] == 'r' or mode[0] == 'a': with gsd.fl.open( name=tmp_path / 'test_read_write.gsd', mode='w', application='test_read_write', schema='none', schema_version=[1, 2], ): pass data = numpy.array([10], dtype=numpy.int64) nframes = 1024 with gsd.fl.open( name=tmp_path / 'test_read_write.gsd', mode=mode, application='test_read_write', schema='none', schema_version=[1, 2], ) as f: assert f.mode == mode for i in range(nframes): data[0] = i f.write_chunk(name='data1', data=data) data[0] = i * 10 f.write_chunk(name='data10', data=data) f.end_frame() for i in range(nframes): data1 = f.read_chunk(frame=i, name='data1') data10 = f.read_chunk(frame=i, name='data10') assert data1[0] == i assert data10[0] == i * 10 @pytest.mark.parametrize('n_flush', [0, 1, 2]) def test_flush(tmp_path, open_mode, n_flush): """Test flush.""" data = numpy.array([1, 2, 3, 4, 5, 10012], dtype=numpy.int64) with gsd.fl.open( name=tmp_path / 'test_flush.gsd', mode=open_mode.write, application='test_flush', schema='none', schema_version=[1, 2], ) as f: f.write_chunk(name='chunk1', data=data) f.end_frame() f.write_chunk(name='chunk2', data=data) f.end_frame() f.write_chunk(name='chunk3', data=data) # Ensure that the data is buffered by opening the file with a 2nd # handle read-only and checking it. with gsd.fl.open(name=tmp_path / 'test_flush.gsd', mode='r') as f_readonly: assert not f_readonly.chunk_exists(frame=0, name='chunk1') assert not f_readonly.chunk_exists(frame=1, name='chunk2') assert f_readonly.nframes == 0 # 0 calls to flush tests the implicit flush on close, 2 calls to flush # tests that repeated calls are handled properly. for _i in range(n_flush): f.flush() with gsd.fl.open(name=tmp_path / 'test_flush.gsd', mode=open_mode.read) as f: assert f.chunk_exists(frame=0, name='chunk1') assert f.chunk_exists(frame=1, name='chunk2') # The third chunk is not written because end_frame is not called a # third time. 
        assert not f.chunk_exists(frame=2, name='chunk3')

        assert f.nframes == 2


def test_maximum_write_buffer_size(tmp_path, open_mode):
    """Test maximum_write_buffer_size."""
    with gsd.fl.open(
        name=tmp_path / 'test_maximum_write_buffer_size.gsd',
        mode=open_mode.write,
        application='test_maximum_write_buffer_size',
        schema='none',
        schema_version=[1, 2],
    ) as f:
        assert f.maximum_write_buffer_size > 0
        f.maximum_write_buffer_size = 1024
        assert f.maximum_write_buffer_size == 1024

        with pytest.raises(RuntimeError):
            f.maximum_write_buffer_size = 0


def test_index_entries_to_buffer(tmp_path, open_mode):
    """Test index_entries_to_buffer."""
    with gsd.fl.open(
        name=tmp_path / 'test_index_entries_to_buffer.gsd',
        mode=open_mode.write,
        application='test_index_entries_to_buffer',
        schema='none',
        schema_version=[1, 2],
    ) as f:
        assert f.index_entries_to_buffer > 0
        f.index_entries_to_buffer = 1024
        assert f.index_entries_to_buffer == 1024

        with pytest.raises(RuntimeError):
            f.index_entries_to_buffer = 0


def test_file_exists_error():
    """Test that I/O errors raise the correct Python exception."""
    with pytest.raises(FileExistsError):
        with gsd.fl.open(
            name=test_path / 'test_gsd_v1.gsd',
            mode='x',
            application='test_gsd_v1',
            schema='none',
            schema_version=[1, 2],
        ):
            pass


def test_pending_index_entries(tmp_path):
    """Ensure that gsd preserves pending index entries."""
    with gsd.fl.open(
        tmp_path / 'test_pending_index_entries.gsd',
        'w',
        application='My application',
        schema='My Schema',
        schema_version=[1, 0],
    ) as f:
        # Frame 0 must be complete to trigger the bug.
        f.write_chunk(name='0', data=numpy.array([0]))
        f.end_frame()

        for i in range(16):
            f.write_chunk(name=str(i), data=numpy.array([i]))

        # Flush with pending chunks in the frame index.
        f.flush()
        f.end_frame()

        # All test chunks should be present in the file.
        for i in range(16):
            assert f.chunk_exists(name=str(i), frame=1)
gsd-3.3.0/gsd/test/test_gsd_v1.gsd000066400000000000000000002153541462564674300170020ustar00rootroot00000000000000[binary GSD v1 test fixture; non-text contents omitted]
gsd-3.3.0/gsd/test/test_hoomd.py000066400000000000000000001131551462564674300165740ustar00rootroot00000000000000# Copyright (c) 2016-2024 The Regents of the University of Michigan
# Part of GSD, released under the BSD 2-Clause License.
"""Test the gsd.hoomd API.""" import pickle import numpy import pytest import gsd.fl import gsd.hoomd def test_create(tmp_path): """Test that gsd files can be created.""" with gsd.hoomd.open(name=tmp_path / 'test_create.gsd', mode='w') as hf: assert hf.file.schema == 'hoomd' assert hf.file.schema_version >= (1, 0) def test_append(tmp_path, open_mode): """Test that gsd files can be appended to.""" frame = gsd.hoomd.Frame() frame.particles.N = 10 with gsd.hoomd.open(name=tmp_path / 'test_append.gsd', mode=open_mode.write) as hf: for i in range(5): frame.configuration.step = i + 1 hf.append(frame) with gsd.hoomd.open(name=tmp_path / 'test_append.gsd', mode=open_mode.read) as hf: assert len(hf) == 5 def test_flush(tmp_path, open_mode): """Test that HOOMTrajectory objects flush buffered writes.""" frame = gsd.hoomd.Frame() frame.particles.N = 10 hf = gsd.hoomd.open(name=tmp_path / 'test_append.gsd', mode=open_mode.write) for i in range(5): frame.configuration.step = i + 1 hf.append(frame) hf.flush() with gsd.hoomd.open(name=tmp_path / 'test_append.gsd', mode=open_mode.read) as hf: assert len(hf) == 5 def create_frame(i): """Helper function to create frame objects.""" frame = gsd.hoomd.Frame() frame.configuration.step = i + 1 return frame def test_extend(tmp_path, open_mode): """Test that the extend method works.""" frame = gsd.hoomd.Frame() frame.particles.N = 10 with gsd.hoomd.open(name=tmp_path / 'test_extend.gsd', mode=open_mode.write) as hf: hf.extend(create_frame(i) for i in range(5)) with gsd.hoomd.open(name=tmp_path / 'test_extend.gsd', mode=open_mode.read) as hf: assert len(hf) == 5 def test_defaults(tmp_path, open_mode): """Test that the property defaults are properly set.""" frame = gsd.hoomd.Frame() frame.particles.N = 2 frame.bonds.N = 3 frame.angles.N = 4 frame.dihedrals.N = 5 frame.impropers.N = 6 frame.constraints.N = 4 frame.pairs.N = 7 with gsd.hoomd.open( name=tmp_path / 'test_defaults.gsd', mode=open_mode.write ) as hf: hf.append(frame) with gsd.hoomd.open(name=tmp_path / 'test_defaults.gsd', mode=open_mode.read) as hf: s = hf[0] assert s.configuration.step == 0 assert s.configuration.dimensions == 3 numpy.testing.assert_array_equal( s.configuration.box, numpy.array([1, 1, 1, 0, 0, 0], dtype=numpy.float32) ) assert s.particles.N == 2 assert s.particles.types == ['A'] assert s.particles.type_shapes == [{}] numpy.testing.assert_array_equal( s.particles.typeid, numpy.array([0, 0], dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.particles.mass, numpy.array([1, 1], dtype=numpy.float32) ) numpy.testing.assert_array_equal( s.particles.diameter, numpy.array([1, 1], dtype=numpy.float32) ) numpy.testing.assert_array_equal( s.particles.body, numpy.array([-1, -1], dtype=numpy.int32) ) numpy.testing.assert_array_equal( s.particles.charge, numpy.array([0, 0], dtype=numpy.float32) ) numpy.testing.assert_array_equal( s.particles.moment_inertia, numpy.array([[0, 0, 0], [0, 0, 0]], dtype=numpy.float32), ) numpy.testing.assert_array_equal( s.particles.position, numpy.array([[0, 0, 0], [0, 0, 0]], dtype=numpy.float32), ) numpy.testing.assert_array_equal( s.particles.orientation, numpy.array([[1, 0, 0, 0], [1, 0, 0, 0]], dtype=numpy.float32), ) numpy.testing.assert_array_equal( s.particles.velocity, numpy.array([[0, 0, 0], [0, 0, 0]], dtype=numpy.float32), ) numpy.testing.assert_array_equal( s.particles.angmom, numpy.array([[0, 0, 0, 0], [0, 0, 0, 0]], dtype=numpy.float32), ) numpy.testing.assert_array_equal( s.particles.image, numpy.array([[0, 0, 0], [0, 0, 0]], dtype=numpy.int32) ) 
assert s.bonds.N == 3 assert s.bonds.types == [] numpy.testing.assert_array_equal( s.bonds.typeid, numpy.array([0, 0, 0], dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.bonds.group, numpy.array([[0, 0], [0, 0], [0, 0]], dtype=numpy.uint32) ) assert s.angles.N == 4 assert s.angles.types == [] numpy.testing.assert_array_equal( s.angles.typeid, numpy.array([0, 0, 0, 0], dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.angles.group, numpy.array( [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=numpy.uint32 ), ) assert s.dihedrals.N == 5 assert s.dihedrals.types == [] numpy.testing.assert_array_equal( s.dihedrals.typeid, numpy.array([0, 0, 0, 0, 0], dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.dihedrals.group, numpy.array( [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=numpy.uint32, ), ) assert s.impropers.N == 6 assert s.impropers.types == [] numpy.testing.assert_array_equal( s.impropers.typeid, numpy.array([0, 0, 0, 0, 0, 0], dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.impropers.group, numpy.array( [ [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], ], dtype=numpy.uint32, ), ) assert s.constraints.N == 4 numpy.testing.assert_array_equal( s.constraints.value, numpy.array([0, 0, 0, 0], dtype=numpy.float32) ) numpy.testing.assert_array_equal( s.constraints.group, numpy.array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=numpy.uint32), ) assert s.pairs.N == 7 assert s.pairs.types == [] numpy.testing.assert_array_equal( s.pairs.typeid, numpy.array([0] * 7, dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.pairs.group, numpy.array([[0, 0]] * 7, dtype=numpy.uint32) ) assert len(s.state) == 0 def make_nondefault_frame(): """Make a frame with all non-default values.""" frame0 = gsd.hoomd.Frame() frame0.configuration.step = 10000 frame0.configuration.dimensions = 2 frame0.configuration.box = [4, 5, 6, 1.0, 0.5, 0.25] frame0.particles.N = 2 frame0.particles.types = ['A', 'B', 'C'] frame0.particles.type_shapes = [ {'type': 'Sphere', 'diameter': 2.0}, {'type': 'Sphere', 'diameter': 3.0}, {'type': 'Sphere', 'diameter': 4.0}, ] frame0.particles.typeid = [1, 2] frame0.particles.mass = [2, 3] frame0.particles.diameter = [3, 4] frame0.particles.body = [10, 20] frame0.particles.charge = [0.5, 0.25] frame0.particles.moment_inertia = [[1, 2, 3], [3, 2, 1]] frame0.particles.position = [[0.1, 0.2, 0.3], [-1.0, -2.0, -3.0]] frame0.particles.orientation = [[1, 0.1, 0.2, 0.3], [0, -1.0, -2.0, -3.0]] frame0.particles.velocity = [[1.1, 2.2, 3.3], [-3.3, -2.2, -1.1]] frame0.particles.angmom = [[1, 1.1, 2.2, 3.3], [-1, -3.3, -2.2, -1.1]] frame0.particles.image = [[10, 20, 30], [5, 6, 7]] frame0.bonds.N = 1 frame0.bonds.types = ['bondA', 'bondB'] frame0.bonds.typeid = [1] frame0.bonds.group = [[0, 1]] frame0.angles.N = 1 frame0.angles.typeid = [2] frame0.angles.types = ['angleA', 'angleB'] frame0.angles.group = [[0, 1, 0]] frame0.dihedrals.N = 1 frame0.dihedrals.typeid = [3] frame0.dihedrals.types = ['dihedralA', 'dihedralB'] frame0.dihedrals.group = [[0, 1, 1, 0]] frame0.impropers.N = 1 frame0.impropers.typeid = [4] frame0.impropers.types = ['improperA', 'improperB'] frame0.impropers.group = [[1, 0, 0, 1]] frame0.constraints.N = 1 frame0.constraints.value = [1.1] frame0.constraints.group = [[0, 1]] frame0.pairs.N = 1 frame0.pairs.types = ['pairA', 'pairB'] frame0.pairs.typeid = [1] frame0.pairs.group = [[0, 3]] frame0.log['value'] = [1, 2, 4, 10, 12, 18, 22] return frame0 def assert_frames_equal(s, frame0, 
check_position=True, check_step=True): """Assert that two frames are equal.""" if check_step: assert s.configuration.step == frame0.configuration.step assert s.configuration.dimensions == frame0.configuration.dimensions numpy.testing.assert_array_equal(s.configuration.box, frame0.configuration.box) assert s.particles.N == frame0.particles.N assert s.particles.types == frame0.particles.types assert s.particles.type_shapes == frame0.particles.type_shapes numpy.testing.assert_array_equal(s.particles.typeid, frame0.particles.typeid) numpy.testing.assert_array_equal(s.particles.mass, frame0.particles.mass) numpy.testing.assert_array_equal(s.particles.diameter, frame0.particles.diameter) numpy.testing.assert_array_equal(s.particles.body, frame0.particles.body) numpy.testing.assert_array_equal(s.particles.charge, frame0.particles.charge) numpy.testing.assert_array_equal( s.particles.moment_inertia, frame0.particles.moment_inertia ) if check_position: numpy.testing.assert_array_equal( s.particles.position, frame0.particles.position ) numpy.testing.assert_array_equal( s.particles.orientation, frame0.particles.orientation ) numpy.testing.assert_array_equal(s.particles.velocity, frame0.particles.velocity) numpy.testing.assert_array_equal(s.particles.angmom, frame0.particles.angmom) numpy.testing.assert_array_equal(s.particles.image, frame0.particles.image) assert s.bonds.N == frame0.bonds.N assert s.bonds.types == frame0.bonds.types numpy.testing.assert_array_equal(s.bonds.typeid, frame0.bonds.typeid) numpy.testing.assert_array_equal(s.bonds.group, frame0.bonds.group) assert s.angles.N == frame0.angles.N assert s.angles.types == frame0.angles.types numpy.testing.assert_array_equal(s.angles.typeid, frame0.angles.typeid) numpy.testing.assert_array_equal(s.angles.group, frame0.angles.group) assert s.dihedrals.N == frame0.dihedrals.N assert s.dihedrals.types == frame0.dihedrals.types numpy.testing.assert_array_equal(s.dihedrals.typeid, frame0.dihedrals.typeid) numpy.testing.assert_array_equal(s.dihedrals.group, frame0.dihedrals.group) assert s.impropers.N == frame0.impropers.N assert s.impropers.types == frame0.impropers.types numpy.testing.assert_array_equal(s.impropers.typeid, frame0.impropers.typeid) numpy.testing.assert_array_equal(s.impropers.group, frame0.impropers.group) assert s.constraints.N == frame0.constraints.N numpy.testing.assert_array_equal(s.constraints.value, frame0.constraints.value) numpy.testing.assert_array_equal(s.constraints.group, frame0.constraints.group) assert s.pairs.N == frame0.pairs.N assert s.pairs.types == frame0.pairs.types numpy.testing.assert_array_equal(s.pairs.typeid, frame0.pairs.typeid) numpy.testing.assert_array_equal(s.pairs.group, frame0.pairs.group) def test_fallback(tmp_path, open_mode): """Test that properties fall back to defaults when the N changes.""" frame0 = make_nondefault_frame() frame1 = gsd.hoomd.Frame() frame1.particles.N = 2 frame1.particles.position = [[-2, -1, 0], [1, 3.0, 0.5]] frame1.bonds.N = None frame1.angles.N = None frame1.dihedrals.N = None frame1.impropers.N = None frame1.constraints.N = None frame1.pairs.N = None frame2 = gsd.hoomd.Frame() frame2.particles.N = 3 frame2.particles.types = ['q', 's'] frame2.particles.type_shapes = [ {}, {'type': 'Ellipsoid', 'a': 7.0, 'b': 5.0, 'c': 3.0}, ] frame2.bonds.N = 3 frame2.angles.N = 4 frame2.dihedrals.N = 5 frame2.impropers.N = 6 frame2.constraints.N = 4 frame2.pairs.N = 7 with gsd.hoomd.open( name=tmp_path / 'test_fallback.gsd', mode=open_mode.write ) as hf: hf.extend([frame0, frame1, frame2]) 
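    # Read back: quantities omitted from later frames fall back to the frame 0
    # values when N is unchanged (frame 1), and to the schema defaults when N
    # differs (frame 2).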
with gsd.hoomd.open(name=tmp_path / 'test_fallback.gsd', mode=open_mode.read) as hf: assert len(hf) == 3 s = hf[0] assert_frames_equal(s, frame0) assert 'value' in s.log numpy.testing.assert_array_equal(s.log['value'], frame0.log['value']) # test that everything but position remained the same in frame 1 s = hf[1] assert_frames_equal(s, frame0, check_position=False) assert 'value' in s.log numpy.testing.assert_array_equal(s.log['value'], frame0.log['value']) # check that the third frame goes back to defaults because it has a # different N s = hf[2] assert s.particles.N == 3 assert s.particles.types == ['q', 's'] assert s.particles.type_shapes == frame2.particles.type_shapes numpy.testing.assert_array_equal( s.particles.typeid, numpy.array([0, 0, 0], dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.particles.mass, numpy.array([1, 1, 1], dtype=numpy.float32) ) numpy.testing.assert_array_equal( s.particles.diameter, numpy.array([1, 1, 1], dtype=numpy.float32) ) numpy.testing.assert_array_equal( s.particles.body, numpy.array([-1, -1, -1], dtype=numpy.float32) ) numpy.testing.assert_array_equal( s.particles.charge, numpy.array([0, 0, 0], dtype=numpy.float32) ) numpy.testing.assert_array_equal( s.particles.moment_inertia, numpy.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=numpy.float32), ) numpy.testing.assert_array_equal( s.particles.position, numpy.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=numpy.float32), ) numpy.testing.assert_array_equal( s.particles.orientation, numpy.array( [[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]], dtype=numpy.float32 ), ) numpy.testing.assert_array_equal( s.particles.velocity, numpy.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=numpy.float32), ) numpy.testing.assert_array_equal( s.particles.angmom, numpy.array( [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=numpy.float32 ), ) numpy.testing.assert_array_equal( s.particles.image, numpy.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=numpy.int32), ) assert s.bonds.N == 3 assert s.bonds.types == frame0.bonds.types numpy.testing.assert_array_equal( s.bonds.typeid, numpy.array([0, 0, 0], dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.bonds.group, numpy.array([[0, 0], [0, 0], [0, 0]], dtype=numpy.uint32) ) assert s.angles.N == 4 assert s.angles.types == frame0.angles.types numpy.testing.assert_array_equal( s.angles.typeid, numpy.array([0, 0, 0, 0], dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.angles.group, numpy.array( [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=numpy.uint32 ), ) assert s.dihedrals.N == 5 assert s.dihedrals.types == frame0.dihedrals.types numpy.testing.assert_array_equal( s.dihedrals.typeid, numpy.array([0, 0, 0, 0, 0], dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.dihedrals.group, numpy.array( [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=numpy.uint32, ), ) assert s.impropers.N == 6 assert s.impropers.types == frame0.impropers.types numpy.testing.assert_array_equal( s.impropers.typeid, numpy.array([0, 0, 0, 0, 0, 0], dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.impropers.group, numpy.array( [ [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], ], dtype=numpy.uint32, ), ) assert s.constraints.N == 4 numpy.testing.assert_array_equal( s.constraints.value, numpy.array([0, 0, 0, 0], dtype=numpy.float32) ) numpy.testing.assert_array_equal( s.constraints.group, numpy.array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=numpy.uint32), ) assert s.pairs.N == 7 assert s.pairs.types == 
frame0.pairs.types numpy.testing.assert_array_equal( s.pairs.typeid, numpy.array([0] * 7, dtype=numpy.uint32) ) numpy.testing.assert_array_equal( s.pairs.group, numpy.array([[0, 0]] * 7, dtype=numpy.uint32) ) assert 'value' in s.log numpy.testing.assert_array_equal(s.log['value'], frame0.log['value']) def test_fallback_to_frame0(tmp_path, open_mode): """Test that missing entries fall back to data in frame when N matches.""" frame0 = make_nondefault_frame() frame1 = gsd.hoomd.Frame() frame1.configuration.step = 200000 frame1.particles.N = None frame1.bonds.N = None frame1.angles.N = None frame1.dihedrals.N = None frame1.impropers.N = None frame1.constraints.N = None frame1.pairs.N = None with gsd.hoomd.open( name=tmp_path / 'test_fallback2.gsd', mode=open_mode.write ) as hf: hf.extend([frame0, frame1]) with gsd.hoomd.open( name=tmp_path / 'test_fallback2.gsd', mode=open_mode.read ) as hf: assert len(hf) == 2 s = hf[1] assert s.configuration.step == frame1.configuration.step assert_frames_equal(s, frame0, check_step=False) assert 'value' in s.log numpy.testing.assert_array_equal(s.log['value'], frame0.log['value']) def test_no_fallback(tmp_path, open_mode): """Test that writes of default quantities do not fall back to frame 0.""" frame0 = make_nondefault_frame() frame1 = gsd.hoomd.Frame() frame1.configuration.step = 200000 frame1.configuration.dimensions = 3 frame1.configuration.box = [1, 1, 1, 0, 0, 0] frame1.particles.N = frame0.particles.N frame1.particles.types = ['A'] frame1.particles.typeid = [0] * frame0.particles.N frame1.particles.type_shapes = [{}] frame1.particles.mass = [1.0] * frame0.particles.N frame1.particles.charge = [0.0] * frame0.particles.N frame1.particles.diameter = [1.0] * frame0.particles.N frame1.particles.body = [-1] * frame0.particles.N frame1.particles.moment_inertia = [[0, 0, 0]] * frame0.particles.N frame1.particles.position = [[0, 0, 0]] * frame0.particles.N frame1.particles.orientation = [[1, 0, 0, 0]] * frame0.particles.N frame1.particles.velocity = [[0, 0, 0]] * frame0.particles.N frame1.particles.angmom = [[0, 0, 0, 0]] * frame0.particles.N frame1.particles.image = [[0, 0, 0]] * frame0.particles.N frame1.bonds.N = frame0.bonds.N frame1.bonds.types = ['A'] frame1.bonds.typeid = [0] * frame0.bonds.N frame1.bonds.group = [[0, 0]] * frame0.bonds.N frame1.angles.N = frame0.angles.N frame1.angles.types = ['A'] frame1.angles.typeid = [0] * frame0.angles.N frame1.angles.group = [[0, 0, 0]] * frame0.angles.N frame1.dihedrals.N = frame0.dihedrals.N frame1.dihedrals.types = ['A'] frame1.dihedrals.typeid = [0] * frame0.dihedrals.N frame1.dihedrals.group = [[0, 0, 0, 0]] * frame0.dihedrals.N frame1.impropers.N = frame0.impropers.N frame1.impropers.types = ['A'] frame1.impropers.typeid = [0] * frame0.impropers.N frame1.impropers.group = [[0, 0, 0, 0]] * frame0.impropers.N frame1.constraints.N = frame0.constraints.N frame1.constraints.value = [0] * frame0.constraints.N frame1.constraints.group = [[0, 0]] * frame0.constraints.N frame1.pairs.N = frame0.pairs.N frame1.pairs.types = ['A'] frame1.pairs.typeid = [0] * frame0.pairs.N frame1.pairs.group = [[0, 0]] * frame0.pairs.N with gsd.hoomd.open( name=tmp_path / 'test_no_fallback.gsd', mode=open_mode.write ) as hf: hf.extend([frame0, frame1]) with gsd.hoomd.open( name=tmp_path / 'test_no_fallback.gsd', mode=open_mode.read ) as hf: assert len(hf) == 2 s = hf[1] assert s.configuration.step == frame1.configuration.step assert_frames_equal(s, frame1) def test_iteration(tmp_path, open_mode): """Test the iteration protocols for 
hoomd trajectories.""" with gsd.hoomd.open( name=tmp_path / 'test_iteration.gsd', mode=open_mode.write ) as hf: hf.extend(create_frame(i) for i in range(20)) with gsd.hoomd.open( name=tmp_path / 'test_iteration.gsd', mode=open_mode.read ) as hf: step = hf[-1].configuration.step assert step == 20 step = hf[-2].configuration.step assert step == 19 step = hf[-3].configuration.step assert step == 18 step = hf[0].configuration.step assert step == 1 step = hf[-20].configuration.step assert step == 1 with pytest.raises(IndexError): step = hf[-21].configuration.step with pytest.raises(IndexError): step = hf[20] frames = hf[5:10] steps = [frame.configuration.step for frame in frames] assert steps == [6, 7, 8, 9, 10] frames = hf[15:50] steps = [frame.configuration.step for frame in frames] assert steps == [16, 17, 18, 19, 20] frames = hf[15:-3] steps = [frame.configuration.step for frame in frames] assert steps == [16, 17] def test_slicing_and_iteration(tmp_path, open_mode): """Test that hoomd trajectories can be sliced.""" with gsd.hoomd.open(name=tmp_path / 'test_slicing.gsd', mode=open_mode.write) as hf: hf.extend(create_frame(i) for i in range(20)) with gsd.hoomd.open(name=tmp_path / 'test_slicing.gsd', mode=open_mode.read) as hf: # Test len()-function on trajectory and sliced trajectory. assert len(hf) == 20 assert len(hf[:10]) == 10 # Test len()-function with explicit iterator. assert len(iter(hf)) == len(hf) assert len(iter(hf[:10])) == len(hf[:10]) # Test iteration with implicit iterator. # All iterations are run twice to check for issues # with iterator exhaustion. assert len(list(hf)) == len(hf) assert len(list(hf)) == len(hf) assert len(list(hf[:10])) == len(hf[:10]) assert len(list(hf[:10])) == len(hf[:10]) # Test iteration with explicit iterator. hf_iter = iter(hf) assert len(hf_iter) == len(hf) # sanity check assert len(list(hf_iter)) == len(hf) assert len(list(hf_iter)) == len(hf) # Test iteration with explicit sliced iterator. hf_iter = iter(hf[:10]) assert len(hf_iter) == 10 # sanity check assert len(list(hf_iter)) == 10 assert len(list(hf_iter)) == 10 # Test frame selection with pytest.raises(IndexError): hf[len(hf)] assert hf[0].configuration.step == hf[0].configuration.step assert hf[len(hf) - 1].configuration.step == hf[-1].configuration.step def test_view_slicing_and_iteration(tmp_path, open_mode): """Test that trajectories can be sliced.""" with gsd.hoomd.open(name=tmp_path / 'test_slicing.gsd', mode=open_mode.write) as hf: hf.extend(create_frame(i) for i in range(40)) with gsd.hoomd.open(name=tmp_path / 'test_slicing.gsd', mode=open_mode.read) as hf: view = hf[::2] # Test len()-function on trajectory and sliced view. assert len(view) == 20 assert len(view[:10]) == 10 assert len(view[::2]) == 10 # Test len()-function with explicit iterator. assert len(iter(view)) == len(view) assert len(iter(view[:10])) == len(view[:10]) # Test iteration with implicit iterator. # All iterations are run twice to check for issues # with iterator exhaustion. assert len(list(view)) == len(view) assert len(list(view)) == len(view) assert len(list(view[:10])) == len(view[:10]) assert len(list(view[:10])) == len(view[:10]) assert len(list(view[::2])) == len(view[::2]) assert len(list(view[::2])) == len(view[::2]) # Test iteration with explicit iterator. view_iter = iter(view) assert len(view_iter) == len(view) # sanity check assert len(list(view_iter)) == len(view) assert len(list(view_iter)) == len(view) # Test iteration with explicit sliced iterator. 
view_iter = iter(view[:10]) assert len(view_iter) == 10 # sanity check assert len(list(view_iter)) == 10 assert len(list(view_iter)) == 10 # Test frame selection with pytest.raises(IndexError): view[len(view)] assert view[0].configuration.step == view[0].configuration.step assert view[len(view) - 1].configuration.step == view[-1].configuration.step def test_truncate(tmp_path): """Test the truncate API.""" with gsd.hoomd.open(name=tmp_path / 'test_iteration.gsd', mode='w') as hf: hf.extend(create_frame(i) for i in range(20)) assert len(hf) == 20 s = hf[10] # noqa assert hf._initial_frame is not None hf.truncate() assert len(hf) == 0 assert hf._initial_frame is None def test_state(tmp_path, open_mode): """Test the state chunks.""" frame0 = gsd.hoomd.Frame() frame0.state['hpmc/sphere/radius'] = [2.0] frame0.state['hpmc/sphere/orientable'] = [1] frame1 = gsd.hoomd.Frame() frame1.state['hpmc/convex_polyhedron/N'] = [3] frame1.state['hpmc/convex_polyhedron/vertices'] = [ [-1, -1, -1], [0, 1, 1], [1, 0, 0], ] with gsd.hoomd.open(name=tmp_path / 'test_state.gsd', mode=open_mode.write) as hf: hf.extend([frame0, frame1]) with gsd.hoomd.open(name=tmp_path / 'test_state.gsd', mode=open_mode.read) as hf: assert len(hf) == 2 s = hf[0] numpy.testing.assert_array_equal( s.state['hpmc/sphere/radius'], frame0.state['hpmc/sphere/radius'] ) numpy.testing.assert_array_equal( s.state['hpmc/sphere/orientable'], frame0.state['hpmc/sphere/orientable'] ) s = hf[1] numpy.testing.assert_array_equal( s.state['hpmc/convex_polyhedron/N'], frame1.state['hpmc/convex_polyhedron/N'], ) numpy.testing.assert_array_equal( s.state['hpmc/convex_polyhedron/vertices'], frame1.state['hpmc/convex_polyhedron/vertices'], ) def test_log(tmp_path, open_mode): """Test the log chunks.""" frame0 = gsd.hoomd.Frame() frame0.log['particles/net_force'] = [[1, 2, 3], [4, 5, 6]] frame0.log['particles/pair_lj_energy'] = [0, -5, -8, -3] frame0.log['value/potential_energy'] = [10] frame0.log['value/pressure'] = [-3] frame1 = gsd.hoomd.Frame() frame1.log['particles/pair_lj_energy'] = [1, 2, -4, -10] frame1.log['value/pressure'] = [5] with gsd.hoomd.open(name=tmp_path / 'test_log.gsd', mode=open_mode.write) as hf: hf.extend([frame0, frame1]) with gsd.hoomd.open(name=tmp_path / 'test_log.gsd', mode=open_mode.read) as hf: assert len(hf) == 2 s = hf[0] numpy.testing.assert_array_equal( s.log['particles/net_force'], frame0.log['particles/net_force'] ) numpy.testing.assert_array_equal( s.log['particles/pair_lj_energy'], frame0.log['particles/pair_lj_energy'] ) numpy.testing.assert_array_equal( s.log['value/potential_energy'], frame0.log['value/potential_energy'] ) numpy.testing.assert_array_equal( s.log['value/pressure'], frame0.log['value/pressure'] ) s = hf[1] # unspecified entries pull from frame 0 numpy.testing.assert_array_equal( s.log['particles/net_force'], frame0.log['particles/net_force'] ) numpy.testing.assert_array_equal( s.log['value/potential_energy'], frame0.log['value/potential_energy'] ) # specified entries are different in frame 1 numpy.testing.assert_array_equal( s.log['particles/pair_lj_energy'], frame1.log['particles/pair_lj_energy'] ) numpy.testing.assert_array_equal( s.log['value/pressure'], frame1.log['value/pressure'] ) def test_pickle(tmp_path): """Test that hoomd trajectory objects can be pickled.""" with gsd.hoomd.open(name=tmp_path / 'test_pickling.gsd', mode='w') as traj: traj.extend(create_frame(i) for i in range(20)) with pytest.raises(pickle.PickleError): pkl = pickle.dumps(traj) with gsd.hoomd.open(name=tmp_path / 
def test_pickle(tmp_path):
    """Test that hoomd trajectory objects can be pickled."""
    with gsd.hoomd.open(name=tmp_path / 'test_pickling.gsd', mode='w') as traj:
        traj.extend(create_frame(i) for i in range(20))
        with pytest.raises(pickle.PickleError):
            pkl = pickle.dumps(traj)

    with gsd.hoomd.open(name=tmp_path / 'test_pickling.gsd', mode='r') as traj:
        pkl = pickle.dumps(traj)
        with pickle.loads(pkl) as hf:
            assert len(hf) == 20


@pytest.mark.parametrize(
    'container', ['particles', 'bonds', 'angles', 'dihedrals', 'impropers', 'pairs']
)
def test_no_duplicate_types(tmp_path, container):
    """Test that duplicate types raise an error."""
    with gsd.hoomd.open(name=tmp_path / 'test_create.gsd', mode='w') as hf:
        frame = gsd.hoomd.Frame()

        getattr(frame, container).types = ['A', 'B', 'B', 'C']

        with pytest.raises(ValueError):
            hf.append(frame)


def test_read_log(tmp_path):
    """Test that data logged in gsd files are read correctly."""
    frame0 = gsd.hoomd.Frame()

    frame0.log['particles/pair_lj_energy'] = [0, -5, -8, -3]
    frame0.log['particles/pair_lj_force'] = [
        (0, 0, 0),
        (1, 1, 1),
        (2, 2, 2),
        (3, 3, 3),
    ]
    frame0.log['value/potential_energy'] = [10]
    frame0.log['value/pressure'] = [-3]

    frame1 = gsd.hoomd.Frame()
    frame1.configuration.step = 1

    frame1.log['particles/pair_lj_energy'] = [1, 2, -4, -10]
    frame1.log['particles/pair_lj_force'] = [
        (1, 1, 1),
        (2, 2, 2),
        (3, 3, 3),
        (4, 4, 4),
    ]
    frame1.log['value/pressure'] = [5]

    with gsd.hoomd.open(name=tmp_path / 'test_log.gsd', mode='w') as hf:
        hf.extend([frame0, frame1])

    # Test scalar_only = False
    logged_data_dict = gsd.hoomd.read_log(
        name=tmp_path / 'test_log.gsd', scalar_only=False
    )
    assert len(logged_data_dict) == 5
    assert list(logged_data_dict.keys()) == [
        'configuration/step',
        'log/particles/pair_lj_energy',
        'log/particles/pair_lj_force',
        'log/value/potential_energy',
        'log/value/pressure',
    ]

    numpy.testing.assert_array_equal(logged_data_dict['configuration/step'], [0, 1])
    numpy.testing.assert_array_equal(
        logged_data_dict['log/particles/pair_lj_energy'],
        [
            frame0.log['particles/pair_lj_energy'],
            frame1.log['particles/pair_lj_energy'],
        ],
    )
    numpy.testing.assert_array_equal(
        logged_data_dict['log/particles/pair_lj_force'],
        [frame0.log['particles/pair_lj_force'], frame1.log['particles/pair_lj_force']],
    )
    numpy.testing.assert_array_equal(
        logged_data_dict['log/value/potential_energy'],
        [*frame0.log['value/potential_energy'], *frame0.log['value/potential_energy']],
    )
    numpy.testing.assert_array_equal(
        logged_data_dict['log/value/pressure'],
        [*frame0.log['value/pressure'], *frame1.log['value/pressure']],
    )

    # Test scalar_only = True
    logged_data_dict = gsd.hoomd.read_log(
        name=tmp_path / 'test_log.gsd', scalar_only=True
    )
    assert len(logged_data_dict) == 3
    assert list(logged_data_dict.keys()) == [
        'configuration/step',
        'log/value/potential_energy',
        'log/value/pressure',
    ]

    numpy.testing.assert_array_equal(logged_data_dict['configuration/step'], [0, 1])
    numpy.testing.assert_array_equal(
        logged_data_dict['log/value/potential_energy'],
        [*frame0.log['value/potential_energy'], *frame0.log['value/potential_energy']],
    )
    numpy.testing.assert_array_equal(
        logged_data_dict['log/value/pressure'],
        [*frame0.log['value/pressure'], *frame1.log['value/pressure']],
    )


def test_read_log_warning(tmp_path):
    """Test that read_log issues a warning."""
    frame = gsd.hoomd.Frame()

    with gsd.hoomd.open(name=tmp_path / 'test_log.gsd', mode='w') as hf:
        hf.extend([frame])

    with pytest.warns(RuntimeWarning):
        log = gsd.hoomd.read_log(tmp_path / 'test_log.gsd')

    assert list(log.keys()) == ['configuration/step']
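
# Illustrative sketch (not part of the original suite): typical read_log
# usage. gsd.hoomd.read_log returns a dict keyed by 'configuration/step' and
# 'log/...' entries, as exercised by test_read_log above. The helper name and
# its argument are illustrative.
def read_log_usage_sketch(path):
    """Sketch: collect scalar logged quantities from a trajectory file."""
    logged = gsd.hoomd.read_log(name=path, scalar_only=True)
    steps = logged['configuration/step']
    return steps, logged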
def test_initial_frame_copy(tmp_path, open_mode):
    """Ensure that the user does not unintentionally modify _initial_frame."""
    with gsd.hoomd.open(
        name=tmp_path / 'test_initial_frame_copy.gsd', mode=open_mode.write
    ) as hf:
        frame = make_nondefault_frame()
        hf.append(frame)

        frame.configuration.step *= 2
        del frame.log['value']
        hf.append(frame)

    with gsd.hoomd.open(
        name=tmp_path / 'test_initial_frame_copy.gsd', mode=open_mode.read
    ) as hf:
        assert len(hf) == 2

        # Verify that the user does not get a reference to the initial frame cache.
        frame_0 = hf[0]
        initial = hf._initial_frame
        assert frame_0 is not initial

        # Verify that no mutable objects from the initial frame cache are presented to
        # the user.
        frame_1 = hf[1]
        assert frame_1.configuration.box is not initial.configuration.box

        assert frame_1.particles.types is not initial.particles.types
        assert frame_1.particles.type_shapes is not initial.particles.type_shapes

        assert frame_1.particles.position is initial.particles.position
        assert not frame_1.particles.position.flags.writeable
        assert frame_1.particles.typeid is initial.particles.typeid
        assert not frame_1.particles.typeid.flags.writeable
        assert frame_1.particles.mass is initial.particles.mass
        assert not frame_1.particles.mass.flags.writeable
        assert frame_1.particles.diameter is initial.particles.diameter
        assert not frame_1.particles.diameter.flags.writeable
        assert frame_1.particles.body is initial.particles.body
        assert not frame_1.particles.body.flags.writeable
        assert frame_1.particles.charge is initial.particles.charge
        assert not frame_1.particles.charge.flags.writeable
        assert frame_1.particles.moment_inertia is initial.particles.moment_inertia
        assert not frame_1.particles.moment_inertia.flags.writeable
        assert frame_1.particles.orientation is initial.particles.orientation
        assert not frame_1.particles.orientation.flags.writeable
        assert frame_1.particles.velocity is initial.particles.velocity
        assert not frame_1.particles.velocity.flags.writeable
        assert frame_1.particles.angmom is initial.particles.angmom
        assert not frame_1.particles.angmom.flags.writeable
        assert frame_1.particles.image is initial.particles.image
        assert not frame_1.particles.image.flags.writeable

        assert frame_1.bonds.types is not initial.bonds.types
        assert frame_1.bonds.typeid is initial.bonds.typeid
        assert frame_1.bonds.group is initial.bonds.group
        assert not frame_1.bonds.typeid.flags.writeable
        assert not frame_1.bonds.group.flags.writeable

        assert frame_1.angles.types is not initial.angles.types
        assert frame_1.angles.typeid is initial.angles.typeid
        assert frame_1.angles.group is initial.angles.group
        assert not frame_1.angles.typeid.flags.writeable
        assert not frame_1.angles.group.flags.writeable

        assert frame_1.dihedrals.types is not initial.dihedrals.types
        assert frame_1.dihedrals.typeid is initial.dihedrals.typeid
        assert frame_1.dihedrals.group is initial.dihedrals.group
        assert not frame_1.dihedrals.typeid.flags.writeable
        assert not frame_1.dihedrals.group.flags.writeable

        assert frame_1.impropers.types is not initial.impropers.types
        assert frame_1.impropers.typeid is initial.impropers.typeid
        assert frame_1.impropers.group is initial.impropers.group
        assert not frame_1.impropers.typeid.flags.writeable
        assert not frame_1.impropers.group.flags.writeable

        assert frame_1.constraints.value is initial.constraints.value
        assert frame_1.constraints.group is initial.constraints.group
        assert not frame_1.constraints.value.flags.writeable
        assert not frame_1.constraints.group.flags.writeable

        assert frame_1.pairs.types is not initial.pairs.types
        assert frame_1.pairs.typeid is initial.pairs.typeid
        assert frame_1.pairs.group is initial.pairs.group
        assert not frame_1.pairs.typeid.flags.writeable
        assert not frame_1.pairs.group.flags.writeable

        assert frame_1.log is not initial.log
        for key in frame_1.log.keys():
            assert frame_1.log[key] is initial.log[key]
            assert not frame_1.log[key].flags.writeable
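
# Illustrative sketch (not part of the original suite): frames read from a
# trajectory may share read-only arrays with the cached initial frame, as
# test_initial_frame_copy verifies above. A caller that wants to modify the
# data can take an explicit copy first. The helper name and file path are
# illustrative.
def writable_copy_sketch(path):
    """Sketch: copy a read-only position array before modifying it."""
    with gsd.hoomd.open(name=path, mode='r') as hf:
        frame = hf[1]
        # numpy.array makes a writable copy; the cached array stays untouched.
        position = numpy.array(frame.particles.position)
        position[:, 0] += 1.0
        return position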

gsd-3.3.0/gsd/test/test_largefile.py

# Copyright (c) 2016-2024 The Regents of the University of Michigan
# Part of GSD, released under the BSD 2-Clause License.

"""Test the gsd.fl API with large files."""

import gc

import numpy
import pytest

import gsd.hoomd


@pytest.mark.validate()
@pytest.mark.parametrize('N', [2**27, 2**28, 2**29 + 1])
def test_large_n(tmp_path, N):
    """Test data chunks and files larger than 2 GB."""
    gc.collect()

    data = numpy.linspace(0, N, num=N, endpoint=False, dtype=numpy.uint32)

    with gsd.fl.open(
        name=tmp_path / 'test_large_N.gsd',
        mode='x',
        application='test_large_N',
        schema='none',
        schema_version=[1, 2],
    ) as f:
        f.write_chunk(name='data', data=data)
        f.end_frame()

    with gsd.fl.open(
        name=tmp_path / 'test_large_N.gsd',
        mode='r',
        application='test_large_N',
        schema='none',
        schema_version=[1, 2],
    ) as f:
        read_data = f.read_chunk(frame=0, name='data')

        # compare the array with memory usage so this test can pass on CI
        # platforms
        diff = data - read_data
        data = None
        read_data = None
        gc.collect()

        diff = diff**2
        assert numpy.sum(diff) == 0
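
# Illustrative sketch (not part of the original suite): the same gsd.fl calls
# used by test_large_n above, at a small size. The application/schema strings,
# helper name, and file path are illustrative.
def small_chunk_round_trip_sketch(path):
    """Sketch: write one chunk in one frame, then read it back."""
    data = numpy.arange(16, dtype=numpy.uint32)

    with gsd.fl.open(
        name=path,
        mode='x',
        application='sketch',
        schema='none',
        schema_version=[1, 2],
    ) as f:
        f.write_chunk(name='data', data=data)
        f.end_frame()

    with gsd.fl.open(
        name=path,
        mode='r',
        application='sketch',
        schema='none',
        schema_version=[1, 2],
    ) as f:
        return f.read_chunk(frame=0, name='data')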

gsd-3.3.0/gsd/version.py

# Copyright (c) 2016-2024 The Regents of the University of Michigan
# Part of GSD, released under the BSD 2-Clause License.

"""Define the current version of the gsd package.

Attributes:
    version (str): GSD software version number. This is the version number of
        the software package as a whole, not the file layer version it
        reads/writes.
"""

version = '3.3.0'

__all__ = [
    'version',
]

gsd-3.3.0/pyproject.toml

[project]
requires-python = ">=3.6"
name = "gsd"
version = "3.3.0"
description = "General simulation data file format."
readme = "README.md"
license = {text = "BSD-2-Clause"}
authors = [
    {name = "Joshua A. Anderson", email = "joaander@umich.edu"},
]
classifiers=[
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Operating System :: MacOS :: MacOS X",
    "Operating System :: POSIX",
    "License :: OSI Approved :: BSD License",
    "Topic :: Scientific/Engineering :: Physics",
]
dependencies = ["numpy>=1.19.0"]

[project.scripts]
gsd = "gsd.__main__:main"

[project.urls]
Homepage = "https://gsd.readthedocs.io"
Documentation = "https://gsd.readthedocs.io"
Download = "https://github.com/glotzerlab/gsd/releases/download/v3.3.0/gsd-3.3.0.tar.gz"
Source = "https://github.com/glotzerlab/gsd"
Issues = "https://github.com/glotzerlab/gsd/issues"

[tool.setuptools]
packages = ["gsd", "gsd.test"]

[build-system]
requires = ["setuptools>=64.0.0", "wheel", "Cython", "numpy>=2.0.0rc1"]

[tool.cibuildwheel]
# Test the wheels.
test-command = "pytest --pyargs gsd -v --log-level=DEBUG"
# Build on 64-bit architectures.
archs = ["auto64"]

[tool.cibuildwheel.linux]
# dependencies do not build for musl
skip = ["pp* *musllinux*"]

gsd-3.3.0/scripts/

gsd-3.3.0/scripts/.gitignore

*.gsd
perf.*

gsd-3.3.0/scripts/CMakeLists.txt

add_executable(benchmark-write benchmark-write.cc ../gsd/gsd.c)
set_property(TARGET benchmark-write PROPERTY CXX_STANDARD 11)

add_executable(benchmark-read benchmark-read.cc ../gsd/gsd.c)
set_property(TARGET benchmark-read PROPERTY CXX_STANDARD 11)

gsd-3.3.0/scripts/benchmark-hoomd.py

# Copyright (c) 2016-2024 The Regents of the University of Michigan
# Part of GSD, released under the BSD 2-Clause License.

"""Benchmark GSD HOOMD file read/write."""

import math
import os
import random
import sys
import time
from subprocess import PIPE, call

import numpy

import gsd.fl
import gsd.hoomd
import gsd.pygsd

# import logging
# logging.basicConfig(level=logging.DEBUG)


def write_frame(file, frame_idx, position, orientation):
    """Write a frame to the file."""
    frame = gsd.hoomd.Frame()
    frame.particles.N = position.shape[0]
    frame.configuration.step = frame_idx * 10
    position[0][0] = frame_idx
    orientation[0][0] = frame_idx
    frame.particles.position = position
    frame.particles.orientation = orientation
    file.append(frame)


def read_frame(file, frame_idx, position, orientation):
    """Read a frame from the file."""
    frame = file[frame_idx]  # noqa


def write_file(file, nframes, N, position, orientation):
    """Write a whole file."""
    steps = compute_actual_size(N, nframes) / (250 * 1024**2)
    step = int(nframes / steps)
    if step == 0:
        step = 1

    for i in range(0, nframes):
        if i % step == 0:
            print(i, '/', nframes, file=sys.stderr, flush=True)

        write_frame(file, i, position, orientation)


def read_sequential_file(file, nframes, nframes_read, N, position, orientation):
    """Read the file sequentially."""
    steps = compute_actual_size(N, nframes) / (250 * 1024**2)
    step = int(nframes / steps)
    if step == 0:
        step = 1

    for i in range(0, nframes_read):
        if i % step == 0:
            print(i, '/', nframes, file=sys.stderr, flush=True)

        read_frame(file, i, position, orientation)


def read_random_file(file, nframes, nframes_read, N, position, orientation):
    """Read the file in random order."""
    steps = compute_actual_size(N, nframes) / (250 * 1024**2)
    step = int(nframes / steps)
    if step == 0:
        step = 1

    frames = list(range(0, nframes))
    random.shuffle(frames)

    for i, f in enumerate(frames[:nframes_read]):
        if i % step == 0:
            print(i, '/', nframes, file=sys.stderr, flush=True)

        read_frame(file, f, position, orientation)


def compute_nframes(N, size):
    """Compute the number of frames to write to the file."""
    bytes_per_frame = (3 + 4) * 4 * N
    return int(math.ceil(size / bytes_per_frame))


def compute_actual_size(N, nframes):
    """Compute the actual size of the file."""
    bytes_per_frame = (3 + 4) * 4 * N
    return nframes * bytes_per_frame
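
# Illustrative sketch (not part of the original script): the frame-count
# arithmetic above, worked at one benchmark point. Each particle stores a
# float32 position (3 values) and orientation (4 values), so a frame of
# N = 1024**2 particles occupies (3 + 4) * 4 * N = 28 MiB, and a 1 GiB target
# file holds ceil(1024**3 / (28 * 1024**2)) = 37 frames.
def example_frame_count():
    """Sketch: frame count for N = 1024**2 particles and a 1 GiB target."""
    n = 1024**2
    assert compute_actual_size(n, 1) == 28 * 1024**2
    assert compute_nframes(n, 1 * 1024**3) == 37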
# Run all benchmarks with the given options
def run_benchmarks(N, size):
    """Run all the benchmarks."""
    bmark_read_size = 0.25 * 1024**3
    timings = {}
    rng = numpy.random.default_rng()
    position = rng.random((N, 3)).astype('float32')
    orientation = rng.random((N, 4)).astype('float32')

    nframes = compute_nframes(N, size)
    actual_size = compute_actual_size(N, nframes)
    nframes_read = int(nframes * bmark_read_size / actual_size)
    bmark_read_size = compute_actual_size(N, nframes_read)
    if nframes_read > nframes:
        nframes_read = nframes
        bmark_read_size = actual_size

    # first, write the file and time how long it takes
    print('Writing file: ', file=sys.stderr, flush=True)

    # if the file size is small, write it once to warm up the disk
    if size < 64 * 1024**3:
        with gsd.hoomd.open(name='test.gsd', mode='w') as hf:
            write_file(hf, nframes, N, position, orientation)

    # write it again and time this one
    with gsd.hoomd.open(name='test.gsd', mode='w') as hf:
        start = time.time()
        write_file(hf, nframes, N, position, orientation)

        # ensure that all writes to disk are completed and drop file system cache
        call(['sudo', '/bin/sync'])
        call(['sudo', '/sbin/sysctl', 'vm.drop_caches=3'], stdout=PIPE)

    end = time.time()
    timings['write'] = actual_size / 1024**2 / (end - start)

    # time how long it takes to open the file
    print('Opening file... ', file=sys.stderr, flush=True, end='')
    start = time.time()
    with gsd.hoomd.open(name='test.gsd', mode='r') as hf:
        end = time.time()
        print(end - start, 's', file=sys.stderr, flush=True)
        timings['open_time'] = end - start

        # Read the file sequentially and measure the time taken
        print('Sequential read file:', file=sys.stderr, flush=True)
        start = time.time()
        read_sequential_file(hf, nframes, nframes_read, N, position, orientation)
        end = time.time()
        timings['seq_read'] = bmark_read_size / 1024**2 / (end - start)

        # drop the file system cache
        call(['sudo', '/bin/sync'])
        call(['sudo', '/sbin/sysctl', 'vm.drop_caches=3'], stdout=PIPE)

        # Read the file randomly and measure the time taken
        print('Random read file:', file=sys.stderr, flush=True)
        start = time.time()
        read_random_file(hf, nframes, nframes_read, N, position, orientation)
        end = time.time()
        timings['random_read'] = bmark_read_size / 1024**2 / (end - start)
        timings['random_read_time'] = (end - start) / nframes_read / 1e-3

    os.unlink('test.gsd')
    return timings


def run_sweep(size, size_str):
    """Run a single sweep of benchmarks."""
    # if size < 10*1024**3:
    if True:
        result = run_benchmarks(32 * 32, size)

        print(
            '{:<7} {:<6} {:<9.4g} {:<12.4g} {:<11.4g} {:<13.4g} {:<11.3g}'.format(
                size_str,
                '32^2',
                result['open_time'] * 1000,
                result['write'],
                result['seq_read'],
                result['random_read'],
                result['random_read_time'],
            )
        )
        sys.stdout.flush()

    result = run_benchmarks(128 * 128, size)

    print(
        '{:<7} {:<6} {:<9.4g} {:<12.4g} {:<11.4g} {:<13.4g} {:<11.3g}'.format(
            size_str,
            '128^2',
            result['open_time'] * 1000,
            result['write'],
            result['seq_read'],
            result['random_read'],
            result['random_read_time'],
        )
    )
    sys.stdout.flush()

    result = run_benchmarks(1024 * 1024, size)

    print(
        '{:<7} {:<6} {:<9.4g} {:<12.4g} {:<11.4g} {:<13.4g} {:<11.3g}'.format(
            size_str,
            '1024^2',
            result['open_time'] * 1000,
            result['write'],
            result['seq_read'],
            result['random_read'],
            result['random_read_time'],
        )
    )
    sys.stdout.flush()


print(
    """
======= ====== ========= ============ =========== ============= ===========
Size    N      Open (ms) Write (MB/s) Read (MB/s) Random (MB/s) Random (ms)
======= ====== ========= ============ =========== ============= ==========="""
)

run_sweep(128 * 1024**2, '128 MiB')
run_sweep(1 * 1024**3, '1 GiB')
# run_sweep(128*1024**3, "128 GiB");

print('======= ====== ========= ============ =========== ============= ===========')

gsd-3.3.0/scripts/benchmark-read.cc

#include <chrono>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "gsd.h"

int main(int argc, char** argv) // NOLINT
    {
    const size_t n_keys = 40000;
    const size_t max_frames = 100;

    std::vector<char> data;
    std::vector<std::string> names;
    for (size_t i = 0; i < n_keys; i++)
        {
        std::ostringstream s;
        s << "log/hpmc/integrate/Sphere/quantity/" << i;
        names.push_back(s.str());
        }

    gsd_handle handle;
    gsd_open(&handle, "test.gsd", GSD_OPEN_READONLY);
    size_t n_frames = gsd_get_nframes(&handle);
    size_t n_read = n_frames;
    if (n_read > max_frames)
        {
        n_read = max_frames;
        }

    std::cout << "Reading test.gsd with: " << n_keys << " keys and " << n_frames << " frames."
              << std::endl;

    auto t1 = std::chrono::high_resolution_clock::now();

    for (size_t frame = 0; frame < n_read; frame++)
        {
        for (auto const& name : names)
            {
            const gsd_index_entry* e;
            e = gsd_find_chunk(&handle, frame, name.c_str());
            if (data.empty())
                {
                data.resize(e->N * e->M * gsd_sizeof_type((gsd_type)e->type));
                }
            gsd_read_chunk(&handle, data.data(), e);
            }
        }

    auto t2 = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> time_span
        = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);

    double time_per_key = time_span.count() / double(n_keys) / double(n_read);
    const double us = 1e-6;
    std::cout << "Sequential read time: " << time_per_key / us << " microseconds/key."
              << std::endl;

    gsd_close(&handle);
    }

gsd-3.3.0/scripts/benchmark-write.cc

#include <chrono>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#ifdef _WIN32
#include <io.h>
#define fsync _commit
#else // linux / mac
#include <unistd.h>
#endif

#include "gsd.h"

int main(int argc, char** argv) // NOLINT
    {
    const size_t n_keys = 16;
    const size_t n_frames = 100;
    const size_t key_size = static_cast<size_t>(1024) * static_cast<size_t>(1024);

    std::vector<double> data(key_size);
    for (size_t i = 0; i < key_size; i++)
        {
        data[i] = (double)i;
        }

    std::vector<std::string> names;
    for (size_t i = 0; i < n_keys; i++)
        {
        std::ostringstream s;
        s << "log/hpmc/integrate/Sphere/quantity/" << i;
        names.push_back(s.str());
        }

    std::cout << "Writing test.gsd with: " << n_keys << " keys, " << n_frames << " frames, "
              << "and " << key_size << " double(s) per key" << std::endl;

    gsd_handle handle;
    gsd_create_and_open(&handle, "test.gsd", "app", "schema", 0, GSD_OPEN_APPEND, 0);

    for (size_t frame = 0; frame < n_frames / 2; frame++)
        {
        for (auto const& name : names)
            {
            gsd_write_chunk(&handle, name.c_str(), GSD_TYPE_DOUBLE, key_size, 1, 0, data.data());
            }
        gsd_end_frame(&handle);
        }
    fsync(handle.fd);

    auto t1 = std::chrono::high_resolution_clock::now();
    for (size_t frame = 0; frame < n_frames / 2; frame++)
        {
        for (auto const& name : names)
            {
            gsd_write_chunk(&handle, name.c_str(), GSD_TYPE_DOUBLE, key_size, 1, 0, data.data());
            }
        gsd_end_frame(&handle);
        }
    fsync(handle.fd);
    auto t2 = std::chrono::high_resolution_clock::now();

    std::chrono::duration<double> time_span
        = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);

    double time_per_key = time_span.count() / double(n_keys) / double(n_frames / double(2));
    const double us = 1e-6;
    std::cout << "Write time: " << time_per_key / us << " microseconds/key." << std::endl;
    std::cout << "Write time: " << time_per_key / us * n_keys << " microseconds/frame."
              << std::endl;

    const double mb_per_second
        = double(key_size * 8 + static_cast<size_t>(32) * static_cast<size_t>(2)) / 1048576.0
          / time_per_key;
    std::cout << "MB/s: " << mb_per_second << " MB/s." << std::endl;

    gsd_close(&handle);

    gsd_open(&handle, "test.gsd", GSD_OPEN_READONLY);
    std::cout << "Frames: " << gsd_get_nframes(&handle) << std::endl;
    gsd_close(&handle);
    }
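
# Illustrative sketch (not part of the original sources): the throughput
# arithmetic printed by benchmark-write.cc above. Each key stores 1024**2
# doubles (8 MiB) plus the 64 extra bytes the benchmark adds per key, so the
# reported MB/s is that per-key byte count divided by the measured seconds
# per key. The helper name and example time below are made up.
def example_write_rate(time_per_key_seconds=0.01):
    """Sketch: MB/s for one 8 MiB key written in the given time."""
    key_size = 1024**2
    bytes_per_key = key_size * 8 + 32 * 2
    return bytes_per_key / 1048576.0 / time_per_key_seconds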
"""Install gsd.""" import numpy from Cython.Build import cythonize from setuptools import setup from setuptools.extension import Extension extensions = cythonize( [ Extension( 'gsd.fl', sources=['gsd/fl.pyx', 'gsd/gsd.c'], include_dirs=[numpy.get_include()], define_macros=[('NPY_NO_DEPRECATED_API', 'NPY_1_7_API_VERSION')], ) ], compiler_directives={'language_level': 3}, ) setup(ext_modules=extensions)