pax_global_header00006660000000000000000000000064147643450250014524gustar00rootroot0000000000000052 comment=166cf7b1fe12dd207a51125e6fc4e75edd7dfaf0 arrow-go-18.2.0/000077500000000000000000000000001476434502500133515ustar00rootroot00000000000000arrow-go-18.2.0/.asf.yaml000066400000000000000000000026241476434502500150700ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # https://cwiki.apache.org/confluence/display/INFRA/Git+-+.asf.yaml+features github: description: "Official Go implementation of Apache Arrow" homepage: https://arrow.apache.org/go/ labels: - apache-arrow - go del_branch_on_merge: true enabled_merge_buttons: merge: false rebase: false squash: true features: issues: true protected_branches: main: required_linear_history: true notifications: commits: commits@arrow.apache.org issues_status: issues@arrow.apache.org issues_comment: github@arrow.apache.org pullrequests: github@arrow.apache.org publish: whoami: asf-site subdir: go arrow-go-18.2.0/.dockerallow000066400000000000000000000014401476434502500156570ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. go.mod go.sum arrow-go-18.2.0/.editorconfig000066400000000000000000000014761476434502500160360ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. [*.sh] indent_style = space indent_size = 2 arrow-go-18.2.0/.env000066400000000000000000000021601476434502500141410ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. 
The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # All of the following environment variables are required to set default values # for the parameters in docker-compose.yml. # different architecture notations ARCH=amd64 # Default repository to pull and push images from REPO=ghcr.io/apache/arrow-go # Default versions for platforms DEBIAN=12 # Default versions for various dependencies GO=1.22.6 arrow-go-18.2.0/.github/000077500000000000000000000000001476434502500147115ustar00rootroot00000000000000arrow-go-18.2.0/.github/CODEOWNERS000066400000000000000000000032761476434502500163140ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
# # Any committer can add themselves to any of the path patterns # and will subsequently get requested as a reviewer for any PRs # that change matching files. # # This file uses .gitignore syntax with a few exceptions see the # documentation about the syntax: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners ## Components /arrow/ @zeroshade /internal/ @zeroshade /parquet/ @zeroshade go.mod @zeroshade go.sum @zeroshade # release scripts /ci/ @zeroshade @kou @raulcd @assignUser /dev/ @zeroshade @kou @raulcd @assignUser .env @assignUser @zeroshade @kou @raulcd compose.yaml @zeroshade @assignUser @kou @raulcd .dockerallow @zeroshade @assignUser @kou @raulcd # PR CI and repository files /.github/ @zeroshade @kou @assignUser @raulcd .asf.yaml @zeroshade @kou @assignUser @raulcd .pre-commit-config.yaml @kou @zeroshade .golangci.yaml @kou @zeroshade arrow-go-18.2.0/.github/ISSUE_TEMPLATE/000077500000000000000000000000001476434502500170745ustar00rootroot00000000000000arrow-go-18.2.0/.github/ISSUE_TEMPLATE/bug_report.yaml000066400000000000000000000026611476434502500221350ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
name: Bug Report description: File a bug report labels: ["Type: bug"] assignees: [] body: - type: textarea id: description attributes: label: Describe the bug, including details regarding any error messages, version, and platform. description: Please include what you expected. validations: required: true - type: dropdown id: component attributes: label: Component(s) multiple: true options: - Benchmarking - Continuous Integration - Developer Tools - Documentation - Integration - Parquet - Release - Other validations: required: true arrow-go-18.2.0/.github/ISSUE_TEMPLATE/config.yaml000066400000000000000000000014561476434502500212330ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. blank_issues_enabled: false arrow-go-18.2.0/.github/ISSUE_TEMPLATE/feature_request.yaml000066400000000000000000000027751476434502500231760ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. 
You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. name: Enhancement Request description: Request an enhancement to the project labels: ["Type: enhancement"] assignees: [] body: - type: markdown attributes: value: | Thanks for taking the time to share your feedback on ways Apache Arrow can be improved! - type: textarea id: description attributes: label: Describe the enhancement requested validations: required: true - type: dropdown id: component attributes: label: Component(s) multiple: true options: - Benchmarking - Continuous Integration - Developer Tools - Documentation - Integration - Parquet - Release - Other validations: required: true arrow-go-18.2.0/.github/ISSUE_TEMPLATE/usage_question.yaml000066400000000000000000000042541476434502500230200ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
name: Usage Question description: Ask a question labels: ["Type: usage"] assignees: [] body: - type: markdown attributes: value: > While we enable issues as a mechanism for new contributors and passers-by who are unfamiliar with Apache Software Foundation projects to ask questions and interact with the project, we encourage users to ask such questions on public mailing lists: * Development discussions: dev@arrow.apache.org (first subscribe by sending an e-mail to dev-subscribe@arrow.apache.org). * User discussions: user@arrow.apache.org (first subscribe by sending an e-mail to user-subscribe@arrow.apache.org). * Mailing list archives: https://arrow.apache.org/community/ Do not be surprised by responses to issues raised here directing you to those mailing lists, or to report a bug or feature request here. Thank you! - type: textarea id: description attributes: label: > Describe the usage question you have. Please include as many useful details as possible. validations: required: true - type: dropdown id: component attributes: label: Component(s) multiple: true options: - Benchmarking - Continuous Integration - Developer Tools - Documentation - Integration - Parquet - Release - Other validations: required: true arrow-go-18.2.0/.github/dependabot.yml000066400000000000000000000017411476434502500175440ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. 
You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" - package-ecosystem: "gomod" directory: "/" schedule: interval: "weekly" arrow-go-18.2.0/.github/pull_request_template.md000066400000000000000000000002241476434502500216500ustar00rootroot00000000000000### Rationale for this change ### What changes are included in this PR? ### Are these changes tested? ### Are there any user-facing changes? arrow-go-18.2.0/.github/workflows/000077500000000000000000000000001476434502500167465ustar00rootroot00000000000000arrow-go-18.2.0/.github/workflows/comment_bot.yml000066400000000000000000000027271476434502500220070ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
name: Comment Bot on: issue_comment: types: - created - edited jobs: issue_assign: name: "Assign issue" permissions: contents: read issues: write if: github.event.comment.body == 'take' runs-on: ubuntu-latest steps: - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: |- github.rest.issues.addAssignees({ owner: context.repo.owner, repo: context.repo.repo, issue_number: context.payload.issue.number, assignees: context.payload.comment.user.login }); arrow-go-18.2.0/.github/workflows/lint.yml000066400000000000000000000040071476434502500204400ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
name: Lint on: push: branches: - '**' - '!dependabot/**' tags: - '*' pull_request: concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} cancel-in-progress: true permissions: contents: read jobs: lint: name: Lint runs-on: ubuntu-latest timeout-minutes: 15 steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - name: Setup Python uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Setup Go uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0 with: go-version: '1.23' cache: true cache-dependency-path: go.sum - name: Install pre-commit run: | python -m pip install pre-commit - name: Cache pre-commit uses: actions/cache@v4 with: path: ~/.cache/pre-commit key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} - name: Run pre-commit run: | pre-commit run --all-files --color=always --show-diff-on-failure arrow-go-18.2.0/.github/workflows/rc.yml000066400000000000000000000104421476434502500200760ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
name: RC on: push: branches: - '**' - '!dependabot/**' tags: - '*-rc*' pull_request: concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} cancel-in-progress: true permissions: contents: read jobs: archive: name: Archive runs-on: ubuntu-latest timeout-minutes: 5 steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - name: Prepare for tag if: github.ref_type == 'tag' run: | version=${GITHUB_REF_NAME%-rc*} version=${version#v} rc=${GITHUB_REF_NAME#*-rc} echo "VERSION=${version}" >> ${GITHUB_ENV} echo "RC=${rc}" >> ${GITHUB_ENV} - name: Prepare for branch if: github.ref_type == 'branch' run: | version=$(grep -o '^const PkgVersion = ".*"' "arrow/doc.go" | sed \ -e 's/^const PkgVersion = "//' \ -e 's/"$//') rc=$(date +%Y%m%d) echo "VERSION=${version}" >> ${GITHUB_ENV} echo "RC=${rc}" >> ${GITHUB_ENV} - name: Archive run: | id="apache-arrow-go-${VERSION}" tar_gz="${id}.tar.gz" echo "TAR_GZ=${tar_gz}" >> ${GITHUB_ENV} git archive HEAD --prefix "${id}/" --output "${tar_gz}" sha256sum "${tar_gz}" > "${tar_gz}.sha256" sha512sum "${tar_gz}" > "${tar_gz}.sha512" - name: Audit run: | dev/release/run_rat.sh "${TAR_GZ}" - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 with: name: archive path: | apache-arrow-go-* verify: name: Verify needs: - archive runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: - macos-latest - ubuntu-latest steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9 with: name: archive - name: Verify run: | tar_gz=$(echo apache-arrow-go-*.tar.gz) version=${tar_gz#apache-arrow-go-} version=${version%.tar.gz} # rc isn't used with VERIFY_DOWNLOAD=0 if [ "${GITHUB_REF_TYPE}" = "tag" ]; then rc="${GITHUB_REF_NAME#*-rc}" else rc=$(date +%Y%m%d) fi 
VERIFY_DEFAULT=0 dev/release/verify_rc.sh "${version}" "${rc}" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} upload: name: Upload if: github.ref_type == 'tag' needs: - verify runs-on: ubuntu-latest permissions: contents: write steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9 with: name: archive - name: Upload run: | # TODO: Add support for release note gh release create ${GITHUB_REF_NAME} \ --prerelease \ --title "Apache Arrow Go ${GITHUB_REF_NAME}" \ --verify-tag \ apache-arrow-go-*.tar.gz \ apache-arrow-go-*.tar.gz.sha* env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} arrow-go-18.2.0/.github/workflows/test.yml000066400000000000000000000307111476434502500204520ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
name: Test on: push: branches: - '**' - '!dependabot/**' tags: - '*' pull_request: concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} cancel-in-progress: true permissions: contents: read jobs: docker: name: ${{ matrix.arch-label }} Debian 12 Go ${{ matrix.go }} runs-on: ${{ matrix.runs-on }} timeout-minutes: 30 strategy: fail-fast: false matrix: include: - arch-label: AMD64 arch: amd64 go: 1.23 runs-on: ubuntu-latest - arch-label: AMD64 arch: amd64 go: 1.24 runs-on: ubuntu-latest - arch-label: ARM64 arch: arm64v8 go: 1.23 runs-on: ubuntu-24.04-arm - arch-label: ARM64 arch: arm64v8 go: 1.24 runs-on: ubuntu-24.04-arm env: ARCH: ${{ matrix.arch }} GO: ${{ matrix.go }} steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - name: Login to GitHub Container registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Pull run: | docker compose pull debian || : - name: Test run: | docker compose run debian - name: Push if: >- success() && github.event_name == 'push' && github.repository == 'apache/arrow-go' && github.ref_name == 'main' continue-on-error: true run: | docker compose push debian docker-cgo: name: AMD64 Debian 12 Go ${{ matrix.go }} - CGO runs-on: ubuntu-latest timeout-minutes: 30 strategy: fail-fast: false matrix: go: - '1.23' - '1.24' env: GO: ${{ matrix.go }} steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - name: Login to GitHub Container registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Pull run: | docker compose pull debian || : docker compose pull debian-cgo || : - name: Build run: | docker compose build debian docker compose build debian-cgo - name: Test run: | docker compose run debian-cgo 
- name: Push if: >- success() && github.event_name == 'push' && github.repository == 'apache/arrow-go' && github.ref_name == 'main' continue-on-error: true run: | docker compose push debian-cgo docker-cgo-python: name: AMD64 Debian 12 Go ${{ matrix.go }} - CGO Python runs-on: ubuntu-latest timeout-minutes: 30 strategy: fail-fast: false matrix: go: - '1.23' - '1.24' env: GO: ${{ matrix.go }} steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - name: Login to GitHub Container registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Pull run: | docker compose pull debian || : docker compose pull debian-cgo-python || : - name: Build run: | docker compose build debian docker compose build debian-cgo-python - name: Test run: | docker compose run debian-cgo-python - name: Push if: >- success() && github.event_name == 'push' && github.repository == 'apache/arrow-go' && github.ref_name == 'main' continue-on-error: true run: | docker compose push debian-cgo-python macos: name: AMD64 macOS 14 Go ${{ matrix.go }} runs-on: macos-14 timeout-minutes: 20 strategy: fail-fast: false matrix: go: - '1.23' - '1.24' steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - name: Setup Go uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0 with: go-version: ${{ matrix.go }} cache: true cache-dependency-path: go.sum - name: Build run: | ci/scripts/build.sh $(pwd) - name: Test run: | ci/scripts/test.sh $(pwd) macos-cgo: name: AMD64 macOS 14 Go ${{ matrix.go }} - CGO runs-on: macos-14 timeout-minutes: 25 strategy: fail-fast: false matrix: go: - '1.23' - '1.24' env: ARROW_GO_TESTCGO: "1" steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - name: Setup Go uses: 
actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0 with: go-version: ${{ matrix.go }} cache: true cache-dependency-path: go.sum - name: Brew Install Arrow run: brew install apache-arrow - name: Setup PKG_CONFIG_PATH run: | echo "PKG_CONFIG_PATH=$(brew --prefix openssl@3)/lib/pkgconfig:$PKG_CONFIG_PATH" >> $GITHUB_ENV - name: Build run: | ci/scripts/build.sh $(pwd) - name: Test run: | ci/scripts/test.sh $(pwd) windows: name: AMD64 Windows 2019 Go ${{ matrix.go }} runs-on: windows-2019 timeout-minutes: 20 strategy: fail-fast: false matrix: go: - '1.23' - '1.24' steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - name: Setup Go uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0 with: go-version: ${{ matrix.go }} cache: true cache-dependency-path: go.sum - name: Build shell: bash run: ci/scripts/build.sh $(pwd) - name: Test shell: bash run: ci/scripts/test.sh $(pwd) windows-mingw: name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} CGO runs-on: windows-2019 timeout-minutes: 20 strategy: fail-fast: false matrix: mingw-n-bits: #- 32 runtime handling for CGO needs 64-bit currently - 64 env: ARROW_GO_TESTCGO: "1" MINGW_LINT: "1" steps: - name: Disable Crash Dialogs run: | reg add ` "HKCU\SOFTWARE\Microsoft\Windows\Windows Error Reporting" ` /v DontShowUI ` /t REG_DWORD ` /d 1 ` /f - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - uses: msys2/setup-msys2@v2 with: msystem: MINGW${{ matrix.mingw-n-bits }} update: true - name: Setup MSYS2 shell: msys2 {0} run: | ci/scripts/msys2_setup.sh - name: Get required Go version run: "(. 
.env && echo \"GO_VERSION=${GO}\") >> $GITHUB_ENV" - name: Update CGO Env vars shell: msys2 {0} run: | echo "CGO_CPPFLAGS=-I$(cygpath --windows ${MINGW_PREFIX}/include)" >> $GITHUB_ENV echo "CGO_LDFLAGS=-g -O2 -L$(cygpath --windows ${MINGW_PREFIX}/lib) -L$(cygpath --windows ${MINGW_PREFIX}/bin)" >> $GITHUB_ENV echo "MINGW_PREFIX=$(cygpath --windows ${MINGW_PREFIX})" >> $GITHUB_ENV - name: Setup Go uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0 with: go-version: "${{ env.GO_VERSION }}" cache: true cache-dependency-path: go.sum - name: Build shell: bash run: ci/scripts/build.sh $(pwd) - name: Test shell: bash run: ci/scripts/test.sh $(pwd) build-test-386: name: Cross-build and test for 386 runs-on: ubuntu-latest timeout-minutes: 20 steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - name: Get required Go version run: | (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Setup Go uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0 with: go-version: "${{ env.GO_VERSION }}" cache: true cache-dependency-path: go.sum - name: Build run: GOARCH=386 go build ./... - name: Test # WIP refactor, only tests in the specified dirs have been fixed run: GOARCH=386 go test ./parquet/file/... 
tinygo: name: TinyGo runs-on: ubuntu-latest env: TINYGO_VERSION: 0.33.0 timeout-minutes: 20 steps: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - name: Build and Run Example run: | docker run \ --rm \ -v $(pwd):/src \ -v $(pwd)/ci/scripts:/ci-scripts \ "tinygo/tinygo:$TINYGO_VERSION" \ /ci-scripts/tinygo_example.sh integration: name: AMD64 Conda Integration runs-on: ubuntu-latest timeout-minutes: 60 steps: - name: Checkout Arrow uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 repository: apache/arrow submodules: recursive - name: Checkout Arrow Rust uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: apache/arrow-rs path: rust - name: Checkout Arrow nanoarrow uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: apache/arrow-nanoarrow path: nanoarrow - name: Checkout Arrow Go uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: go - name: Checkout Arrow Java uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: apache/arrow-java path: java - name: Free up disk space run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 with: path: .docker key: integration-conda-${{ hashFiles('cpp/**') }} restore-keys: conda- - name: Setup Python uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build run: | source ci/scripts/util_enable_core_dumps.sh archery docker run \ -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ -e ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS=go \ -e ARCHERY_INTEGRATION_WITH_GO=1 \ -e ARCHERY_INTEGRATION_WITH_JAVA=1 \ -e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \ 
-e ARCHERY_INTEGRATION_WITH_RUST=1 \ conda-integration arrow-go-18.2.0/.gitignore000066400000000000000000000016731476434502500153500ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. .vscode /apache-arrow-go-*.tar.gz /apache-arrow-go-*.tar.gz.asc /apache-arrow-go.tar.gz /dev/release/apache-rat-*.jar /dev/release/filtered_rat.txt /dev/release/rat.xml arrow-go-18.2.0/.gitmodules000066400000000000000000000017421476434502500155320ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
[submodule "arrow-testing"] path = arrow-testing url = https://github.com/apache/arrow-testing.git [submodule "parquet-testing"] path = parquet-testing url = https://github.com/apache/parquet-testing.git arrow-go-18.2.0/.golangci.yaml000066400000000000000000000020021476434502500160700ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. linters: # Disable all linters. # Default: false disable-all: true # Enable specific linter # https://golangci-lint.run/usage/linters/#enabled-by-default enable: - gofmt - goimports - staticcheck issues: fix: true arrow-go-18.2.0/.pre-commit-config.yaml000066400000000000000000000042711476434502500176360ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. 
You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. repos: - repo: https://github.com/golangci/golangci-lint rev: v1.61.0 hooks: # no built-in support for multiple go.mod # https://github.com/golangci/golangci-lint/issues/828 - id: golangci-lint-full name: golangci-lint-full-arrow entry: bash -c 'cd arrow && golangci-lint run --timeout=5m' - id: golangci-lint-full name: golangci-lint-full-parquet entry: bash -c 'cd parquet && golangci-lint run' - id: golangci-lint-full name: golangci-lint-full-internal entry: bash -c 'cd internal && golangci-lint run' - repo: local hooks: - id: rat name: Release Audit Tool language: system entry: | bash -c " \ git archive HEAD \ --prefix=apache-arrow-go/ \ --output=apache-arrow-go.tar.gz && \ dev/release/run_rat.sh apache-arrow-go.tar.gz" always_run: true pass_filenames: false - repo: https://github.com/koalaman/shellcheck-precommit rev: v0.10.0 hooks: - id: shellcheck - repo: https://github.com/scop/pre-commit-shfmt rev: v3.9.0-1 hooks: - id: shfmt args: # The default args is "--write --simplify" but we don't use # "--simplify". Because it's conflicted will ShellCheck. 
- "--write" - repo: https://github.com/google/yamlfmt rev: v0.13.0 hooks: - id: yamlfmt arrow-go-18.2.0/CODE_OF_CONDUCT.md000066400000000000000000000016451476434502500161560ustar00rootroot00000000000000 # Code of Conduct * [Code of Conduct for The Apache Software Foundation][1] [1]: https://www.apache.org/foundation/policies/conduct.html arrow-go-18.2.0/CONTRIBUTING.md000066400000000000000000000055471476434502500156150ustar00rootroot00000000000000 # How to contribute to Apache Arrow Go We utilize [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) for our commit messages. This helps maintain the semantic versioning of this module. Please use the following commit types: `build`, `chore`, `ci`, `docs`, `feat`, `fix`, `perf`, `refactor`, `revert`, `style`, `test`. For PRs with changes entirely within a single sub-package, please use a scope that references that package such as `arrow/flight` or `parquet/pqarrow`. For more general PRs, a top level scope should be sufficient. For example: ``` fix(arrow/cdata): handle empty structs in C data interface ci: update CI environment feat(parquet): support new encoding type ``` ## Did you find a bug? The Arrow project uses GitHub as a bug tracker. To report a bug, sign in to your GitHub account, navigate to [GitHub issues](https://github.com/apache/arrow-go/issues) and click on **New issue** . To be assigned to an issue, add a comment "take" to that issue. Before you create a new bug entry, we recommend you first search among existing Arrow issues in [GitHub](https://github.com/apache/arrow-go/issues). ## Did you write a patch that fixes a bug or brings an improvement? If there is a corresponding issue for your patch, please make sure to [reference the issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/using-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword) in your PR description. 
## Do you want to propose a significant new feature or an important refactoring? We ask that all discussions about major changes in the codebase happen publicly on the [arrow-dev mailing-list](https://arrow.apache.org/community/#mailing-lists). ## Do you have questions about the source code, the build procedure or the development process? You can also ask on the mailing-list, see above. ## Further information Please read our [development documentation](https://arrow.apache.org/docs/developers/index.html) or look through the [New Contributor's Guide](https://arrow.apache.org/docs/developers/guide/index.html). arrow-go-18.2.0/LICENSE.txt000066400000000000000000000326541476434502500152060ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- This project includes code from the Go project, BSD 3-clause license + PATENTS weak patent termination clause (https://github.com/golang/go/blob/master/PATENTS): * arrow/flight/cookie_middleware.go Copyright (c) 2009 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- This project includes code from the LLVM project: * arrow/compute/internal/kernels/_lib/types.h Apache License v2.0 with LLVM Exceptions. See https://llvm.org/LICENSE.txt for license information. SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -------------------------------------------------------------------------------- This project includes code from the brotli project (https://github.com/google/brotli): * parquet/compress/brotli.go Copyright: 2013 Google Inc. All Rights Reserved Distributed under MIT License. arrow-go-18.2.0/NOTICE.txt000066400000000000000000000002511476434502500150710ustar00rootroot00000000000000Apache Arrow Go Copyright 2016-2025 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). arrow-go-18.2.0/README.md000066400000000000000000000134531476434502500146360ustar00rootroot00000000000000 Apache Arrow for Go =================== [![Go Reference](https://pkg.go.dev/badge/github.com/apache/arrow-go/v18.svg)](https://pkg.go.dev/github.com/apache/arrow-go/v18) [Apache Arrow][arrow] is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. 
It also provides computational libraries and zero-copy streaming messaging and inter-process communication. ### A note about FlightSQL drivers Go FlightSQL drivers live in the [ADBC repository](https://github.com/apache/arrow-adbc/tree/main/go/adbc). In particular, to use the Golang `database/sql` interface: ```golang import ( "database/sql" _ "github.com/apache/arrow-adbc/go/adbc/sqldriver/flightsql" ) func main() { dsn := "uri=grpc://localhost:12345;username=mickeymouse;password=p@55w0RD" db, err := sql.Open("flightsql", dsn) ... } ``` DSN option keys are expressed as `k=v`, delimited with `;`. Some options keys are defined in ADBC, others are defined in the FlightSQL ADBC driver. - Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/flight_sql.html#client-options) - ADBC [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/adbc.go#L149-L158) - FlightSQL driver option keys [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/driver/flightsql/flightsql_adbc.go#L70-L81) Reference Counting ------------------ The library makes use of reference counting so that it can track when memory buffers are no longer used. This allows Arrow to update resource accounting, pool memory such and track overall memory usage as objects are created and released. Types expose two methods to deal with this pattern. The `Retain` method will increase the reference count by 1 and `Release` method will reduce the count by 1. Once the reference count of an object is zero, any associated object will be freed. `Retain` and `Release` are safe to call from multiple goroutines. ### When to call `Retain` / `Release`? * If you are passed an object and wish to take ownership of it, you must call `Retain`. You must later pair this with a call to `Release` when you no longer need the object. 
"Taking ownership" typically means you wish to access the object outside the scope of the current function call. * You own any object you create via functions whose name begins with `New` or `Copy` or when receiving an object over a channel. Therefore you must call `Release` once you no longer need the object. * If you send an object over a channel, you must call `Retain` before sending it as the receiver is assumed to own the object and will later call `Release` when it no longer needs the object. Performance ----------- The arrow package makes extensive use of [c2goasm][] to leverage LLVM's advanced optimizer and generate PLAN9 assembly functions from C/C++ code. The arrow package can be compiled without these optimizations using the `noasm` build tag. Alternatively, by configuring an environment variable, it is possible to dynamically configure which architecture optimizations are used at runtime. We use the (cpu)[https://pkg.go.dev/golang.org/x/sys/cpu] package to check dynamically for these features. ### Example Usage The following benchmarks demonstrate summing an array of 8192 values using various optimizations. Disable no architecture optimizations (thus using AVX2): ```sh $ INTEL_DISABLE_EXT=NONE go test -bench=8192 -run=. ./math goos: darwin goarch: amd64 pkg: github.com/apache/arrow-go/arrow/math BenchmarkFloat64Funcs_Sum_8192-8 2000000 687 ns/op 95375.41 MB/s BenchmarkInt64Funcs_Sum_8192-8 2000000 719 ns/op 91061.06 MB/s BenchmarkUint64Funcs_Sum_8192-8 2000000 691 ns/op 94797.29 MB/s PASS ok github.com/apache/arrow-go/arrow/math 6.444s ``` **NOTE:** `NONE` is simply ignored, thus enabling optimizations for AVX2 and SSE4 ---- Disable AVX2 architecture optimizations: ```sh $ INTEL_DISABLE_EXT=AVX2 go test -bench=8192 -run=. 
./math goos: darwin goarch: amd64 pkg: github.com/apache/arrow-go/arrow/math BenchmarkFloat64Funcs_Sum_8192-8 1000000 1912 ns/op 34263.63 MB/s BenchmarkInt64Funcs_Sum_8192-8 1000000 1392 ns/op 47065.57 MB/s BenchmarkUint64Funcs_Sum_8192-8 1000000 1405 ns/op 46636.41 MB/s PASS ok github.com/apache/arrow-go/arrow/math 4.786s ``` ---- Disable ALL architecture optimizations, thus using pure Go implementation: ```sh $ INTEL_DISABLE_EXT=ALL go test -bench=8192 -run=. ./math goos: darwin goarch: amd64 pkg: github.com/apache/arrow-go/arrow/math BenchmarkFloat64Funcs_Sum_8192-8 200000 10285 ns/op 6371.41 MB/s BenchmarkInt64Funcs_Sum_8192-8 500000 3892 ns/op 16837.37 MB/s BenchmarkUint64Funcs_Sum_8192-8 500000 3929 ns/op 16680.00 MB/s PASS ok github.com/apache/arrow-go/arrow/math 6.179s ``` [arrow]: https://arrow.apache.org [c2goasm]: https://github.com/minio/c2goasm arrow-go-18.2.0/arrow-testing/000077500000000000000000000000001476434502500161565ustar00rootroot00000000000000arrow-go-18.2.0/arrow/000077500000000000000000000000001476434502500145035ustar00rootroot00000000000000arrow-go-18.2.0/arrow/.editorconfig000066400000000000000000000015051476434502500171610ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
root = true [*.tmpl] indent_style = tab indent_size = 4arrow-go-18.2.0/arrow/.gitignore000066400000000000000000000021011476434502500164650ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ### Go template # Binaries for programs and plugins *.exe *.dll *.so *.dylib *.o # Test binary, build with `go test -c` *.test # Output of the go coverage tool, specifically when used with LiteIDE *.out # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 .glide/ bin/ vendor/arrow-go-18.2.0/arrow/Gopkg.lock000066400000000000000000000024111476434502500164220ustar00rootroot00000000000000# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 
[[projects]] digest = "1:56c130d885a4aacae1dd9c7b71cfe39912c7ebc1ff7d2b46083c8812996dc43b" name = "github.com/davecgh/go-spew" packages = ["spew"] pruneopts = "" revision = "346938d642f2ec3594ed81d874461961cd0faa76" version = "v1.1.0" [[projects]] digest = "1:1d7e1867c49a6dd9856598ef7c3123604ea3daabf5b83f303ff457bcbc410b1d" name = "github.com/pkg/errors" packages = ["."] pruneopts = "" revision = "ba968bfe8b2f7e042a574c888954fccecfa385b4" version = "v0.8.1" [[projects]] digest = "1:256484dbbcd271f9ecebc6795b2df8cad4c458dd0f5fd82a8c2fa0c29f233411" name = "github.com/pmezard/go-difflib" packages = ["difflib"] pruneopts = "" revision = "792786c7400a136282c1664665ae0a8db921c6c2" version = "v1.0.0" [[projects]] digest = "1:2d0dc026c4aef5e2f3a0e06a4dabe268b840d8f63190cf6894e02134a03f52c5" name = "github.com/stretchr/testify" packages = ["assert"] pruneopts = "" revision = "b91bfb9ebec76498946beb6af7c0230c7cc7ba6c" version = "v1.2.0" [solve-meta] analyzer-name = "dep" analyzer-version = 1 input-imports = [ "github.com/pkg/errors", "github.com/stretchr/testify/assert", ] solver-name = "gps-cdcl" solver-version = 1 arrow-go-18.2.0/arrow/Gopkg.toml000066400000000000000000000016331476434502500164520ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. [[constraint]] name = "github.com/stretchr/testify" version = "1.2.0" [[constraint]] name = "github.com/pkg/errors" version = "0.8.1"arrow-go-18.2.0/arrow/Makefile000066400000000000000000000040301476434502500161400ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. GO_BUILD=go build GO_GEN=go generate GO_TEST?=go test GOPATH=$(realpath ../../../../../..) GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') ALL_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') SOURCES_NO_VENDOR := $(shell find . 
-path ./vendor -prune -o -name "*.go" -not -name '*_test.go' -print) .PHONEY: test bench assembly generate assembly: @$(MAKE) -C memory assembly @$(MAKE) -C math assembly generate: bin/tmpl bin/tmpl -i -data=numeric.tmpldata type_traits_numeric.gen.go.tmpl type_traits_numeric.gen_test.go.tmpl array/numeric.gen.go.tmpl array/numericbuilder.gen_test.go.tmpl array/numericbuilder.gen.go.tmpl array/bufferbuilder_numeric.gen.go.tmpl bin/tmpl -i -data=datatype_numeric.gen.go.tmpldata datatype_numeric.gen.go.tmpl @$(MAKE) -C math generate fmt: $(SOURCES_NO_VENDOR) goimports -w $^ bench: $(GO_SOURCES) | assembly $(GO_TEST) $(GO_TEST_ARGS) -bench=. -run=- ./... bench-noasm: $(GO_SOURCES) $(GO_TEST) $(GO_TEST_ARGS) -tags='noasm' -bench=. -run=- ./... test: $(GO_SOURCES) | assembly $(GO_TEST) $(GO_TEST_ARGS) ./... test-noasm: $(GO_SOURCES) $(GO_TEST) $(GO_TEST_ARGS) -tags='noasm' ./... bin/tmpl: _tools/tmpl/main.go $(GO_BUILD) -o $@ ./_tools/tmpl arrow-go-18.2.0/arrow/_examples/000077500000000000000000000000001476434502500164605ustar00rootroot00000000000000arrow-go-18.2.0/arrow/_examples/helloworld/000077500000000000000000000000001476434502500206335ustar00rootroot00000000000000arrow-go-18.2.0/arrow/_examples/helloworld/main.go000066400000000000000000000036171476434502500221150ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "os" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/math" "github.com/apache/arrow-go/v18/arrow/memory" ) func main() { schema := arrow.NewSchema([]arrow.Field{ {Name: "intField", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, {Name: "stringField", Type: arrow.BinaryTypes.String, Nullable: false}, {Name: "floatField", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, }, nil) builder := array.NewRecordBuilder(memory.DefaultAllocator, schema) defer builder.Release() builder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3, 4, 5}, nil) builder.Field(1).(*array.StringBuilder).AppendValues([]string{"a", "b", "c", "d", "e"}, nil) builder.Field(2).(*array.Float64Builder).AppendValues([]float64{1, 0, 3, 0, 5}, []bool{true, false, true, false, true}) rec := builder.NewRecord() defer rec.Release() tbl := array.NewTableFromRecords(schema, []arrow.Record{rec}) defer tbl.Release() sum := math.Float64.Sum(tbl.Column(2).Data().Chunk(0).(*array.Float64)) if sum != 9 { defer os.Exit(1) } } arrow-go-18.2.0/arrow/_tools/000077500000000000000000000000001476434502500160025ustar00rootroot00000000000000arrow-go-18.2.0/arrow/_tools/tmpl/000077500000000000000000000000001476434502500167565ustar00rootroot00000000000000arrow-go-18.2.0/arrow/_tools/tmpl/main.go000066400000000000000000000125721476434502500202400ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "bytes" "flag" "fmt" "go/format" "io/ioutil" "os" "os/exec" "path/filepath" "strings" "text/template" "github.com/apache/arrow-go/v18/internal/json" ) const Ext = ".tmpl" type pathSpec struct { in, out string } func (p *pathSpec) String() string { return p.in + " → " + p.out } func (p *pathSpec) IsGoFile() bool { return filepath.Ext(p.out) == ".go" } func parsePath(path string) (string, string) { p := strings.IndexByte(path, '=') if p == -1 { if filepath.Ext(path) != Ext { errExit("template file '%s' must have .tmpl extension", path) } return path, path[:len(path)-len(Ext)] } return path[:p], path[p+1:] } type data struct { In interface{} D listValue } func errExit(format string, a ...interface{}) { fmt.Fprintf(os.Stderr, format, a...) 
fmt.Fprintln(os.Stderr) os.Exit(1) } type listValue map[string]string func (l listValue) String() string { res := make([]string, 0, len(l)) for k, v := range l { res = append(res, fmt.Sprintf("%s=%s", k, v)) } return strings.Join(res, ", ") } func (l listValue) Set(v string) error { nv := strings.Split(v, "=") if len(nv) != 2 { return fmt.Errorf("expected NAME=VALUE, got %s", v) } l[nv[0]] = nv[1] return nil } func main() { var ( dataArg = flag.String("data", "", "input JSON data") gi = flag.Bool("i", false, "run goimports") in = &data{D: make(listValue)} ) flag.Var(&in.D, "d", "-d NAME=VALUE") flag.Parse() if *dataArg == "" { errExit("data option is required") } if *gi { if _, err := exec.LookPath("goimports"); err != nil { errExit("failed to find goimports: %s", err.Error()) } formatter = formatSource } else { formatter = format.Source } paths := flag.Args() if len(paths) == 0 { errExit("no tmpl files specified") } specs := make([]pathSpec, len(paths)) for i, p := range paths { in, out := parsePath(p) specs[i] = pathSpec{in: in, out: out} } in.In = readData(*dataArg) process(in, specs) } func mustReadAll(path string) []byte { data, err := ioutil.ReadFile(path) if err != nil { errExit(err.Error()) } return data } func readData(path string) interface{} { data := mustReadAll(path) var v interface{} if err := json.Unmarshal(StripComments(data), &v); err != nil { errExit("invalid JSON data: %s", err.Error()) } return v } func fileMode(path string) os.FileMode { stat, err := os.Stat(path) if err != nil { errExit(err.Error()) } return stat.Mode() } var funcs = template.FuncMap{ "lower": strings.ToLower, "upper": strings.ToUpper, } func process(data interface{}, specs []pathSpec) { for _, spec := range specs { var ( t *template.Template err error ) t, err = template.New("gen").Funcs(funcs).Parse(string(mustReadAll(spec.in))) if err != nil { errExit("error processing template '%s': %s", spec.in, err.Error()) } var buf bytes.Buffer if spec.IsGoFile() { // preamble 
fmt.Fprintf(&buf, "// Code generated by %s. DO NOT EDIT.\n", spec.in) fmt.Fprintln(&buf) } err = t.Execute(&buf, data) if err != nil { errExit("error executing template '%s': %s", spec.in, err.Error()) } generated := buf.Bytes() if spec.IsGoFile() { generated, err = formatter(generated) if err != nil { errExit("error formatting '%s': %s", spec.in, err.Error()) } } os.WriteFile(spec.out, generated, fileMode(spec.in)) } } var ( formatter func([]byte) ([]byte, error) ) func formatSource(in []byte) ([]byte, error) { r := bytes.NewReader(in) cmd := exec.Command("goimports") cmd.Stdin = r out, err := cmd.Output() if err != nil { if ee, ok := err.(*exec.ExitError); ok { return nil, fmt.Errorf("error running goimports: %s", string(ee.Stderr)) } return nil, fmt.Errorf("error running goimports: %s", string(out)) } return out, nil } func StripComments(raw []byte) []byte { var ( quoted, esc bool comment bool ) buf := bytes.Buffer{} for i := 0; i < len(raw); i++ { b := raw[i] if comment { switch b { case '/': comment = false j := bytes.IndexByte(raw[i+1:], '\n') if j == -1 { i = len(raw) } else { i += j // keep new line } case '*': j := bytes.Index(raw[i+1:], []byte("*/")) if j == -1 { i = len(raw) } else { i += j + 2 comment = false } } continue } if esc { esc = false continue } if b == '\\' && quoted { esc = true continue } if b == '"' || b == '\'' { quoted = !quoted } if b == '/' && !quoted { comment = true continue } buf.WriteByte(b) } if quoted || esc || comment { // unexpected state, so return raw bytes return raw } return buf.Bytes() } arrow-go-18.2.0/arrow/_tools/tmpl/main_test.go000066400000000000000000000040111476434502500212640ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "testing" ) func TestStripComments(t *testing.T) { tests := []struct { name string in string exp string }{ {name: "none", in: `[1,2,3]`, exp: `[1,2,3]`}, {name: "single-line, line comment at end", in: `[1,2,3] // foo bar`, exp: `[1,2,3] `}, {name: "single-line, block comment at end", in: `[1,2,3] /* foo bar */ `, exp: `[1,2,3] `}, {name: "single-line, block comment at end", in: `[1,2,3] /* /* // */`, exp: `[1,2,3] `}, {name: "single-line, block comment in middle", in: `[1,/* foo bar */2,3]`, exp: `[1,2,3]`}, {name: "single-line, block comment in string", in: `[1,"/* foo bar */"]`, exp: `[1,"/* foo bar */"]`}, {name: "single-line, malformed block comment", in: `[1,2,/*]`, exp: `[1,2,/*]`}, {name: "single-line, malformed JSON", in: `[1,2,/]`, exp: `[1,2,/]`}, { name: "multi-line", in: `[ 1, 2, 3 ]`, exp: `[ 1, 2, 3 ]`, }, { name: "multi-line, multiple line comments", in: `[ // foo 1, // bar 2, 3 ] // fit`, exp: `[ 1, 2, 3 ] `, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { got := string(StripComments([]byte(test.in))) if got != test.exp { t.Errorf("got:\n%s\nexp:\n%s", got, test.exp) } }) } } arrow-go-18.2.0/arrow/_tools/tools.go000066400000000000000000000016441476434502500174760ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build tools // +build tools package _tools import ( _ "golang.org/x/tools/cmd/goimports" _ "golang.org/x/tools/cmd/stringer" ) arrow-go-18.2.0/arrow/array.go000066400000000000000000000117641476434502500161610ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package arrow import ( "fmt" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) // ArrayData is the underlying memory and metadata of an Arrow array, corresponding // to the same-named object in the C++ implementation. 
// // The Array interface and subsequent typed objects provide strongly typed // accessors which support marshalling and other patterns to the data. // This interface allows direct access to the underlying raw byte buffers // which allows for manipulating the internal data and casting. For example, // one could cast the raw bytes from int64 to float64 like so: // // arrdata := GetMyInt64Data().Data() // newdata := array.NewData(arrow.PrimitiveTypes.Float64, arrdata.Len(), // arrdata.Buffers(), nil, arrdata.NullN(), arrdata.Offset()) // defer newdata.Release() // float64arr := array.NewFloat64Data(newdata) // defer float64arr.Release() // // This is also useful in an analytics setting where memory may be reused. For // example, if we had a group of operations all returning float64 such as: // // Log(Sqrt(Expr(arr))) // // The low-level implementations could have signatures such as: // // func Log(values arrow.ArrayData) arrow.ArrayData // // Another example would be a function that consumes one or more memory buffers // in an input array and replaces them with newly-allocated data, changing the // output data type as well. type ArrayData interface { // Retain increases the reference count by 1, it is safe to call // in multiple goroutines simultaneously. Retain() // Release decreases the reference count by 1, it is safe to call // in multiple goroutines simultaneously. Data is removed when reference // count is 0. Release() // DataType returns the current datatype stored in the object. DataType() DataType // NullN returns the number of nulls for this data instance. NullN() int // Len returns the length of this data instance Len() int // Offset returns the offset into the raw buffers where this data begins Offset() int // Buffers returns the slice of raw data buffers for this data instance. Their // meaning depends on the context of the data type. 
Buffers() []*memory.Buffer // Children returns the slice of children data instances, only relevant for // nested data types. For instance, List data will have a single child containing // elements of all the rows and Struct data will contain numfields children which // are the arrays for each field of the struct. Children() []ArrayData // Reset allows reusing this ArrayData object by replacing the data in this ArrayData // object without changing the reference count. Reset(newtype DataType, newlength int, newbuffers []*memory.Buffer, newchildren []ArrayData, newnulls int, newoffset int) // Dictionary returns the ArrayData object for the dictionary if this is a // dictionary array, otherwise it will be nil. Dictionary() ArrayData // SizeInBytes returns the size of the ArrayData buffers and any children and/or dictionary in bytes. SizeInBytes() uint64 } // Array represents an immutable sequence of values using the Arrow in-memory format. type Array interface { json.Marshaler fmt.Stringer // DataType returns the type metadata for this instance. DataType() DataType // NullN returns the number of null values in the array. NullN() int // NullBitmapBytes returns a byte slice of the validity bitmap. NullBitmapBytes() []byte // IsNull returns true if value at index is null. // NOTE: IsNull will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. IsNull(i int) bool // IsValid returns true if value at index is not null. // NOTE: IsValid will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. IsValid(i int) bool // ValueStr returns the value at index as a string. ValueStr(i int) string // Get single value to be marshalled with `json.Marshal` GetOneForMarshal(i int) interface{} Data() ArrayData // Len returns the number of elements in the array. Len() int // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. Retain() // Release decreases the reference count by 1. 
// Release may be called simultaneously from multiple goroutines. // When the reference count goes to zero, the memory is freed. Release() } arrow-go-18.2.0/arrow/array/000077500000000000000000000000001476434502500156215ustar00rootroot00000000000000arrow-go-18.2.0/arrow/array/array.go000066400000000000000000000222531476434502500172720ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" ) const ( // UnknownNullCount specifies the NullN should be calculated from the null bitmap buffer. UnknownNullCount = -1 // NullValueStr represents a null value in arrow.Array.ValueStr and in Builder.AppendValueFromString. // It should be returned from the arrow.Array.ValueStr implementations. // Using it as the value in Builder.AppendValueFromString should be equivalent to Builder.AppendNull. NullValueStr = "(null)" ) type array struct { refCount int64 data *Data nullBitmapBytes []byte } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. 
func (a *array) Retain() { atomic.AddInt64(&a.refCount, 1) } // Release decreases the reference count by 1. // Release may be called simultaneously from multiple goroutines. // When the reference count goes to zero, the memory is freed. func (a *array) Release() { debug.Assert(atomic.LoadInt64(&a.refCount) > 0, "too many releases") if atomic.AddInt64(&a.refCount, -1) == 0 { a.data.Release() a.data, a.nullBitmapBytes = nil, nil } } // DataType returns the type metadata for this instance. func (a *array) DataType() arrow.DataType { return a.data.dtype } // NullN returns the number of null values in the array. func (a *array) NullN() int { if a.data.nulls < 0 { a.data.nulls = a.data.length - bitutil.CountSetBits(a.nullBitmapBytes, a.data.offset, a.data.length) } return a.data.nulls } // NullBitmapBytes returns a byte slice of the validity bitmap. func (a *array) NullBitmapBytes() []byte { return a.nullBitmapBytes } func (a *array) Data() arrow.ArrayData { return a.data } // Len returns the number of elements in the array. func (a *array) Len() int { return a.data.length } // IsNull returns true if value at index is null. // NOTE: IsNull will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. func (a *array) IsNull(i int) bool { return len(a.nullBitmapBytes) != 0 && bitutil.BitIsNotSet(a.nullBitmapBytes, a.data.offset+i) } // IsValid returns true if value at index is not null. // NOTE: IsValid will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. func (a *array) IsValid(i int) bool { return len(a.nullBitmapBytes) == 0 || bitutil.BitIsSet(a.nullBitmapBytes, a.data.offset+i) } func (a *array) setData(data *Data) { // Retain before releasing in case a.data is the same as data. 
data.Retain() if a.data != nil { a.data.Release() } if len(data.buffers) > 0 && data.buffers[0] != nil { a.nullBitmapBytes = data.buffers[0].Bytes() } a.data = data } func (a *array) Offset() int { return a.data.Offset() } type arrayConstructorFn func(arrow.ArrayData) arrow.Array var ( makeArrayFn [64]arrayConstructorFn ) func invalidDataType(data arrow.ArrayData) arrow.Array { panic("invalid data type: " + data.DataType().ID().String()) } // MakeFromData constructs a strongly-typed array instance from generic Data. func MakeFromData(data arrow.ArrayData) arrow.Array { return makeArrayFn[byte(data.DataType().ID()&0x3f)](data) } // NewSlice constructs a zero-copy slice of the array with the indicated // indices i and j, corresponding to array[i:j]. // The returned array must be Release()'d after use. // // NewSlice panics if the slice is outside the valid range of the input array. // NewSlice panics if j < i. func NewSlice(arr arrow.Array, i, j int64) arrow.Array { data := NewSliceData(arr.Data(), i, j) slice := MakeFromData(data) data.Release() return slice } func init() { makeArrayFn = [...]arrayConstructorFn{ arrow.NULL: func(data arrow.ArrayData) arrow.Array { return NewNullData(data) }, arrow.BOOL: func(data arrow.ArrayData) arrow.Array { return NewBooleanData(data) }, arrow.UINT8: func(data arrow.ArrayData) arrow.Array { return NewUint8Data(data) }, arrow.INT8: func(data arrow.ArrayData) arrow.Array { return NewInt8Data(data) }, arrow.UINT16: func(data arrow.ArrayData) arrow.Array { return NewUint16Data(data) }, arrow.INT16: func(data arrow.ArrayData) arrow.Array { return NewInt16Data(data) }, arrow.UINT32: func(data arrow.ArrayData) arrow.Array { return NewUint32Data(data) }, arrow.INT32: func(data arrow.ArrayData) arrow.Array { return NewInt32Data(data) }, arrow.UINT64: func(data arrow.ArrayData) arrow.Array { return NewUint64Data(data) }, arrow.INT64: func(data arrow.ArrayData) arrow.Array { return NewInt64Data(data) }, arrow.FLOAT16: func(data 
arrow.ArrayData) arrow.Array { return NewFloat16Data(data) }, arrow.FLOAT32: func(data arrow.ArrayData) arrow.Array { return NewFloat32Data(data) }, arrow.FLOAT64: func(data arrow.ArrayData) arrow.Array { return NewFloat64Data(data) }, arrow.STRING: func(data arrow.ArrayData) arrow.Array { return NewStringData(data) }, arrow.BINARY: func(data arrow.ArrayData) arrow.Array { return NewBinaryData(data) }, arrow.FIXED_SIZE_BINARY: func(data arrow.ArrayData) arrow.Array { return NewFixedSizeBinaryData(data) }, arrow.DATE32: func(data arrow.ArrayData) arrow.Array { return NewDate32Data(data) }, arrow.DATE64: func(data arrow.ArrayData) arrow.Array { return NewDate64Data(data) }, arrow.TIMESTAMP: func(data arrow.ArrayData) arrow.Array { return NewTimestampData(data) }, arrow.TIME32: func(data arrow.ArrayData) arrow.Array { return NewTime32Data(data) }, arrow.TIME64: func(data arrow.ArrayData) arrow.Array { return NewTime64Data(data) }, arrow.INTERVAL_MONTHS: func(data arrow.ArrayData) arrow.Array { return NewMonthIntervalData(data) }, arrow.INTERVAL_DAY_TIME: func(data arrow.ArrayData) arrow.Array { return NewDayTimeIntervalData(data) }, arrow.DECIMAL32: func(data arrow.ArrayData) arrow.Array { return NewDecimal32Data(data) }, arrow.DECIMAL64: func(data arrow.ArrayData) arrow.Array { return NewDecimal64Data(data) }, arrow.DECIMAL128: func(data arrow.ArrayData) arrow.Array { return NewDecimal128Data(data) }, arrow.DECIMAL256: func(data arrow.ArrayData) arrow.Array { return NewDecimal256Data(data) }, arrow.LIST: func(data arrow.ArrayData) arrow.Array { return NewListData(data) }, arrow.STRUCT: func(data arrow.ArrayData) arrow.Array { return NewStructData(data) }, arrow.SPARSE_UNION: func(data arrow.ArrayData) arrow.Array { return NewSparseUnionData(data) }, arrow.DENSE_UNION: func(data arrow.ArrayData) arrow.Array { return NewDenseUnionData(data) }, arrow.DICTIONARY: func(data arrow.ArrayData) arrow.Array { return NewDictionaryData(data) }, arrow.MAP: func(data 
arrow.ArrayData) arrow.Array { return NewMapData(data) }, arrow.EXTENSION: func(data arrow.ArrayData) arrow.Array { return NewExtensionData(data) }, arrow.FIXED_SIZE_LIST: func(data arrow.ArrayData) arrow.Array { return NewFixedSizeListData(data) }, arrow.DURATION: func(data arrow.ArrayData) arrow.Array { return NewDurationData(data) }, arrow.LARGE_STRING: func(data arrow.ArrayData) arrow.Array { return NewLargeStringData(data) }, arrow.LARGE_BINARY: func(data arrow.ArrayData) arrow.Array { return NewLargeBinaryData(data) }, arrow.LARGE_LIST: func(data arrow.ArrayData) arrow.Array { return NewLargeListData(data) }, arrow.INTERVAL_MONTH_DAY_NANO: func(data arrow.ArrayData) arrow.Array { return NewMonthDayNanoIntervalData(data) }, arrow.RUN_END_ENCODED: func(data arrow.ArrayData) arrow.Array { return NewRunEndEncodedData(data) }, arrow.LIST_VIEW: func(data arrow.ArrayData) arrow.Array { return NewListViewData(data) }, arrow.LARGE_LIST_VIEW: func(data arrow.ArrayData) arrow.Array { return NewLargeListViewData(data) }, arrow.BINARY_VIEW: func(data arrow.ArrayData) arrow.Array { return NewBinaryViewData(data) }, arrow.STRING_VIEW: func(data arrow.ArrayData) arrow.Array { return NewStringViewData(data) }, // invalid data types to fill out array to size 2^6 - 1 63: invalidDataType, } } arrow-go-18.2.0/arrow/array/array_test.go000066400000000000000000000367511476434502500203410ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/extensions" "github.com/apache/arrow-go/v18/arrow/internal/testing/tools" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) type testDataType struct { id arrow.Type } func (d *testDataType) ID() arrow.Type { return d.id } func (d *testDataType) Name() string { panic("implement me") } func (d *testDataType) BitWidth() int { return 8 } func (d *testDataType) Bytes() int { return 1 } func (d *testDataType) Fingerprint() string { return "" } func (testDataType) Layout() arrow.DataTypeLayout { return arrow.DataTypeLayout{} } func (testDataType) String() string { return "" } func TestMakeFromData(t *testing.T) { tests := []struct { name string d arrow.DataType size int child []arrow.ArrayData dict *array.Data expPanic bool expError string }{ // supported types {name: "null", d: &testDataType{arrow.NULL}}, {name: "bool", d: &testDataType{arrow.BOOL}}, {name: "uint8", d: &testDataType{arrow.UINT8}}, {name: "uint16", d: &testDataType{arrow.UINT16}}, {name: "uint32", d: &testDataType{arrow.UINT32}}, {name: "uint64", d: &testDataType{arrow.UINT64}}, {name: "int8", d: &testDataType{arrow.INT8}}, {name: "int16", d: &testDataType{arrow.INT16}}, {name: "int32", d: &testDataType{arrow.INT32}}, {name: "int64", d: &testDataType{arrow.INT64}}, {name: "float16", d: &testDataType{arrow.FLOAT16}}, {name: "float32", d: &testDataType{arrow.FLOAT32}}, {name: "float64", d: 
&testDataType{arrow.FLOAT64}}, {name: "string", d: &testDataType{arrow.STRING}, size: 3}, {name: "binary", d: &testDataType{arrow.BINARY}, size: 3}, {name: "large_string", d: &testDataType{arrow.LARGE_STRING}, size: 3}, {name: "large_binary", d: &testDataType{arrow.LARGE_BINARY}, size: 3}, {name: "fixed_size_binary", d: &testDataType{arrow.FIXED_SIZE_BINARY}}, {name: "date32", d: &testDataType{arrow.DATE32}}, {name: "date64", d: &testDataType{arrow.DATE64}}, {name: "timestamp", d: &testDataType{arrow.TIMESTAMP}}, {name: "time32", d: &testDataType{arrow.TIME32}}, {name: "time64", d: &testDataType{arrow.TIME64}}, {name: "month_interval", d: arrow.FixedWidthTypes.MonthInterval}, {name: "day_time_interval", d: arrow.FixedWidthTypes.DayTimeInterval}, {name: "decimal32", d: &testDataType{arrow.DECIMAL32}}, {name: "decimal64", d: &testDataType{arrow.DECIMAL64}}, {name: "decimal128", d: &testDataType{arrow.DECIMAL128}}, {name: "decimal256", d: &testDataType{arrow.DECIMAL256}}, {name: "month_day_nano_interval", d: arrow.FixedWidthTypes.MonthDayNanoInterval}, {name: "list", d: &testDataType{arrow.LIST}, child: []arrow.ArrayData{ array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), }}, {name: "large list", d: &testDataType{arrow.LARGE_LIST}, child: []arrow.ArrayData{ array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), }}, {name: "struct", d: &testDataType{arrow.STRUCT}}, {name: "struct", d: &testDataType{arrow.STRUCT}, child: 
[]arrow.ArrayData{ array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), }}, {name: "fixed_size_list", d: arrow.FixedSizeListOf(4, arrow.PrimitiveTypes.Int64), child: []arrow.ArrayData{ array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), }}, {name: "duration", d: &testDataType{arrow.DURATION}}, {name: "map", d: &testDataType{arrow.MAP}, child: []arrow.ArrayData{ array.NewData(&testDataType{arrow.STRUCT}, 0 /* length */, make([]*memory.Buffer, 3 /*null bitmap, values, offsets*/), []arrow.ArrayData{ array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), }, 0 /* nulls */, 0 /* offset */)}, }, {name: "sparse union", d: arrow.SparseUnionOf(nil, nil), child: []arrow.ArrayData{}, size: 2}, {name: "dense union", d: arrow.DenseUnionOf(nil, nil), child: []arrow.ArrayData{}, size: 3}, // various dictionary index types and value types {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: &testDataType{arrow.INT64}}, dict: array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, {name: "dictionary", d: &arrow.DictionaryType{IndexType: 
arrow.PrimitiveTypes.Uint8, ValueType: &testDataType{arrow.INT32}}, dict: array.NewData(&testDataType{arrow.INT32}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: &testDataType{arrow.UINT16}}, dict: array.NewData(&testDataType{arrow.UINT16}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: &testDataType{arrow.INT64}}, dict: array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: &testDataType{arrow.UINT32}}, dict: array.NewData(&testDataType{arrow.UINT32}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: &testDataType{arrow.TIMESTAMP}}, dict: array.NewData(&testDataType{arrow.TIMESTAMP}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int64, ValueType: &testDataType{arrow.UINT32}}, dict: array.NewData(&testDataType{arrow.UINT32}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: &testDataType{arrow.TIMESTAMP}}, dict: array.NewData(&testDataType{arrow.TIMESTAMP}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* 
nulls */, 0 /* offset */)}, {name: "extension", d: &testDataType{arrow.EXTENSION}, expPanic: true, expError: "arrow/array: DataType for ExtensionArray must implement arrow.ExtensionType"}, {name: "extension", d: extensions.NewUUIDType()}, {name: "run end encoded", d: arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int64), child: []arrow.ArrayData{ array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), }}, // invalid types {name: "invalid(-1)", d: &testDataType{arrow.Type(-1)}, expPanic: true, expError: "invalid data type: Type(-1)"}, {name: "invalid(63)", d: &testDataType{arrow.Type(63)}, expPanic: true, expError: "invalid data type: Type(63)"}, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { var ( b [4]*memory.Buffer n = 4 data arrow.ArrayData ) if test.size != 0 { n = test.size } if test.dict != nil { data = array.NewDataWithDictionary(test.d, 0, b[:n], 0, 0, test.dict) } else { data = array.NewData(test.d, 0, b[:n], test.child, 0, 0) } if test.expPanic { assert.PanicsWithValue(t, test.expError, func() { array.MakeFromData(data) }) } else { assert.NotNil(t, array.MakeFromData(data)) } }) } } func bbits(v ...int32) []byte { return tools.IntsToBitsLSB(v...) 
} func TestArray_NullN(t *testing.T) { tests := []struct { name string l int bm []byte n int exp int }{ {name: "unknown,l16", l: 16, bm: bbits(0x11001010, 0x00110011), n: array.UnknownNullCount, exp: 8}, {name: "unknown,l12,ignores last nibble", l: 12, bm: bbits(0x11001010, 0x00111111), n: array.UnknownNullCount, exp: 6}, {name: "unknown,l12,12 nulls", l: 12, bm: bbits(0x00000000, 0x00000000), n: array.UnknownNullCount, exp: 12}, {name: "unknown,l12,00 nulls", l: 12, bm: bbits(0x11111111, 0x11111111), n: array.UnknownNullCount, exp: 0}, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { buf := memory.NewBufferBytes(test.bm) data := array.NewData(arrow.FixedWidthTypes.Boolean, test.l, []*memory.Buffer{buf, nil}, nil, test.n, 0) buf.Release() ar := array.MakeFromData(data) data.Release() got := ar.NullN() ar.Release() assert.Equal(t, test.exp, got) }) } } func TestArraySlice(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( valids = []bool{true, true, true, false, true, true} vs = []float64{1, 2, 3, 0, 4, 5} ) b := array.NewFloat64Builder(pool) defer b.Release() for _, tc := range []struct { i, j int panics bool len int }{ {i: 0, j: len(valids), panics: false, len: len(valids)}, {i: len(valids), j: len(valids), panics: false, len: 0}, {i: 0, j: 1, panics: false, len: 1}, {i: 1, j: 1, panics: false, len: 0}, {i: 0, j: len(valids) + 1, panics: true}, {i: 2, j: 1, panics: true}, {i: len(valids) + 1, j: len(valids) + 1, panics: true}, } { t.Run("", func(t *testing.T) { b.AppendValues(vs, valids) arr := b.NewFloat64Array() defer arr.Release() if got, want := arr.Len(), len(valids); got != want { t.Fatalf("got=%d, want=%d", got, want) } if tc.panics { defer func() { e := recover() if e == nil { t.Fatalf("this should have panicked, but did not") } }() } slice := array.NewSlice(arr, int64(tc.i), int64(tc.j)).(*array.Float64) defer slice.Release() if got, want := slice.Len(), tc.len; got != want { 
t.Fatalf("invalid slice length: got=%d, want=%d", got, want) } }) } } func TestArraySliceTypes(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) valids := []bool{true, true, true, false, true, true} for _, tc := range []struct { values interface{} builder array.Builder append func(b array.Builder, vs interface{}) }{ { values: []bool{true, false, true, false, true, false}, builder: array.NewBooleanBuilder(pool), append: func(b array.Builder, vs interface{}) { b.(*array.BooleanBuilder).AppendValues(vs.([]bool), valids) }, }, { values: []uint8{1, 2, 3, 0, 4, 5}, builder: array.NewUint8Builder(pool), append: func(b array.Builder, vs interface{}) { b.(*array.Uint8Builder).AppendValues(vs.([]uint8), valids) }, }, { values: []uint16{1, 2, 3, 0, 4, 5}, builder: array.NewUint16Builder(pool), append: func(b array.Builder, vs interface{}) { b.(*array.Uint16Builder).AppendValues(vs.([]uint16), valids) }, }, { values: []uint32{1, 2, 3, 0, 4, 5}, builder: array.NewUint32Builder(pool), append: func(b array.Builder, vs interface{}) { b.(*array.Uint32Builder).AppendValues(vs.([]uint32), valids) }, }, { values: []uint64{1, 2, 3, 0, 4, 5}, builder: array.NewUint64Builder(pool), append: func(b array.Builder, vs interface{}) { b.(*array.Uint64Builder).AppendValues(vs.([]uint64), valids) }, }, { values: []int8{1, 2, 3, 0, 4, 5}, builder: array.NewInt8Builder(pool), append: func(b array.Builder, vs interface{}) { b.(*array.Int8Builder).AppendValues(vs.([]int8), valids) }, }, { values: []int16{1, 2, 3, 0, 4, 5}, builder: array.NewInt16Builder(pool), append: func(b array.Builder, vs interface{}) { b.(*array.Int16Builder).AppendValues(vs.([]int16), valids) }, }, { values: []int32{1, 2, 3, 0, 4, 5}, builder: array.NewInt32Builder(pool), append: func(b array.Builder, vs interface{}) { b.(*array.Int32Builder).AppendValues(vs.([]int32), valids) }, }, { values: []int64{1, 2, 3, 0, 4, 5}, builder: array.NewInt64Builder(pool), append: func(b 
array.Builder, vs interface{}) { b.(*array.Int64Builder).AppendValues(vs.([]int64), valids) }, }, { values: []float32{1, 2, 3, 0, 4, 5}, builder: array.NewFloat32Builder(pool), append: func(b array.Builder, vs interface{}) { b.(*array.Float32Builder).AppendValues(vs.([]float32), valids) }, }, { values: []float64{1, 2, 3, 0, 4, 5}, builder: array.NewFloat64Builder(pool), append: func(b array.Builder, vs interface{}) { b.(*array.Float64Builder).AppendValues(vs.([]float64), valids) }, }, } { t.Run("", func(t *testing.T) { defer tc.builder.Release() b := tc.builder tc.append(b, tc.values) arr := b.NewArray() defer arr.Release() if got, want := arr.Len(), len(valids); got != want { t.Fatalf("invalid length: got=%d, want=%d", got, want) } slice := array.NewSlice(arr, 2, 5) defer slice.Release() if got, want := slice.Len(), 3; got != want { t.Fatalf("invalid slice length: got=%d, want=%d", got, want) } shortSlice := array.NewSlice(arr, 2, 3) defer shortSlice.Release() sliceOfShortSlice := array.NewSlice(shortSlice, 0, 1) defer sliceOfShortSlice.Release() if got, want := sliceOfShortSlice.Len(), 1; got != want { t.Fatalf("invalid short slice length: got=%d, want=%d", got, want) } }) } } arrow-go-18.2.0/arrow/array/binary.go000066400000000000000000000255151476434502500174440ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "encoding/base64" "fmt" "strings" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) type BinaryLike interface { arrow.Array ValueLen(int) int ValueBytes() []byte ValueOffset64(int) int64 } // A type which represents an immutable sequence of variable-length binary strings. type Binary struct { array valueOffsets []int32 valueBytes []byte } // NewBinaryData constructs a new Binary array from data. func NewBinaryData(data arrow.ArrayData) *Binary { a := &Binary{} a.refCount = 1 a.setData(data.(*Data)) return a } // Value returns the slice at index i. This value should not be mutated. func (a *Binary) Value(i int) []byte { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } idx := a.array.data.offset + i return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] } // ValueStr returns a copy of the base64-encoded string value or NullValueStr func (a *Binary) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return base64.StdEncoding.EncodeToString(a.Value(i)) } // ValueString returns the string at index i without performing additional allocations. // The string is only valid for the lifetime of the Binary array. 
func (a *Binary) ValueString(i int) string { b := a.Value(i) return *(*string)(unsafe.Pointer(&b)) } func (a *Binary) ValueOffset(i int) int { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } return int(a.valueOffsets[a.array.data.offset+i]) } func (a *Binary) ValueOffset64(i int) int64 { return int64(a.ValueOffset(i)) } func (a *Binary) ValueLen(i int) int { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } beg := a.array.data.offset + i return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) } func (a *Binary) ValueOffsets() []int32 { beg := a.array.data.offset end := beg + a.array.data.length + 1 return a.valueOffsets[beg:end] } func (a *Binary) ValueBytes() []byte { beg := a.array.data.offset end := beg + a.array.data.length return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] } func (a *Binary) String() string { o := new(strings.Builder) o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { o.WriteString(" ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%q", a.ValueString(i)) } } o.WriteString("]") return o.String() } func (a *Binary) setData(data *Data) { if len(data.buffers) != 3 { panic("len(data.buffers) != 3") } a.array.setData(data) if valueData := data.buffers[2]; valueData != nil { a.valueBytes = valueData.Bytes() } if valueOffsets := data.buffers[1]; valueOffsets != nil { a.valueOffsets = arrow.Int32Traits.CastFromBytes(valueOffsets.Bytes()) } if a.array.data.length < 1 { return } expNumOffsets := a.array.data.offset + a.array.data.length + 1 if len(a.valueOffsets) < expNumOffsets { panic(fmt.Errorf("arrow/array: binary offset buffer must have at least %d values", expNumOffsets)) } if int(a.valueOffsets[expNumOffsets-1]) > len(a.valueBytes) { panic("arrow/array: binary offsets out of bounds of data buffer") } } func (a *Binary) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.Value(i) } func (a *Binary) 
MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { vals[i] = a.GetOneForMarshal(i) } // golang marshal standard says that []byte will be marshalled // as a base64-encoded string return json.Marshal(vals) } func arrayEqualBinary(left, right *Binary) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if !bytes.Equal(left.Value(i), right.Value(i)) { return false } } return true } type LargeBinary struct { array valueOffsets []int64 valueBytes []byte } func NewLargeBinaryData(data arrow.ArrayData) *LargeBinary { a := &LargeBinary{} a.refCount = 1 a.setData(data.(*Data)) return a } func (a *LargeBinary) Value(i int) []byte { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } idx := a.array.data.offset + i return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] } func (a *LargeBinary) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return base64.StdEncoding.EncodeToString(a.Value(i)) } func (a *LargeBinary) ValueString(i int) string { b := a.Value(i) return *(*string)(unsafe.Pointer(&b)) } func (a *LargeBinary) ValueOffset(i int) int64 { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } return a.valueOffsets[a.array.data.offset+i] } func (a *LargeBinary) ValueOffset64(i int) int64 { return a.ValueOffset(i) } func (a *LargeBinary) ValueLen(i int) int { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } beg := a.array.data.offset + i return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) } func (a *LargeBinary) ValueOffsets() []int64 { beg := a.array.data.offset end := beg + a.array.data.length + 1 return a.valueOffsets[beg:end] } func (a *LargeBinary) ValueBytes() []byte { beg := a.array.data.offset end := beg + a.array.data.length return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] } func (a *LargeBinary) String() string { var o strings.Builder o.WriteString("[") for i := 0; i 
< a.Len(); i++ { if i > 0 { o.WriteString(" ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(&o, "%q", a.ValueString(i)) } } o.WriteString("]") return o.String() } func (a *LargeBinary) setData(data *Data) { if len(data.buffers) != 3 { panic("len(data.buffers) != 3") } a.array.setData(data) if valueData := data.buffers[2]; valueData != nil { a.valueBytes = valueData.Bytes() } if valueOffsets := data.buffers[1]; valueOffsets != nil { a.valueOffsets = arrow.Int64Traits.CastFromBytes(valueOffsets.Bytes()) } if a.array.data.length < 1 { return } expNumOffsets := a.array.data.offset + a.array.data.length + 1 if len(a.valueOffsets) < expNumOffsets { panic(fmt.Errorf("arrow/array: large binary offset buffer must have at least %d values", expNumOffsets)) } if int(a.valueOffsets[expNumOffsets-1]) > len(a.valueBytes) { panic("arrow/array: large binary offsets out of bounds of data buffer") } } func (a *LargeBinary) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.Value(i) } func (a *LargeBinary) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { vals[i] = a.GetOneForMarshal(i) } // golang marshal standard says that []byte will be marshalled // as a base64-encoded string return json.Marshal(vals) } func arrayEqualLargeBinary(left, right *LargeBinary) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if !bytes.Equal(left.Value(i), right.Value(i)) { return false } } return true } type ViewLike interface { arrow.Array ValueHeader(int) *arrow.ViewHeader } type BinaryView struct { array values []arrow.ViewHeader dataBuffers []*memory.Buffer } func NewBinaryViewData(data arrow.ArrayData) *BinaryView { a := &BinaryView{} a.refCount = 1 a.setData(data.(*Data)) return a } func (a *BinaryView) setData(data *Data) { if len(data.buffers) < 2 { panic("len(data.buffers) < 2") } a.array.setData(data) if valueData := data.buffers[1]; valueData != nil { a.values = 
arrow.ViewHeaderTraits.CastFromBytes(valueData.Bytes()) } a.dataBuffers = data.buffers[2:] } func (a *BinaryView) ValueHeader(i int) *arrow.ViewHeader { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } return &a.values[a.array.data.offset+i] } func (a *BinaryView) Value(i int) []byte { s := a.ValueHeader(i) if s.IsInline() { return s.InlineBytes() } start := s.BufferOffset() buf := a.dataBuffers[s.BufferIndex()] return buf.Bytes()[start : start+int32(s.Len())] } func (a *BinaryView) ValueLen(i int) int { s := a.ValueHeader(i) return s.Len() } // ValueString returns the value at index i as a string instead of // a byte slice, without copying the underlying data. func (a *BinaryView) ValueString(i int) string { b := a.Value(i) return *(*string)(unsafe.Pointer(&b)) } func (a *BinaryView) String() string { var o strings.Builder o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { o.WriteString(" ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(&o, "%q", a.ValueString(i)) } } o.WriteString("]") return o.String() } // ValueStr is paired with AppendValueFromString in that it returns // the value at index i as a string: Semantically this means that for // a null value it will return the string "(null)", otherwise it will // return the value as a base64 encoded string suitable for CSV/JSON. // // This is always going to be less performant than just using ValueString // and exists to fulfill the Array interface to provide a method which // can produce a human readable string for a given index. 
func (a *BinaryView) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return base64.StdEncoding.EncodeToString(a.Value(i)) } func (a *BinaryView) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.Value(i) } func (a *BinaryView) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { vals[i] = a.GetOneForMarshal(i) } // golang marshal standard says that []byte will be marshalled // as a base64-encoded string return json.Marshal(vals) } func arrayEqualBinaryView(left, right *BinaryView) bool { leftBufs, rightBufs := left.dataBuffers, right.dataBuffers for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if !left.ValueHeader(i).Equals(leftBufs, right.ValueHeader(i), rightBufs) { return false } } return true } var ( _ arrow.Array = (*Binary)(nil) _ arrow.Array = (*LargeBinary)(nil) _ arrow.Array = (*BinaryView)(nil) _ BinaryLike = (*Binary)(nil) _ BinaryLike = (*LargeBinary)(nil) ) arrow-go-18.2.0/arrow/array/binary_test.go000066400000000000000000000455771476434502500205150ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array import ( "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestBinary(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) values := [][]byte{ []byte("AAA"), nil, []byte("BBBB"), } valid := []bool{true, false, true} b.AppendValues(values, valid) b.Retain() b.Release() a := b.NewBinaryArray() assert.Equal(t, 3, a.Len()) assert.Equal(t, 1, a.NullN()) assert.Equal(t, []byte("AAA"), a.Value(0)) assert.Equal(t, []byte{}, a.Value(1)) assert.Equal(t, []byte("BBBB"), a.Value(2)) assert.Equal(t, "QUFB", a.ValueStr(0)) assert.Equal(t, NullValueStr, a.ValueStr(1)) a.Release() // Test builder reset and NewArray API. b.AppendValues(values, valid) a = b.NewArray().(*Binary) assert.Equal(t, 3, a.Len()) assert.Equal(t, 1, a.NullN()) assert.Equal(t, []byte("AAA"), a.Value(0)) assert.Equal(t, []byte{}, a.Value(1)) assert.Equal(t, []byte("BBBB"), a.Value(2)) assert.Equal(t, "QUFB", a.ValueStr(0)) assert.Equal(t, NullValueStr, a.ValueStr(1)) a.Release() b.Release() } func TestLargeBinary(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) values := [][]byte{ []byte("AAA"), nil, []byte("BBBB"), } valid := []bool{true, false, true} b.AppendValues(values, valid) b.Retain() b.Release() assert.Panics(t, func() { b.NewBinaryArray() }) a := b.NewLargeBinaryArray() assert.Equal(t, 3, a.Len()) assert.Equal(t, 1, a.NullN()) assert.Equal(t, []byte("AAA"), a.Value(0)) assert.Equal(t, []byte{}, a.Value(1)) assert.Equal(t, []byte("BBBB"), a.Value(2)) assert.Equal(t, "QUFB", a.ValueStr(0)) assert.Equal(t, NullValueStr, a.ValueStr(1)) a.Release() // Test builder reset and NewArray API. 
b.AppendValues(values, valid) a = b.NewArray().(*LargeBinary) assert.Equal(t, 3, a.Len()) assert.Equal(t, 1, a.NullN()) assert.Equal(t, []byte("AAA"), a.Value(0)) assert.Equal(t, []byte{}, a.Value(1)) assert.Equal(t, []byte("BBBB"), a.Value(2)) assert.Equal(t, "QUFB", a.ValueStr(0)) assert.Equal(t, NullValueStr, a.ValueStr(1)) a.Release() b.Release() } func TestBinarySliceData(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"} b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) defer b.Release() for _, v := range values { b.AppendString(v) } arr := b.NewArray().(*Binary) defer arr.Release() if got, want := arr.Len(), len(values); got != want { t.Fatalf("got=%d, want=%d", got, want) } vs := make([]string, arr.Len()) for i := range vs { vs[i] = arr.ValueString(i) } if got, want := vs, values; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } tests := []struct { interval [2]int64 want []string }{ { interval: [2]int64{0, 0}, want: []string{}, }, { interval: [2]int64{0, 5}, want: []string{"a", "bc", "def", "g", "hijk"}, }, { interval: [2]int64{0, 10}, want: []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"}, }, { interval: [2]int64{5, 10}, want: []string{"lm", "n", "opq", "rs", "tu"}, }, { interval: [2]int64{10, 10}, want: []string{}, }, { interval: [2]int64{2, 7}, want: []string{"def", "g", "hijk", "lm", "n"}, }, } for _, tc := range tests { t.Run("", func(t *testing.T) { slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary) defer slice.Release() if got, want := slice.Len(), len(tc.want); got != want { t.Fatalf("got=%d, want=%d", got, want) } vs := make([]string, slice.Len()) for i := range vs { vs[i] = slice.ValueString(i) } if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } }) } } func TestBinarySliceDataWithNull(t *testing.T) { mem := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*Binary) defer arr.Release() if got, want := arr.Len(), len(values); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.NullN(), 3; got != want { t.Fatalf("got=%d, want=%d", got, want) } vs := make([]string, arr.Len()) for i := range vs { vs[i] = arr.ValueString(i) } if got, want := vs, values; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } tests := []struct { interval [2]int64 nulls int want []string }{ { interval: [2]int64{0, 2}, nulls: 0, want: []string{"a", "bc"}, }, { interval: [2]int64{0, 3}, nulls: 1, want: []string{"a", "bc", ""}, }, { interval: [2]int64{0, 4}, nulls: 2, want: []string{"a", "bc", "", ""}, }, { interval: [2]int64{4, 8}, nulls: 0, want: []string{"hijk", "lm", "", "opq"}, }, { interval: [2]int64{2, 9}, nulls: 3, want: []string{"", "", "hijk", "lm", "", "opq", ""}, }, } for _, tc := range tests { t.Run("", func(t *testing.T) { slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary) defer slice.Release() if got, want := slice.Len(), len(tc.want); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.NullN(), tc.nulls; got != want { t.Errorf("got=%d, want=%d", got, want) } vs := make([]string, slice.Len()) for i := range vs { vs[i] = slice.ValueString(i) } if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } }) } } func TestBinarySliceOutOfBounds(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"} b := NewBinaryBuilder(mem, 
arrow.BinaryTypes.Binary) defer b.Release() for _, v := range values { b.AppendString(v) } arr := b.NewArray().(*Binary) defer arr.Release() slice := NewSlice(arr, 3, 8).(*Binary) defer slice.Release() tests := []struct { index int panic bool }{ { index: -1, panic: true, }, { index: 5, panic: true, }, { index: 0, panic: false, }, { index: 4, panic: false, }, } for _, tc := range tests { t.Run("", func(t *testing.T) { var val string if tc.panic { defer func() { e := recover() if e == nil { t.Fatalf("this should have panicked, but did not; slice value %q", val) } if got, want := e.(string), "arrow/array: index out of range"; got != want { t.Fatalf("invalid error. got=%q, want=%q", got, want) } }() } else { defer func() { if e := recover(); e != nil { t.Fatalf("unexpected panic: %v", e) } }() } val = slice.ValueString(tc.index) }) } } func TestBinaryValueOffset(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*Binary) defer arr.Release() slice := NewSlice(arr, 2, 9).(*Binary) defer slice.Release() offset := 3 vs := values[2:9] for i, v := range vs { assert.Equal(t, offset, slice.ValueOffset(i)) offset += len(v) } } func TestLargeBinaryValueOffset(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*LargeBinary) defer arr.Release() slice := NewSlice(arr, 2, 9).(*LargeBinary) defer slice.Release() offset := 3 
vs := values[2:9] for i, v := range vs { assert.EqualValues(t, offset, slice.ValueOffset(i)) offset += len(v) } } func TestBinaryValueLen(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*Binary) defer arr.Release() slice := NewSlice(arr, 2, 9).(*Binary) defer slice.Release() vs := values[2:9] for i, v := range vs { assert.Equal(t, len(v), slice.ValueLen(i)) } } func TestLargeBinaryValueLen(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*LargeBinary) defer arr.Release() slice := NewSlice(arr, 2, 9).(*LargeBinary) defer slice.Release() vs := values[2:9] for i, v := range vs { assert.Equal(t, len(v), slice.ValueLen(i)) } } func TestBinaryValueOffsets(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*Binary) defer arr.Release() assert.Equal(t, []int32{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets()) slice := NewSlice(arr, 2, 9).(*Binary) defer slice.Release() assert.Equal(t, []int32{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets()) } func TestLargeBinaryValueOffsets(t 
*testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*LargeBinary) defer arr.Release() assert.Equal(t, []int64{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets()) slice := NewSlice(arr, 2, 9).(*LargeBinary) defer slice.Release() assert.Equal(t, []int64{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets()) } func TestBinaryValueBytes(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*Binary) defer arr.Release() assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes()) slice := NewSlice(arr, 2, 9).(*Binary) defer slice.Release() assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes()) } func TestLargeBinaryValueBytes(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*LargeBinary) defer arr.Release() assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes()) slice := NewSlice(arr, 2, 9).(*LargeBinary) defer slice.Release() assert.Equal(t, 
[]byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes()) } func TestBinaryStringer(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "é", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, true, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*Binary) defer arr.Release() got := arr.String() want := `["a" "bc" (null) "é" (null) "hijk" "lm" "" "opq" (null) "tu"]` if got != want { t.Fatalf("invalid stringer:\ngot= %s\nwant=%s\n", got, want) } } func TestLargeBinaryStringer(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "é", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, true, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*LargeBinary) defer arr.Release() got := arr.String() want := `["a" "bc" (null) "é" (null) "hijk" "lm" "" "opq" (null) "tu"]` if got != want { t.Fatalf("invalid stringer:\ngot= %s\nwant=%s\n", got, want) } } func TestBinaryInvalidOffsets(t *testing.T) { const expectedPanic = "arrow/array: binary offsets out of bounds of data buffer" makeBuffers := func(valids []bool, offsets []int32, data string) []*memory.Buffer { offsetBuf := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets)) var nullBufBytes []byte var nullBuf *memory.Buffer if valids != nil { nullBufBytes = make([]byte, bitutil.BytesForBits(int64(len(valids)))) for i, v := range valids { bitutil.SetBitTo(nullBufBytes, i, v) } nullBuf = memory.NewBufferBytes(nullBufBytes) } return []*memory.Buffer{nullBuf, offsetBuf, memory.NewBufferBytes([]byte(data))} } assert.NotPanics(t, func() { buffers := 
makeBuffers(nil, []int32{}, "") NewBinaryData(NewData(arrow.BinaryTypes.Binary, 0, buffers, nil, 0, 0)) }, "empty array with no offsets") assert.NotPanics(t, func() { buffers := makeBuffers(nil, []int32{0, 5}, "") NewBinaryData(NewData(arrow.BinaryTypes.Binary, 0, buffers, nil, 0, 0)) }, "empty array, offsets ignored") assert.NotPanics(t, func() { buffers := makeBuffers(nil, []int32{0, 3, 4, 9}, "oooabcdef") NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 2)) }, "data has offset and value offsets are valid") assert.NotPanics(t, func() { buffers := makeBuffers(nil, []int32{0, 3, 6, 9, 9}, "012345678") arr := NewBinaryData(NewData(arrow.BinaryTypes.Binary, 4, buffers, nil, 0, 0)) if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) { assert.EqualValues(t, "012", arr.Value(0)) assert.EqualValues(t, "345", arr.Value(1)) assert.EqualValues(t, "678", arr.Value(2)) assert.EqualValues(t, "", arr.Value(3), "trailing empty binary value will have offset past end") } }, "simple valid case") assert.NotPanics(t, func() { buffers := makeBuffers([]bool{true, false, true, false}, []int32{0, 3, 4, 9, 9}, "oooabcdef") arr := NewBinaryData(NewData(arrow.BinaryTypes.Binary, 4, buffers, nil, 2, 0)) if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, arr.NullN()) { assert.EqualValues(t, "ooo", arr.Value(0)) assert.True(t, arr.IsNull(1)) assert.EqualValues(t, "bcdef", arr.Value(2)) assert.True(t, arr.IsNull(3)) } }, "simple valid case with nulls") assert.PanicsWithValue(t, expectedPanic, func() { buffers := makeBuffers(nil, []int32{0, 5}, "abc") NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 0)) }, "last offset is overflowing") assert.PanicsWithError(t, "arrow/array: binary offset buffer must have at least 2 values", func() { buffers := makeBuffers(nil, []int32{0}, "abc") NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 0)) }, "last offset is missing") assert.PanicsWithValue(t, expectedPanic, func() { buffers := 
makeBuffers(nil, []int32{0, 3, 10, 15}, "oooabcdef") NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 2)) }, "data has offset and value offset is overflowing") } func TestBinaryStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valid := []bool{true, true, false, false, true, true, true, true, false, true} b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) defer b.Release() b.AppendStringValues(values, valid) arr := b.NewArray().(*Binary) defer arr.Release() // 2. create array via AppendValueFromString b1 := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*Binary) defer arr1.Release() assert.True(t, Equal(arr, arr1)) } func TestBinaryViewStringRoundTrip(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "supercalifragilistic", "", "expialidocious"} valid := []bool{true, true, false, false, true, true, true} b := NewBinaryViewBuilder(mem) defer b.Release() b.AppendStringValues(values, valid) arr := b.NewArray().(*BinaryView) defer arr.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b.AppendValueFromString(arr.ValueStr(i))) } arr1 := b.NewArray().(*BinaryView) defer arr1.Release() assert.True(t, Equal(arr, arr1)) } arrow-go-18.2.0/arrow/array/binarybuilder.go000066400000000000000000000415401476434502500210070ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "encoding/base64" "fmt" "math" "reflect" "sync/atomic" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) // A BinaryBuilder is used to build a Binary array using the Append methods. type BinaryBuilder struct { builder dtype arrow.BinaryDataType offsets bufBuilder values *byteBufferBuilder appendOffsetVal func(int) getOffsetVal func(int) int maxCapacity uint64 offsetByteWidth int } // NewBinaryBuilder can be used for any of the variable length binary types, // Binary, LargeBinary, String, LargeString by passing the appropriate data type func NewBinaryBuilder(mem memory.Allocator, dtype arrow.BinaryDataType) *BinaryBuilder { var ( offsets bufBuilder offsetValFn func(int) maxCapacity uint64 offsetByteWidth int getOffsetVal func(int) int ) switch dtype.Layout().Buffers[1].ByteWidth { case 4: b := newInt32BufferBuilder(mem) offsetValFn = func(v int) { b.AppendValue(int32(v)) } getOffsetVal = func(i int) int { return int(b.Value(i)) } offsets = b maxCapacity = math.MaxInt32 offsetByteWidth = arrow.Int32SizeBytes case 8: b := newInt64BufferBuilder(mem) offsetValFn = func(v int) { b.AppendValue(int64(v)) } getOffsetVal = func(i int) int { return int(b.Value(i)) } offsets = b maxCapacity = math.MaxInt64 offsetByteWidth = 
arrow.Int64SizeBytes } b := &BinaryBuilder{ builder: builder{refCount: 1, mem: mem}, dtype: dtype, offsets: offsets, values: newByteBufferBuilder(mem), appendOffsetVal: offsetValFn, maxCapacity: maxCapacity, offsetByteWidth: offsetByteWidth, getOffsetVal: getOffsetVal, } return b } func (b *BinaryBuilder) Type() arrow.DataType { return b.dtype } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. func (b *BinaryBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.offsets != nil { b.offsets.Release() b.offsets = nil } if b.values != nil { b.values.Release() b.values = nil } } } func (b *BinaryBuilder) Append(v []byte) { b.Reserve(1) b.appendNextOffset() b.values.Append(v) b.UnsafeAppendBoolToBitmap(true) } func (b *BinaryBuilder) AppendString(v string) { b.Append([]byte(v)) } func (b *BinaryBuilder) AppendNull() { b.Reserve(1) b.appendNextOffset() b.UnsafeAppendBoolToBitmap(false) } func (b *BinaryBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *BinaryBuilder) AppendEmptyValue() { b.Reserve(1) b.appendNextOffset() b.UnsafeAppendBoolToBitmap(true) } func (b *BinaryBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *BinaryBuilder) AppendValues(v [][]byte, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) for _, vv := range v { b.appendNextOffset() b.values.Append(vv) } b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } // AppendStringValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. func (b *BinaryBuilder) AppendStringValues(v []string, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) for _, vv := range v { b.appendNextOffset() b.values.Append([]byte(vv)) } b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *BinaryBuilder) UnsafeAppend(v []byte) { b.appendNextOffset() b.values.unsafeAppend(v) b.UnsafeAppendBoolToBitmap(true) } func (b *BinaryBuilder) Value(i int) []byte { start := b.getOffsetVal(i) var end int if i == (b.length - 1) { end = b.values.Len() } else { end = b.getOffsetVal(i + 1) } return b.values.Bytes()[start:end] } func (b *BinaryBuilder) init(capacity int) { b.builder.init(capacity) b.offsets.resize((capacity + 1) * b.offsetByteWidth) } // DataLen returns the number of bytes in the data array. func (b *BinaryBuilder) DataLen() int { return b.values.length } // DataCap returns the total number of bytes that can be stored // without allocating additional memory. func (b *BinaryBuilder) DataCap() int { return b.values.capacity } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. 
func (b *BinaryBuilder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// ReserveData ensures there is enough space for appending n bytes
// by checking the capacity and resizing the data buffer if necessary.
func (b *BinaryBuilder) ReserveData(n int) {
	if b.values.capacity < b.values.length+n {
		b.values.resize(b.values.Len() + n)
	}
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *BinaryBuilder) Resize(n int) {
	w := b.offsetByteWidth
	b.offsets.resize((n + 1) * w)
	// Shrink the logical length of the offsets buffer when truncating.
	if n*w < b.offsets.Len() {
		b.offsets.SetLength(n * w)
	}
	b.builder.resize(n, b.init)
}

// ResizeData sets the logical length of the value buffer to n bytes.
func (b *BinaryBuilder) ResizeData(n int) {
	b.values.length = n
}

// NewArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder
// so it can be used to build a new array.
//
// Builds the appropriate Binary or LargeBinary array based on the datatype
// it was initialized with.
func (b *BinaryBuilder) NewArray() arrow.Array {
	if b.offsetByteWidth == arrow.Int32SizeBytes {
		return b.NewBinaryArray()
	}
	return b.NewLargeBinaryArray()
}

// NewBinaryArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder
// so it can be used to build a new array.
func (b *BinaryBuilder) NewBinaryArray() (a *Binary) { if b.offsetByteWidth != arrow.Int32SizeBytes { panic("arrow/array: invalid call to NewBinaryArray when building a LargeBinary array") } data := b.newData() a = NewBinaryData(data) data.Release() return } func (b *BinaryBuilder) NewLargeBinaryArray() (a *LargeBinary) { if b.offsetByteWidth != arrow.Int64SizeBytes { panic("arrow/array: invalid call to NewLargeBinaryArray when building a Binary array") } data := b.newData() a = NewLargeBinaryData(data) data.Release() return } func (b *BinaryBuilder) newData() (data *Data) { b.appendNextOffset() offsets, values := b.offsets.Finish(), b.values.Finish() data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, offsets, values}, nil, b.nulls, 0) if offsets != nil { offsets.Release() } if values != nil { values.Release() } b.builder.reset() return } func (b *BinaryBuilder) appendNextOffset() { numBytes := b.values.Len() debug.Assert(uint64(numBytes) <= b.maxCapacity, "exceeded maximum capacity of binary array") b.appendOffsetVal(numBytes) } func (b *BinaryBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } if b.dtype.IsUtf8() { b.Append([]byte(s)) return nil } decodedVal, err := base64.StdEncoding.DecodeString(s) if err != nil { return fmt.Errorf("could not decode base64 string: %w", err) } b.Append(decodedVal) return nil } func (b *BinaryBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case string: data, err := base64.StdEncoding.DecodeString(v) if err != nil { return err } b.Append(data) case []byte: b.Append(v) case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf([]byte{}), Offset: dec.InputOffset(), } } return nil } func (b *BinaryBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b 
*BinaryBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } const ( dfltBlockSize = 32 << 10 // 32 KB viewValueSizeLimit int32 = math.MaxInt32 ) type BinaryViewBuilder struct { builder dtype arrow.BinaryDataType data *memory.Buffer rawData []arrow.ViewHeader blockBuilder multiBufferBuilder } func NewBinaryViewBuilder(mem memory.Allocator) *BinaryViewBuilder { return &BinaryViewBuilder{ dtype: arrow.BinaryTypes.BinaryView, builder: builder{ refCount: 1, mem: mem, }, blockBuilder: multiBufferBuilder{ refCount: 1, blockSize: dfltBlockSize, mem: mem, }, } } func (b *BinaryViewBuilder) SetBlockSize(sz uint) { b.blockBuilder.blockSize = int(sz) } func (b *BinaryViewBuilder) Type() arrow.DataType { return b.dtype } func (b *BinaryViewBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) != 0 { return } if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } func (b *BinaryViewBuilder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.ViewHeaderTraits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = arrow.ViewHeaderTraits.CastFromBytes(b.data.Bytes()) } func (b *BinaryViewBuilder) Resize(n int) { nbuild := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) return } b.builder.resize(nbuild, b.init) b.data.Resize(arrow.ViewHeaderTraits.BytesRequired(n)) b.rawData = arrow.ViewHeaderTraits.CastFromBytes(b.data.Bytes()) } func (b *BinaryViewBuilder) ReserveData(length int) { if int32(length) > viewValueSizeLimit { panic(fmt.Errorf("%w: BinaryView or StringView elements cannot 
reference strings larger than 2GB", arrow.ErrInvalid)) } b.blockBuilder.Reserve(int(length)) } func (b *BinaryViewBuilder) Reserve(n int) { b.builder.reserve(n, b.Resize) } func (b *BinaryViewBuilder) Append(v []byte) { if int32(len(v)) > viewValueSizeLimit { panic(fmt.Errorf("%w: BinaryView or StringView elements cannot reference strings larger than 2GB", arrow.ErrInvalid)) } if !arrow.IsViewInline(len(v)) { b.ReserveData(len(v)) } b.Reserve(1) b.UnsafeAppend(v) } // AppendString is identical to Append, only accepting a string instead // of a byte slice, avoiding the extra copy that would occur if you simply // did []byte(v). // // This is different than AppendValueFromString which exists for the // Builder interface, in that this expects raw binary data which is // appended unmodified. AppendValueFromString expects base64 encoded binary // data instead. func (b *BinaryViewBuilder) AppendString(v string) { // create a []byte without copying the bytes // in go1.20 this would be unsafe.StringData val := *(*[]byte)(unsafe.Pointer(&struct { string int }{v, len(v)})) b.Append(val) } func (b *BinaryViewBuilder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *BinaryViewBuilder) AppendNulls(n int) { b.Reserve(n) for i := 0; i < n; i++ { b.UnsafeAppendBoolToBitmap(false) } } func (b *BinaryViewBuilder) AppendEmptyValue() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(true) } func (b *BinaryViewBuilder) AppendEmptyValues(n int) { b.Reserve(n) b.unsafeAppendBoolsToBitmap(nil, n) } func (b *BinaryViewBuilder) UnsafeAppend(v []byte) { hdr := &b.rawData[b.length] hdr.SetBytes(v) if !hdr.IsInline() { b.blockBuilder.UnsafeAppend(hdr, v) } b.UnsafeAppendBoolToBitmap(true) } func (b *BinaryViewBuilder) AppendValues(v [][]byte, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) outOfLineTotal := 0 for i, vv := range v { if len(valid) == 0 || valid[i] { if 
!arrow.IsViewInline(len(vv)) { outOfLineTotal += len(vv) } } } b.ReserveData(outOfLineTotal) for i, vv := range v { if len(valid) == 0 || valid[i] { hdr := &b.rawData[b.length+i] hdr.SetBytes(vv) if !hdr.IsInline() { b.blockBuilder.UnsafeAppend(hdr, vv) } } } b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *BinaryViewBuilder) AppendStringValues(v []string, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) outOfLineTotal := 0 for i, vv := range v { if len(valid) == 0 || valid[i] { if !arrow.IsViewInline(len(vv)) { outOfLineTotal += len(vv) } } } b.ReserveData(outOfLineTotal) for i, vv := range v { if len(valid) == 0 || valid[i] { hdr := &b.rawData[b.length+i] hdr.SetString(vv) if !hdr.IsInline() { b.blockBuilder.UnsafeAppendString(hdr, vv) } } } b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } // AppendValueFromString is paired with ValueStr for fulfilling the // base Builder interface. This is intended to read in a human-readable // string such as from CSV or JSON and append it to the array. // // For Binary values are expected to be base64 encoded (and will be // decoded as such before being appended). 
func (b *BinaryViewBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } if b.dtype.IsUtf8() { b.Append([]byte(s)) return nil } decodedVal, err := base64.StdEncoding.DecodeString(s) if err != nil { return fmt.Errorf("could not decode base64 string: %w", err) } b.Append(decodedVal) return nil } func (b *BinaryViewBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case string: data, err := base64.StdEncoding.DecodeString(v) if err != nil { return err } b.Append(data) case []byte: b.Append(v) case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf([]byte{}), Offset: dec.InputOffset(), } } return nil } func (b *BinaryViewBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *BinaryViewBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary view builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } func (b *BinaryViewBuilder) newData() (data *Data) { bytesRequired := arrow.ViewHeaderTraits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } dataBuffers := b.blockBuilder.Finish() data = NewData(b.dtype, b.length, append([]*memory.Buffer{ b.nullBitmap, b.data}, dataBuffers...), nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil for _, buf := range dataBuffers { buf.Release() } } return } func (b *BinaryViewBuilder) NewBinaryViewArray() (a *BinaryView) { data := b.newData() a = NewBinaryViewData(data) data.Release() return } func (b *BinaryViewBuilder) NewArray() arrow.Array { return b.NewBinaryViewArray() } var ( 
_ Builder = (*BinaryBuilder)(nil) _ Builder = (*BinaryViewBuilder)(nil) ) arrow-go-18.2.0/arrow/array/binarybuilder_test.go000066400000000000000000000116541476434502500220510ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "bytes" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestBinaryBuilder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) exp := [][]byte{[]byte("foo"), []byte("bar"), nil, []byte("sydney"), []byte("cameron")} for _, v := range exp { if v == nil { ab.AppendNull() } else { ab.Append(v) } } assert.Equal(t, len(exp), ab.Len(), "unexpected Len()") assert.Equal(t, 1, ab.NullN(), "unexpected NullN()") for i, v := range exp { if v == nil { v = []byte{} } assert.Equal(t, v, ab.Value(i), "unexpected BinaryArrayBuilder.Value(%d)", i) } // Zm9v is foo in base64 assert.NoError(t, ab.AppendValueFromString("Zm9v")) ar := ab.NewBinaryArray() assert.Equal(t, "Zm9v", ar.ValueStr(5)) ab.Release() ar.Release() // check state of builder 
after NewBinaryArray assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") } func TestBinaryBuilder_ReserveData(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) // call ReserveData and ensure the capacity doesn't change // when appending entries until that count. ab.ReserveData(256) expCap := ab.DataCap() for i := 0; i < 256/8; i++ { ab.Append(bytes.Repeat([]byte("a"), 8)) } assert.Equal(t, expCap, ab.DataCap(), "unexpected BinaryArrayBuilder.DataCap()") ar := ab.NewBinaryArray() ab.Release() ar.Release() // check state of builder after NewBinaryArray assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") } func TestBinaryBuilderLarge(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) exp := [][]byte{[]byte("foo"), []byte("bar"), nil, []byte("sydney"), []byte("cameron")} for _, v := range exp { if v == nil { ab.AppendNull() } else { ab.Append(v) } } assert.Equal(t, len(exp), ab.Len(), "unexpected Len()") assert.Equal(t, 1, ab.NullN(), "unexpected NullN()") for i, v := range exp { if v == nil { v = []byte{} } assert.Equal(t, v, ab.Value(i), "unexpected BinaryArrayBuilder.Value(%d)", i) } ar := ab.NewLargeBinaryArray() ab.Release() ar.Release() // check state of builder after NewBinaryArray assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") 
assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") } func TestBinaryBuilderLarge_ReserveData(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) // call ReserveData and ensure the capacity doesn't change // when appending entries until that count. ab.ReserveData(256) expCap := ab.DataCap() for i := 0; i < 256/8; i++ { ab.Append(bytes.Repeat([]byte("a"), 8)) } assert.Equal(t, expCap, ab.DataCap(), "unexpected BinaryArrayBuilder.DataCap()") ar := ab.NewLargeBinaryArray() ab.Release() ar.Release() // check state of builder after NewBinaryArray assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") } arrow-go-18.2.0/arrow/array/boolean.go000066400000000000000000000061571476434502500176000ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package array import ( "fmt" "strconv" "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) // A type which represents an immutable sequence of boolean values. type Boolean struct { array values []byte } // NewBoolean creates a boolean array from the data memory.Buffer and contains length elements. // The nullBitmap buffer can be nil of there are no null values. // If nulls is not known, use UnknownNullCount to calculate the value of NullN at runtime from the nullBitmap buffer. func NewBoolean(length int, data *memory.Buffer, nullBitmap *memory.Buffer, nulls int) *Boolean { arrdata := NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nullBitmap, data}, nil, nulls, 0) defer arrdata.Release() return NewBooleanData(arrdata) } func NewBooleanData(data arrow.ArrayData) *Boolean { a := &Boolean{} a.refCount = 1 a.setData(data.(*Data)) return a } func (a *Boolean) Value(i int) bool { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } return bitutil.BitIsSet(a.values, a.array.data.offset+i) } func (a *Boolean) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } else { return strconv.FormatBool(a.Value(i)) } } func (a *Boolean) String() string { o := new(strings.Builder) o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", a.Value(i)) } } o.WriteString("]") return o.String() } func (a *Boolean) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = vals.Bytes() } } func (a *Boolean) GetOneForMarshal(i int) interface{} { if a.IsValid(i) { return a.Value(i) } return nil } func (a *Boolean) MarshalJSON() ([]byte, error) { vals := 
make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = a.Value(i) } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualBoolean(left, right *Boolean) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } var ( _ arrow.Array = (*Boolean)(nil) ) arrow-go-18.2.0/arrow/array/boolean_test.go000066400000000000000000000164411476434502500206340ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "fmt" "reflect" "strings" "testing" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestBooleanSliceData(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) values := []bool{true, false, true, true, true, true, true, false, true, false} b := array.NewBooleanBuilder(pool) defer b.Release() for _, v := range values { b.Append(v) } arr := b.NewArray().(*array.Boolean) defer arr.Release() if got, want := arr.Len(), len(values); got != want { t.Fatalf("got=%d, want=%d", got, want) } vs := make([]bool, arr.Len()) for i := range vs { vs[i] = arr.Value(i) } if got, want := vs, values; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } tests := []struct { interval [2]int64 want []bool }{ { interval: [2]int64{0, 0}, want: []bool{}, }, { interval: [2]int64{10, 10}, want: []bool{}, }, { interval: [2]int64{0, 5}, want: []bool{true, false, true, true, true}, }, { interval: [2]int64{5, 10}, want: []bool{true, true, false, true, false}, }, { interval: [2]int64{2, 7}, want: []bool{true, true, true, true, true}, }, } for _, tc := range tests { t.Run("", func(t *testing.T) { slice := array.NewSlice(arr, tc.interval[0], tc.interval[1]).(*array.Boolean) defer slice.Release() if got, want := slice.Len(), len(tc.want); got != want { t.Fatalf("got=%d, want=%d", got, want) } vs := make([]bool, slice.Len()) for i := range vs { vs[i] = slice.Value(i) } if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } }) } } func TestBooleanSliceDataWithNull(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) values := []bool{true, false, true, false, false, false, true, false, true, false} valids := []bool{true, false, true, true, true, true, true, false, true, true} b := array.NewBooleanBuilder(pool) defer b.Release() 
b.AppendValues(values, valids) arr := b.NewArray().(*array.Boolean) defer arr.Release() if got, want := arr.Len(), len(valids); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.NullN(), 2; got != want { t.Fatalf("got=%d, want=%d", got, want) } vs := make([]bool, arr.Len()) for i := range vs { vs[i] = arr.Value(i) } if got, want := vs, values; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } tests := []struct { interval [2]int64 nulls int want []bool }{ { interval: [2]int64{2, 9}, nulls: 1, want: []bool{true, false, false, false, true, false, true}, }, { interval: [2]int64{0, 7}, nulls: 1, want: []bool{true, false, true, false, false, false, true}, }, { interval: [2]int64{1, 8}, nulls: 2, want: []bool{false, true, false, false, false, true, false}, }, { interval: [2]int64{2, 7}, nulls: 0, want: []bool{true, false, false, false, true}, }, } for _, tc := range tests { t.Run("", func(t *testing.T) { slice := array.NewSlice(arr, tc.interval[0], tc.interval[1]).(*array.Boolean) defer slice.Release() if got, want := slice.NullN(), tc.nulls; got != want { t.Errorf("got=%d, want=%d", got, want) } if got, want := slice.Len(), len(tc.want); got != want { t.Fatalf("got=%d, want=%d", got, want) } vs := make([]bool, slice.Len()) for i := range vs { vs[i] = slice.Value(i) } if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } }) } } func TestBooleanSliceOutOfBounds(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) values := []bool{true, false, true, false, true, false, true, false, true, false} b := array.NewBooleanBuilder(pool) defer b.Release() for _, v := range values { b.Append(v) } arr := b.NewArray().(*array.Boolean) defer arr.Release() slice := array.NewSlice(arr, 3, 8).(*array.Boolean) defer slice.Release() tests := []struct { index int panic bool }{ { index: -1, panic: true, }, { index: 5, panic: true, }, { index: 0, 
panic: false, }, { index: 4, panic: false, }, } for _, tc := range tests { t.Run("", func(t *testing.T) { var val bool if tc.panic { defer func() { e := recover() if e == nil { t.Fatalf("this should have panicked, but did not; slice value %v", val) } if got, want := e.(string), "arrow/array: index out of range"; got != want { t.Fatalf("invalid error. got=%q, want=%q", got, want) } }() } else { defer func() { if e := recover(); e != nil { t.Fatalf("unexpected panic: %v", e) } }() } val = slice.Value(tc.index) }) } } func TestBooleanStringer(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( values = []bool{true, false, true, false, true, false, true, false, true, false} valids = []bool{true, true, false, true, true, true, false, true, true, true} ) b := array.NewBooleanBuilder(pool) defer b.Release() b.AppendValues(values, valids) arr := b.NewArray().(*array.Boolean) defer arr.Release() out := new(strings.Builder) fmt.Fprintf(out, "%v", arr) const want = "[true false (null) false true false (null) false true false]" if got := out.String(); got != want { t.Fatalf("invalid stringer:\ngot= %q\nwant=%q", got, want) } assert.Equal(t, "true", arr.ValueStr(0)) assert.Equal(t, "false", arr.ValueStr(1)) assert.Equal(t, array.NullValueStr, arr.ValueStr(2)) } func TestBooleanStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []bool{true, false, true, true, true, true, true, false, true, false} valid := []bool{true, false, false, true, false, true, true, false, true, false} b := array.NewBooleanBuilder(mem) defer b.Release() b.AppendValues(values, valid) arr := b.NewArray().(*array.Boolean) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewBooleanBuilder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Boolean) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } arrow-go-18.2.0/arrow/array/booleanbuilder.go000066400000000000000000000141761476434502500211470ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "fmt" "reflect" "strconv" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) type BooleanBuilder struct { builder data *memory.Buffer rawData []byte } func NewBooleanBuilder(mem memory.Allocator) *BooleanBuilder { return &BooleanBuilder{builder: builder{refCount: 1, mem: mem}} } func (b *BooleanBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.Boolean } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. 
func (b *BooleanBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *BooleanBuilder) Append(v bool) { b.Reserve(1) b.UnsafeAppend(v) } func (b *BooleanBuilder) AppendByte(v byte) { b.Reserve(1) b.UnsafeAppend(v != 0) } func (b *BooleanBuilder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *BooleanBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *BooleanBuilder) AppendEmptyValue() { b.Reserve(1) b.UnsafeAppend(false) } func (b *BooleanBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *BooleanBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } val, err := strconv.ParseBool(s) if err != nil { return err } b.Append(val) return nil } func (b *BooleanBuilder) UnsafeAppend(v bool) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) if v { bitutil.SetBit(b.rawData, b.length) } else { bitutil.ClearBit(b.rawData, b.length) } b.length++ } func (b *BooleanBuilder) AppendValues(v []bool, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) for i, vv := range v { bitutil.SetBitTo(b.rawData, b.length+i, vv) } b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *BooleanBuilder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.BooleanTraits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = b.data.Bytes() } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. 
func (b *BooleanBuilder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *BooleanBuilder) Resize(n int) { if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(n, b.init) b.data.Resize(arrow.BooleanTraits.BytesRequired(n)) b.rawData = b.data.Bytes() } } // NewArray creates a Boolean array from the memory buffers used by the builder and resets the BooleanBuilder // so it can be used to build a new array. func (b *BooleanBuilder) NewArray() arrow.Array { return b.NewBooleanArray() } // NewBooleanArray creates a Boolean array from the memory buffers used by the builder and resets the BooleanBuilder // so it can be used to build a new array. func (b *BooleanBuilder) NewBooleanArray() (a *Boolean) { data := b.newData() a = NewBooleanData(data) data.Release() return } func (b *BooleanBuilder) newData() *Data { bytesRequired := arrow.BooleanTraits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } res := NewData(arrow.FixedWidthTypes.Boolean, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return res } func (b *BooleanBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case bool: b.Append(v) case string: val, err := strconv.ParseBool(v) if err != nil { return err } b.Append(val) case json.Number: val, err := strconv.ParseBool(v.String()) if err != nil { return err } b.Append(val) case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf(true), Offset: dec.InputOffset(), } } return nil } func (b *BooleanBuilder) Unmarshal(dec *json.Decoder) 
error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *BooleanBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) dec.UseNumber() t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("boolean builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } func (b *BooleanBuilder) Value(i int) bool { return bitutil.BitIsSet(b.rawData, i) } var ( _ Builder = (*BooleanBuilder)(nil) ) arrow-go-18.2.0/arrow/array/booleanbuilder_test.go000066400000000000000000000052621476434502500222020ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "testing" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/internal/testing/tools" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestBooleanBuilder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewBooleanBuilder(mem) exp := tools.Bools(1, 1, 0, 1, 1, 0) b.AppendValues(exp, nil) assert.NoError(t, b.AppendValueFromString("true")) assert.NoError(t, b.AppendValueFromString("false")) exp = tools.Bools(1, 1, 0, 1, 1, 0, 1, 0) got := make([]bool, len(exp)) // make sure we can read the values directly from the builder. for i := 0; i < b.Len(); i++ { got[i] = b.Value(i) } assert.Equal(t, exp, got) got = make([]bool, len(exp)) // reset a := b.NewBooleanArray() b.Release() for i := 0; i < a.Len(); i++ { got[i] = a.Value(i) } assert.Equal(t, exp, got) a.Release() } func TestBooleanBuilder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewBooleanBuilder(mem) defer ab.Release() want := tools.Bools(1, 1, 0, 1, 1, 0, 1, 0) boolValues := func(a *array.Boolean) []bool { vs := make([]bool, a.Len()) for i := range vs { vs[i] = a.Value(i) } return vs } ab.AppendValues([]bool{}, nil) a := ab.NewBooleanArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewBooleanArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(want, nil) a = ab.NewBooleanArray() assert.Equal(t, want, boolValues(a)) a.Release() ab.AppendValues([]bool{}, nil) ab.AppendValues(want, nil) a = ab.NewBooleanArray() assert.Equal(t, want, boolValues(a)) a.Release() ab.AppendValues(want, nil) ab.AppendValues([]bool{}, nil) a = ab.NewBooleanArray() assert.Equal(t, want, boolValues(a)) a.Release() } arrow-go-18.2.0/arrow/array/bufferbuilder.go000066400000000000000000000154601476434502500207760ustar00rootroot00000000000000// Licensed to the 
Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "sync/atomic" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" ) type bufBuilder interface { Retain() Release() Len() int Cap() int Bytes() []byte resize(int) Advance(int) SetLength(int) Append([]byte) Reset() Finish() *memory.Buffer } // A bufferBuilder provides common functionality for populating memory with a sequence of type-specific values. // Specialized implementations provide type-safe APIs for appending and accessing the memory. type bufferBuilder struct { refCount int64 mem memory.Allocator buffer *memory.Buffer length int capacity int bytes []byte } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. func (b *bufferBuilder) Retain() { atomic.AddInt64(&b.refCount, 1) } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. 
func (b *bufferBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.buffer != nil { b.buffer.Release() b.buffer, b.bytes = nil, nil } } } // Len returns the length of the memory buffer in bytes. func (b *bufferBuilder) Len() int { return b.length } // Cap returns the total number of bytes that can be stored without allocating additional memory. func (b *bufferBuilder) Cap() int { return b.capacity } // Bytes returns a slice of length b.Len(). // The slice is only valid for use until the next buffer modification. That is, until the next call // to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next // buffer modification. func (b *bufferBuilder) Bytes() []byte { return b.bytes[:b.length] } func (b *bufferBuilder) resize(elements int) { if b.buffer == nil { b.buffer = memory.NewResizableBuffer(b.mem) } b.buffer.ResizeNoShrink(elements) oldCapacity := b.capacity b.capacity = b.buffer.Cap() b.bytes = b.buffer.Buf() if b.capacity > oldCapacity { memory.Set(b.bytes[oldCapacity:], 0) } } func (b *bufferBuilder) SetLength(length int) { if length > b.length { b.Advance(length) return } b.length = length } // Advance increases the buffer by length and initializes the skipped bytes to zero. func (b *bufferBuilder) Advance(length int) { if b.capacity < b.length+length { newCapacity := bitutil.NextPowerOf2(b.length + length) b.resize(newCapacity) } b.length += length } // Append appends the contents of v to the buffer, resizing it if necessary. func (b *bufferBuilder) Append(v []byte) { if b.capacity < b.length+len(v) { newCapacity := bitutil.NextPowerOf2(b.length + len(v)) b.resize(newCapacity) } b.unsafeAppend(v) } // Reset returns the buffer to an empty state. Reset releases the memory and sets the length and capacity to zero. 
func (b *bufferBuilder) Reset() { if b.buffer != nil { b.buffer.Release() } b.buffer, b.bytes = nil, nil b.capacity, b.length = 0, 0 } // Finish TODO(sgc) func (b *bufferBuilder) Finish() (buffer *memory.Buffer) { if b.length > 0 { b.buffer.ResizeNoShrink(b.length) } buffer = b.buffer b.buffer = nil b.Reset() if buffer == nil { buffer = memory.NewBufferBytes(nil) } return } func (b *bufferBuilder) unsafeAppend(data []byte) { copy(b.bytes[b.length:], data) b.length += len(data) } type multiBufferBuilder struct { refCount int64 blockSize int mem memory.Allocator blocks []*memory.Buffer currentOutBuffer int } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. func (b *multiBufferBuilder) Retain() { atomic.AddInt64(&b.refCount, 1) } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. func (b *multiBufferBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { b.Reset() } } func (b *multiBufferBuilder) Reserve(nbytes int) { if len(b.blocks) == 0 { out := memory.NewResizableBuffer(b.mem) if nbytes < b.blockSize { nbytes = b.blockSize } out.Reserve(nbytes) b.currentOutBuffer = 0 b.blocks = []*memory.Buffer{out} return } curBuf := b.blocks[b.currentOutBuffer] remain := curBuf.Cap() - curBuf.Len() if nbytes <= remain { return } // search for underfull block that has enough bytes for i, block := range b.blocks { remaining := block.Cap() - block.Len() if nbytes <= remaining { b.currentOutBuffer = i return } } // current buffer doesn't have enough space, no underfull buffers // make new buffer and set that as our current. 
newBuf := memory.NewResizableBuffer(b.mem) if nbytes < b.blockSize { nbytes = b.blockSize } newBuf.Reserve(nbytes) b.currentOutBuffer = len(b.blocks) b.blocks = append(b.blocks, newBuf) } func (b *multiBufferBuilder) RemainingBytes() int { if len(b.blocks) == 0 { return 0 } buf := b.blocks[b.currentOutBuffer] return buf.Cap() - buf.Len() } func (b *multiBufferBuilder) Reset() { b.currentOutBuffer = 0 for _, block := range b.Finish() { block.Release() } } func (b *multiBufferBuilder) UnsafeAppend(hdr *arrow.ViewHeader, val []byte) { buf := b.blocks[b.currentOutBuffer] idx, offset := b.currentOutBuffer, buf.Len() hdr.SetIndexOffset(int32(idx), int32(offset)) n := copy(buf.Buf()[offset:], val) buf.ResizeNoShrink(offset + n) } func (b *multiBufferBuilder) UnsafeAppendString(hdr *arrow.ViewHeader, val string) { // create a byte slice with zero-copies // in go1.20 this would be equivalent to unsafe.StringData v := *(*[]byte)(unsafe.Pointer(&struct { string int }{val, len(val)})) b.UnsafeAppend(hdr, v) } func (b *multiBufferBuilder) Finish() (out []*memory.Buffer) { b.currentOutBuffer = 0 out, b.blocks = b.blocks, nil return } arrow-go-18.2.0/arrow/array/bufferbuilder_byte.go000066400000000000000000000022621476434502500220150ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package array import "github.com/apache/arrow-go/v18/arrow/memory" type byteBufferBuilder struct { bufferBuilder } func newByteBufferBuilder(mem memory.Allocator) *byteBufferBuilder { return &byteBufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} } func (b *byteBufferBuilder) Values() []byte { return b.Bytes() } func (b *byteBufferBuilder) Value(i int) byte { return b.bytes[i] } arrow-go-18.2.0/arrow/array/bufferbuilder_numeric.gen.go000066400000000000000000000123531476434502500232660ustar00rootroot00000000000000// Code generated by array/bufferbuilder_numeric.gen.go.tmpl. DO NOT EDIT. // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" ) type int64BufferBuilder struct { bufferBuilder } func newInt64BufferBuilder(mem memory.Allocator) *int64BufferBuilder { return &int64BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} } // AppendValues appends the contents of v to the buffer, growing the buffer as needed. 
func (b *int64BufferBuilder) AppendValues(v []int64) { b.Append(arrow.Int64Traits.CastToBytes(v)) } // Values returns a slice of length b.Len(). // The slice is only valid for use until the next buffer modification. That is, until the next call // to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next // buffer modification. func (b *int64BufferBuilder) Values() []int64 { return arrow.Int64Traits.CastFromBytes(b.Bytes()) } // Value returns the int64 element at the index i. Value will panic if i is negative or ≥ Len. func (b *int64BufferBuilder) Value(i int) int64 { return b.Values()[i] } // Len returns the number of int64 elements in the buffer. func (b *int64BufferBuilder) Len() int { return b.length / arrow.Int64SizeBytes } // AppendValue appends v to the buffer, growing the buffer as needed. func (b *int64BufferBuilder) AppendValue(v int64) { if b.capacity < b.length+arrow.Int64SizeBytes { newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int64SizeBytes) b.resize(newCapacity) } arrow.Int64Traits.PutValue(b.bytes[b.length:], v) b.length += arrow.Int64SizeBytes } type int32BufferBuilder struct { bufferBuilder } func newInt32BufferBuilder(mem memory.Allocator) *int32BufferBuilder { return &int32BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} } // AppendValues appends the contents of v to the buffer, growing the buffer as needed. func (b *int32BufferBuilder) AppendValues(v []int32) { b.Append(arrow.Int32Traits.CastToBytes(v)) } // Values returns a slice of length b.Len(). // The slice is only valid for use until the next buffer modification. That is, until the next call // to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next // buffer modification. func (b *int32BufferBuilder) Values() []int32 { return arrow.Int32Traits.CastFromBytes(b.Bytes()) } // Value returns the int32 element at the index i. 
Value will panic if i is negative or ≥ Len. func (b *int32BufferBuilder) Value(i int) int32 { return b.Values()[i] } // Len returns the number of int32 elements in the buffer. func (b *int32BufferBuilder) Len() int { return b.length / arrow.Int32SizeBytes } // AppendValue appends v to the buffer, growing the buffer as needed. func (b *int32BufferBuilder) AppendValue(v int32) { if b.capacity < b.length+arrow.Int32SizeBytes { newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int32SizeBytes) b.resize(newCapacity) } arrow.Int32Traits.PutValue(b.bytes[b.length:], v) b.length += arrow.Int32SizeBytes } type int8BufferBuilder struct { bufferBuilder } func newInt8BufferBuilder(mem memory.Allocator) *int8BufferBuilder { return &int8BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} } // AppendValues appends the contents of v to the buffer, growing the buffer as needed. func (b *int8BufferBuilder) AppendValues(v []int8) { b.Append(arrow.Int8Traits.CastToBytes(v)) } // Values returns a slice of length b.Len(). // The slice is only valid for use until the next buffer modification. That is, until the next call // to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next // buffer modification. func (b *int8BufferBuilder) Values() []int8 { return arrow.Int8Traits.CastFromBytes(b.Bytes()) } // Value returns the int8 element at the index i. Value will panic if i is negative or ≥ Len. func (b *int8BufferBuilder) Value(i int) int8 { return b.Values()[i] } // Len returns the number of int8 elements in the buffer. func (b *int8BufferBuilder) Len() int { return b.length / arrow.Int8SizeBytes } // AppendValue appends v to the buffer, growing the buffer as needed. 
func (b *int8BufferBuilder) AppendValue(v int8) { if b.capacity < b.length+arrow.Int8SizeBytes { newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int8SizeBytes) b.resize(newCapacity) } arrow.Int8Traits.PutValue(b.bytes[b.length:], v) b.length += arrow.Int8SizeBytes } arrow-go-18.2.0/arrow/array/bufferbuilder_numeric.gen.go.tmpl000066400000000000000000000052031476434502500242350ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" ) {{range .In}} {{$TypeNamePrefix := .name}} {{if .Opt.BufferBuilder}} type {{$TypeNamePrefix}}BufferBuilder struct { bufferBuilder } func new{{.Name}}BufferBuilder(mem memory.Allocator) *{{$TypeNamePrefix}}BufferBuilder { return &{{$TypeNamePrefix}}BufferBuilder{bufferBuilder:bufferBuilder{refCount: 1, mem:mem}} } // AppendValues appends the contents of v to the buffer, growing the buffer as needed. func (b *{{$TypeNamePrefix}}BufferBuilder) AppendValues(v []{{.Type}}) { b.Append(arrow.{{.Name}}Traits.CastToBytes(v)) } // Values returns a slice of length b.Len(). 
// The slice is only valid for use until the next buffer modification. That is, until the next call // to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next // buffer modification. func (b *{{$TypeNamePrefix}}BufferBuilder) Values() []{{.Type}} { return arrow.{{.Name}}Traits.CastFromBytes(b.Bytes()) } // Value returns the {{.Type}} element at the index i. Value will panic if i is negative or ≥ Len. func (b *{{$TypeNamePrefix}}BufferBuilder) Value(i int) {{.Type}} { return b.Values()[i] } // Len returns the number of {{.Type}} elements in the buffer. func (b *{{$TypeNamePrefix}}BufferBuilder) Len() int { return b.length/arrow.{{.Name}}SizeBytes } // AppendValue appends v to the buffer, growing the buffer as needed. func (b *{{$TypeNamePrefix}}BufferBuilder) AppendValue(v {{.Type}}) { if b.capacity < b.length+arrow.{{.Name}}SizeBytes { newCapacity := bitutil.NextPowerOf2(b.length + arrow.{{.Name}}SizeBytes) b.resize(newCapacity) } arrow.{{.Name}}Traits.PutValue(b.bytes[b.length:], v) b.length+=arrow.{{.Name}}SizeBytes } {{end}} {{end}} arrow-go-18.2.0/arrow/array/bufferbuilder_numeric_test.go000066400000000000000000000062211476434502500235520ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package array import ( "testing" "unsafe" "github.com/apache/arrow-go/v18/arrow/endian" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestInt32BufferBuilder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) bb := newInt32BufferBuilder(mem) exp := []int32{0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f01, 0x02030405, 0x06070809} bb.AppendValues(exp[:3]) bb.AppendValues(exp[3:]) var expBuf []byte if endian.IsBigEndian { expBuf = []byte{ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, } } else { expBuf = []byte{ 0x04, 0x03, 0x02, 0x01, 0x08, 0x07, 0x06, 0x05, 0x0c, 0x0b, 0x0a, 0x09, 0x01, 0x0f, 0x0e, 0x0d, 0x05, 0x04, 0x03, 0x02, 0x09, 0x08, 0x07, 0x06, } } assert.Equal(t, expBuf, bb.Bytes(), "unexpected byte values") assert.Equal(t, exp, bb.Values(), "unexpected int32 values") assert.Equal(t, len(exp), bb.Len(), "unexpected Len()") buflen := bb.Len() bfr := bb.Finish() assert.Equal(t, buflen*int(unsafe.Sizeof(int32(0))), bfr.Len(), "Buffer was not resized") assert.Len(t, bfr.Bytes(), bfr.Len(), "Buffer.Bytes() != Buffer.Len()") bfr.Release() assert.Len(t, bb.Bytes(), 0, "BufferBuilder was not reset after Finish") assert.Zero(t, bb.Len(), "BufferBuilder was not reset after Finish") bb.Release() } func TestInt32BufferBuilder_AppendValue(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) bb := newInt32BufferBuilder(mem) exp := []int32{0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f01, 0x02030405, 0x06070809} for _, v := range exp { bb.AppendValue(v) } var expBuf []byte if endian.IsBigEndian { expBuf = []byte{ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
0x07, 0x08, 0x09, } } else { expBuf = []byte{ 0x04, 0x03, 0x02, 0x01, 0x08, 0x07, 0x06, 0x05, 0x0c, 0x0b, 0x0a, 0x09, 0x01, 0x0f, 0x0e, 0x0d, 0x05, 0x04, 0x03, 0x02, 0x09, 0x08, 0x07, 0x06, } } assert.Equal(t, expBuf, bb.Bytes(), "unexpected byte values") assert.Equal(t, exp, bb.Values(), "unexpected int32 values") assert.Equal(t, len(exp), bb.Len(), "unexpected Len()") bb.Release() } arrow-go-18.2.0/arrow/array/builder.go000066400000000000000000000254221476434502500176030ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "fmt" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) const ( minBuilderCapacity = 1 << 5 ) // Builder provides an interface to build arrow arrays. type Builder interface { // you can unmarshal a json array to add the values to a builder json.Unmarshaler // Type returns the datatype that this is building Type() arrow.DataType // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. Retain() // Release decreases the reference count by 1. 
Release() // Len returns the number of elements in the array builder. Len() int // Cap returns the total number of elements that can be stored // without allocating additional memory. Cap() int // NullN returns the number of null values in the array builder. NullN() int // AppendNull adds a new null value to the array being built. AppendNull() // AppendNulls adds new n null values to the array being built. AppendNulls(n int) // AppendEmptyValue adds a new zero value of the appropriate type AppendEmptyValue() // AppendEmptyValues adds new n zero values of the appropriate type AppendEmptyValues(n int) // AppendValueFromString adds a new value from a string. Inverse of array.ValueStr(i int) string AppendValueFromString(string) error // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. Reserve(n int) // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. Resize(n int) // NewArray creates a new array from the memory buffers used // by the builder and resets the Builder so it can be used to build // a new array. NewArray() arrow.Array // IsNull returns if a previously appended value at a given index is null or not. IsNull(i int) bool // SetNull sets the value at index i to null. SetNull(i int) UnsafeAppendBoolToBitmap(bool) init(capacity int) resize(newBits int, init func(int)) UnmarshalOne(*json.Decoder) error Unmarshal(*json.Decoder) error newData() *Data } // builder provides common functionality for managing the validity bitmap (nulls) when building arrays. type builder struct { refCount int64 mem memory.Allocator nullBitmap *memory.Buffer nulls int length int capacity int } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. 
func (b *builder) Retain() { atomic.AddInt64(&b.refCount, 1) } // Len returns the number of elements in the array builder. func (b *builder) Len() int { return b.length } // Cap returns the total number of elements that can be stored without allocating additional memory. func (b *builder) Cap() int { return b.capacity } // NullN returns the number of null values in the array builder. func (b *builder) NullN() int { return b.nulls } func (b *builder) IsNull(i int) bool { return b.nullBitmap.Len() != 0 && bitutil.BitIsNotSet(b.nullBitmap.Bytes(), i) } func (b *builder) SetNull(i int) { if i < 0 || i >= b.length { panic("arrow/array: index out of range") } bitutil.ClearBit(b.nullBitmap.Bytes(), i) } func (b *builder) init(capacity int) { toAlloc := bitutil.CeilByte(capacity) / 8 b.nullBitmap = memory.NewResizableBuffer(b.mem) b.nullBitmap.Resize(toAlloc) b.capacity = capacity memory.Set(b.nullBitmap.Buf(), 0) } func (b *builder) reset() { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } b.nulls = 0 b.length = 0 b.capacity = 0 } func (b *builder) resize(newBits int, init func(int)) { if b.nullBitmap == nil { init(newBits) return } newBytesN := bitutil.CeilByte(newBits) / 8 oldBytesN := b.nullBitmap.Len() b.nullBitmap.Resize(newBytesN) b.capacity = newBits if oldBytesN < newBytesN { // TODO(sgc): necessary? memory.Set(b.nullBitmap.Buf()[oldBytesN:], 0) } if newBits < b.length { b.length = newBits b.nulls = newBits - bitutil.CountSetBits(b.nullBitmap.Buf(), 0, newBits) } } func (b *builder) reserve(elements int, resize func(int)) { if b.nullBitmap == nil { b.nullBitmap = memory.NewResizableBuffer(b.mem) } if b.length+elements > b.capacity { newCap := bitutil.NextPowerOf2(b.length + elements) resize(newCap) } } // unsafeAppendBoolsToBitmap appends the contents of valid to the validity bitmap. // As an optimization, if the valid slice is empty, the next length bits will be set to valid (not null). 
func (b *builder) unsafeAppendBoolsToBitmap(valid []bool, length int) { if len(valid) == 0 { b.unsafeSetValid(length) return } byteOffset := b.length / 8 bitOffset := byte(b.length % 8) nullBitmap := b.nullBitmap.Bytes() bitSet := nullBitmap[byteOffset] for _, v := range valid { if bitOffset == 8 { bitOffset = 0 nullBitmap[byteOffset] = bitSet byteOffset++ bitSet = nullBitmap[byteOffset] } if v { bitSet |= bitutil.BitMask[bitOffset] } else { bitSet &= bitutil.FlippedBitMask[bitOffset] b.nulls++ } bitOffset++ } if bitOffset != 0 { nullBitmap[byteOffset] = bitSet } b.length += len(valid) } // unsafeSetValid sets the next length bits to valid in the validity bitmap. func (b *builder) unsafeSetValid(length int) { padToByte := min(8-(b.length%8), length) if padToByte == 8 { padToByte = 0 } bits := b.nullBitmap.Bytes() for i := b.length; i < b.length+padToByte; i++ { bitutil.SetBit(bits, i) } start := (b.length + padToByte) / 8 fastLength := (length - padToByte) / 8 memory.Set(bits[start:start+fastLength], 0xff) newLength := b.length + length // trailing bytes for i := b.length + padToByte + (fastLength * 8); i < newLength; i++ { bitutil.SetBit(bits, i) } b.length = newLength } func (b *builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder { // FIXME(sbinet): use a type switch on dtype instead? 
switch dtype.ID() { case arrow.NULL: return NewNullBuilder(mem) case arrow.BOOL: return NewBooleanBuilder(mem) case arrow.UINT8: return NewUint8Builder(mem) case arrow.INT8: return NewInt8Builder(mem) case arrow.UINT16: return NewUint16Builder(mem) case arrow.INT16: return NewInt16Builder(mem) case arrow.UINT32: return NewUint32Builder(mem) case arrow.INT32: return NewInt32Builder(mem) case arrow.UINT64: return NewUint64Builder(mem) case arrow.INT64: return NewInt64Builder(mem) case arrow.FLOAT16: return NewFloat16Builder(mem) case arrow.FLOAT32: return NewFloat32Builder(mem) case arrow.FLOAT64: return NewFloat64Builder(mem) case arrow.STRING: return NewStringBuilder(mem) case arrow.LARGE_STRING: return NewLargeStringBuilder(mem) case arrow.BINARY: return NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) case arrow.LARGE_BINARY: return NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) case arrow.FIXED_SIZE_BINARY: typ := dtype.(*arrow.FixedSizeBinaryType) return NewFixedSizeBinaryBuilder(mem, typ) case arrow.DATE32: return NewDate32Builder(mem) case arrow.DATE64: return NewDate64Builder(mem) case arrow.TIMESTAMP: typ := dtype.(*arrow.TimestampType) return NewTimestampBuilder(mem, typ) case arrow.TIME32: typ := dtype.(*arrow.Time32Type) return NewTime32Builder(mem, typ) case arrow.TIME64: typ := dtype.(*arrow.Time64Type) return NewTime64Builder(mem, typ) case arrow.INTERVAL_MONTHS: return NewMonthIntervalBuilder(mem) case arrow.INTERVAL_DAY_TIME: return NewDayTimeIntervalBuilder(mem) case arrow.INTERVAL_MONTH_DAY_NANO: return NewMonthDayNanoIntervalBuilder(mem) case arrow.DECIMAL32: if typ, ok := dtype.(*arrow.Decimal32Type); ok { return NewDecimal32Builder(mem, typ) } case arrow.DECIMAL64: if typ, ok := dtype.(*arrow.Decimal64Type); ok { return NewDecimal64Builder(mem, typ) } case arrow.DECIMAL128: if typ, ok := dtype.(*arrow.Decimal128Type); ok { return NewDecimal128Builder(mem, typ) } case arrow.DECIMAL256: if typ, ok := dtype.(*arrow.Decimal256Type); ok { 
return NewDecimal256Builder(mem, typ) } case arrow.LIST: typ := dtype.(*arrow.ListType) return NewListBuilderWithField(mem, typ.ElemField()) case arrow.STRUCT: typ := dtype.(*arrow.StructType) return NewStructBuilder(mem, typ) case arrow.SPARSE_UNION: typ := dtype.(*arrow.SparseUnionType) return NewSparseUnionBuilder(mem, typ) case arrow.DENSE_UNION: typ := dtype.(*arrow.DenseUnionType) return NewDenseUnionBuilder(mem, typ) case arrow.DICTIONARY: typ := dtype.(*arrow.DictionaryType) return NewDictionaryBuilder(mem, typ) case arrow.LARGE_LIST: typ := dtype.(*arrow.LargeListType) return NewLargeListBuilderWithField(mem, typ.ElemField()) case arrow.MAP: typ := dtype.(*arrow.MapType) return NewMapBuilderWithType(mem, typ) case arrow.LIST_VIEW: typ := dtype.(*arrow.ListViewType) return NewListViewBuilderWithField(mem, typ.ElemField()) case arrow.LARGE_LIST_VIEW: typ := dtype.(*arrow.LargeListViewType) return NewLargeListViewBuilderWithField(mem, typ.ElemField()) case arrow.EXTENSION: if custom, ok := dtype.(CustomExtensionBuilder); ok { return custom.NewBuilder(mem) } if typ, ok := dtype.(arrow.ExtensionType); ok { return NewExtensionBuilder(mem, typ) } panic(fmt.Errorf("arrow/array: invalid extension type: %T", dtype)) case arrow.FIXED_SIZE_LIST: typ := dtype.(*arrow.FixedSizeListType) return NewFixedSizeListBuilderWithField(mem, typ.Len(), typ.ElemField()) case arrow.DURATION: typ := dtype.(*arrow.DurationType) return NewDurationBuilder(mem, typ) case arrow.RUN_END_ENCODED: typ := dtype.(*arrow.RunEndEncodedType) return NewRunEndEncodedBuilder(mem, typ.RunEnds(), typ.Encoded()) case arrow.BINARY_VIEW: return NewBinaryViewBuilder(mem) case arrow.STRING_VIEW: return NewStringViewBuilder(mem) } panic(fmt.Errorf("arrow/array: unsupported builder for %T", dtype)) } arrow-go-18.2.0/arrow/array/builder_test.go000066400000000000000000000060061476434502500206370ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor 
license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "testing" "github.com/apache/arrow-go/v18/arrow/internal/testing/tools" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestBuilder_Init(t *testing.T) { type exp struct{ size int } tests := []struct { name string cap int exp exp }{ {"07 bits", 07, exp{size: 1}}, {"19 bits", 19, exp{size: 3}}, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { ab := &builder{mem: memory.NewGoAllocator()} ab.init(test.cap) assert.Equal(t, test.cap, ab.Cap(), "invalid capacity") assert.Equal(t, test.exp.size, ab.nullBitmap.Len(), "invalid length") }) } } func TestBuilder_UnsafeSetValid(t *testing.T) { ab := &builder{mem: memory.NewGoAllocator()} ab.init(32) ab.unsafeAppendBoolsToBitmap(tools.Bools(0, 0, 0, 0, 0), 5) assert.Equal(t, 5, ab.Len()) assert.Equal(t, []byte{0, 0, 0, 0}, ab.nullBitmap.Bytes()) ab.unsafeSetValid(17) assert.Equal(t, []byte{0xe0, 0xff, 0x3f, 0}, ab.nullBitmap.Bytes()) } func TestBuilder_resize(t *testing.T) { b := &builder{mem: memory.NewGoAllocator()} n := 64 b.init(n) assert.Equal(t, n, b.Cap()) assert.Equal(t, 0, b.Len()) b.UnsafeAppendBoolToBitmap(true) for i := 1; i < n; i++ { b.UnsafeAppendBoolToBitmap(false) } assert.Equal(t, n, b.Cap()) assert.Equal(t, n, b.Len()) assert.Equal(t, 
n-1, b.NullN()) n = 5 b.resize(n, b.init) assert.Equal(t, n, b.Len()) assert.Equal(t, n-1, b.NullN()) b.resize(32, b.init) assert.Equal(t, n, b.Len()) assert.Equal(t, n-1, b.NullN()) } func TestBuilder_IsNull(t *testing.T) { b := &builder{mem: memory.NewGoAllocator()} n := 32 b.init(n) assert.True(t, b.IsNull(0)) assert.True(t, b.IsNull(1)) for i := 0; i < n; i++ { b.UnsafeAppendBoolToBitmap(i%2 == 0) } for i := 0; i < n; i++ { assert.Equal(t, i%2 != 0, b.IsNull(i)) } } func TestBuilder_SetNull(t *testing.T) { b := &builder{mem: memory.NewGoAllocator()} n := 32 b.init(n) for i := 0; i < n; i++ { // Set everything to true b.UnsafeAppendBoolToBitmap(true) } for i := 0; i < n; i++ { if i%2 == 0 { // Set all even numbers to null b.SetNull(i) } } for i := 0; i < n; i++ { if i%2 == 0 { assert.True(t, b.IsNull(i)) } else { assert.False(t, b.IsNull(i)) } } } arrow-go-18.2.0/arrow/array/compare.go000066400000000000000000000546251476434502500176120ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array

import (
	"fmt"
	"math"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/float16"
	"github.com/apache/arrow-go/v18/internal/bitutils"
)

// RecordEqual reports whether the two provided records are equal.
// Records must have the same number of columns and rows, and every
// corresponding column pair must compare equal via Equal.
func RecordEqual(left, right arrow.Record) bool {
	// Cheap shape checks first; column comparison is the expensive part.
	switch {
	case left.NumCols() != right.NumCols():
		return false
	case left.NumRows() != right.NumRows():
		return false
	}

	for i := range left.Columns() {
		lc := left.Column(i)
		rc := right.Column(i)
		if !Equal(lc, rc) {
			return false
		}
	}
	return true
}

// RecordApproxEqual reports whether the two provided records are approximately equal.
// For non-floating point columns, it is equivalent to RecordEqual.
// The opts (e.g. WithAbsTolerance, WithNaNsEqual) control how floating
// point columns are compared.
func RecordApproxEqual(left, right arrow.Record, opts ...EqualOption) bool {
	switch {
	case left.NumCols() != right.NumCols():
		return false
	case left.NumRows() != right.NumRows():
		return false
	}

	// Resolve the functional options once, then reuse for every column.
	opt := newEqualOption(opts...)

	for i := range left.Columns() {
		lc := left.Column(i)
		rc := right.Column(i)
		if !arrayApproxEqual(lc, rc, opt) {
			return false
		}
	}
	return true
}

// helper function to evaluate a function on two chunked objects having possibly
// different chunk layouts. The function passed in will be called for each
// corresponding slice of the two chunked arrays, and if the function returns
// false it will end the loop early.
func chunkedBinaryApply(left, right *arrow.Chunked, fn func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool) { var ( pos int64 length int64 = int64(left.Len()) leftIdx, rightIdx int leftPos, rightPos int64 ) for pos < length { var cleft, cright arrow.Array for { cleft, cright = left.Chunk(leftIdx), right.Chunk(rightIdx) if leftPos == int64(cleft.Len()) { leftPos = 0 leftIdx++ continue } if rightPos == int64(cright.Len()) { rightPos = 0 rightIdx++ continue } break } sz := int64(min(cleft.Len()-int(leftPos), cright.Len()-int(rightPos))) pos += sz if !fn(cleft, leftPos, leftPos+sz, cright, rightPos, rightPos+sz) { return } leftPos += sz rightPos += sz } } // ChunkedEqual reports whether two chunked arrays are equal regardless of their chunkings func ChunkedEqual(left, right *arrow.Chunked) bool { switch { case left == right: return true case left.Len() != right.Len(): return false case left.NullN() != right.NullN(): return false case !arrow.TypeEqual(left.DataType(), right.DataType()): return false } var isequal bool = true chunkedBinaryApply(left, right, func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool { isequal = SliceEqual(left, lbeg, lend, right, rbeg, rend) return isequal }) return isequal } // ChunkedApproxEqual reports whether two chunked arrays are approximately equal regardless of their chunkings // for non-floating point arrays, this is equivalent to ChunkedEqual func ChunkedApproxEqual(left, right *arrow.Chunked, opts ...EqualOption) bool { switch { case left == right: return true case left.Len() != right.Len(): return false case left.NullN() != right.NullN(): return false case !arrow.TypeEqual(left.DataType(), right.DataType()): return false } var isequal bool chunkedBinaryApply(left, right, func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool { isequal = SliceApproxEqual(left, lbeg, lend, right, rbeg, rend, opts...) 
return isequal }) return isequal } // TableEqual returns if the two tables have the same data in the same schema func TableEqual(left, right arrow.Table) bool { switch { case left.NumCols() != right.NumCols(): return false case left.NumRows() != right.NumRows(): return false } for i := 0; int64(i) < left.NumCols(); i++ { lc := left.Column(i) rc := right.Column(i) if !lc.Field().Equal(rc.Field()) { return false } if !ChunkedEqual(lc.Data(), rc.Data()) { return false } } return true } // TableEqual returns if the two tables have the approximately equal data in the same schema func TableApproxEqual(left, right arrow.Table, opts ...EqualOption) bool { switch { case left.NumCols() != right.NumCols(): return false case left.NumRows() != right.NumRows(): return false } for i := 0; int64(i) < left.NumCols(); i++ { lc := left.Column(i) rc := right.Column(i) if !lc.Field().Equal(rc.Field()) { return false } if !ChunkedApproxEqual(lc.Data(), rc.Data(), opts...) { return false } } return true } // Equal reports whether the two provided arrays are equal. func Equal(left, right arrow.Array) bool { switch { case !baseArrayEqual(left, right): return false case left.Len() == 0: return true case left.NullN() == left.Len(): return true } // at this point, we know both arrays have same type, same length, same number of nulls // and nulls at the same place. // compare the values. 
switch l := left.(type) { case *Null: return true case *Boolean: r := right.(*Boolean) return arrayEqualBoolean(l, r) case *FixedSizeBinary: r := right.(*FixedSizeBinary) return arrayEqualFixedSizeBinary(l, r) case *Binary: r := right.(*Binary) return arrayEqualBinary(l, r) case *String: r := right.(*String) return arrayEqualString(l, r) case *LargeBinary: r := right.(*LargeBinary) return arrayEqualLargeBinary(l, r) case *LargeString: r := right.(*LargeString) return arrayEqualLargeString(l, r) case *BinaryView: r := right.(*BinaryView) return arrayEqualBinaryView(l, r) case *StringView: r := right.(*StringView) return arrayEqualStringView(l, r) case *Int8: r := right.(*Int8) return arrayEqualInt8(l, r) case *Int16: r := right.(*Int16) return arrayEqualInt16(l, r) case *Int32: r := right.(*Int32) return arrayEqualInt32(l, r) case *Int64: r := right.(*Int64) return arrayEqualInt64(l, r) case *Uint8: r := right.(*Uint8) return arrayEqualUint8(l, r) case *Uint16: r := right.(*Uint16) return arrayEqualUint16(l, r) case *Uint32: r := right.(*Uint32) return arrayEqualUint32(l, r) case *Uint64: r := right.(*Uint64) return arrayEqualUint64(l, r) case *Float16: r := right.(*Float16) return arrayEqualFloat16(l, r) case *Float32: r := right.(*Float32) return arrayEqualFloat32(l, r) case *Float64: r := right.(*Float64) return arrayEqualFloat64(l, r) case *Decimal32: r := right.(*Decimal32) return arrayEqualDecimal(l, r) case *Decimal64: r := right.(*Decimal64) return arrayEqualDecimal(l, r) case *Decimal128: r := right.(*Decimal128) return arrayEqualDecimal(l, r) case *Decimal256: r := right.(*Decimal256) return arrayEqualDecimal(l, r) case *Date32: r := right.(*Date32) return arrayEqualDate32(l, r) case *Date64: r := right.(*Date64) return arrayEqualDate64(l, r) case *Time32: r := right.(*Time32) return arrayEqualTime32(l, r) case *Time64: r := right.(*Time64) return arrayEqualTime64(l, r) case *Timestamp: r := right.(*Timestamp) return arrayEqualTimestamp(l, r) case *List: r 
:= right.(*List) return arrayEqualList(l, r) case *LargeList: r := right.(*LargeList) return arrayEqualLargeList(l, r) case *ListView: r := right.(*ListView) return arrayEqualListView(l, r) case *LargeListView: r := right.(*LargeListView) return arrayEqualLargeListView(l, r) case *FixedSizeList: r := right.(*FixedSizeList) return arrayEqualFixedSizeList(l, r) case *Struct: r := right.(*Struct) return arrayEqualStruct(l, r) case *MonthInterval: r := right.(*MonthInterval) return arrayEqualMonthInterval(l, r) case *DayTimeInterval: r := right.(*DayTimeInterval) return arrayEqualDayTimeInterval(l, r) case *MonthDayNanoInterval: r := right.(*MonthDayNanoInterval) return arrayEqualMonthDayNanoInterval(l, r) case *Duration: r := right.(*Duration) return arrayEqualDuration(l, r) case *Map: r := right.(*Map) return arrayEqualMap(l, r) case ExtensionArray: r := right.(ExtensionArray) return arrayEqualExtension(l, r) case *Dictionary: r := right.(*Dictionary) return arrayEqualDict(l, r) case *SparseUnion: r := right.(*SparseUnion) return arraySparseUnionEqual(l, r) case *DenseUnion: r := right.(*DenseUnion) return arrayDenseUnionEqual(l, r) case *RunEndEncoded: r := right.(*RunEndEncoded) return arrayRunEndEncodedEqual(l, r) default: panic(fmt.Errorf("arrow/array: unknown array type %T", l)) } } // SliceEqual reports whether slices left[lbeg:lend] and right[rbeg:rend] are equal. func SliceEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool { l := NewSlice(left, lbeg, lend) defer l.Release() r := NewSlice(right, rbeg, rend) defer r.Release() return Equal(l, r) } // SliceApproxEqual reports whether slices left[lbeg:lend] and right[rbeg:rend] are approximately equal. func SliceApproxEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64, opts ...EqualOption) bool { opt := newEqualOption(opts...) 
return sliceApproxEqual(left, lbeg, lend, right, rbeg, rend, opt) } func sliceApproxEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64, opt equalOption) bool { l := NewSlice(left, lbeg, lend) defer l.Release() r := NewSlice(right, rbeg, rend) defer r.Release() return arrayApproxEqual(l, r, opt) } const defaultAbsoluteTolerance = 1e-5 type equalOption struct { atol float64 // absolute tolerance nansEq bool // whether NaNs are considered equal. unorderedMapKeys bool // whether maps are allowed to have different entries order } func (eq equalOption) f16(f1, f2 float16.Num) bool { v1 := float64(f1.Float32()) v2 := float64(f2.Float32()) switch { case eq.nansEq: return math.Abs(v1-v2) <= eq.atol || (math.IsNaN(v1) && math.IsNaN(v2)) default: return math.Abs(v1-v2) <= eq.atol } } func (eq equalOption) f32(f1, f2 float32) bool { v1 := float64(f1) v2 := float64(f2) switch { case eq.nansEq: return v1 == v2 || math.Abs(v1-v2) <= eq.atol || (math.IsNaN(v1) && math.IsNaN(v2)) default: return v1 == v2 || math.Abs(v1-v2) <= eq.atol } } func (eq equalOption) f64(v1, v2 float64) bool { switch { case eq.nansEq: return v1 == v2 || math.Abs(v1-v2) <= eq.atol || (math.IsNaN(v1) && math.IsNaN(v2)) default: return v1 == v2 || math.Abs(v1-v2) <= eq.atol } } func newEqualOption(opts ...EqualOption) equalOption { eq := equalOption{ atol: defaultAbsoluteTolerance, nansEq: false, } for _, opt := range opts { opt(&eq) } return eq } // EqualOption is a functional option type used to configure how Records and Arrays are compared. type EqualOption func(*equalOption) // WithNaNsEqual configures the comparison functions so that NaNs are considered equal. func WithNaNsEqual(v bool) EqualOption { return func(o *equalOption) { o.nansEq = v } } // WithAbsTolerance configures the comparison functions so that 2 floating point values // v1 and v2 are considered equal if |v1-v2| <= atol. 
func WithAbsTolerance(atol float64) EqualOption { return func(o *equalOption) { o.atol = atol } } // WithUnorderedMapKeys configures the comparison functions so that Map with different entries order are considered equal. func WithUnorderedMapKeys(v bool) EqualOption { return func(o *equalOption) { o.unorderedMapKeys = v } } // ApproxEqual reports whether the two provided arrays are approximately equal. // For non-floating point arrays, it is equivalent to Equal. func ApproxEqual(left, right arrow.Array, opts ...EqualOption) bool { opt := newEqualOption(opts...) return arrayApproxEqual(left, right, opt) } func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool { switch { case !baseArrayEqual(left, right): return false case left.Len() == 0: return true case left.NullN() == left.Len(): return true } // at this point, we know both arrays have same type, same length, same number of nulls // and nulls at the same place. // compare the values. switch l := left.(type) { case *Null: return true case *Boolean: r := right.(*Boolean) return arrayEqualBoolean(l, r) case *FixedSizeBinary: r := right.(*FixedSizeBinary) return arrayEqualFixedSizeBinary(l, r) case *Binary: r := right.(*Binary) return arrayEqualBinary(l, r) case *String: r := right.(*String) return arrayApproxEqualString(l, r) case *LargeBinary: r := right.(*LargeBinary) return arrayEqualLargeBinary(l, r) case *LargeString: r := right.(*LargeString) return arrayApproxEqualLargeString(l, r) case *BinaryView: r := right.(*BinaryView) return arrayEqualBinaryView(l, r) case *StringView: r := right.(*StringView) return arrayApproxEqualStringView(l, r) case *Int8: r := right.(*Int8) return arrayEqualInt8(l, r) case *Int16: r := right.(*Int16) return arrayEqualInt16(l, r) case *Int32: r := right.(*Int32) return arrayEqualInt32(l, r) case *Int64: r := right.(*Int64) return arrayEqualInt64(l, r) case *Uint8: r := right.(*Uint8) return arrayEqualUint8(l, r) case *Uint16: r := right.(*Uint16) return 
arrayEqualUint16(l, r) case *Uint32: r := right.(*Uint32) return arrayEqualUint32(l, r) case *Uint64: r := right.(*Uint64) return arrayEqualUint64(l, r) case *Float16: r := right.(*Float16) return arrayApproxEqualFloat16(l, r, opt) case *Float32: r := right.(*Float32) return arrayApproxEqualFloat32(l, r, opt) case *Float64: r := right.(*Float64) return arrayApproxEqualFloat64(l, r, opt) case *Decimal32: r := right.(*Decimal32) return arrayEqualDecimal(l, r) case *Decimal64: r := right.(*Decimal64) return arrayEqualDecimal(l, r) case *Decimal128: r := right.(*Decimal128) return arrayEqualDecimal(l, r) case *Decimal256: r := right.(*Decimal256) return arrayEqualDecimal(l, r) case *Date32: r := right.(*Date32) return arrayEqualDate32(l, r) case *Date64: r := right.(*Date64) return arrayEqualDate64(l, r) case *Time32: r := right.(*Time32) return arrayEqualTime32(l, r) case *Time64: r := right.(*Time64) return arrayEqualTime64(l, r) case *Timestamp: r := right.(*Timestamp) return arrayEqualTimestamp(l, r) case *List: r := right.(*List) return arrayApproxEqualList(l, r, opt) case *LargeList: r := right.(*LargeList) return arrayApproxEqualLargeList(l, r, opt) case *ListView: r := right.(*ListView) return arrayApproxEqualListView(l, r, opt) case *LargeListView: r := right.(*LargeListView) return arrayApproxEqualLargeListView(l, r, opt) case *FixedSizeList: r := right.(*FixedSizeList) return arrayApproxEqualFixedSizeList(l, r, opt) case *Struct: r := right.(*Struct) return arrayApproxEqualStruct(l, r, opt) case *MonthInterval: r := right.(*MonthInterval) return arrayEqualMonthInterval(l, r) case *DayTimeInterval: r := right.(*DayTimeInterval) return arrayEqualDayTimeInterval(l, r) case *MonthDayNanoInterval: r := right.(*MonthDayNanoInterval) return arrayEqualMonthDayNanoInterval(l, r) case *Duration: r := right.(*Duration) return arrayEqualDuration(l, r) case *Map: r := right.(*Map) if opt.unorderedMapKeys { return arrayApproxEqualMap(l, r, opt) } return 
arrayApproxEqualList(l.List, r.List, opt) case *Dictionary: r := right.(*Dictionary) return arrayApproxEqualDict(l, r, opt) case ExtensionArray: r := right.(ExtensionArray) return arrayApproxEqualExtension(l, r, opt) case *SparseUnion: r := right.(*SparseUnion) return arraySparseUnionApproxEqual(l, r, opt) case *DenseUnion: r := right.(*DenseUnion) return arrayDenseUnionApproxEqual(l, r, opt) case *RunEndEncoded: r := right.(*RunEndEncoded) return arrayRunEndEncodedApproxEqual(l, r, opt) default: panic(fmt.Errorf("arrow/array: unknown array type %T", l)) } } func baseArrayEqual(left, right arrow.Array) bool { switch { case left.Len() != right.Len(): return false case left.NullN() != right.NullN(): return false case !arrow.TypeEqual(left.DataType(), right.DataType()): // We do not check for metadata as in the C++ implementation. return false case !validityBitmapEqual(left, right): return false } return true } func validityBitmapEqual(left, right arrow.Array) bool { // TODO(alexandreyc): make it faster by comparing byte slices of the validity bitmap? 
n := left.Len() if n != right.Len() { return false } for i := 0; i < n; i++ { if left.IsNull(i) != right.IsNull(i) { return false } } return true } func arrayApproxEqualString(left, right *String) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if stripNulls(left.Value(i)) != stripNulls(right.Value(i)) { return false } } return true } func arrayApproxEqualLargeString(left, right *LargeString) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if stripNulls(left.Value(i)) != stripNulls(right.Value(i)) { return false } } return true } func arrayApproxEqualStringView(left, right *StringView) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if stripNulls(left.Value(i)) != stripNulls(right.Value(i)) { return false } } return true } func arrayApproxEqualFloat16(left, right *Float16, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if !opt.f16(left.Value(i), right.Value(i)) { return false } } return true } func arrayApproxEqualFloat32(left, right *Float32, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if !opt.f32(left.Value(i), right.Value(i)) { return false } } return true } func arrayApproxEqualFloat64(left, right *Float64, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if !opt.f64(left.Value(i), right.Value(i)) { return false } } return true } func arrayApproxEqualList(left, right *List, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } o := func() bool { l := left.newListValue(i) defer l.Release() r := right.newListValue(i) defer r.Release() return arrayApproxEqual(l, r, opt) }() if !o { return false } } return true } func arrayApproxEqualLargeList(left, right *LargeList, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } o := func() bool { l := left.newListValue(i) defer l.Release() r := 
right.newListValue(i) defer r.Release() return arrayApproxEqual(l, r, opt) }() if !o { return false } } return true } func arrayApproxEqualListView(left, right *ListView, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } o := func() bool { l := left.newListValue(i) defer l.Release() r := right.newListValue(i) defer r.Release() return arrayApproxEqual(l, r, opt) }() if !o { return false } } return true } func arrayApproxEqualLargeListView(left, right *LargeListView, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } o := func() bool { l := left.newListValue(i) defer l.Release() r := right.newListValue(i) defer r.Release() return arrayApproxEqual(l, r, opt) }() if !o { return false } } return true } func arrayApproxEqualFixedSizeList(left, right *FixedSizeList, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } o := func() bool { l := left.newListValue(i) defer l.Release() r := right.newListValue(i) defer r.Release() return arrayApproxEqual(l, r, opt) }() if !o { return false } } return true } func arrayApproxEqualStruct(left, right *Struct, opt equalOption) bool { return bitutils.VisitSetBitRuns( left.NullBitmapBytes(), int64(left.Offset()), int64(left.Len()), approxEqualStructRun(left, right, opt), ) == nil } func approxEqualStructRun(left, right *Struct, opt equalOption) bitutils.VisitFn { return func(pos int64, length int64) error { for i := range left.fields { if !sliceApproxEqual(left.fields[i], pos, pos+length, right.fields[i], pos, pos+length, opt) { return arrow.ErrInvalid } } return nil } } // arrayApproxEqualMap doesn't care about the order of keys (in Go map traversal order is undefined) func arrayApproxEqualMap(left, right *Map, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if !arrayApproxEqualSingleMapEntry(left.newListValue(i).(*Struct), right.newListValue(i).(*Struct), opt) { return false } } 
return true } // arrayApproxEqualSingleMapEntry is a helper function that checks if a single entry pair is approx equal. // Basically, it doesn't care about key order. // structs passed will be released func arrayApproxEqualSingleMapEntry(left, right *Struct, opt equalOption) bool { defer left.Release() defer right.Release() // we don't compare the validity bitmap, but we want other checks from baseArrayEqual switch { case left.Len() != right.Len(): return false case left.NullN() != right.NullN(): return false case !arrow.TypeEqual(left.DataType(), right.DataType()): // We do not check for metadata as in the C++ implementation. return false case left.NullN() == left.Len(): return true } used := make(map[int]bool, right.Len()) for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } found := false lBeg, lEnd := int64(i), int64(i+1) for j := 0; j < right.Len(); j++ { if used[j] { continue } if right.IsNull(j) { used[j] = true continue } rBeg, rEnd := int64(j), int64(j+1) // check keys (field 0) if !sliceApproxEqual(left.Field(0), lBeg, lEnd, right.Field(0), rBeg, rEnd, opt) { continue } // only now check the values if sliceApproxEqual(left.Field(1), lBeg, lEnd, right.Field(1), rBeg, rEnd, opt) { found = true used[j] = true break } } if !found { return false } } return len(used) == right.Len() } arrow-go-18.2.0/arrow/array/compare_test.go000066400000000000000000000535001476434502500206400ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "fmt" "math" "sort" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/float16" "github.com/apache/arrow-go/v18/arrow/internal/arrdata" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestArrayEqual(t *testing.T) { for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { rec := recs[0] schema := rec.Schema() for i, col := range rec.Columns() { t.Run(schema.Field(i).Name, func(t *testing.T) { arr := col if !array.Equal(arr, arr) { t.Fatalf("identical arrays should compare equal:\narray=%v", arr) } sub1 := array.NewSlice(arr, 1, int64(arr.Len())) defer sub1.Release() sub2 := array.NewSlice(arr, 0, int64(arr.Len()-1)) defer sub2.Release() if array.Equal(sub1, sub2) && name != "nulls" { t.Fatalf("non-identical arrays should not compare equal:\nsub1=%v\nsub2=%v\narrf=%v\n", sub1, sub2, arr) } }) } }) } } func TestArraySliceEqual(t *testing.T) { for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { rec := recs[0] schema := rec.Schema() for i, col := range rec.Columns() { t.Run(schema.Field(i).Name, func(t *testing.T) { arr := col if !array.SliceEqual( arr, 0, int64(arr.Len()), arr, 0, int64(arr.Len()), ) { t.Fatalf("identical slices should compare equal:\narray=%v", arr) } sub1 := array.NewSlice(arr, 1, int64(arr.Len())) defer sub1.Release() sub2 := array.NewSlice(arr, 0, int64(arr.Len()-1)) defer sub2.Release() if array.SliceEqual(sub1, 0, int64(sub1.Len()), sub2, 0, 
int64(sub2.Len())) && name != "nulls" { t.Fatalf("non-identical slices should not compare equal:\nsub1=%v\nsub2=%v\narrf=%v\n", sub1, sub2, arr) } }) } }) } } func TestArrayApproxEqual(t *testing.T) { for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { rec := recs[0] schema := rec.Schema() for i, col := range rec.Columns() { t.Run(schema.Field(i).Name, func(t *testing.T) { arr := col if !array.ApproxEqual(arr, arr) { t.Fatalf("identical arrays should compare equal:\narray=%v", arr) } sub1 := array.NewSlice(arr, 1, int64(arr.Len())) defer sub1.Release() sub2 := array.NewSlice(arr, 0, int64(arr.Len()-1)) defer sub2.Release() if array.ApproxEqual(sub1, sub2) && name != "nulls" { t.Fatalf("non-identical arrays should not compare equal:\nsub1=%v\nsub2=%v\narrf=%v\n", sub1, sub2, arr) } }) } }) } } func TestArrayApproxEqualStrings(t *testing.T) { for _, tc := range []struct { name string a1 interface{} a2 interface{} want bool }{ { name: "string", a1: []string{"a", "b", "c", "d", "e", "f"}, a2: []string{"a", "b", "c", "d", "e", "f"}, want: true, }, { name: "string", a1: []string{"a", "b\x00"}, a2: []string{"a", "b"}, want: true, }, { name: "string", a1: []string{"a", "b\x00"}, a2: []string{"a\x00", "b"}, want: true, }, { name: "equal large strings", a1: []string{"a", "b", "c", "d", "e", "f"}, a2: []string{"a", "b", "c", "d", "e", "f"}, want: true, }, { name: "equal large strings with nulls", a1: []string{"a", "b\x00"}, a2: []string{"a", "b"}, want: true, }, { name: "equal large strings with nulls in both", a1: []string{"Apache", "Arrow\x00"}, a2: []string{"Apache\x00", "Arrow"}, want: true, }, { name: "equal string views", a1: []string{"a", "b", "c", "d", "e", "f"}, a2: []string{"a", "b", "c", "d", "e", "f"}, want: true, }, { name: "equal string views with nulls", a1: []string{"Apache", "Arrow\x00"}, a2: []string{"Apache", "Arrow"}, want: true, }, { name: "equal string views with nulls in both", a1: []string{"Apache", "Arrow\x00"}, a2: 
[]string{"Apache\x00", "Arrow"}, want: true, }, } { t.Run(tc.name, func(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var a1, a2 arrow.Array switch tc.name { case "equal large strings", "equal large strings with nulls", "equal large strings with nulls in both": a1 = arrayOfLargeString(mem, tc.a1.([]string), nil) a2 = arrayOfLargeString(mem, tc.a2.([]string), nil) case "equal string views", "equal string views with nulls", "equal string views with nulls in both": a1 = arrayOfStringView(mem, tc.a1.([]string), nil) a2 = arrayOfStringView(mem, tc.a2.([]string), nil) default: a1 = arrayOf(mem, tc.a1, nil) a2 = arrayOf(mem, tc.a2, nil) } defer a1.Release() defer a2.Release() if got, want := array.ApproxEqual(a1, a2), tc.want; got != want { t.Fatalf("invalid comparison: got=%v, want=%v\na1: %v\na2: %v\n", got, want, a1, a2) } }) } } func TestArrayApproxEqualFloats(t *testing.T) { f16sFrom := func(vs []float64) []float16.Num { o := make([]float16.Num, len(vs)) for i, v := range vs { o[i] = float16.New(float32(v)) } return o } for _, tc := range []struct { name string a1 interface{} a2 interface{} opts []array.EqualOption want bool }{ { name: "f16", a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), a2: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), want: true, }, { name: "f16-no-tol", a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), a2: f16sFrom([]float64{1, 2, 3, 4, 5, 7}), want: false, }, { name: "f16-tol-ok", a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), a2: f16sFrom([]float64{1, 2, 3, 4, 5, 7}), opts: []array.EqualOption{array.WithAbsTolerance(1)}, want: true, }, { name: "f16-nan", a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), a2: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), want: false, }, { name: "f16-nan-not", a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), a2: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), opts: []array.EqualOption{array.WithNaNsEqual(true)}, want: false, }, { name: "f16-nan-ok", a1: f16sFrom([]float64{1, 2, 3, 4, 5, 
math.NaN()}), a2: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), opts: []array.EqualOption{array.WithNaNsEqual(true)}, want: true, }, { name: "f16-nan-no-tol", a1: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), a2: f16sFrom([]float64{1, 2, 3, 4, 6, math.NaN()}), opts: []array.EqualOption{array.WithNaNsEqual(true)}, want: false, }, { name: "f16-nan-tol", a1: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), a2: f16sFrom([]float64{1, 2, 3, 4, 6, math.NaN()}), opts: []array.EqualOption{array.WithNaNsEqual(true), array.WithAbsTolerance(1)}, want: true, }, { name: "f32", a1: []float32{1, 2, 3, 4, 5, 6}, a2: []float32{1, 2, 3, 4, 5, 6}, want: true, }, { name: "f32-no-tol", a1: []float32{1, 2, 3, 4, 5, 6}, a2: []float32{1, 2, 3, 4, 5, 7}, want: false, }, { name: "f32-tol-ok", a1: []float32{1, 2, 3, 4, 5, 6}, a2: []float32{1, 2, 3, 4, 5, 7}, opts: []array.EqualOption{array.WithAbsTolerance(1)}, want: true, }, { name: "f32-nan", a1: []float32{1, 2, 3, 4, 5, 6}, a2: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, want: false, }, { name: "f32-nan-not", a1: []float32{1, 2, 3, 4, 5, 6}, a2: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, opts: []array.EqualOption{array.WithNaNsEqual(true)}, want: false, }, { name: "f32-nan-ok", a1: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, a2: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, opts: []array.EqualOption{array.WithNaNsEqual(true)}, want: true, }, { name: "f32-nan-no-tol", a1: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, a2: []float32{1, 2, 3, 4, 6, float32(math.NaN())}, opts: []array.EqualOption{array.WithNaNsEqual(true)}, want: false, }, { name: "f32-nan-tol", a1: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, a2: []float32{1, 2, 3, 4, 6, float32(math.NaN())}, opts: []array.EqualOption{array.WithNaNsEqual(true), array.WithAbsTolerance(1)}, want: true, }, { name: "f64", a1: []float64{1, 2, 3, 4, 5, 6}, a2: []float64{1, 2, 3, 4, 5, 6}, want: true, }, { name: "f64-no-tol", a1: []float64{1, 2, 3, 4, 5, 6}, a2: []float64{1, 2, 3, 4, 
5, 7}, want: false, }, { name: "f64-tol-ok", a1: []float64{1, 2, 3, 4, 5, 6}, a2: []float64{1, 2, 3, 4, 5, 7}, opts: []array.EqualOption{array.WithAbsTolerance(1)}, want: true, }, { name: "f64-nan", a1: []float64{1, 2, 3, 4, 5, 6}, a2: []float64{1, 2, 3, 4, 5, math.NaN()}, want: false, }, { name: "f64-nan-not", a1: []float64{1, 2, 3, 4, 5, 6}, a2: []float64{1, 2, 3, 4, 5, math.NaN()}, opts: []array.EqualOption{array.WithNaNsEqual(true)}, want: false, }, { name: "f64-nan-ok", a1: []float64{1, 2, 3, 4, 5, math.NaN()}, a2: []float64{1, 2, 3, 4, 5, math.NaN()}, opts: []array.EqualOption{array.WithNaNsEqual(true)}, want: true, }, { name: "f64-nan-no-tol", a1: []float64{1, 2, 3, 4, 5, math.NaN()}, a2: []float64{1, 2, 3, 4, 6, math.NaN()}, opts: []array.EqualOption{array.WithNaNsEqual(true)}, want: false, }, { name: "f64-nan-tol", a1: []float64{1, 2, 3, 4, 5, math.NaN()}, a2: []float64{1, 2, 3, 4, 6, math.NaN()}, opts: []array.EqualOption{array.WithNaNsEqual(true), array.WithAbsTolerance(1)}, want: true, }, } { t.Run(tc.name, func(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) a1 := arrayOf(mem, tc.a1, nil) defer a1.Release() a2 := arrayOf(mem, tc.a2, nil) defer a2.Release() if got, want := array.ApproxEqual(a1, a2, tc.opts...), tc.want; got != want { t.Fatalf("invalid comparison: got=%v, want=%v\na1: %v\na2: %v\n", got, want, a1, a2) } }) } } func testStringMap(mem memory.Allocator, m map[string]string, keys []string) *array.Map { dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.String) builder := array.NewMapBuilderWithType(mem, dt) defer builder.Release() key, item := builder.KeyBuilder().(*array.StringBuilder), builder.ItemBuilder().(*array.StringBuilder) builder.AppendNull() builder.Append(true) for _, k := range keys { key.Append(k) v, ok := m[k] if !ok { item.AppendNull() continue } item.Append(v) } return builder.NewMapArray() } func TestArrayApproxEqualMaps(t *testing.T) { mem := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) t.Run("different order", func(t *testing.T) { m := map[string]string{"x": "x", "y": "y", "z": "z"} keys := []string{"z", "y", "x", "null"} a := testStringMap(mem, m, keys) defer a.Release() asc := make([]string, len(keys)) copy(asc, keys) sort.Strings(asc) assert.NotEqual(t, keys, asc) b := testStringMap(mem, m, asc) defer b.Release() assert.False(t, array.ApproxEqual(a, b)) assert.True(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) }) t.Run("extra left value", func(t *testing.T) { m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra"} aKeys := []string{"z", "y", "x", "extra"} a := testStringMap(mem, m, aKeys) defer a.Release() bKeys := []string{"z", "y", "x"} b := testStringMap(mem, m, bKeys) defer b.Release() assert.NotEqual(t, aKeys, bKeys) assert.Equal(t, a.NullN(), b.NullN()) assert.False(t, array.ApproxEqual(a, b)) assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) }) t.Run("extra right value", func(t *testing.T) { m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra"} aKeys := []string{"z", "y", "x"} a := testStringMap(mem, m, aKeys) defer a.Release() bKeys := []string{"z", "y", "x", "extra"} b := testStringMap(mem, m, bKeys) defer b.Release() assert.NotEqual(t, aKeys, bKeys) assert.Equal(t, a.NullN(), b.NullN()) assert.False(t, array.ApproxEqual(a, b)) assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) }) t.Run("unmatched value", func(t *testing.T) { m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra", "extra2": "extra"} aKeys := []string{"z", "y", "x", "extra"} a := testStringMap(mem, m, aKeys) defer a.Release() bKeys := []string{"z", "y", "x", "extra2"} b := testStringMap(mem, m, bKeys) defer b.Release() assert.NotEqual(t, aKeys, bKeys) assert.Equal(t, a.NullN(), b.NullN()) assert.False(t, array.ApproxEqual(a, b)) assert.False(t, array.ApproxEqual(a, b, 
array.WithUnorderedMapKeys(true))) }) t.Run("different value", func(t *testing.T) { m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra"} keys := []string{"z", "y", "x", "extra"} a := testStringMap(mem, m, keys) defer a.Release() m["extra"] = "different" b := testStringMap(mem, m, keys) defer b.Release() assert.Equal(t, a.NullN(), b.NullN()) assert.False(t, array.ApproxEqual(a, b)) assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) }) } func arrayOf(mem memory.Allocator, a interface{}, valids []bool) arrow.Array { if mem == nil { mem = memory.NewGoAllocator() } switch a := a.(type) { case []float16.Num: bldr := array.NewFloat16Builder(mem) defer bldr.Release() bldr.AppendValues(a, valids) return bldr.NewFloat16Array() case []float32: bldr := array.NewFloat32Builder(mem) defer bldr.Release() bldr.AppendValues(a, valids) return bldr.NewFloat32Array() case []float64: bldr := array.NewFloat64Builder(mem) defer bldr.Release() bldr.AppendValues(a, valids) return bldr.NewFloat64Array() case []string: bldr := array.NewStringBuilder(mem) defer bldr.Release() bldr.AppendValues(a, valids) return bldr.NewStringArray() default: panic(fmt.Errorf("arrdata: invalid data slice type %T", a)) } } func arrayOfLargeString(mem memory.Allocator, a []string, valids []bool) arrow.Array { bldr := array.NewLargeStringBuilder(mem) defer bldr.Release() bldr.AppendValues(a, valids) return bldr.NewLargeStringArray() } func arrayOfStringView(mem memory.Allocator, a []string, valids []bool) arrow.Array { bldr := array.NewStringViewBuilder(mem) defer bldr.Release() bldr.AppendValues(a, valids) return bldr.NewStringViewArray() } func TestArrayEqualBaseArray(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b1 := array.NewBooleanBuilder(mem) defer b1.Release() b1.Append(true) a1 := b1.NewBooleanArray() defer a1.Release() b2 := array.NewBooleanBuilder(mem) defer b2.Release() a2 := b2.NewBooleanArray() defer 
a2.Release() if array.Equal(a1, a2) { t.Errorf("two arrays with different lengths must not be equal") } b3 := array.NewBooleanBuilder(mem) defer b3.Release() b3.AppendNull() a3 := b3.NewBooleanArray() defer a3.Release() if array.Equal(a1, a3) { t.Errorf("two arrays with different number of null values must not be equal") } b4 := array.NewInt32Builder(mem) defer b4.Release() b4.Append(0) a4 := b4.NewInt32Array() defer a4.Release() if array.Equal(a1, a4) { t.Errorf("two arrays with different types must not be equal") } b5 := array.NewBooleanBuilder(mem) defer b5.Release() b5.AppendNull() b5.Append(true) a5 := b5.NewBooleanArray() defer a5.Release() b1.AppendNull() if array.Equal(a1, a5) { t.Errorf("two arrays with different validity bitmaps must not be equal") } } func TestArrayEqualNull(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) null := array.NewNull(0) defer null.Release() if !array.Equal(null, null) { t.Fatalf("identical arrays should compare equal") } n0 := array.NewNull(10) defer n0.Release() n1 := array.NewNull(10) defer n1.Release() if !array.Equal(n0, n0) { t.Fatalf("identical arrays should compare equal") } if !array.Equal(n1, n1) { t.Fatalf("identical arrays should compare equal") } if !array.Equal(n0, n1) || !array.Equal(n1, n0) { t.Fatalf("n0 and n1 should compare equal") } sub07 := array.NewSlice(n0, 0, 7) defer sub07.Release() sub08 := array.NewSlice(n0, 0, 8) defer sub08.Release() sub19 := array.NewSlice(n0, 1, 9) defer sub19.Release() if !array.Equal(sub08, sub19) { t.Fatalf("sub08 and sub19 should compare equal") } if array.Equal(sub08, sub07) { t.Fatalf("sub08 and sub07 should not compare equal") } } func TestArrayEqualMaskedArray(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt32Builder(mem) defer ab.Release() valids := []bool{false, false, false, false} ab.AppendValues([]int32{1, 2, 0, 4}, valids) a1 := 
ab.NewInt32Array() defer a1.Release() ab.AppendValues([]int32{1, 2, 3, 4}, valids) a2 := ab.NewInt32Array() defer a2.Release() if !array.Equal(a1, a1) || !array.Equal(a2, a2) { t.Errorf("an array must be equal to itself") } if !array.Equal(a1, a2) { t.Errorf("%v must be equal to %v", a1, a2) } } func TestArrayEqualDifferentMaskedValues(t *testing.T) { // test 2 int32 arrays, with same nulls (but different masked values) compare equal. mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt32Builder(mem) defer ab.Release() valids := []bool{true, true, false, true} ab.AppendValues([]int32{1, 2, 0, 4}, valids) a1 := ab.NewInt32Array() defer a1.Release() ab.AppendValues([]int32{1, 2, 3, 4}, valids) a2 := ab.NewInt32Array() defer a2.Release() if !array.Equal(a1, a1) || !array.Equal(a2, a2) { t.Errorf("an array must be equal to itself") } if !array.Equal(a1, a2) { t.Errorf("%v must be equal to %v", a1, a2) } } func TestRecordEqual(t *testing.T) { for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { rec0 := recs[0] rec1 := recs[1] if !array.RecordEqual(rec0, rec0) { t.Fatalf("identical records should compare equal:\nrecord:\n%v", rec0) } if array.RecordEqual(rec0, rec1) && name != "nulls" { t.Fatalf("non-identical records should not compare equal:\nrec0:\n%v\nrec1:\n%v", rec0, rec1) } sub00 := rec0.NewSlice(0, recs[0].NumRows()-1) defer sub00.Release() sub01 := rec0.NewSlice(1, recs[0].NumRows()) defer sub01.Release() if array.RecordEqual(sub00, sub01) && name != "nulls" { t.Fatalf("non-identical records should not compare equal:\nsub0:\n%v\nsub1:\n%v", sub00, sub01) } }) } } func TestRecordApproxEqual(t *testing.T) { for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { rec0 := recs[0] rec1 := recs[1] if !array.RecordApproxEqual(rec0, rec0) { t.Fatalf("identical records should compare equal:\nrecord:\n%v", rec0) } if array.RecordApproxEqual(rec0, rec1) && name != "nulls" { 
t.Fatalf("non-identical records should not compare equal:\nrec0:\n%v\nrec1:\n%v", rec0, rec1) } sub00 := rec0.NewSlice(0, recs[0].NumRows()-1) defer sub00.Release() sub01 := rec0.NewSlice(1, recs[0].NumRows()) defer sub01.Release() if array.RecordApproxEqual(sub00, sub01) && name != "nulls" { t.Fatalf("non-identical records should not compare equal:\nsub0:\n%v\nsub1:\n%v", sub00, sub01) } }) } } func TestChunkedEqual(t *testing.T) { for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { tbl := array.NewTableFromRecords(recs[0].Schema(), recs) defer tbl.Release() for i := 0; i < int(tbl.NumCols()); i++ { if !array.ChunkedEqual(tbl.Column(i).Data(), tbl.Column(i).Data()) && name != "nulls" { t.Fatalf("identical chunked arrays should compare as equal:\narr:%v\n", tbl.Column(i).Data()) } } }) } } func TestChunkedApproxEqual(t *testing.T) { fb := array.NewFloat64Builder(memory.DefaultAllocator) defer fb.Release() fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) f1 := fb.NewFloat64Array() defer f1.Release() fb.AppendValues([]float64{6, 7}, nil) f2 := fb.NewFloat64Array() defer f2.Release() fb.AppendValues([]float64{8, 9, 10}, nil) f3 := fb.NewFloat64Array() defer f3.Release() c1 := arrow.NewChunked( arrow.PrimitiveTypes.Float64, []arrow.Array{f1, f2, f3}, ) defer c1.Release() fb.AppendValues([]float64{1, 2, 3}, nil) f4 := fb.NewFloat64Array() defer f4.Release() fb.AppendValues([]float64{4, 5}, nil) f5 := fb.NewFloat64Array() defer f5.Release() fb.AppendValues([]float64{6, 7, 8, 9}, nil) f6 := fb.NewFloat64Array() defer f6.Release() fb.AppendValues([]float64{10}, nil) f7 := fb.NewFloat64Array() defer f7.Release() c2 := arrow.NewChunked( arrow.PrimitiveTypes.Float64, []arrow.Array{f4, f5, f6, f7}, ) defer c2.Release() assert.True(t, array.ChunkedEqual(c1, c2)) assert.True(t, array.ChunkedApproxEqual(c1, c2)) } func TestTableEqual(t *testing.T) { for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { tbl := 
array.NewTableFromRecords(recs[0].Schema(), recs) defer tbl.Release() if !array.TableEqual(tbl, tbl) { t.Fatalf("identical tables should compare as equal:\tbl:%v\n", tbl) } if !array.TableApproxEqual(tbl, tbl) { t.Fatalf("identical tables should compare as approx equal:\tbl:%v\n", tbl) } }) } } arrow-go-18.2.0/arrow/array/concat.go000066400000000000000000000733201476434502500174240ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "errors" "fmt" "math" "math/bits" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/encoded" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/bitutils" "github.com/apache/arrow-go/v18/internal/utils" ) // Concatenate creates a new arrow.Array which is the concatenation of the // passed in arrays. Returns nil if an error is encountered. // // The passed in arrays still need to be released manually, and will not be // released by this function. 
func Concatenate(arrs []arrow.Array, mem memory.Allocator) (result arrow.Array, err error) { if len(arrs) == 0 { return nil, errors.New("array/concat: must pass at least one array") } // gather Data of inputs data := make([]arrow.ArrayData, len(arrs)) for i, ar := range arrs { if !arrow.TypeEqual(ar.DataType(), arrs[0].DataType()) { return nil, fmt.Errorf("arrays to be concatenated must be identically typed, but %s and %s were encountered", arrs[0].DataType(), ar.DataType()) } data[i] = ar.Data() } out, err := concat(data, mem) if err != nil { return nil, err } defer out.Release() return MakeFromData(out), nil } // simple struct to hold ranges type rng struct { offset, len int } // simple bitmap struct to reference a specific slice of a bitmap where the range // offset and length are in bits type bitmap struct { data []byte rng rng } // gather up the bitmaps from the passed in data objects func gatherBitmaps(data []arrow.ArrayData, idx int) []bitmap { out := make([]bitmap, len(data)) for i, d := range data { if d.Buffers()[idx] != nil { out[i].data = d.Buffers()[idx].Bytes() } out[i].rng.offset = d.Offset() out[i].rng.len = d.Len() } return out } // gatherFixedBuffers gathers up the buffer objects of the given index, specifically // returning only the slices of the buffers which are relevant to the passed in arrays // in case they are themselves slices of other arrays. nil buffers are ignored and not // in the output slice. func gatherFixedBuffers(data []arrow.ArrayData, idx, byteWidth int) []*memory.Buffer { out := make([]*memory.Buffer, 0, len(data)) for _, d := range data { buf := d.Buffers()[idx] if buf == nil { continue } out = append(out, memory.NewBufferBytes(buf.Bytes()[d.Offset()*byteWidth:(d.Offset()+d.Len())*byteWidth])) } return out } // gatherBuffersFixedWidthType is like gatherFixedBuffers, but uses a datatype to determine the size // to use for determining the byte slice rather than a passed in bytewidth. 
func gatherBuffersFixedWidthType(data []arrow.ArrayData, idx int, fixed arrow.FixedWidthDataType) []*memory.Buffer { return gatherFixedBuffers(data, idx, fixed.BitWidth()/8) } // gatherBufferRanges requires that len(ranges) == len(data) and returns a list of buffers // which represent the corresponding range of each buffer in the specified index of each // data object. func gatherBufferRanges(data []arrow.ArrayData, idx int, ranges []rng) []*memory.Buffer { out := make([]*memory.Buffer, 0, len(data)) for i, d := range data { buf := d.Buffers()[idx] if buf == nil { debug.Assert(ranges[i].len == 0, "misaligned buffer value ranges") continue } out = append(out, memory.NewBufferBytes(buf.Bytes()[ranges[i].offset:ranges[i].offset+ranges[i].len])) } return out } // gatherChildren gathers the children data objects for child of index idx for all of the data objects. func gatherChildren(data []arrow.ArrayData, idx int) []arrow.ArrayData { return gatherChildrenMultiplier(data, idx, 1) } // gatherChildrenMultiplier gathers the full data slice of the underlying values from the children data objects // such as the values data for a list array so that it can return a slice of the buffer for a given // index into the children. func gatherChildrenMultiplier(data []arrow.ArrayData, idx, multiplier int) []arrow.ArrayData { out := make([]arrow.ArrayData, len(data)) for i, d := range data { out[i] = NewSliceData(d.Children()[idx], int64(d.Offset()*multiplier), int64(d.Offset()+d.Len())*int64(multiplier)) } return out } // gatherChildrenRanges returns a slice of Data objects which each represent slices of the given ranges from the // child in the specified index from each data object. 
func gatherChildrenRanges(data []arrow.ArrayData, idx int, ranges []rng) []arrow.ArrayData { debug.Assert(len(data) == len(ranges), "mismatched children ranges for concat") out := make([]arrow.ArrayData, len(data)) for i, d := range data { out[i] = NewSliceData(d.Children()[idx], int64(ranges[i].offset), int64(ranges[i].offset+ranges[i].len)) } return out } // creates a single contiguous buffer which contains the concatenation of all of the passed // in buffer objects. func concatBuffers(bufs []*memory.Buffer, mem memory.Allocator) *memory.Buffer { outLen := 0 for _, b := range bufs { outLen += b.Len() } out := memory.NewResizableBuffer(mem) out.Resize(outLen) data := out.Bytes() for _, b := range bufs { copy(data, b.Bytes()) data = data[b.Len():] } return out } func handle32BitOffsets(outLen int, buffers []*memory.Buffer, out *memory.Buffer) (*memory.Buffer, []rng, error) { dst := arrow.Int32Traits.CastFromBytes(out.Bytes()) valuesRanges := make([]rng, len(buffers)) nextOffset := int32(0) nextElem := int(0) for i, b := range buffers { if b.Len() == 0 { valuesRanges[i].offset = 0 valuesRanges[i].len = 0 continue } // when we gather our buffers, we sliced off the last offset from the buffer // so that we could count the lengths accurately src := arrow.Int32Traits.CastFromBytes(b.Bytes()) valuesRanges[i].offset = int(src[0]) // expand our slice to see that final offset expand := src[:len(src)+1] // compute the length of this range by taking the final offset and subtracting where we started. 
valuesRanges[i].len = int(expand[len(src)]) - valuesRanges[i].offset if nextOffset > math.MaxInt32-int32(valuesRanges[i].len) { return nil, nil, errors.New("offset overflow while concatenating arrays") } // adjust each offset by the difference between our last ending point and our starting point adj := nextOffset - src[0] for j, o := range src { dst[nextElem+j] = adj + o } // the next index for an element in the output buffer nextElem += b.Len() / arrow.Int32SizeBytes // update our offset counter to be the total current length of our output nextOffset += int32(valuesRanges[i].len) } // final offset should point to the end of the data dst[outLen] = nextOffset return out, valuesRanges, nil } func unifyDictionaries(mem memory.Allocator, data []arrow.ArrayData, dt *arrow.DictionaryType) ([]*memory.Buffer, arrow.Array, error) { unifier, err := NewDictionaryUnifier(mem, dt.ValueType) if err != nil { return nil, nil, err } defer unifier.Release() newLookup := make([]*memory.Buffer, len(data)) for i, d := range data { dictArr := MakeFromData(d.Dictionary()) defer dictArr.Release() newLookup[i], err = unifier.UnifyAndTranspose(dictArr) if err != nil { return nil, nil, err } } unified, err := unifier.GetResultWithIndexType(dt.IndexType) if err != nil { for _, b := range newLookup { b.Release() } return nil, nil, err } return newLookup, unified, nil } func concatDictIndices(mem memory.Allocator, data []arrow.ArrayData, idxType arrow.FixedWidthDataType, transpositions []*memory.Buffer) (out *memory.Buffer, err error) { defer func() { if err != nil && out != nil { out.Release() out = nil } }() idxWidth := idxType.BitWidth() / 8 outLen := 0 for i, d := range data { outLen += d.Len() defer transpositions[i].Release() } out = memory.NewResizableBuffer(mem) out.Resize(outLen * idxWidth) outData := out.Bytes() for i, d := range data { transposeMap := arrow.Int32Traits.CastFromBytes(transpositions[i].Bytes()) src := d.Buffers()[1].Bytes() if d.Buffers()[0] == nil { if err = 
utils.TransposeIntsBuffers(idxType, idxType, src, outData, d.Offset(), 0, d.Len(), transposeMap); err != nil { return } } else { rdr := bitutils.NewBitRunReader(d.Buffers()[0].Bytes(), int64(d.Offset()), int64(d.Len())) pos := 0 for { run := rdr.NextRun() if run.Len == 0 { break } if run.Set { err = utils.TransposeIntsBuffers(idxType, idxType, src, outData, d.Offset()+pos, pos, int(run.Len), transposeMap) if err != nil { return } } else { memory.Set(outData[pos:pos+(int(run.Len)*idxWidth)], 0x00) } pos += int(run.Len) } } outData = outData[d.Len()*idxWidth:] } return } func handle64BitOffsets(outLen int, buffers []*memory.Buffer, out *memory.Buffer) (*memory.Buffer, []rng, error) { dst := arrow.Int64Traits.CastFromBytes(out.Bytes()) valuesRanges := make([]rng, len(buffers)) nextOffset := int64(0) nextElem := int(0) for i, b := range buffers { if b.Len() == 0 { valuesRanges[i].offset = 0 valuesRanges[i].len = 0 continue } // when we gather our buffers, we sliced off the last offset from the buffer // so that we could count the lengths accurately src := arrow.Int64Traits.CastFromBytes(b.Bytes()) valuesRanges[i].offset = int(src[0]) // expand our slice to see that final offset expand := src[:len(src)+1] // compute the length of this range by taking the final offset and subtracting where we started. 
valuesRanges[i].len = int(expand[len(src)]) - valuesRanges[i].offset if nextOffset > math.MaxInt64-int64(valuesRanges[i].len) { return nil, nil, errors.New("offset overflow while concatenating arrays") } // adjust each offset by the difference between our last ending point and our starting point adj := nextOffset - src[0] for j, o := range src { dst[nextElem+j] = adj + o } // the next index for an element in the output buffer nextElem += b.Len() / arrow.Int64SizeBytes // update our offset counter to be the total current length of our output nextOffset += int64(valuesRanges[i].len) } // final offset should point to the end of the data dst[outLen] = nextOffset return out, valuesRanges, nil } // concatOffsets creates a single offset buffer which represents the concatenation of all of the // offsets buffers, adjusting the offsets appropriately to their new relative locations. // // It also returns the list of ranges that need to be fetched for the corresponding value buffers // to construct the final concatenated value buffer. 
func concatOffsets(buffers []*memory.Buffer, byteWidth int, mem memory.Allocator) (*memory.Buffer, []rng, error) { outLen := 0 for _, b := range buffers { outLen += b.Len() / byteWidth } out := memory.NewResizableBuffer(mem) out.Resize(byteWidth * (outLen + 1)) switch byteWidth { case arrow.Int64SizeBytes: return handle64BitOffsets(outLen, buffers, out) default: return handle32BitOffsets(outLen, buffers, out) } } func sumArraySizes(data []arrow.ArrayData) int { outSize := 0 for _, arr := range data { outSize += arr.Len() } return outSize } func getListViewBufferValues[T int32 | int64](data arrow.ArrayData, i int) []T { bytes := data.Buffers()[i].Bytes() base := (*T)(unsafe.Pointer(&bytes[0])) ret := unsafe.Slice(base, data.Offset()+data.Len()) return ret[data.Offset():] } func putListViewOffsets32(in arrow.ArrayData, displacement int32, out *memory.Buffer, outOff int) { debug.Assert(in.DataType().ID() == arrow.LIST_VIEW, "putListViewOffsets32: expected LIST_VIEW data") inOff, inLen := in.Offset(), in.Len() if inLen == 0 { return } bitmap := in.Buffers()[0] srcOffsets := getListViewBufferValues[int32](in, 1) srcSizes := getListViewBufferValues[int32](in, 2) isValidAndNonEmpty := func(i int) bool { return (bitmap == nil || bitutil.BitIsSet(bitmap.Bytes(), inOff+i)) && srcSizes[i] > 0 } dstOffsets := arrow.Int32Traits.CastFromBytes(out.Bytes()) for i, offset := range srcOffsets { if isValidAndNonEmpty(i) { // This is guaranteed by RangeOfValuesUsed returning the smallest offset // of valid and non-empty list-views. 
debug.Assert(offset+displacement >= 0, "putListViewOffsets32: offset underflow while concatenating arrays") dstOffsets[outOff+i] = offset + displacement } else { dstOffsets[outOff+i] = 0 } } } func putListViewOffsets64(in arrow.ArrayData, displacement int64, out *memory.Buffer, outOff int) { debug.Assert(in.DataType().ID() == arrow.LARGE_LIST_VIEW, "putListViewOffsets64: expected LARGE_LIST_VIEW data") inOff, inLen := in.Offset(), in.Len() if inLen == 0 { return } bitmap := in.Buffers()[0] srcOffsets := getListViewBufferValues[int64](in, 1) srcSizes := getListViewBufferValues[int64](in, 2) isValidAndNonEmpty := func(i int) bool { return (bitmap == nil || bitutil.BitIsSet(bitmap.Bytes(), inOff+i)) && srcSizes[i] > 0 } dstOffsets := arrow.Int64Traits.CastFromBytes(out.Bytes()) for i, offset := range srcOffsets { if isValidAndNonEmpty(i) { // This is guaranteed by RangeOfValuesUsed returning the smallest offset // of valid and non-empty list-views. debug.Assert(offset+displacement >= 0, "putListViewOffsets64: offset underflow while concatenating arrays") dstOffsets[outOff+i] = offset + displacement } else { dstOffsets[outOff+i] = 0 } } } // Concatenate buffers holding list-view offsets into a single buffer of offsets // // valueRanges contains the relevant ranges of values in the child array actually // referenced to by the views. Most commonly, these ranges will start from 0, // but when that is not the case, we need to adjust the displacement of offsets. // The concatenated child array does not contain values from the beginning // if they are not referenced to by any view. 
func concatListViewOffsets(data []arrow.ArrayData, byteWidth int, valueRanges []rng, mem memory.Allocator) (*memory.Buffer, error) { outSize := sumArraySizes(data) if byteWidth == 4 && outSize > math.MaxInt32 { return nil, fmt.Errorf("%w: offset overflow while concatenating arrays", arrow.ErrInvalid) } out := memory.NewResizableBuffer(mem) out.Resize(byteWidth * outSize) numChildValues, elementsLength := 0, 0 for i, arr := range data { displacement := numChildValues - valueRanges[i].offset if byteWidth == 4 { putListViewOffsets32(arr, int32(displacement), out, elementsLength) } else { putListViewOffsets64(arr, int64(displacement), out, elementsLength) } elementsLength += arr.Len() numChildValues += valueRanges[i].len } debug.Assert(elementsLength == outSize, "implementation error") return out, nil } func zeroNullListViewSizes[T int32 | int64](data arrow.ArrayData) { if data.Len() == 0 || data.Buffers()[0] == nil { return } validity := data.Buffers()[0].Bytes() sizes := getListViewBufferValues[T](data, 2) for i := 0; i < data.Len(); i++ { if !bitutil.BitIsSet(validity, data.Offset()+i) { sizes[i] = 0 } } } func concatListView(data []arrow.ArrayData, offsetType arrow.FixedWidthDataType, out *Data, mem memory.Allocator) (err error) { // Calculate the ranges of values that each list-view array uses valueRanges := make([]rng, len(data)) for i, input := range data { offset, len := rangeOfValuesUsed(input) valueRanges[i].offset = offset valueRanges[i].len = len } // Gather the children ranges of each input array childData := gatherChildrenRanges(data, 0, valueRanges) for _, c := range childData { defer c.Release() } // Concatenate the values values, err := concat(childData, mem) if err != nil { return err } // Concatenate the offsets offsetBuffer, err := concatListViewOffsets(data, offsetType.Bytes(), valueRanges, mem) if err != nil { return err } // Concatenate the sizes sizeBuffers := gatherBuffersFixedWidthType(data, 2, offsetType) sizeBuffer := 
concatBuffers(sizeBuffers, mem) out.childData = []arrow.ArrayData{values} out.buffers[1] = offsetBuffer out.buffers[2] = sizeBuffer // To make sure the sizes don't reference values that are not in the new // concatenated values array, we zero the sizes of null list-view values. if offsetType.ID() == arrow.INT32 { zeroNullListViewSizes[int32](out) } else { zeroNullListViewSizes[int64](out) } return nil } // concat is the implementation for actually performing the concatenation of the arrow.ArrayData // objects that we can call internally for nested types. func concat(data []arrow.ArrayData, mem memory.Allocator) (arr arrow.ArrayData, err error) { out := &Data{refCount: 1, dtype: data[0].DataType(), nulls: 0} defer func() { if pErr := recover(); pErr != nil { err = utils.FormatRecoveredError("arrow/concat", pErr) } if err != nil { out.Release() } }() for _, d := range data { out.length += d.Len() if out.nulls == UnknownNullCount || d.NullN() == UnknownNullCount { out.nulls = UnknownNullCount continue } out.nulls += d.NullN() } out.buffers = make([]*memory.Buffer, len(data[0].Buffers())) if out.nulls != 0 && out.dtype.ID() != arrow.NULL { bm, err := concatBitmaps(gatherBitmaps(data, 0), mem) if err != nil { return nil, err } out.buffers[0] = bm } dt := out.dtype if dt.ID() == arrow.EXTENSION { dt = dt.(arrow.ExtensionType).StorageType() } switch dt := dt.(type) { case *arrow.NullType: case *arrow.BooleanType: bm, err := concatBitmaps(gatherBitmaps(data, 1), mem) if err != nil { return nil, err } out.buffers[1] = bm case *arrow.DictionaryType: idxType := dt.IndexType.(arrow.FixedWidthDataType) // two cases: all dictionaries are the same or we need to unify them dictsSame := true dict0 := MakeFromData(data[0].Dictionary()) defer dict0.Release() for _, d := range data { dict := MakeFromData(d.Dictionary()) if !Equal(dict0, dict) { dict.Release() dictsSame = false break } dict.Release() } indexBuffers := gatherBuffersFixedWidthType(data, 1, idxType) if dictsSame { 
out.dictionary = dict0.Data().(*Data) out.dictionary.Retain() out.buffers[1] = concatBuffers(indexBuffers, mem) break } indexLookup, unifiedDict, err := unifyDictionaries(mem, data, dt) if err != nil { return nil, err } defer unifiedDict.Release() out.dictionary = unifiedDict.Data().(*Data) out.dictionary.Retain() out.buffers[1], err = concatDictIndices(mem, data, idxType, indexLookup) if err != nil { return nil, err } case arrow.FixedWidthDataType: out.buffers[1] = concatBuffers(gatherBuffersFixedWidthType(data, 1, dt), mem) case arrow.BinaryViewDataType: out.buffers = out.buffers[:2] for _, d := range data { for _, buf := range d.Buffers()[2:] { buf.Retain() out.buffers = append(out.buffers, buf) } } out.buffers[1] = concatBuffers(gatherFixedBuffers(data, 1, arrow.ViewHeaderSizeBytes), mem) var ( s = arrow.ViewHeaderTraits.CastFromBytes(out.buffers[1].Bytes()) i = data[0].Len() precedingBufsCount int ) for idx := 1; idx < len(data); idx++ { precedingBufsCount += len(data[idx-1].Buffers()) - 2 for end := i + data[idx].Len(); i < end; i++ { if s[i].IsInline() { continue } bufIndex := s[i].BufferIndex() + int32(precedingBufsCount) s[i].SetIndexOffset(bufIndex, s[i].BufferOffset()) } } case arrow.BinaryDataType: offsetWidth := dt.Layout().Buffers[1].ByteWidth offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) if err != nil { return nil, err } out.buffers[1] = offsetBuffer out.buffers[2] = concatBuffers(gatherBufferRanges(data, 2, valueRanges), mem) case *arrow.ListType: offsetWidth := dt.Layout().Buffers[1].ByteWidth offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) if err != nil { return nil, err } childData := gatherChildrenRanges(data, 0, valueRanges) for _, c := range childData { defer c.Release() } out.buffers[1] = offsetBuffer out.childData = make([]arrow.ArrayData, 1) out.childData[0], err = concat(childData, mem) if err != nil { return nil, err } 
case *arrow.LargeListType: offsetWidth := dt.Layout().Buffers[1].ByteWidth offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) if err != nil { return nil, err } childData := gatherChildrenRanges(data, 0, valueRanges) for _, c := range childData { defer c.Release() } out.buffers[1] = offsetBuffer out.childData = make([]arrow.ArrayData, 1) out.childData[0], err = concat(childData, mem) if err != nil { return nil, err } case *arrow.ListViewType: offsetType := arrow.PrimitiveTypes.Int32.(arrow.FixedWidthDataType) err := concatListView(data, offsetType, out, mem) if err != nil { return nil, err } case *arrow.LargeListViewType: offsetType := arrow.PrimitiveTypes.Int64.(arrow.FixedWidthDataType) err := concatListView(data, offsetType, out, mem) if err != nil { return nil, err } case *arrow.FixedSizeListType: childData := gatherChildrenMultiplier(data, 0, int(dt.Len())) for _, c := range childData { defer c.Release() } children, err := concat(childData, mem) if err != nil { return nil, err } out.childData = []arrow.ArrayData{children} case *arrow.StructType: out.childData = make([]arrow.ArrayData, dt.NumFields()) for i := range dt.Fields() { children := gatherChildren(data, i) for _, c := range children { defer c.Release() } childData, err := concat(children, mem) if err != nil { return nil, err } out.childData[i] = childData } case *arrow.MapType: offsetWidth := dt.Layout().Buffers[1].ByteWidth offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) if err != nil { return nil, err } childData := gatherChildrenRanges(data, 0, valueRanges) for _, c := range childData { defer c.Release() } out.buffers[1] = offsetBuffer out.childData = make([]arrow.ArrayData, 1) out.childData[0], err = concat(childData, mem) if err != nil { return nil, err } case *arrow.RunEndEncodedType: physicalLength, overflow := int(0), false // we can't use gatherChildren because the Offset and Len 
of // data doesn't correspond to the physical length or offset runs := make([]arrow.ArrayData, len(data)) values := make([]arrow.ArrayData, len(data)) for i, d := range data { plen := encoded.GetPhysicalLength(d) off := encoded.FindPhysicalOffset(d) runs[i] = NewSliceData(d.Children()[0], int64(off), int64(off+plen)) defer runs[i].Release() values[i] = NewSliceData(d.Children()[1], int64(off), int64(off+plen)) defer values[i].Release() physicalLength, overflow = addOvf(physicalLength, plen) if overflow { return nil, fmt.Errorf("%w: run end encoded array length must fit into a 32-bit signed integer", arrow.ErrInvalid) } } runEndsByteWidth := runs[0].DataType().(arrow.FixedWidthDataType).Bytes() runEndsBuffers := gatherFixedBuffers(runs, 1, runEndsByteWidth) outRunEndsLen := physicalLength * runEndsByteWidth outRunEndsBuf := memory.NewResizableBuffer(mem) outRunEndsBuf.Resize(outRunEndsLen) defer outRunEndsBuf.Release() if err := updateRunEnds(runEndsByteWidth, data, runEndsBuffers, outRunEndsBuf); err != nil { return nil, err } out.childData = make([]arrow.ArrayData, 2) out.childData[0] = NewData(data[0].Children()[0].DataType(), int(physicalLength), []*memory.Buffer{nil, outRunEndsBuf}, nil, 0, 0) var err error out.childData[1], err = concat(values, mem) if err != nil { out.childData[0].Release() return nil, err } default: return nil, fmt.Errorf("concatenate not implemented for type %s", dt) } return out, nil } // check overflow in the addition, taken from bits.Add but adapted for signed integers // rather than unsigned integers. bits.UintSize will be either 32 or 64 based on // whether our architecture is 32 bit or 64. The operation is the same for both cases, // the only difference is how much we need to shift by 30 for 32 bit and 62 for 64 bit. // Thus, bits.UintSize - 2 is how much we shift right by to check if we had an overflow // in the signed addition. 
// // First return is the result of the sum, the second return is true if there was an overflow func addOvf(x, y int) (int, bool) { sum := x + y return sum, ((x&y)|((x|y)&^sum))>>(bits.UintSize-2) == 1 } // concatenate bitmaps together and return a buffer with the combined bitmaps func concatBitmaps(bitmaps []bitmap, mem memory.Allocator) (*memory.Buffer, error) { var ( outlen int overflow bool ) for _, bm := range bitmaps { if outlen, overflow = addOvf(outlen, bm.rng.len); overflow { return nil, errors.New("length overflow when concatenating arrays") } } out := memory.NewResizableBuffer(mem) out.Resize(int(bitutil.BytesForBits(int64(outlen)))) dst := out.Bytes() offset := 0 for _, bm := range bitmaps { if bm.data == nil { // if the bitmap is nil, that implies that the value is true for all elements bitutil.SetBitsTo(out.Bytes(), int64(offset), int64(bm.rng.len), true) } else { bitutil.CopyBitmap(bm.data, bm.rng.offset, bm.rng.len, dst, offset) } offset += bm.rng.len } return out, nil } func updateRunEnds(byteWidth int, inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, outputBuffer *memory.Buffer) error { switch byteWidth { case 2: out := arrow.Int16Traits.CastFromBytes(outputBuffer.Bytes()) return updateRunsInt16(inputData, inputBuffers, out) case 4: out := arrow.Int32Traits.CastFromBytes(outputBuffer.Bytes()) return updateRunsInt32(inputData, inputBuffers, out) case 8: out := arrow.Int64Traits.CastFromBytes(outputBuffer.Bytes()) return updateRunsInt64(inputData, inputBuffers, out) } return fmt.Errorf("%w: invalid dataType for RLE runEnds", arrow.ErrInvalid) } func updateRunsInt16(inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, output []int16) error { // for now we will not attempt to optimize by checking if we // can fold the end and beginning of each array we're concatenating // into a single run pos := 0 for i, buf := range inputBuffers { if buf.Len() == 0 { continue } src := arrow.Int16Traits.CastFromBytes(buf.Bytes()) if pos == 0 { pos 
+= copy(output, src) continue } lastEnd := output[pos-1] // we can check the last runEnd in the src and add it to the // last value that we're adjusting them all by to see if we // are going to overflow if int64(lastEnd)+int64(int(src[len(src)-1])-inputData[i].Offset()) > math.MaxInt16 { return fmt.Errorf("%w: overflow in run-length-encoded run ends concat", arrow.ErrInvalid) } // adjust all of the run ends by first normalizing them (e - data[i].offset) // then adding the previous value we ended on. Since the offset // is a logical length offset it should be accurate to just subtract // it from each value. for j, e := range src { output[pos+j] = lastEnd + int16(int(e)-inputData[i].Offset()) } pos += len(src) } return nil } func updateRunsInt32(inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, output []int32) error { // for now we will not attempt to optimize by checking if we // can fold the end and beginning of each array we're concatenating // into a single run pos := 0 for i, buf := range inputBuffers { if buf.Len() == 0 { continue } src := arrow.Int32Traits.CastFromBytes(buf.Bytes()) if pos == 0 { pos += copy(output, src) continue } lastEnd := output[pos-1] // we can check the last runEnd in the src and add it to the // last value that we're adjusting them all by to see if we // are going to overflow if int64(lastEnd)+int64(int(src[len(src)-1])-inputData[i].Offset()) > math.MaxInt32 { return fmt.Errorf("%w: overflow in run-length-encoded run ends concat", arrow.ErrInvalid) } // adjust all of the run ends by first normalizing them (e - data[i].offset) // then adding the previous value we ended on. Since the offset // is a logical length offset it should be accurate to just subtract // it from each value. 
for j, e := range src { output[pos+j] = lastEnd + int32(int(e)-inputData[i].Offset()) } pos += len(src) } return nil } func updateRunsInt64(inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, output []int64) error { // for now we will not attempt to optimize by checking if we // can fold the end and beginning of each array we're concatenating // into a single run pos := 0 for i, buf := range inputBuffers { if buf.Len() == 0 { continue } src := arrow.Int64Traits.CastFromBytes(buf.Bytes()) if pos == 0 { pos += copy(output, src) continue } lastEnd := output[pos-1] // we can check the last runEnd in the src and add it to the // last value that we're adjusting them all by to see if we // are going to overflow if uint64(lastEnd)+uint64(int(src[len(src)-1])-inputData[i].Offset()) > math.MaxInt64 { return fmt.Errorf("%w: overflow in run-length-encoded run ends concat", arrow.ErrInvalid) } // adjust all of the run ends by first normalizing them (e - data[i].offset) // then adding the previous value we ended on. Since the offset // is a logical length offset it should be accurate to just subtract // it from each value. for j, e := range src { output[pos+j] = lastEnd + e - int64(inputData[i].Offset()) } pos += len(src) } return nil } arrow-go-18.2.0/arrow/array/concat_test.go000066400000000000000000000613631476434502500204670ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "fmt" "math" "sort" "strings" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/testing/gen" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "golang.org/x/exp/rand" ) func TestConcatenateValueBuffersNull(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) inputs := make([]arrow.Array, 0) bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) defer bldr.Release() arr := bldr.NewArray() defer arr.Release() inputs = append(inputs, arr) bldr.AppendNull() arr = bldr.NewArray() defer arr.Release() inputs = append(inputs, arr) actual, err := array.Concatenate(inputs, mem) assert.NoError(t, err) defer actual.Release() assert.True(t, array.Equal(actual, inputs[1])) } func TestConcatenate(t *testing.T) { tests := []struct { dt arrow.DataType }{ {arrow.FixedWidthTypes.Boolean}, {arrow.PrimitiveTypes.Int8}, {arrow.PrimitiveTypes.Uint8}, {arrow.PrimitiveTypes.Int16}, {arrow.PrimitiveTypes.Uint16}, {arrow.PrimitiveTypes.Int32}, {arrow.PrimitiveTypes.Uint32}, {arrow.PrimitiveTypes.Int64}, {arrow.PrimitiveTypes.Uint64}, {arrow.PrimitiveTypes.Float32}, {arrow.PrimitiveTypes.Float64}, {arrow.BinaryTypes.String}, {arrow.BinaryTypes.LargeString}, {arrow.ListOf(arrow.PrimitiveTypes.Int8)}, {arrow.LargeListOf(arrow.PrimitiveTypes.Int8)}, 
{arrow.ListViewOf(arrow.PrimitiveTypes.Int8)}, {arrow.LargeListViewOf(arrow.PrimitiveTypes.Int8)}, {arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int8)}, {arrow.StructOf()}, {arrow.MapOf(arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8)}, {&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.PrimitiveTypes.Float64}}, {arrow.BinaryTypes.StringView}, } for _, tt := range tests { t.Run(tt.dt.Name(), func(t *testing.T) { suite.Run(t, &ConcatTestSuite{ seed: 0xdeadbeef, dt: tt.dt, nullProbs: []float64{0.0, 0.1, 0.5, 0.9, 1.0}, sizes: []int32{0, 1, 2, 4, 16, 31, 1234}, }) }) } } type ConcatTestSuite struct { suite.Suite seed uint64 rng gen.RandomArrayGenerator dt arrow.DataType nullProbs []float64 sizes []int32 mem *memory.CheckedAllocator } func (cts *ConcatTestSuite) SetupSuite() { cts.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) cts.rng = gen.NewRandomArrayGenerator(cts.seed, cts.mem) } func (cts *ConcatTestSuite) TearDownSuite() { cts.mem.AssertSize(cts.T(), 0) } func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) arrow.Array { switch cts.dt.ID() { case arrow.BOOL: return cts.rng.Boolean(size, 0.5, nullprob) case arrow.INT8: return cts.rng.Int8(size, 0, 127, nullprob) case arrow.UINT8: return cts.rng.Uint8(size, 0, 127, nullprob) case arrow.INT16: return cts.rng.Int16(size, 0, 127, nullprob) case arrow.UINT16: return cts.rng.Uint16(size, 0, 127, nullprob) case arrow.INT32: return cts.rng.Int32(size, 0, 127, nullprob) case arrow.UINT32: return cts.rng.Uint32(size, 0, 127, nullprob) case arrow.INT64: return cts.rng.Int64(size, 0, 127, nullprob) case arrow.UINT64: return cts.rng.Uint64(size, 0, 127, nullprob) case arrow.FLOAT32: return cts.rng.Float32(size, 0, 127, nullprob) case arrow.FLOAT64: return cts.rng.Float64(size, 0, 127, nullprob) case arrow.NULL: return array.NewNull(int(size)) case arrow.STRING: return cts.rng.String(size, 0, 15, nullprob) case arrow.LARGE_STRING: return cts.rng.LargeString(size, 
0, 15, nullprob) case arrow.STRING_VIEW: return cts.rng.StringView(size, 0, 20, nullprob) case arrow.LIST: valuesSize := size * 4 values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) defer values.Release() offsetsVector := cts.offsets(int32(valuesSize), int32(size)) // ensure the first and last offsets encompass the whole values offsetsVector[0] = 0 offsetsVector[len(offsetsVector)-1] = int32(valuesSize) bldr := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) defer bldr.Release() valid := make([]bool, len(offsetsVector)-1) for i := range valid { valid[i] = true } bldr.AppendValues(offsetsVector, valid) vb := bldr.ValueBuilder().(*array.Int8Builder) for i := 0; i < values.Len(); i++ { if values.IsValid(i) { vb.Append(values.Value(i)) } else { vb.AppendNull() } } return bldr.NewArray() case arrow.LARGE_LIST: valuesSize := size * 8 values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) defer values.Release() offsetsVector := cts.largeoffsets(int64(valuesSize), int32(size)) // ensure the first and last offsets encompass the whole values offsetsVector[0] = 0 offsetsVector[len(offsetsVector)-1] = int64(valuesSize) bldr := array.NewLargeListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) defer bldr.Release() valid := make([]bool, len(offsetsVector)-1) for i := range valid { valid[i] = true } bldr.AppendValues(offsetsVector, valid) vb := bldr.ValueBuilder().(*array.Int8Builder) for i := 0; i < values.Len(); i++ { if values.IsValid(i) { vb.Append(values.Value(i)) } else { vb.AppendNull() } } return bldr.NewArray() case arrow.LIST_VIEW: arr := cts.rng.ListView(cts.dt.(arrow.VarLenListLikeType), size, 0, 20, nullprob) err := arr.ValidateFull() cts.NoError(err) return arr case arrow.LARGE_LIST_VIEW: arr := cts.rng.LargeListView(cts.dt.(arrow.VarLenListLikeType), size, 0, 20, nullprob) err := arr.ValidateFull() cts.NoError(err) return arr case arrow.FIXED_SIZE_LIST: const listsize = 3 valuesSize := size * listsize 
values := cts.rng.Int8(valuesSize, 0, 127, nullprob) defer values.Release() data := array.NewData(arrow.FixedSizeListOf(listsize, arrow.PrimitiveTypes.Int8), int(size), []*memory.Buffer{nil}, []arrow.ArrayData{values.Data()}, 0, 0) defer data.Release() return array.MakeFromData(data) case arrow.STRUCT: foo := cts.rng.Int8(size, 0, 127, nullprob) defer foo.Release() bar := cts.rng.Float64(size, 0, 127, nullprob) defer bar.Release() baz := cts.rng.Boolean(size, 0.5, nullprob) defer baz.Release() data := array.NewData(arrow.StructOf( arrow.Field{Name: "foo", Type: foo.DataType(), Nullable: true}, arrow.Field{Name: "bar", Type: bar.DataType(), Nullable: true}, arrow.Field{Name: "baz", Type: baz.DataType(), Nullable: true}), int(size), []*memory.Buffer{nil}, []arrow.ArrayData{foo.Data(), bar.Data(), baz.Data()}, 0, 0) defer data.Release() return array.NewStructData(data) case arrow.MAP: valuesSize := size * 4 keys := cts.rng.Uint16(valuesSize, 0, 127, 0).(*array.Uint16) defer keys.Release() values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) defer values.Release() offsetsVector := cts.offsets(int32(valuesSize), int32(size)) offsetsVector[0] = 0 offsetsVector[len(offsetsVector)-1] = int32(valuesSize) bldr := array.NewMapBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8, false) defer bldr.Release() kb := bldr.KeyBuilder().(*array.Uint16Builder) vb := bldr.ItemBuilder().(*array.Int8Builder) valid := make([]bool, len(offsetsVector)-1) for i := range valid { valid[i] = true } bldr.AppendValues(offsetsVector, valid) for i := 0; i < int(valuesSize); i++ { kb.Append(keys.Value(i)) if values.IsValid(i) { vb.Append(values.Value(i)) } else { vb.AppendNull() } } return bldr.NewArray() case arrow.DICTIONARY: indices := cts.rng.Int32(size, 0, 127, nullprob) defer indices.Release() dict := cts.rng.Float64(128, 0.0, 127.0, nullprob) defer dict.Release() return array.NewDictionaryArray(cts.dt, indices, dict) default: return nil } } 
func (cts *ConcatTestSuite) slices(arr arrow.Array, offsets []int32) []arrow.Array { slices := make([]arrow.Array, len(offsets)-1) for i := 0; i != len(slices); i++ { slices[i] = array.NewSlice(arr, int64(offsets[i]), int64(offsets[i+1])) } return slices } func (cts *ConcatTestSuite) checkTrailingBitsZeroed(bitmap *memory.Buffer, length int64) { if preceding := bitutil.PrecedingBitmask[length%8]; preceding != 0 { lastByte := bitmap.Bytes()[length/8] cts.Equal(lastByte&preceding, lastByte, length, preceding) } } func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 { offsets := make([]int32, slicecount+1) dist := rand.New(rand.NewSource(cts.seed)) for i := range offsets { offsets[i] = dist.Int31n(length + 1) } sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) return offsets } func (cts *ConcatTestSuite) largeoffsets(length int64, slicecount int32) []int64 { offsets := make([]int64, slicecount+1) dist := rand.New(rand.NewSource(cts.seed)) for i := range offsets { offsets[i] = dist.Int63n(length + 1) } sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) return offsets } func (cts *ConcatTestSuite) TestCheckConcat() { for _, sz := range cts.sizes { cts.Run(fmt.Sprintf("size %d", sz), func() { offsets := cts.offsets(sz, 3) for _, np := range cts.nullProbs { cts.Run(fmt.Sprintf("nullprob %0.2f", np), func() { scopedMem := memory.NewCheckedAllocatorScope(cts.mem) defer scopedMem.CheckSize(cts.T()) arr := cts.generateArr(int64(sz), np) defer arr.Release() expected := array.NewSlice(arr, int64(offsets[0]), int64(offsets[len(offsets)-1])) defer expected.Release() slices := cts.slices(arr, offsets) for _, s := range slices { if s.DataType().ID() == arrow.LIST_VIEW { err := s.(*array.ListView).ValidateFull() cts.NoError(err) } defer s.Release() } actual, err := array.Concatenate(slices, cts.mem) cts.NoError(err) if arr.DataType().ID() == arrow.LIST_VIEW { lv := actual.(*array.ListView) err := lv.ValidateFull() 
cts.NoError(err) } defer actual.Release() cts.Truef(array.Equal(expected, actual), "expected: %s\ngot: %s\n", expected, actual) if len(actual.Data().Buffers()) > 0 { if actual.Data().Buffers()[0] != nil { cts.checkTrailingBitsZeroed(actual.Data().Buffers()[0], int64(actual.Len())) } if actual.DataType().ID() == arrow.BOOL { cts.checkTrailingBitsZeroed(actual.Data().Buffers()[1], int64(actual.Len())) } } }) } }) } } func TestConcatDifferentDicts(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) t.Run("simple dicts", func(t *testing.T) { scopedMem := memory.NewCheckedAllocatorScope(mem) defer scopedMem.CheckSize(t) dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} dict1, err := array.DictArrayFromJSON(mem, dictType, `[1, 2, null, 3, 0]`, `["A0", "A1", "A2", "A3"]`) require.NoError(t, err) defer dict1.Release() dict2, err := array.DictArrayFromJSON(mem, dictType, `[null, 4, 2, 1]`, `["B0", "B1", "B2", "B3", "B4"]`) require.NoError(t, err) defer dict2.Release() expected, err := array.DictArrayFromJSON(mem, dictType, `[1, 2, null, 3, 0, null, 8, 6, 5]`, `["A0", "A1", "A2", "A3", "B0", "B1", "B2", "B3", "B4"]`) require.NoError(t, err) defer expected.Release() concat, err := array.Concatenate([]arrow.Array{dict1, dict2}, mem) assert.NoError(t, err) defer concat.Release() assert.Truef(t, array.Equal(concat, expected), "got: %s, expected: %s", concat, expected) }) t.Run("larger", func(t *testing.T) { scopedMem := memory.NewCheckedAllocatorScope(mem) defer scopedMem.CheckSize(t) const size = 500 dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String} idxBuilder, exIdxBldr := array.NewUint16Builder(mem), array.NewUint16Builder(mem) defer idxBuilder.Release() defer exIdxBldr.Release() idxBuilder.Reserve(size) exIdxBldr.Reserve(size * 2) for i := uint16(0); i < size; i++ { idxBuilder.UnsafeAppend(i) 
exIdxBldr.UnsafeAppend(i) } for i := uint16(size); i < 2*size; i++ { exIdxBldr.UnsafeAppend(i) } indices, expIndices := idxBuilder.NewArray(), exIdxBldr.NewArray() defer indices.Release() defer expIndices.Release() // create three dictionaries. First maps i -> "{i}", second maps i->"{500+i}", // each for 500 values and the third maps i -> "{i}" but for 1000 values. // first and second concatenated should end up equaling the third. All strings // padded to length 8 so we can know the size ahead of time. valuesOneBldr, valuesTwoBldr := array.NewStringBuilder(mem), array.NewStringBuilder(mem) defer valuesOneBldr.Release() defer valuesTwoBldr.Release() valuesOneBldr.Reserve(size) valuesTwoBldr.Reserve(size) valuesOneBldr.ReserveData(size * 8) valuesTwoBldr.ReserveData(size * 8) for i := 0; i < size; i++ { valuesOneBldr.Append(fmt.Sprintf("%-8d", i)) valuesTwoBldr.Append(fmt.Sprintf("%-8d", i+size)) } dict1, dict2 := valuesOneBldr.NewArray(), valuesTwoBldr.NewArray() defer dict1.Release() defer dict2.Release() expectedDict, err := array.Concatenate([]arrow.Array{dict1, dict2}, mem) require.NoError(t, err) defer expectedDict.Release() one, two := array.NewDictionaryArray(dictType, indices, dict1), array.NewDictionaryArray(dictType, indices, dict2) defer one.Release() defer two.Release() expected := array.NewDictionaryArray(dictType, expIndices, expectedDict) defer expected.Release() combined, err := array.Concatenate([]arrow.Array{one, two}, mem) assert.NoError(t, err) defer combined.Release() assert.Truef(t, array.Equal(combined, expected), "got: %s, expected: %s", combined, expected) }) } func TestConcatDictionaryPartialOverlap(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} dictOne, err := array.DictArrayFromJSON(mem, dt, `[1, 2, null, 3, 0]`, `["A0", "A1", "C2", "C3"]`) require.NoError(t, err) defer 
dictOne.Release() dictTwo, err := array.DictArrayFromJSON(mem, dt, `[null, 4, 2, 1]`, `["B0", "B1", "C2", "C3", "B4"]`) require.NoError(t, err) defer dictTwo.Release() expected, err := array.DictArrayFromJSON(mem, dt, `[1, 2, null, 3, 0, null, 6, 2, 5]`, `["A0", "A1", "C2", "C3", "B0", "B1", "B4"]`) require.NoError(t, err) defer expected.Release() actual, err := array.Concatenate([]arrow.Array{dictOne, dictTwo}, mem) assert.NoError(t, err) defer actual.Release() assert.Truef(t, array.Equal(actual, expected), "got: %s, expected: %s", actual, expected) } func TestConcatDictionaryDifferentSizeIndex(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} biggerDt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String} dictOne, err := array.DictArrayFromJSON(mem, dt, `[0]`, `["A0"]`) require.NoError(t, err) defer dictOne.Release() dictTwo, err := array.DictArrayFromJSON(mem, biggerDt, `[0]`, `["B0"]`) require.NoError(t, err) defer dictTwo.Release() arr, err := array.Concatenate([]arrow.Array{dictOne, dictTwo}, mem) assert.Nil(t, arr) assert.Error(t, err) } func TestConcatDictionaryUnifyNullInDict(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} dictOne, err := array.DictArrayFromJSON(mem, dt, `[0, 1]`, `[null, "A"]`) require.NoError(t, err) defer dictOne.Release() dictTwo, err := array.DictArrayFromJSON(mem, dt, `[0, 1]`, `[null, "B"]`) require.NoError(t, err) defer dictTwo.Release() expected, err := array.DictArrayFromJSON(mem, dt, `[0, 1, 0, 2]`, `[null, "A", "B"]`) require.NoError(t, err) defer expected.Release() actual, err := array.Concatenate([]arrow.Array{dictOne, dictTwo}, mem) assert.NoError(t, err) defer 
actual.Release() assert.Truef(t, array.Equal(actual, expected), "got: %s, expected: %s", actual, expected) } func TestConcatDictionaryEnlargedIndices(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) const size = math.MaxUint8 + 1 dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.PrimitiveTypes.Uint16} idxBuilder := array.NewUint8Builder(mem) defer idxBuilder.Release() idxBuilder.Reserve(size) for i := 0; i < size; i++ { idxBuilder.UnsafeAppend(uint8(i)) } indices := idxBuilder.NewUint8Array() defer indices.Release() valuesBuilder := array.NewUint16Builder(mem) defer valuesBuilder.Release() valuesBuilder.Reserve(size) valuesBuilderTwo := array.NewUint16Builder(mem) defer valuesBuilderTwo.Release() valuesBuilderTwo.Reserve(size) for i := uint16(0); i < size; i++ { valuesBuilder.UnsafeAppend(i) valuesBuilderTwo.UnsafeAppend(i + size) } dict1, dict2 := valuesBuilder.NewUint16Array(), valuesBuilderTwo.NewUint16Array() defer dict1.Release() defer dict2.Release() d1, d2 := array.NewDictionaryArray(dt, indices, dict1), array.NewDictionaryArray(dt, indices, dict2) defer d1.Release() defer d2.Release() _, err := array.Concatenate([]arrow.Array{d1, d2}, mem) assert.Error(t, err) biggerDt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.PrimitiveTypes.Uint16} bigger1, bigger2 := array.NewDictionaryArray(biggerDt, dict1, dict1), array.NewDictionaryArray(biggerDt, dict1, dict2) defer bigger1.Release() defer bigger2.Release() combined, err := array.Concatenate([]arrow.Array{bigger1, bigger2}, mem) assert.NoError(t, err) defer combined.Release() assert.EqualValues(t, size*2, combined.Len()) } func TestConcatDictionaryNullSlots(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: arrow.BinaryTypes.String} dict1, err := 
array.DictArrayFromJSON(mem, dt, `[null, null, null, null]`, `[]`) require.NoError(t, err) defer dict1.Release() dict2, err := array.DictArrayFromJSON(mem, dt, `[null, null, null, null, 0, 1]`, `["a", "b"]`) require.NoError(t, err) defer dict2.Release() expected, err := array.DictArrayFromJSON(mem, dt, `[null, null, null, null, null, null, null, null, 0, 1]`, `["a", "b"]`) require.NoError(t, err) defer expected.Release() actual, err := array.Concatenate([]arrow.Array{dict1, dict2}, mem) assert.NoError(t, err) defer actual.Release() assert.Truef(t, array.Equal(actual, expected), "got: %s, expected: %s", actual, expected) } func TestConcatRunEndEncoded(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) tests := []struct { offsetType arrow.DataType expected interface{} }{ {arrow.PrimitiveTypes.Int16, []int16{1, 11, 111, 211, 311, 411, 500, 600}}, {arrow.PrimitiveTypes.Int32, []int32{1, 11, 111, 211, 311, 411, 500, 600}}, {arrow.PrimitiveTypes.Int64, []int64{1, 11, 111, 211, 311, 411, 500, 600}}, } for _, tt := range tests { t.Run(tt.offsetType.String(), func(t *testing.T) { arrs := make([]arrow.Array, 0) bldr := array.NewRunEndEncodedBuilder(mem, tt.offsetType, arrow.BinaryTypes.String) defer bldr.Release() valBldr := bldr.ValueBuilder().(*array.StringBuilder) bldr.Append(1) valBldr.Append("Hello") bldr.AppendNull() bldr.ContinueRun(9) bldr.Append(100) valBldr.Append("World") arrs = append(arrs, bldr.NewArray()) bldr.Append(100) valBldr.Append("Goku") bldr.Append(100) valBldr.Append("Gohan") bldr.Append(100) valBldr.Append("Goten") arrs = append(arrs, bldr.NewArray()) bldr.AppendNull() bldr.ContinueRun(99) bldr.Append(100) valBldr.Append("Vegeta") bldr.Append(100) valBldr.Append("Trunks") next := bldr.NewArray() defer next.Release() // remove the initial null with an offset and dig into the next run arrs = append(arrs, array.NewSlice(next, 111, int64(next.Len()))) for _, a := range arrs { defer a.Release() } result, 
err := array.Concatenate(arrs, mem) assert.NoError(t, err) defer result.Release() rle := result.(*array.RunEndEncoded) assert.EqualValues(t, 8, rle.GetPhysicalLength()) assert.EqualValues(t, 0, rle.GetPhysicalOffset()) var values interface{} switch endsArr := rle.RunEndsArr().(type) { case *array.Int16: values = endsArr.Int16Values() case *array.Int32: values = endsArr.Int32Values() case *array.Int64: values = endsArr.Int64Values() } assert.Equal(t, tt.expected, values) expectedValues, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", null, "World", "Goku", "Gohan", "Goten", "Vegeta", "Trunks"]`)) defer expectedValues.Release() assert.Truef(t, array.Equal(expectedValues, rle.Values()), "expected: %s\ngot: %s", expectedValues, rle.Values()) }) } } func TestConcatAlmostOverflowRunEndEncoding(t *testing.T) { tests := []struct { offsetType arrow.DataType max uint64 }{ {arrow.PrimitiveTypes.Int16, math.MaxInt16}, {arrow.PrimitiveTypes.Int32, math.MaxInt32}, {arrow.PrimitiveTypes.Int64, math.MaxInt64}, } for _, tt := range tests { t.Run(tt.offsetType.String(), func(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) arrs := make([]arrow.Array, 0) bldr := array.NewRunEndEncodedBuilder(mem, tt.offsetType, arrow.BinaryTypes.String) defer bldr.Release() valBldr := bldr.ValueBuilder().(*array.StringBuilder) // max is not evenly divisible by 4, so we add one to each // to account for that so our final concatenate will overflow bldr.Append((tt.max / 4) + 1) valBldr.Append("foo") bldr.Append((tt.max / 4) + 1) valBldr.Append("bar") arrs = append(arrs, bldr.NewArray()) bldr.Append((tt.max / 4) + 1) valBldr.Append("baz") bldr.Append((tt.max / 4)) valBldr.Append("bop") arrs = append(arrs, bldr.NewArray()) defer func() { for _, a := range arrs { a.Release() } }() arr, err := array.Concatenate(arrs, mem) assert.NoError(t, err) defer arr.Release() }) } } func TestConcatOverflowRunEndEncoding(t 
*testing.T) { tests := []struct { offsetType arrow.DataType max uint64 }{ {arrow.PrimitiveTypes.Int16, math.MaxInt16}, {arrow.PrimitiveTypes.Int32, math.MaxInt32}, {arrow.PrimitiveTypes.Int64, math.MaxInt64}, } for _, tt := range tests { t.Run(tt.offsetType.String(), func(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) arrs := make([]arrow.Array, 0) bldr := array.NewRunEndEncodedBuilder(mem, tt.offsetType, arrow.BinaryTypes.String) defer bldr.Release() valBldr := bldr.ValueBuilder().(*array.StringBuilder) // max is not evenly divisible by 4, so we add one to each // to account for that so our final concatenate will overflow bldr.Append((tt.max / 4) + 1) valBldr.Append("foo") bldr.Append((tt.max / 4) + 1) valBldr.Append("bar") arrs = append(arrs, bldr.NewArray()) bldr.Append((tt.max / 4) + 1) valBldr.Append("baz") bldr.Append((tt.max / 4) + 1) valBldr.Append("bop") arrs = append(arrs, bldr.NewArray()) defer func() { for _, a := range arrs { a.Release() } }() arr, err := array.Concatenate(arrs, mem) assert.Nil(t, arr) assert.ErrorIs(t, err, arrow.ErrInvalid) }) } } func TestConcatPanic(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) allocator := &panicAllocator{ n: 400, Allocator: mem, } g := gen.NewRandomArrayGenerator(0, memory.DefaultAllocator) ar1 := g.ArrayOf(arrow.STRING, 32, 0) defer ar1.Release() ar2 := g.ArrayOf(arrow.STRING, 32, 0) defer ar2.Release() concat, err := array.Concatenate([]arrow.Array{ar1, ar2}, allocator) assert.Error(t, err) assert.Nil(t, concat) } arrow-go-18.2.0/arrow/array/data.go000066400000000000000000000165101476434502500170640ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "hash/maphash" "math/bits" "sync/atomic" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" ) // Data represents the memory and metadata of an Arrow array. type Data struct { refCount int64 dtype arrow.DataType nulls int offset int length int // for dictionary arrays: buffers will be the null validity bitmap and the indexes that reference // values in the dictionary member. childData would be empty in a dictionary array buffers []*memory.Buffer // TODO(sgc): should this be an interface? childData []arrow.ArrayData // TODO(sgc): managed by ListArray, StructArray and UnionArray types dictionary *Data // only populated for dictionary arrays } // NewData creates a new Data. 
func NewData(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []arrow.ArrayData, nulls, offset int) *Data { for _, b := range buffers { if b != nil { b.Retain() } } for _, child := range childData { if child != nil { child.Retain() } } return &Data{ refCount: 1, dtype: dtype, nulls: nulls, length: length, offset: offset, buffers: buffers, childData: childData, } } // NewDataWithDictionary creates a new data object, but also sets the provided dictionary into the data if it's not nil func NewDataWithDictionary(dtype arrow.DataType, length int, buffers []*memory.Buffer, nulls, offset int, dict *Data) *Data { data := NewData(dtype, length, buffers, nil, nulls, offset) if dict != nil { dict.Retain() } data.dictionary = dict return data } func (d *Data) Copy() *Data { // don't pass the slices directly, otherwise it retains the connection // we need to make new slices and populate them with the same pointers bufs := make([]*memory.Buffer, len(d.buffers)) copy(bufs, d.buffers) children := make([]arrow.ArrayData, len(d.childData)) copy(children, d.childData) data := NewData(d.dtype, d.length, bufs, children, d.nulls, d.offset) data.SetDictionary(d.dictionary) return data } // Reset sets the Data for re-use. func (d *Data) Reset(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []arrow.ArrayData, nulls, offset int) { // Retain new buffers before releasing existing buffers in-case they're the same ones to prevent accidental premature // release. for _, b := range buffers { if b != nil { b.Retain() } } for _, b := range d.buffers { if b != nil { b.Release() } } d.buffers = buffers // Retain new children data before releasing existing children data in-case they're the same ones to prevent accidental // premature release. 
for _, d := range childData { if d != nil { d.Retain() } } for _, d := range d.childData { if d != nil { d.Release() } } d.childData = childData d.dtype = dtype d.length = length d.nulls = nulls d.offset = offset } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. func (d *Data) Retain() { atomic.AddInt64(&d.refCount, 1) } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. func (d *Data) Release() { debug.Assert(atomic.LoadInt64(&d.refCount) > 0, "too many releases") if atomic.AddInt64(&d.refCount, -1) == 0 { for _, b := range d.buffers { if b != nil { b.Release() } } for _, b := range d.childData { b.Release() } if d.dictionary != nil { d.dictionary.Release() } d.dictionary, d.buffers, d.childData = nil, nil, nil } } // DataType returns the DataType of the data. func (d *Data) DataType() arrow.DataType { return d.dtype } func (d *Data) SetNullN(n int) { d.nulls = n } // NullN returns the number of nulls. func (d *Data) NullN() int { return d.nulls } // Len returns the length. func (d *Data) Len() int { return d.length } // Offset returns the offset. func (d *Data) Offset() int { return d.offset } // Buffers returns the buffers. 
func (d *Data) Buffers() []*memory.Buffer { return d.buffers } func (d *Data) Children() []arrow.ArrayData { return d.childData } // Dictionary returns the ArrayData object for the dictionary member, or nil func (d *Data) Dictionary() arrow.ArrayData { return d.dictionary } // SetDictionary allows replacing the dictionary for this particular Data object func (d *Data) SetDictionary(dict arrow.ArrayData) { if d.dictionary != nil { d.dictionary.Release() d.dictionary = nil } if dict.(*Data) != nil { dict.Retain() d.dictionary = dict.(*Data) } } // SizeInBytes returns the size of the Data and any children and/or dictionary in bytes by // recursively examining the nested structures of children and/or dictionary. // The value returned is an upper-bound since offset is not taken into account. func (d *Data) SizeInBytes() uint64 { var size uint64 if d == nil { return 0 } for _, b := range d.Buffers() { if b != nil { size += uint64(b.Len()) } } for _, c := range d.Children() { size += c.SizeInBytes() } if d.dictionary != nil { size += d.dictionary.SizeInBytes() } return size } // NewSliceData returns a new slice that shares backing data with the input. // The returned Data slice starts at i and extends j-i elements, such as: // // slice := data[i:j] // // The returned value must be Release'd after use. // // NewSliceData panics if the slice is outside the valid range of the input Data. // NewSliceData panics if j < i. 
func NewSliceData(data arrow.ArrayData, i, j int64) arrow.ArrayData { if j > int64(data.Len()) || i > j || data.Offset()+int(i) > data.Offset()+data.Len() { panic("arrow/array: index out of range") } for _, b := range data.Buffers() { if b != nil { b.Retain() } } for _, child := range data.Children() { if child != nil { child.Retain() } } if data.(*Data).dictionary != nil { data.(*Data).dictionary.Retain() } o := &Data{ refCount: 1, dtype: data.DataType(), nulls: UnknownNullCount, length: int(j - i), offset: data.Offset() + int(i), buffers: data.Buffers(), childData: data.Children(), dictionary: data.(*Data).dictionary, } if data.NullN() == 0 { o.nulls = 0 } return o } func Hash(h *maphash.Hash, data arrow.ArrayData) { a := data.(*Data) h.Write((*[bits.UintSize / 8]byte)(unsafe.Pointer(&a.length))[:]) h.Write((*[bits.UintSize / 8]byte)(unsafe.Pointer(&a.length))[:]) if len(a.buffers) > 0 && a.buffers[0] != nil { h.Write(a.buffers[0].Bytes()) } for _, c := range a.childData { Hash(h, c) } } arrow-go-18.2.0/arrow/array/data_test.go000066400000000000000000000121221476434502500201160ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array import ( "slices" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestDataReset(t *testing.T) { var ( buffers1 = make([]*memory.Buffer, 0, 3) buffers2 = make([]*memory.Buffer, 0, 3) ) for i := 0; i < cap(buffers1); i++ { buffers1 = append(buffers1, memory.NewBufferBytes([]byte("some-bytes1"))) buffers2 = append(buffers2, memory.NewBufferBytes([]byte("some-bytes2"))) } data := NewData(&arrow.StringType{}, 10, buffers1, nil, 0, 0) data.Reset(&arrow.Int64Type{}, 5, buffers2, nil, 1, 2) for i := 0; i < 2; i++ { assert.Equal(t, buffers2, data.Buffers()) assert.Equal(t, &arrow.Int64Type{}, data.DataType()) assert.Equal(t, 1, data.NullN()) assert.Equal(t, 2, data.Offset()) assert.Equal(t, 5, data.Len()) // Make sure it works when resetting the data with its own buffers (new buffers are retained // before old ones are released.) data.Reset(&arrow.Int64Type{}, 5, data.Buffers(), nil, 1, 2) } } func TestSizeInBytes(t *testing.T) { var buffers1 = make([]*memory.Buffer, 0, 3) for i := 0; i < cap(buffers1); i++ { buffers1 = append(buffers1, memory.NewBufferBytes([]byte("15-bytes-buffer"))) } data := NewData(&arrow.StringType{}, 10, buffers1, nil, 0, 0) var arrayData arrow.ArrayData = data dataWithChild := NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{arrayData}, 0, 0) buffers2 := slices.Clone(buffers1) buffers2[0] = nil dataWithNilBuffer := NewData(&arrow.StringType{}, 10, buffers2, nil, 0, 0) t.Run("nil buffers", func(t *testing.T) { expectedSize := uint64(30) if actualSize := dataWithNilBuffer.SizeInBytes(); actualSize != expectedSize { t.Errorf("expected size %d, got %d", expectedSize, actualSize) } }) t.Run("buffers only", func(t *testing.T) { expectedSize := uint64(45) if actualSize := data.SizeInBytes(); actualSize != expectedSize { t.Errorf("expected size %d, got %d", expectedSize, actualSize) } }) t.Run("buffers and child data", func(t *testing.T) { // 
45 bytes in buffers, 45 bytes in child data expectedSize := uint64(90) if actualSize := dataWithChild.SizeInBytes(); actualSize != expectedSize { t.Errorf("expected size %d, got %d", expectedSize, actualSize) } }) t.Run("buffers and nested child data", func(t *testing.T) { var dataWithChildArrayData arrow.ArrayData = dataWithChild var dataWithNestedChild arrow.ArrayData = NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{dataWithChildArrayData}, 0, 0) // 45 bytes in buffers, 90 bytes in nested child data expectedSize := uint64(135) if actualSize := dataWithNestedChild.SizeInBytes(); actualSize != expectedSize { t.Errorf("expected size %d, got %d", expectedSize, actualSize) } }) t.Run("buffers and dictionary", func(t *testing.T) { dictData := data dataWithDict := NewDataWithDictionary(&arrow.StringType{}, 10, buffers1, 0, 0, dictData) // 45 bytes in buffers, 45 bytes in dictionary expectedSize := uint64(90) if actualSize := dataWithDict.SizeInBytes(); actualSize != expectedSize { t.Errorf("expected size %d, got %d", expectedSize, actualSize) } }) t.Run("sliced data", func(t *testing.T) { sliceData := NewSliceData(arrayData, 3, 5) // offset is not taken into account in SizeInBytes() expectedSize := uint64(45) if actualSize := sliceData.SizeInBytes(); actualSize != expectedSize { t.Errorf("expected size %d, got %d", expectedSize, actualSize) } }) t.Run("sliced data with children", func(t *testing.T) { var dataWithChildArrayData arrow.ArrayData = dataWithChild sliceData := NewSliceData(dataWithChildArrayData, 3, 5) // offset is not taken into account in SizeInBytes() expectedSize := uint64(90) if actualSize := sliceData.SizeInBytes(); actualSize != expectedSize { t.Errorf("expected size %d, got %d", expectedSize, actualSize) } }) t.Run("buffers with children which are sliced data", func(t *testing.T) { sliceData := NewSliceData(arrayData, 3, 5) dataWithSlicedChildren := NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{sliceData}, 0, 0) // offset 
is not taken into account in SizeInBytes() expectedSize := uint64(90) if actualSize := dataWithSlicedChildren.SizeInBytes(); actualSize != expectedSize { t.Errorf("expected size %d, got %d", expectedSize, actualSize) } }) } arrow-go-18.2.0/arrow/array/decimal.go000066400000000000000000000235601476434502500175540ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array import ( "bytes" "fmt" "reflect" "strings" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/decimal" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) type baseDecimal[T interface { decimal.DecimalTypes decimal.Num[T] }] struct { array values []T } func newDecimalData[T interface { decimal.DecimalTypes decimal.Num[T] }](data arrow.ArrayData) *baseDecimal[T] { a := &baseDecimal[T]{} a.refCount = 1 a.setData(data.(*Data)) return a } func (a *baseDecimal[T]) Value(i int) T { return a.values[i] } func (a *baseDecimal[T]) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return a.GetOneForMarshal(i).(string) } func (a *baseDecimal[T]) Values() []T { return a.values } func (a *baseDecimal[T]) String() string { o := new(strings.Builder) o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", a.Value(i)) } } o.WriteString("]") return o.String() } func (a *baseDecimal[T]) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.GetData[T](vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *baseDecimal[T]) GetOneForMarshal(i int) any { if a.IsNull(i) { return nil } typ := a.DataType().(arrow.DecimalType) n, scale := a.Value(i), typ.GetScale() return n.ToBigFloat(scale).Text('g', int(typ.GetPrecision())) } func (a *baseDecimal[T]) MarshalJSON() ([]byte, error) { vals := make([]any, a.Len()) for i := 0; i < a.Len(); i++ { vals[i] = a.GetOneForMarshal(i) } return json.Marshal(vals) } func arrayEqualDecimal[T interface { decimal.DecimalTypes decimal.Num[T] }](left, right *baseDecimal[T]) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { 
continue } if left.Value(i) != right.Value(i) { return false } } return true } type Decimal32 = baseDecimal[decimal.Decimal32] func NewDecimal32Data(data arrow.ArrayData) *Decimal32 { return newDecimalData[decimal.Decimal32](data) } type Decimal64 = baseDecimal[decimal.Decimal64] func NewDecimal64Data(data arrow.ArrayData) *Decimal64 { return newDecimalData[decimal.Decimal64](data) } type Decimal128 = baseDecimal[decimal.Decimal128] func NewDecimal128Data(data arrow.ArrayData) *Decimal128 { return newDecimalData[decimal.Decimal128](data) } type Decimal256 = baseDecimal[decimal.Decimal256] func NewDecimal256Data(data arrow.ArrayData) *Decimal256 { return newDecimalData[decimal.Decimal256](data) } type Decimal32Builder = baseDecimalBuilder[decimal.Decimal32] type Decimal64Builder = baseDecimalBuilder[decimal.Decimal64] type Decimal128Builder struct { *baseDecimalBuilder[decimal.Decimal128] } func (b *Decimal128Builder) NewDecimal128Array() *Decimal128 { return b.NewDecimalArray() } type Decimal256Builder struct { *baseDecimalBuilder[decimal.Decimal256] } func (b *Decimal256Builder) NewDecimal256Array() *Decimal256 { return b.NewDecimalArray() } type baseDecimalBuilder[T interface { decimal.DecimalTypes decimal.Num[T] }] struct { builder traits decimal.Traits[T] dtype arrow.DecimalType data *memory.Buffer rawData []T } func newDecimalBuilder[T interface { decimal.DecimalTypes decimal.Num[T] }, DT arrow.DecimalType](mem memory.Allocator, dtype DT) *baseDecimalBuilder[T] { return &baseDecimalBuilder[T]{ builder: builder{refCount: 1, mem: mem}, dtype: dtype, } } func (b *baseDecimalBuilder[T]) Type() arrow.DataType { return b.dtype } func (b *baseDecimalBuilder[T]) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data, b.rawData = nil, nil } } } func (b *baseDecimalBuilder[T]) Append(v T) { 
b.Reserve(1) b.UnsafeAppend(v) } func (b *baseDecimalBuilder[T]) UnsafeAppend(v T) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *baseDecimalBuilder[T]) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *baseDecimalBuilder[T]) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *baseDecimalBuilder[T]) AppendEmptyValue() { var empty T b.Append(empty) } func (b *baseDecimalBuilder[T]) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *baseDecimalBuilder[T]) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } func (b *baseDecimalBuilder[T]) AppendValues(v []T, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) if len(v) > 0 { copy(b.rawData[b.length:], v) } b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *baseDecimalBuilder[T]) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := int(reflect.TypeFor[T]().Size()) * capacity b.data.Resize(bytesN) b.rawData = arrow.GetData[T](b.data.Bytes()) } func (b *baseDecimalBuilder[T]) Reserve(n int) { b.builder.reserve(n, b.Resize) } func (b *baseDecimalBuilder[T]) Resize(n int) { nBuilder := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(nBuilder, b.init) b.data.Resize(b.traits.BytesRequired(n)) b.rawData = arrow.GetData[T](b.data.Bytes()) } } func (b *baseDecimalBuilder[T]) NewDecimalArray() (a *baseDecimal[T]) { data := b.newData() a = newDecimalData[T](data) data.Release() return } func (b *baseDecimalBuilder[T]) NewArray() arrow.Array { return b.NewDecimalArray() } func (b *baseDecimalBuilder[T]) newData() (data *Data) { bytesRequired := b.traits.BytesRequired(b.length) if bytesRequired > 0 && 
bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data, b.rawData = nil, nil } return } func (b *baseDecimalBuilder[T]) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } val, err := b.traits.FromString(s, b.dtype.GetPrecision(), b.dtype.GetScale()) if err != nil { b.AppendNull() return err } b.Append(val) return nil } func (b *baseDecimalBuilder[T]) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } var token T switch v := t.(type) { case float64: token, err = b.traits.FromFloat64(v, b.dtype.GetPrecision(), b.dtype.GetScale()) if err != nil { return err } b.Append(token) case string: token, err = b.traits.FromString(v, b.dtype.GetPrecision(), b.dtype.GetScale()) if err != nil { return err } b.Append(token) case json.Number: token, err = b.traits.FromString(v.String(), b.dtype.GetPrecision(), b.dtype.GetScale()) if err != nil { return err } b.Append(token) case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeFor[T](), Offset: dec.InputOffset(), } } return nil } func (b *baseDecimalBuilder[T]) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *baseDecimalBuilder[T]) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("decimal builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } func NewDecimal32Builder(mem memory.Allocator, dtype *arrow.Decimal32Type) *Decimal32Builder { b := newDecimalBuilder[decimal.Decimal32](mem, dtype) b.traits = decimal.Dec32Traits return b } func NewDecimal64Builder(mem 
memory.Allocator, dtype *arrow.Decimal64Type) *Decimal64Builder { b := newDecimalBuilder[decimal.Decimal64](mem, dtype) b.traits = decimal.Dec64Traits return b } func NewDecimal128Builder(mem memory.Allocator, dtype *arrow.Decimal128Type) *Decimal128Builder { b := newDecimalBuilder[decimal.Decimal128](mem, dtype) b.traits = decimal.Dec128Traits return &Decimal128Builder{b} } func NewDecimal256Builder(mem memory.Allocator, dtype *arrow.Decimal256Type) *Decimal256Builder { b := newDecimalBuilder[decimal.Decimal256](mem, dtype) b.traits = decimal.Dec256Traits return &Decimal256Builder{b} } var ( _ arrow.Array = (*Decimal32)(nil) _ arrow.Array = (*Decimal64)(nil) _ arrow.Array = (*Decimal128)(nil) _ arrow.Array = (*Decimal256)(nil) _ Builder = (*Decimal32Builder)(nil) _ Builder = (*Decimal64Builder)(nil) _ Builder = (*Decimal128Builder)(nil) _ Builder = (*Decimal256Builder)(nil) ) arrow-go-18.2.0/arrow/array/decimal128_test.go000066400000000000000000000171071476434502500210460ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestNewDecimal128Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDecimal128Builder(mem, &arrow.Decimal128Type{Precision: 10, Scale: 1}) defer ab.Release() ab.Retain() ab.Release() want := []decimal128.Num{ decimal128.New(1, 1), decimal128.New(2, 2), decimal128.New(3, 3), {}, decimal128.FromI64(-5), decimal128.FromI64(-6), {}, decimal128.FromI64(8), decimal128.FromI64(9), decimal128.FromI64(10), } valids := []bool{true, true, true, false, true, true, false, true, true, true} for i, valid := range valids { switch { case valid: ab.Append(want[i]) default: ab.AppendNull() } } // check state of builder before NewDecimal128Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewArray().(*array.Decimal128) a.Retain() a.Release() // check state of builder after NewDecimal128Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDecimal128Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDecimal128Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDecimal128Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, want, a.Values(), "unexpected Decimal128Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") assert.Len(t, a.Values(), 10, "unexpected length of Decimal128Values") assert.Equal(t, 10*arrow.Decimal128SizeBytes, a.Data().Buffers()[1].Len()) a.Release() 
ab.Append(decimal128.FromI64(7)) ab.Append(decimal128.FromI64(8)) a = ab.NewDecimal128Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []decimal128.Num{decimal128.FromI64(7), decimal128.FromI64(8)}, a.Values()) assert.Len(t, a.Values(), 2) assert.Equal(t, 2*arrow.Decimal128SizeBytes, a.Data().Buffers()[1].Len()) a.Release() } func TestDecimal128Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDecimal128Builder(mem, &arrow.Decimal128Type{Precision: 10, Scale: 1}) defer ab.Release() want := []decimal128.Num{decimal128.FromI64(3), decimal128.FromI64(4)} ab.AppendValues([]decimal128.Num{}, nil) a := ab.NewDecimal128Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewDecimal128Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(want, nil) a = ab.NewDecimal128Array() assert.Equal(t, want, a.Values()) a.Release() ab.AppendValues([]decimal128.Num{}, nil) ab.AppendValues(want, nil) a = ab.NewDecimal128Array() assert.Equal(t, want, a.Values()) a.Release() ab.AppendValues(want, nil) ab.AppendValues([]decimal128.Num{}, nil) a = ab.NewDecimal128Array() assert.Equal(t, want, a.Values()) a.Release() } func TestDecimal128Slice(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Decimal128Type{Precision: 10, Scale: 1} b := array.NewDecimal128Builder(mem, dtype) defer b.Release() var data = []decimal128.Num{ decimal128.FromI64(-1), decimal128.FromI64(+0), decimal128.FromI64(+1), decimal128.New(-4, 4), } b.AppendValues(data[:2], nil) b.AppendNull() b.Append(data[3]) arr := b.NewDecimal128Array() defer arr.Release() if got, want := arr.Len(), len(data); got != want { t.Fatalf("invalid array length: got=%d, want=%d", got, want) } slice := array.NewSliceData(arr.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Decimal128) if !ok { 
t.Fatalf("could not type-assert to array.String") } if got, want := v.String(), `[(null) {4 -4}]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } assert.Equal(t, array.NullValueStr, v.ValueStr(0)) assert.Equal(t, "-7.378697629e+18", v.ValueStr(1)) if got, want := v.NullN(), 1; got != want { t.Fatalf("got=%q, want=%q", got, want) } if got, want := v.Data().Offset(), 2; got != want { t.Fatalf("invalid offset: got=%d, want=%d", got, want) } } func TestDecimal128StringRoundTrip(t *testing.T) { dt := &arrow.Decimal128Type{Precision: 20, Scale: 5} // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewDecimal128Builder(mem, dt) defer b.Release() values := []decimal128.Num{ decimal128.New(1, 1), decimal128.New(1, 2), decimal128.New(1, 3), {}, decimal128.FromI64(-5), decimal128.FromI64(-6), {}, decimal128.FromI64(8), decimal128.FromI64(9), decimal128.FromI64(10), } val1, err := decimal128.FromString("0.99", dt.Precision, dt.Scale) if err != nil { t.Fatal(err) } val2, err := decimal128.FromString("1234567890.12345", dt.Precision, dt.Scale) if err != nil { t.Fatal(err) } values = append(values, val1, val2) valid := []bool{true, true, true, false, true, true, false, true, true, true, true, true} b.AppendValues(values, valid) arr := b.NewArray().(*array.Decimal128) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewDecimal128Builder(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Decimal128) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestDecimal128GetOneForMarshal(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Decimal128Type{Precision: 38, Scale: 20} b := array.NewDecimal128Builder(mem, dtype) defer b.Release() cases := []struct { give any want any }{ {"1", "1"}, {"1.25", "1.25"}, {"0.99", "0.99"}, {"1234567890.123456789", "1234567890.123456789"}, {nil, nil}, {"-0.99", "-0.99"}, {"-1234567890.123456789", "-1234567890.123456789"}, {"0.0000000000000000001", "1e-19"}, } for _, v := range cases { if v.give == nil { b.AppendNull() continue } dt, err := decimal128.FromString(v.give.(string), dtype.Precision, dtype.Scale) if err != nil { t.Fatal(err) } b.Append(dt) } arr := b.NewDecimal128Array() defer arr.Release() if got, want := arr.Len(), len(cases); got != want { t.Fatalf("invalid array length: got=%d, want=%d", got, want) } for i := range cases { assert.Equalf(t, cases[i].want, arr.GetOneForMarshal(i), "unexpected value at index %d", i) } } arrow-go-18.2.0/arrow/array/decimal256_test.go000066400000000000000000000177041476434502500210530ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/decimal256" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestNewDecimal256Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 1}) defer ab.Release() ab.Retain() ab.Release() want := []decimal256.Num{ decimal256.New(1, 1, 1, 1), decimal256.New(2, 2, 2, 2), decimal256.New(3, 3, 3, 3), {}, decimal256.FromI64(-5), decimal256.FromI64(-6), {}, decimal256.FromI64(8), decimal256.FromI64(9), decimal256.FromI64(10), } valids := []bool{true, true, true, false, true, true, false, true, true, true} for i, valid := range valids { switch { case valid: ab.Append(want[i]) default: ab.AppendNull() } } // check state of builder before NewDecimal256Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewArray().(*array.Decimal256) a.Retain() a.Release() // check state of builder after NewDecimal256Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDecimal256Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDecimal256Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDecimal256Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") 
assert.Equal(t, want, a.Values(), "unexpected Decimal256Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") assert.Len(t, a.Values(), 10, "unexpected length of Decimal256Values") assert.Equal(t, 10*arrow.Decimal256SizeBytes, a.Data().Buffers()[1].Len()) a.Release() ab.Append(decimal256.FromI64(7)) ab.Append(decimal256.FromI64(8)) a = ab.NewDecimal256Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") assert.Equal(t, []decimal256.Num{decimal256.FromI64(7), decimal256.FromI64(8)}, a.Values()) assert.Len(t, a.Values(), 2) assert.Equal(t, 2*arrow.Decimal256SizeBytes, a.Data().Buffers()[1].Len()) a.Release() } func TestDecimal256Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 1}) defer ab.Release() want := []decimal256.Num{decimal256.FromI64(3), decimal256.FromI64(4)} ab.AppendValues([]decimal256.Num{}, nil) a := ab.NewDecimal256Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewDecimal256Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(want, nil) a = ab.NewDecimal256Array() assert.Equal(t, want, a.Values()) a.Release() ab.AppendValues([]decimal256.Num{}, nil) ab.AppendValues(want, nil) a = ab.NewDecimal256Array() assert.Equal(t, want, a.Values()) a.Release() ab.AppendValues(want, nil) ab.AppendValues([]decimal256.Num{}, nil) a = ab.NewDecimal256Array() assert.Equal(t, want, a.Values()) a.Release() } func TestDecimal256Slice(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Decimal256Type{Precision: 10, Scale: 1} b := array.NewDecimal256Builder(mem, dtype) defer b.Release() var data = 
[]decimal256.Num{ decimal256.FromI64(-1), decimal256.FromI64(+0), decimal256.FromI64(+1), decimal256.New(4, 4, 4, 4), } b.AppendValues(data[:2], nil) b.AppendNull() b.Append(data[3]) arr := b.NewDecimal256Array() defer arr.Release() if got, want := arr.Len(), len(data); got != want { t.Fatalf("invalid array length: got=%d, want=%d", got, want) } slice := array.NewSliceData(arr.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Decimal256) if !ok { t.Fatalf("could not type-assert to array.String") } if got, want := v.String(), `[(null) {[4 4 4 4]}]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } assert.Equal(t, array.NullValueStr, v.ValueStr(0)) assert.Equal(t, "2.510840694e+57", v.ValueStr(1)) if got, want := v.NullN(), 1; got != want { t.Fatalf("got=%q, want=%q", got, want) } if got, want := v.Data().Offset(), 2; got != want { t.Fatalf("invalid offset: got=%d, want=%d", got, want) } } func TestDecimal256StringRoundTrip(t *testing.T) { dt := &arrow.Decimal256Type{Precision: 70, Scale: 10} // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewDecimal256Builder(mem, dt) defer b.Release() values := []decimal256.Num{ decimal256.New(1, 1, 1, 1), decimal256.New(2, 2, 2, 2), decimal256.New(3, 3, 3, 3), {}, decimal256.FromI64(-5), decimal256.FromI64(-6), {}, decimal256.FromI64(8), decimal256.FromI64(9), decimal256.FromI64(10), } val1, err := decimal256.FromString("0.99", dt.Precision, dt.Scale) if err != nil { t.Fatal(err) } val2, err := decimal256.FromString("1234567890.123456789", dt.Precision, dt.Scale) if err != nil { t.Fatal(err) } values = append(values, val1, val2) valid := []bool{true, true, true, false, true, true, false, true, true, true, true, true} b.AppendValues(values, valid) arr := b.NewArray().(*array.Decimal256) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewDecimal256Builder(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { v := arr.ValueStr(i) assert.NoError(t, b1.AppendValueFromString(v)) } arr1 := b1.NewArray().(*array.Decimal256) defer arr1.Release() for i := 0; i < arr.Len(); i++ { if arr.IsNull(i) && arr1.IsNull(i) { continue } if arr.Value(i) != arr1.Value(i) { t.Fatalf("unexpected value at index %d: got=%v, want=%v", i, arr1.Value(i), arr.Value(i)) } } assert.True(t, array.Equal(arr, arr1)) } func TestDecimal256GetOneForMarshal(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Decimal256Type{Precision: 38, Scale: 20} b := array.NewDecimal256Builder(mem, dtype) defer b.Release() cases := []struct { give any want any }{ {"1", "1"}, {"1.25", "1.25"}, {"0.99", "0.99"}, {"1234567890.123456789", "1234567890.123456789"}, {nil, nil}, {"-0.99", "-0.99"}, {"-1234567890.123456789", "-1234567890.123456789"}, {"0.0000000000000000001", "1e-19"}, } for _, v := range cases { if v.give == nil { b.AppendNull() continue } dt, err := decimal256.FromString(v.give.(string), dtype.Precision, dtype.Scale) if err != nil { t.Fatal(err) } b.Append(dt) } arr := b.NewDecimal256Array() defer arr.Release() if got, want := arr.Len(), len(cases); got != want { t.Fatalf("invalid array length: got=%d, want=%d", got, want) } for i := range cases { assert.Equalf(t, cases[i].want, arr.GetOneForMarshal(i), "unexpected value at index %d", i) } } arrow-go-18.2.0/arrow/array/decimal_test.go000066400000000000000000000147541476434502500206200ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "fmt" "math/big" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/suite" ) type decimalValue interface{} func bitmapFromSlice(vals []bool) []byte { out := make([]byte, int(bitutil.BytesForBits(int64(len(vals))))) writer := bitutil.NewBitmapWriter(out, 0, len(vals)) for _, val := range vals { if val { writer.Set() } else { writer.Clear() } writer.Next() } writer.Finish() return out } type DecimalTestSuite struct { suite.Suite dt arrow.DataType mem *memory.CheckedAllocator } func (d *DecimalTestSuite) SetupTest() { d.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) } func (d *DecimalTestSuite) TearDownTest() { d.mem.AssertSize(d.T(), 0) } func (d *DecimalTestSuite) makeData(input []decimalValue, out []byte) { switch d.dt.ID() { case arrow.DECIMAL128: for _, v := range input { arrow.Decimal128Traits.PutValue(out, v.(decimal128.Num)) out = out[arrow.Decimal128SizeBytes:] } case arrow.DECIMAL256: for _, v := range input { arrow.Decimal256Traits.PutValue(out, v.(decimal256.Num)) out = out[arrow.Decimal256SizeBytes:] } } } func (d *DecimalTestSuite) testCreate(bitWidth int, prec int32, draw []decimalValue, 
valids []bool, offset int64) arrow.Array { switch bitWidth { case 128: d.dt = &arrow.Decimal128Type{Precision: prec, Scale: 4} case 256: d.dt = &arrow.Decimal256Type{Precision: prec, Scale: 4} } bldr := array.NewBuilder(d.mem, d.dt) defer bldr.Release() bldr.Reserve(len(draw)) nullCount := 0 for i, b := range valids { if b { switch v := draw[i].(type) { case decimal128.Num: bldr.(*array.Decimal128Builder).Append(v) case decimal256.Num: bldr.(*array.Decimal256Builder).Append(v) } } else { bldr.AppendNull() nullCount++ } } arr := bldr.NewArray() d.EqualValues(0, bldr.Len()) rawBytes := make([]byte, len(draw)*(d.dt.(arrow.FixedWidthDataType).BitWidth()/8)) d.makeData(draw, rawBytes) expectedData := memory.NewBufferBytes(rawBytes) expectedNullBitmap := bitmapFromSlice(valids) expectedNullCount := len(draw) - bitutil.CountSetBits(expectedNullBitmap, 0, len(valids)) expected := array.NewData(d.dt, len(valids), []*memory.Buffer{memory.NewBufferBytes(expectedNullBitmap), expectedData}, nil, expectedNullCount, 0) defer expected.Release() expectedArr := array.MakeFromData(expected) defer expectedArr.Release() lhs := array.NewSlice(arr, offset, int64(arr.Len())-offset) rhs := array.NewSlice(expectedArr, offset, int64(expectedArr.Len())-offset) defer func() { lhs.Release() rhs.Release() }() d.Truef(array.Equal(lhs, rhs), "expected: %s, got: %s\n", rhs, lhs) return arr } type Decimal128TestSuite struct { DecimalTestSuite } func (d *Decimal128TestSuite) runTest(f func(prec int32)) { for prec := int32(1); prec <= 38; prec++ { d.Run(fmt.Sprintf("prec=%d", prec), func() { f(prec) }) } } func (d *Decimal128TestSuite) TestNoNulls() { d.runTest(func(prec int32) { draw := []decimalValue{decimal128.FromU64(1), decimal128.FromI64(-2), decimal128.FromU64(2389), decimal128.FromU64(4), decimal128.FromI64(-12348)} valids := []bool{true, true, true, true, true} arr := d.testCreate(128, prec, draw, valids, 0) arr.Release() arr = d.testCreate(128, prec, draw, valids, 2) arr.Release() }) } func 
(d *Decimal128TestSuite) TestWithNulls() { d.runTest(func(prec int32) { draw := []decimalValue{decimal128.FromU64(1), decimal128.FromU64(2), decimal128.FromI64(-1), decimal128.FromI64(4), decimal128.FromI64(-1), decimal128.FromI64(1), decimal128.FromI64(2)} bigVal, _ := (&big.Int{}).SetString("230342903942234234", 10) draw = append(draw, decimal128.FromBigInt(bigVal)) bigNeg, _ := (&big.Int{}).SetString("-23049302932235234", 10) draw = append(draw, decimal128.FromBigInt(bigNeg)) valids := []bool{true, true, false, true, false, true, true, true, true} arr := d.testCreate(128, prec, draw, valids, 0) arr.Release() arr = d.testCreate(128, prec, draw, valids, 2) arr.Release() }) } type Decimal256TestSuite struct { DecimalTestSuite } func (d *Decimal256TestSuite) runTest(f func(prec int32)) { for _, prec := range []int32{1, 2, 5, 10, 38, 39, 40, 75, 76} { d.Run(fmt.Sprintf("prec=%d", prec), func() { f(prec) }) } } func (d *Decimal256TestSuite) TestNoNulls() { d.runTest(func(prec int32) { draw := []decimalValue{decimal256.FromU64(1), decimal256.FromI64(-2), decimal256.FromU64(2389), decimal256.FromU64(4), decimal256.FromI64(-12348)} valids := []bool{true, true, true, true, true} arr := d.testCreate(256, prec, draw, valids, 0) arr.Release() arr = d.testCreate(256, prec, draw, valids, 2) arr.Release() }) } func (d *Decimal256TestSuite) TestWithNulls() { d.runTest(func(prec int32) { draw := []decimalValue{decimal256.FromU64(1), decimal256.FromU64(2), decimal256.FromI64(-1), decimal256.FromI64(4), decimal256.FromI64(-1), decimal256.FromI64(1), decimal256.FromI64(2)} // (pow(2, 255) - 1) bigVal, _ := (&big.Int{}).SetString("57896044618658097711785492504343953926634992332820282019728792003956564819967", 10) draw = append(draw, decimal256.FromBigInt(bigVal)) draw = append(draw, decimal256.FromBigInt(bigVal.Neg(bigVal))) valids := []bool{true, true, false, true, false, true, true, true, true} arr := d.testCreate(256, prec, draw, valids, 0) arr.Release() arr = d.testCreate(256, 
prec, draw, valids, 2) arr.Release() }) } func TestDecimal(t *testing.T) { suite.Run(t, new(Decimal128TestSuite)) suite.Run(t, new(Decimal256TestSuite)) } arrow-go-18.2.0/arrow/array/dictionary.go000066400000000000000000001605661476434502500203330ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "errors" "fmt" "math" "math/bits" "sync/atomic" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/decimal" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" "github.com/apache/arrow-go/v18/arrow/float16" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/hashing" "github.com/apache/arrow-go/v18/internal/json" "github.com/apache/arrow-go/v18/internal/utils" ) // Dictionary represents the type for dictionary-encoded data with a data // dependent dictionary. // // A dictionary array contains an array of non-negative integers (the "dictionary" // indices") along with a data type containing a "dictionary" corresponding to // the distinct values represented in the data. 
// // For example, the array: // // ["foo", "bar", "foo", "bar", "foo", "bar"] // // with dictionary ["bar", "foo"], would have the representation of: // // indices: [1, 0, 1, 0, 1, 0] // dictionary: ["bar", "foo"] // // The indices in principle may be any integer type. type Dictionary struct { array indices arrow.Array dict arrow.Array } // NewDictionaryArray constructs a dictionary array with the provided indices // and dictionary using the given type. func NewDictionaryArray(typ arrow.DataType, indices, dict arrow.Array) *Dictionary { a := &Dictionary{} a.array.refCount = 1 dictdata := NewData(typ, indices.Len(), indices.Data().Buffers(), indices.Data().Children(), indices.NullN(), indices.Data().Offset()) dictdata.dictionary = dict.Data().(*Data) dict.Data().Retain() defer dictdata.Release() a.setData(dictdata) return a } // checkIndexBounds returns an error if any value in the provided integer // arraydata is >= the passed upperlimit or < 0. otherwise nil func checkIndexBounds(indices *Data, upperlimit uint64) error { if indices.length == 0 { return nil } var maxval uint64 switch indices.dtype.ID() { case arrow.UINT8: maxval = math.MaxUint8 case arrow.UINT16: maxval = math.MaxUint16 case arrow.UINT32: maxval = math.MaxUint32 case arrow.UINT64: maxval = math.MaxUint64 } // for unsigned integers, if the values array is larger than the maximum // index value (especially for UINT8/UINT16), then there's no need to // boundscheck. for signed integers we still need to bounds check // because a value could be < 0. isSigned := maxval == 0 if !isSigned && upperlimit > maxval { return nil } start := indices.offset end := indices.offset + indices.length // TODO(ARROW-15950): lift BitSetRunReader from parquet to utils // and use it here for performance improvement. 
switch indices.dtype.ID() { case arrow.INT8: data := arrow.Int8Traits.CastFromBytes(indices.buffers[1].Bytes()) min, max := utils.GetMinMaxInt8(data[start:end]) if min < 0 || max >= int8(upperlimit) { return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) } case arrow.UINT8: data := arrow.Uint8Traits.CastFromBytes(indices.buffers[1].Bytes()) _, max := utils.GetMinMaxUint8(data[start:end]) if max >= uint8(upperlimit) { return fmt.Errorf("contains out of bounds index: max: %d", max) } case arrow.INT16: data := arrow.Int16Traits.CastFromBytes(indices.buffers[1].Bytes()) min, max := utils.GetMinMaxInt16(data[start:end]) if min < 0 || max >= int16(upperlimit) { return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) } case arrow.UINT16: data := arrow.Uint16Traits.CastFromBytes(indices.buffers[1].Bytes()) _, max := utils.GetMinMaxUint16(data[start:end]) if max >= uint16(upperlimit) { return fmt.Errorf("contains out of bounds index: max: %d", max) } case arrow.INT32: data := arrow.Int32Traits.CastFromBytes(indices.buffers[1].Bytes()) min, max := utils.GetMinMaxInt32(data[start:end]) if min < 0 || max >= int32(upperlimit) { return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) } case arrow.UINT32: data := arrow.Uint32Traits.CastFromBytes(indices.buffers[1].Bytes()) _, max := utils.GetMinMaxUint32(data[start:end]) if max >= uint32(upperlimit) { return fmt.Errorf("contains out of bounds index: max: %d", max) } case arrow.INT64: data := arrow.Int64Traits.CastFromBytes(indices.buffers[1].Bytes()) min, max := utils.GetMinMaxInt64(data[start:end]) if min < 0 || max >= int64(upperlimit) { return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) } case arrow.UINT64: data := arrow.Uint64Traits.CastFromBytes(indices.buffers[1].Bytes()) _, max := utils.GetMinMaxUint64(data[indices.offset : indices.offset+indices.length]) if max >= upperlimit { return fmt.Errorf("contains out of bounds value: 
max: %d", max) } default: return fmt.Errorf("invalid type for bounds checking: %T", indices.dtype) } return nil } // NewValidatedDictionaryArray constructs a dictionary array from the provided indices // and dictionary arrays, while also performing validation checks to ensure correctness // such as bounds checking at are usually skipped for performance. func NewValidatedDictionaryArray(typ *arrow.DictionaryType, indices, dict arrow.Array) (*Dictionary, error) { if indices.DataType().ID() != typ.IndexType.ID() { return nil, fmt.Errorf("dictionary type index (%T) does not match indices array type (%T)", typ.IndexType, indices.DataType()) } if !arrow.TypeEqual(typ.ValueType, dict.DataType()) { return nil, fmt.Errorf("dictionary value type (%T) does not match dict array type (%T)", typ.ValueType, dict.DataType()) } if err := checkIndexBounds(indices.Data().(*Data), uint64(dict.Len())); err != nil { return nil, err } return NewDictionaryArray(typ, indices, dict), nil } // NewDictionaryData creates a strongly typed Dictionary array from // an ArrayData object with a datatype of arrow.Dictionary and a dictionary func NewDictionaryData(data arrow.ArrayData) *Dictionary { a := &Dictionary{} a.refCount = 1 a.setData(data.(*Data)) return a } func (d *Dictionary) Retain() { atomic.AddInt64(&d.refCount, 1) } func (d *Dictionary) Release() { debug.Assert(atomic.LoadInt64(&d.refCount) > 0, "too many releases") if atomic.AddInt64(&d.refCount, -1) == 0 { d.data.Release() d.data, d.nullBitmapBytes = nil, nil d.indices.Release() d.indices = nil if d.dict != nil { d.dict.Release() d.dict = nil } } } func (d *Dictionary) setData(data *Data) { d.array.setData(data) dictType := data.dtype.(*arrow.DictionaryType) if data.dictionary == nil { if data.length > 0 { panic("arrow/array: no dictionary set in Data for Dictionary array") } } else { debug.Assert(arrow.TypeEqual(dictType.ValueType, data.dictionary.DataType()), "mismatched dictionary value types") } indexData := 
NewData(dictType.IndexType, data.length, data.buffers, data.childData, data.nulls, data.offset) defer indexData.Release() d.indices = MakeFromData(indexData) } // Dictionary returns the values array that makes up the dictionary for this // array. func (d *Dictionary) Dictionary() arrow.Array { if d.dict == nil { d.dict = MakeFromData(d.data.dictionary) } return d.dict } // Indices returns the underlying array of indices as it's own array func (d *Dictionary) Indices() arrow.Array { return d.indices } // CanCompareIndices returns true if the dictionary arrays can be compared // without having to unify the dictionaries themselves first. // This means that the index types are equal too. func (d *Dictionary) CanCompareIndices(other *Dictionary) bool { if !arrow.TypeEqual(d.indices.DataType(), other.indices.DataType()) { return false } minlen := int64(min(d.data.dictionary.length, other.data.dictionary.length)) return SliceEqual(d.Dictionary(), 0, minlen, other.Dictionary(), 0, minlen) } func (d *Dictionary) ValueStr(i int) string { if d.IsNull(i) { return NullValueStr } return d.Dictionary().ValueStr(d.GetValueIndex(i)) } func (d *Dictionary) String() string { return fmt.Sprintf("{ dictionary: %v\n indices: %v }", d.Dictionary(), d.Indices()) } // GetValueIndex returns the dictionary index for the value at index i of the array. // The actual value can be retrieved by using d.Dictionary().(valuetype).Value(d.GetValueIndex(i)) func (d *Dictionary) GetValueIndex(i int) int { indiceData := d.data.buffers[1].Bytes() // we know the value is non-negative per the spec, so // we can use the unsigned value regardless. 
switch d.indices.DataType().ID() { case arrow.UINT8, arrow.INT8: return int(uint8(indiceData[d.data.offset+i])) case arrow.UINT16, arrow.INT16: return int(arrow.Uint16Traits.CastFromBytes(indiceData)[d.data.offset+i]) case arrow.UINT32, arrow.INT32: idx := arrow.Uint32Traits.CastFromBytes(indiceData)[d.data.offset+i] debug.Assert(bits.UintSize == 64 || idx <= math.MaxInt32, "arrow/dictionary: truncation of index value") return int(idx) case arrow.UINT64, arrow.INT64: idx := arrow.Uint64Traits.CastFromBytes(indiceData)[d.data.offset+i] debug.Assert((bits.UintSize == 32 && idx <= math.MaxInt32) || (bits.UintSize == 64 && idx <= math.MaxInt64), "arrow/dictionary: truncation of index value") return int(idx) } debug.Assert(false, "unreachable dictionary index") return -1 } func (d *Dictionary) GetOneForMarshal(i int) interface{} { if d.IsNull(i) { return nil } vidx := d.GetValueIndex(i) return d.Dictionary().GetOneForMarshal(vidx) } func (d *Dictionary) MarshalJSON() ([]byte, error) { vals := make([]interface{}, d.Len()) for i := 0; i < d.Len(); i++ { vals[i] = d.GetOneForMarshal(i) } return json.Marshal(vals) } func arrayEqualDict(l, r *Dictionary) bool { return Equal(l.Dictionary(), r.Dictionary()) && Equal(l.indices, r.indices) } func arrayApproxEqualDict(l, r *Dictionary, opt equalOption) bool { return arrayApproxEqual(l.Dictionary(), r.Dictionary(), opt) && arrayApproxEqual(l.indices, r.indices, opt) } // helper for building the properly typed indices of the dictionary builder type IndexBuilder struct { Builder Append func(int) } func createIndexBuilder(mem memory.Allocator, dt arrow.FixedWidthDataType) (ret IndexBuilder, err error) { ret = IndexBuilder{Builder: NewBuilder(mem, dt)} switch dt.ID() { case arrow.INT8: ret.Append = func(idx int) { ret.Builder.(*Int8Builder).Append(int8(idx)) } case arrow.UINT8: ret.Append = func(idx int) { ret.Builder.(*Uint8Builder).Append(uint8(idx)) } case arrow.INT16: ret.Append = func(idx int) { 
ret.Builder.(*Int16Builder).Append(int16(idx))
		}
	case arrow.UINT16:
		ret.Append = func(idx int) {
			ret.Builder.(*Uint16Builder).Append(uint16(idx))
		}
	case arrow.INT32:
		ret.Append = func(idx int) {
			ret.Builder.(*Int32Builder).Append(int32(idx))
		}
	case arrow.UINT32:
		ret.Append = func(idx int) {
			ret.Builder.(*Uint32Builder).Append(uint32(idx))
		}
	case arrow.INT64:
		ret.Append = func(idx int) {
			ret.Builder.(*Int64Builder).Append(int64(idx))
		}
	case arrow.UINT64:
		ret.Append = func(idx int) {
			ret.Builder.(*Uint64Builder).Append(uint64(idx))
		}
	default:
		debug.Assert(false, "dictionary index type must be integral")
		err = fmt.Errorf("dictionary index type must be integral, not %s", dt)
	}
	return
}

// helper function to construct an appropriately typed memo table based on
// the value type for the dictionary
func createMemoTable(mem memory.Allocator, dt arrow.DataType) (ret hashing.MemoTable, err error) {
	switch dt.ID() {
	case arrow.INT8:
		ret = hashing.NewInt8MemoTable(0)
	case arrow.UINT8:
		ret = hashing.NewUint8MemoTable(0)
	case arrow.INT16:
		ret = hashing.NewInt16MemoTable(0)
	case arrow.UINT16:
		ret = hashing.NewUint16MemoTable(0)
	case arrow.INT32:
		ret = hashing.NewInt32MemoTable(0)
	case arrow.UINT32:
		ret = hashing.NewUint32MemoTable(0)
	case arrow.INT64:
		ret = hashing.NewInt64MemoTable(0)
	case arrow.UINT64:
		ret = hashing.NewUint64MemoTable(0)
	// temporal types are memoized via their underlying integer representation
	case arrow.DURATION, arrow.TIMESTAMP, arrow.DATE64, arrow.TIME64:
		ret = hashing.NewInt64MemoTable(0)
	case arrow.TIME32, arrow.DATE32, arrow.INTERVAL_MONTHS:
		ret = hashing.NewInt32MemoTable(0)
	// float16 is memoized by its raw uint16 bits
	case arrow.FLOAT16:
		ret = hashing.NewUint16MemoTable(0)
	case arrow.FLOAT32:
		ret = hashing.NewFloat32MemoTable(0)
	case arrow.FLOAT64:
		ret = hashing.NewFloat64MemoTable(0)
	// variable/fixed width byte-based values share the binary memo table
	case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.DECIMAL32, arrow.DECIMAL64, arrow.DECIMAL128, arrow.DECIMAL256, arrow.INTERVAL_DAY_TIME, arrow.INTERVAL_MONTH_DAY_NANO:
		ret = hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(mem, arrow.BinaryTypes.Binary))
	case arrow.STRING:
		ret = hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(mem, arrow.BinaryTypes.String))
	case arrow.NULL:
		// null dictionaries need no memo table
	default:
		err = fmt.Errorf("unimplemented dictionary value type, %s", dt)
	}

	return
}

// DictionaryBuilder is the interface implemented by all typed dictionary builders.
type DictionaryBuilder interface {
	Builder
	// NewDictionaryArray constructs the dictionary-encoded array built so far.
	NewDictionaryArray() *Dictionary
	// NewDelta returns the indices plus only the dictionary values added
	// since the last call (for dictionary delta/replacement in IPC).
	NewDelta() (indices, delta arrow.Array, err error)
	// AppendArray appends the logical values of a plain (non-dictionary) array.
	AppendArray(arrow.Array) error
	// AppendIndices appends raw dictionary indices with optional validity.
	AppendIndices([]int, []bool)
	// ResetFull resets both the indices and the accumulated dictionary.
	ResetFull()
	// DictionarySize reports the number of unique values memoized so far.
	DictionarySize() int
}

type dictionaryBuilder struct {
	builder

	dt          *arrow.DictionaryType
	deltaOffset int               // memo table size at the last NewArray/NewDelta, for delta dictionaries
	memoTable   hashing.MemoTable // maps values to dictionary indices
	idxBuilder  IndexBuilder      // builds the index array
}

// NewDictionaryBuilderWithDict initializes a dictionary builder and inserts the values from `init` as the first
// values in the dictionary, but does not insert them as values into the array.
func NewDictionaryBuilderWithDict(mem memory.Allocator, dt *arrow.DictionaryType, init arrow.Array) DictionaryBuilder {
	if init != nil && !arrow.TypeEqual(dt.ValueType, init.DataType()) {
		panic(fmt.Errorf("arrow/array: cannot initialize dictionary type %T with array of type %T", dt.ValueType, init.DataType()))
	}

	idxbldr, err := createIndexBuilder(mem, dt.IndexType.(arrow.FixedWidthDataType))
	if err != nil {
		panic(fmt.Errorf("arrow/array: unsupported builder for index type of %T", dt))
	}

	memo, err := createMemoTable(mem, dt.ValueType)
	if err != nil {
		panic(fmt.Errorf("arrow/array: unsupported builder for value type of %T", dt))
	}

	bldr := dictionaryBuilder{
		builder:    builder{refCount: 1, mem: mem},
		idxBuilder: idxbldr,
		memoTable:  memo,
		dt:         dt,
	}

	// dispatch on the value type to a concrete builder; when init is given,
	// seed the memo table (dictionary) without appending any indices
	switch dt.ValueType.ID() {
	case arrow.NULL:
		ret := &NullDictionaryBuilder{bldr}
		debug.Assert(init == nil, "arrow/array: doesn't make sense to init a null dictionary")
		return ret
	case arrow.UINT8:
		ret := &Uint8DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Uint8)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.INT8:
		ret := &Int8DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Int8)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.UINT16:
		ret
:= &Uint16DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Uint16)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.INT16:
		ret := &Int16DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Int16)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.UINT32:
		ret := &Uint32DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Uint32)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.INT32:
		ret := &Int32DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Int32)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.UINT64:
		ret := &Uint64DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Uint64)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.INT64:
		ret := &Int64DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Int64)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.FLOAT16:
		ret := &Float16DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Float16)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.FLOAT32:
		ret := &Float32DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Float32)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.FLOAT64:
		ret := &Float64DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Float64)); err != nil {
				panic(err)
			}
		}
		return ret
	// STRING and BINARY share BinaryDictionaryBuilder; only the insert helper differs
	case arrow.STRING:
		ret := &BinaryDictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertStringDictValues(init.(*String)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.BINARY:
		ret := &BinaryDictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Binary)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.FIXED_SIZE_BINARY:
		ret := &FixedSizeBinaryDictionaryBuilder{
			bldr, dt.ValueType.(*arrow.FixedSizeBinaryType).ByteWidth,
		}
		if init != nil {
			if err = ret.InsertDictValues(init.(*FixedSizeBinary)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.DATE32:
		ret := &Date32DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Date32)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.DATE64:
		ret := &Date64DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Date64)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.TIMESTAMP:
		ret := &TimestampDictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Timestamp)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.TIME32:
		ret := &Time32DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Time32)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.TIME64:
		ret := &Time64DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Time64)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.INTERVAL_MONTHS:
		ret := &MonthIntervalDictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*MonthInterval)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.INTERVAL_DAY_TIME:
		ret := &DayTimeDictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*DayTimeInterval)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.DECIMAL32:
		ret := &Decimal32DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Decimal32)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.DECIMAL64:
		ret := &Decimal64DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Decimal64)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.DECIMAL128:
		ret := &Decimal128DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Decimal128)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.DECIMAL256:
		ret := &Decimal256DictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Decimal256)); err != nil {
				panic(err)
			}
		}
		return ret
	// nested and large types are not yet supported; they fall through to the panic below
	case arrow.LIST:
	case arrow.STRUCT:
	case arrow.SPARSE_UNION:
	case arrow.DENSE_UNION:
	case arrow.DICTIONARY:
	case arrow.MAP:
	case arrow.EXTENSION:
	case arrow.FIXED_SIZE_LIST:
	case arrow.DURATION:
		ret := &DurationDictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*Duration)); err != nil {
				panic(err)
			}
		}
		return ret
	case arrow.LARGE_STRING:
	case arrow.LARGE_BINARY:
	case arrow.LARGE_LIST:
	case arrow.INTERVAL_MONTH_DAY_NANO:
		ret := &MonthDayNanoDictionaryBuilder{bldr}
		if init != nil {
			if err = ret.InsertDictValues(init.(*MonthDayNanoInterval)); err != nil {
				panic(err)
			}
		}
		return ret
	}

	panic("arrow/array: unimplemented dictionary key type")
}

// NewDictionaryBuilder returns a dictionary builder with an empty initial dictionary.
func NewDictionaryBuilder(mem memory.Allocator, dt *arrow.DictionaryType) DictionaryBuilder {
	return NewDictionaryBuilderWithDict(mem, dt, nil)
}

func (b *dictionaryBuilder) Type() arrow.DataType { return b.dt }

// Release decrements the reference count by 1, releasing the underlying
// index builder and memo table when it reaches zero.
func (b *dictionaryBuilder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		b.idxBuilder.Release()
		b.idxBuilder.Builder = nil
		// only the binary memo table holds releasable resources
		if binmemo, ok := b.memoTable.(*hashing.BinaryMemoTable); ok {
			binmemo.Release()
		}
		b.memoTable = nil
	}
}

// AppendNull appends a null element (a null index, no dictionary entry).
func (b *dictionaryBuilder) AppendNull() {
	b.length += 1
	b.nulls += 1
	b.idxBuilder.AppendNull()
}

func (b *dictionaryBuilder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

// AppendEmptyValue appends a non-null zero-value index.
func (b *dictionaryBuilder) AppendEmptyValue() {
	b.length += 1
	b.idxBuilder.AppendEmptyValue()
}

func (b *dictionaryBuilder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

func (b *dictionaryBuilder) Reserve(n int) {
	b.idxBuilder.Reserve(n)
}

func (b *dictionaryBuilder) Resize(n int) {
	b.idxBuilder.Resize(n)
	b.length = b.idxBuilder.Len()
}

// ResetFull resets the indices AND clears the accumulated dictionary,
// unlike reset/NewArray which keep the memo table for delta dictionaries.
func (b *dictionaryBuilder) ResetFull() {
	b.builder.reset()
	b.idxBuilder.NewArray().Release()
	b.memoTable.Reset()
}

func (b *dictionaryBuilder) Cap() int { return b.idxBuilder.Cap() }

func (b *dictionaryBuilder) IsNull(i int) bool { return b.idxBuilder.IsNull(i) }

// UnmarshalJSON expects a JSON array of logical values; each value is
// appended via a temporary builder for the dictionary's value type.
func (b *dictionaryBuilder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err :=
dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("dictionary builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

// Unmarshal decodes logical values by building a plain array of the value
// type, then appending it (which dictionary-encodes the values).
func (b *dictionaryBuilder) Unmarshal(dec *json.Decoder) error {
	bldr := NewBuilder(b.mem, b.dt.ValueType)
	defer bldr.Release()

	if err := bldr.Unmarshal(dec); err != nil {
		return err
	}

	arr := bldr.NewArray()
	defer arr.Release()
	return b.AppendArray(arr)
}

// AppendValueFromString parses one value using the value type's builder and
// appends it as a dictionary-encoded element.
func (b *dictionaryBuilder) AppendValueFromString(s string) error {
	bldr := NewBuilder(b.mem, b.dt.ValueType)
	defer bldr.Release()

	if err := bldr.AppendValueFromString(s); err != nil {
		return err
	}

	arr := bldr.NewArray()
	defer arr.Release()
	return b.AppendArray(arr)
}

// UnmarshalOne decodes a single JSON value and appends it.
func (b *dictionaryBuilder) UnmarshalOne(dec *json.Decoder) error {
	bldr := NewBuilder(b.mem, b.dt.ValueType)
	defer bldr.Release()

	if err := bldr.UnmarshalOne(dec); err != nil {
		return err
	}

	arr := bldr.NewArray()
	defer arr.Release()
	return b.AppendArray(arr)
}

func (b *dictionaryBuilder) NewArray() arrow.Array {
	return b.NewDictionaryArray()
}

// newData assembles the index data together with the full dictionary built so far.
func (b *dictionaryBuilder) newData() *Data {
	indices, dict, err := b.newWithDictOffset(0)
	if err != nil {
		panic(err)
	}

	indices.dtype = b.dt
	indices.dictionary = dict
	return indices
}

func (b *dictionaryBuilder) NewDictionaryArray() *Dictionary {
	a := &Dictionary{}
	a.refCount = 1

	indices := b.newData()
	a.setData(indices)
	indices.Release()
	return a
}

// newWithDictOffset finalizes the index array and materializes the
// dictionary values starting at the given memo-table offset (non-zero for
// delta dictionaries). The index builder is reset; the memo table is kept.
func (b *dictionaryBuilder) newWithDictOffset(offset int) (indices, dict *Data, err error) {
	idxarr := b.idxBuilder.NewArray()
	defer idxarr.Release()

	indices = idxarr.Data().(*Data)
	// remember where the next delta starts
	b.deltaOffset = b.memoTable.Size()
	dict, err = GetDictArrayData(b.mem, b.dt.ValueType, b.memoTable, offset)
	b.reset()
	indices.Retain()
	return
}

// NewDelta returns the dictionary indices and a delta dictionary since the
// last time NewArray or NewDictionaryArray were called, and resets the state
// of the builder (except for the dictionary / memotable)
func (b *dictionaryBuilder) NewDelta() (indices, delta arrow.Array, err error) {
	indicesData, deltaData, err := b.newWithDictOffset(b.deltaOffset)
	if err != nil {
		return nil, nil, err
	}

	defer indicesData.Release()
	defer deltaData.Release()
	indices, delta = MakeFromData(indicesData), MakeFromData(deltaData)
	return
}

// insertDictValue memoizes a value without appending an index.
func (b *dictionaryBuilder) insertDictValue(val interface{}) error {
	_, _, err := b.memoTable.GetOrInsert(val)
	return err
}

// insertDictBytes memoizes a byte slice without appending an index.
func (b *dictionaryBuilder) insertDictBytes(val []byte) error {
	_, _, err := b.memoTable.GetOrInsertBytes(val)
	return err
}

// appendValue memoizes val and appends its dictionary index.
func (b *dictionaryBuilder) appendValue(val interface{}) error {
	idx, _, err := b.memoTable.GetOrInsert(val)
	b.idxBuilder.Append(idx)
	b.length += 1
	return err
}

// appendBytes memoizes the byte slice and appends its dictionary index.
func (b *dictionaryBuilder) appendBytes(val []byte) error {
	idx, _, err := b.memoTable.GetOrInsertBytes(val)
	b.idxBuilder.Append(idx)
	b.length += 1
	return err
}

// getvalFn returns an accessor producing a memo-table-compatible value for
// element i of the given array (temporal types become their integer repr,
// fixed-width composites become raw byte slices).
func getvalFn(arr arrow.Array) func(i int) interface{} {
	switch typedarr := arr.(type) {
	case *Int8:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *Uint8:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *Int16:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *Uint16:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *Int32:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *Uint32:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *Int64:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *Uint64:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *Float16:
		// float16 is memoized by its raw bit pattern
		return func(i int) interface{} { return typedarr.Value(i).Uint16() }
	case *Float32:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *Float64:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *Duration:
		return func(i int) interface{} { return int64(typedarr.Value(i)) }
	case *Timestamp:
		return func(i int) interface{} { return int64(typedarr.Value(i)) }
	case *Date64:
		return func(i int) interface{} { return
int64(typedarr.Value(i)) }
	case *Time64:
		return func(i int) interface{} { return int64(typedarr.Value(i)) }
	case *Time32:
		return func(i int) interface{} { return int32(typedarr.Value(i)) }
	case *Date32:
		return func(i int) interface{} { return int32(typedarr.Value(i)) }
	case *MonthInterval:
		return func(i int) interface{} { return int32(typedarr.Value(i)) }
	case *Binary:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *FixedSizeBinary:
		return func(i int) interface{} { return typedarr.Value(i) }
	case *String:
		return func(i int) interface{} { return typedarr.Value(i) }
	// decimals and multi-field intervals are viewed as their raw bytes
	case *Decimal32:
		return func(i int) interface{} {
			val := typedarr.Value(i)
			return (*(*[arrow.Decimal32SizeBytes]byte)(unsafe.Pointer(&val)))[:]
		}
	case *Decimal64:
		return func(i int) interface{} {
			val := typedarr.Value(i)
			return (*(*[arrow.Decimal64SizeBytes]byte)(unsafe.Pointer(&val)))[:]
		}
	case *Decimal128:
		return func(i int) interface{} {
			val := typedarr.Value(i)
			return (*(*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&val)))[:]
		}
	case *Decimal256:
		return func(i int) interface{} {
			val := typedarr.Value(i)
			return (*(*[arrow.Decimal256SizeBytes]byte)(unsafe.Pointer(&val)))[:]
		}
	case *DayTimeInterval:
		return func(i int) interface{} {
			val := typedarr.Value(i)
			return (*(*[arrow.DayTimeIntervalSizeBytes]byte)(unsafe.Pointer(&val)))[:]
		}
	case *MonthDayNanoInterval:
		return func(i int) interface{} {
			val := typedarr.Value(i)
			return (*(*[arrow.MonthDayNanoIntervalSizeBytes]byte)(unsafe.Pointer(&val)))[:]
		}
	}

	panic("arrow/array: invalid dictionary value type")
}

// AppendArray dictionary-encodes and appends every element of arr, which
// must be a plain array of the builder's value type.
func (b *dictionaryBuilder) AppendArray(arr arrow.Array) error {
	debug.Assert(arrow.TypeEqual(b.dt.ValueType, arr.DataType()), "wrong value type of array to append to dict")

	valfn := getvalFn(arr)
	for i := 0; i < arr.Len(); i++ {
		if arr.IsNull(i) {
			b.AppendNull()
		} else {
			if err := b.appendValue(valfn(i)); err != nil {
				return err
			}
		}
	}
	return nil
}

func (b *dictionaryBuilder) IndexBuilder() IndexBuilder {
	return b.idxBuilder
}

// AppendIndices appends raw dictionary indices (with optional per-element
// validity), casting them to the concrete index builder's type.
func (b *dictionaryBuilder) AppendIndices(indices []int, valid []bool) {
	b.length += len(indices)
	switch idxbldr := b.idxBuilder.Builder.(type) {
	case *Int8Builder:
		vals := make([]int8, len(indices))
		for i, v := range indices {
			vals[i] = int8(v)
		}
		idxbldr.AppendValues(vals, valid)
	case *Int16Builder:
		vals := make([]int16, len(indices))
		for i, v := range indices {
			vals[i] = int16(v)
		}
		idxbldr.AppendValues(vals, valid)
	case *Int32Builder:
		vals := make([]int32, len(indices))
		for i, v := range indices {
			vals[i] = int32(v)
		}
		idxbldr.AppendValues(vals, valid)
	case *Int64Builder:
		vals := make([]int64, len(indices))
		for i, v := range indices {
			vals[i] = int64(v)
		}
		idxbldr.AppendValues(vals, valid)
	case *Uint8Builder:
		vals := make([]uint8, len(indices))
		for i, v := range indices {
			vals[i] = uint8(v)
		}
		idxbldr.AppendValues(vals, valid)
	case *Uint16Builder:
		vals := make([]uint16, len(indices))
		for i, v := range indices {
			vals[i] = uint16(v)
		}
		idxbldr.AppendValues(vals, valid)
	case *Uint32Builder:
		vals := make([]uint32, len(indices))
		for i, v := range indices {
			vals[i] = uint32(v)
		}
		idxbldr.AppendValues(vals, valid)
	case *Uint64Builder:
		vals := make([]uint64, len(indices))
		for i, v := range indices {
			vals[i] = uint64(v)
		}
		idxbldr.AppendValues(vals, valid)
	}
}

func (b *dictionaryBuilder) DictionarySize() int {
	return b.memoTable.Size()
}

// NullDictionaryBuilder builds dictionary arrays whose value type is NULL;
// every element is null and the dictionary itself is empty.
type NullDictionaryBuilder struct {
	dictionaryBuilder
}

func (b *NullDictionaryBuilder) NewArray() arrow.Array {
	return b.NewDictionaryArray()
}

func (b *NullDictionaryBuilder) NewDictionaryArray() *Dictionary {
	idxarr := b.idxBuilder.NewArray()
	defer idxarr.Release()

	out := idxarr.Data().(*Data)
	dictarr := NewNull(0)
	defer dictarr.Release()

	dictarr.data.Retain()
	out.dtype = b.dt
	out.dictionary = dictarr.data

	return NewDictionaryData(out)
}

// AppendArray appends arr.Len() nulls; arr must itself be a Null array.
func (b *NullDictionaryBuilder) AppendArray(arr arrow.Array) error {
	if arr.DataType().ID() != arrow.NULL {
		return fmt.Errorf("cannot append non-null array to null dictionary")
	}

	for i := 0; i < arr.(*Null).Len();
i++ {
		b.AppendNull()
	}
	return nil
}

// The typed builders below all follow the same pattern:
//   Append(v)           dictionary-encodes v and appends its index.
//   InsertDictValues(a) seeds the dictionary with a's values without
//                       appending any indices.
// Temporal types are converted to their underlying integer representation
// to match the memo-table key type.

type Int8DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Int8DictionaryBuilder) Append(v int8) error { return b.appendValue(v) }
func (b *Int8DictionaryBuilder) InsertDictValues(arr *Int8) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v); err != nil {
			break
		}
	}
	return
}

type Uint8DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Uint8DictionaryBuilder) Append(v uint8) error { return b.appendValue(v) }
func (b *Uint8DictionaryBuilder) InsertDictValues(arr *Uint8) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v); err != nil {
			break
		}
	}
	return
}

type Int16DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Int16DictionaryBuilder) Append(v int16) error { return b.appendValue(v) }
func (b *Int16DictionaryBuilder) InsertDictValues(arr *Int16) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v); err != nil {
			break
		}
	}
	return
}

type Uint16DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Uint16DictionaryBuilder) Append(v uint16) error { return b.appendValue(v) }
func (b *Uint16DictionaryBuilder) InsertDictValues(arr *Uint16) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v); err != nil {
			break
		}
	}
	return
}

type Int32DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Int32DictionaryBuilder) Append(v int32) error { return b.appendValue(v) }
func (b *Int32DictionaryBuilder) InsertDictValues(arr *Int32) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v); err != nil {
			break
		}
	}
	return
}

type Uint32DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Uint32DictionaryBuilder) Append(v uint32) error { return b.appendValue(v) }
func (b *Uint32DictionaryBuilder) InsertDictValues(arr *Uint32) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v); err != nil {
			break
		}
	}
	return
}

type Int64DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Int64DictionaryBuilder) Append(v int64) error { return b.appendValue(v) }
func (b *Int64DictionaryBuilder) InsertDictValues(arr *Int64) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v); err != nil {
			break
		}
	}
	return
}

type Uint64DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Uint64DictionaryBuilder) Append(v uint64) error { return b.appendValue(v) }
func (b *Uint64DictionaryBuilder) InsertDictValues(arr *Uint64) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v); err != nil {
			break
		}
	}
	return
}

type DurationDictionaryBuilder struct {
	dictionaryBuilder
}

func (b *DurationDictionaryBuilder) Append(v arrow.Duration) error { return b.appendValue(int64(v)) }
func (b *DurationDictionaryBuilder) InsertDictValues(arr *Duration) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(int64(v)); err != nil {
			break
		}
	}
	return
}

type TimestampDictionaryBuilder struct {
	dictionaryBuilder
}

func (b *TimestampDictionaryBuilder) Append(v arrow.Timestamp) error { return b.appendValue(int64(v)) }
func (b *TimestampDictionaryBuilder) InsertDictValues(arr *Timestamp) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(int64(v)); err != nil {
			break
		}
	}
	return
}

type Time32DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Time32DictionaryBuilder) Append(v arrow.Time32) error { return b.appendValue(int32(v)) }
func (b *Time32DictionaryBuilder) InsertDictValues(arr *Time32) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(int32(v)); err != nil {
			break
		}
	}
	return
}

type Time64DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Time64DictionaryBuilder) Append(v arrow.Time64) error { return b.appendValue(int64(v)) }
func (b *Time64DictionaryBuilder) InsertDictValues(arr *Time64) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(int64(v)); err != nil {
			break
		}
	}
	return
}

type Date32DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Date32DictionaryBuilder) Append(v arrow.Date32) error { return b.appendValue(int32(v)) }
func (b *Date32DictionaryBuilder) InsertDictValues(arr *Date32) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(int32(v)); err != nil {
			break
		}
	}
	return
}

type Date64DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Date64DictionaryBuilder) Append(v arrow.Date64) error { return b.appendValue(int64(v)) }
func (b *Date64DictionaryBuilder) InsertDictValues(arr *Date64) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(int64(v)); err != nil {
			break
		}
	}
	return
}

type MonthIntervalDictionaryBuilder struct {
	dictionaryBuilder
}

func (b *MonthIntervalDictionaryBuilder) Append(v arrow.MonthInterval) error {
	return b.appendValue(int32(v))
}
func (b *MonthIntervalDictionaryBuilder) InsertDictValues(arr *MonthInterval) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(int32(v)); err != nil {
			break
		}
	}
	return
}

type Float16DictionaryBuilder struct {
	dictionaryBuilder
}

// float16 values are memoized via their raw uint16 bit pattern
func (b *Float16DictionaryBuilder) Append(v float16.Num) error { return b.appendValue(v.Uint16()) }
func (b *Float16DictionaryBuilder) InsertDictValues(arr *Float16) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v.Uint16()); err != nil {
			break
		}
	}
	return
}

type Float32DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Float32DictionaryBuilder) Append(v float32) error { return b.appendValue(v) }
func (b *Float32DictionaryBuilder) InsertDictValues(arr *Float32) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v); err != nil {
			break
		}
	}
	return
}

type Float64DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Float64DictionaryBuilder) Append(v float64) error { return b.appendValue(v) }
func (b *Float64DictionaryBuilder) InsertDictValues(arr *Float64) (err error) {
	for _, v := range arr.values {
		if err = b.insertDictValue(v); err != nil {
			break
		}
	}
	return
}

// BinaryDictionaryBuilder handles both BINARY and STRING value types.
type BinaryDictionaryBuilder struct {
	dictionaryBuilder
}

func (b
*BinaryDictionaryBuilder) Append(v []byte) error {
	// nil (as opposed to empty) byte slices are treated as null elements
	if v == nil {
		b.AppendNull()
		return nil
	}
	return b.appendBytes(v)
}

func (b *BinaryDictionaryBuilder) AppendString(v string) error { return b.appendBytes([]byte(v)) }

// InsertDictValues seeds the dictionary from a Binary array without
// appending indices; the array's type must match the builder's value type.
func (b *BinaryDictionaryBuilder) InsertDictValues(arr *Binary) (err error) {
	if !arrow.TypeEqual(arr.DataType(), b.dt.ValueType) {
		return fmt.Errorf("dictionary insert type mismatch: cannot insert values of type %T to dictionary type %T", arr.DataType(), b.dt.ValueType)
	}

	for i := 0; i < arr.Len(); i++ {
		if err = b.insertDictBytes(arr.Value(i)); err != nil {
			break
		}
	}
	return
}

// InsertStringDictValues is the String-array counterpart of InsertDictValues.
func (b *BinaryDictionaryBuilder) InsertStringDictValues(arr *String) (err error) {
	if !arrow.TypeEqual(arr.DataType(), b.dt.ValueType) {
		return fmt.Errorf("dictionary insert type mismatch: cannot insert values of type %T to dictionary type %T", arr.DataType(), b.dt.ValueType)
	}

	for i := 0; i < arr.Len(); i++ {
		if err = b.insertDictValue(arr.Value(i)); err != nil {
			break
		}
	}
	return
}

// GetValueIndex returns the index value appended at position i,
// or -1 for an unrecognized index builder type.
func (b *BinaryDictionaryBuilder) GetValueIndex(i int) int {
	switch b := b.idxBuilder.Builder.(type) {
	case *Uint8Builder:
		return int(b.Value(i))
	case *Int8Builder:
		return int(b.Value(i))
	case *Uint16Builder:
		return int(b.Value(i))
	case *Int16Builder:
		return int(b.Value(i))
	case *Uint32Builder:
		return int(b.Value(i))
	case *Int32Builder:
		return int(b.Value(i))
	case *Uint64Builder:
		return int(b.Value(i))
	case *Int64Builder:
		return int(b.Value(i))
	default:
		return -1
	}
}

// Value returns the dictionary value stored at memo-table index i.
func (b *BinaryDictionaryBuilder) Value(i int) []byte {
	switch mt := b.memoTable.(type) {
	case *hashing.BinaryMemoTable:
		return mt.Value(i)
	}
	return nil
}

func (b *BinaryDictionaryBuilder) ValueStr(i int) string {
	return string(b.Value(i))
}

type FixedSizeBinaryDictionaryBuilder struct {
	dictionaryBuilder
	byteWidth int // width in bytes of each value
}

// Append appends the first byteWidth bytes of v as a dictionary value.
func (b *FixedSizeBinaryDictionaryBuilder) Append(v []byte) error {
	return b.appendValue(v[:b.byteWidth])
}

// InsertDictValues walks the array's raw value bytes in byteWidth strides,
// memoizing each fixed-width value.
func (b *FixedSizeBinaryDictionaryBuilder) InsertDictValues(arr *FixedSizeBinary) (err error) {
	var (
		beg = arr.array.data.offset * b.byteWidth
		end = (arr.array.data.offset + arr.data.length) * b.byteWidth
	)
	data := arr.valueBytes[beg:end]
	for len(data) > 0 {
		if err = b.insertDictValue(data[:b.byteWidth]); err != nil {
			break
		}
		data = data[b.byteWidth:]
	}
	return
}

type Decimal32DictionaryBuilder struct {
	dictionaryBuilder
}

// decimals are memoized by their raw byte representation
func (b *Decimal32DictionaryBuilder) Append(v decimal.Decimal32) error {
	return b.appendValue((*(*[arrow.Decimal32SizeBytes]byte)(unsafe.Pointer(&v)))[:])
}
func (b *Decimal32DictionaryBuilder) InsertDictValues(arr *Decimal32) (err error) {
	data := arrow.Decimal32Traits.CastToBytes(arr.values)
	for len(data) > 0 {
		if err = b.insertDictValue(data[:arrow.Decimal32SizeBytes]); err != nil {
			break
		}
		data = data[arrow.Decimal32SizeBytes:]
	}
	return
}

type Decimal64DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Decimal64DictionaryBuilder) Append(v decimal.Decimal64) error {
	return b.appendValue((*(*[arrow.Decimal64SizeBytes]byte)(unsafe.Pointer(&v)))[:])
}
func (b *Decimal64DictionaryBuilder) InsertDictValues(arr *Decimal64) (err error) {
	data := arrow.Decimal64Traits.CastToBytes(arr.values)
	for len(data) > 0 {
		if err = b.insertDictValue(data[:arrow.Decimal64SizeBytes]); err != nil {
			break
		}
		data = data[arrow.Decimal64SizeBytes:]
	}
	return
}

type Decimal128DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Decimal128DictionaryBuilder) Append(v decimal128.Num) error {
	return b.appendValue((*(*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&v)))[:])
}
func (b *Decimal128DictionaryBuilder) InsertDictValues(arr *Decimal128) (err error) {
	data := arrow.Decimal128Traits.CastToBytes(arr.values)
	for len(data) > 0 {
		if err = b.insertDictValue(data[:arrow.Decimal128SizeBytes]); err != nil {
			break
		}
		data = data[arrow.Decimal128SizeBytes:]
	}
	return
}

type Decimal256DictionaryBuilder struct {
	dictionaryBuilder
}

func (b *Decimal256DictionaryBuilder) Append(v decimal256.Num) error {
	return b.appendValue((*(*[arrow.Decimal256SizeBytes]byte)(unsafe.Pointer(&v)))[:])
}
func (b *Decimal256DictionaryBuilder) InsertDictValues(arr *Decimal256) (err error) {
	data := arrow.Decimal256Traits.CastToBytes(arr.values)
	for len(data) > 0 {
		if err = b.insertDictValue(data[:arrow.Decimal256SizeBytes]); err != nil {
			break
		}
		data = data[arrow.Decimal256SizeBytes:]
	}
	return
}

type MonthDayNanoDictionaryBuilder struct {
	dictionaryBuilder
}

func (b *MonthDayNanoDictionaryBuilder) Append(v arrow.MonthDayNanoInterval) error {
	return b.appendValue((*(*[arrow.MonthDayNanoIntervalSizeBytes]byte)(unsafe.Pointer(&v)))[:])
}
func (b *MonthDayNanoDictionaryBuilder) InsertDictValues(arr *MonthDayNanoInterval) (err error) {
	data := arrow.MonthDayNanoIntervalTraits.CastToBytes(arr.values)
	for len(data) > 0 {
		if err = b.insertDictValue(data[:arrow.MonthDayNanoIntervalSizeBytes]); err != nil {
			break
		}
		data = data[arrow.MonthDayNanoIntervalSizeBytes:]
	}
	return
}

type DayTimeDictionaryBuilder struct {
	dictionaryBuilder
}

func (b *DayTimeDictionaryBuilder) Append(v arrow.DayTimeInterval) error {
	return b.appendValue((*(*[arrow.DayTimeIntervalSizeBytes]byte)(unsafe.Pointer(&v)))[:])
}
func (b *DayTimeDictionaryBuilder) InsertDictValues(arr *DayTimeInterval) (err error) {
	data := arrow.DayTimeIntervalTraits.CastToBytes(arr.values)
	for len(data) > 0 {
		if err = b.insertDictValue(data[:arrow.DayTimeIntervalSizeBytes]); err != nil {
			break
		}
		data = data[arrow.DayTimeIntervalSizeBytes:]
	}
	return
}

// IsTrivialTransposition reports whether transposeMap is the identity
// mapping, i.e. transposing would change nothing.
func IsTrivialTransposition(transposeMap []int32) bool {
	for i, t := range transposeMap {
		if t != int32(i) {
			return false
		}
	}
	return true
}

// TransposeDictIndices remaps the dictionary indices of data through
// transposeMap, producing new array data using outType and the given
// (unified) dictionary.
func TransposeDictIndices(mem memory.Allocator, data arrow.ArrayData, inType, outType arrow.DataType, dict arrow.ArrayData, transposeMap []int32) (arrow.ArrayData, error) {
	// inType may be different from data->dtype if data is ExtensionType
	if inType.ID() != arrow.DICTIONARY || outType.ID() != arrow.DICTIONARY {
		return nil, errors.New("arrow/array: expected
dictionary type") } var ( inDictType = inType.(*arrow.DictionaryType) outDictType = outType.(*arrow.DictionaryType) inIndexType = inDictType.IndexType outIndexType = outDictType.IndexType.(arrow.FixedWidthDataType) ) if inIndexType.ID() == outIndexType.ID() && IsTrivialTransposition(transposeMap) { // index type and values will be identical, we can reuse the existing buffers return NewDataWithDictionary(outType, data.Len(), []*memory.Buffer{data.Buffers()[0], data.Buffers()[1]}, data.NullN(), data.Offset(), dict.(*Data)), nil } // default path: compute the transposed indices as a new buffer outBuf := memory.NewResizableBuffer(mem) outBuf.Resize(data.Len() * int(bitutil.BytesForBits(int64(outIndexType.BitWidth())))) defer outBuf.Release() // shift null buffer if original offset is non-zero var nullBitmap *memory.Buffer if data.Offset() != 0 && data.NullN() != 0 { nullBitmap = memory.NewResizableBuffer(mem) nullBitmap.Resize(int(bitutil.BytesForBits(int64(data.Len())))) bitutil.CopyBitmap(data.Buffers()[0].Bytes(), data.Offset(), data.Len(), nullBitmap.Bytes(), 0) defer nullBitmap.Release() } else { nullBitmap = data.Buffers()[0] } outData := NewDataWithDictionary(outType, data.Len(), []*memory.Buffer{nullBitmap, outBuf}, data.NullN(), 0, dict.(*Data)) err := utils.TransposeIntsBuffers(inIndexType, outIndexType, data.Buffers()[1].Bytes(), outBuf.Bytes(), data.Offset(), outData.offset, data.Len(), transposeMap) return outData, err } // DictionaryUnifier defines the interface used for unifying, and optionally producing // transposition maps for, multiple dictionary arrays incrementally. type DictionaryUnifier interface { // Unify adds the provided array of dictionary values to be unified. Unify(arrow.Array) error // UnifyAndTranspose adds the provided array of dictionary values, // just like Unify but returns an allocated buffer containing a mapping // to transpose dictionary indices. 
UnifyAndTranspose(dict arrow.Array) (transposed *memory.Buffer, err error) // GetResult returns the dictionary type (choosing the smallest index type // that can represent all the values) and the new unified dictionary. // // Calling GetResult clears the existing dictionary from the unifier so it // can be reused by calling Unify/UnifyAndTranspose again with new arrays. GetResult() (outType arrow.DataType, outDict arrow.Array, err error) // GetResultWithIndexType is like GetResult, but allows specifying the type // of the dictionary indexes rather than letting the unifier pick. If the // passed in index type isn't large enough to represent all of the dictionary // values, an error will be returned instead. The new unified dictionary // is returned. GetResultWithIndexType(indexType arrow.DataType) (arrow.Array, error) // Release should be called to clean up any allocated scratch memo-table used // for building the unified dictionary. Release() } type unifier struct { mem memory.Allocator valueType arrow.DataType memoTable hashing.MemoTable } // NewDictionaryUnifier constructs and returns a new dictionary unifier for dictionaries // of valueType, using the provided allocator for allocating the unified dictionary // and the memotable used for building it. // // This will only work for non-nested types currently. a nested valueType or dictionary type // will result in an error. 
func NewDictionaryUnifier(alloc memory.Allocator, valueType arrow.DataType) (DictionaryUnifier, error) {
	// createMemoTable rejects types it cannot hash (e.g. nested types),
	// which is how the documented "non-nested only" restriction is enforced.
	memoTable, err := createMemoTable(alloc, valueType)
	if err != nil {
		return nil, err
	}
	return &unifier{
		mem:       alloc,
		valueType: valueType,
		memoTable: memoTable,
	}, nil
}

// Release frees the scratch memo-table if it is the binary variant;
// the other memo-table implementations hold no releasable resources here.
func (u *unifier) Release() {
	if bin, ok := u.memoTable.(*hashing.BinaryMemoTable); ok {
		bin.Release()
	}
}

// Unify inserts every value of dict into the memo-table, assigning new
// unified indices to values not seen before. Nulls are memoized too, via
// GetOrInsertNull, so a null slot in any input dictionary maps to a single
// shared null entry.
func (u *unifier) Unify(dict arrow.Array) (err error) {
	if !arrow.TypeEqual(u.valueType, dict.DataType()) {
		return fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), u.valueType)
	}

	valFn := getvalFn(dict)
	for i := 0; i < dict.Len(); i++ {
		if dict.IsNull(i) {
			// index of the null entry is not needed here, only its presence
			u.memoTable.GetOrInsertNull()
			continue
		}

		if _, _, err = u.memoTable.GetOrInsert(valFn(i)); err != nil {
			return err
		}
	}
	return
}

// UnifyAndTranspose is Unify plus the production of a transposition map:
// entry i of the returned buffer (int32 values) is the unified index for
// the value at position i of dict. The caller owns the returned buffer.
func (u *unifier) UnifyAndTranspose(dict arrow.Array) (transposed *memory.Buffer, err error) {
	if !arrow.TypeEqual(u.valueType, dict.DataType()) {
		return nil, fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), u.valueType)
	}

	transposed = memory.NewResizableBuffer(u.mem)
	transposed.Resize(arrow.Int32Traits.BytesRequired(dict.Len()))

	newIdxes := arrow.Int32Traits.CastFromBytes(transposed.Bytes())
	valFn := getvalFn(dict)
	for i := 0; i < dict.Len(); i++ {
		if dict.IsNull(i) {
			idx, _ := u.memoTable.GetOrInsertNull()
			newIdxes[i] = int32(idx)
			continue
		}

		idx, _, err := u.memoTable.GetOrInsert(valFn(i))
		if err != nil {
			// don't leak the partially-filled buffer on failure
			transposed.Release()
			return nil, err
		}
		newIdxes[i] = int32(idx)
	}
	return
}

// GetResult picks the smallest signed index type that can represent every
// unified index, materializes the unified dictionary, and resets the
// memo-table so the unifier can be reused (per the interface contract).
func (u *unifier) GetResult() (outType arrow.DataType, outDict arrow.Array, err error) {
	dictLen := u.memoTable.Size()
	var indexType arrow.DataType
	switch {
	case dictLen <= math.MaxInt8:
		indexType = arrow.PrimitiveTypes.Int8
	case dictLen <= math.MaxInt16:
		indexType = arrow.PrimitiveTypes.Int16
	case dictLen <= math.MaxInt32:
		indexType = arrow.PrimitiveTypes.Int32
	default:
		indexType = arrow.PrimitiveTypes.Int64
	}
	outType = &arrow.DictionaryType{IndexType: indexType, ValueType: u.valueType}

	dictData, err := GetDictArrayData(u.mem, u.valueType, u.memoTable, 0)
	if err != nil {
		return nil, nil, err
	}

	// clear accumulated values so Unify/UnifyAndTranspose can start fresh
	u.memoTable.Reset()

	// MakeFromData takes its own reference on dictData, so ours is released
	defer dictData.Release()
	outDict = MakeFromData(dictData)
	return
}

// GetResultWithIndexType validates that the caller-chosen integral index
// type can hold every unified index, then materializes the unified
// dictionary and resets the memo-table (same reuse contract as GetResult).
func (u *unifier) GetResultWithIndexType(indexType arrow.DataType) (arrow.Array, error) {
	dictLen := u.memoTable.Size()
	var toobig bool
	switch indexType.ID() {
	case arrow.UINT8:
		toobig = dictLen > math.MaxUint8
	case arrow.INT8:
		toobig = dictLen > math.MaxInt8
	case arrow.UINT16:
		toobig = dictLen > math.MaxUint16
	case arrow.INT16:
		toobig = dictLen > math.MaxInt16
	case arrow.UINT32:
		toobig = uint(dictLen) > math.MaxUint32
	case arrow.INT32:
		toobig = dictLen > math.MaxInt32
	case arrow.UINT64:
		// NOTE(review): this comparison can never be true (a uint64 cannot
		// exceed math.MaxUint64); it documents intent that uint64 always fits.
		toobig = uint64(dictLen) > uint64(math.MaxUint64)
	case arrow.INT64:
		// dictLen is an int, so it always fits in int64: never too big
	default:
		return nil, fmt.Errorf("arrow/array: invalid dictionary index type: %s, must be integral", indexType)
	}
	if toobig {
		return nil, errors.New("arrow/array: cannot combine dictionaries. unified dictionary requires a larger index type")
	}

	dictData, err := GetDictArrayData(u.mem, u.valueType, u.memoTable, 0)
	if err != nil {
		return nil, err
	}

	// clear accumulated values so the unifier can be reused
	u.memoTable.Reset()

	defer dictData.Release()
	return MakeFromData(dictData), nil
}

// binaryUnifier is a DictionaryUnifier specialized for arrow.BinaryTypes.Binary
// values, backed directly by a BinaryMemoTable (avoids the generic valFn path).
type binaryUnifier struct {
	mem       memory.Allocator
	memoTable *hashing.BinaryMemoTable
}

// NewBinaryDictionaryUnifier constructs and returns a new dictionary unifier for dictionaries
// of binary values, using the provided allocator for allocating the unified dictionary
// and the memotable used for building it.
func NewBinaryDictionaryUnifier(alloc memory.Allocator) DictionaryUnifier {
	return &binaryUnifier{
		mem:       alloc,
		memoTable: hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(alloc, arrow.BinaryTypes.Binary)),
	}
}

// Release frees the scratch binary memo-table.
func (u *binaryUnifier) Release() {
	u.memoTable.Release()
}

// Unify inserts every value of dict (which must be a Binary array) into the
// memo-table; nulls are memoized via GetOrInsertNull so all input nulls share
// one unified entry.
func (u *binaryUnifier) Unify(dict arrow.Array) (err error) {
	if !arrow.TypeEqual(arrow.BinaryTypes.Binary, dict.DataType()) {
		return fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), arrow.BinaryTypes.Binary)
	}

	typedDict := dict.(*Binary)
	for i := 0; i < dict.Len(); i++ {
		if dict.IsNull(i) {
			// only the presence of a null entry matters here, not its index
			u.memoTable.GetOrInsertNull()
			continue
		}

		if _, _, err = u.memoTable.GetOrInsertBytes(typedDict.Value(i)); err != nil {
			return err
		}
	}
	return
}

// UnifyAndTranspose is Unify plus production of an int32 transposition map:
// entry i of the returned buffer is the unified index for position i of dict.
// The caller owns the returned buffer.
func (u *binaryUnifier) UnifyAndTranspose(dict arrow.Array) (transposed *memory.Buffer, err error) {
	if !arrow.TypeEqual(arrow.BinaryTypes.Binary, dict.DataType()) {
		return nil, fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), arrow.BinaryTypes.Binary)
	}

	transposed = memory.NewResizableBuffer(u.mem)
	transposed.Resize(arrow.Int32Traits.BytesRequired(dict.Len()))

	newIdxes := arrow.Int32Traits.CastFromBytes(transposed.Bytes())
	typedDict := dict.(*Binary)
	for i := 0; i < dict.Len(); i++ {
		if dict.IsNull(i) {
			idx, _ := u.memoTable.GetOrInsertNull()
			newIdxes[i] = int32(idx)
			continue
		}

		idx, _, err := u.memoTable.GetOrInsertBytes(typedDict.Value(i))
		if err != nil {
			// don't leak the partially-filled buffer on failure
			transposed.Release()
			return nil, err
		}
		newIdxes[i] = int32(idx)
	}
	return
}

// GetResult picks the smallest signed index type able to represent every
// unified index, materializes the unified binary dictionary, and resets the
// memo-table so the unifier can be reused.
func (u *binaryUnifier) GetResult() (outType arrow.DataType, outDict arrow.Array, err error) {
	dictLen := u.memoTable.Size()
	var indexType arrow.DataType
	switch {
	case dictLen <= math.MaxInt8:
		indexType = arrow.PrimitiveTypes.Int8
	case dictLen <= math.MaxInt16:
		indexType = arrow.PrimitiveTypes.Int16
	case dictLen <= math.MaxInt32:
		indexType = arrow.PrimitiveTypes.Int32
	default:
		indexType = arrow.PrimitiveTypes.Int64
	}
	outType = &arrow.DictionaryType{IndexType: indexType, ValueType: arrow.BinaryTypes.Binary}

	dictData, err := GetDictArrayData(u.mem, arrow.BinaryTypes.Binary, u.memoTable, 0)
	if err != nil {
		return nil, nil, err
	}

	// clear accumulated values so Unify/UnifyAndTranspose can start fresh
	u.memoTable.Reset()

	// MakeFromData takes its own reference on dictData, so ours is released
	defer dictData.Release()
	outDict = MakeFromData(dictData)
	return
}

// GetResultWithIndexType validates that the caller-chosen integral index type
// can hold every unified index, then materializes the unified binary
// dictionary and resets the memo-table (same reuse contract as GetResult).
func (u *binaryUnifier) GetResultWithIndexType(indexType arrow.DataType) (arrow.Array, error) {
	dictLen := u.memoTable.Size()
	var toobig bool
	switch indexType.ID() {
	case arrow.UINT8:
		toobig = dictLen > math.MaxUint8
	case arrow.INT8:
		toobig = dictLen > math.MaxInt8
	case arrow.UINT16:
		toobig = dictLen > math.MaxUint16
	case arrow.INT16:
		toobig = dictLen > math.MaxInt16
	case arrow.UINT32:
		toobig = uint(dictLen) > math.MaxUint32
	case arrow.INT32:
		toobig = dictLen > math.MaxInt32
	case arrow.UINT64:
		// NOTE(review): this comparison can never be true (a uint64 cannot
		// exceed math.MaxUint64); it documents intent that uint64 always fits.
		toobig = uint64(dictLen) > uint64(math.MaxUint64)
	case arrow.INT64:
		// dictLen is an int, so it always fits in int64: never too big
	default:
		return nil, fmt.Errorf("arrow/array: invalid dictionary index type: %s, must be integral", indexType)
	}
	if toobig {
		return nil, errors.New("arrow/array: cannot combine dictionaries. unified dictionary requires a larger index type")
	}

	dictData, err := GetDictArrayData(u.mem, arrow.BinaryTypes.Binary, u.memoTable, 0)
	if err != nil {
		return nil, err
	}

	// clear accumulated values so the unifier can be reused
	u.memoTable.Reset()

	defer dictData.Release()
	return MakeFromData(dictData), nil
}

// unifyRecursive walks typ (unwrapping an extension type to its storage type
// first), recursively unifying the dictionaries of any nested children and
// then, if typ itself is a dictionary type, unifying the dictionaries of all
// chunks in place. It reports via changed whether any chunk was replaced.
//
// On a dictionary type, old chunk *Data values are Released and replaced with
// newly transposed ones, so chunks is mutated in place.
func unifyRecursive(mem memory.Allocator, typ arrow.DataType, chunks []*Data) (changed bool, err error) {
	debug.Assert(len(chunks) != 0, "must provide non-zero length chunk slice")
	var extType arrow.DataType
	if typ.ID() == arrow.EXTENSION {
		// remember the extension type so it can be restored on the new chunks,
		// but operate on the underlying storage type
		extType = typ
		typ = typ.(arrow.ExtensionType).StorageType()
	}

	if nestedTyp, ok := typ.(arrow.NestedType); ok {
		children := make([]*Data, len(chunks))
		for i, f := range nestedTyp.Fields() {
			// gather the i-th child of every chunk and unify them together
			for j, c := range chunks {
				children[j] = c.childData[i].(*Data)
			}

			childChanged, err := unifyRecursive(mem, f.Type, children)
			if err != nil {
				return false, err
			}
			if childChanged {
				// only when unification actually occurs
				for j := range chunks {
					chunks[j].childData[i] = children[j]
				}
				changed = true
			}
		}
	}

	if typ.ID() == arrow.DICTIONARY {
		dictType := typ.(*arrow.DictionaryType)
		var (
			uni     DictionaryUnifier
			newDict arrow.Array
		)
		// unify any nested dictionaries first, but the unifier doesn't support
		// nested dictionaries yet so this would fail.
		uni, err = NewDictionaryUnifier(mem, dictType.ValueType)
		if err != nil {
			return changed, err
		}
		defer uni.Release()

		// feed each chunk's dictionary into the unifier, keeping the per-chunk
		// transposition maps (deferred releases run when this call returns)
		transposeMaps := make([]*memory.Buffer, len(chunks))
		for i, c := range chunks {
			debug.Assert(c.dictionary != nil, "missing dictionary data for dictionary array")
			arr := MakeFromData(c.dictionary)
			defer arr.Release()
			if transposeMaps[i], err = uni.UnifyAndTranspose(arr); err != nil {
				return
			}
			defer transposeMaps[i].Release()
		}

		// keep the original index type; errors if it cannot hold the unified size
		if newDict, err = uni.GetResultWithIndexType(dictType.IndexType); err != nil {
			return
		}
		defer newDict.Release()

		// rewrite each chunk's indices against the unified dictionary,
		// swapping the new data in for the old
		for j := range chunks {
			chnk, err := TransposeDictIndices(mem, chunks[j], typ, typ, newDict.Data(), arrow.Int32Traits.CastFromBytes(transposeMaps[j].Bytes()))
			if err != nil {
				return changed, err
			}
			chunks[j].Release()
			chunks[j] = chnk.(*Data)
			if extType != nil {
				// restore the original extension type on the replacement data
				chunks[j].dtype = extType
			}
		}
		changed = true
	}
	return
}

// UnifyChunkedDicts takes a chunked array of dictionary type and will unify
// the dictionary across all of the chunks with the returned chunked array
// having all chunks share the same dictionary.
//
// The return from this *must* have Release called on it unless an error is returned
// in which case the *arrow.Chunked will be nil.
//
// If there is 1 or fewer chunks, then nothing is modified and this function will just
// call Retain on the passed in Chunked array (so Release can safely be called on it).
// The same is true if the type of the array is not a dictionary or if no changes are
// needed for all of the chunks to be using the same dictionary.
func UnifyChunkedDicts(alloc memory.Allocator, chnkd *arrow.Chunked) (*arrow.Chunked, error) { if len(chnkd.Chunks()) <= 1 { chnkd.Retain() return chnkd, nil } chunksData := make([]*Data, len(chnkd.Chunks())) for i, c := range chnkd.Chunks() { c.Data().Retain() chunksData[i] = c.Data().(*Data) } changed, err := unifyRecursive(alloc, chnkd.DataType(), chunksData) if err != nil || !changed { for _, c := range chunksData { c.Release() } if err == nil { chnkd.Retain() } else { chnkd = nil } return chnkd, err } chunks := make([]arrow.Array, len(chunksData)) for i, c := range chunksData { chunks[i] = MakeFromData(c) defer chunks[i].Release() c.Release() } return arrow.NewChunked(chnkd.DataType(), chunks), nil } // UnifyTableDicts performs UnifyChunkedDicts on each column of the table so that // any dictionary column will have the dictionaries of its chunks unified. // // The returned Table should always be Release'd unless a non-nil error was returned, // in which case the table returned will be nil. func UnifyTableDicts(alloc memory.Allocator, table arrow.Table) (arrow.Table, error) { cols := make([]arrow.Column, table.NumCols()) for i := 0; i < int(table.NumCols()); i++ { chnkd, err := UnifyChunkedDicts(alloc, table.Column(i).Data()) if err != nil { return nil, err } defer chnkd.Release() cols[i] = *arrow.NewColumn(table.Schema().Field(i), chnkd) defer cols[i].Release() } return NewTable(table.Schema(), cols, table.NumRows()), nil } var ( _ arrow.Array = (*Dictionary)(nil) _ Builder = (*dictionaryBuilder)(nil) ) arrow-go-18.2.0/arrow/array/dictionary_test.go000066400000000000000000002032461476434502500213630ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "fmt" "math" "math/rand" "reflect" "strings" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" ) type PrimitiveDictionaryTestSuite struct { suite.Suite mem *memory.CheckedAllocator typ arrow.DataType reftyp reflect.Type } func (p *PrimitiveDictionaryTestSuite) SetupTest() { p.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) } func (p *PrimitiveDictionaryTestSuite) TearDownTest() { p.mem.AssertSize(p.T(), 0) } func TestPrimitiveDictionaryBuilders(t *testing.T) { tests := []struct { name string typ arrow.DataType reftyp reflect.Type }{ {"int8", arrow.PrimitiveTypes.Int8, reflect.TypeOf(int8(0))}, {"uint8", arrow.PrimitiveTypes.Uint8, reflect.TypeOf(uint8(0))}, {"int16", arrow.PrimitiveTypes.Int16, reflect.TypeOf(int16(0))}, {"uint16", arrow.PrimitiveTypes.Uint16, reflect.TypeOf(uint16(0))}, {"int32", arrow.PrimitiveTypes.Int32, reflect.TypeOf(int32(0))}, {"uint32", arrow.PrimitiveTypes.Uint32, reflect.TypeOf(uint32(0))}, {"int64", arrow.PrimitiveTypes.Int64, 
reflect.TypeOf(int64(0))}, {"uint64", arrow.PrimitiveTypes.Uint64, reflect.TypeOf(uint64(0))}, {"float32", arrow.PrimitiveTypes.Float32, reflect.TypeOf(float32(0))}, {"float64", arrow.PrimitiveTypes.Float64, reflect.TypeOf(float64(0))}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { suite.Run(t, &PrimitiveDictionaryTestSuite{typ: tt.typ, reftyp: tt.reftyp}) }) } } func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderBasic() { expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} bldr := array.NewDictionaryBuilder(p.mem, expectedType) defer bldr.Release() builder := reflect.ValueOf(bldr) appfn := builder.MethodByName("Append") p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) bldr.AppendNull() p.EqualValues(4, bldr.Len()) p.EqualValues(1, bldr.NullN()) p.EqualValues(2, bldr.DictionarySize()) arr := bldr.NewArray().(*array.Dictionary) defer arr.Release() p.True(arrow.TypeEqual(expectedType, arr.DataType())) expectedDict, _, err := array.FromJSON(p.mem, expectedType.ValueType, strings.NewReader("[1, 2]")) p.NoError(err) defer expectedDict.Release() expectedIndices, _, err := array.FromJSON(p.mem, expectedType.IndexType, strings.NewReader("[0, 1, 0, null]")) p.NoError(err) defer expectedIndices.Release() expected := array.NewDictionaryArray(expectedType, expectedIndices, expectedDict) defer expected.Release() p.True(array.Equal(expected, arr)) } func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderInit() { valueType := p.typ dictArr, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2]")) p.NoError(err) defer dictArr.Release() dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: valueType} bldr := array.NewDictionaryBuilderWithDict(p.mem, dictType, dictArr) 
defer bldr.Release() builder := reflect.ValueOf(bldr) appfn := builder.MethodByName("Append") p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) bldr.AppendNull() p.EqualValues(4, bldr.Len()) p.EqualValues(1, bldr.NullN()) arr := bldr.NewDictionaryArray() defer arr.Release() expectedIndices, _, err := array.FromJSON(p.mem, dictType.IndexType, strings.NewReader("[0, 1, 0, null]")) p.NoError(err) defer expectedIndices.Release() expected := array.NewDictionaryArray(dictType, expectedIndices, dictArr) defer expected.Release() p.True(array.Equal(expected, arr)) } func (p *PrimitiveDictionaryTestSuite) TestDictionaryNewBuilder() { valueType := p.typ dictArr, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2]")) p.NoError(err) defer dictArr.Release() dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: valueType} bldr := array.NewBuilder(p.mem, dictType) defer bldr.Release() builder := reflect.ValueOf(bldr) appfn := builder.MethodByName("Append") p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) bldr.AppendNull() p.EqualValues(4, bldr.Len()) p.EqualValues(1, bldr.NullN()) arr := bldr.NewArray().(*array.Dictionary) defer arr.Release() expectedIndices, _, err := array.FromJSON(p.mem, dictType.IndexType, strings.NewReader("[0, 1, 0, null]")) p.NoError(err) defer expectedIndices.Release() expected := array.NewDictionaryArray(dictType, expectedIndices, dictArr) defer expected.Release() p.True(array.Equal(expected, arr)) } func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderAppendArr() { valueType := p.typ 
intermediate, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2, 1]")) p.NoError(err) defer intermediate.Release() expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} bldr := array.NewDictionaryBuilder(p.mem, expectedType) defer bldr.Release() bldr.AppendArray(intermediate) result := bldr.NewArray() defer result.Release() expectedDict, _, err := array.FromJSON(p.mem, expectedType.ValueType, strings.NewReader("[1, 2]")) p.NoError(err) defer expectedDict.Release() expectedIndices, _, err := array.FromJSON(p.mem, expectedType.IndexType, strings.NewReader("[0, 1, 0]")) p.NoError(err) defer expectedIndices.Release() expected := array.NewDictionaryArray(expectedType, expectedIndices, expectedDict) defer expected.Release() p.True(array.Equal(expected, result)) } func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderDeltaDictionary() { expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} bldr := array.NewDictionaryBuilder(p.mem, expectedType) defer bldr.Release() builder := reflect.ValueOf(bldr) appfn := builder.MethodByName("Append") p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) result := bldr.NewArray() defer result.Release() exdict, _, err := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2]")) p.NoError(err) defer exdict.Release() exindices, _, err := array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0, 1]")) p.NoError(err) defer exindices.Release() expected := array.NewDictionaryArray(result.DataType().(*arrow.DictionaryType), exindices, exdict) defer expected.Release() p.True(array.Equal(expected, result)) 
p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) indices, delta, err := bldr.NewDelta() p.NoError(err) defer indices.Release() defer delta.Release() exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[1, 2, 2, 0, 2]")) defer exindices.Release() exdelta, _, _ := array.FromJSON(p.mem, p.typ, strings.NewReader("[3]")) defer exdelta.Release() p.True(array.Equal(exindices, indices)) p.True(array.Equal(exdelta, delta)) } func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderDoubleDeltaDictionary() { expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} bldr := array.NewDictionaryBuilder(p.mem, expectedType) defer bldr.Release() builder := reflect.ValueOf(bldr) appfn := builder.MethodByName("Append") p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) result := bldr.NewArray() defer result.Release() exdict, _, err := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2]")) p.NoError(err) defer exdict.Release() exindices, _, err := array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0, 1]")) p.NoError(err) defer exindices.Release() expected := array.NewDictionaryArray(result.DataType().(*arrow.DictionaryType), exindices, exdict) defer expected.Release() p.True(array.Equal(expected, result)) 
p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) indices, delta, err := bldr.NewDelta() p.NoError(err) defer indices.Release() defer delta.Release() exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[1, 2, 2, 0, 2]")) defer exindices.Release() exdelta, _, _ := array.FromJSON(p.mem, p.typ, strings.NewReader("[3]")) defer exdelta.Release() p.True(array.Equal(exindices, indices)) p.True(array.Equal(exdelta, delta)) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(4).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(5).Convert(p.reftyp)})[0].Interface()) indices, delta, err = bldr.NewDelta() p.NoError(err) defer indices.Release() defer delta.Release() exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 2, 3, 4]")) defer exindices.Release() exdelta, _, _ = array.FromJSON(p.mem, p.typ, strings.NewReader("[4, 5]")) defer exdelta.Release() p.True(array.Equal(exindices, indices)) p.True(array.Equal(exdelta, delta)) } func (p *PrimitiveDictionaryTestSuite) TestNewResetBehavior() { expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} bldr := array.NewDictionaryBuilder(p.mem, expectedType) defer bldr.Release() builder := reflect.ValueOf(bldr) appfn := builder.MethodByName("Append") 
p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) bldr.AppendNull() p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) p.Less(0, bldr.Cap()) p.Less(0, bldr.NullN()) p.Equal(4, bldr.Len()) result := bldr.NewDictionaryArray() defer result.Release() p.Zero(bldr.Cap()) p.Zero(bldr.Len()) p.Zero(bldr.NullN()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) bldr.AppendNull() p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(4).Convert(p.reftyp)})[0].Interface()) result = bldr.NewDictionaryArray() defer result.Release() p.Equal(4, result.Dictionary().Len()) } func (p *PrimitiveDictionaryTestSuite) TestResetFull() { expectedType := &arrow.DictionaryType{IndexType: &arrow.Int32Type{}, ValueType: p.typ} bldr := array.NewDictionaryBuilder(p.mem, expectedType) defer bldr.Release() builder := reflect.ValueOf(bldr) appfn := builder.MethodByName("Append") p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) bldr.AppendNull() p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) result := bldr.NewDictionaryArray() defer result.Release() p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) result = bldr.NewDictionaryArray() defer result.Release() exindices, _, _ := array.FromJSON(p.mem, arrow.PrimitiveTypes.Int32, strings.NewReader("[2]")) exdict, _, _ := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2, 3]")) defer exindices.Release() defer exdict.Release() p.True(array.Equal(exindices, result.Indices())) p.True(array.Equal(exdict, result.Dictionary())) bldr.ResetFull() p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(4).Convert(p.reftyp)})[0].Interface()) result = 
bldr.NewDictionaryArray() defer result.Release() exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int32, strings.NewReader("[0]")) exdict, _, _ = array.FromJSON(p.mem, p.typ, strings.NewReader("[4]")) defer exindices.Release() defer exdict.Release() p.True(array.Equal(exindices, result.Indices())) p.True(array.Equal(exdict, result.Dictionary())) } func (p *PrimitiveDictionaryTestSuite) TestStringRoundTrip() { dt := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} b := array.NewDictionaryBuilder(p.mem, dt) defer b.Release() builder := reflect.ValueOf(b) fn := builder.MethodByName("Append") p.Nil(fn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) p.Nil(fn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) p.Nil(fn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) b.AppendNull() p.EqualValues(4, b.Len()) p.EqualValues(1, b.NullN()) arr := b.NewArray().(*array.Dictionary) defer arr.Release() p.True(arrow.TypeEqual(dt, arr.DataType())) b1 := array.NewDictionaryBuilder(p.mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { p.NoError(b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Dictionary) defer arr1.Release() p.Equal(arr.Len(), arr1.Len()) p.True(array.Equal(arr, arr1)) } func TestBasicStringDictionaryBuilder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} bldr := array.NewDictionaryBuilder(mem, dictType) defer bldr.Release() builder := bldr.(*array.BinaryDictionaryBuilder) assert.NoError(t, builder.Append([]byte("test"))) assert.NoError(t, builder.AppendString("test2")) assert.NoError(t, builder.AppendString("test")) assert.Equal(t, "test", builder.ValueStr(builder.GetValueIndex(0))) assert.Equal(t, "test2", builder.ValueStr(builder.GetValueIndex(1))) assert.Equal(t, 
"test", builder.ValueStr(builder.GetValueIndex(2))) result := bldr.NewDictionaryArray() defer result.Release() exdict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test", "test2"]`)) defer exdict.Release() exint, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) defer exint.Release() assert.True(t, arrow.TypeEqual(dictType, result.DataType())) expected := array.NewDictionaryArray(dictType, exint, exdict) defer expected.Release() assert.True(t, array.Equal(expected, result)) } func TestStringDictionaryInsertValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) exdict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["c", "a", "b", "d"]`)) defer exdict.Release() invalidDict, _, err := array.FromJSON(mem, arrow.BinaryTypes.Binary, strings.NewReader(`["ZQ==", "Zg=="]`)) assert.NoError(t, err) defer invalidDict.Release() dictType := &arrow.DictionaryType{IndexType: &arrow.Int16Type{}, ValueType: arrow.BinaryTypes.String} bldr := array.NewDictionaryBuilder(mem, dictType) defer bldr.Release() builder := bldr.(*array.BinaryDictionaryBuilder) assert.NoError(t, builder.InsertStringDictValues(exdict.(*array.String))) // inserting again should have no effect assert.NoError(t, builder.InsertStringDictValues(exdict.(*array.String))) assert.Error(t, builder.InsertDictValues(invalidDict.(*array.Binary))) for i := 0; i < 2; i++ { builder.AppendString("c") builder.AppendString("a") builder.AppendString("b") builder.AppendNull() builder.AppendString("d") } assert.Equal(t, 10, bldr.Len()) result := bldr.NewDictionaryArray() defer result.Release() exindices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader("[0, 1, 2, null, 3, 0, 1, 2, null, 3]")) defer exindices.Release() expected := array.NewDictionaryArray(dictType, exindices, exdict) defer expected.Release() assert.True(t, array.Equal(expected, result)) } func 
// NOTE(review): this chunk begins mid-definition — the leading "func " keyword
// for TestStringDictionaryBuilderInit lives in the preceding chunk.
// TestStringDictionaryBuilderInit seeds a builder with a pre-existing dictionary
// via NewDictionaryBuilderWithDict and checks appends resolve to the seeded indices.
TestStringDictionaryBuilderInit(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0) // fails the test if any allocation leaks

	dictArr, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test", "test2"]`))
	defer dictArr.Release()
	intarr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]"))
	defer intarr.Release()

	dictType := &arrow.DictionaryType{IndexType: intarr.DataType().(arrow.FixedWidthDataType), ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilderWithDict(mem, dictType, dictArr)
	defer bldr.Release()

	builder := bldr.(*array.BinaryDictionaryBuilder)
	assert.NoError(t, builder.AppendString("test"))
	assert.NoError(t, builder.AppendString("test2"))
	assert.NoError(t, builder.AppendString("test"))

	result := bldr.NewDictionaryArray()
	defer result.Release()

	expected := array.NewDictionaryArray(dictType, intarr, dictArr)
	defer expected.Release()

	assert.True(t, array.Equal(expected, result))
}

// TestStringDictionaryBuilderOnlyNull checks that appending only nulls yields an
// empty dictionary and a single null index.
func TestStringDictionaryBuilderOnlyNull(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()

	bldr.AppendNull()
	result := bldr.NewDictionaryArray()
	defer result.Release()

	dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader("[]"))
	defer dict.Release()
	intarr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[null]"))
	defer intarr.Release()

	expected := array.NewDictionaryArray(dictType, intarr, dict)
	defer expected.Release()

	assert.True(t, array.Equal(expected, result))
}

// TestStringDictionaryBuilderDelta verifies NewDelta: after a first flush, a second
// flush returns only the values added since the previous flush ("test3") along with
// indices referencing the cumulative dictionary.
func TestStringDictionaryBuilderDelta(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()

	builder := bldr.(*array.BinaryDictionaryBuilder)
	assert.NoError(t, builder.AppendString("test"))
	assert.NoError(t, builder.AppendString("test2"))
	assert.NoError(t, builder.AppendString("test"))

	result := bldr.NewDictionaryArray()
	defer result.Release()

	exdict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test", "test2"]`))
	defer exdict.Release()
	exint, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]"))
	defer exint.Release()

	assert.True(t, arrow.TypeEqual(dictType, result.DataType()))
	expected := array.NewDictionaryArray(dictType, exint, exdict)
	defer expected.Release()
	assert.True(t, array.Equal(expected, result))

	// second round: one previously-seen value, one new value
	assert.NoError(t, builder.AppendString("test2"))
	assert.NoError(t, builder.AppendString("test3"))
	assert.NoError(t, builder.AppendString("test2"))

	indices, delta, err := builder.NewDelta()
	assert.NoError(t, err)
	defer indices.Release()
	defer delta.Release()

	exdelta, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test3"]`))
	defer exdelta.Release()
	// exint is rebound here; the earlier defer still releases the first array.
	exint, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[1, 2, 1]"))
	defer exint.Release()

	assert.True(t, array.Equal(exdelta, delta))
	assert.True(t, array.Equal(exint, indices))
}

// TestStringDictionaryBuilderBigDelta exercises NewDelta across three flushes with
// 2048 distinct initial values, checking each delta contains only the new entries.
func TestStringDictionaryBuilderBigDelta(t *testing.T) {
	const testlen = 2048
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int16Type{}, ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()
	builder := bldr.(*array.BinaryDictionaryBuilder)

	strbldr := array.NewStringBuilder(mem)
	defer strbldr.Release()
	intbldr := array.NewInt16Builder(mem)
	defer intbldr.Release()

	// first flush: testlen unique strings "test0".."test2047"
	for idx := int16(0); idx < testlen; idx++ {
		var b strings.Builder
		b.WriteString("test")
		fmt.Fprint(&b, idx)
		val := b.String()
		assert.NoError(t, builder.AppendString(val))
		strbldr.Append(val)
		intbldr.Append(idx)
	}

	result := bldr.NewDictionaryArray()
	defer result.Release()
	strarr := strbldr.NewStringArray()
	defer strarr.Release()
	intarr := intbldr.NewInt16Array()
	defer intarr.Release()

	expected := array.NewDictionaryArray(dictType, intarr, strarr)
	defer expected.Release()
	assert.True(t, array.Equal(expected, result))

	// second flush: only "test_new_value1" is new relative to the first flush
	strbldr2 := array.NewStringBuilder(mem)
	defer strbldr2.Release()
	intbldr2 := array.NewInt16Builder(mem)
	defer intbldr2.Release()
	for idx := int16(0); idx < testlen; idx++ {
		builder.AppendString("test1")
		intbldr2.Append(1)
	}
	for idx := int16(0); idx < testlen; idx++ {
		builder.AppendString("test_new_value1")
		intbldr2.Append(testlen)
	}
	strbldr2.Append("test_new_value1")

	indices2, delta2, err := bldr.NewDelta()
	assert.NoError(t, err)
	defer indices2.Release()
	defer delta2.Release()

	strarr2 := strbldr2.NewStringArray()
	defer strarr2.Release()
	intarr2 := intbldr2.NewInt16Array()
	defer intarr2.Release()

	assert.True(t, array.Equal(intarr2, indices2))
	assert.True(t, array.Equal(strarr2, delta2))

	// third flush: only "test_new_value2" is new
	strbldr3 := array.NewStringBuilder(mem)
	defer strbldr3.Release()
	intbldr3 := array.NewInt16Builder(mem)
	defer intbldr3.Release()
	for idx := int16(0); idx < testlen; idx++ {
		assert.NoError(t, builder.AppendString("test2"))
		intbldr3.Append(2)
	}
	for idx := int16(0); idx < testlen; idx++ {
		assert.NoError(t, builder.AppendString("test_new_value2"))
		intbldr3.Append(testlen + 1)
	}
	strbldr3.Append("test_new_value2")

	indices3, delta3, err := bldr.NewDelta()
	assert.NoError(t, err)
	defer indices3.Release()
	defer delta3.Release()

	strarr3 := strbldr3.NewStringArray()
	defer strarr3.Release()
	intarr3 := intbldr3.NewInt16Array()
	defer intarr3.Release()

	assert.True(t, array.Equal(intarr3, indices3))
	assert.True(t, array.Equal(strarr3, delta3))
}

// TestStringDictionaryBuilderIsNull checks IsNull reflects the position of an
// appended null among non-null appends.
func TestStringDictionaryBuilderIsNull(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()

	builder := bldr.(*array.BinaryDictionaryBuilder)
	assert.NoError(t, builder.AppendString("test"))
	builder.AppendNull()
	assert.NoError(t, builder.AppendString("test2"))
	assert.NoError(t, builder.AppendString("test"))

	assert.False(t, bldr.IsNull(0))
	assert.True(t, bldr.IsNull(1))
	assert.False(t, bldr.IsNull(2))
	assert.False(t, bldr.IsNull(3))
}

// TestFixedSizeBinaryDictionaryBuilder checks basic dedup of fixed-size binary values.
func TestFixedSizeBinaryDictionaryBuilder(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 4}}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()
	builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder)

	test := []byte{12, 12, 11, 12}
	test2 := []byte{12, 12, 11, 11}
	assert.NoError(t, builder.Append(test))
	assert.NoError(t, builder.Append(test2))
	assert.NoError(t, builder.Append(test))

	result := builder.NewDictionaryArray()
	defer result.Release()

	// build the expected dictionary and index arrays by hand
	fsbBldr := array.NewFixedSizeBinaryBuilder(mem, dictType.ValueType.(*arrow.FixedSizeBinaryType))
	defer fsbBldr.Release()
	fsbBldr.Append(test)
	fsbBldr.Append(test2)
	fsbArr := fsbBldr.NewFixedSizeBinaryArray()
	defer fsbArr.Release()

	intbldr := array.NewInt8Builder(mem)
	defer intbldr.Release()
	intbldr.AppendValues([]int8{0, 1, 0}, nil)
	intArr := intbldr.NewInt8Array()
	defer intArr.Release()

	expected := array.NewDictionaryArray(dictType, intArr, fsbArr)
	defer expected.Release()
	assert.True(t, array.Equal(expected, result))
}

// TestFixedSizeBinaryDictionaryBuilderInit seeds a fixed-size-binary dictionary
// builder with an existing dictionary and checks index resolution.
func TestFixedSizeBinaryDictionaryBuilderInit(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	fsbBldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 4})
	defer fsbBldr.Release()
	test, test2 := []byte("abcd"), []byte("wxyz")
	fsbBldr.AppendValues([][]byte{test, test2}, nil)
	dictArr := fsbBldr.NewFixedSizeBinaryArray()
	defer dictArr.Release()

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: dictArr.DataType()}
	bldr := array.NewDictionaryBuilderWithDict(mem, dictType, dictArr)
	defer bldr.Release()

	builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder)
	assert.NoError(t, builder.Append(test))
	assert.NoError(t, builder.Append(test2))
	assert.NoError(t, builder.Append(test))

	result := builder.NewDictionaryArray()
	defer result.Release()

	indices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]"))
	defer indices.Release()

	expected := array.NewDictionaryArray(dictType, indices, dictArr)
	defer expected.Release()
	assert.True(t, array.Equal(expected, result))
}

// TestFixedSizeBinaryDictionaryBuilderMakeBuilder mirrors the Init test but obtains
// the builder through the generic array.NewBuilder factory.
func TestFixedSizeBinaryDictionaryBuilderMakeBuilder(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	fsbBldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 4})
	defer fsbBldr.Release()
	test, test2 := []byte("abcd"), []byte("wxyz")
	fsbBldr.AppendValues([][]byte{test, test2}, nil)
	dictArr := fsbBldr.NewFixedSizeBinaryArray()
	defer dictArr.Release()

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: dictArr.DataType()}
	bldr := array.NewBuilder(mem, dictType)
	defer bldr.Release()

	builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder)
	assert.NoError(t, builder.Append(test))
	assert.NoError(t, builder.Append(test2))
	assert.NoError(t, builder.Append(test))

	result := builder.NewDictionaryArray()
	defer result.Release()

	indices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]"))
	defer indices.Release()

	expected := array.NewDictionaryArray(dictType, indices, dictArr)
	defer expected.Release()
	assert.True(t, array.Equal(expected, result))
}

// TestFixedSizeBinaryDictionaryBuilderDeltaDictionary verifies NewDelta for
// fixed-size binary dictionaries: the second flush's delta holds only test3.
func TestFixedSizeBinaryDictionaryBuilderDeltaDictionary(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 4}}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()
	builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder)

	test := []byte{12, 12, 11, 12}
	test2 := []byte{12, 12, 11, 11}
	test3 := []byte{12, 12, 11, 10}
	assert.NoError(t, builder.Append(test))
	assert.NoError(t, builder.Append(test2))
	assert.NoError(t, builder.Append(test))

	result1 := bldr.NewDictionaryArray()
	defer result1.Release()

	fsbBuilder := array.NewFixedSizeBinaryBuilder(mem, dictType.ValueType.(*arrow.FixedSizeBinaryType))
	defer fsbBuilder.Release()
	fsbBuilder.AppendValues([][]byte{test, test2}, nil)
	fsbArr1 := fsbBuilder.NewFixedSizeBinaryArray()
	defer fsbArr1.Release()

	intBuilder := array.NewInt8Builder(mem)
	defer intBuilder.Release()
	intBuilder.AppendValues([]int8{0, 1, 0}, nil)
	intArr1 := intBuilder.NewInt8Array()
	defer intArr1.Release()

	expected := array.NewDictionaryArray(dictType, intArr1, fsbArr1)
	defer expected.Release()
	assert.True(t, array.Equal(expected, result1))

	// second round: test3 is the only value not yet in the dictionary
	assert.NoError(t, builder.Append(test))
	assert.NoError(t, builder.Append(test2))
	assert.NoError(t, builder.Append(test3))

	indices2, delta2, err := builder.NewDelta()
	assert.NoError(t, err)
	defer indices2.Release()
	defer delta2.Release()

	fsbBuilder.Append(test3)
	fsbArr2 := fsbBuilder.NewFixedSizeBinaryArray()
	defer fsbArr2.Release()
	intBuilder.AppendValues([]int8{0, 1, 2}, nil)
	intArr2 := intBuilder.NewInt8Array()
	defer intArr2.Release()

	assert.True(t, array.Equal(intArr2, indices2))
	assert.True(t, array.Equal(fsbArr2, delta2))
}

// TestFixedSizeBinaryDictionaryStringRoundTrip round-trips values through
// ValueStr/AppendValueFromString and checks array equality.
func TestFixedSizeBinaryDictionaryStringRoundTrip(t *testing.T) {
	// 1. create array
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 4}}
	b := array.NewDictionaryBuilder(mem, dictType)
	defer b.Release()
	builder := b.(*array.FixedSizeBinaryDictionaryBuilder)

	test := []byte{12, 12, 11, 12}
	test2 := []byte{12, 12, 11, 11}
	assert.NoError(t, builder.Append(test))
	assert.NoError(t, builder.Append(test2))
	assert.NoError(t, builder.Append(test))

	arr := builder.NewDictionaryArray()
	defer arr.Release()

	// 2. create array via AppendValueFromString
	b1 := array.NewDictionaryBuilder(mem, dictType)
	defer b1.Release()
	for i := 0; i < arr.Len(); i++ {
		assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i)))
	}
	arr1 := b1.NewArray().(*array.Dictionary)
	defer arr1.Release()

	assert.True(t, array.Equal(arr, arr1))
}

// TestDecimal128DictionaryBuilderBasic checks dedup of decimal128 values; uses
// ApproxEqual since the expected dictionary comes from JSON parsing.
func TestDecimal128DictionaryBuilderBasic(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	test := []decimal128.Num{decimal128.FromI64(12), decimal128.FromI64(12), decimal128.FromI64(11), decimal128.FromI64(12)}
	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.Decimal128Type{Precision: 2, Scale: 0}}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()
	builder := bldr.(*array.Decimal128DictionaryBuilder)
	for _, v := range test {
		assert.NoError(t, builder.Append(v))
	}

	result := bldr.NewDictionaryArray()
	defer result.Release()

	indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader("[0, 0, 1, 0]"))
	defer indices.Release()
	dict, _, _ := array.FromJSON(mem, dictType.ValueType, strings.NewReader("[12, 11]"))
	defer dict.Release()

	expected := array.NewDictionaryArray(dictType, indices, dict)
	defer expected.Release()

	assert.True(t, array.ApproxEqual(expected, result))
}

// TestDecimal256DictionaryBuilderBasic is the decimal256 counterpart of the
// decimal128 test above.
func TestDecimal256DictionaryBuilderBasic(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	test := []decimal256.Num{decimal256.FromI64(12), decimal256.FromI64(12), decimal256.FromI64(11), decimal256.FromI64(12)}
	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.Decimal256Type{Precision: 2, Scale: 0}}
	bldr := array.NewDictionaryBuilder(mem, dictType)
	defer bldr.Release()
	builder := bldr.(*array.Decimal256DictionaryBuilder)
	for _, v := range test {
		assert.NoError(t, builder.Append(v))
	}

	result := bldr.NewDictionaryArray()
	defer result.Release()

	indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader("[0, 0, 1, 0]"))
	defer indices.Release()
	dict, _, _ := array.FromJSON(mem, dictType.ValueType, strings.NewReader("[12, 11]"))
	defer dict.Release()

	expected := array.NewDictionaryArray(dictType, indices, dict)
	defer expected.Release()

	assert.True(t, array.ApproxEqual(expected, result))
}

// TestNullDictionaryBuilderBasic exercises the null-valued dictionary builder:
// every element is null, both via AppendNulls and AppendArray.
func TestNullDictionaryBuilderBasic(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.Null}
	bldr := array.NewBuilder(mem, dictType)
	defer bldr.Release()
	builder := bldr.(*array.NullDictionaryBuilder)

	builder.AppendNulls(3)
	assert.Equal(t, 3, builder.Len())
	assert.Equal(t, 3, builder.NullN())

	nullarr, _, _ := array.FromJSON(mem, arrow.Null, strings.NewReader("[null, null, null]"))
	defer nullarr.Release()
	assert.NoError(t, builder.AppendArray(nullarr))
	assert.Equal(t, 6, bldr.Len())
	assert.Equal(t, 6, bldr.NullN())

	result := builder.NewDictionaryArray()
	defer result.Release()
	assert.Equal(t, 6, result.Len())
	assert.Equal(t, 6, result.NullN())
}

// TestDictionaryEquals covers Equal/SliceEqual semantics for dictionary arrays,
// including null-masked index differences and (chained) slices.
func TestDictionaryEquals(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	var (
		isValid                     = []bool{true, true, false, true, true, true}
		dict, dict2                 arrow.Array
		indices, indices2, indices3 arrow.Array
	)

	dict, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["foo", "bar", "baz"]`))
	defer dict.Release()
	dictType := &arrow.DictionaryType{IndexType: &arrow.Uint16Type{}, ValueType: arrow.BinaryTypes.String}

	dict2, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["foo", "bar", "baz", "qux"]`))
	defer dict2.Release()
	dictType2 := &arrow.DictionaryType{IndexType: &arrow.Uint16Type{}, ValueType: arrow.BinaryTypes.String}

	idxbuilder := array.NewUint16Builder(mem)
	defer idxbuilder.Release()

	// position 2 is invalid, so MaxUint16 there is never dereferenced
	idxbuilder.AppendValues([]uint16{1, 2, math.MaxUint16, 0, 2, 0}, isValid)
	indices = idxbuilder.NewArray()
	defer indices.Release()

	idxbuilder.AppendValues([]uint16{1, 2, 0, 0, 2, 0}, isValid)
	indices2 = idxbuilder.NewArray()
	defer indices2.Release()

	idxbuilder.AppendValues([]uint16{1, 1, 0, 0, 2, 0}, isValid)
	indices3 = idxbuilder.NewArray()
	defer indices3.Release()

	var (
		arr  = array.NewDictionaryArray(dictType, indices, dict)
		arr2 = array.NewDictionaryArray(dictType, indices2, dict)
		arr3 = array.NewDictionaryArray(dictType2, indices, dict2)
		arr4 = array.NewDictionaryArray(dictType, indices3, dict)
	)
	defer func() {
		arr.Release()
		arr2.Release()
		arr3.Release()
		arr4.Release()
	}()

	assert.True(t, array.Equal(arr, arr))
	// equal because the unequal index is masked by null
	assert.True(t, array.Equal(arr, arr2))
	// unequal dictionaries
	assert.False(t, array.Equal(arr, arr3))
	// unequal indices
	assert.False(t, array.Equal(arr, arr4))

	assert.True(t, array.SliceEqual(arr, 3, 6, arr4, 3, 6))
	assert.False(t, array.SliceEqual(arr, 1, 3, arr4, 1, 3))

	sz := arr.Len()
	slice := array.NewSlice(arr, 2, int64(sz))
	defer slice.Release()
	slice2 := array.NewSlice(arr, 2, int64(sz))
	defer slice2.Release()

	assert.Equal(t, sz-2, slice.Len())
	assert.True(t, array.Equal(slice, slice2))
	assert.True(t, array.SliceEqual(arr, 2, int64(arr.Len()), slice, 0, int64(slice.Len())))

	// chained slice
	slice2 = array.NewSlice(arr, 1, int64(arr.Len()))
	defer slice2.Release()
	slice2 = array.NewSlice(slice2, 1, int64(slice2.Len()))
	defer slice2.Release()
	assert.True(t, array.Equal(slice, slice2))

	slice = array.NewSlice(arr, 1, 4)
	defer slice.Release()
	slice2 = array.NewSlice(arr, 1, 4)
	defer slice2.Release()

	assert.Equal(t, 3, slice.Len())
	assert.True(t, array.Equal(slice, slice2))
	assert.True(t, array.SliceEqual(arr, 1, 4, slice, 0, int64(slice.Len())))
}

// TestDictionaryIndexTypes runs the same dedup scenario for every supported
// integral index type, using a per-subtest allocator scope.
func TestDictionaryIndexTypes(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictIndexTypes := []arrow.DataType{
		arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8,
		arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16,
		arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32,
		arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64,
	}

	for _, indextyp := range dictIndexTypes {
		t.Run(indextyp.Name(), func(t *testing.T) {
			scope := memory.NewCheckedAllocatorScope(mem)
			defer scope.CheckSize(t)

			dictType := &arrow.DictionaryType{IndexType: indextyp, ValueType: arrow.BinaryTypes.String}
			bldr := array.NewDictionaryBuilder(mem, dictType)
			defer bldr.Release()
			builder := bldr.(*array.BinaryDictionaryBuilder)

			builder.AppendString("foo")
			builder.AppendString("bar")
			builder.AppendString("foo")
			builder.AppendString("baz")
			builder.Append(nil) // appends a null

			assert.Equal(t, 5, builder.Len())
			assert.Equal(t, 1, builder.NullN())

			result := builder.NewDictionaryArray()
			defer result.Release()

			expectedIndices, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[0, 1, 0, 2, null]"))
			defer expectedIndices.Release()

			assert.True(t, array.Equal(expectedIndices, result.Indices()))
		})
	}
}

// TestDictionaryFromArrays validates NewValidatedDictionaryArray: in-bounds
// indices pass, out-of-bounds fail, null-masked out-of-bounds pass for signed
// index types, and a mismatched index type is rejected.
func TestDictionaryFromArrays(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["foo", "bar", "baz"]`))
	defer dict.Release()

	dictIndexTypes := []arrow.DataType{
		arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8,
		arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16,
		arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32,
		arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64,
	}

	for _, indextyp := range dictIndexTypes {
		t.Run(indextyp.Name(), func(t *testing.T) {
			scope := memory.NewCheckedAllocatorScope(mem)
			defer scope.CheckSize(t)

			dictType := &arrow.DictionaryType{IndexType: indextyp, ValueType: arrow.BinaryTypes.String}
			indices1, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, 0, 0, 2, 0]"))
			defer indices1.Release()
			indices2, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, 0, 3, 2, 0]"))
			defer indices2.Release()

			arr1, err := array.NewValidatedDictionaryArray(dictType, indices1, dict)
			assert.NoError(t, err)
			defer arr1.Release()

			// index 3 is out of range for a 3-element dictionary
			_, err = array.NewValidatedDictionaryArray(dictType, indices2, dict)
			assert.Error(t, err)

			switch indextyp.ID() {
			case arrow.INT8, arrow.INT16, arrow.INT32, arrow.INT64:
				// clearing the validity bit masks the index at position 2
				indices3, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, 0, null, 2, 0]"))
				defer indices3.Release()
				bitutil.ClearBit(indices3.Data().Buffers()[0].Bytes(), 2)
				arr3, err := array.NewValidatedDictionaryArray(dictType, indices3, dict)
				assert.NoError(t, err)
				defer arr3.Release()
			}

			indices4, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, null, 3, 2, 0]"))
			defer indices4.Release()
			_, err = array.NewValidatedDictionaryArray(dictType, indices4, dict)
			assert.Error(t, err)

			// index array type must match the dictionary type's IndexType
			diffIndexType := arrow.PrimitiveTypes.Int8
			if indextyp.ID() == arrow.INT8 {
				diffIndexType = arrow.PrimitiveTypes.Uint8
			}
			_, err = array.NewValidatedDictionaryArray(&arrow.DictionaryType{IndexType: diffIndexType, ValueType: arrow.BinaryTypes.String}, indices4, dict)
			assert.Error(t, err)
		})
	}
}

// TestListOfDictionary builds a list-of-dictionary array and checks the nested
// dictionary's values.
func TestListOfDictionary(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	rootBuilder := array.NewBuilder(mem, arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.BinaryTypes.String}))
	defer rootBuilder.Release()

	listBldr := rootBuilder.(*array.ListBuilder)
	dictBldr := listBldr.ValueBuilder().(*array.BinaryDictionaryBuilder)

	listBldr.Append(true)
	expected := []string{}
	for _, a := range []byte("abc") {
		for _, d := range []byte("def") {
			for _, g := range []byte("ghi") {
				for _, j := range []byte("jkl") {
					for _, m := range []byte("mno") {
						for _, p := range []byte("pqr") {
							// NOTE(review): due to precedence this is a+d+g+j+m+(p%16),
							// not (a+d+g+j+m+p)%16 — possibly unintended, but it only
							// decides where list boundaries fall, and this test compares
							// only the dictionary values below. Confirm intent upstream.
							if a+d+g+j+m+p%16 == 0 {
								listBldr.Append(true)
							}

							str := string([]byte{a, d, g, j, m, p})
							dictBldr.AppendString(str)
							expected = append(expected, str)
						}
					}
				}
			}
		}
	}

	strbldr := array.NewStringBuilder(mem)
	defer strbldr.Release()
	strbldr.AppendValues(expected, nil)

	expectedDict := strbldr.NewStringArray()
	defer expectedDict.Release()

	arr := rootBuilder.NewArray()
	defer arr.Release()

	actualDict := arr.(*array.List).ListValues().(*array.Dictionary)
	assert.True(t, array.Equal(expectedDict, actualDict.Dictionary()))
}

// TestDictionaryCanCompareIndices checks CanCompareIndices for identical, prefix,
// differently-typed, and non-prefix dictionaries, in both argument orders.
func TestDictionaryCanCompareIndices(t *testing.T) {
	// makeDict builds an empty-index dictionary array with the given dictionary JSON.
	makeDict := func(mem memory.Allocator, idxType, valueType arrow.DataType, dictJSON string) *array.Dictionary {
		indices, _, _ := array.FromJSON(mem, idxType, strings.NewReader("[]"))
		defer indices.Release()
		dict, _, _ := array.FromJSON(mem, valueType, strings.NewReader(dictJSON))
		defer dict.Release()

		out, _ := array.NewValidatedDictionaryArray(&arrow.DictionaryType{IndexType: idxType, ValueType: valueType}, indices, dict)
		return out
	}

	// compareSwap asserts CanCompareIndices is symmetric.
	compareSwap := func(t *testing.T, l, r *array.Dictionary, expected bool) {
		assert.Equalf(t, expected, l.CanCompareIndices(r), "left: %s\nright: %s\n", l, r)
		assert.Equalf(t, expected, r.CanCompareIndices(l), "left: %s\nright: %s\n", r, l)
	}

	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	t.Run("same", func(t *testing.T) {
		arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`)
		defer arr.Release()
		same := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`)
		defer same.Release()
		compareSwap(t, arr, same, true)
	})

	t.Run("prefix dict", func(t *testing.T) {
		arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar", "quux"]`)
		defer arr.Release()
		prefixDict := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`)
		defer prefixDict.Release()
		compareSwap(t, arr, prefixDict, true)
	})

	t.Run("indices need cast", func(t *testing.T) {
		arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`)
		defer arr.Release()
		needcast := makeDict(mem, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String, `["foo", "bar"]`)
		defer needcast.Release()
		compareSwap(t, arr, needcast, false)
	})

	t.Run("non prefix", func(t *testing.T) {
		arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar", "quux"]`)
		defer arr.Release()
		nonPrefix := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "blink"]`)
		defer nonPrefix.Release()
		compareSwap(t, arr, nonPrefix, false)
	})
}

// TestDictionaryGetValueIndex checks GetValueIndex for every index type and for
// a sliced dictionary (which must account for the slice offset).
func TestDictionaryGetValueIndex(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	indicesJson := "[5, 0, 1, 3, 2, 4]"
	indices64, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(indicesJson))
	defer indices64.Release()
	dict, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader("[10, 20, 30, 40, 50, 60]"))
	defer dict.Release()

	dictIndexTypes := []arrow.DataType{
		arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8,
		arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16,
		arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32,
		arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64,
	}
	i64Index := indices64.(*array.Int64)
	for _, idxt := range dictIndexTypes {
		t.Run(idxt.Name(), func(t *testing.T) {
			indices, _, _ := array.FromJSON(mem, idxt, strings.NewReader(indicesJson))
			defer indices.Release()
			dictType := &arrow.DictionaryType{IndexType: idxt, ValueType: arrow.PrimitiveTypes.Int32}

			dictArr := array.NewDictionaryArray(dictType, indices, dict)
			defer dictArr.Release()

			const offset = 1
			slicedDictArr := array.NewSlice(dictArr, offset, int64(dictArr.Len()))
			defer slicedDictArr.Release()
			// sliced position 0 maps to original index 0 -> dictionary value 10
			assert.EqualValues(t, "10", slicedDictArr.(*array.Dictionary).ValueStr(0))
			for i := 0; i < indices.Len(); i++ {
				assert.EqualValues(t, i64Index.Value(i), dictArr.GetValueIndex(i))
				if i < slicedDictArr.Len() {
					assert.EqualValues(t, i64Index.Value(i+offset), slicedDictArr.(*array.Dictionary).GetValueIndex(i))
				}
			}
		})
	}
}

// checkTransposeMap asserts the buffer, viewed as int32s, equals exp.
func checkTransposeMap(t *testing.T, b *memory.Buffer, exp []int32) bool {
	got := arrow.Int32Traits.CastFromBytes(b.Bytes())
	return assert.Equal(t, exp, got)
}

// TestDictionaryUnifierNumeric unifies three int64 dictionaries, checks the
// merged dictionary and index type, rejection of a mismatched value type, and
// the transpose maps from UnifyAndTranspose.
func TestDictionaryUnifierNumeric(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := arrow.PrimitiveTypes.Int64

	d1, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[3, 4, 7]`))
	require.NoError(t, err)
	d2, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[1, 7, 4, 8]`))
	require.NoError(t, err)
	d3, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[1, -200]`))
	require.NoError(t, err)

	expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: dictType}
	expectedDict, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[3, 4, 7, 1, 8, -200]`))
	require.NoError(t, err)

	defer func() {
		d1.Release()
		d2.Release()
		d3.Release()
		expectedDict.Release()
	}()

	unifier, err := array.NewDictionaryUnifier(mem, dictType)
	assert.NoError(t, err)
	defer unifier.Release()

	assert.NoError(t, unifier.Unify(d1))
	assert.NoError(t, unifier.Unify(d2))
	assert.NoError(t, unifier.Unify(d3))

	invalid, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, -200]`))
	defer invalid.Release()
	assert.EqualError(t, unifier.Unify(invalid), "dictionary type different from unifier: int32, expected: int64")

	outType, outDict, err := unifier.GetResult()
	assert.NoError(t, err)
	defer outDict.Release()
	assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected)
	assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict)

	b1, err := unifier.UnifyAndTranspose(d1)
	assert.NoError(t, err)
	b2, err := unifier.UnifyAndTranspose(d2)
	assert.NoError(t, err)
	b3, err := unifier.UnifyAndTranspose(d3)
	assert.NoError(t, err)

	outType, outDict, err = unifier.GetResult()
	assert.NoError(t, err)
	defer func() {
		outDict.Release()
		b1.Release()
		b2.Release()
		b3.Release()
	}()

	assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected)
	assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict)
	checkTransposeMap(t, b1, []int32{0, 1, 2})
	checkTransposeMap(t, b2, []int32{3, 2, 1, 4})
	checkTransposeMap(t, b3, []int32{3, 5})
}

// TestDictionaryUnifierString is the string-dictionary variant of the numeric
// unifier test above.
func TestDictionaryUnifierString(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := arrow.BinaryTypes.String
	d1, _, err := array.FromJSON(mem, dictType, strings.NewReader(`["foo", "bar"]`))
	require.NoError(t, err)
	defer d1.Release()
	d2, _, err := array.FromJSON(mem, dictType, strings.NewReader(`["quux", "foo"]`))
	require.NoError(t, err)
	defer d2.Release()

	expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: dictType}
	expectedDict, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["foo", "bar", "quux"]`))
	defer expectedDict.Release()

	unifier, err := array.NewDictionaryUnifier(mem, dictType)
	assert.NoError(t, err)
	defer unifier.Release()

	assert.NoError(t, unifier.Unify(d1))
	assert.NoError(t, unifier.Unify(d2))

	outType, outDict, err := unifier.GetResult()
	assert.NoError(t, err)
	defer outDict.Release()

	assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected)
	assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict)

	b1, err := unifier.UnifyAndTranspose(d1)
	assert.NoError(t, err)
	b2, err := unifier.UnifyAndTranspose(d2)
	assert.NoError(t, err)

	outType, outDict, err = unifier.GetResult()
	assert.NoError(t, err)
	defer func() {
		outDict.Release()
		b1.Release()
		b2.Release()
	}()

	assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected)
	assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict)
	checkTransposeMap(t, b1, []int32{0, 1})
	checkTransposeMap(t, b2, []int32{2, 0})
}

// TestDictionaryUnifierBinary exercises the dedicated binary unifier
// (NewBinaryDictionaryUnifier); values in the JSON are base64-encoded.
func TestDictionaryUnifierBinary(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := arrow.BinaryTypes.Binary
	d1, _, err := array.FromJSON(mem, dictType, strings.NewReader(`["Zm9vCg==", "YmFyCg=="]`)) // base64("foo\n"), base64("bar\n")
	require.NoError(t, err)
	defer d1.Release()
	d2, _, err := array.FromJSON(mem, dictType, strings.NewReader(`["cXV1eAo=", "Zm9vCg=="]`)) // base64("quux\n"), base64("foo\n")
	require.NoError(t, err)
	defer d2.Release()

	expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: dictType}
	expectedDict, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["Zm9vCg==", "YmFyCg==", "cXV1eAo="]`))
	defer expectedDict.Release()

	unifier := array.NewBinaryDictionaryUnifier(mem)
	defer unifier.Release()

	assert.NoError(t, unifier.Unify(d1))
	assert.NoError(t, unifier.Unify(d2))

	outType, outDict, err := unifier.GetResult()
	assert.NoError(t, err)
	defer outDict.Release()

	assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected)
	assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict)

	b1, err := unifier.UnifyAndTranspose(d1)
	assert.NoError(t, err)
	b2, err := unifier.UnifyAndTranspose(d2)
	assert.NoError(t, err)

	outType, outDict, err = unifier.GetResult()
	assert.NoError(t, err)
	defer func() {
		outDict.Release()
		b1.Release()
		b2.Release()
	}()

	assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected)
	assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict)
	checkTransposeMap(t, b1, []int32{0, 1})
	checkTransposeMap(t, b2, []int32{1, 2, 3})
}

// TestDictionaryUnifierFixedSizeBinary unifies two overlapping fixed-size-binary
// dictionaries built directly on slices of one shared buffer.
func TestDictionaryUnifierFixedSizeBinary(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.FixedSizeBinaryType{ByteWidth: 3}
	// the single backing buffer holds "foo","bar","baz","qux" (3 bytes each)
	data := memory.NewBufferBytes([]byte(`foobarbazqux`))
	fsbData := array.NewData(dictType, 2, []*memory.Buffer{nil, memory.SliceBuffer(data, 0, 6)}, nil, 0, 0)
	defer fsbData.Release()
	d1 := array.NewFixedSizeBinaryData(fsbData) // ["foo", "bar"]
	fsbData = array.NewData(dictType, 3, []*memory.Buffer{nil, memory.SliceBuffer(data, 3, 9)}, nil, 0, 0)
	defer fsbData.Release()
	d2 := array.NewFixedSizeBinaryData(fsbData) // ["bar", "baz", "qux"]

	fsbData = array.NewData(dictType, 4, []*memory.Buffer{nil, data}, nil, 0, 0)
	defer fsbData.Release()
	expectedDict := array.NewFixedSizeBinaryData(fsbData) // ["foo", "bar", "baz", "qux"]
	expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: dictType}

	unifier, err := array.NewDictionaryUnifier(mem, dictType)
	assert.NoError(t, err)
	defer func() {
		d1.Release()
		d2.Release()
		expectedDict.Release()
		unifier.Release()
	}()

	assert.NoError(t, unifier.Unify(d1))
	assert.NoError(t, unifier.Unify(d2))
	outType, outDict, err := unifier.GetResult()
	assert.NoError(t, err)
	defer outDict.Release()

	assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected)
	assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict)

	b1, err := unifier.UnifyAndTranspose(d1)
	assert.NoError(t, err)
	b2, err := unifier.UnifyAndTranspose(d2)
	assert.NoError(t, err)

	outType, outDict, err = unifier.GetResult()
	assert.NoError(t, err)
	defer func() {
		outDict.Release()
		b1.Release()
		b2.Release()
	}()

	assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected)
	assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict)
	checkTransposeMap(t, b1, []int32{0, 1})
	checkTransposeMap(t, b2, []int32{1, 2, 3})
}

func TestDictionaryUnifierLarge(t *testing.T) {
	// unifying larger dictionaries should choose the right index type
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	bldr := array.NewInt32Builder(mem)
	defer bldr.Release()

	bldr.Reserve(120)
	for i := int32(0); i < 120; i++ {
		bldr.UnsafeAppend(i)
	}
	d1 := bldr.NewInt32Array()
	defer d1.Release()
	assert.EqualValues(t, 120, d1.Len())

	// overlaps d1 on [110, 120)
	bldr.Reserve(30)
	for i := int32(110); i < 140; i++ {
		bldr.UnsafeAppend(i)
	}
	d2 := bldr.NewInt32Array()
	defer d2.Release()
	assert.EqualValues(t, 30, d2.Len())

	bldr.Reserve(140)
	for i := int32(0); i < 140; i++ {
		bldr.UnsafeAppend(i)
	}
	expectedDict := bldr.NewInt32Array()
	defer expectedDict.Release()
	assert.EqualValues(t, 140, expectedDict.Len())

	// int8 would be too narrow to hold all the values
	expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.PrimitiveTypes.Int32}

	unifier, err := array.NewDictionaryUnifier(mem, arrow.PrimitiveTypes.Int32)
	assert.NoError(t, err)
	defer unifier.Release()

	assert.NoError(t, unifier.Unify(d1))
	assert.NoError(t, unifier.Unify(d2))
	outType, outDict, err := unifier.GetResult()
	assert.NoError(t, err)
	defer outDict.Release()

	assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected)
	assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict)
}

// checkDictionaryArray asserts arr is a *array.Dictionary whose dictionary and
// indices equal the given arrays.
func checkDictionaryArray(t *testing.T, arr, expectedVals, expectedIndices arrow.Array) bool {
	require.IsType(t, (*array.Dictionary)(nil), arr)
	dictArr := arr.(*array.Dictionary)
	ret := true
	ret = ret && assert.Truef(t, array.Equal(expectedVals, dictArr.Dictionary()), "got: %s, expected: %s", dictArr.Dictionary(), expectedVals)
	return ret && assert.Truef(t, array.Equal(expectedIndices, dictArr.Indices()), "got: %s, expected: %s", dictArr.Indices(), expectedIndices)
}

// TestDictionaryUnifierSimpleChunkedArray unifies a four-chunk dictionary
// chunked array (including an empty chunk) and checks each remapped chunk.
func TestDictionaryUnifierSimpleChunkedArray(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}
	chunk1, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["ab", "cd", null, "cd"]`))
	chunk2, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["ef", "cd", "ef"]`))
	chunk3, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["ef", "ab", null, "ab"]`))
	chunk4, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`[]`))
	chunked := arrow.NewChunked(dictType, []arrow.Array{chunk1, chunk2, chunk3, chunk4})
	defer func() {
		chunk1.Release()
		chunk2.Release()
		chunk3.Release()
		chunk4.Release()
		chunked.Release()
	}()

	unified, err := array.UnifyChunkedDicts(mem, chunked)
	assert.NoError(t, err)
	defer unified.Release()
	assert.Len(t, unified.Chunks(), 4)

	expectedDict, _, _ := array.FromJSON(mem, dictType.ValueType, strings.NewReader(`["ab", "cd", "ef"]`))
	defer expectedDict.Release()

	c1Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[0, 1, null, 1]`))
	defer c1Indices.Release()
	c2Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[2, 1, 2]`))
	defer c2Indices.Release()
	c3Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[2, 0, null, 0]`))
	defer c3Indices.Release()
	c4Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[]`))
	defer c4Indices.Release()
	checkDictionaryArray(t, unified.Chunk(0), expectedDict, c1Indices)
	checkDictionaryArray(t, unified.Chunk(1), expectedDict, c2Indices)
	checkDictionaryArray(t, unified.Chunk(2), expectedDict, c3Indices)
	checkDictionaryArray(t, unified.Chunk(3), expectedDict, c4Indices)
}

// TestDictionaryUnifierChunkedArrayZeroChunks checks unification of an empty
// chunked array is a no-op.
// NOTE(review): unlike the sibling tests, neither chunked nor unified is
// Released here; mem.AssertSize still passes presumably because an empty chunked
// array holds no buffers — confirm whether the releases were omitted on purpose.
func TestDictionaryUnifierChunkedArrayZeroChunks(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}
	chunked := arrow.NewChunked(dictType, []arrow.Array{})
	unified, err := array.UnifyChunkedDicts(mem, chunked)
	assert.NoError(t, err)
	assert.True(t, array.ChunkedEqual(unified, chunked))
}

// TestDictionaryUnifierChunkedArrayOneChunk checks a single-chunk array is
// returned unchanged (same instance) by UnifyChunkedDicts.
func TestDictionaryUnifierChunkedArrayOneChunk(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}
	chunk1, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["ab", "cd", null, "cd"]`))
	defer chunk1.Release()

	chunked := arrow.NewChunked(dictType, []arrow.Array{chunk1})
	defer chunked.Release()

	unified, err := array.UnifyChunkedDicts(mem, chunked)
	assert.NoError(t, err)
	defer unified.Release()

	assert.True(t, array.ChunkedEqual(unified, chunked))
	assert.Same(t, unified, chunked)
}

// TestDictionaryUnifierChunkedArrayNoDict checks a non-dictionary chunked array
// passes through UnifyChunkedDicts untouched (same instance).
func TestDictionaryUnifierChunkedArrayNoDict(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	typ := arrow.PrimitiveTypes.Int8
	chunk1, _, _ := array.FromJSON(mem, typ, strings.NewReader(`[1, 1, 2, 3]`))
	defer chunk1.Release()
	chunk2, _, _ := array.FromJSON(mem, typ, strings.NewReader(`[5, 8, 13]`))
	defer chunk2.Release()

	chunked := arrow.NewChunked(typ, []arrow.Array{chunk1, chunk2})
	defer chunked.Release()

	unified, err := array.UnifyChunkedDicts(mem, chunked)
	assert.NoError(t, err)
	defer unified.Release()

	assert.True(t, array.ChunkedEqual(unified, chunked))
	assert.Same(t, unified, chunked)
}

// TestDictionaryUnifierChunkedArrayNested unifies dictionaries nested inside
// list chunks. Releases are deliberately grouped into one deferred closure at
// the end (the commented-out defers mark where they would otherwise go).
func TestDictionaryUnifierChunkedArrayNested(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	typ := arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.BinaryTypes.String})
	chunk1, _, err := array.FromJSON(mem, typ, strings.NewReader(`[["ab", "cd"], ["cd"]]`))
	assert.NoError(t, err)
	// defer chunk1.Release()
	chunk2, _, err := array.FromJSON(mem, typ, strings.NewReader(`[[], ["ef", "cd", "ef"]]`))
	assert.NoError(t, err)
	// defer chunk2.Release()

	chunked := arrow.NewChunked(typ, []arrow.Array{chunk1, chunk2})
	// defer chunked.Release()

	unified, err := array.UnifyChunkedDicts(mem, chunked)
	assert.NoError(t, err)
	// defer unified.Release()
	assert.Len(t, unified.Chunks(), 2)

	expectedDict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["ab", "cd", "ef"]`))
	// defer expectedDict.Release()

	unified1 := unified.Chunk(0).(*array.List)
	assert.Equal(t, []int32{0, 2, 3}, unified1.Offsets())
	expectedIndices1, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader(`[0, 1, 1]`))
	// defer expectedIndices1.Release()
	checkDictionaryArray(t, unified1.ListValues(), expectedDict, expectedIndices1)

	unified2 := unified.Chunk(1).(*array.List)
	assert.Equal(t, []int32{0, 0, 3}, unified2.Offsets())
	expectedIndices2, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader(`[2, 1, 2]`))
	// defer expectedIndices2.Release()
	checkDictionaryArray(t, unified2.ListValues(), expectedDict, expectedIndices2)

	defer func() {
		expectedIndices1.Release()
		expectedIndices2.Release()
		expectedDict.Release()
		unified.Release()
		chunked.Release()
		chunk2.Release()
		chunk1.Release()
	}()
}

// TestDictionaryUnifierChunkedArrayExtension unifies chunks of an extension type
// with dictionary storage (types.NewDictExtensionType).
// NOTE(review): this chunk ends mid-function — the remainder of this test lives
// in the following chunk.
func TestDictionaryUnifierChunkedArrayExtension(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	dt := types.NewDictExtensionType()
	chunk1, _, err := array.FromJSON(mem, dt, strings.NewReader(`["ab", null, "cd", "ab"]`))
	assert.NoError(t, err)
	defer chunk1.Release()
	chunk2, _, err := array.FromJSON(mem, dt, strings.NewReader(`["ef", "ab", "ab"]`))
	assert.NoError(t, err)
	defer chunk2.Release()

	chunked := arrow.NewChunked(dt, []arrow.Array{chunk1, chunk2})
	defer chunked.Release()

	unified, err := array.UnifyChunkedDicts(mem, chunked)
assert.NoError(t, err) defer unified.Release() assert.Len(t, unified.Chunks(), 2) expectedDict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["ab", "cd", "ef"]`)) defer expectedDict.Release() unified1 := unified.Chunk(0).(array.ExtensionArray) assert.Truef(t, arrow.TypeEqual(dt, unified1.DataType()), "expected: %s, got: %s", dt, unified1.DataType()) indices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, null, 1, 0]`)) defer indices.Release() checkDictionaryArray(t, unified1.Storage(), expectedDict, indices) unified2 := unified.Chunk(1).(array.ExtensionArray) assert.Truef(t, arrow.TypeEqual(dt, unified2.DataType()), "expected: %s, got: %s", dt, unified1.DataType()) indices, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[2, 0, 0]`)) defer indices.Release() checkDictionaryArray(t, unified2.Storage(), expectedDict, indices) } func TestDictionaryUnifierChunkedArrayNestedDict(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) innerType := arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: arrow.BinaryTypes.String}) innerDict1, _, err := array.FromJSON(mem, innerType, strings.NewReader(`[["ab", "cd"], [], ["cd", null]]`)) assert.NoError(t, err) defer innerDict1.Release() indices1, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[2, 1, 0, 1, 2]`)) defer indices1.Release() chunk1 := array.NewDictionaryArray(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: innerType}, indices1, innerDict1) defer chunk1.Release() innerDict2, _, err := array.FromJSON(mem, innerType, strings.NewReader(`[["cd", "ef"], ["cd", null], []]`)) assert.NoError(t, err) defer innerDict2.Release() indices2, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 2, 0]`)) defer indices2.Release() chunk2 := 
array.NewDictionaryArray(&arrow.DictionaryType{IndexType: indices2.DataType(), ValueType: innerType}, indices2, innerDict2) defer chunk2.Release() chunked := arrow.NewChunked(chunk1.DataType(), []arrow.Array{chunk1, chunk2}) defer chunked.Release() unified, err := array.UnifyChunkedDicts(mem, chunked) assert.Nil(t, unified) assert.EqualError(t, err, "unimplemented dictionary value type, list, nullable>") } func TestDictionaryUnifierTableZeroColumns(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) schema := arrow.NewSchema([]arrow.Field{}, nil) table := array.NewTable(schema, []arrow.Column{}, 42) defer table.Release() unified, err := array.UnifyTableDicts(mem, table) assert.NoError(t, err) assert.True(t, schema.Equal(unified.Schema())) assert.EqualValues(t, 42, unified.NumRows()) assert.True(t, array.TableEqual(table, unified)) } func TestDictionaryAppendIndices(t *testing.T) { indexTypes := []arrow.DataType{ arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64, } mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) dict, _, err := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["a", "b", "c", "d", "e", "f"]`)) require.NoError(t, err) defer dict.Release() indices := []int{3, 4, 0, 3, 1, 4, 4, 5} for _, typ := range indexTypes { t.Run(typ.String(), func(t *testing.T) { scoped := memory.NewCheckedAllocatorScope(mem) defer scoped.CheckSize(t) dictType := &arrow.DictionaryType{ IndexType: typ, ValueType: dict.DataType()} bldr := array.NewDictionaryBuilderWithDict(mem, dictType, dict) defer bldr.Release() bldr.AppendIndices(indices, nil) arr := bldr.NewDictionaryArray() defer arr.Release() arrIndices := arr.Indices() assert.EqualValues(t, len(indices), arr.Len()) assert.EqualValues(t, len(indices), 
arrIndices.Len()) assert.Equal(t, fmt.Sprint(indices), arrIndices.String()) }) } } type panicAllocator struct { n int paniced bool memory.Allocator } func (p *panicAllocator) Allocate(size int) []byte { if size > p.n { p.paniced = true panic("panic allocator") } return p.Allocator.Allocate(size) } func (p *panicAllocator) Reallocate(size int, b []byte) []byte { return p.Allocator.Reallocate(size, b) } func (p *panicAllocator) Free(b []byte) { p.Allocator.Free(b) } func TestBinaryDictionaryPanic(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) allocator := &panicAllocator{ n: 400, Allocator: mem, } expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} bldr := array.NewDictionaryBuilder(allocator, expectedType) defer bldr.Release() bldr.AppendNull() allocator.n = 0 // force panic func() { defer func() { recover() }() bldr.NewArray() }() assert.True(t, allocator.paniced) } func BenchmarkBinaryDictionaryBuilder(b *testing.B) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(b, 0) dictType := &arrow.DictionaryType{IndexType: &arrow.Int32Type{}, ValueType: arrow.BinaryTypes.String} bldr := array.NewDictionaryBuilder(mem, dictType) defer bldr.Release() randString := func() string { return fmt.Sprintf("test-%d", rand.Intn(30)) } builder := bldr.(*array.BinaryDictionaryBuilder) for i := 0; i < b.N; i++ { assert.NoError(b, builder.AppendString(randString())) } } arrow-go-18.2.0/arrow/array/diff.go000066400000000000000000000250031476434502500170600ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "fmt" "strings" "github.com/apache/arrow-go/v18/arrow" ) // Edit represents one entry in the edit script to compare two arrays. type Edit struct { Insert bool RunLength int64 } // Edits is a slice of Edit structs that represents an edit script to compare two arrays. // When applied to the base array, it produces the target array. // Each element of "insert" determines whether an element was inserted into (true) // or deleted from (false) base. Each insertion or deletion is followed by a run of // elements which are unchanged from base to target; the length of this run is stored // in RunLength. (Note that the edit script begins and ends with a run of shared // elements but both fields of the struct must have the same length. To accommodate this // the first element of "insert" should be ignored.) // // For example for base "hlloo" and target "hello", the edit script would be // [ // // {"insert": false, "run_length": 1}, // leading run of length 1 ("h") // {"insert": true, "run_length": 3}, // insert("e") then a run of length 3 ("llo") // {"insert": false, "run_length": 0} // delete("o") then an empty run // // ] type Edits []Edit // String returns a simple string representation of the edit script. func (e Edits) String() string { return fmt.Sprintf("%v", []Edit(e)) } // UnifiedDiff returns a string representation of the diff of base and target in Unified Diff format. 
func (e Edits) UnifiedDiff(base, target arrow.Array) string { var s strings.Builder baseIndex := int64(0) targetIndex := int64(0) wrotePosition := false for i := 0; i < len(e); i++ { if i > 0 { if !wrotePosition { s.WriteString(fmt.Sprintf("@@ -%d, +%d @@\n", baseIndex, targetIndex)) wrotePosition = true } if e[i].Insert { s.WriteString(fmt.Sprintf("+%v\n", stringAt(target, targetIndex))) targetIndex++ } else { s.WriteString(fmt.Sprintf("-%v\n", stringAt(base, baseIndex))) baseIndex++ } } for j := int64(0); j < e[i].RunLength; j++ { baseIndex++ targetIndex++ wrotePosition = false } } return s.String() } func stringAt(arr arrow.Array, i int64) string { if arr.IsNull(int(i)) { return "null" } dt := arr.DataType() switch { case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Float32): return fmt.Sprintf("%f", arr.(*Float32).Value(int(i))) case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Float64): return fmt.Sprintf("%f", arr.(*Float64).Value(int(i))) case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Date32): return arr.(*Date32).Value(int(i)).FormattedString() case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Date64): return arr.(*Date64).Value(int(i)).FormattedString() case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_s): return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Second).String() case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_ms): return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Millisecond).String() case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_us): return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Microsecond).String() case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_ns): return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Nanosecond).String() } s := NewSlice(arr, i, i+1) defer s.Release() st, _ := s.MarshalJSON() return strings.Trim(string(st[1:len(st)-1]), "\n") } // Diff compares two arrays, returning an edit script which expresses the difference // between them. The edit script can be applied to the base array to produce the target. 
// 'base' is a baseline for comparison.
// 'target' is an array of identical type to base whose elements differ from base's.
func Diff(base, target arrow.Array) (edits Edits, err error) {
	if !arrow.TypeEqual(base.DataType(), target.DataType()) {
		return nil, fmt.Errorf("%w: only taking the diff of like-typed arrays is supported", arrow.ErrNotImplemented)
	}
	switch base.DataType().ID() {
	case arrow.EXTENSION:
		// diff the underlying storage arrays; the storage carries the values
		return Diff(base.(ExtensionArray).Storage(), target.(ExtensionArray).Storage())
	case arrow.DICTIONARY:
		return nil, fmt.Errorf("%w: diffing arrays of type %s is not implemented", arrow.ErrNotImplemented, base.DataType())
	case arrow.RUN_END_ENCODED:
		return nil, fmt.Errorf("%w: diffing arrays of type %s is not implemented", arrow.ErrNotImplemented, base.DataType())
	}
	d := newQuadraticSpaceMyersDiff(base, target)
	return d.Diff()
}

// editPoint represents an intermediate state in the comparison of two arrays
type editPoint struct {
	base   int
	target int
}

// quadraticSpaceMyersDiff implements Myers' edit-distance algorithm, storing
// one endpoint per (editCount, index) pair in a triangular layout (see
// storageOffset), hence quadratic space in the number of edits.
type quadraticSpaceMyersDiff struct {
	base         arrow.Array
	target       arrow.Array
	finishIndex  int    // index into endpointBase of the endpoint that reached the end of both arrays; -1 until found
	editCount    int    // number of edits considered so far
	endpointBase []int  // triangular storage of the furthest base position per diagonal
	insert       []bool // triangular storage: was the edit reaching this endpoint an insertion?
	baseBegin    int
	targetBegin  int
	baseEnd      int
	targetEnd    int
}

// newQuadraticSpaceMyersDiff seeds the search with the 0-edit endpoint and
// detects the trivial base == target case up front.
func newQuadraticSpaceMyersDiff(base, target arrow.Array) *quadraticSpaceMyersDiff {
	d := &quadraticSpaceMyersDiff{
		base:         base,
		target:       target,
		finishIndex:  -1,
		editCount:    0,
		endpointBase: []int{},
		insert:       []bool{},
		baseBegin:    0,
		targetBegin:  0,
		baseEnd:      base.Len(),
		targetEnd:    target.Len(),
	}
	d.endpointBase = []int{d.extendFrom(editPoint{d.baseBegin, d.targetBegin}).base}
	if d.baseEnd-d.baseBegin == d.targetEnd-d.targetBegin && d.endpointBase[0] == d.baseEnd {
		// trivial case: base == target
		d.finishIndex = 0
	}
	return d
}

// valuesEqual reports whether base[baseIndex] equals target[targetIndex];
// two nulls compare equal, a null never equals a non-null.
func (d *quadraticSpaceMyersDiff) valuesEqual(baseIndex, targetIndex int) bool {
	baseNull := d.base.IsNull(baseIndex)
	targetNull := d.target.IsNull(targetIndex)
	if baseNull || targetNull {
		return baseNull && targetNull
	}
	return SliceEqual(d.base, int64(baseIndex), int64(baseIndex+1), d.target, int64(targetIndex), int64(targetIndex+1))
}

// increment the position within base and target (the elements skipped in this way were
// present in both sequences)
func (d *quadraticSpaceMyersDiff) extendFrom(p editPoint) editPoint {
	for p.base != d.baseEnd && p.target != d.targetEnd {
		if !d.valuesEqual(p.base, p.target) {
			break
		}
		p.base++
		p.target++
	}
	return p
}

// increment the position within base (the element pointed to was deleted)
// then extend maximally
func (d *quadraticSpaceMyersDiff) deleteOne(p editPoint) editPoint {
	if p.base != d.baseEnd {
		p.base++
	}
	return d.extendFrom(p)
}

// increment the position within target (the element pointed to was inserted)
// then extend maximally
func (d *quadraticSpaceMyersDiff) insertOne(p editPoint) editPoint {
	if p.target != d.targetEnd {
		p.target++
	}
	return d.extendFrom(p)
}

// beginning of a range for storing per-edit state in endpointBase and insert
func storageOffset(editCount int) int {
	return editCount * (editCount + 1) / 2
}

// given edit_count and index, augment endpointBase[index] with the corresponding
// position in target (which is only implicitly represented in editCount, index)
func (d *quadraticSpaceMyersDiff) getEditPoint(editCount, index int) editPoint {
	insertionsMinusDeletions := 2*(index-storageOffset(editCount)) - editCount
	maximalBase := d.endpointBase[index]
	maximalTarget := min(d.targetBegin+((maximalBase-d.baseBegin)+insertionsMinusDeletions), d.targetEnd)
	return editPoint{maximalBase, maximalTarget}
}

// Next advances the search by one edit: for every diagonal it computes the
// furthest-reaching endpoint achievable with editCount edits, preferring a
// deletion and replacing it by an insertion when that reaches at least as far.
func (d *quadraticSpaceMyersDiff) Next() {
	d.editCount++
	// grow the triangular storage to hold the new row of endpoints
	if len(d.endpointBase) < storageOffset(d.editCount+1) {
		d.endpointBase = append(d.endpointBase, make([]int, storageOffset(d.editCount+1)-len(d.endpointBase))...)
	}
	if len(d.insert) < storageOffset(d.editCount+1) {
		d.insert = append(d.insert, make([]bool, storageOffset(d.editCount+1)-len(d.insert))...)
	}
	previousOffset := storageOffset(d.editCount - 1)
	currentOffset := storageOffset(d.editCount)
	// try deleting from base first
	for i, iOut := 0, 0; i < d.editCount; i, iOut = i+1, iOut+1 {
		previousEndpoint := d.getEditPoint(d.editCount-1, i+previousOffset)
		d.endpointBase[iOut+currentOffset] = d.deleteOne(previousEndpoint).base
	}
	// check if inserting from target could do better
	for i, iOut := 0, 1; i < d.editCount; i, iOut = i+1, iOut+1 {
		// retrieve the previously computed best endpoint for (editCount, iOut)
		// for comparison with the best endpoint achievable with an insertion
		endpointAfterDeletion := d.getEditPoint(d.editCount, iOut+currentOffset)
		previousEndpoint := d.getEditPoint(d.editCount-1, i+previousOffset)
		endpointAfterInsertion := d.insertOne(previousEndpoint)
		if endpointAfterInsertion.base-endpointAfterDeletion.base >= 0 {
			// insertion was more efficient; keep it and mark the insertion in insert
			d.insert[iOut+currentOffset] = true
			d.endpointBase[iOut+currentOffset] = endpointAfterInsertion.base
		}
	}
	// check whether any endpoint has reached the end of both arrays
	finish := editPoint{d.baseEnd, d.targetEnd}
	for iOut := 0; iOut < d.editCount+1; iOut++ {
		if d.getEditPoint(d.editCount, iOut+currentOffset) == finish {
			d.finishIndex = iOut + currentOffset
			return
		}
	}
}

// Done reports whether an endpoint reaching the end of both arrays was found.
func (d *quadraticSpaceMyersDiff) Done() bool {
	return d.finishIndex != -1
}

// GetEdits walks backwards from the finishing endpoint through the stored
// per-edit state, reconstructing the edit script front to back.
// It panics if called before Done() returns true.
func (d *quadraticSpaceMyersDiff) GetEdits() (Edits, error) {
	if !d.Done() {
		panic("GetEdits called but Done() = false")
	}
	length := d.editCount + 1
	edits := make(Edits, length)
	index := d.finishIndex
	endpoint := d.getEditPoint(d.editCount, d.finishIndex)
	for i := d.editCount; i > 0; i-- {
		insert := d.insert[index]
		edits[i].Insert = insert
		insertionsMinusDeletions := (endpoint.base - d.baseBegin) - (endpoint.target - d.targetBegin)
		if insert {
			insertionsMinusDeletions++
		} else {
			insertionsMinusDeletions--
		}
		// locate the storage index of the endpoint this edit was derived from
		index = (i-1-insertionsMinusDeletions)/2 + storageOffset(i-1)
		// endpoint of previous edit
		previous := d.getEditPoint(i-1, index)
		in := 0
		if insert {
			in = 1
		}
		// the run length excludes the deleted element (deletions advance base by one)
		edits[i].RunLength =
int64(endpoint.base - previous.base - (1 - in)) endpoint = previous } edits[0].Insert = false edits[0].RunLength = int64(endpoint.base - d.baseBegin) return edits, nil } func (d *quadraticSpaceMyersDiff) Diff() (edits Edits, err error) { for !d.Done() { d.Next() } return d.GetEdits() } arrow-go-18.2.0/arrow/array/diff_test.go000066400000000000000000000525701476434502500201300ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test

import (
	"fmt"
	"math/rand"
	"reflect"
	"strings"
	"testing"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/extensions"
	"github.com/apache/arrow-go/v18/arrow/memory"
	"github.com/apache/arrow-go/v18/internal/json"
)

// diffTestCase describes one array.Diff scenario: a base and target array
// (as JSON of the given data type) and the expected edit script.
type diffTestCase struct {
	dataType      arrow.DataType
	baseJSON      string // JSON for the base array
	targetJSON    string // JSON for the target array
	wantInsert    []bool  // expected Insert flag per edit
	wantRunLength []int64 // expected RunLength per edit
}

// check builds the base and target arrays from JSON, runs array.Diff and
// compares the resulting edit script against the expected inserts/run lengths.
func (s *diffTestCase) check(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer mem.AssertSize(t, 0)
	base, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.baseJSON))
	if err != nil {
		t.Fatal(err)
	}
	defer base.Release()
	target, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.targetJSON))
	if err != nil {
		t.Fatal(err)
	}
	defer target.Release()
	edits, err := array.Diff(base, target)
	if err != nil {
		t.Fatalf("got unexpected error %v", err)
	}
	// flatten the edit script for comparison with the expected slices
	gotInserts := make([]bool, len(edits))
	gotRunLengths := make([]int64, len(edits))
	for i, edit := range edits {
		gotInserts[i] = edit.Insert
		gotRunLengths[i] = edit.RunLength
	}
	if !reflect.DeepEqual(gotInserts, s.wantInsert) {
		t.Errorf("Diff(\n base=%v, \ntarget=%v\n) got insert %v, want %v", base, target, gotInserts, s.wantInsert)
	}
	if !reflect.DeepEqual(gotRunLengths, s.wantRunLength) {
		t.Errorf("Diff(\n base=%v, \ntarget=%v\n) got run length %v, want %v", base, target, gotRunLengths, s.wantRunLength)
	}
}

// TestDiff_Trivial covers the degenerate cases: empty arrays, all-null
// arrays, and identical arrays.
func TestDiff_Trivial(t *testing.T) {
	cases := []struct {
		name          string
		base          string
		target        string
		wantInsert    []bool
		wantRunLength []int64
	}{
		{
			name:          "empty",
			base:          `[]`,
			target:        `[]`,
			wantInsert:    []bool{false},
			wantRunLength: []int64{0},
		},
		{
			name:          "nulls",
			base:          `[null, null]`,
			target:        `[null, null, null, null]`,
			wantInsert:    []bool{false, true, true},
			wantRunLength: []int64{2, 0, 0},
		},
		{
			name:          "equal",
			base:          `[1, 2, 3]`,
			target:        `[1, 2, 3]`,
			wantInsert:    []bool{false},
			wantRunLength: []int64{3},
		},
	}
	for _, tc := range cases {
		d := diffTestCase{
dataType: arrow.PrimitiveTypes.Int32, baseJSON: tc.base, targetJSON: tc.target, wantInsert: tc.wantInsert, wantRunLength: tc.wantRunLength, } t.Run(tc.name, d.check) } } func TestDiff_Basics(t *testing.T) { cases := []struct { name string base string target string wantInsert []bool wantRunLength []int64 }{ { name: "insert one", base: `[1, 2, null, 5]`, target: `[1, 2, 3, null, 5]`, wantInsert: []bool{false, true}, wantRunLength: []int64{2, 2}, }, { name: "delete one", base: `[1, 2, 3, null, 5]`, target: `[1, 2, null, 5]`, wantInsert: []bool{false, false}, wantRunLength: []int64{2, 2}, }, { name: "change one", base: `[1, 2, 3, null, 5]`, target: `[1, 2, 23, null, 5]`, wantInsert: []bool{false, false, true}, wantRunLength: []int64{2, 0, 2}, }, { name: "null out one", base: `[1, 2, 3, null, 5]`, target: `[1, 2, null, null, 5]`, wantInsert: []bool{false, false, true}, wantRunLength: []int64{2, 1, 1}, }, { name: "append some", base: `[1, 2, 3, null, 5]`, target: `[1, 2, 3, null, 5, 6, 7, 8, 9]`, wantInsert: []bool{false, true, true, true, true}, wantRunLength: []int64{5, 0, 0, 0, 0}, }, { name: "prepend some", base: `[1, 2, 3, null, 5]`, target: `[6, 4, 2, 0, 1, 2, 3, null, 5]`, wantInsert: []bool{false, true, true, true, true}, wantRunLength: []int64{0, 0, 0, 0, 5}, }, } for _, tc := range cases { d := diffTestCase{ dataType: arrow.PrimitiveTypes.Int32, baseJSON: tc.base, targetJSON: tc.target, wantInsert: tc.wantInsert, wantRunLength: tc.wantRunLength, } t.Run(tc.name, d.check) } } func TestDiff_BasicsWithBooleans(t *testing.T) { cases := []struct { name string base string target string wantInsert []bool wantRunLength []int64 }{ { name: "insert one", base: `[true, true, true]`, target: `[true, false, true, true]`, wantInsert: []bool{false, true}, wantRunLength: []int64{1, 2}, }, { name: "delete one", base: `[true, false, true, true]`, target: `[true, true, true]`, wantInsert: []bool{false, false}, wantRunLength: []int64{1, 2}, }, { name: "change one", base: `[false, 
false, true]`, target: `[true, false, true]`, wantInsert: []bool{false, false, true}, wantRunLength: []int64{0, 0, 2}, }, { name: "null out one", base: `[true, false, true]`, target: `[true, false, null]`, wantInsert: []bool{false, false, true}, wantRunLength: []int64{2, 0, 0}, }, } for _, tc := range cases { d := diffTestCase{ dataType: &arrow.BooleanType{}, baseJSON: tc.base, targetJSON: tc.target, wantInsert: tc.wantInsert, wantRunLength: tc.wantRunLength, } t.Run(tc.name, d.check) } } func TestDiff_BasicsWithStrings(t *testing.T) { cases := []struct { name string base string target string wantInsert []bool wantRunLength []int64 }{ { name: "insert one", base: `["give", "a", "break"]`, target: `["give", "me", "a", "break"]`, wantInsert: []bool{false, true}, wantRunLength: []int64{1, 2}, }, { name: "delete one", base: `["give", "me", "a", "break"]`, target: `["give", "a", "break"]`, wantInsert: []bool{false, false}, wantRunLength: []int64{1, 2}, }, { name: "change one", base: `["give", "a", "break"]`, target: `["gimme", "a", "break"]`, wantInsert: []bool{false, false, true}, wantRunLength: []int64{0, 0, 2}, }, { name: "null out one", base: `["give", "a", "break"]`, target: `["give", "a", null]`, wantInsert: []bool{false, false, true}, wantRunLength: []int64{2, 0, 0}, }, } for _, tc := range cases { d := diffTestCase{ dataType: &arrow.StringType{}, baseJSON: tc.base, targetJSON: tc.target, wantInsert: tc.wantInsert, wantRunLength: tc.wantRunLength, } t.Run(tc.name, d.check) } } func TestDiff_BasicsWithLists(t *testing.T) { cases := []struct { name string base string target string wantInsert []bool wantRunLength []int64 }{ { name: "insert one", base: `[[2, 3, 1], [], [13]]`, target: `[[2, 3, 1], [5, 9], [], [13]]`, wantInsert: []bool{false, true}, wantRunLength: []int64{1, 2}, }, { name: "delete one", base: `[[2, 3, 1], [5, 9], [], [13]]`, target: `[[2, 3, 1], [], [13]]`, wantInsert: []bool{false, false}, wantRunLength: []int64{1, 2}, }, { name: "change one", base: 
`[[2, 3, 1], [], [13]]`, target: `[[3, 3, 3], [], [13]]`, wantInsert: []bool{false, false, true}, wantRunLength: []int64{0, 0, 2}, }, { name: "null out one", base: `[[2, 3, 1], [], [13]]`, target: `[[2, 3, 1], [], null]`, wantInsert: []bool{false, false, true}, wantRunLength: []int64{2, 0, 0}, }, } for _, tc := range cases { d := diffTestCase{ dataType: arrow.ListOf(arrow.PrimitiveTypes.Int32), baseJSON: tc.base, targetJSON: tc.target, wantInsert: tc.wantInsert, wantRunLength: tc.wantRunLength, } t.Run(tc.name, d.check) } } func TestDiff_BasicsWithStructs(t *testing.T) { cases := []struct { name string base string target string wantInsert []bool wantRunLength []int64 }{ { name: "insert one", base: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, target: `[{"foo": "!", "bar": 3}, {"foo": "?"}, {}, {"bar": 13}]`, wantInsert: []bool{false, true}, wantRunLength: []int64{1, 2}, }, { name: "delete one", base: `[{"foo": "!", "bar": 3}, {"foo": "?"}, {}, {"bar": 13}]`, target: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, wantInsert: []bool{false, false}, wantRunLength: []int64{1, 2}, }, { name: "change one", base: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, target: `[{"foo": "!", "bar": 2}, {}, {"bar": 13}]`, wantInsert: []bool{false, false, true}, wantRunLength: []int64{0, 0, 2}, }, { name: "null out one", base: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, target: `[{"foo": "!", "bar": 3}, {}, null]`, wantInsert: []bool{false, false, true}, wantRunLength: []int64{2, 0, 0}, }, } for _, tc := range cases { f1 := arrow.Field{Name: "foo", Type: arrow.BinaryTypes.String, Nullable: true} f2 := arrow.Field{Name: "bar", Type: arrow.PrimitiveTypes.Int32, Nullable: true} d := diffTestCase{ dataType: arrow.StructOf(f1, f2), baseJSON: tc.base, targetJSON: tc.target, wantInsert: tc.wantInsert, wantRunLength: tc.wantRunLength, } t.Run(tc.name, d.check) } } func TestDiff_Random(t *testing.T) { rng := rand.New(rand.NewSource(0xdeadbeef)) for i := 0; i < 100; i++ { 
t.Run(fmt.Sprintf("case-%d", i), func(t *testing.T) { testRandomCase(t, rng) }) } } func testRandomCase(t *testing.T, rng *rand.Rand) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dataType := arrow.PrimitiveTypes.Int32 baseValues := make([]int32, rng.Intn(10)) for i := range baseValues { baseValues[i] = rng.Int31() } baseJSON, err := json.Marshal(baseValues) if err != nil { t.Fatal(err) } targetValues := make([]int32, rng.Intn(10)) for i := range targetValues { // create runs with some probability if rng.Intn(2) == 0 && len(baseValues) > 0 { targetValues[i] = baseValues[rng.Intn(len(baseValues))] } else { targetValues[i] = rng.Int31() } } targetJSON, err := json.Marshal(targetValues) if err != nil { t.Fatal(err) } base, _, err := array.FromJSON(mem, dataType, strings.NewReader(string(baseJSON))) if err != nil { t.Fatal(err) } defer base.Release() target, _, err := array.FromJSON(mem, dataType, strings.NewReader(string(targetJSON))) if err != nil { t.Fatal(err) } defer target.Release() edits, err := array.Diff(base, target) if err != nil { t.Fatalf("got unexpected error %v", err) } validateEditScript(t, edits, base, target) } // validateEditScript checks that the edit script produces target when applied to base. 
func validateEditScript(t *testing.T, edits array.Edits, base, target arrow.Array) { if len(edits) == 0 { t.Fatalf("edit script has run length of zero") } baseIndex := int64(0) targetIndex := int64(0) for i := 0; i < len(edits); i++ { if i > 0 { if edits[i].Insert { targetIndex++ } else { baseIndex++ } } for j := int64(0); j < edits[i].RunLength; j++ { if !array.SliceEqual(base, baseIndex, baseIndex+1, target, targetIndex, targetIndex+1) { t.Fatalf("edit script (%v) when applied to base %v does not produce target %v", edits, base, target) } baseIndex += 1 targetIndex += 1 } } if baseIndex != int64(base.Len()) || targetIndex != int64(target.Len()) { t.Fatalf("edit script (%v) when applied to base %v does not produce target %v", edits, base, target) } } type diffStringTestCase struct { dataType arrow.DataType name string baseJSON string targetJSON string want string } func (s *diffStringTestCase) check(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) base, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.baseJSON)) if err != nil { t.Fatal(err) } defer base.Release() target, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.targetJSON)) if err != nil { t.Fatal(err) } defer target.Release() edits, err := array.Diff(base, target) if err != nil { t.Fatalf("got unexpected error %v", err) } got := edits.UnifiedDiff(base, target) if got != s.want { t.Errorf("got:\n%v\n, want:\n%v", got, s.want) } } func TestEdits_UnifiedDiff(t *testing.T) { msPerDay := 24 * 60 * 60 * 1000 cases := []diffStringTestCase{ { name: "no changes", dataType: arrow.BinaryTypes.String, baseJSON: `["give", "me", "a", "break"]`, targetJSON: `["give", "me", "a", "break"]`, want: ``, }, { name: "insert one", dataType: arrow.BinaryTypes.String, baseJSON: `["give", "a", "break"]`, targetJSON: `["give", "me", "a", "break"]`, want: `@@ -1, +1 @@ +"me" `, }, { name: "delete one", dataType: arrow.BinaryTypes.String, baseJSON: 
`["give", "me", "a", "break"]`, targetJSON: `["give", "a", "break"]`, want: `@@ -1, +1 @@ -"me" `, }, { name: "change one", dataType: arrow.BinaryTypes.String, baseJSON: `["give", "a", "break"]`, targetJSON: `["gimme", "a", "break"]`, want: `@@ -0, +0 @@ -"give" +"gimme" `, }, { name: "null out one", dataType: arrow.BinaryTypes.String, baseJSON: `["give", "a", "break"]`, targetJSON: `["give", "a", null]`, want: `@@ -2, +2 @@ -"break" +null `, }, { name: "strings with escaped chars", dataType: arrow.BinaryTypes.String, baseJSON: `["newline:\\n", "quote:'", "backslash:\\\\"]`, targetJSON: `["newline:\\n", "tab:\\t", "quote:\\\"", "backslash:\\\\"]`, want: `@@ -1, +1 @@ -"quote:'" +"tab:\\t" +"quote:\\\"" `, }, { name: "date32", dataType: arrow.PrimitiveTypes.Date32, baseJSON: `[0, 1, 2, 31, 4]`, targetJSON: `[0, 1, 31, 2, 4]`, want: `@@ -2, +2 @@ -1970-01-03 @@ -4, +3 @@ +1970-01-03 `, }, { name: "date64", dataType: arrow.PrimitiveTypes.Date64, baseJSON: fmt.Sprintf(`[%d, %d, %d, %d, %d]`, 0*msPerDay, 1*msPerDay, 2*msPerDay, 31*msPerDay, 4*msPerDay), targetJSON: fmt.Sprintf(`[%d, %d, %d, %d, %d]`, 0*msPerDay, 1*msPerDay, 31*msPerDay, 2*msPerDay, 4*msPerDay), want: `@@ -2, +2 @@ -1970-01-03 @@ -4, +3 @@ +1970-01-03 `, }, { name: "timestamp_s", dataType: arrow.FixedWidthTypes.Timestamp_s, baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+(5+60*(4+60*(3+24*int64(1))))), targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+(5+60*(4+60*(3+24*int64(1))))), want: `@@ -2, +2 @@ -1970-01-02 03:15:23 +0000 UTC @@ -4, +3 @@ +1970-01-02 03:15:23 +0000 UTC `, }, { name: "timestamp_ms", dataType: arrow.FixedWidthTypes.Timestamp_ms, baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+1000*(5+60*(4+60*(3+24*int64(1))))), targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+1000*(5+60*(4+60*(3+24*int64(1))))), want: `@@ -2, +2 @@ -1970-01-02 03:04:05.678 +0000 UTC @@ -4, +3 @@ +1970-01-02 03:04:05.678 +0000 UTC `, }, { name: "timestamp_us", dataType: arrow.FixedWidthTypes.Timestamp_us, baseJSON: 
fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+1000000*(5+60*(4+60*(3+24*int64(1))))), targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+1000000*(5+60*(4+60*(3+24*int64(1))))), want: `@@ -2, +2 @@ -1970-01-02 03:04:05.000678 +0000 UTC @@ -4, +3 @@ +1970-01-02 03:04:05.000678 +0000 UTC `, }, { name: "timestamp_ns", dataType: arrow.FixedWidthTypes.Timestamp_ns, baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+1000000000*(5+60*(4+60*(3+24*int64(1))))), targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+1000000000*(5+60*(4+60*(3+24*int64(1))))), want: `@@ -2, +2 @@ -1970-01-02 03:04:05.000000678 +0000 UTC @@ -4, +3 @@ +1970-01-02 03:04:05.000000678 +0000 UTC `, }, { name: "lists", dataType: arrow.ListOf(arrow.PrimitiveTypes.Int32), baseJSON: `[[2, 3, 1], [], [13], []]`, targetJSON: `[[2, 3, 1], [5, 9], [], [13]]`, want: `@@ -1, +1 @@ +[5,9] @@ -3, +4 @@ -[] `, }, { name: "maps", dataType: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32), baseJSON: `[ [{"key": "foo", "value": 2}, {"key": "bar", "value": 3}, {"key": "baz", "value": 1}], [{"key": "quux", "value": 13}], [] ]`, targetJSON: `[ [{"key": "foo", "value": 2}, {"key": "bar", "value": 3}, {"key": "baz", "value": 1}], [{"key": "ytho", "value": 11}], [{"key": "quux", "value": 13}], [] ]`, want: `@@ -1, +1 @@ +[{"key":"ytho","value":11}] `, }, { name: "structs", dataType: arrow.StructOf( []arrow.Field{ {Name: "foo", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "bar", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, }..., ), baseJSON: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, targetJSON: `[{"foo": null, "bar": 2}, {}, {"bar": 13}]`, want: `@@ -0, +0 @@ -{"bar":3,"foo":"!"} +{"bar":2,"foo":null} `, }, { name: "unions", dataType: arrow.UnionOf(arrow.SparseMode, []arrow.Field{ {Name: "foo", Type: arrow.BinaryTypes.String}, {Name: "bar", Type: arrow.PrimitiveTypes.Int32}, }, []arrow.UnionTypeCode{2, 5}, ), baseJSON: `[[2, "!"], [5, 3], [5, 13]]`, targetJSON: `[[2, "!"], [2, "3"], [5, 13]]`, want: `@@ -1, +1 
@@ -[5,3] +[2,"3"] `, }, { name: "string", dataType: arrow.BinaryTypes.String, baseJSON: `["h", "l", "l", "o", "o"]`, targetJSON: `["h", "e", "l", "l", "o", "0"]`, want: `@@ -1, +1 @@ +"e" @@ -4, +5 @@ -"o" +"0" `, }, { name: "int8", dataType: arrow.PrimitiveTypes.Int8, baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, want: `@@ -0, +0 @@ -0 -1 @@ -5, +3 @@ -8 +7 @@ -9, +7 @@ +19 `, }, { name: "int16", dataType: arrow.PrimitiveTypes.Int16, baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, want: `@@ -0, +0 @@ -0 -1 @@ -5, +3 @@ -8 +7 @@ -9, +7 @@ +19 `, }, { name: "int32", dataType: arrow.PrimitiveTypes.Int32, baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, want: `@@ -0, +0 @@ -0 -1 @@ -5, +3 @@ -8 +7 @@ -9, +7 @@ +19 `, }, { name: "int64", dataType: arrow.PrimitiveTypes.Int64, baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, want: `@@ -0, +0 @@ -0 -1 @@ -5, +3 @@ -8 +7 @@ -9, +7 @@ +19 `, }, { name: "uint8", dataType: arrow.PrimitiveTypes.Uint8, baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, want: `@@ -0, +0 @@ -0 -1 @@ -5, +3 @@ -8 +7 @@ -9, +7 @@ +19 `, }, { name: "uint16", dataType: arrow.PrimitiveTypes.Uint16, baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, want: `@@ -0, +0 @@ -0 -1 @@ -5, +3 @@ -8 +7 @@ -9, +7 @@ +19 `, }, { name: "uint32", dataType: arrow.PrimitiveTypes.Uint32, baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, want: `@@ -0, +0 @@ -0 -1 @@ -5, +3 @@ -8 +7 @@ -9, +7 @@ +19 `, }, { name: "uint64", dataType: arrow.PrimitiveTypes.Uint64, baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, want: `@@ -0, +0 @@ -0 -1 @@ -5, +3 @@ -8 +7 @@ -9, +7 @@ +19 `, }, { name: "float32", dataType: arrow.PrimitiveTypes.Float32, baseJSON: `[0.1, 0.3, -0.5]`, 
targetJSON: `[0.1, -0.5, 0.3]`, want: `@@ -1, +1 @@ -0.300000 @@ -3, +2 @@ +0.300000 `, }, { name: "float64", dataType: arrow.PrimitiveTypes.Float64, baseJSON: `[0.1, 0.3, -0.5]`, targetJSON: `[0.1, -0.5, 0.3]`, want: `@@ -1, +1 @@ -0.300000 @@ -3, +2 @@ +0.300000 `, }, { name: "equal nulls", dataType: arrow.PrimitiveTypes.Int32, baseJSON: `[null, null]`, targetJSON: `[null, null]`, want: ``, }, { name: "nulls", dataType: arrow.PrimitiveTypes.Int32, baseJSON: `[1, null, null, null]`, targetJSON: `[null, 1, null, 2]`, want: `@@ -0, +0 @@ -1 @@ -2, +1 @@ -null +1 @@ -4, +3 @@ +2 `, }, { name: "extensions", dataType: extensions.NewUUIDType(), baseJSON: `["00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000001"]`, targetJSON: `["00000000-0000-0000-0000-000000000001", "00000000-0000-0000-0000-000000000002"]`, want: `@@ -0, +0 @@ -"00000000-0000-0000-0000-000000000000" @@ -2, +1 @@ +"00000000-0000-0000-0000-000000000002" `, }, } for _, tc := range cases { t.Run(tc.name, tc.check) } } arrow-go-18.2.0/arrow/array/doc.go000066400000000000000000000015651476434502500167240ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. /* Package array provides implementations of various Arrow array types. 
*/ package array arrow-go-18.2.0/arrow/array/encoded.go000066400000000000000000000345721476434502500175640ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "fmt" "math" "reflect" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/encoded" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" "github.com/apache/arrow-go/v18/internal/utils" ) // RunEndEncoded represents an array containing two children: // an array of int32 values defining the ends of each run of values // and an array of values type RunEndEncoded struct { array ends arrow.Array values arrow.Array } func NewRunEndEncodedArray(runEnds, values arrow.Array, logicalLength, offset int) *RunEndEncoded { data := NewData(arrow.RunEndEncodedOf(runEnds.DataType(), values.DataType()), logicalLength, []*memory.Buffer{nil}, []arrow.ArrayData{runEnds.Data(), values.Data()}, 0, offset) defer data.Release() return NewRunEndEncodedData(data) } func NewRunEndEncodedData(data arrow.ArrayData) *RunEndEncoded { r := &RunEndEncoded{} r.refCount = 1 r.setData(data.(*Data)) return r } func (r 
*RunEndEncoded) Values() arrow.Array { return r.values } func (r *RunEndEncoded) RunEndsArr() arrow.Array { return r.ends } func (r *RunEndEncoded) Retain() { r.array.Retain() r.values.Retain() r.ends.Retain() } func (r *RunEndEncoded) Release() { r.array.Release() r.values.Release() r.ends.Release() } // LogicalValuesArray returns an array holding the values of each // run, only over the range of run values inside the logical offset/length // range of the parent array. // // # Example // // For this array: // // RunEndEncoded: { Offset: 150, Length: 1500 } // RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ] // Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ] // // LogicalValuesArray will return the following array: // // [ "f", "g" ] // // This is because the offset of 150 tells it to skip the values until // "f" which corresponds with the logical offset (the run from 10 - 1000), // and stops after "g" because the length + offset goes to 1650 which is // within the run from 1000 - 1750, corresponding to the "g" value. // // # Note // // The return from this needs to be Released. func (r *RunEndEncoded) LogicalValuesArray() arrow.Array { physOffset := r.GetPhysicalOffset() physLength := r.GetPhysicalLength() data := NewSliceData(r.data.Children()[1], int64(physOffset), int64(physOffset+physLength)) defer data.Release() return MakeFromData(data) } // LogicalRunEndsArray returns an array holding the logical indexes // of each run end, only over the range of run end values relative // to the logical offset/length range of the parent array. // // For arrays with an offset, this is not a slice of the existing // internal run ends array. Instead a new array is created with run-ends // that are adjusted so the new array can have an offset of 0. As a result // this method can be expensive to call for an array with a non-zero offset. 
// // # Example // // For this array: // // RunEndEncoded: { Offset: 150, Length: 1500 } // RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ] // Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ] // // LogicalRunEndsArray will return the following array: // // [ 850, 1500 ] // // This is because the offset of 150 tells us to skip all run-ends less // than 150 (by finding the physical offset), and we adjust the run-ends // accordingly (1000 - 150 = 850). The logical length of the array is 1500, // so we know we don't want to go past the 1750 run end. Thus the last // run-end is determined by doing: min(1750 - 150, 1500) = 1500. // // # Note // // The return from this needs to be Released func (r *RunEndEncoded) LogicalRunEndsArray(mem memory.Allocator) arrow.Array { physOffset := r.GetPhysicalOffset() physLength := r.GetPhysicalLength() if r.data.offset == 0 { data := NewSliceData(r.data.childData[0], 0, int64(physLength)) defer data.Release() return MakeFromData(data) } bldr := NewBuilder(mem, r.data.childData[0].DataType()) defer bldr.Release() bldr.Resize(physLength) switch e := r.ends.(type) { case *Int16: for _, v := range e.Int16Values()[physOffset : physOffset+physLength] { v -= int16(r.data.offset) v = int16(utils.Min(int(v), r.data.length)) bldr.(*Int16Builder).Append(v) } case *Int32: for _, v := range e.Int32Values()[physOffset : physOffset+physLength] { v -= int32(r.data.offset) v = int32(utils.Min(int(v), r.data.length)) bldr.(*Int32Builder).Append(v) } case *Int64: for _, v := range e.Int64Values()[physOffset : physOffset+physLength] { v -= int64(r.data.offset) v = int64(utils.Min(int(v), r.data.length)) bldr.(*Int64Builder).Append(v) } } return bldr.NewArray() } func (r *RunEndEncoded) setData(data *Data) { if len(data.childData) != 2 { panic(fmt.Errorf("%w: arrow/array: RLE array must have exactly 2 children", arrow.ErrInvalid)) } debug.Assert(data.dtype.ID() == arrow.RUN_END_ENCODED, "invalid type for RunLengthEncoded") if 
!data.dtype.(*arrow.RunEndEncodedType).ValidRunEndsType(data.childData[0].DataType()) { panic(fmt.Errorf("%w: arrow/array: run ends array must be int16, int32, or int64", arrow.ErrInvalid)) } if data.childData[0].NullN() > 0 { panic(fmt.Errorf("%w: arrow/array: run ends array cannot contain nulls", arrow.ErrInvalid)) } r.array.setData(data) r.ends = MakeFromData(r.data.childData[0]) r.values = MakeFromData(r.data.childData[1]) } func (r *RunEndEncoded) GetPhysicalOffset() int { return encoded.FindPhysicalOffset(r.data) } func (r *RunEndEncoded) GetPhysicalLength() int { return encoded.GetPhysicalLength(r.data) } // GetPhysicalIndex can be used to get the run-encoded value instead of costly LogicalValuesArray // in the following way: // // r.Values().(valuetype).Value(r.GetPhysicalIndex(i)) func (r *RunEndEncoded) GetPhysicalIndex(i int) int { return encoded.FindPhysicalIndex(r.data, i+r.data.offset) } // ValueStr will return the str representation of the value at the logical offset i. func (r *RunEndEncoded) ValueStr(i int) string { return r.values.ValueStr(r.GetPhysicalIndex(i)) } func (r *RunEndEncoded) String() string { var buf bytes.Buffer buf.WriteByte('[') for i := 0; i < r.ends.Len(); i++ { if i != 0 { buf.WriteByte(',') } value := r.values.GetOneForMarshal(i) if byts, ok := value.(json.RawMessage); ok { value = string(byts) } fmt.Fprintf(&buf, "{%d -> %v}", r.ends.GetOneForMarshal(i), value) } buf.WriteByte(']') return buf.String() } func (r *RunEndEncoded) GetOneForMarshal(i int) interface{} { return r.values.GetOneForMarshal(r.GetPhysicalIndex(i)) } func (r *RunEndEncoded) MarshalJSON() ([]byte, error) { var buf bytes.Buffer enc := json.NewEncoder(&buf) buf.WriteByte('[') for i := 0; i < r.Len(); i++ { if i != 0 { buf.WriteByte(',') } if err := enc.Encode(r.GetOneForMarshal(i)); err != nil { return nil, err } } buf.WriteByte(']') return buf.Bytes(), nil } func arrayRunEndEncodedEqual(l, r *RunEndEncoded) bool { // types were already checked before getting 
here, so we know // the encoded types are equal mr := encoded.NewMergedRuns([2]arrow.Array{l, r}) for mr.Next() { lIndex := mr.IndexIntoArray(0) rIndex := mr.IndexIntoArray(1) if !SliceEqual(l.values, lIndex, lIndex+1, r.values, rIndex, rIndex+1) { return false } } return true } func arrayRunEndEncodedApproxEqual(l, r *RunEndEncoded, opt equalOption) bool { // types were already checked before getting here, so we know // the encoded types are equal mr := encoded.NewMergedRuns([2]arrow.Array{l, r}) for mr.Next() { lIndex := mr.IndexIntoArray(0) rIndex := mr.IndexIntoArray(1) if !sliceApproxEqual(l.values, lIndex, lIndex+1, r.values, rIndex, rIndex+1, opt) { return false } } return true } type RunEndEncodedBuilder struct { builder dt arrow.DataType runEnds Builder values Builder maxRunEnd uint64 // currently, mixing AppendValueFromString & UnmarshalOne is unsupported lastUnmarshalled interface{} unmarshalled bool // tracks if Unmarshal was called (in case lastUnmarshalled is nil) lastStr *string } func NewRunEndEncodedBuilder(mem memory.Allocator, runEnds, encoded arrow.DataType) *RunEndEncodedBuilder { dt := arrow.RunEndEncodedOf(runEnds, encoded) if !dt.ValidRunEndsType(runEnds) { panic("arrow/ree: invalid runEnds type for run length encoded array") } var maxEnd uint64 switch runEnds.ID() { case arrow.INT16: maxEnd = math.MaxInt16 case arrow.INT32: maxEnd = math.MaxInt32 case arrow.INT64: maxEnd = math.MaxInt64 } return &RunEndEncodedBuilder{ builder: builder{refCount: 1, mem: mem}, dt: dt, runEnds: NewBuilder(mem, runEnds), values: NewBuilder(mem, encoded), maxRunEnd: maxEnd, lastUnmarshalled: nil, } } func (b *RunEndEncodedBuilder) Type() arrow.DataType { return b.dt } func (b *RunEndEncodedBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { b.values.Release() b.runEnds.Release() } } func (b *RunEndEncodedBuilder) addLength(n uint64) { if uint64(b.length)+n > b.maxRunEnd { 
panic(fmt.Errorf("%w: %s array length must fit be less than %d", arrow.ErrInvalid, b.dt, b.maxRunEnd)) } b.length += int(n) } func (b *RunEndEncodedBuilder) finishRun() { b.lastUnmarshalled = nil b.lastStr = nil b.unmarshalled = false if b.length == 0 { return } switch bldr := b.runEnds.(type) { case *Int16Builder: bldr.Append(int16(b.length)) case *Int32Builder: bldr.Append(int32(b.length)) case *Int64Builder: bldr.Append(int64(b.length)) } } func (b *RunEndEncodedBuilder) ValueBuilder() Builder { return b.values } func (b *RunEndEncodedBuilder) Append(n uint64) { b.finishRun() b.addLength(n) } func (b *RunEndEncodedBuilder) AppendRuns(runs []uint64) { for _, r := range runs { b.finishRun() b.addLength(r) } } func (b *RunEndEncodedBuilder) ContinueRun(n uint64) { b.addLength(n) } func (b *RunEndEncodedBuilder) AppendNull() { b.finishRun() b.values.AppendNull() b.addLength(1) } func (b *RunEndEncodedBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *RunEndEncodedBuilder) NullN() int { return UnknownNullCount } func (b *RunEndEncodedBuilder) AppendEmptyValue() { b.AppendNull() } func (b *RunEndEncodedBuilder) AppendEmptyValues(n int) { b.AppendNulls(n) } func (b *RunEndEncodedBuilder) Reserve(n int) { b.values.Reserve(n) b.runEnds.Reserve(n) } func (b *RunEndEncodedBuilder) Resize(n int) { b.values.Resize(n) b.runEnds.Resize(n) } func (b *RunEndEncodedBuilder) NewRunEndEncodedArray() *RunEndEncoded { data := b.newData() defer data.Release() return NewRunEndEncodedData(data) } func (b *RunEndEncodedBuilder) NewArray() arrow.Array { return b.NewRunEndEncodedArray() } func (b *RunEndEncodedBuilder) newData() (data *Data) { b.finishRun() values := b.values.NewArray() defer values.Release() runEnds := b.runEnds.NewArray() defer runEnds.Release() data = NewData( b.dt, b.length, []*memory.Buffer{}, []arrow.ArrayData{runEnds.Data(), values.Data()}, 0, 0) b.reset() return } // AppendValueFromString can't be used in conjunction with 
UnmarshalOne func (b *RunEndEncodedBuilder) AppendValueFromString(s string) error { // we don't support mixing AppendValueFromString & UnmarshalOne if b.unmarshalled { return fmt.Errorf("%w: mixing AppendValueFromString & UnmarshalOne not yet implemented", arrow.ErrNotImplemented) } if s == NullValueStr { b.AppendNull() return nil } if b.lastStr != nil && s == *b.lastStr { b.ContinueRun(1) return nil } b.Append(1) lastStr := s b.lastStr = &lastStr return b.ValueBuilder().AppendValueFromString(s) } // UnmarshalOne can't be used in conjunction with AppendValueFromString func (b *RunEndEncodedBuilder) UnmarshalOne(dec *json.Decoder) error { // we don't support mixing AppendValueFromString & UnmarshalOne if b.lastStr != nil { return fmt.Errorf("%w: mixing AppendValueFromString & UnmarshalOne not yet implemented", arrow.ErrNotImplemented) } var value interface{} if err := dec.Decode(&value); err != nil { return err } // if we unmarshalled the same value as the previous one, we want to // continue the run. However, there's an edge case. At the start of // unmarshalling, lastUnmarshalled will be nil, but we might get // nil as the first value we unmarshal. In that case we want to // make sure we add a new run instead. 
We can detect that case by // checking that the number of runEnds matches the number of values // we have, which means no matter what we have to start a new run if reflect.DeepEqual(value, b.lastUnmarshalled) && (value != nil || b.runEnds.Len() != b.values.Len()) { b.ContinueRun(1) return nil } data, err := json.Marshal(value) if err != nil { return err } b.Append(1) b.lastUnmarshalled = value b.unmarshalled = true return b.ValueBuilder().UnmarshalOne(json.NewDecoder(bytes.NewReader(data))) } // Unmarshal can't be used in conjunction with AppendValueFromString (as it calls UnmarshalOne) func (b *RunEndEncodedBuilder) Unmarshal(dec *json.Decoder) error { b.finishRun() for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } // UnmarshalJSON can't be used in conjunction with AppendValueFromString (as it calls UnmarshalOne) func (b *RunEndEncodedBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("list builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var ( _ arrow.Array = (*RunEndEncoded)(nil) _ Builder = (*RunEndEncodedBuilder)(nil) ) arrow-go-18.2.0/arrow/array/encoded_test.go000066400000000000000000000402331476434502500206120ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "strings" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) var ( stringValues, _, _ = array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World", null]`)) int32Values, _, _ = array.FromJSON(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, strings.NewReader(`[10, 20, 30]`)) int32OnlyNull = array.MakeArrayOfNull(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, 3) ) func TestMakeRLEArray(t *testing.T) { rleArr := array.NewRunEndEncodedArray(int32Values, stringValues, 3, 0) defer rleArr.Release() arrData := rleArr.Data() newArr := array.MakeFromData(arrData) defer newArr.Release() assert.Same(t, newArr.Data(), arrData) assert.IsType(t, (*array.RunEndEncoded)(nil), newArr) } func TestRLEFromRunEndsAndValues(t *testing.T) { rleArray := array.NewRunEndEncodedArray(int32Values, int32Values, 3, 0) defer rleArray.Release() assert.EqualValues(t, 3, rleArray.Len()) assert.Truef(t, array.Equal(int32Values, rleArray.Values()), "expected: %s\ngot: %s", int32Values, rleArray.Values()) assert.Truef(t, array.Equal(int32Values, rleArray.RunEndsArr()), "expected: %s\ngot: %s", int32Values, rleArray.RunEndsArr()) assert.Zero(t, rleArray.Offset()) assert.Zero(t, rleArray.Data().NullN()) // one dummy buffer, since code may assume there's at least one nil buffer assert.Len(t, 
rleArray.Data().Buffers(), 1) // explicit offset rleArray = array.NewRunEndEncodedArray(int32Values, stringValues, 2, 1) defer rleArray.Release() assert.EqualValues(t, 2, rleArray.Len()) assert.Truef(t, array.Equal(stringValues, rleArray.Values()), "expected: %s\ngot: %s", stringValues, rleArray.Values()) assert.Truef(t, array.Equal(int32Values, rleArray.RunEndsArr()), "expected: %s\ngot: %s", int32Values, rleArray.RunEndsArr()) assert.EqualValues(t, 1, rleArray.Offset()) assert.Zero(t, rleArray.Data().NullN()) assert.PanicsWithError(t, "invalid: arrow/array: run ends array must be int16, int32, or int64", func() { array.NewRunEndEncodedArray(stringValues, int32Values, 3, 0) }) assert.PanicsWithError(t, "invalid: arrow/array: run ends array cannot contain nulls", func() { array.NewRunEndEncodedArray(int32OnlyNull, int32Values, 3, 0) }) } func TestRunLengthEncodedOffsetLength(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) runEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[100, 200, 300, 400, 500]`)) defer runEnds.Release() values, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "beautiful", "world", "of", "RLE"]`)) defer values.Release() rleArray := array.NewRunEndEncodedArray(runEnds, values, 500, 0) defer rleArray.Release() assert.EqualValues(t, 5, rleArray.GetPhysicalLength()) assert.EqualValues(t, 0, rleArray.GetPhysicalOffset()) slice := array.NewSlice(rleArray, 199, 204).(*array.RunEndEncoded) defer slice.Release() assert.EqualValues(t, 2, slice.GetPhysicalLength()) assert.EqualValues(t, 1, slice.GetPhysicalOffset()) slice2 := array.NewSlice(rleArray, 199, 300).(*array.RunEndEncoded) defer slice2.Release() assert.EqualValues(t, 2, slice2.GetPhysicalLength()) assert.EqualValues(t, 1, slice2.GetPhysicalOffset()) slice3 := array.NewSlice(rleArray, 400, 500).(*array.RunEndEncoded) defer slice3.Release() assert.EqualValues(t, 1, 
slice3.GetPhysicalLength()) assert.EqualValues(t, 4, slice3.GetPhysicalOffset()) slice4 := array.NewSlice(rleArray, 0, 150).(*array.RunEndEncoded) defer slice4.Release() assert.EqualValues(t, 2, slice4.GetPhysicalLength()) assert.EqualValues(t, 0, slice4.GetPhysicalOffset()) zeroLengthAtEnd := array.NewSlice(rleArray, 500, 500).(*array.RunEndEncoded) defer zeroLengthAtEnd.Release() assert.EqualValues(t, 0, zeroLengthAtEnd.GetPhysicalLength()) assert.EqualValues(t, 5, zeroLengthAtEnd.GetPhysicalOffset()) } func TestRLECompare(t *testing.T) { rleArray := array.NewRunEndEncodedArray(int32Values, stringValues, 30, 0) // second that is a copy of the first standardEquals := array.MakeFromData(rleArray.Data().(*array.Data).Copy()) defer rleArray.Release() defer standardEquals.Release() assert.Truef(t, array.Equal(rleArray, standardEquals), "left: %s\nright: %s", rleArray, standardEquals) assert.False(t, array.Equal(array.NewSlice(rleArray, 0, 29), array.NewSlice(rleArray, 1, 30))) // array that is logically the same as our rleArray, but has 2 small // runs for the first value instead of one large run mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) t.Run("logical duplicate", func(t *testing.T) { dupRunEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[5, 10, 20, 30]`)) defer dupRunEnds.Release() strValues, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "Hello", "World", null]`)) defer strValues.Release() dupArr := array.NewRunEndEncodedArray(dupRunEnds, strValues, 30, 0) defer dupArr.Release() assert.Truef(t, array.Equal(rleArray, dupArr), "expected: %sgot: %s", rleArray, dupArr) }) t.Run("emptyArr", func(t *testing.T) { emptyRuns, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[]`)) emptyVals, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[]`)) defer emptyRuns.Release() defer emptyVals.Release() emptyArr := 
array.NewRunEndEncodedArray(emptyRuns, emptyVals, 0, 0) defer emptyArr.Release() dataCopy := emptyArr.Data().(*array.Data).Copy() defer dataCopy.Release() emptyArr2 := array.MakeFromData(dataCopy) defer emptyArr2.Release() assert.Truef(t, array.Equal(emptyArr, emptyArr2), "expected: %sgot: %s", emptyArr, emptyArr2) }) t.Run("different offsets", func(t *testing.T) { // three different slices that have the value [3, 3, 3, 4, 4, 4, 4] offsetsa, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[2, 5, 12, 58, 60]`)) offsetsb, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[81, 86, 99, 100]`)) offsetsc, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[3, 7]`)) valsa, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[1, 2, 3, 4, 5]`)) valsb, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[2, 3, 4, 5]`)) valsc, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[3, 4]`)) defer func() { offsetsa.Release() offsetsb.Release() offsetsc.Release() valsa.Release() valsb.Release() valsc.Release() }() differentOffsetsA := array.NewRunEndEncodedArray(offsetsa, valsa, 60, 0) defer differentOffsetsA.Release() differentOffsetsB := array.NewRunEndEncodedArray(offsetsb, valsb, 100, 0) defer differentOffsetsB.Release() differentOffsetsC := array.NewRunEndEncodedArray(offsetsc, valsc, 7, 0) defer differentOffsetsC.Release() sliceA := array.NewSlice(differentOffsetsA, 9, 16) defer sliceA.Release() sliceB := array.NewSlice(differentOffsetsB, 83, 90) defer sliceB.Release() assert.True(t, array.Equal(sliceA, sliceB)) assert.True(t, array.Equal(sliceA, differentOffsetsC)) assert.True(t, array.Equal(sliceB, differentOffsetsC)) }) } func TestRunEndEncodedBuilder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) bldr := array.NewBuilder(mem, arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int16, 
arrow.BinaryTypes.String)) defer bldr.Release() assert.IsType(t, (*array.RunEndEncodedBuilder)(nil), bldr) reeBldr := bldr.(*array.RunEndEncodedBuilder) valBldr := reeBldr.ValueBuilder().(*array.StringBuilder) reeBldr.Append(100) valBldr.Append("Hello") reeBldr.Append(100) valBldr.Append("beautiful") reeBldr.Append(50) valBldr.Append("world") reeBldr.ContinueRun(50) reeBldr.Append(100) valBldr.Append("of") reeBldr.Append(100) valBldr.Append("RLE") reeBldr.AppendNull() rleArray := reeBldr.NewRunEndEncodedArray() defer rleArray.Release() assert.EqualValues(t, 501, rleArray.Len()) assert.EqualValues(t, 6, rleArray.GetPhysicalLength()) assert.Equal(t, arrow.INT16, rleArray.RunEndsArr().DataType().ID()) assert.Equal(t, []int16{100, 200, 300, 400, 500, 501}, rleArray.RunEndsArr().(*array.Int16).Int16Values()) strValues := rleArray.Values().(*array.String) assert.Equal(t, "Hello", strValues.Value(0)) assert.Equal(t, "beautiful", strValues.Value(1)) assert.Equal(t, "world", strValues.Value(2)) assert.Equal(t, "of", strValues.Value(3)) assert.Equal(t, "RLE", strValues.Value(4)) assert.True(t, strValues.IsNull(5)) assert.Equal(t, "Hello", strValues.ValueStr(0)) } func TestRunEndEncodedStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) b := array.NewRunEndEncodedBuilder(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String) defer b.Release() valBldr := b.ValueBuilder().(*array.StringBuilder) b.Append(100) valBldr.Append("Hello") b.Append(100) valBldr.Append("beautiful") b.Append(50) valBldr.Append("world") b.ContinueRun(50) b.Append(100) valBldr.Append("of") b.Append(100) valBldr.Append("RLE") b.AppendNull() arr := b.NewArray().(*array.RunEndEncoded) defer arr.Release() logical := arr.LogicalValuesArray() defer logical.Release() // 2. 
create array via AppendValueFromString b1 := array.NewRunEndEncodedBuilder(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.RunEndEncoded) defer arr1.Release() logical1 := arr1.LogicalValuesArray() defer logical1.Release() assert.True(t, array.Equal(arr, arr1)) assert.True(t, array.Equal(logical, logical1)) } func TestREEBuilderOverflow(t *testing.T) { for _, typ := range []arrow.DataType{arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64} { t.Run("run_ends="+typ.String(), func(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) bldr := array.NewRunEndEncodedBuilder(mem, typ, arrow.BinaryTypes.String) defer bldr.Release() valBldr := bldr.ValueBuilder().(*array.StringBuilder) assert.Panics(t, func() { valBldr.Append("Foo") maxVal := uint64(1< 0 { o.WriteString(" ") } if !a.IsValid(i) { o.WriteString(NullValueStr) continue } sub := a.newListValue(i) fmt.Fprintf(o, "%v", sub) sub.Release() } o.WriteString("]") return o.String() } func (a *FixedSizeList) newListValue(i int) arrow.Array { beg, end := a.ValueOffsets(i) return NewSlice(a.values, beg, end) } func (a *FixedSizeList) setData(data *Data) { a.array.setData(data) a.n = a.DataType().(*arrow.FixedSizeListType).Len() a.values = MakeFromData(data.childData[0]) } func arrayEqualFixedSizeList(left, right *FixedSizeList) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } o := func() bool { l := left.newListValue(i) defer l.Release() r := right.newListValue(i) defer r.Release() return Equal(l, r) }() if !o { return false } } return true } // Len returns the number of elements in the array. 
func (a *FixedSizeList) Len() int { return a.array.Len() } func (a *FixedSizeList) ValueOffsets(i int) (start, end int64) { n := int64(a.n) off := int64(a.array.data.offset) start, end = (off+int64(i))*n, (off+int64(i+1))*n return } func (a *FixedSizeList) Retain() { a.array.Retain() a.values.Retain() } func (a *FixedSizeList) Release() { a.array.Release() a.values.Release() } func (a *FixedSizeList) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } slice := a.newListValue(i) defer slice.Release() v, err := json.Marshal(slice) if err != nil { panic(err) } return json.RawMessage(v) } func (a *FixedSizeList) MarshalJSON() ([]byte, error) { var buf bytes.Buffer enc := json.NewEncoder(&buf) buf.WriteByte('[') for i := 0; i < a.Len(); i++ { if i != 0 { buf.WriteByte(',') } if a.IsNull(i) { enc.Encode(nil) continue } slice := a.newListValue(i) if err := enc.Encode(slice); err != nil { return nil, err } slice.Release() } buf.WriteByte(']') return buf.Bytes(), nil } type FixedSizeListBuilder struct { baseListBuilder n int32 // number of elements in the fixed-size list. } // NewFixedSizeListBuilder returns a builder, using the provided memory allocator. // The created list builder will create a list whose elements will be of type etype. func NewFixedSizeListBuilder(mem memory.Allocator, n int32, etype arrow.DataType) *FixedSizeListBuilder { return &FixedSizeListBuilder{ baseListBuilder{ builder: builder{refCount: 1, mem: mem}, values: NewBuilder(mem, etype), dt: arrow.FixedSizeListOf(n, etype), }, n, } } // NewFixedSizeListBuilderWithField returns a builder similarly to // NewFixedSizeListBuilder, but it accepts a child rather than just a datatype // to ensure nullability context is preserved. 
func NewFixedSizeListBuilderWithField(mem memory.Allocator, n int32, field arrow.Field) *FixedSizeListBuilder { return &FixedSizeListBuilder{ baseListBuilder{ builder: builder{refCount: 1, mem: mem}, values: NewBuilder(mem, field.Type), dt: arrow.FixedSizeListOfField(n, field), }, n, } } func (b *FixedSizeListBuilder) Type() arrow.DataType { return b.dt } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *FixedSizeListBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.values != nil { b.values.Release() b.values = nil } } } func (b *FixedSizeListBuilder) Append(v bool) { b.Reserve(1) b.unsafeAppendBoolToBitmap(v) } // AppendNull will append null values to the underlying values by itself func (b *FixedSizeListBuilder) AppendNull() { b.Reserve(1) b.unsafeAppendBoolToBitmap(false) // require to append this due to value indexes for i := int32(0); i < b.n; i++ { b.values.AppendNull() } } // AppendNulls will append n null values to the underlying values by itself func (b *FixedSizeListBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *FixedSizeListBuilder) AppendEmptyValue() { b.Append(true) for i := int32(0); i < b.n; i++ { b.values.AppendEmptyValue() } } func (b *FixedSizeListBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *FixedSizeListBuilder) AppendValues(valid []bool) { b.Reserve(len(valid)) b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) } func (b *FixedSizeListBuilder) unsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } func (b *FixedSizeListBuilder) init(capacity int) { b.builder.init(capacity) } // Reserve ensures there is enough space for appending n elements // by checking 
the capacity and calling Resize if necessary. func (b *FixedSizeListBuilder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *FixedSizeListBuilder) Resize(n int) { if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(n, b.builder.init) } } func (b *FixedSizeListBuilder) ValueBuilder() Builder { return b.values } // NewArray creates a List array from the memory buffers used by the builder and resets the FixedSizeListBuilder // so it can be used to build a new array. func (b *FixedSizeListBuilder) NewArray() arrow.Array { return b.NewListArray() } // NewListArray creates a List array from the memory buffers used by the builder and resets the FixedSizeListBuilder // so it can be used to build a new array. func (b *FixedSizeListBuilder) NewListArray() (a *FixedSizeList) { data := b.newData() a = NewFixedSizeListData(data) data.Release() return } func (b *FixedSizeListBuilder) newData() (data *Data) { values := b.values.NewArray() defer values.Release() data = NewData( b.dt, b.length, []*memory.Buffer{b.nullBitmap}, []arrow.ArrayData{values.Data()}, b.nulls, 0, ) b.reset() return } func (b *FixedSizeListBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } dec := json.NewDecoder(strings.NewReader(s)) return b.UnmarshalOne(dec) } func (b *FixedSizeListBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch t { case json.Delim('['): b.Append(true) if err := b.values.Unmarshal(dec); err != nil { return err } // consume ']' _, err := dec.Token() return err case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Struct: b.dt.String(), } } return nil } func (b *FixedSizeListBuilder) Unmarshal(dec 
*json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *FixedSizeListBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("fixed size list builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var ( _ arrow.Array = (*FixedSizeList)(nil) _ Builder = (*FixedSizeListBuilder)(nil) ) arrow-go-18.2.0/arrow/array/fixed_size_list_test.go000066400000000000000000000152471476434502500224040ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestFixedSizeListArray(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( vs = []int32{0, 1, 2, 3, 4, 5, 6} lengths = []int{3, 0, 4} isValid = []bool{true, false, true} ) lb := array.NewFixedSizeListBuilder(pool, int32(len(vs)), arrow.PrimitiveTypes.Int32) defer lb.Release() for i := 0; i < 10; i++ { vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) pos := 0 for i, length := range lengths { lb.Append(isValid[i]) for j := 0; j < length; j++ { vb.Append(vs[pos]) pos++ } } arr := lb.NewArray().(*array.FixedSizeList) defer arr.Release() arr.Retain() arr.Release() if got, want := arr.DataType().ID(), arrow.FIXED_SIZE_LIST; got != want { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.Len(), len(isValid); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range lengths { if got, want := arr.IsValid(i), isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } if got, want := arr.IsNull(i), lengths[i] == 0; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } varr := arr.ListValues().(*array.Int32) if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } } func TestFixedSizeListArrayEmpty(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) lb := array.NewFixedSizeListBuilder(pool, 3, arrow.PrimitiveTypes.Int32) defer lb.Release() arr := lb.NewArray().(*array.FixedSizeList) defer arr.Release() if got, want := arr.Len(), 0; got != want { t.Fatalf("got=%d, want=%d", got, want) } } func TestFixedSizeListArrayBulkAppend(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer 
pool.AssertSize(t, 0) var ( vs = []int32{0, 1, 2, 3, 4, 5, 6} lengths = []int{3, 0, 4} isValid = []bool{true, false, true} ) lb := array.NewFixedSizeListBuilder(pool, int32(len(vs)), arrow.PrimitiveTypes.Int32) defer lb.Release() vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) lb.AppendValues(isValid) for _, v := range vs { vb.Append(v) } arr := lb.NewArray().(*array.FixedSizeList) defer arr.Release() if got, want := arr.DataType().ID(), arrow.FIXED_SIZE_LIST; got != want { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.Len(), len(isValid); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range lengths { if got, want := arr.IsValid(i), isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } if got, want := arr.IsNull(i), lengths[i] == 0; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } varr := arr.ListValues().(*array.Int32) if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func TestFixedSizeListArrayStringer(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const N = 3 var ( vs = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}} isValid = []bool{true, false, true, true} ) lb := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) defer lb.Release() vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) for i, v := range vs { lb.Append(isValid[i]) vb.AppendValues(v[:], nil) } arr := lb.NewArray().(*array.FixedSizeList) defer arr.Release() arr.Retain() arr.Release() want := `[[0 1 2] (null) [6 7 8] [9 -9 -8]]` if got, want := arr.String(), want; got != want { t.Fatalf("got=%q, want=%q", got, want) } assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) assert.Equal(t, array.NullValueStr, arr.ValueStr(1)) } func TestFixedSizeListArraySlice(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer 
pool.AssertSize(t, 0) const N = 3 var ( vs = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}} isValid = []bool{true, false, true, true} ) lb := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) defer lb.Release() vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) for i, v := range vs { lb.Append(isValid[i]) vb.AppendValues(v[:], nil) } arr := lb.NewArray().(*array.FixedSizeList) defer arr.Release() arr.Retain() arr.Release() want := `[[0 1 2] (null) [6 7 8] [9 -9 -8]]` if got, want := arr.String(), want; got != want { t.Fatalf("got=%q, want=%q", got, want) } sub := array.NewSlice(arr, 1, 3).(*array.FixedSizeList) defer sub.Release() want = `[(null) [6 7 8]]` if got, want := sub.String(), want; got != want { t.Fatalf("got=%q, want=%q", got, want) } } func TestFixedSizeListStringRoundTrip(t *testing.T) { // 1. create array pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const N = 3 var ( values = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}} valid = []bool{true, false, true, true} ) b := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) defer b.Release() vb := b.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(values)) for i, v := range values { b.Append(valid[i]) vb.AppendValues(v[:], nil) } arr := b.NewArray().(*array.FixedSizeList) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.FixedSizeList) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } arrow-go-18.2.0/arrow/array/fixedsize_binary.go000066400000000000000000000055731476434502500215200ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "encoding/base64" "fmt" "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/internal/json" ) // A type which represents an immutable sequence of fixed-length binary strings. type FixedSizeBinary struct { array valueBytes []byte bytewidth int32 } // NewFixedSizeBinaryData constructs a new fixed-size binary array from data. func NewFixedSizeBinaryData(data arrow.ArrayData) *FixedSizeBinary { a := &FixedSizeBinary{bytewidth: int32(data.DataType().(arrow.FixedWidthDataType).BitWidth() / 8)} a.refCount = 1 a.setData(data.(*Data)) return a } // Value returns the fixed-size slice at index i. This value should not be mutated. 
func (a *FixedSizeBinary) Value(i int) []byte { i += a.array.data.offset var ( bw = int(a.bytewidth) beg = i * bw end = (i + 1) * bw ) return a.valueBytes[beg:end] } func (a *FixedSizeBinary) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return base64.StdEncoding.EncodeToString(a.Value(i)) } func (a *FixedSizeBinary) String() string { o := new(strings.Builder) o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { o.WriteString(" ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%q", a.Value(i)) } } o.WriteString("]") return o.String() } func (a *FixedSizeBinary) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.valueBytes = vals.Bytes() } } func (a *FixedSizeBinary) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.Value(i) } func (a *FixedSizeBinary) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = a.Value(i) } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualFixedSizeBinary(left, right *FixedSizeBinary) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if !bytes.Equal(left.Value(i), right.Value(i)) { return false } } return true } var ( _ arrow.Array = (*FixedSizeBinary)(nil) ) arrow-go-18.2.0/arrow/array/fixedsize_binary_test.go000066400000000000000000000114721476434502500225520ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "testing" "github.com/stretchr/testify/assert" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" ) func TestFixedSizeBinary(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := arrow.FixedSizeBinaryType{ByteWidth: 7} b := array.NewFixedSizeBinaryBuilder(mem, &dtype) zero := make([]byte, dtype.ByteWidth) values := [][]byte{ []byte("7654321"), nil, []byte("AZERTYU"), } valid := []bool{true, false, true} b.AppendValues(values, valid) // encoded abcdefg base64 assert.NoError(t, b.AppendValueFromString("YWJjZGVmZw==")) b.Retain() b.Release() a := b.NewFixedSizeBinaryArray() assert.Equal(t, 4, a.Len()) assert.Equal(t, 1, a.NullN()) assert.Equal(t, []byte("7654321"), a.Value(0)) assert.Equal(t, "YWJjZGVmZw==", a.ValueStr(3)) assert.Equal(t, zero, a.Value(1)) assert.Equal(t, true, a.IsNull(1)) assert.Equal(t, false, a.IsValid(1)) assert.Equal(t, []byte("AZERTYU"), a.Value(2)) a.Release() // Test builder reset and NewArray API. 
b.AppendValues(values, valid) a = b.NewArray().(*array.FixedSizeBinary) assert.Equal(t, 3, a.Len()) assert.Equal(t, 1, a.NullN()) assert.Equal(t, []byte("7654321"), a.Value(0)) assert.Equal(t, zero, a.Value(1)) assert.Equal(t, []byte("AZERTYU"), a.Value(2)) a.Release() b.Release() } func TestFixedSizeBinarySlice(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.FixedSizeBinaryType{ByteWidth: 4} b := array.NewFixedSizeBinaryBuilder(mem, dtype) defer b.Release() var data = [][]byte{ []byte("ABCD"), []byte("1234"), nil, []byte("AZER"), } b.AppendValues(data[:2], nil) b.AppendNull() b.Append(data[3]) arr := b.NewFixedSizeBinaryArray() defer arr.Release() slice := array.NewSliceData(arr.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.FixedSizeBinary) if !ok { t.Fatalf("could not type-assert to array.String") } if got, want := v.String(), `[(null) "AZER"]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } if got, want := v.NullN(), 1; got != want { t.Fatalf("got=%q, want=%q", got, want) } } func TestFixedSizeBinary_MarshalUnmarshalJSON(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.FixedSizeBinaryType{ByteWidth: 4} b := array.NewFixedSizeBinaryBuilder(mem, dtype) defer b.Release() var data = [][]byte{ []byte("ABCD"), []byte("1234"), nil, []byte("AZER"), } b.AppendValues(data[:2], nil) b.AppendNull() b.Append(data[3]) arr := b.NewFixedSizeBinaryArray() defer arr.Release() jsonBytes, err := arr.MarshalJSON() if err != nil { t.Fatalf("failed to marshal json: %v", err) } err = b.UnmarshalJSON(jsonBytes) if err != nil { t.Fatalf("failed to unmarshal json: %v", err) } gotArr := b.NewFixedSizeBinaryArray() defer gotArr.Release() gotString := gotArr.String() wantString := arr.String() if gotString != wantString { t.Fatalf("got=%q, want=%q", gotString, wantString) } } func 
TestFixedSizeBinaryStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dt := &arrow.FixedSizeBinaryType{ByteWidth: 7} b := array.NewFixedSizeBinaryBuilder(mem, dt) values := [][]byte{ []byte("7654321"), nil, []byte("AZERTYU"), } valid := []bool{true, false, true} b.AppendValues(values, valid) // encoded abcdefg base64 assert.NoError(t, b.AppendValueFromString("YWJjZGVmZw==")) arr := b.NewArray().(*array.FixedSizeBinary) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewFixedSizeBinaryBuilder(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.FixedSizeBinary) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } arrow-go-18.2.0/arrow/array/fixedsize_binarybuilder.go000066400000000000000000000153731476434502500230660ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array import ( "bytes" "encoding/base64" "fmt" "reflect" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) // A FixedSizeBinaryBuilder is used to build a FixedSizeBinary array using the Append methods. type FixedSizeBinaryBuilder struct { builder dtype *arrow.FixedSizeBinaryType values *byteBufferBuilder } func NewFixedSizeBinaryBuilder(mem memory.Allocator, dtype *arrow.FixedSizeBinaryType) *FixedSizeBinaryBuilder { b := &FixedSizeBinaryBuilder{ builder: builder{refCount: 1, mem: mem}, dtype: dtype, values: newByteBufferBuilder(mem), } return b } func (b *FixedSizeBinaryBuilder) Type() arrow.DataType { return b.dtype } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. func (b *FixedSizeBinaryBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.values != nil { b.values.Release() b.values = nil } } } func (b *FixedSizeBinaryBuilder) Append(v []byte) { if len(v) != b.dtype.ByteWidth { // TODO(alexandre): should we return an error instead? 
panic("len(v) != b.dtype.ByteWidth") } b.Reserve(1) b.values.Append(v) b.UnsafeAppendBoolToBitmap(true) } func (b *FixedSizeBinaryBuilder) AppendNull() { b.Reserve(1) b.values.Advance(b.dtype.ByteWidth) b.UnsafeAppendBoolToBitmap(false) } func (b *FixedSizeBinaryBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *FixedSizeBinaryBuilder) AppendEmptyValue() { b.Reserve(1) b.values.Advance(b.dtype.ByteWidth) b.UnsafeAppendBoolToBitmap(true) } func (b *FixedSizeBinaryBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *FixedSizeBinaryBuilder) UnsafeAppend(v []byte) { b.values.unsafeAppend(v) b.UnsafeAppendBoolToBitmap(true) } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. func (b *FixedSizeBinaryBuilder) AppendValues(v [][]byte, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) for _, vv := range v { switch len(vv) { case 0: b.values.Advance(b.dtype.ByteWidth) case b.dtype.ByteWidth: b.values.Append(vv) default: panic(fmt.Errorf("array: invalid binary length (got=%d, want=%d)", len(vv), b.dtype.ByteWidth)) } } b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *FixedSizeBinaryBuilder) init(capacity int) { b.builder.init(capacity) b.values.resize(capacity * b.dtype.ByteWidth) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *FixedSizeBinaryBuilder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. 
func (b *FixedSizeBinaryBuilder) Resize(n int) { b.builder.resize(n, b.init) } // NewArray creates a FixedSizeBinary array from the memory buffers used by the // builder and resets the FixedSizeBinaryBuilder so it can be used to build a new array. func (b *FixedSizeBinaryBuilder) NewArray() arrow.Array { return b.NewFixedSizeBinaryArray() } // NewFixedSizeBinaryArray creates a FixedSizeBinary array from the memory buffers used by the builder and resets the FixedSizeBinaryBuilder // so it can be used to build a new array. func (b *FixedSizeBinaryBuilder) NewFixedSizeBinaryArray() (a *FixedSizeBinary) { data := b.newData() a = NewFixedSizeBinaryData(data) data.Release() return } func (b *FixedSizeBinaryBuilder) newData() (data *Data) { values := b.values.Finish() data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, values}, nil, b.nulls, 0) if values != nil { values.Release() } b.builder.reset() return } func (b *FixedSizeBinaryBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } data, err := base64.StdEncoding.DecodeString(s) if err != nil { b.AppendNull() return err } b.Append(data) return nil } func (b *FixedSizeBinaryBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } var val []byte switch v := t.(type) { case string: data, err := base64.StdEncoding.DecodeString(v) if err != nil { return err } val = data case []byte: val = v case nil: b.AppendNull() return nil default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf([]byte{}), Offset: dec.InputOffset(), Struct: fmt.Sprintf("FixedSizeBinary[%d]", b.dtype.ByteWidth), } } if len(val) != b.dtype.ByteWidth { return &json.UnmarshalTypeError{ Value: fmt.Sprint(val), Type: reflect.TypeOf([]byte{}), Offset: dec.InputOffset(), Struct: fmt.Sprintf("FixedSizeBinary[%d]", b.dtype.ByteWidth), } } b.Append(val) return nil } func (b *FixedSizeBinaryBuilder) Unmarshal(dec *json.Decoder) error { 
for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *FixedSizeBinaryBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("fixed size binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var ( _ Builder = (*FixedSizeBinaryBuilder)(nil) ) arrow-go-18.2.0/arrow/array/fixedsize_binarybuilder_test.go000066400000000000000000000061261476434502500241210ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestFixedSizeBinaryBuilder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := arrow.FixedSizeBinaryType{ByteWidth: 7} b := NewFixedSizeBinaryBuilder(mem, &dtype) b.Append([]byte("1234567")) b.AppendNull() b.Append([]byte("ABCDEFG")) b.AppendNull() assert.Equal(t, 4, b.Len(), "unexpected Len()") assert.Equal(t, 2, b.NullN(), "unexpected NullN()") values := [][]byte{ []byte("7654321"), nil, []byte("AZERTYU"), } b.AppendValues(values, []bool{true, false, true}) assert.Equal(t, 7, b.Len(), "unexpected Len()") assert.Equal(t, 3, b.NullN(), "unexpected NullN()") a := b.NewFixedSizeBinaryArray() // check state of builder after NewFixedSizeBinaryArray assert.Zero(t, b.Len(), "unexpected ArrayBuilder.Len(), NewFixedSizeBinaryArray did not reset state") assert.Zero(t, b.Cap(), "unexpected ArrayBuilder.Cap(), NewFixedSizeBinaryArray did not reset state") assert.Zero(t, b.NullN(), "unexpected ArrayBuilder.NullN(), NewFixedSizeBinaryArray did not reset state") assert.Equal(t, a.String(), `["1234567" (null) "ABCDEFG" (null) "7654321" (null) "AZERTYU"]`) b.Release() a.Release() } func TestFixedSizeBinaryBuilder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := arrow.FixedSizeBinaryType{ByteWidth: 7} ab := NewFixedSizeBinaryBuilder(mem, &dtype) defer ab.Release() want := [][]byte{ []byte("1234567"), []byte("AZERTYU"), []byte("7654321"), } fixedSizeValues := func(a *FixedSizeBinary) [][]byte { vs := make([][]byte, a.Len()) for i := range vs { vs[i] = a.Value(i) } return vs } ab.AppendValues([][]byte{}, nil) a := ab.NewFixedSizeBinaryArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewFixedSizeBinaryArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([][]byte{}, 
nil) ab.AppendValues(want, nil) a = ab.NewFixedSizeBinaryArray() assert.Equal(t, want, fixedSizeValues(a)) a.Release() ab.AppendValues(want, nil) ab.AppendValues([][]byte{}, nil) a = ab.NewFixedSizeBinaryArray() assert.Equal(t, want, fixedSizeValues(a)) a.Release() } arrow-go-18.2.0/arrow/array/float16.go000066400000000000000000000054161476434502500174320ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "fmt" "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/float16" "github.com/apache/arrow-go/v18/internal/json" ) // A type which represents an immutable sequence of Float16 values. 
type Float16 struct { array values []float16.Num } func NewFloat16Data(data arrow.ArrayData) *Float16 { a := &Float16{} a.refCount = 1 a.setData(data.(*Data)) return a } func (a *Float16) Value(i int) float16.Num { return a.values[i] } func (a *Float16) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return a.Value(i).String() } func (a *Float16) Values() []float16.Num { return a.values } func (a *Float16) String() string { o := new(strings.Builder) o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", a.values[i].Float32()) } } o.WriteString("]") return o.String() } func (a *Float16) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Float16Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Float16) GetOneForMarshal(i int) interface{} { if a.IsValid(i) { return a.values[i].Float32() } return nil } func (a *Float16) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i, v := range a.values { if !a.IsValid(i) { vals[i] = nil continue } switch { case v.IsNaN(): vals[i] = "NaN" case v.IsInf() && !v.Signbit(): vals[i] = "+Inf" case v.IsInf() && v.Signbit(): vals[i] = "-Inf" default: vals[i] = v.Float32() } } return json.Marshal(vals) } func arrayEqualFloat16(left, right *Float16) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } var ( _ arrow.Array = (*Float16)(nil) ) arrow-go-18.2.0/arrow/array/float16_builder.go000066400000000000000000000152501476434502500211350ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "fmt" "reflect" "strconv" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/float16" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) type Float16Builder struct { builder data *memory.Buffer rawData []float16.Num } func NewFloat16Builder(mem memory.Allocator) *Float16Builder { return &Float16Builder{builder: builder{refCount: 1, mem: mem}} } func (b *Float16Builder) Type() arrow.DataType { return arrow.FixedWidthTypes.Float16 } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *Float16Builder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *Float16Builder) Append(v float16.Num) { b.Reserve(1) b.UnsafeAppend(v) } func (b *Float16Builder) UnsafeAppend(v float16.Num) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *Float16Builder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *Float16Builder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *Float16Builder) AppendEmptyValue() { b.Reserve(1) b.UnsafeAppend(float16.Num{}) } func (b *Float16Builder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *Float16Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *Float16Builder) AppendValues(v []float16.Num, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) if len(v) > 0 { arrow.Float16Traits.Copy(b.rawData[b.length:], v) } b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *Float16Builder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.Uint16Traits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = arrow.Float16Traits.CastFromBytes(b.data.Bytes()) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *Float16Builder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *Float16Builder) Resize(n int) { nBuilder := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(nBuilder, b.init) b.data.Resize(arrow.Float16Traits.BytesRequired(n)) b.rawData = arrow.Float16Traits.CastFromBytes(b.data.Bytes()) } } // NewArray creates a Float16 array from the memory buffers used by the builder and resets the Float16Builder // so it can be used to build a new array. func (b *Float16Builder) NewArray() arrow.Array { return b.NewFloat16Array() } // NewFloat16Array creates a Float16 array from the memory buffers used by the builder and resets the Float16Builder // so it can be used to build a new array. 
func (b *Float16Builder) NewFloat16Array() (a *Float16) { data := b.newData() a = NewFloat16Data(data) data.Release() return } func (b *Float16Builder) newData() (data *Data) { bytesRequired := arrow.Float16Traits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(arrow.FixedWidthTypes.Float16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *Float16Builder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } v, err := strconv.ParseFloat(s, 32) if err != nil { b.AppendNull() return err } b.Append(float16.New(float32(v))) return nil } func (b *Float16Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case float64: b.Append(float16.New(float32(v))) case string: f, err := strconv.ParseFloat(v, 32) if err != nil { return err } // this will currently silently truncate if it is too large b.Append(float16.New(float32(f))) case json.Number: f, err := v.Float64() if err != nil { return err } b.Append(float16.New(float32(f))) case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf(float16.Num{}), Offset: dec.InputOffset(), } } return nil } func (b *Float16Builder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } // UnmarshalJSON will add values to this builder from unmarshalling the // array of values. Currently values that are larger than a float16 will // be silently truncated. 
func (b *Float16Builder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("float16 builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } arrow-go-18.2.0/arrow/array/float16_builder_test.go000066400000000000000000000105471476434502500222000ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "testing" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/float16" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func float32Values(a *array.Float16) []float32 { values := make([]float32, a.Len()) for i, v := range a.Values() { values[i] = v.Float32() } return values } func TestNewFloat16Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewFloat16Builder(mem) ab.Append(float16.New(1)) ab.Append(float16.New(2)) ab.Append(float16.New(3)) ab.AppendNull() ab.Append(float16.New(5)) ab.Append(float16.New(6)) ab.AppendNull() ab.Append(float16.New(8)) ab.Append(float16.New(9)) ab.Append(float16.New(10)) assert.NoError(t, ab.AppendValueFromString("11.0")) // check state of builder before NewFloat16Array assert.Equal(t, 11, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewFloat16Array() assert.Equal(t, "1", a.ValueStr(0)) // check state of builder after NewFloat16Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat16Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat16Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewFloat16Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []float32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10, 11}, float32Values(a), "unexpected Float16Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Values(), 11, "unexpected length of Float16Values") a.Release() ab.Append(float16.New(7)) ab.Append(float16.New(8)) a = ab.NewFloat16Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []float32{7, 8}, float32Values(a)) assert.Len(t, a.Values(), 2) a.Release() } func TestFloat16Builder_Empty(t *testing.T) { mem := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewFloat16Builder(mem) defer ab.Release() want := []float16.Num{float16.New(3), float16.New(4)} ab.AppendValues([]float16.Num{}, nil) a := ab.NewFloat16Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewFloat16Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(want, nil) a = ab.NewFloat16Array() assert.Equal(t, want, a.Values()) a.Release() ab.AppendValues([]float16.Num{}, nil) ab.AppendValues(want, nil) a = ab.NewFloat16Array() assert.Equal(t, want, a.Values()) a.Release() ab.AppendValues(want, nil) ab.AppendValues([]float16.Num{}, nil) a = ab.NewFloat16Array() assert.Equal(t, want, a.Values()) a.Release() } func TestFloat16StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewFloat16Builder(mem) defer b.Release() b.Append(float16.New(1)) b.Append(float16.New(2)) b.Append(float16.New(3)) b.AppendNull() b.Append(float16.New(5)) b.Append(float16.New(6)) b.AppendNull() b.Append(float16.New(8)) b.Append(float16.New(9)) b.Append(float16.New(10)) arr := b.NewArray().(*array.Float16) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewFloat16Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Float16) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } arrow-go-18.2.0/arrow/array/interval.go000066400000000000000000000603571476434502500200070ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "fmt" "strconv" "strings" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) func NewIntervalData(data arrow.ArrayData) arrow.Array { switch data.DataType().(type) { case *arrow.MonthIntervalType: return NewMonthIntervalData(data.(*Data)) case *arrow.DayTimeIntervalType: return NewDayTimeIntervalData(data.(*Data)) case *arrow.MonthDayNanoIntervalType: return NewMonthDayNanoIntervalData(data.(*Data)) default: panic(fmt.Errorf("arrow/array: unknown interval data type %T", data.DataType())) } } // A type which represents an immutable sequence of arrow.MonthInterval values. 
type MonthInterval struct { array values []arrow.MonthInterval } func NewMonthIntervalData(data arrow.ArrayData) *MonthInterval { a := &MonthInterval{} a.refCount = 1 a.setData(data.(*Data)) return a } func (a *MonthInterval) Value(i int) arrow.MonthInterval { return a.values[i] } func (a *MonthInterval) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return fmt.Sprintf("%v", a.Value(i)) } func (a *MonthInterval) MonthIntervalValues() []arrow.MonthInterval { return a.values } func (a *MonthInterval) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *MonthInterval) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.MonthIntervalTraits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *MonthInterval) GetOneForMarshal(i int) interface{} { if a.IsValid(i) { return a.values[i] } return nil } // MarshalJSON will create a json array out of a MonthInterval array, // each value will be an object of the form {"months": #} where // # is the numeric value of that index func (a *MonthInterval) MarshalJSON() ([]byte, error) { if a.NullN() == 0 { return json.Marshal(a.values) } vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = a.values[i] } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualMonthInterval(left, right *MonthInterval) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } type MonthIntervalBuilder struct { builder data *memory.Buffer rawData []arrow.MonthInterval } func NewMonthIntervalBuilder(mem memory.Allocator) *MonthIntervalBuilder { return 
&MonthIntervalBuilder{builder: builder{refCount: 1, mem: mem}} } func (b *MonthIntervalBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.MonthInterval } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *MonthIntervalBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *MonthIntervalBuilder) Append(v arrow.MonthInterval) { b.Reserve(1) b.UnsafeAppend(v) } func (b *MonthIntervalBuilder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *MonthIntervalBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *MonthIntervalBuilder) AppendEmptyValue() { b.Append(arrow.MonthInterval(0)) } func (b *MonthIntervalBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *MonthIntervalBuilder) UnsafeAppend(v arrow.MonthInterval) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *MonthIntervalBuilder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *MonthIntervalBuilder) AppendValues(v []arrow.MonthInterval, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) arrow.MonthIntervalTraits.Copy(b.rawData[b.length:], v) b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *MonthIntervalBuilder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.MonthIntervalTraits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = arrow.MonthIntervalTraits.CastFromBytes(b.data.Bytes()) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *MonthIntervalBuilder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *MonthIntervalBuilder) Resize(n int) { nBuilder := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(nBuilder, b.init) b.data.Resize(arrow.MonthIntervalTraits.BytesRequired(n)) b.rawData = arrow.MonthIntervalTraits.CastFromBytes(b.data.Bytes()) } } // NewArray creates a MonthInterval array from the memory buffers used by the builder and resets the MonthIntervalBuilder // so it can be used to build a new array. func (b *MonthIntervalBuilder) NewArray() arrow.Array { return b.NewMonthIntervalArray() } // NewMonthIntervalArray creates a MonthInterval array from the memory buffers used by the builder and resets the MonthIntervalBuilder // so it can be used to build a new array. 
func (b *MonthIntervalBuilder) NewMonthIntervalArray() (a *MonthInterval) { data := b.newData() a = NewMonthIntervalData(data) data.Release() return } func (b *MonthIntervalBuilder) newData() (data *Data) { bytesRequired := arrow.MonthIntervalTraits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(arrow.FixedWidthTypes.MonthInterval, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *MonthIntervalBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } v, err := strconv.ParseInt(s, 10, 32) if err != nil { b.AppendNull() return err } b.Append(arrow.MonthInterval(v)) return nil } func (b *MonthIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { var v *arrow.MonthInterval if err := dec.Decode(&v); err != nil { return err } if v == nil { b.AppendNull() } else { b.Append(*v) } return nil } func (b *MonthIntervalBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } // UnmarshalJSON will add the unmarshalled values of an array to the builder, // values are expected to be strings of the form "#months" where # is the int32 // value that will be added to the builder. func (b *MonthIntervalBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("month interval builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } // A type which represents an immutable sequence of arrow.DayTimeInterval values. 
type DayTimeInterval struct { array values []arrow.DayTimeInterval } func NewDayTimeIntervalData(data arrow.ArrayData) *DayTimeInterval { a := &DayTimeInterval{} a.refCount = 1 a.setData(data.(*Data)) return a } func (a *DayTimeInterval) Value(i int) arrow.DayTimeInterval { return a.values[i] } func (a *DayTimeInterval) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } data, err := json.Marshal(a.GetOneForMarshal(i)) if err != nil { panic(err) } return string(data) } func (a *DayTimeInterval) DayTimeIntervalValues() []arrow.DayTimeInterval { return a.values } func (a *DayTimeInterval) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *DayTimeInterval) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.DayTimeIntervalTraits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *DayTimeInterval) GetOneForMarshal(i int) interface{} { if a.IsValid(i) { return a.values[i] } return nil } // MarshalJSON will marshal this array to JSON as an array of objects, // consisting of the form {"days": #, "milliseconds": #} for each element. 
func (a *DayTimeInterval) MarshalJSON() ([]byte, error) { if a.NullN() == 0 { return json.Marshal(a.values) } vals := make([]interface{}, a.Len()) for i, v := range a.values { if a.IsValid(i) { vals[i] = v } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualDayTimeInterval(left, right *DayTimeInterval) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } type DayTimeIntervalBuilder struct { builder data *memory.Buffer rawData []arrow.DayTimeInterval } func NewDayTimeIntervalBuilder(mem memory.Allocator) *DayTimeIntervalBuilder { return &DayTimeIntervalBuilder{builder: builder{refCount: 1, mem: mem}} } func (b *DayTimeIntervalBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.DayTimeInterval } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *DayTimeIntervalBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *DayTimeIntervalBuilder) Append(v arrow.DayTimeInterval) { b.Reserve(1) b.UnsafeAppend(v) } func (b *DayTimeIntervalBuilder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *DayTimeIntervalBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *DayTimeIntervalBuilder) AppendEmptyValue() { b.Append(arrow.DayTimeInterval{}) } func (b *DayTimeIntervalBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *DayTimeIntervalBuilder) UnsafeAppend(v arrow.DayTimeInterval) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *DayTimeIntervalBuilder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { 
bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. func (b *DayTimeIntervalBuilder) AppendValues(v []arrow.DayTimeInterval, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) arrow.DayTimeIntervalTraits.Copy(b.rawData[b.length:], v) b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *DayTimeIntervalBuilder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.DayTimeIntervalTraits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = arrow.DayTimeIntervalTraits.CastFromBytes(b.data.Bytes()) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *DayTimeIntervalBuilder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *DayTimeIntervalBuilder) Resize(n int) { nBuilder := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(nBuilder, b.init) b.data.Resize(arrow.DayTimeIntervalTraits.BytesRequired(n)) b.rawData = arrow.DayTimeIntervalTraits.CastFromBytes(b.data.Bytes()) } } // NewArray creates a DayTimeInterval array from the memory buffers used by the builder and resets the DayTimeIntervalBuilder // so it can be used to build a new array. 
func (b *DayTimeIntervalBuilder) NewArray() arrow.Array { return b.NewDayTimeIntervalArray() } // NewDayTimeIntervalArray creates a DayTimeInterval array from the memory buffers used by the builder and resets the DayTimeIntervalBuilder // so it can be used to build a new array. func (b *DayTimeIntervalBuilder) NewDayTimeIntervalArray() (a *DayTimeInterval) { data := b.newData() a = NewDayTimeIntervalData(data) data.Release() return } func (b *DayTimeIntervalBuilder) newData() (data *Data) { bytesRequired := arrow.DayTimeIntervalTraits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(arrow.FixedWidthTypes.DayTimeInterval, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *DayTimeIntervalBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } var v arrow.DayTimeInterval if err := json.Unmarshal([]byte(s), &v); err != nil { b.AppendNull() return err } b.Append(v) return nil } func (b *DayTimeIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { var v *arrow.DayTimeInterval if err := dec.Decode(&v); err != nil { return err } if v == nil { b.AppendNull() } else { b.Append(*v) } return nil } func (b *DayTimeIntervalBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } // UnmarshalJSON will add the values unmarshalled from an array to the builder, // with the values expected to be objects of the form {"days": #, "milliseconds": #} func (b *DayTimeIntervalBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("day_time interval builder must unpack from json array, found %s", delim) } return 
b.Unmarshal(dec) } // A type which represents an immutable sequence of arrow.DayTimeInterval values. type MonthDayNanoInterval struct { array values []arrow.MonthDayNanoInterval } func NewMonthDayNanoIntervalData(data arrow.ArrayData) *MonthDayNanoInterval { a := &MonthDayNanoInterval{} a.refCount = 1 a.setData(data.(*Data)) return a } func (a *MonthDayNanoInterval) Value(i int) arrow.MonthDayNanoInterval { return a.values[i] } func (a *MonthDayNanoInterval) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } data, err := json.Marshal(a.GetOneForMarshal(i)) if err != nil { panic(err) } return string(data) } func (a *MonthDayNanoInterval) MonthDayNanoIntervalValues() []arrow.MonthDayNanoInterval { return a.values } func (a *MonthDayNanoInterval) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *MonthDayNanoInterval) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.MonthDayNanoIntervalTraits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *MonthDayNanoInterval) GetOneForMarshal(i int) interface{} { if a.IsValid(i) { return a.values[i] } return nil } // MarshalJSON will marshal this array to a JSON array with elements // marshalled to the form {"months": #, "days": #, "nanoseconds": #} func (a *MonthDayNanoInterval) MarshalJSON() ([]byte, error) { if a.NullN() == 0 { return json.Marshal(a.values) } vals := make([]interface{}, a.Len()) for i, v := range a.values { if a.IsValid(i) { vals[i] = v } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualMonthDayNanoInterval(left, right *MonthDayNanoInterval) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != 
right.Value(i) { return false } } return true } type MonthDayNanoIntervalBuilder struct { builder data *memory.Buffer rawData []arrow.MonthDayNanoInterval } func NewMonthDayNanoIntervalBuilder(mem memory.Allocator) *MonthDayNanoIntervalBuilder { return &MonthDayNanoIntervalBuilder{builder: builder{refCount: 1, mem: mem}} } func (b *MonthDayNanoIntervalBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.MonthDayNanoInterval } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *MonthDayNanoIntervalBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *MonthDayNanoIntervalBuilder) Append(v arrow.MonthDayNanoInterval) { b.Reserve(1) b.UnsafeAppend(v) } func (b *MonthDayNanoIntervalBuilder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *MonthDayNanoIntervalBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *MonthDayNanoIntervalBuilder) AppendEmptyValue() { b.Append(arrow.MonthDayNanoInterval{}) } func (b *MonthDayNanoIntervalBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *MonthDayNanoIntervalBuilder) UnsafeAppend(v arrow.MonthDayNanoInterval) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *MonthDayNanoIntervalBuilder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *MonthDayNanoIntervalBuilder) AppendValues(v []arrow.MonthDayNanoInterval, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) arrow.MonthDayNanoIntervalTraits.Copy(b.rawData[b.length:], v) b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *MonthDayNanoIntervalBuilder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.MonthDayNanoIntervalTraits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = arrow.MonthDayNanoIntervalTraits.CastFromBytes(b.data.Bytes()) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *MonthDayNanoIntervalBuilder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *MonthDayNanoIntervalBuilder) Resize(n int) { nBuilder := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(nBuilder, b.init) b.data.Resize(arrow.MonthDayNanoIntervalTraits.BytesRequired(n)) b.rawData = arrow.MonthDayNanoIntervalTraits.CastFromBytes(b.data.Bytes()) } } // NewArray creates a MonthDayNanoInterval array from the memory buffers used by the builder and resets the MonthDayNanoIntervalBuilder // so it can be used to build a new array. func (b *MonthDayNanoIntervalBuilder) NewArray() arrow.Array { return b.NewMonthDayNanoIntervalArray() } // NewMonthDayNanoIntervalArray creates a MonthDayNanoInterval array from the memory buffers used by the builder and resets the MonthDayNanoIntervalBuilder // so it can be used to build a new array. 
func (b *MonthDayNanoIntervalBuilder) NewMonthDayNanoIntervalArray() (a *MonthDayNanoInterval) { data := b.newData() a = NewMonthDayNanoIntervalData(data) data.Release() return } func (b *MonthDayNanoIntervalBuilder) newData() (data *Data) { bytesRequired := arrow.MonthDayNanoIntervalTraits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(arrow.FixedWidthTypes.MonthDayNanoInterval, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *MonthDayNanoIntervalBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } var v arrow.MonthDayNanoInterval if err := json.Unmarshal([]byte(s), &v); err != nil { return err } b.Append(v) return nil } func (b *MonthDayNanoIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { var v *arrow.MonthDayNanoInterval if err := dec.Decode(&v); err != nil { return err } if v == nil { b.AppendNull() } else { b.Append(*v) } return nil } func (b *MonthDayNanoIntervalBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } // UnmarshalJSON unmarshals a JSON array of objects and adds them to this builder, // each element of the array is expected to be an object of the form // {"months": #, "days": #, "nanoseconds": #} func (b *MonthDayNanoIntervalBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("month_day_nano interval builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var ( _ arrow.Array = (*MonthInterval)(nil) _ arrow.Array = (*DayTimeInterval)(nil) _ arrow.Array = (*MonthDayNanoInterval)(nil) _ Builder = (*MonthIntervalBuilder)(nil) _ 
Builder = (*DayTimeIntervalBuilder)(nil) _ Builder = (*MonthDayNanoIntervalBuilder)(nil) ) arrow-go-18.2.0/arrow/array/interval_test.go000066400000000000000000000327051476434502500210420ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "math" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestMonthIntervalArray(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( want = []arrow.MonthInterval{1, 2, 3, 4} valids = []bool{true, true, false, true} ) b := array.NewMonthIntervalBuilder(mem) defer b.Release() b.Retain() b.Release() b.AppendValues(want[:2], nil) b.AppendNull() b.Append(want[3]) if got, want := b.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := b.NullN(), 1; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } arr := b.NewMonthIntervalArray() defer arr.Release() arr.Retain() arr.Release() if got, want := arr.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := arr.NullN(), 1; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } for i := range want { if arr.IsNull(i) != !valids[i] { t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) } switch { case arr.IsNull(i): default: got := arr.Value(i) if got != want[i] { t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) } } } sub := array.MakeFromData(arr.Data()) defer sub.Release() if sub.DataType().ID() != arrow.INTERVAL_MONTHS { t.Fatalf("invalid type: got=%q, want=interval_months", sub.DataType().Name()) } if _, ok := sub.(*array.MonthInterval); !ok { t.Fatalf("could not type-assert to array.MonthInterval") } if got, want := arr.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(arr.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.MonthInterval) if !ok { t.Fatalf("could not type-assert to array.MonthInterval") } if got, want := v.String(), 
`[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } } func TestMonthIntervalBuilder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) want := []arrow.MonthInterval{1, 2, 3, 4} b := array.NewMonthIntervalBuilder(mem) defer b.Release() miValues := func(a *array.MonthInterval) []arrow.MonthInterval { vs := make([]arrow.MonthInterval, a.Len()) for i := range vs { vs[i] = a.Value(i) } return vs } b.AppendValues([]arrow.MonthInterval{}, nil) arr := b.NewMonthIntervalArray() assert.Zero(t, arr.Len()) arr.Release() b.AppendValues(nil, nil) arr = b.NewMonthIntervalArray() assert.Zero(t, arr.Len()) arr.Release() b.AppendValues([]arrow.MonthInterval{}, nil) b.AppendValues(want, nil) arr = b.NewMonthIntervalArray() assert.Equal(t, want, miValues(arr)) arr.Release() b.AppendValues(want, nil) b.AppendValues([]arrow.MonthInterval{}, nil) arr = b.NewMonthIntervalArray() assert.Equal(t, want, miValues(arr)) arr.Release() } func TestMonthIntervalStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( values = []arrow.MonthInterval{1, 2, 3, 4} valid = []bool{true, true, false, true} ) b := array.NewMonthIntervalBuilder(mem) defer b.Release() b.AppendValues(values, valid) arr := b.NewArray().(*array.MonthInterval) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewMonthIntervalBuilder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.MonthInterval) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestDayTimeArray(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( want = []arrow.DayTimeInterval{ {Days: 1, Milliseconds: 1}, {Days: 2, Milliseconds: 2}, {Days: 3, Milliseconds: 3}, {Days: 4, Milliseconds: 4}} valids = []bool{true, true, false, true} ) b := array.NewDayTimeIntervalBuilder(mem) defer b.Release() b.Retain() b.Release() b.AppendValues(want[:2], nil) b.AppendNull() b.Append(want[3]) if got, want := b.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := b.NullN(), 1; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } arr := b.NewDayTimeIntervalArray() defer arr.Release() arr.Retain() arr.Release() if got, want := arr.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := arr.NullN(), 1; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } for i := range want { if arr.IsNull(i) != !valids[i] { t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) } switch { case arr.IsNull(i): default: got := arr.Value(i) if got != want[i] { t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) } } } sub := array.MakeFromData(arr.Data()) defer sub.Release() if sub.DataType().ID() != arrow.INTERVAL_DAY_TIME { t.Fatalf("invalid type: got=%q, want=interval_day_time", sub.DataType().Name()) } if _, ok := sub.(*array.DayTimeInterval); !ok { t.Fatalf("could not type-assert to array.DayTimeInterval") } if got, want := arr.String(), `[{1 1} {2 2} (null) {4 4}]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(arr.Data(), 2, 4) defer 
slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.DayTimeInterval) if !ok { t.Fatalf("could not type-assert to array.DayInterval") } if got, want := v.String(), `[(null) {4 4}]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } } func TestDayTimeIntervalBuilder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) want := []arrow.DayTimeInterval{ {Days: 1, Milliseconds: 1}, {Days: 2, Milliseconds: 2}, {Days: 3, Milliseconds: 3}, {Days: 4, Milliseconds: 4}} b := array.NewDayTimeIntervalBuilder(mem) defer b.Release() dtValues := func(a *array.DayTimeInterval) []arrow.DayTimeInterval { vs := make([]arrow.DayTimeInterval, a.Len()) for i := range vs { vs[i] = a.Value(i) } return vs } b.AppendValues([]arrow.DayTimeInterval{}, nil) arr := b.NewDayTimeIntervalArray() assert.Zero(t, arr.Len()) arr.Release() b.AppendValues(nil, nil) arr = b.NewDayTimeIntervalArray() assert.Zero(t, arr.Len()) arr.Release() b.AppendValues([]arrow.DayTimeInterval{}, nil) b.AppendValues(want, nil) arr = b.NewDayTimeIntervalArray() assert.Equal(t, want, dtValues(arr)) arr.Release() b.AppendValues(want, nil) b.AppendValues([]arrow.DayTimeInterval{}, nil) arr = b.NewDayTimeIntervalArray() assert.Equal(t, want, dtValues(arr)) arr.Release() } func TestDayTimeIntervalStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( values = []arrow.DayTimeInterval{ {Days: 1, Milliseconds: 1}, {Days: 2, Milliseconds: 2}, {Days: 3, Milliseconds: 3}, {Days: 4, Milliseconds: 4}, } valid = []bool{true, true, false, true} ) b := array.NewDayTimeIntervalBuilder(mem) defer b.Release() b.AppendValues(values, valid) arr := b.NewArray().(*array.DayTimeInterval) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewDayTimeIntervalBuilder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.DayTimeInterval) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestMonthDayNanoArray(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( want = []arrow.MonthDayNanoInterval{ {Months: 1, Days: 1, Nanoseconds: 1000}, {Months: 2, Days: 2, Nanoseconds: 2000}, {Months: 3, Days: 3, Nanoseconds: 3000}, {Months: 4, Days: 4, Nanoseconds: 4000}, {Months: 0, Days: 0, Nanoseconds: 0}, {Months: -1, Days: -2, Nanoseconds: -300}, {Months: math.MaxInt32, Days: math.MinInt32, Nanoseconds: math.MaxInt64}, {Months: math.MinInt32, Days: math.MaxInt32, Nanoseconds: math.MinInt64}, } valids = []bool{true, true, false, true, true, true, false, true} ) b := array.NewMonthDayNanoIntervalBuilder(mem) defer b.Release() b.Retain() b.Release() b.AppendValues(want[:2], nil) b.AppendNull() b.Append(want[3]) b.AppendValues(want[4:], valids[4:]) if got, want := b.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := b.NullN(), 2; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } arr := b.NewMonthDayNanoIntervalArray() defer arr.Release() arr.Retain() arr.Release() if got, want := arr.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := arr.NullN(), 2; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } for i := range want { if arr.IsNull(i) != !valids[i] { t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) } switch { case arr.IsNull(i): default: got := arr.Value(i) if got != want[i] { t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) } } } sub := array.MakeFromData(arr.Data()) defer sub.Release() if sub.DataType().ID() != 
arrow.INTERVAL_MONTH_DAY_NANO { t.Fatalf("invalid type: got=%q, want=interval", sub.DataType().Name()) } if _, ok := sub.(*array.MonthDayNanoInterval); !ok { t.Fatalf("could not type-assert to array.MonthDayNanoInterval") } if got, want := arr.String(), `[{1 1 1000} {2 2 2000} (null) {4 4 4000} {0 0 0} {-1 -2 -300} (null) {-2147483648 2147483647 -9223372036854775808}]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(arr.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.MonthDayNanoInterval) if !ok { t.Fatalf("could not type-assert to array.MonthDayNanoInterval") } if got, want := v.String(), `[(null) {4 4 4000}]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } } func TestMonthDayNanoIntervalBuilder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) want := []arrow.MonthDayNanoInterval{ {Months: 1, Days: 1, Nanoseconds: 1000}, {Months: 2, Days: 2, Nanoseconds: 2000}, {Months: 3, Days: 3, Nanoseconds: 3000}, {Months: 4, Days: 4, Nanoseconds: 4000}} b := array.NewMonthDayNanoIntervalBuilder(mem) defer b.Release() dtValues := func(a *array.MonthDayNanoInterval) []arrow.MonthDayNanoInterval { vs := make([]arrow.MonthDayNanoInterval, a.Len()) for i := range vs { vs[i] = a.Value(i) } return vs } b.AppendValues([]arrow.MonthDayNanoInterval{}, nil) arr := b.NewMonthDayNanoIntervalArray() assert.Zero(t, arr.Len()) arr.Release() b.AppendValues(nil, nil) arr = b.NewMonthDayNanoIntervalArray() assert.Zero(t, arr.Len()) arr.Release() b.AppendValues([]arrow.MonthDayNanoInterval{}, nil) b.AppendValues(want, nil) arr = b.NewMonthDayNanoIntervalArray() assert.Equal(t, want, dtValues(arr)) arr.Release() b.AppendValues(want, nil) b.AppendValues([]arrow.MonthDayNanoInterval{}, nil) arr = b.NewMonthDayNanoIntervalArray() assert.Equal(t, want, dtValues(arr)) arr.Release() } func TestMonthDayNanoIntervalStringRoundTrip(t 
*testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( values = []arrow.MonthDayNanoInterval{ {Months: 1, Days: 1, Nanoseconds: 1000}, {Months: 2, Days: 2, Nanoseconds: 2000}, {Months: 3, Days: 3, Nanoseconds: 3000}, {Months: 4, Days: 4, Nanoseconds: 4000}, {Months: 0, Days: 0, Nanoseconds: 0}, {Months: -1, Days: -2, Nanoseconds: -300}, {Months: math.MaxInt32, Days: math.MinInt32, Nanoseconds: math.MaxInt64}, {Months: math.MinInt32, Days: math.MaxInt32, Nanoseconds: math.MinInt64}, } valid = []bool{true, true, false, true, true, true, false, true} ) b := array.NewMonthDayNanoIntervalBuilder(mem) defer b.Release() b.AppendValues(values, valid) arr := b.NewArray().(*array.MonthDayNanoInterval) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewMonthDayNanoIntervalBuilder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.MonthDayNanoInterval) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } arrow-go-18.2.0/arrow/array/json_reader.go000066400000000000000000000117401476434502500204460ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package array import ( "errors" "fmt" "io" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) type Option func(config) type config interface{} // WithChunk sets the chunk size for reading in json records. The default is to // read in one row per record batch as a single object. If chunk size is set to // a negative value, then the entire file is read as a single record batch. // Otherwise a record batch is read in with chunk size rows per record batch until // it reaches EOF. func WithChunk(n int) Option { return func(cfg config) { switch cfg := cfg.(type) { case *JSONReader: cfg.chunk = n default: panic(fmt.Errorf("arrow/json): unknown config type %T", cfg)) } } } // WithAllocator specifies the allocator to use for creating the record batches, // if it is not called, then memory.DefaultAllocator will be used. func WithAllocator(mem memory.Allocator) Option { return func(cfg config) { switch cfg := cfg.(type) { case *JSONReader: cfg.mem = mem default: panic(fmt.Errorf("arrow/json): unknown config type %T", cfg)) } } } // JSONReader is a json reader that meets the RecordReader interface definition. // // To read in an array of objects as a record, you can use RecordFromJSON // which is equivalent to reading the json as a struct array whose fields are // the columns of the record. This primarily exists to fit the RecordReader // interface as a matching reader for the csv reader. type JSONReader struct { r *json.Decoder schema *arrow.Schema bldr *RecordBuilder refs int64 cur arrow.Record err error chunk int done bool mem memory.Allocator next func() bool } // NewJSONReader returns a json RecordReader which expects to find one json object // per row of dataset. 
Using WithChunk can control how many rows are processed // per record, which is how many objects become a single record from the file. // // If it is desired to write out an array of rows, then simply use RecordToStructArray // and json.Marshal the struct array for the same effect. func NewJSONReader(r io.Reader, schema *arrow.Schema, opts ...Option) *JSONReader { rr := &JSONReader{ r: json.NewDecoder(r), schema: schema, refs: 1, chunk: 1, } for _, o := range opts { o(rr) } if rr.mem == nil { rr.mem = memory.DefaultAllocator } rr.bldr = NewRecordBuilder(rr.mem, schema) switch { case rr.chunk < 0: rr.next = rr.nextall case rr.chunk > 1: rr.next = rr.nextn default: rr.next = rr.next1 } return rr } // Err returns the last encountered error func (r *JSONReader) Err() error { return r.err } func (r *JSONReader) Schema() *arrow.Schema { return r.schema } // Record returns the last read in record. The returned record is only valid // until the next call to Next unless Retain is called on the record itself. func (r *JSONReader) Record() arrow.Record { return r.cur } func (r *JSONReader) Retain() { atomic.AddInt64(&r.refs, 1) } func (r *JSONReader) Release() { debug.Assert(atomic.LoadInt64(&r.refs) > 0, "too many releases") if atomic.AddInt64(&r.refs, -1) == 0 { if r.cur != nil { r.cur.Release() r.bldr.Release() r.r = nil } } } // Next returns true if it read in a record, which will be available via Record // and false if there is either an error or the end of the reader. 
func (r *JSONReader) Next() bool { if r.cur != nil { r.cur.Release() r.cur = nil } if r.err != nil || r.done { return false } return r.next() } func (r *JSONReader) readNext() bool { r.err = r.r.Decode(r.bldr) if r.err != nil { r.done = true if errors.Is(r.err, io.EOF) { r.err = nil } return false } return true } func (r *JSONReader) nextall() bool { for r.readNext() { } r.cur = r.bldr.NewRecord() return r.cur.NumRows() > 0 } func (r *JSONReader) next1() bool { if !r.readNext() { return false } r.cur = r.bldr.NewRecord() return true } func (r *JSONReader) nextn() bool { var n = 0 for i := 0; i < r.chunk && !r.done; i, n = i+1, n+1 { if !r.readNext() { break } } if n > 0 { r.cur = r.bldr.NewRecord() } return n > 0 } var ( _ RecordReader = (*JSONReader)(nil) ) arrow-go-18.2.0/arrow/array/json_reader_test.go000066400000000000000000000106461476434502500215110ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "strings" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) const jsondata = ` {"region": "NY", "model": "3", "sales": 742.0} {"region": "NY", "model": "S", "sales": 304.125} {"region": "NY", "model": "X", "sales": 136.25} {"region": "NY", "model": "Y", "sales": 27.5} {"region": "CA", "model": "3", "sales": 512} {"region": "CA", "model": "S", "sales": 978} {"region": "CA", "model": "X", "sales": 1.0} {"region": "CA", "model": "Y", "sales": 69} {"region": "QC", "model": "3", "sales": 273.5} {"region": "QC", "model": "S", "sales": 13} {"region": "QC", "model": "X", "sales": 54} {"region": "QC", "model": "Y", "sales": 21} {"region": "QC", "model": "3", "sales": 152.25} {"region": "QC", "model": "S", "sales": 10} {"region": "QC", "model": "X", "sales": 42} {"region": "QC", "model": "Y", "sales": 37}` func TestJSONReader(t *testing.T) { schema := arrow.NewSchema([]arrow.Field{ {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "model", Type: arrow.BinaryTypes.String}, {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, }, nil) rdr := array.NewJSONReader(strings.NewReader(jsondata), schema) defer rdr.Release() n := 0 for rdr.Next() { n++ rec := rdr.Record() assert.NotNil(t, rec) assert.EqualValues(t, 1, rec.NumRows()) assert.EqualValues(t, 3, rec.NumCols()) } assert.NoError(t, rdr.Err()) assert.Equal(t, 16, n) } func TestJSONReaderAll(t *testing.T) { schema := arrow.NewSchema([]arrow.Field{ {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "model", Type: arrow.BinaryTypes.String}, {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, }, nil) mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) rdr := array.NewJSONReader(strings.NewReader(jsondata), schema, array.WithAllocator(mem), array.WithChunk(-1)) defer 
rdr.Release() assert.True(t, rdr.Next()) rec := rdr.Record() assert.NotNil(t, rec) assert.NoError(t, rdr.Err()) assert.EqualValues(t, 16, rec.NumRows()) assert.EqualValues(t, 3, rec.NumCols()) assert.False(t, rdr.Next()) } func TestJSONReaderChunked(t *testing.T) { schema := arrow.NewSchema([]arrow.Field{ {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "model", Type: arrow.BinaryTypes.String}, {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, }, nil) mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) rdr := array.NewJSONReader(strings.NewReader(jsondata), schema, array.WithAllocator(mem), array.WithChunk(4)) defer rdr.Release() n := 0 for rdr.Next() { n++ rec := rdr.Record() assert.NotNil(t, rec) assert.NoError(t, rdr.Err()) assert.EqualValues(t, 4, rec.NumRows()) } assert.Equal(t, 4, n) assert.NoError(t, rdr.Err()) } func TestUnmarshalJSON(t *testing.T) { schema := arrow.NewSchema([]arrow.Field{ {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "model", Type: arrow.BinaryTypes.String}, {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, }, nil) mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) recordBuilder := array.NewRecordBuilder(mem, schema) defer recordBuilder.Release() jsondata := `{"region": "NY", "model": "3", "sales": 742.0, "extra": 1234}` err := recordBuilder.UnmarshalJSON([]byte(jsondata)) assert.NoError(t, err) record := recordBuilder.NewRecord() defer record.Release() assert.NotNil(t, record) } arrow-go-18.2.0/arrow/array/list.go000066400000000000000000001160621476434502500171310ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "fmt" "strings" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) type ListLike interface { arrow.Array ListValues() arrow.Array ValueOffsets(i int) (start, end int64) } type VarLenListLike interface { ListLike } // List represents an immutable sequence of array values. type List struct { array values arrow.Array offsets []int32 } var _ ListLike = (*List)(nil) // NewListData returns a new List array value, from data. 
func NewListData(data arrow.ArrayData) *List {
	a := &List{}
	a.refCount = 1
	a.setData(data.(*Data))
	return a
}

// ListValues returns the flattened child array holding all list elements.
func (a *List) ListValues() arrow.Array { return a.values }

// ValueStr returns the JSON string representation of the list at index i,
// or NullValueStr if that slot is null.
func (a *List) ValueStr(i int) string {
	if !a.IsValid(i) {
		return NullValueStr
	}
	return string(a.GetOneForMarshal(i).(json.RawMessage))
}

// String returns a human-readable representation of the whole array,
// e.g. "[[1 2] (null) [3]]".
func (a *List) String() string {
	o := new(strings.Builder)
	o.WriteString("[")
	for i := 0; i < a.Len(); i++ {
		if i > 0 {
			o.WriteString(" ")
		}
		if a.IsNull(i) {
			o.WriteString(NullValueStr)
			continue
		}
		sub := a.newListValue(i)
		fmt.Fprintf(o, "%v", sub)
		sub.Release()
	}
	o.WriteString("]")
	return o.String()
}

// newListValue returns the i-th list as a slice of the child array.
// The caller is responsible for releasing the returned array.
func (a *List) newListValue(i int) arrow.Array {
	beg, end := a.ValueOffsets(i)
	return NewSlice(a.values, beg, end)
}

// setData wires up the offsets buffer (buffer 1) and the child values array.
func (a *List) setData(data *Data) {
	debug.Assert(len(data.buffers) >= 2, "list data should have 2 buffers")
	a.array.setData(data)
	vals := data.buffers[1]
	if vals != nil {
		a.offsets = arrow.Int32Traits.CastFromBytes(vals.Bytes())
	}
	a.values = MakeFromData(data.childData[0])
}

// GetOneForMarshal returns the i-th list pre-marshaled as json.RawMessage,
// or nil for a null slot. It panics on a marshaling error since the
// element types are known to be marshalable.
func (a *List) GetOneForMarshal(i int) interface{} {
	if a.IsNull(i) {
		return nil
	}
	slice := a.newListValue(i)
	defer slice.Release()
	v, err := json.Marshal(slice)
	if err != nil {
		panic(err)
	}
	return json.RawMessage(v)
}

// MarshalJSON implements json.Marshaler, emitting a JSON array of lists.
func (a *List) MarshalJSON() ([]byte, error) {
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	buf.WriteByte('[')
	for i := 0; i < a.Len(); i++ {
		if i != 0 {
			buf.WriteByte(',')
		}
		if err := enc.Encode(a.GetOneForMarshal(i)); err != nil {
			return nil, err
		}
	}
	buf.WriteByte(']')
	return buf.Bytes(), nil
}

// arrayEqualList compares two List arrays element-wise.
// NOTE(review): null slots are skipped without checking the right side —
// presumably length and validity bitmaps were already compared by the
// caller (the generic Equal dispatcher); confirm before reusing elsewhere.
func arrayEqualList(left, right *List) bool {
	for i := 0; i < left.Len(); i++ {
		if left.IsNull(i) {
			continue
		}
		o := func() bool {
			l := left.newListValue(i)
			defer l.Release()
			r := right.newListValue(i)
			defer r.Release()
			return Equal(l, r)
		}()
		if !o {
			return false
		}
	}
	return true
}

// Len returns the number of elements in the array.
func (a *List) Len() int { return a.array.Len() }

// Offsets returns the raw int32 offsets buffer (including any array offset).
func (a *List) Offsets() []int32 { return a.offsets }

// Retain increases the reference count of the array and its child values.
func (a *List) Retain() {
	a.array.Retain()
	a.values.Retain()
}

// Release decreases the reference count of the array and its child values.
func (a *List) Release() {
	a.array.Release()
	a.values.Release()
}

// ValueOffsets returns the [start, end) range in the child array that
// holds the elements of the list at index i.
func (a *List) ValueOffsets(i int) (start, end int64) {
	debug.Assert(i >= 0 && i < a.array.data.length, "index out of range")
	// j accounts for a non-zero slice offset into the offsets buffer.
	j := i + a.array.data.offset
	start, end = int64(a.offsets[j]), int64(a.offsets[j+1])
	return
}

// LargeList represents an immutable sequence of array values.
type LargeList struct {
	array
	values  arrow.Array
	offsets []int64
}

var _ ListLike = (*LargeList)(nil)

// NewLargeListData returns a new LargeList array value, from data.
func NewLargeListData(data arrow.ArrayData) *LargeList {
	a := new(LargeList)
	a.refCount = 1
	a.setData(data.(*Data))
	return a
}

// ListValues returns the flattened child array holding all list elements.
func (a *LargeList) ListValues() arrow.Array { return a.values }

// ValueStr returns the JSON string representation of the list at index i,
// or NullValueStr if that slot is null.
func (a *LargeList) ValueStr(i int) string {
	if !a.IsValid(i) {
		return NullValueStr
	}
	return string(a.GetOneForMarshal(i).(json.RawMessage))
}

// String returns a human-readable representation of the whole array.
func (a *LargeList) String() string {
	o := new(strings.Builder)
	o.WriteString("[")
	for i := 0; i < a.Len(); i++ {
		if i > 0 {
			o.WriteString(" ")
		}
		if a.IsNull(i) {
			o.WriteString(NullValueStr)
			continue
		}
		sub := a.newListValue(i)
		fmt.Fprintf(o, "%v", sub)
		sub.Release()
	}
	o.WriteString("]")
	return o.String()
}

// newListValue returns the i-th list as a slice of the child array.
// The caller is responsible for releasing the returned array.
func (a *LargeList) newListValue(i int) arrow.Array {
	beg, end := a.ValueOffsets(i)
	return NewSlice(a.values, beg, end)
}

// setData wires up the int64 offsets buffer (buffer 1) and the child array.
func (a *LargeList) setData(data *Data) {
	debug.Assert(len(data.buffers) >= 2, "list data should have 2 buffers")
	a.array.setData(data)
	vals := data.buffers[1]
	if vals != nil {
		a.offsets = arrow.Int64Traits.CastFromBytes(vals.Bytes())
	}
	a.values = MakeFromData(data.childData[0])
}

// GetOneForMarshal returns the i-th list pre-marshaled as json.RawMessage,
// or nil for a null slot.
func (a *LargeList) GetOneForMarshal(i int) interface{} {
	if a.IsNull(i) {
		return nil
	}
	slice := a.newListValue(i)
	defer slice.Release()
	v, err := json.Marshal(slice)
	if err != nil {
		panic(err)
	}
	return json.RawMessage(v)
}

// MarshalJSON implements json.Marshaler, emitting a JSON array of lists.
func (a *LargeList) MarshalJSON() ([]byte, error) {
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	buf.WriteByte('[')
	for i := 0; i < a.Len(); i++ {
		if i != 0 {
			buf.WriteByte(',')
		}
		if err := enc.Encode(a.GetOneForMarshal(i)); err != nil {
			return nil, err
		}
	}
	buf.WriteByte(']')
	return buf.Bytes(), nil
}

// arrayEqualLargeList compares two LargeList arrays element-wise.
// NOTE(review): like arrayEqualList, null slots are skipped — assumes
// validity bitmaps were compared by the caller; confirm before reuse.
func arrayEqualLargeList(left, right *LargeList) bool {
	for i := 0; i < left.Len(); i++ {
		if left.IsNull(i) {
			continue
		}
		o := func() bool {
			l := left.newListValue(i)
			defer l.Release()
			r := right.newListValue(i)
			defer r.Release()
			return Equal(l, r)
		}()
		if !o {
			return false
		}
	}
	return true
}

// Len returns the number of elements in the array.
func (a *LargeList) Len() int { return a.array.Len() }

// Offsets returns the raw int64 offsets buffer (including any array offset).
func (a *LargeList) Offsets() []int64 { return a.offsets }

// ValueOffsets returns the [start, end) range in the child array that
// holds the elements of the list at index i.
func (a *LargeList) ValueOffsets(i int) (start, end int64) {
	debug.Assert(i >= 0 && i < a.array.data.length, "index out of range")
	j := i + a.array.data.offset
	start, end = a.offsets[j], a.offsets[j+1]
	return
}

// Retain increases the reference count of the array and its child values.
func (a *LargeList) Retain() {
	a.array.Retain()
	a.values.Retain()
}

// Release decreases the reference count of the array and its child values.
func (a *LargeList) Release() {
	a.array.Release()
	a.values.Release()
}

// baseListBuilder is the shared implementation behind ListBuilder and
// LargeListBuilder; the only difference between the two is the offset
// width, abstracted through appendOffsetVal.
type baseListBuilder struct {
	builder

	values  Builder // value builder for the list's elements.
	offsets Builder

	// actual list type
	dt              arrow.DataType
	appendOffsetVal func(int)
}

// ListLikeBuilder is the common interface of all list-shaped builders.
type ListLikeBuilder interface {
	Builder
	ValueBuilder() Builder
	Append(bool)
}

// VarLenListLikeBuilder extends ListLikeBuilder for variable-size types
// (list-views) that need an explicit per-slot size.
type VarLenListLikeBuilder interface {
	ListLikeBuilder
	AppendWithSize(bool, int)
}

type ListBuilder struct {
	baseListBuilder
}

type LargeListBuilder struct {
	baseListBuilder
}

// NewListBuilder returns a builder, using the provided memory allocator.
// The created list builder will create a list whose elements will be of type etype.
func NewListBuilder(mem memory.Allocator, etype arrow.DataType) *ListBuilder { offsetBldr := NewInt32Builder(mem) return &ListBuilder{ baseListBuilder{ builder: builder{refCount: 1, mem: mem}, values: NewBuilder(mem, etype), offsets: offsetBldr, dt: arrow.ListOf(etype), appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, }, } } // NewListBuilderWithField takes a field to use for the child rather than just // a datatype to allow for more customization. func NewListBuilderWithField(mem memory.Allocator, field arrow.Field) *ListBuilder { offsetBldr := NewInt32Builder(mem) return &ListBuilder{ baseListBuilder{ builder: builder{refCount: 1, mem: mem}, values: NewBuilder(mem, field.Type), offsets: offsetBldr, dt: arrow.ListOfField(field), appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, }, } } func (b *baseListBuilder) Type() arrow.DataType { switch dt := b.dt.(type) { case *arrow.ListType: f := dt.ElemField() f.Type = b.values.Type() return arrow.ListOfField(f) case *arrow.LargeListType: f := dt.ElemField() f.Type = b.values.Type() return arrow.LargeListOfField(f) } return nil } // NewLargeListBuilder returns a builder, using the provided memory allocator. // The created list builder will create a list whose elements will be of type etype. 
func NewLargeListBuilder(mem memory.Allocator, etype arrow.DataType) *LargeListBuilder {
	offsetBldr := NewInt64Builder(mem)
	return &LargeListBuilder{
		baseListBuilder{
			builder:         builder{refCount: 1, mem: mem},
			values:          NewBuilder(mem, etype),
			offsets:         offsetBldr,
			dt:              arrow.LargeListOf(etype),
			appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) },
		},
	}
}

// NewLargeListBuilderWithField takes a field rather than just an element type
// to allow for more customization of the final type of the LargeList Array
func NewLargeListBuilderWithField(mem memory.Allocator, field arrow.Field) *LargeListBuilder {
	offsetBldr := NewInt64Builder(mem)
	return &LargeListBuilder{
		baseListBuilder{
			builder:         builder{refCount: 1, mem: mem},
			values:          NewBuilder(mem, field.Type),
			offsets:         offsetBldr,
			dt:              arrow.LargeListOfField(field),
			appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) },
		},
	}
}

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
func (b *baseListBuilder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		b.values.Release()
		b.offsets.Release()
	}
}

// appendNextOffset records the current child length as the next offset,
// closing off the list that was being built.
func (b *baseListBuilder) appendNextOffset() {
	b.appendOffsetVal(b.values.Len())
}

// Append starts a new list slot; v indicates validity (false == null).
func (b *baseListBuilder) Append(v bool) {
	b.Reserve(1)
	b.unsafeAppendBoolToBitmap(v)
	b.appendNextOffset()
}

// AppendWithSize satisfies VarLenListLikeBuilder; the size hint is
// ignored because plain lists derive sizes from consecutive offsets.
func (b *baseListBuilder) AppendWithSize(v bool, _ int) {
	b.Append(v)
}

// AppendNull appends a null list slot.
func (b *baseListBuilder) AppendNull() {
	b.Reserve(1)
	b.unsafeAppendBoolToBitmap(false)
	b.appendNextOffset()
}

// AppendNulls appends n null list slots.
func (b *baseListBuilder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

// AppendEmptyValue appends a valid, zero-length list.
func (b *baseListBuilder) AppendEmptyValue() {
	b.Append(true)
}

// AppendEmptyValues appends n valid, zero-length lists.
func (b *baseListBuilder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// AppendValues bulk-appends pre-computed offsets and validity flags.
// NOTE(review): offsets are appended with a nil validity slice — the
// caller is expected to supply len(valid)+1-style offsets consistent
// with the list layout; confirm against call sites.
func (b *ListBuilder) AppendValues(offsets []int32, valid []bool) {
	b.Reserve(len(valid))
	b.offsets.(*Int32Builder).AppendValues(offsets, nil)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(valid))
}

// AppendValues bulk-appends pre-computed int64 offsets and validity flags.
func (b *LargeListBuilder) AppendValues(offsets []int64, valid []bool) {
	b.Reserve(len(valid))
	b.offsets.(*Int64Builder).AppendValues(offsets, nil)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(valid))
}

// unsafeAppendBoolToBitmap sets/clears the next validity bit; callers
// must have Reserved space first (hence "unsafe").
func (b *baseListBuilder) unsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// init allocates for capacity lists; offsets need one extra slot since
// n lists require n+1 offsets.
func (b *baseListBuilder) init(capacity int) {
	b.builder.init(capacity)
	b.offsets.init(capacity + 1)
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *baseListBuilder) Reserve(n int) {
	b.builder.reserve(n, b.resizeHelper)
	b.offsets.Reserve(n)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may reduced.
func (b *baseListBuilder) Resize(n int) {
	b.resizeHelper(n)
	b.offsets.Resize(n)
}

// resizeHelper resizes the validity bitmap, enforcing the minimum
// builder capacity and initializing on first use.
func (b *baseListBuilder) resizeHelper(n int) {
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(n, b.builder.init)
	}
}

// ValueBuilder returns the builder used for the list elements.
func (b *baseListBuilder) ValueBuilder() Builder {
	return b.values
}

// NewArray creates a List array from the memory buffers used by the builder and resets the ListBuilder
// so it can be used to build a new array.
func (b *ListBuilder) NewArray() arrow.Array {
	return b.NewListArray()
}

// NewArray creates a LargeList array from the memory buffers used by the builder and resets the LargeListBuilder
// so it can be used to build a new array.
func (b *LargeListBuilder) NewArray() arrow.Array {
	return b.NewLargeListArray()
}

// NewListArray creates a List array from the memory buffers used by the builder and resets the ListBuilder
// so it can be used to build a new array.
func (b *ListBuilder) NewListArray() (a *List) {
	data := b.newData()
	a = NewListData(data)
	data.Release()
	return
}

// NewLargeListArray creates a List array from the memory buffers used by the builder and resets the LargeListBuilder
// so it can be used to build a new array.
func (b *LargeListBuilder) NewLargeListArray() (a *LargeList) {
	data := b.newData()
	a = NewLargeListData(data)
	data.Release()
	return
}

// newData finalizes the builder's buffers into an ArrayData and resets
// the builder. The caller owns (and must Release) the returned data.
func (b *baseListBuilder) newData() (data *Data) {
	// A list of length n requires n+1 offsets; append the closing
	// offset if it has not been appended yet.
	if b.offsets.Len() != b.length+1 {
		b.appendNextOffset()
	}
	values := b.values.NewArray()
	defer values.Release()

	var offsets *memory.Buffer
	if b.offsets != nil {
		arr := b.offsets.NewArray()
		defer arr.Release()
		offsets = arr.Data().Buffers()[1]
	}

	data = NewData(
		b.Type(), b.length,
		[]*memory.Buffer{
			b.nullBitmap,
			offsets,
		},
		[]arrow.ArrayData{values.Data()},
		b.nulls,
		0,
	)
	b.reset()

	return
}

// AppendValueFromString parses a single JSON-encoded list value (or the
// null sentinel) and appends it.
func (b *baseListBuilder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	return b.UnmarshalOne(json.NewDecoder(strings.NewReader(s)))
}

// UnmarshalOne consumes one JSON value from dec: a JSON array becomes a
// list, JSON null becomes a null slot; anything else is a type error.
func (b *baseListBuilder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}

	switch t {
	case json.Delim('['):
		b.Append(true)
		if err := b.values.Unmarshal(dec); err != nil {
			return err
		}
		// consume ']'
		_, err := dec.Token()
		return err
	case nil:
		b.AppendNull()
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Struct: b.dt.String(),
		}
	}

	return nil
}

// Unmarshal appends values from dec until the enclosing JSON array ends.
func (b *baseListBuilder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON implements json.Unmarshaler; data must be a JSON array
// of lists.
// NOTE(review): if the first token is not a json.Delim, `delim` is its
// zero value here, so the error message prints an empty delimiter rather
// than the offending token.
func (b *baseListBuilder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("list builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

// ListView represents an immutable sequence of array values defined by an
// offset into a child array and a length.
type ListView struct {
	array
	values  arrow.Array
	offsets []int32
	sizes   []int32
}

var _ VarLenListLike = (*ListView)(nil)

// NewListViewData returns a new ListView array value, from data.
func NewListViewData(data arrow.ArrayData) *ListView {
	a := &ListView{}
	a.refCount = 1
	a.setData(data.(*Data))
	return a
}

// ListValues returns the flattened child array holding all list elements.
func (a *ListView) ListValues() arrow.Array { return a.values }

// ValueStr returns the JSON string representation of the list-view at
// index i, or NullValueStr if that slot is null.
func (a *ListView) ValueStr(i int) string {
	if !a.IsValid(i) {
		return NullValueStr
	}
	return string(a.GetOneForMarshal(i).(json.RawMessage))
}

// String returns a human-readable representation of the whole array.
func (a *ListView) String() string {
	o := new(strings.Builder)
	o.WriteString("[")
	for i := 0; i < a.Len(); i++ {
		if i > 0 {
			o.WriteString(" ")
		}
		if a.IsNull(i) {
			o.WriteString(NullValueStr)
			continue
		}
		sub := a.newListValue(i)
		fmt.Fprintf(o, "%v", sub)
		sub.Release()
	}
	o.WriteString("]")
	return o.String()
}

// newListValue returns the i-th list-view as a slice of the child array.
// The caller is responsible for releasing the returned array.
func (a *ListView) newListValue(i int) arrow.Array {
	beg, end := a.ValueOffsets(i)
	return NewSlice(a.values, beg, end)
}

// setData wires up the offsets buffer (1), sizes buffer (2) and child array.
func (a *ListView) setData(data *Data) {
	debug.Assert(len(data.buffers) >= 3, "list-view data should have 3 buffers")
	a.array.setData(data)
	offsets := data.buffers[1]
	if offsets != nil {
		a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes())
	}
	sizes := data.buffers[2]
	if sizes != nil {
		a.sizes = arrow.Int32Traits.CastFromBytes(sizes.Bytes())
	}
	a.values = MakeFromData(data.childData[0])
}

// GetOneForMarshal returns the i-th list-view pre-marshaled as
// json.RawMessage, or nil for a null slot.
func (a *ListView) GetOneForMarshal(i int) interface{} {
	if a.IsNull(i) {
		return nil
	}
	slice := a.newListValue(i)
	defer slice.Release()
	v, err := json.Marshal(slice)
	if err != nil {
		panic(err)
	}
	return json.RawMessage(v)
}

// MarshalJSON implements json.Marshaler, emitting a JSON array of lists.
func (a *ListView) MarshalJSON() ([]byte, error) {
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	buf.WriteByte('[')
	for i := 0; i < a.Len(); i++ {
		if i != 0 {
			buf.WriteByte(',')
		}
		if err := enc.Encode(a.GetOneForMarshal(i)); err != nil {
			return nil, err
		}
	}
	buf.WriteByte(']')
	return buf.Bytes(), nil
}

// arrayEqualListView compares two ListView arrays element-wise, skipping
// null slots (validity assumed to be compared by the caller).
func arrayEqualListView(left, right *ListView) bool {
	for i := 0; i < left.Len(); i++ {
		if left.IsNull(i) {
			continue
		}
		o := func() bool {
			l := left.newListValue(i)
			defer l.Release()
			r := right.newListValue(i)
			defer r.Release()
			return Equal(l, r)
		}()
		if !o {
			return false
		}
	}
	return true
}

// Len returns the number of elements in the array.
func (a *ListView) Len() int { return a.array.Len() }

// Offsets returns the raw int32 offsets buffer.
func (a *ListView) Offsets() []int32 { return a.offsets }

// Sizes returns the raw int32 sizes buffer.
func (a *ListView) Sizes() []int32 { return a.sizes }

// Retain increases the reference count of the array and its child values.
func (a *ListView) Retain() {
	a.array.Retain()
	a.values.Retain()
}

// Release decreases the reference count of the array and its child values.
func (a *ListView) Release() {
	a.array.Release()
	a.values.Release()
}

// ValueOffsets returns the [start, end) range in the child array that
// holds the elements of the list-view at index i. Unlike List, the end
// is start+size since list-views carry explicit sizes.
func (a *ListView) ValueOffsets(i int) (start, end int64) {
	debug.Assert(i >= 0 && i < a.array.data.length, "index out of range")
	j := i + a.array.data.offset
	size := int64(a.sizes[j])
	// If size is 0, skip accessing offsets.
	if size == 0 {
		start, end = 0, 0
		return
	}
	start = int64(a.offsets[j])
	end = start + size
	return
}

// LargeListView represents an immutable sequence of array values defined by an
// offset into a child array and a length.
type LargeListView struct {
	array
	values  arrow.Array
	offsets []int64
	sizes   []int64
}

var _ VarLenListLike = (*LargeListView)(nil)

// NewLargeListViewData returns a new LargeListView array value, from data.
func NewLargeListViewData(data arrow.ArrayData) *LargeListView {
	a := new(LargeListView)
	a.refCount = 1
	a.setData(data.(*Data))
	return a
}

// ListValues returns the flattened child array holding all list elements.
func (a *LargeListView) ListValues() arrow.Array { return a.values }

// ValueStr returns the JSON string representation of the list-view at
// index i, or NullValueStr if that slot is null.
func (a *LargeListView) ValueStr(i int) string {
	if !a.IsValid(i) {
		return NullValueStr
	}
	return string(a.GetOneForMarshal(i).(json.RawMessage))
}

// String returns a human-readable representation of the whole array.
func (a *LargeListView) String() string {
	o := new(strings.Builder)
	o.WriteString("[")
	for i := 0; i < a.Len(); i++ {
		if i > 0 {
			o.WriteString(" ")
		}
		if a.IsNull(i) {
			o.WriteString(NullValueStr)
			continue
		}
		sub := a.newListValue(i)
		fmt.Fprintf(o, "%v", sub)
		sub.Release()
	}
	o.WriteString("]")
	return o.String()
}

// newListValue returns the i-th list-view as a slice of the child array.
// The caller is responsible for releasing the returned array.
func (a *LargeListView) newListValue(i int) arrow.Array {
	beg, end := a.ValueOffsets(i)
	return NewSlice(a.values, beg, end)
}

// setData wires up the int64 offsets (buffer 1), sizes (buffer 2) and
// the child values array.
func (a *LargeListView) setData(data *Data) {
	debug.Assert(len(data.buffers) >= 3, "list-view data should have 3 buffers")
	a.array.setData(data)
	offsets := data.buffers[1]
	if offsets != nil {
		a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes())
	}
	sizes := data.buffers[2]
	if sizes != nil {
		a.sizes = arrow.Int64Traits.CastFromBytes(sizes.Bytes())
	}
	a.values = MakeFromData(data.childData[0])
}

// GetOneForMarshal returns the i-th list-view pre-marshaled as
// json.RawMessage, or nil for a null slot.
func (a *LargeListView) GetOneForMarshal(i int) interface{} {
	if a.IsNull(i) {
		return nil
	}
	slice := a.newListValue(i)
	defer slice.Release()
	v, err := json.Marshal(slice)
	if err != nil {
		panic(err)
	}
	return json.RawMessage(v)
}

// MarshalJSON implements json.Marshaler, emitting a JSON array of lists.
func (a *LargeListView) MarshalJSON() ([]byte, error) {
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	buf.WriteByte('[')
	for i := 0; i < a.Len(); i++ {
		if i != 0 {
			buf.WriteByte(',')
		}
		if err := enc.Encode(a.GetOneForMarshal(i)); err != nil {
			return nil, err
		}
	}
	buf.WriteByte(']')
	return buf.Bytes(), nil
}

// arrayEqualLargeListView compares two LargeListView arrays element-wise,
// skipping null slots (validity assumed to be compared by the caller).
func arrayEqualLargeListView(left, right *LargeListView) bool {
	for i := 0; i < left.Len(); i++ {
		if left.IsNull(i) {
			continue
		}
		o := func() bool {
			l := left.newListValue(i)
			defer l.Release()
			r := right.newListValue(i)
			defer r.Release()
			return Equal(l, r)
		}()
		if !o {
			return false
		}
	}
	return true
}

// Len returns the number of elements in the array.
func (a *LargeListView) Len() int { return a.array.Len() }

// Offsets returns the raw int64 offsets buffer.
func (a *LargeListView) Offsets() []int64 { return a.offsets }

// Sizes returns the raw int64 sizes buffer.
func (a *LargeListView) Sizes() []int64 { return a.sizes }

// ValueOffsets returns the [start, end) range in the child array that
// holds the elements of the list-view at index i.
func (a *LargeListView) ValueOffsets(i int) (start, end int64) {
	debug.Assert(i >= 0 && i < a.array.data.length, "index out of range")
	j := i + a.array.data.offset
	size := a.sizes[j]
	// If size is 0, skip accessing offsets.
	if size == 0 {
		return 0, 0
	}
	start = a.offsets[j]
	end = start + size
	return
}

// Retain increases the reference count of the array and its child values.
func (a *LargeListView) Retain() {
	a.array.Retain()
	a.values.Retain()
}

// Release decreases the reference count of the array and its child values.
func (a *LargeListView) Release() {
	a.array.Release()
	a.values.Release()
}

// Accessors for offsets and sizes to make ListView and LargeListView validation generic.
type offsetsAndSizes interface {
	offsetAt(slot int64) int64
	sizeAt(slot int64) int64
}

var _ offsetsAndSizes = (*ListView)(nil)
var _ offsetsAndSizes = (*LargeListView)(nil)

func (a *ListView) offsetAt(slot int64) int64 { return int64(a.offsets[int64(a.data.offset)+slot]) }

func (a *ListView) sizeAt(slot int64) int64 { return int64(a.sizes[int64(a.data.offset)+slot]) }

func (a *LargeListView) offsetAt(slot int64) int64 { return a.offsets[int64(a.data.offset)+slot] }

func (a *LargeListView) sizeAt(slot int64) int64 { return a.sizes[int64(a.data.offset)+slot] }

// outOfBoundsListViewOffset builds the error for an offset outside the
// child array's bounds.
func outOfBoundsListViewOffset(l offsetsAndSizes, slot int64, offsetLimit int64) error {
	offset := l.offsetAt(slot)
	return fmt.Errorf("%w: Offset invariant failure: offset for slot %d out of bounds. Expected %d to be at least 0 and less than %d", arrow.ErrInvalid, slot, offset, offsetLimit)
}

// outOfBoundsListViewSize builds the error for a negative size or a
// size that extends past the child array's bounds.
func outOfBoundsListViewSize(l offsetsAndSizes, slot int64, offsetLimit int64) error {
	size := l.sizeAt(slot)
	if size < 0 {
		return fmt.Errorf("%w: Offset invariant failure: size for slot %d out of bounds: %d < 0", arrow.ErrInvalid, slot, size)
	}
	offset := l.offsetAt(slot)
	return fmt.Errorf("%w: Offset invariant failure: size for slot %d out of bounds: %d + %d > %d", arrow.ErrInvalid, slot, offset, size, offsetLimit)
}

// Pre-condition: Basic validation has already been performed
func (a *array) fullyValidateOffsetsAndSizes(l offsetsAndSizes, offsetLimit int64) error {
	for slot := int64(0); slot < int64(a.Len()); slot += 1 {
		size := l.sizeAt(slot)
		if size > 0 {
			offset := l.offsetAt(slot)
			if offset < 0 || offset > offsetLimit {
				return outOfBoundsListViewOffset(l, slot, offsetLimit)
			}
			if size > offsetLimit-int64(offset) {
				return outOfBoundsListViewSize(l, slot, offsetLimit)
			}
		} else if size < 0 {
			return outOfBoundsListViewSize(l, slot, offsetLimit)
		}
	}

	return nil
}

// validateOffsetsAndMaybeSizes checks buffer presence and capacity for a
// list or list-view array; with fullValidation it additionally checks
// every offset/size pair of a list-view against the child length.
func (a *array) validateOffsetsAndMaybeSizes(l offsetsAndSizes, offsetByteWidth int, isListView bool, offsetLimit int64, fullValidation bool) error {
	nonEmpty := a.Len() > 0
	if a.data.buffers[1] == nil {
		// For length 0, an empty offsets buffer is accepted (ARROW-544).
		if nonEmpty {
			return fmt.Errorf("non-empty array but offsets are null")
		}
		return nil
	}
	if isListView && a.data.buffers[2] == nil {
		if nonEmpty {
			return fmt.Errorf("non-empty array but sizes are null")
		}
		return nil
	}

	var requiredOffsets int
	if nonEmpty {
		requiredOffsets = a.Len() + a.Offset()
		// Plain lists need one extra (closing) offset; list-views do not.
		if !isListView {
			requiredOffsets += 1
		}
	} else {
		requiredOffsets = 0
	}
	offsetsByteSize := a.data.buffers[1].Len()
	if offsetsByteSize/offsetByteWidth < requiredOffsets {
		return fmt.Errorf("offsets buffer size (bytes): %d isn't large enough for length: %d and offset: %d",
			offsetsByteSize, a.Len(), a.Offset())
	}
	if isListView {
		requiredSizes := a.Len() + a.Offset()
		sizesBytesSize := a.data.buffers[2].Len()
		if sizesBytesSize/offsetByteWidth < requiredSizes {
			return fmt.Errorf("sizes buffer size (bytes): %d isn't large enough for length: %d and offset: %d",
				sizesBytesSize, a.Len(), a.Offset())
		}
	}

	if fullValidation && requiredOffsets > 0 {
		if isListView {
			return a.fullyValidateOffsetsAndSizes(l, offsetLimit)
		}
		// TODO: implement validation of List and LargeList
		// return fullyValidateOffsets(offset_limit)
		return nil
	}
	return nil
}

// validate runs buffer validation; offset width 4 for int32 list-views.
func (a *ListView) validate(fullValidation bool) error {
	values := a.array.data.childData[0]
	offsetLimit := values.Len()
	return a.array.validateOffsetsAndMaybeSizes(a, 4, true, int64(offsetLimit), fullValidation)
}

// Validate performs cheap structural validation only.
func (a *ListView) Validate() error {
	return a.validate(false)
}

// ValidateFull additionally checks every offset/size pair.
func (a *ListView) ValidateFull() error {
	return a.validate(true)
}

// validate runs buffer validation; offset width 8 for int64 list-views.
func (a *LargeListView) validate(fullValidation bool) error {
	values := a.array.data.childData[0]
	offsetLimit := values.Len()
	return a.array.validateOffsetsAndMaybeSizes(a, 8, true, int64(offsetLimit), fullValidation)
}

// Validate performs cheap structural validation only.
func (a *LargeListView) Validate() error {
	return a.validate(false)
}

// ValidateFull additionally checks every offset/size pair.
func (a *LargeListView) ValidateFull() error {
	return a.validate(true)
}

// baseListViewBuilder is the shared implementation behind ListViewBuilder
// and LargeListViewBuilder; the offset width differences are abstracted
// through appendOffsetVal/appendSizeVal.
type baseListViewBuilder struct {
	builder

	values  Builder // value builder for the list-view's elements.
	offsets Builder
	sizes   Builder

	// actual list-view type
	dt              arrow.DataType
	appendOffsetVal func(int)
	appendSizeVal   func(int)
}

type ListViewBuilder struct {
	baseListViewBuilder
}

type LargeListViewBuilder struct {
	baseListViewBuilder
}

// NewListViewBuilder returns a builder, using the provided memory allocator.
// The created list-view builder will create a list whose elements will be
// of type etype.
func NewListViewBuilder(mem memory.Allocator, etype arrow.DataType) *ListViewBuilder {
	offsetBldr := NewInt32Builder(mem)
	sizeBldr := NewInt32Builder(mem)
	return &ListViewBuilder{
		baseListViewBuilder{
			builder:         builder{refCount: 1, mem: mem},
			values:          NewBuilder(mem, etype),
			offsets:         offsetBldr,
			sizes:           sizeBldr,
			dt:              arrow.ListViewOf(etype),
			appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) },
			appendSizeVal:   func(s int) { sizeBldr.Append(int32(s)) },
		},
	}
}

// NewListViewBuilderWithField takes a field to use for the child rather than just
// a datatype to allow for more customization.
func NewListViewBuilderWithField(mem memory.Allocator, field arrow.Field) *ListViewBuilder {
	offsetBldr := NewInt32Builder(mem)
	sizeBldr := NewInt32Builder(mem)
	return &ListViewBuilder{
		baseListViewBuilder{
			builder:         builder{refCount: 1, mem: mem},
			values:          NewBuilder(mem, field.Type),
			offsets:         offsetBldr,
			sizes:           sizeBldr,
			dt:              arrow.ListViewOfField(field),
			appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) },
			appendSizeVal:   func(s int) { sizeBldr.Append(int32(s)) },
		},
	}
}

// Type returns the data type this builder produces: the configured
// list-view type with its element type refreshed from the value builder.
func (b *baseListViewBuilder) Type() arrow.DataType {
	switch dt := b.dt.(type) {
	case *arrow.ListViewType:
		f := dt.ElemField()
		f.Type = b.values.Type()
		return arrow.ListViewOfField(f)
	case *arrow.LargeListViewType:
		f := dt.ElemField()
		f.Type = b.values.Type()
		return arrow.LargeListViewOfField(f)
	}
	return nil
}

// NewLargeListViewBuilder returns a builder, using the provided memory allocator.
// The created list-view builder will create a list whose elements will be of type etype.
func NewLargeListViewBuilder(mem memory.Allocator, etype arrow.DataType) *LargeListViewBuilder { offsetBldr := NewInt64Builder(mem) sizeBldr := NewInt64Builder(mem) return &LargeListViewBuilder{ baseListViewBuilder{ builder: builder{refCount: 1, mem: mem}, values: NewBuilder(mem, etype), offsets: offsetBldr, sizes: sizeBldr, dt: arrow.LargeListViewOf(etype), appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, appendSizeVal: func(s int) { sizeBldr.Append(int64(s)) }, }, } } // NewLargeListViewBuilderWithField takes a field rather than just an element type // to allow for more customization of the final type of the LargeListView Array func NewLargeListViewBuilderWithField(mem memory.Allocator, field arrow.Field) *LargeListViewBuilder { offsetBldr := NewInt64Builder(mem) sizeBldr := NewInt64Builder(mem) return &LargeListViewBuilder{ baseListViewBuilder{ builder: builder{refCount: 1, mem: mem}, values: NewBuilder(mem, field.Type), offsets: offsetBldr, sizes: sizeBldr, dt: arrow.LargeListViewOfField(field), appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, appendSizeVal: func(o int) { sizeBldr.Append(int64(o)) }, }, } } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *baseListViewBuilder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		b.values.Release()
		b.offsets.Release()
		b.sizes.Release()
	}
}

// AppendDimensions appends a valid slot with an explicit offset and size,
// without touching the value builder.
func (b *baseListViewBuilder) AppendDimensions(offset int, listSize int) {
	b.Reserve(1)
	b.unsafeAppendBoolToBitmap(true)
	b.appendOffsetVal(offset)
	b.appendSizeVal(listSize)
}

// Append is intentionally unsupported for list-views: a size is always
// required, so callers must use AppendWithSize.
func (b *baseListViewBuilder) Append(v bool) {
	debug.Assert(false, "baseListViewBuilder.Append should never be called -- use AppendWithSize instead")
}

// AppendWithSize starts a new list-view slot whose offset is the current
// value-builder length and whose size is listSize; v indicates validity.
func (b *baseListViewBuilder) AppendWithSize(v bool, listSize int) {
	debug.Assert(v || listSize == 0, "invalid list-view should have size 0")
	b.Reserve(1)
	b.unsafeAppendBoolToBitmap(v)
	b.appendOffsetVal(b.values.Len())
	b.appendSizeVal(listSize)
}

// AppendNull appends a null list-view slot (size 0).
func (b *baseListViewBuilder) AppendNull() {
	b.AppendWithSize(false, 0)
}

// AppendNulls appends n null list-view slots.
func (b *baseListViewBuilder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

// AppendEmptyValue appends a valid, zero-length list-view.
func (b *baseListViewBuilder) AppendEmptyValue() {
	b.AppendWithSize(true, 0)
}

// AppendEmptyValues appends n valid, zero-length list-views.
func (b *baseListViewBuilder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// AppendValuesWithSizes bulk-appends pre-computed int32 offsets, sizes
// and validity flags.
func (b *ListViewBuilder) AppendValuesWithSizes(offsets []int32, sizes []int32, valid []bool) {
	b.Reserve(len(valid))
	b.offsets.(*Int32Builder).AppendValues(offsets, nil)
	b.sizes.(*Int32Builder).AppendValues(sizes, nil)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(valid))
}

// AppendValuesWithSizes bulk-appends pre-computed int64 offsets, sizes
// and validity flags.
func (b *LargeListViewBuilder) AppendValuesWithSizes(offsets []int64, sizes []int64, valid []bool) {
	b.Reserve(len(valid))
	b.offsets.(*Int64Builder).AppendValues(offsets, nil)
	b.sizes.(*Int64Builder).AppendValues(sizes, nil)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(valid))
}

// unsafeAppendBoolToBitmap sets/clears the next validity bit; callers
// must have Reserved space first (hence "unsafe").
func (b *baseListViewBuilder) unsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// init allocates for capacity slots. Unlike plain lists, list-views
// keep exactly one offset and one size per slot (no closing offset).
func (b *baseListViewBuilder) init(capacity int) {
	b.builder.init(capacity)
	b.offsets.init(capacity)
	b.sizes.init(capacity)
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *baseListViewBuilder) Reserve(n int) {
	b.builder.reserve(n, b.resizeHelper)
	b.offsets.Reserve(n)
	b.sizes.Reserve(n)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may reduced.
func (b *baseListViewBuilder) Resize(n int) {
	b.resizeHelper(n)
	b.offsets.Resize(n)
	b.sizes.Resize(n)
}

// resizeHelper resizes the validity bitmap, enforcing the minimum
// builder capacity and initializing on first use.
func (b *baseListViewBuilder) resizeHelper(n int) {
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(n, b.builder.init)
	}
}

// ValueBuilder returns the builder used for the list-view elements.
func (b *baseListViewBuilder) ValueBuilder() Builder {
	return b.values
}

// NewArray creates a ListView array from the memory buffers used by the builder and
// resets the ListViewBuilder so it can be used to build a new array.
func (b *ListViewBuilder) NewArray() arrow.Array {
	return b.NewListViewArray()
}

// NewArray creates a LargeListView array from the memory buffers used by the builder
// and resets the LargeListViewBuilder so it can be used to build a new array.
func (b *LargeListViewBuilder) NewArray() arrow.Array {
	return b.NewLargeListViewArray()
}

// NewListViewArray creates a ListView array from the memory buffers used by the builder
// and resets the ListViewBuilder so it can be used to build a new array.
func (b *ListViewBuilder) NewListViewArray() (a *ListView) {
	data := b.newData()
	a = NewListViewData(data)
	data.Release()
	return
}

// NewLargeListViewArray creates a ListView array from the memory buffers used by the
// builder and resets the LargeListViewBuilder so it can be used to build a new array.
func (b *LargeListViewBuilder) NewLargeListViewArray() (a *LargeListView) {
	data := b.newData()
	a = NewLargeListViewData(data)
	data.Release()
	return
}

// newData finalizes the builder's buffers (validity, offsets, sizes,
// child values) into an ArrayData and resets the builder. The caller
// owns (and must Release) the returned data.
func (b *baseListViewBuilder) newData() (data *Data) {
	values := b.values.NewArray()
	defer values.Release()

	var offsets *memory.Buffer
	if b.offsets != nil {
		arr := b.offsets.NewArray()
		defer arr.Release()
		offsets = arr.Data().Buffers()[1]
	}

	var sizes *memory.Buffer
	if b.sizes != nil {
		arr := b.sizes.NewArray()
		defer arr.Release()
		sizes = arr.Data().Buffers()[1]
	}

	data = NewData(
		b.Type(), b.length,
		[]*memory.Buffer{
			b.nullBitmap,
			offsets,
			sizes,
		},
		[]arrow.ArrayData{values.Data()},
		b.nulls,
		0,
	)
	b.reset()

	return
}

// AppendValueFromString parses a single JSON-encoded list value (or the
// null sentinel) and appends it.
func (b *baseListViewBuilder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	return b.UnmarshalOne(json.NewDecoder(strings.NewReader(s)))
}

// UnmarshalOne consumes one JSON value from dec: a JSON array becomes a
// list-view, JSON null becomes a null slot; anything else is a type error.
func (b *baseListViewBuilder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}

	switch t {
	case json.Delim('['):
		offset := b.values.Len()
		// 0 is a placeholder size as we don't know the actual size yet
		b.AppendWithSize(true, 0)
		if err := b.values.Unmarshal(dec); err != nil {
			return err
		}
		// consume ']'
		_, err := dec.Token()
		// replace the last size with the actual size
		// (patched directly in the size builder's raw data)
		switch b.sizes.(type) {
		case *Int32Builder:
			b.sizes.(*Int32Builder).rawData[b.sizes.Len()-1] = int32(b.values.Len() - offset)
		case *Int64Builder:
			b.sizes.(*Int64Builder).rawData[b.sizes.Len()-1] = int64(b.values.Len() - offset)
		}
		return err
	case nil:
		b.AppendNull()
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Struct: b.dt.String(),
		}
	}

	return nil
}

// Unmarshal appends values from dec until the enclosing JSON array ends.
func (b *baseListViewBuilder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON implements json.Unmarshaler; data must be a JSON array.
// NOTE(review): if the first token is not a json.Delim, `delim` is its
// zero value here, so the error message prints an empty delimiter rather
// than the offending token.
func (b *baseListViewBuilder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("list-view builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

// Find the minimum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array.
//
// Pre-conditions:
//
//	input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64
//	input.Len() > 0 && input.NullN() != input.Len()
func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset {
	var bitmap []byte
	if input.Buffers()[0] != nil {
		bitmap = input.Buffers()[0].Bytes()
	}
	offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[input.Offset():]
	sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[input.Offset():]

	isNull := func(i int) bool {
		return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i)
	}

	// It's very likely that the first non-null non-empty list-view starts at
	// offset 0 of the child array.
	i := 0
	for i < input.Len() && (isNull(i) || sizes[i] == 0) {
		i += 1
	}
	if i >= input.Len() {
		return 0
	}
	minOffset := offsets[i]
	if minOffset == 0 {
		// early exit: offset 0 found already
		return 0
	}

	// Slow path: scan the buffers entirely.
	i += 1
	for ; i < input.Len(); i += 1 {
		if isNull(i) {
			continue
		}
		offset := offsets[i]
		// Zero-sized views are ignored: their offsets may be arbitrary.
		if offset < minOffset && sizes[i] > 0 {
			minOffset = offset
		}
	}
	return minOffset
}

// Find the maximum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array.
//
// Pre-conditions:
//
//	input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64
//	input.Len() > 0 && input.NullN() != input.Len()
func maxListViewEnd[Offset int32 | int64](input arrow.ArrayData) Offset {
	inputOffset := input.Offset()
	var bitmap []byte
	if input.Buffers()[0] != nil {
		bitmap = input.Buffers()[0].Bytes()
	}
	offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[inputOffset:]
	sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[inputOffset:]

	isNull := func(i int) bool {
		return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i)
	}

	// Scan backwards for the last non-null, non-empty view.
	i := input.Len() - 1 // safe because input.Len() > 0
	for i != 0 && (isNull(i) || sizes[i] == 0) {
		i -= 1
	}
	offset := offsets[i]
	size := sizes[i]
	if i == 0 {
		if isNull(i) || sizes[i] == 0 {
			return 0
		} else {
			return offset + size
		}
	}

	values := input.Children()[0]
	maxEnd := offsets[i] + sizes[i]
	if maxEnd == Offset(values.Len()) {
		// Early-exit: maximum possible view-end found already.
		return maxEnd
	}

	// Slow path: scan the buffers entirely.
	for ; i >= 0; i -= 1 {
		offset := offsets[i]
		size := sizes[i]
		if size > 0 && !isNull(i) {
			if offset+size > maxEnd {
				maxEnd = offset + size
				if maxEnd == Offset(values.Len()) {
					// Cannot get any larger; stop scanning.
					return maxEnd
				}
			}
		}
	}
	return maxEnd
}

// rangeOfValuesUsed returns (offset, length) of the contiguous span of
// the child array referenced by the list values of input. For plain
// list/map types this is derived from the first and last offsets; for
// list-views it requires a scan since views may be out of order.
func rangeOfValuesUsed(input arrow.ArrayData) (int, int) {
	if input.Len() == 0 || input.NullN() == input.Len() {
		return 0, 0
	}
	var minOffset, maxEnd int
	switch input.DataType().(type) {
	case *arrow.ListViewType:
		minOffset = int(minListViewOffset[int32](input))
		maxEnd = int(maxListViewEnd[int32](input))
	case *arrow.LargeListViewType:
		minOffset = int(minListViewOffset[int64](input))
		maxEnd = int(maxListViewEnd[int64](input))
	case *arrow.ListType:
		offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():]
		minOffset = int(offsets[0])
		maxEnd = int(offsets[len(offsets)-1])
	case *arrow.LargeListType:
		offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():]
		minOffset = int(offsets[0])
		maxEnd = int(offsets[len(offsets)-1])
	case *arrow.MapType:
		offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():]
		minOffset = int(offsets[0])
		maxEnd = int(offsets[len(offsets)-1])
	}
	return minOffset, maxEnd - minOffset
}

// Returns the smallest contiguous range of values of the child array that are
// referenced by all the list values in the input array.
func RangeOfValuesUsed(input VarLenListLike) (int, int) { return rangeOfValuesUsed(input.Data()) } var ( _ arrow.Array = (*List)(nil) _ arrow.Array = (*LargeList)(nil) _ arrow.Array = (*ListView)(nil) _ arrow.Array = (*LargeListView)(nil) _ Builder = (*ListBuilder)(nil) _ Builder = (*LargeListBuilder)(nil) _ Builder = (*ListViewBuilder)(nil) _ Builder = (*LargeListViewBuilder)(nil) _ VarLenListLike = (*List)(nil) _ VarLenListLike = (*LargeList)(nil) _ VarLenListLike = (*Map)(nil) _ VarLenListLike = (*ListView)(nil) _ VarLenListLike = (*LargeListView)(nil) _ ListLike = (*FixedSizeList)(nil) _ VarLenListLikeBuilder = (*ListBuilder)(nil) _ VarLenListLikeBuilder = (*LargeListBuilder)(nil) _ VarLenListLikeBuilder = (*ListBuilder)(nil) _ VarLenListLikeBuilder = (*LargeListBuilder)(nil) _ VarLenListLikeBuilder = (*MapBuilder)(nil) _ ListLikeBuilder = (*FixedSizeListBuilder)(nil) ) arrow-go-18.2.0/arrow/array/list_test.go000066400000000000000000000616361476434502500201760ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestListArray(t *testing.T) { tests := []struct { typeID arrow.Type offsets interface{} sizes interface{} dt arrow.DataType }{ {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, {arrow.LIST_VIEW, []int32{0, 3, 3, 3}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, } for _, tt := range tests { t.Run(tt.typeID.String(), func(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( vs = []int32{0, 1, 2, 3, 4, 5, 6} lengths = []int{3, 0, 0, 4} isValid = []bool{true, false, true, true} ) lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) defer lb.Release() for i := 0; i < 10; i++ { vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) pos := 0 for i, length := range lengths { lb.AppendWithSize(isValid[i], length) for j := 0; j < length; j++ { vb.Append(vs[pos]) pos++ } } arr := lb.NewArray().(array.ListLike) defer arr.Release() arr.Retain() arr.Release() if got, want := arr.DataType().ID(), tt.typeID; got != want { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.Len(), len(isValid); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range lengths { if got, want := arr.IsValid(i), isValid[i]; got != 
want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } if got, want := arr.IsNull(i), !isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } var gotOffsets, gotSizes interface{} switch tt.typeID { case arrow.LIST: arr := arr.(*array.List) gotOffsets = arr.Offsets() case arrow.LARGE_LIST: arr := arr.(*array.LargeList) gotOffsets = arr.Offsets() case arrow.LIST_VIEW: arr := arr.(*array.ListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() case arrow.LARGE_LIST_VIEW: arr := arr.(*array.LargeListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() } if !reflect.DeepEqual(gotOffsets, tt.offsets) { t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) } if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { if !reflect.DeepEqual(gotSizes, tt.sizes) { t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) } } varr := arr.ListValues().(*array.Int32) if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } }) } } // Like the list-view tests in TestListArray, but with out-of-order offsets. 
func TestListViewArray(t *testing.T) { tests := []struct { typeID arrow.Type offsets interface{} sizes interface{} dt arrow.DataType }{ {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, } for _, tt := range tests { t.Run(tt.typeID.String(), func(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} lengths = []int{3, 0, 0, 4} isValid = []bool{true, false, true, true} ) lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) defer lb.Release() for i := 0; i < 10; i++ { switch lvb := lb.(type) { case *array.ListViewBuilder: lvb.AppendDimensions(5, 3) lb.AppendNull() lvb.AppendDimensions(0, 0) lvb.AppendDimensions(1, 4) case *array.LargeListViewBuilder: lvb.AppendDimensions(5, 3) lb.AppendNull() lvb.AppendDimensions(0, 0) lvb.AppendDimensions(1, 4) } vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) vb.AppendValues(vs, []bool{false, true, true, true, true, true, true, true}) arr := lb.NewArray().(array.ListLike) defer arr.Release() arr.Retain() arr.Release() if got, want := arr.DataType().ID(), tt.typeID; got != want { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.Len(), len(isValid); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range lengths { if got, want := arr.IsValid(i), isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } if got, want := arr.IsNull(i), !isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } var gotOffsets, gotSizes interface{} switch tt.typeID { case arrow.LIST_VIEW: arr := arr.(*array.ListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() case arrow.LARGE_LIST_VIEW: arr := arr.(*array.LargeListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() } if 
!reflect.DeepEqual(gotOffsets, tt.offsets) { t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) } if !reflect.DeepEqual(gotSizes, tt.sizes) { t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) } varr := arr.ListValues().(*array.Int32) if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } }) } } func TestListArrayEmpty(t *testing.T) { typ := []arrow.DataType{ arrow.ListOf(arrow.PrimitiveTypes.Int32), arrow.LargeListOf(arrow.PrimitiveTypes.Int32), arrow.ListViewOf(arrow.PrimitiveTypes.Int32), arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32), } for _, dt := range typ { t.Run(dt.String(), func(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) lb := array.NewBuilder(pool, dt) defer lb.Release() arr := lb.NewArray() defer arr.Release() if got, want := arr.Len(), 0; got != want { t.Fatalf("got=%d, want=%d", got, want) } }) } } func TestListArrayBulkAppend(t *testing.T) { tests := []struct { typeID arrow.Type offsets interface{} sizes interface{} dt arrow.DataType }{ {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, {arrow.LIST_VIEW, []int32{0, 3, 3, 3}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, } for _, tt := range tests { t.Run(tt.typeID.String(), func(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( vs = []int32{0, 1, 2, 3, 4, 5, 6} lengths = []int{3, 0, 0, 4} isValid = []bool{true, false, true, true} ) lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) defer lb.Release() vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) switch tt.typeID { case arrow.LIST: 
lb.(*array.ListBuilder).AppendValues(tt.offsets.([]int32), isValid) case arrow.LARGE_LIST: lb.(*array.LargeListBuilder).AppendValues(tt.offsets.([]int64), isValid) case arrow.LIST_VIEW: lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) case arrow.LARGE_LIST_VIEW: lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) } for _, v := range vs { vb.Append(v) } arr := lb.NewArray().(array.VarLenListLike) defer arr.Release() if got, want := arr.DataType().ID(), tt.typeID; got != want { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.Len(), len(isValid); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range lengths { if got, want := arr.IsValid(i), isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } if got, want := arr.IsNull(i), !isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } var gotOffsets, gotSizes interface{} switch tt.typeID { case arrow.LIST: arr := arr.(*array.List) gotOffsets = arr.Offsets() case arrow.LARGE_LIST: arr := arr.(*array.LargeList) gotOffsets = arr.Offsets() case arrow.LIST_VIEW: arr := arr.(*array.ListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() case arrow.LARGE_LIST_VIEW: arr := arr.(*array.LargeListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() } if !reflect.DeepEqual(gotOffsets, tt.offsets) { t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) } if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { if !reflect.DeepEqual(gotSizes, tt.sizes) { t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) } } varr := arr.ListValues().(*array.Int32) if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } }) } } func TestListViewArrayBulkAppend(t *testing.T) { tests := []struct { typeID arrow.Type offsets interface{} sizes interface{} dt arrow.DataType }{ {arrow.LIST_VIEW, 
[]int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, } for _, tt := range tests { t.Run(tt.typeID.String(), func(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} lengths = []int{3, 0, 0, 4} isValid = []bool{true, false, true, true} ) lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) defer lb.Release() vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) switch tt.typeID { case arrow.LIST_VIEW: lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) case arrow.LARGE_LIST_VIEW: lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) } for _, v := range vs { vb.Append(v) } arr := lb.NewArray().(array.VarLenListLike) defer arr.Release() if got, want := arr.DataType().ID(), tt.typeID; got != want { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.Len(), len(isValid); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range lengths { if got, want := arr.IsValid(i), isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } if got, want := arr.IsNull(i), !isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } var gotOffsets, gotSizes interface{} switch tt.typeID { case arrow.LIST_VIEW: arr := arr.(*array.ListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() case arrow.LARGE_LIST_VIEW: arr := arr.(*array.LargeListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() } if !reflect.DeepEqual(gotOffsets, tt.offsets) { t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) } if !reflect.DeepEqual(gotSizes, tt.sizes) { t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) } varr := arr.ListValues().(*array.Int32) if got, want := 
varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } }) } } func TestListArraySlice(t *testing.T) { tests := []struct { typeID arrow.Type offsets interface{} sizes interface{} dt arrow.DataType }{ {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, {arrow.LIST_VIEW, []int32{0, 3, 3, 3, 7}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3, 7}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, } for _, tt := range tests { t.Run(tt.typeID.String(), func(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( vs = []int32{0, 1, 2, 3, 4, 5, 6} lengths = []int{3, 0, 0, 4} isValid = []bool{true, false, true, true} ) lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) defer lb.Release() vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) switch tt.typeID { case arrow.LIST: lb.(*array.ListBuilder).AppendValues(tt.offsets.([]int32), isValid) case arrow.LARGE_LIST: lb.(*array.LargeListBuilder).AppendValues(tt.offsets.([]int64), isValid) case arrow.LIST_VIEW: lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) case arrow.LARGE_LIST_VIEW: lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) } for _, v := range vs { vb.Append(v) } arr := lb.NewArray().(array.VarLenListLike) defer arr.Release() if got, want := arr.DataType().ID(), tt.typeID; got != want { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.Len(), len(isValid); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range lengths { if got, want := arr.IsValid(i), isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } if got, want := 
arr.IsNull(i), !isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } var gotOffsets, gotSizes interface{} switch tt.typeID { case arrow.LIST: arr := arr.(*array.List) gotOffsets = arr.Offsets() case arrow.LARGE_LIST: arr := arr.(*array.LargeList) gotOffsets = arr.Offsets() case arrow.LIST_VIEW: arr := arr.(*array.ListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() case arrow.LARGE_LIST_VIEW: arr := arr.(*array.LargeListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() } if !reflect.DeepEqual(gotOffsets, tt.offsets) { t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) } if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { if !reflect.DeepEqual(gotSizes, tt.sizes) { t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) } } varr := arr.ListValues().(*array.Int32) if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.String(), `[[0 1 2] (null) [] [3 4 5 6]]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) sub := array.NewSlice(arr, 1, 4).(array.ListLike) defer sub.Release() if got, want := sub.String(), `[(null) [] [3 4 5 6]]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } }) } } func TestListViewArraySlice(t *testing.T) { tests := []struct { typeID arrow.Type offsets interface{} sizes interface{} dt arrow.DataType }{ {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, } for _, tt := range tests { t.Run(tt.typeID.String(), func(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} lengths = []int{3, 0, 0, 4} isValid = []bool{true, false, true, true} ) lb := array.NewBuilder(pool, 
tt.dt).(array.VarLenListLikeBuilder) defer lb.Release() vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) switch tt.typeID { case arrow.LIST_VIEW: lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) case arrow.LARGE_LIST_VIEW: lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) } for _, v := range vs { vb.Append(v) } arr := lb.NewArray().(array.VarLenListLike) defer arr.Release() if got, want := arr.DataType().ID(), tt.typeID; got != want { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.Len(), len(isValid); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range lengths { if got, want := arr.IsValid(i), isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } if got, want := arr.IsNull(i), !isValid[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } var gotOffsets, gotSizes interface{} switch tt.typeID { case arrow.LIST_VIEW: arr := arr.(*array.ListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() case arrow.LARGE_LIST_VIEW: arr := arr.(*array.LargeListView) gotOffsets = arr.Offsets() gotSizes = arr.Sizes() } if !reflect.DeepEqual(gotOffsets, tt.offsets) { t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) } if !reflect.DeepEqual(gotSizes, tt.sizes) { t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) } varr := arr.ListValues().(*array.Int32) if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.String(), `[[0 1 2] (null) [] [3 4 5 6]]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) sub := array.NewSlice(arr, 1, 4).(array.ListLike) defer sub.Release() if got, want := sub.String(), `[(null) [] [3 4 5 6]]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } }) } } func TestVarLenListLikeStringRoundTrip(t *testing.T) { // 1. 
create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) builders := []array.VarLenListLikeBuilder{ array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32), array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32), array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), } builders1 := []array.VarLenListLikeBuilder{ array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32), array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32), array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), } for i, b := range builders { defer b.Release() vb := b.ValueBuilder().(*array.Int32Builder) var values = [][]int32{ {0, 1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6, 7}, {2, 3, 4, 5, 6, 7, 8}, {3, 4, 5, 6, 7, 8, 9}, } for _, value := range values { b.AppendNull() b.AppendWithSize(true, 2*len(value)) for _, el := range value { vb.Append(el) vb.AppendNull() } b.AppendWithSize(false, 0) } arr := b.NewArray() defer arr.Release() // 2. create array via AppendValueFromString b1 := builders1[i] defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray() defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } } // Test the string roun-trip for a list-view containing out-of-order offsets. func TestListViewStringRoundTrip(t *testing.T) { // 1. 
create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) builders := []array.VarLenListLikeBuilder{ array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), } builders1 := []array.VarLenListLikeBuilder{ array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), } for i, b := range builders { defer b.Release() switch lvb := b.(type) { case *array.ListViewBuilder: lvb.AppendDimensions(5, 3) b.AppendNull() lvb.AppendDimensions(0, 0) lvb.AppendDimensions(1, 4) case *array.LargeListViewBuilder: lvb.AppendDimensions(5, 3) b.AppendNull() lvb.AppendDimensions(0, 0) lvb.AppendDimensions(1, 4) } vb := b.ValueBuilder().(*array.Int32Builder) vs := []int32{-1, 3, 4, 5, 6, 0, 1, 2} isValid := []bool{false, true, true, true, true, true, true, true} vb.Reserve(len(vs)) vb.AppendValues(vs, isValid) arr := b.NewArray() defer arr.Release() // 2. 
create array via AppendValueFromString b1 := builders1[i] defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray() defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } } func TestRangeOfValuesUsed(t *testing.T) { tests := []struct { typeID arrow.Type dt arrow.DataType }{ {arrow.LIST, arrow.ListOf(arrow.PrimitiveTypes.Int16)}, {arrow.LARGE_LIST, arrow.LargeListOf(arrow.PrimitiveTypes.Int16)}, {arrow.LIST_VIEW, arrow.ListViewOf(arrow.PrimitiveTypes.Int16)}, {arrow.LARGE_LIST_VIEW, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int16)}, } for _, tt := range tests { t.Run(tt.typeID.String(), func(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) isListView := tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW bldr := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) defer bldr.Release() var arr array.VarLenListLike // Empty array arr = bldr.NewArray().(array.VarLenListLike) defer arr.Release() offset, len := array.RangeOfValuesUsed(arr) assert.Equal(t, 0, offset) assert.Equal(t, 0, len) // List-like array with only nulls bldr.AppendNulls(3) arr = bldr.NewArray().(array.VarLenListLike) defer arr.Release() offset, len = array.RangeOfValuesUsed(arr) assert.Equal(t, 0, offset) assert.Equal(t, 0, len) // Array with nulls and non-nulls (starting at a non-zero offset) vb := bldr.ValueBuilder().(*array.Int16Builder) vb.Append(-2) vb.Append(-1) bldr.AppendWithSize(false, 0) bldr.AppendWithSize(true, 2) vb.Append(0) vb.Append(1) bldr.AppendWithSize(true, 3) vb.Append(2) vb.Append(3) vb.Append(4) if isListView { vb.Append(10) vb.Append(11) } arr = bldr.NewArray().(array.VarLenListLike) defer arr.Release() offset, len = array.RangeOfValuesUsed(arr) assert.Equal(t, 2, offset) assert.Equal(t, 5, len) // Overlapping list-views // [null, [0, 1, 2, 3, 4, 5], [1, 2], null, [4], null, null] vb = 
bldr.ValueBuilder().(*array.Int16Builder) vb.Append(-2) vb.Append(-1) bldr.AppendWithSize(false, 0) if isListView { bldr.AppendWithSize(true, 6) vb.Append(0) bldr.AppendWithSize(true, 2) vb.Append(1) vb.Append(2) vb.Append(3) bldr.AppendWithSize(false, 0) bldr.AppendWithSize(true, 1) vb.Append(4) vb.Append(5) // -- used range ends here -- vb.Append(10) vb.Append(11) } else { bldr.AppendWithSize(true, 6) vb.Append(0) vb.Append(1) vb.Append(2) vb.Append(3) vb.Append(4) vb.Append(5) bldr.AppendWithSize(true, 2) vb.Append(1) vb.Append(2) bldr.AppendWithSize(false, 0) bldr.AppendWithSize(true, 1) vb.Append(4) } bldr.AppendNulls(2) arr = bldr.NewArray().(array.VarLenListLike) defer arr.Release() // Check the range offset, len = array.RangeOfValuesUsed(arr) assert.Equal(t, 2, offset) if isListView { assert.Equal(t, 6, len) } else { assert.Equal(t, 9, len) } }) } } arrow-go-18.2.0/arrow/array/map.go000066400000000000000000000260601476434502500167310ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array import ( "bytes" "fmt" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) // Map represents an immutable sequence of Key/Value structs. It is a // logical type that is implemented as a List. type Map struct { *List keys, items arrow.Array } var _ ListLike = (*Map)(nil) // NewMapData returns a new Map array value, from data func NewMapData(data arrow.ArrayData) *Map { a := &Map{List: &List{}} a.refCount = 1 a.setData(data.(*Data)) return a } // KeysSorted checks the datatype that was used to construct this array and // returns the KeysSorted boolean value used to denote if the key array is // sorted for each list element. // // Important note: Nothing is enforced regarding the KeysSorted value, it is // solely a metadata field that should be set if keys within each value are sorted. // This value is not used at all in regards to comparisons / equality. func (a *Map) KeysSorted() bool { return a.DataType().(*arrow.MapType).KeysSorted } func (a *Map) validateData(data *Data) { if len(data.childData) != 1 || data.childData[0] == nil { panic("arrow/array: expected one child array for map array") } if data.childData[0].DataType().ID() != arrow.STRUCT { panic("arrow/array: map array child should be struct type") } if data.childData[0].NullN() != 0 { panic("arrow/array: map array child array should have no nulls") } if len(data.childData[0].Children()) != 2 { panic("arrow/array: map array child array should have two fields") } if data.childData[0].Children()[0].NullN() != 0 { panic("arrow/array: map array keys array should have no nulls") } } func (a *Map) setData(data *Data) { a.validateData(data) a.List.setData(data) a.keys = MakeFromData(data.childData[0].Children()[0]) a.items = MakeFromData(data.childData[0].Children()[1]) } // Keys returns the full Array of Key values, equivalent to grabbing // the key field of the child struct. 
func (a *Map) Keys() arrow.Array { return a.keys } // Items returns the full Array of Item values, equivalent to grabbing // the Value field (the second field) of the child struct. func (a *Map) Items() arrow.Array { return a.items } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. func (a *Map) Retain() { a.List.Retain() a.keys.Retain() a.items.Retain() } // Release decreases the reference count by 1. // Release may be called simultaneously from multiple goroutines. // When the reference count goes to zero, the memory is freed. func (a *Map) Release() { a.List.Release() a.keys.Release() a.items.Release() } func arrayEqualMap(left, right *Map) bool { // since Map is implemented using a list, we can just use arrayEqualList return arrayEqualList(left.List, right.List) } type MapBuilder struct { listBuilder *ListBuilder etype *arrow.MapType keytype, itemtype arrow.DataType keyBuilder, itemBuilder Builder keysSorted bool } // NewMapBuilder returns a builder, using the provided memory allocator. // The created Map builder will create a map array whose keys will be a non-nullable // array of type `keytype` and whose mapped items will be a nullable array of itemtype. // // KeysSorted is not enforced at all by the builder, it should only be set to true // building using keys in sorted order for each value. The KeysSorted value will just be // used when creating the DataType for the map. 
// // # Example // // Simple example provided of converting a []map[string]int32 to an array.Map // by using a MapBuilder: // // /* assume maplist == []map[string]int32 */ // bldr := array.NewMapBuilder(memory.DefaultAllocator, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) // defer bldr.Release() // kb := bldr.KeyBuilder().(*array.StringBuilder) // ib := bldr.ItemBuilder().(*array.Int32Builder) // for _, m := range maplist { // bldr.Append(true) // for k, v := range m { // kb.Append(k) // ib.Append(v) // } // } // maparr := bldr.NewMapArray() // defer maparr.Release() func NewMapBuilder(mem memory.Allocator, keytype, itemtype arrow.DataType, keysSorted bool) *MapBuilder { etype := arrow.MapOf(keytype, itemtype) etype.KeysSorted = keysSorted listBldr := NewListBuilder(mem, etype.Elem()) keyBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(0) keyBldr.Retain() itemBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(1) itemBldr.Retain() return &MapBuilder{ listBuilder: listBldr, keyBuilder: keyBldr, itemBuilder: itemBldr, etype: etype, keytype: keytype, itemtype: itemtype, keysSorted: keysSorted, } } func NewMapBuilderWithType(mem memory.Allocator, dt *arrow.MapType) *MapBuilder { listBldr := NewListBuilder(mem, dt.Elem()) keyBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(0) keyBldr.Retain() itemBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(1) itemBldr.Retain() return &MapBuilder{ listBuilder: listBldr, keyBuilder: keyBldr, itemBuilder: itemBldr, etype: dt, keytype: dt.KeyType(), itemtype: dt.ItemType(), keysSorted: dt.KeysSorted, } } func (b *MapBuilder) Type() arrow.DataType { return b.etype } // Retain increases the reference count by 1 for the sub-builders (list, key, item). // Retain may be called simultaneously from multiple goroutines. 
func (b *MapBuilder) Retain() { b.listBuilder.Retain() b.keyBuilder.Retain() b.itemBuilder.Retain() } // Release decreases the reference count by 1 for the sub builders (list, key, item). func (b *MapBuilder) Release() { b.listBuilder.Release() b.keyBuilder.Release() b.itemBuilder.Release() } // Len returns the current number of Maps that are in the builder func (b *MapBuilder) Len() int { return b.listBuilder.Len() } // Cap returns the total number of elements that can be stored // without allocating additional memory. func (b *MapBuilder) Cap() int { return b.listBuilder.Cap() } // NullN returns the number of null values in the array builder. func (b *MapBuilder) NullN() int { return b.listBuilder.NullN() } // IsNull returns if a previously appended value at a given index is null or not. func (b *MapBuilder) IsNull(i int) bool { return b.listBuilder.IsNull(i) } // Append adds a new Map element to the array, calling Append(false) is // equivalent to calling AppendNull. func (b *MapBuilder) Append(v bool) { b.adjustStructBuilderLen() b.listBuilder.Append(v) } func (b *MapBuilder) AppendWithSize(v bool, _ int) { b.Append(v) } // AppendNull adds a null map entry to the array. func (b *MapBuilder) AppendNull() { b.Append(false) } // AppendNulls adds null map entry to the array. func (b *MapBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *MapBuilder) SetNull(i int) { b.listBuilder.SetNull(i) } func (b *MapBuilder) AppendEmptyValue() { b.Append(true) } func (b *MapBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } // Reserve enough space for n maps func (b *MapBuilder) Reserve(n int) { b.listBuilder.Reserve(n) } // Resize adjust the space allocated by b to n map elements. If n is greater than // b.Cap(), additional memory will be allocated. If n is smaller, the allocated memory may be reduced. 
func (b *MapBuilder) Resize(n int) { b.listBuilder.Resize(n) } // AppendValues is for bulk appending a group of elements with offsets provided // and validity booleans provided. func (b *MapBuilder) AppendValues(offsets []int32, valid []bool) { b.adjustStructBuilderLen() b.listBuilder.AppendValues(offsets, valid) } func (b *MapBuilder) UnsafeAppendBoolToBitmap(v bool) { b.listBuilder.UnsafeAppendBoolToBitmap(v) } func (b *MapBuilder) init(capacity int) { b.listBuilder.init(capacity) } func (b *MapBuilder) resize(newBits int, init func(int)) { b.listBuilder.resize(newBits, init) } func (b *MapBuilder) adjustStructBuilderLen() { sb := b.listBuilder.ValueBuilder().(*StructBuilder) if sb.Len() < b.keyBuilder.Len() { valids := make([]bool, b.keyBuilder.Len()-sb.Len()) for i := range valids { valids[i] = true } sb.AppendValues(valids) } } // NewArray creates a new Map array from the memory buffers used by the builder, and // resets the builder so it can be used again to build a new Map array. func (b *MapBuilder) NewArray() arrow.Array { return b.NewMapArray() } // NewMapArray creates a new Map array from the memory buffers used by the builder, and // resets the builder so it can be used again to build a new Map array. func (b *MapBuilder) NewMapArray() (a *Map) { if !b.etype.ItemField().Nullable && b.ItemBuilder().NullN() > 0 { panic("arrow/array: item not nullable") } data := b.newData() defer data.Release() a = NewMapData(data) return } func (b *MapBuilder) newData() (data *Data) { b.adjustStructBuilderLen() values := b.listBuilder.NewListArray() defer values.Release() data = NewData(b.etype, values.Len(), values.data.buffers, values.data.childData, values.NullN(), 0) return } // KeyBuilder returns a builder that can be used to populate the keys of the maps. func (b *MapBuilder) KeyBuilder() Builder { return b.keyBuilder } // ItemBuilder returns a builder that can be used to populate the values that the // keys point to. 
func (b *MapBuilder) ItemBuilder() Builder { return b.itemBuilder } // ValueBuilder can be used instead of separately using the Key/Item builders // to build the list as a List of Structs rather than building the keys/items // separately. func (b *MapBuilder) ValueBuilder() Builder { return b.listBuilder.ValueBuilder() } func (b *MapBuilder) AppendValueFromString(s string) error { return b.listBuilder.AppendValueFromString(s) } func (b *MapBuilder) UnmarshalOne(dec *json.Decoder) error { return b.listBuilder.UnmarshalOne(dec) } func (b *MapBuilder) Unmarshal(dec *json.Decoder) error { return b.listBuilder.Unmarshal(dec) } func (b *MapBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("map builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var ( _ arrow.Array = (*Map)(nil) _ Builder = (*MapBuilder)(nil) _ ListLikeBuilder = (*MapBuilder)(nil) ) arrow-go-18.2.0/arrow/array/map_test.go000066400000000000000000000161771476434502500200000ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "strconv" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestMapArray(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( arr, equalArr, unequalArr *array.Map equalValid = []bool{true, true, true, true, true, true, true} equalOffsets = []int32{0, 1, 2, 5, 6, 7, 8, 10} equalKeys = []string{"a", "a", "a", "b", "c", "a", "a", "a", "a", "b"} equalValues = []int32{1, 2, 3, 4, 5, 2, 2, 2, 5, 6} unequalValid = []bool{true, true, true} unequalOffsets = []int32{0, 1, 4, 7} unequalKeys = []string{"a", "a", "b", "c", "a", "b", "c"} unequalValues = []int32{1, 2, 2, 2, 3, 4, 5} ) bldr := array.NewMapBuilder(pool, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) defer bldr.Release() kb := bldr.KeyBuilder().(*array.StringBuilder) ib := bldr.ItemBuilder().(*array.Int32Builder) bldr.AppendValues(equalOffsets, equalValid) for _, k := range equalKeys { kb.Append(k) } ib.AppendValues(equalValues, nil) assert.Equal(t, len(equalValid), bldr.Len()) assert.Zero(t, bldr.NullN()) arr = bldr.NewMapArray() defer arr.Release() bldr.AppendValues(equalOffsets, equalValid) for _, k := range equalKeys { kb.Append(k) } ib.AppendValues(equalValues, nil) equalArr = bldr.NewMapArray() defer equalArr.Release() bldr.AppendValues(unequalOffsets, unequalValid) for _, k := range unequalKeys { kb.Append(k) } ib.AppendValues(unequalValues, nil) unequalArr = bldr.NewMapArray() defer unequalArr.Release() assert.True(t, array.Equal(arr, arr)) assert.True(t, array.Equal(arr, equalArr)) assert.True(t, array.Equal(equalArr, arr)) assert.False(t, array.Equal(equalArr, unequalArr)) assert.False(t, array.Equal(unequalArr, equalArr)) assert.True(t, array.SliceEqual(arr, 0, 1, unequalArr, 0, 1)) assert.False(t, array.SliceEqual(arr, 0, 2, unequalArr, 0, 2)) assert.False(t, 
array.SliceEqual(arr, 1, 2, unequalArr, 1, 2)) assert.True(t, array.SliceEqual(arr, 2, 3, unequalArr, 2, 3)) t.Run("items non nullable", func(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dt := arrow.MapOf(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int16) dt.KeysSorted = true dt.SetItemNullable(false) bldr := array.NewBuilder(pool, dt).(*array.MapBuilder) defer bldr.Release() kb := bldr.KeyBuilder().(*array.Int16Builder) ib := bldr.ItemBuilder().(*array.Int16Builder) bldr.Append(true) kb.Append(1) ib.AppendNull() assert.Panics(t, func() { _ = bldr.NewArray() }) }) } func TestMapArrayBuildIntToInt(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( dtype = arrow.MapOf(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int16) keys = []int16{0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5} items = []int16{1, 1, 2, 3, 5, 8, -1, -1, 0, 1, -1, 2} validItems = []bool{true, true, true, true, true, true, false, false, true, true, false, true} offsets = []int32{0, 6, 6, 12, 12} validMaps = []bool{true, false, true, true} ) bldr := array.NewBuilder(pool, dtype).(*array.MapBuilder) defer bldr.Release() bldr.Reserve(len(validMaps)) kb := bldr.KeyBuilder().(*array.Int16Builder) ib := bldr.ItemBuilder().(*array.Int16Builder) bldr.Append(true) kb.AppendValues(keys[:6], nil) ib.AppendValues(items[:6], nil) bldr.AppendNull() bldr.Append(true) kb.AppendValues(keys[6:], nil) ib.AppendValues(items[6:], []bool{false, false, true, true, false, true}) bldr.Append(true) arr := bldr.NewArray().(*array.Map) defer arr.Release() assert.Equal(t, arrow.MAP, arr.DataType().ID()) assert.EqualValues(t, len(validMaps), arr.Len()) for i, ex := range validMaps { assert.Equal(t, ex, arr.IsValid(i)) assert.Equal(t, !ex, arr.IsNull(i)) } assert.Equal(t, offsets, arr.Offsets()) assert.Equal(t, keys, arr.Keys().(*array.Int16).Int16Values()) itemArr := arr.Items().(*array.Int16) for i, ex := range 
validItems { if ex { assert.True(t, itemArr.IsValid(i)) assert.False(t, itemArr.IsNull(i)) assert.Equal(t, items[i], itemArr.Value(i)) } else { assert.False(t, itemArr.IsValid(i)) assert.True(t, itemArr.IsNull(i)) } } assert.Equal(t, "[{[0 1 2 3 4 5] [1 1 2 3 5 8]} (null) {[0 1 2 3 4 5] [(null) (null) 0 1 (null) 2]} {[] []}]", arr.String()) } func TestMapStringRoundTrip(t *testing.T) { // 1. create array dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32) mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewMapBuilderWithType(mem, dt) defer b.Release() kb := b.KeyBuilder().(*array.StringBuilder) ib := b.ItemBuilder().(*array.Int32Builder) for n := 0; n < 10; n++ { b.AppendNull() b.Append(true) for r := 'a'; r <= 'z'; r++ { kb.Append(string(r) + strconv.Itoa(n)) if (n+int(r))%2 == 0 { ib.AppendNull() } else { ib.Append(int32(n + int(r))) } } } arr := b.NewArray().(*array.Map) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewMapBuilderWithType(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Map) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestMapBuilder_SetNull(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( arr *array.Map equalValid = []bool{true, true, true, true, true, true, true} equalOffsets = []int32{0, 1, 2, 5, 6, 7, 8, 10} equalKeys = []string{"a", "a", "a", "b", "c", "a", "a", "a", "a", "b"} equalValues = []int32{1, 2, 3, 4, 5, 2, 2, 2, 5, 6} ) bldr := array.NewMapBuilder(pool, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) defer bldr.Release() kb := bldr.KeyBuilder().(*array.StringBuilder) ib := bldr.ItemBuilder().(*array.Int32Builder) bldr.AppendValues(equalOffsets, equalValid) for _, k := range equalKeys { kb.Append(k) } ib.AppendValues(equalValues, nil) 
bldr.SetNull(0) bldr.SetNull(3) arr = bldr.NewMapArray() defer arr.Release() assert.True(t, arr.IsNull(0)) assert.True(t, arr.IsValid(1)) assert.True(t, arr.IsNull(3)) } arrow-go-18.2.0/arrow/array/null.go000066400000000000000000000117401476434502500171250ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "fmt" "reflect" "strings" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) // Null represents an immutable, degenerate array with no physical storage. type Null struct { array } // NewNull returns a new Null array value of size n. func NewNull(n int) *Null { a := &Null{} a.refCount = 1 data := NewData( arrow.Null, n, []*memory.Buffer{nil}, nil, n, 0, ) a.setData(data) data.Release() return a } // NewNullData returns a new Null array value, from data. 
func NewNullData(data arrow.ArrayData) *Null { a := &Null{} a.refCount = 1 a.setData(data.(*Data)) return a } func (a *Null) ValueStr(int) string { return NullValueStr } func (a *Null) Value(int) interface{} { return nil } func (a *Null) String() string { o := new(strings.Builder) o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { o.WriteString(" ") } o.WriteString(NullValueStr) } o.WriteString("]") return o.String() } func (a *Null) setData(data *Data) { a.array.setData(data) a.array.nullBitmapBytes = nil a.array.data.nulls = a.array.data.length } func (a *Null) GetOneForMarshal(i int) interface{} { return nil } func (a *Null) MarshalJSON() ([]byte, error) { return json.Marshal(make([]interface{}, a.Len())) } type NullBuilder struct { builder } // NewNullBuilder returns a builder, using the provided memory allocator. func NewNullBuilder(mem memory.Allocator) *NullBuilder { return &NullBuilder{builder: builder{refCount: 1, mem: mem}} } func (b *NullBuilder) Type() arrow.DataType { return arrow.Null } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *NullBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } } } func (b *NullBuilder) AppendNull() { b.builder.length++ b.builder.nulls++ } func (b *NullBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *NullBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } return fmt.Errorf("cannot convert %q to null", s) } func (b *NullBuilder) AppendEmptyValue() { b.AppendNull() } func (b *NullBuilder) AppendEmptyValues(n int) { b.AppendNulls(n) } func (*NullBuilder) Reserve(size int) {} func (*NullBuilder) Resize(size int) {} func (*NullBuilder) init(cap int) {} func (*NullBuilder) resize(newBits int, init func(int)) {} // NewArray creates a Null array from the memory buffers used by the builder and resets the NullBuilder // so it can be used to build a new array. func (b *NullBuilder) NewArray() arrow.Array { return b.NewNullArray() } // NewNullArray creates a Null array from the memory buffers used by the builder and resets the NullBuilder // so it can be used to build a new array. 
func (b *NullBuilder) NewNullArray() (a *Null) { data := b.newData() a = NewNullData(data) data.Release() return } func (b *NullBuilder) newData() (data *Data) { data = NewData( arrow.Null, b.length, []*memory.Buffer{nil}, nil, b.nulls, 0, ) b.reset() return } func (b *NullBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch t.(type) { case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf(nil), Offset: dec.InputOffset(), } } return nil } func (b *NullBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *NullBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("null builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var ( _ arrow.Array = (*Null)(nil) _ Builder = (*NullBuilder)(nil) ) arrow-go-18.2.0/arrow/array/null_test.go000066400000000000000000000055151476434502500201670ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestNullArray(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) b := array.NewNullBuilder(pool) defer b.Release() b.AppendNull() b.AppendNulls(2) b.AppendEmptyValue() b.AppendEmptyValues(2) arr1 := b.NewArray().(*array.Null) defer arr1.Release() if got, want := arr1.Len(), 6; got != want { t.Fatalf("invalid null array length: got=%d, want=%d", got, want) } if got, want := arr1.NullN(), 6; got != want { t.Fatalf("invalid number of nulls: got=%d, want=%d", got, want) } if got, want := arr1.DataType(), arrow.Null; got != want { t.Fatalf("invalid null data type: got=%v, want=%v", got, want) } arr1.Retain() arr1.Release() if arr1.Data() == nil { t.Fatalf("invalid null data") } arr2 := b.NewNullArray() defer arr2.Release() if got, want := arr2.Len(), 0; got != want { t.Fatalf("invalid null array length: got=%d, want=%d", got, want) } arr3 := array.NewNull(10) defer arr3.Release() if got, want := arr3.Len(), 10; got != want { t.Fatalf("invalid null array length: got=%d, want=%d", got, want) } if got, want := arr3.NullN(), 10; got != want { t.Fatalf("invalid number of nulls: got=%d, want=%d", got, want) } } func TestNullStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewNullBuilder(mem) defer b.Release() b.AppendNull() b.AppendNulls(2) b.AppendEmptyValue() b.AppendEmptyValues(2) arr := b.NewArray().(*array.Null) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewNullBuilder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Null) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } arrow-go-18.2.0/arrow/array/numeric.gen.go000066400000000000000000000725511476434502500203740ustar00rootroot00000000000000// Code generated by array/numeric.gen.go.tmpl. DO NOT EDIT. // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "fmt" "math" "strconv" "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/internal/json" ) // A type which represents an immutable sequence of int64 values. type Int64 struct { array values []int64 } // NewInt64Data creates a new Int64. func NewInt64Data(data arrow.ArrayData) *Int64 { a := &Int64{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Int64) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Int64) Value(i int) int64 { return a.values[i] } // Values returns the values. 
func (a *Int64) Int64Values() []int64 { return a.values } // String returns a string representation of the array. func (a *Int64) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Int64) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Int64Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Int64) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return strconv.FormatInt(int64(a.Value(i)), 10) } func (a *Int64) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i] } func (a *Int64) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = a.values[i] } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualInt64(left, right *Int64) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of uint64 values. type Uint64 struct { array values []uint64 } // NewUint64Data creates a new Uint64. func NewUint64Data(data arrow.ArrayData) *Uint64 { a := &Uint64{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Uint64) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Uint64) Value(i int) uint64 { return a.values[i] } // Values returns the values. func (a *Uint64) Uint64Values() []uint64 { return a.values } // String returns a string representation of the array. 
func (a *Uint64) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Uint64) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Uint64Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Uint64) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return strconv.FormatUint(uint64(a.Value(i)), 10) } func (a *Uint64) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i] } func (a *Uint64) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = a.values[i] } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualUint64(left, right *Uint64) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of float64 values. type Float64 struct { array values []float64 } // NewFloat64Data creates a new Float64. func NewFloat64Data(data arrow.ArrayData) *Float64 { a := &Float64{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Float64) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Float64) Value(i int) float64 { return a.values[i] } // Values returns the values. func (a *Float64) Float64Values() []float64 { return a.values } // String returns a string representation of the array. 
func (a *Float64) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Float64) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Float64Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Float64) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 64) } func (a *Float64) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i] } func (a *Float64) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if !a.IsValid(i) { vals[i] = nil continue } f := a.Value(i) switch { case math.IsNaN(f): vals[i] = "NaN" case math.IsInf(f, 1): vals[i] = "+Inf" case math.IsInf(f, -1): vals[i] = "-Inf" default: vals[i] = f } } return json.Marshal(vals) } func arrayEqualFloat64(left, right *Float64) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of int32 values. type Int32 struct { array values []int32 } // NewInt32Data creates a new Int32. func NewInt32Data(data arrow.ArrayData) *Int32 { a := &Int32{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Int32) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Int32) Value(i int) int32 { return a.values[i] } // Values returns the values. func (a *Int32) Int32Values() []int32 { return a.values } // String returns a string representation of the array. 
func (a *Int32) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Int32) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Int32Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Int32) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return strconv.FormatInt(int64(a.Value(i)), 10) } func (a *Int32) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i] } func (a *Int32) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = a.values[i] } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualInt32(left, right *Int32) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of uint32 values. type Uint32 struct { array values []uint32 } // NewUint32Data creates a new Uint32. func NewUint32Data(data arrow.ArrayData) *Uint32 { a := &Uint32{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Uint32) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Uint32) Value(i int) uint32 { return a.values[i] } // Values returns the values. func (a *Uint32) Uint32Values() []uint32 { return a.values } // String returns a string representation of the array. 
func (a *Uint32) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Uint32) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Uint32Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Uint32) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return strconv.FormatUint(uint64(a.Value(i)), 10) } func (a *Uint32) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i] } func (a *Uint32) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = a.values[i] } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualUint32(left, right *Uint32) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of float32 values. type Float32 struct { array values []float32 } // NewFloat32Data creates a new Float32. func NewFloat32Data(data arrow.ArrayData) *Float32 { a := &Float32{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Float32) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Float32) Value(i int) float32 { return a.values[i] } // Values returns the values. func (a *Float32) Float32Values() []float32 { return a.values } // String returns a string representation of the array. 
func (a *Float32) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Float32) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Float32Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Float32) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 32) } func (a *Float32) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i] } func (a *Float32) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if !a.IsValid(i) { vals[i] = nil continue } f := a.Value(i) v := strconv.FormatFloat(float64(f), 'g', -1, 32) switch v { case "NaN", "+Inf", "-Inf": vals[i] = v default: vals[i] = f } } return json.Marshal(vals) } func arrayEqualFloat32(left, right *Float32) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of int16 values. type Int16 struct { array values []int16 } // NewInt16Data creates a new Int16. func NewInt16Data(data arrow.ArrayData) *Int16 { a := &Int16{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Int16) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Int16) Value(i int) int16 { return a.values[i] } // Values returns the values. func (a *Int16) Int16Values() []int16 { return a.values } // String returns a string representation of the array. 
func (a *Int16) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Int16) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Int16Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Int16) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return strconv.FormatInt(int64(a.Value(i)), 10) } func (a *Int16) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i] } func (a *Int16) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = a.values[i] } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualInt16(left, right *Int16) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of uint16 values. type Uint16 struct { array values []uint16 } // NewUint16Data creates a new Uint16. func NewUint16Data(data arrow.ArrayData) *Uint16 { a := &Uint16{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Uint16) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Uint16) Value(i int) uint16 { return a.values[i] } // Values returns the values. func (a *Uint16) Uint16Values() []uint16 { return a.values } // String returns a string representation of the array. 
func (a *Uint16) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Uint16) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Uint16Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Uint16) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return strconv.FormatUint(uint64(a.Value(i)), 10) } func (a *Uint16) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i] } func (a *Uint16) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = a.values[i] } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualUint16(left, right *Uint16) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of int8 values. type Int8 struct { array values []int8 } // NewInt8Data creates a new Int8. func NewInt8Data(data arrow.ArrayData) *Int8 { a := &Int8{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Int8) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Int8) Value(i int) int8 { return a.values[i] } // Values returns the values. func (a *Int8) Int8Values() []int8 { return a.values } // String returns a string representation of the array. 
func (a *Int8) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Int8) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Int8Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Int8) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return strconv.FormatInt(int64(a.Value(i)), 10) } func (a *Int8) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return float64(a.values[i]) // prevent uint8 from being seen as binary data } func (a *Int8) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = float64(a.values[i]) // prevent uint8 from being seen as binary data } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualInt8(left, right *Int8) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of uint8 values. type Uint8 struct { array values []uint8 } // NewUint8Data creates a new Uint8. func NewUint8Data(data arrow.ArrayData) *Uint8 { a := &Uint8{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Uint8) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Uint8) Value(i int) uint8 { return a.values[i] } // Values returns the values. func (a *Uint8) Uint8Values() []uint8 { return a.values } // String returns a string representation of the array. 
func (a *Uint8) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Uint8) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Uint8Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Uint8) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return strconv.FormatUint(uint64(a.Value(i)), 10) } func (a *Uint8) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return float64(a.values[i]) // prevent uint8 from being seen as binary data } func (a *Uint8) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = float64(a.values[i]) // prevent uint8 from being seen as binary data } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualUint8(left, right *Uint8) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of arrow.Time32 values. type Time32 struct { array values []arrow.Time32 } // NewTime32Data creates a new Time32. func NewTime32Data(data arrow.ArrayData) *Time32 { a := &Time32{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Time32) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Time32) Value(i int) arrow.Time32 { return a.values[i] } // Values returns the values. func (a *Time32) Time32Values() []arrow.Time32 { return a.values } // String returns a string representation of the array. 
func (a *Time32) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Time32) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Time32Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Time32) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return a.values[i].FormattedString(a.DataType().(*arrow.Time32Type).Unit) } func (a *Time32) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i].ToTime(a.DataType().(*arrow.Time32Type).Unit).Format("15:04:05.999999999") } func (a *Time32) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := range a.values { vals[i] = a.GetOneForMarshal(i) } return json.Marshal(vals) } func arrayEqualTime32(left, right *Time32) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of arrow.Time64 values. type Time64 struct { array values []arrow.Time64 } // NewTime64Data creates a new Time64. func NewTime64Data(data arrow.ArrayData) *Time64 { a := &Time64{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Time64) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Time64) Value(i int) arrow.Time64 { return a.values[i] } // Values returns the values. func (a *Time64) Time64Values() []arrow.Time64 { return a.values } // String returns a string representation of the array. 
func (a *Time64) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Time64) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Time64Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Time64) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return a.values[i].FormattedString(a.DataType().(*arrow.Time64Type).Unit) } func (a *Time64) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i].ToTime(a.DataType().(*arrow.Time64Type).Unit).Format("15:04:05.999999999") } func (a *Time64) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := range a.values { vals[i] = a.GetOneForMarshal(i) } return json.Marshal(vals) } func arrayEqualTime64(left, right *Time64) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of arrow.Date32 values. type Date32 struct { array values []arrow.Date32 } // NewDate32Data creates a new Date32. func NewDate32Data(data arrow.ArrayData) *Date32 { a := &Date32{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Date32) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Date32) Value(i int) arrow.Date32 { return a.values[i] } // Values returns the values. func (a *Date32) Date32Values() []arrow.Date32 { return a.values } // String returns a string representation of the array. 
func (a *Date32) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Date32) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Date32Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Date32) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return a.values[i].FormattedString() } func (a *Date32) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i].ToTime().Format("2006-01-02") } func (a *Date32) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := range a.values { vals[i] = a.GetOneForMarshal(i) } return json.Marshal(vals) } func arrayEqualDate32(left, right *Date32) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of arrow.Date64 values. type Date64 struct { array values []arrow.Date64 } // NewDate64Data creates a new Date64. func NewDate64Data(data arrow.ArrayData) *Date64 { a := &Date64{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Date64) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Date64) Value(i int) arrow.Date64 { return a.values[i] } // Values returns the values. func (a *Date64) Date64Values() []arrow.Date64 { return a.values } // String returns a string representation of the array. 
func (a *Date64) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Date64) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.Date64Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Date64) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return a.values[i].FormattedString() } func (a *Date64) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.values[i].ToTime().Format("2006-01-02") } func (a *Date64) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := range a.values { vals[i] = a.GetOneForMarshal(i) } return json.Marshal(vals) } func arrayEqualDate64(left, right *Date64) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // A type which represents an immutable sequence of arrow.Duration values. type Duration struct { array values []arrow.Duration } // NewDurationData creates a new Duration. func NewDurationData(data arrow.ArrayData) *Duration { a := &Duration{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Duration) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Duration) Value(i int) arrow.Duration { return a.values[i] } // Values returns the values. func (a *Duration) DurationValues() []arrow.Duration { return a.values } // String returns a string representation of the array. 
func (a *Duration) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Duration) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.DurationTraits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Duration) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } // return value and suffix as a string such as "12345ms" return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*arrow.DurationType).Unit) } func (a *Duration) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } // return value and suffix as a string such as "12345ms" return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*arrow.DurationType).Unit.String()) } func (a *Duration) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := range a.values { vals[i] = a.GetOneForMarshal(i) } return json.Marshal(vals) } func arrayEqualDuration(left, right *Duration) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } arrow-go-18.2.0/arrow/array/numeric.gen.go.tmpl000066400000000000000000000122321476434502500213350ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "fmt" "strings" "time" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/internal/json" ) {{range .In}} // A type which represents an immutable sequence of {{or .QualifiedType .Type}} values. type {{.Name}} struct { array values []{{or .QualifiedType .Type}} } // New{{.Name}}Data creates a new {{.Name}}. func New{{.Name}}Data(data arrow.ArrayData) *{{.Name}} { a := &{{.Name}}{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *{{.Name}}) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *{{.Name}}) Value(i int) {{or .QualifiedType .Type}} { return a.values[i] } // Values returns the values. func (a *{{.Name}}) {{.Name}}Values() []{{or .QualifiedType .Type}} { return a.values } // String returns a string representation of the array. 
func (a *{{.Name}}) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *{{.Name}}) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.{{.Name}}Traits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *{{.Name}}) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } {{if or (eq .Name "Date32") (eq .Name "Date64") -}} return a.values[i].FormattedString() {{else if or (eq .Name "Time32") (eq .Name "Time64") -}} return a.values[i].FormattedString(a.DataType().(*{{.QualifiedType}}Type).Unit) {{else if (eq .Name "Duration") -}} // return value and suffix as a string such as "12345ms" return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit) {{else if or (eq .Name "Int8") (eq .Name "Int16") (eq .Name "Int32") (eq .Name "Int64") -}} return strconv.FormatInt(int64(a.Value(i)), 10) {{else if or (eq .Name "Uint8") (eq .Name "Uint16") (eq .Name "Uint32") (eq .Name "Uint64") -}} return strconv.FormatUint(uint64(a.Value(i)), 10) {{else if or (eq .Name "Float32") -}} return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 32) {{else if or (eq .Name "Float64") -}} return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 64) {{else}} return fmt.Sprintf("%v", a.values[i]) {{end -}} } func (a *{{.Name}}) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } {{if or (eq .Name "Date32") (eq .Name "Date64") -}} return a.values[i].ToTime().Format("2006-01-02") {{else if or (eq .Name "Time32") (eq .Name "Time64") -}} return a.values[i].ToTime(a.DataType().(*{{.QualifiedType}}Type).Unit).Format("15:04:05.999999999") {{else if (eq .Name "Duration") -}} // return value and suffix as a string such as 
"12345ms" return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit.String()) {{else if (eq .Size "1")}} return float64(a.values[i]) // prevent uint8 from being seen as binary data {{else}} return a.values[i] {{end -}} } func (a *{{.Name}}) MarshalJSON() ([]byte, error) { {{if .QualifiedType -}} vals := make([]interface{}, a.Len()) for i := range a.values { vals[i] = a.GetOneForMarshal(i) } {{else -}} vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { {{if (eq .Name "Float32") -}} if !a.IsValid(i) { vals[i] = nil continue } f := a.Value(i) v := strconv.FormatFloat(float64(f), 'g', -1, 32) switch v { case "NaN", "+Inf", "-Inf": vals[i] = v default: vals[i] = f } {{else if (eq .Name "Float64") -}} if !a.IsValid(i) { vals[i] = nil continue } f := a.Value(i) switch { case math.IsNaN(f): vals[i] = "NaN" case math.IsInf(f, 1): vals[i] = "+Inf" case math.IsInf(f, -1): vals[i] = "-Inf" default: vals[i] = f } {{else}} if a.IsValid(i) { {{ if (eq .Size "1") }}vals[i] = float64(a.values[i]) // prevent uint8 from being seen as binary data{{ else }}vals[i] = a.values[i]{{ end }} } else { vals[i] = nil } {{end}} } {{end}} return json.Marshal(vals) } func arrayEqual{{.Name}}(left, right *{{.Name}}) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } {{end}} arrow-go-18.2.0/arrow/array/numeric_test.go000066400000000000000000000431731476434502500206610ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "math" "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/float16" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" "github.com/stretchr/testify/assert" ) func TestNewFloat64Data(t *testing.T) { exp := []float64{1.0, 2.0, 4.0, 8.0, 16.0} ad := array.NewData( arrow.PrimitiveTypes.Float64, len(exp), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(exp))}, nil, 0, 0, ) fa := array.NewFloat64Data(ad) assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") assert.Equal(t, exp, fa.Float64Values(), "unexpected Float64Values()") } func TestFloat64SliceData(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const ( beg = 2 end = 4 ) var ( vs = []float64{1, 2, 3, 4, 5} sub = vs[beg:end] ) b := array.NewFloat64Builder(pool) defer b.Release() for _, v := range vs { b.Append(v) } arr := b.NewArray().(*array.Float64) defer arr.Release() if got, want := arr.Len(), len(vs); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.Float64Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } slice := array.NewSlice(arr, beg, end).(*array.Float64) defer slice.Release() if got, want := slice.Len(), len(sub); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.Float64Values(), sub; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func 
TestFloat64SliceDataWithNull(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const ( beg = 2 end = 5 ) var ( valids = []bool{true, true, true, false, true, true} vs = []float64{1, 2, 3, 0, 4, 5} sub = vs[beg:end] ) b := array.NewFloat64Builder(pool) defer b.Release() b.AppendValues(vs, valids) arr := b.NewArray().(*array.Float64) defer arr.Release() if got, want := arr.Len(), len(valids); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.NullN(), 1; got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.Float64Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } slice := array.NewSlice(arr, beg, end).(*array.Float64) defer slice.Release() if got, want := slice.NullN(), 1; got != want { t.Errorf("got=%d, want=%d", got, want) } if got, want := slice.Len(), len(sub); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.Float64Values(), sub; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func TestFloat16MarshalJSON(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) bldr := array.NewFloat16Builder(pool) defer bldr.Release() jsonstr := `[0, 1, 2, 3, "NaN", "NaN", 4, 5, "+Inf", "-Inf"]` bldr.Append(float16.New(0)) bldr.Append(float16.New(1)) bldr.Append(float16.New(2)) bldr.Append(float16.New(3)) bldr.Append(float16.NaN()) bldr.Append(float16.NaN()) bldr.Append(float16.New(4)) bldr.Append(float16.New(5)) bldr.Append(float16.Inf()) bldr.Append(float16.Inf().Negate()) expected := bldr.NewFloat16Array() defer expected.Release() expected_json, err := expected.MarshalJSON() assert.NoError(t, err) assert.JSONEq(t, jsonstr, string(expected_json)) } func TestFloat32MarshalJSON(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) bldr := array.NewFloat32Builder(pool) defer bldr.Release() jsonstr 
:= `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` bldr.Append(0) bldr.Append(1) bldr.Append(float32(math.Inf(1))) bldr.Append(2) bldr.Append(3) bldr.Append(float32(math.NaN())) bldr.Append(float32(math.NaN())) bldr.Append(4) bldr.Append(5) bldr.Append(float32(math.Inf(-1))) expected := bldr.NewFloat32Array() defer expected.Release() expected_json, err := expected.MarshalJSON() assert.NoError(t, err) assert.JSONEq(t, jsonstr, string(expected_json)) } func TestFloat64MarshalJSON(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) bldr := array.NewFloat64Builder(pool) defer bldr.Release() jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` bldr.Append(0) bldr.Append(1) bldr.Append(math.Inf(1)) bldr.Append(2) bldr.Append(3) bldr.Append(math.NaN()) bldr.Append(math.NaN()) bldr.Append(4) bldr.Append(5) bldr.Append(math.Inf(-1)) expected := bldr.NewFloat64Array() defer expected.Release() expected_json, err := expected.MarshalJSON() assert.NoError(t, err) assert.JSONEq(t, jsonstr, string(expected_json)) } func TestUnmarshalSpecialFloat(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) bldr := array.NewFloat32Builder(pool) defer bldr.Release() assert.NoError(t, json.Unmarshal([]byte(`[3.4, "Inf", "-Inf"]`), bldr)) arr := bldr.NewFloat32Array() defer arr.Release() assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) assert.True(t, math.IsInf(float64(arr.Value(1)), 1), arr.Value(1)) assert.True(t, math.IsInf(float64(arr.Value(2)), -1), arr.Value(2)) } func TestNewTime32Data(t *testing.T) { data := []arrow.Time32{ arrow.Time32(1), arrow.Time32(2), arrow.Time32(4), arrow.Time32(8), arrow.Time32(16), } dtype := arrow.FixedWidthTypes.Time32s ad := array.NewData(dtype, len(data), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Time32Traits.CastToBytes(data))}, nil, 0, 0, ) t32a := array.NewTime32Data(ad) assert.Equal(t, len(data), t32a.Len(), 
"unexpected Len()") assert.Equal(t, data, t32a.Time32Values(), "unexpected Float64Values()") } func TestTime32SliceData(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const ( beg = 2 end = 4 ) var ( vs = []arrow.Time32{ arrow.Time32(1), arrow.Time32(2), arrow.Time32(4), arrow.Time32(8), arrow.Time32(16), } sub = vs[beg:end] ) dtype := arrow.FixedWidthTypes.Time32s b := array.NewTime32Builder(pool, dtype.(*arrow.Time32Type)) defer b.Release() for _, v := range vs { b.Append(v) } arr := b.NewArray().(*array.Time32) defer arr.Release() if got, want := arr.Len(), len(vs); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.Time32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } slice := array.NewSlice(arr, beg, end).(*array.Time32) defer slice.Release() if got, want := slice.Len(), len(sub); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.Time32Values(), sub; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func TestTime32SliceDataWithNull(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const ( beg = 2 end = 5 ) var ( valids = []bool{true, true, true, false, true, true} vs = []arrow.Time32{ arrow.Time32(1), arrow.Time32(2), arrow.Time32(3), arrow.Time32(0), arrow.Time32(4), arrow.Time32(5), } sub = vs[beg:end] ) dtype := arrow.FixedWidthTypes.Time32s b := array.NewTime32Builder(pool, dtype.(*arrow.Time32Type)) defer b.Release() b.AppendValues(vs, valids) arr := b.NewArray().(*array.Time32) defer arr.Release() if got, want := arr.Len(), len(valids); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.NullN(), 1; got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.Time32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } slice := array.NewSlice(arr, beg, 
end).(*array.Time32) defer slice.Release() if got, want := slice.NullN(), 1; got != want { t.Errorf("got=%d, want=%d", got, want) } if got, want := slice.Len(), len(sub); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.Time32Values(), sub; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func TestNewTime64Data(t *testing.T) { data := []arrow.Time64{ arrow.Time64(1), arrow.Time64(2), arrow.Time64(4), arrow.Time64(8), arrow.Time64(16), } dtype := arrow.FixedWidthTypes.Time64us ad := array.NewData(dtype, len(data), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Time64Traits.CastToBytes(data))}, nil, 0, 0, ) t64a := array.NewTime64Data(ad) assert.Equal(t, len(data), t64a.Len(), "unexpected Len()") assert.Equal(t, data, t64a.Time64Values(), "unexpected Float64Values()") } func TestTime64SliceData(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const ( beg = 2 end = 4 ) var ( vs = []arrow.Time64{ arrow.Time64(1), arrow.Time64(2), arrow.Time64(4), arrow.Time64(8), arrow.Time64(16), } sub = vs[beg:end] ) dtype := arrow.FixedWidthTypes.Time64us b := array.NewTime64Builder(pool, dtype.(*arrow.Time64Type)) defer b.Release() for _, v := range vs { b.Append(v) } arr := b.NewArray().(*array.Time64) defer arr.Release() if got, want := arr.Len(), len(vs); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.Time64Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } slice := array.NewSlice(arr, beg, end).(*array.Time64) defer slice.Release() if got, want := slice.Len(), len(sub); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.Time64Values(), sub; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func TestTime64SliceDataWithNull(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const ( beg = 2 end = 5 ) var ( 
valids = []bool{true, true, true, false, true, true} vs = []arrow.Time64{ arrow.Time64(1), arrow.Time64(2), arrow.Time64(3), arrow.Time64(0), arrow.Time64(4), arrow.Time64(5), } sub = vs[beg:end] ) dtype := arrow.FixedWidthTypes.Time64us b := array.NewTime64Builder(pool, dtype.(*arrow.Time64Type)) defer b.Release() b.AppendValues(vs, valids) arr := b.NewArray().(*array.Time64) defer arr.Release() if got, want := arr.Len(), len(valids); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.NullN(), 1; got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.Time64Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } slice := array.NewSlice(arr, beg, end).(*array.Time64) defer slice.Release() if got, want := slice.NullN(), 1; got != want { t.Errorf("got=%d, want=%d", got, want) } if got, want := slice.Len(), len(sub); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.Time64Values(), sub; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func TestNewDate32Data(t *testing.T) { exp := []arrow.Date32{1, 2, 4, 8, 16} dtype := &arrow.Date32Type{} ad := array.NewData( dtype, len(exp), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Date32Traits.CastToBytes(exp))}, nil, 0, 0, ) fa := array.NewDate32Data(ad) assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") assert.Equal(t, exp, fa.Date32Values(), "unexpected Date32Values()") } func TestDate32SliceData(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const ( beg = 2 end = 4 ) var ( vs = []arrow.Date32{1, 2, 3, 4, 5} sub = vs[beg:end] ) b := array.NewDate32Builder(pool) defer b.Release() for _, v := range vs { b.Append(v) } arr := b.NewArray().(*array.Date32) defer arr.Release() if got, want := arr.Len(), len(vs); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.Date32Values(), vs; !reflect.DeepEqual(got, want) { 
t.Fatalf("got=%v, want=%v", got, want) } slice := array.NewSlice(arr, beg, end).(*array.Date32) defer slice.Release() if got, want := slice.Len(), len(sub); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.Date32Values(), sub; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func TestDate32SliceDataWithNull(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const ( beg = 2 end = 5 ) var ( valids = []bool{true, true, true, false, true, true} vs = []arrow.Date32{1, 2, 3, 0, 4, 5} sub = vs[beg:end] ) b := array.NewDate32Builder(pool) defer b.Release() b.AppendValues(vs, valids) arr := b.NewArray().(*array.Date32) defer arr.Release() if got, want := arr.Len(), len(valids); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.NullN(), 1; got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.Date32Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } slice := array.NewSlice(arr, beg, end).(*array.Date32) defer slice.Release() if got, want := slice.NullN(), 1; got != want { t.Errorf("got=%d, want=%d", got, want) } if got, want := slice.Len(), len(sub); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.Date32Values(), sub; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func TestNewDate64Data(t *testing.T) { exp := []arrow.Date64{1, 2, 4, 8, 16} dtype := &arrow.Date64Type{} ad := array.NewData( dtype, len(exp), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Date64Traits.CastToBytes(exp))}, nil, 0, 0, ) fa := array.NewDate64Data(ad) assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") assert.Equal(t, exp, fa.Date64Values(), "unexpected Date64Values()") } func TestDate64SliceData(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const ( beg = 2 end = 4 ) var ( vs = []arrow.Date64{1, 
2, 3, 4, 5} sub = vs[beg:end] ) b := array.NewDate64Builder(pool) defer b.Release() for _, v := range vs { b.Append(v) } arr := b.NewArray().(*array.Date64) defer arr.Release() if got, want := arr.Len(), len(vs); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.Date64Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } slice := array.NewSlice(arr, beg, end).(*array.Date64) defer slice.Release() if got, want := slice.Len(), len(sub); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.Date64Values(), sub; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func TestDate64SliceDataWithNull(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) const ( beg = 2 end = 5 ) var ( valids = []bool{true, true, true, false, true, true} vs = []arrow.Date64{1, 2, 3, 0, 4, 5} sub = vs[beg:end] ) b := array.NewDate64Builder(pool) defer b.Release() b.AppendValues(vs, valids) arr := b.NewArray().(*array.Date64) defer arr.Release() if got, want := arr.Len(), len(valids); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.NullN(), 1; got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.Date64Values(), vs; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } slice := array.NewSlice(arr, beg, end).(*array.Date64) defer slice.Release() if got, want := slice.NullN(), 1; got != want { t.Errorf("got=%d, want=%d", got, want) } if got, want := slice.Len(), len(sub); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := slice.Date64Values(), sub; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } func TestInt64MarshalJSON(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( vs = []int64{-5474557666971701248} ) b := array.NewInt64Builder(pool) defer b.Release() for _, v := range 
vs { b.Append(v) } arr := b.NewArray().(*array.Int64) defer arr.Release() jsonBytes, err := json.Marshal(arr) if err != nil { t.Fatal(err) } got := string(jsonBytes) want := `[-5474557666971701248]` if got != want { t.Fatalf("got=%s, want=%s", got, want) } } func TestUInt64MarshalJSON(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( vs = []uint64{14697929703826477056} ) b := array.NewUint64Builder(pool) defer b.Release() for _, v := range vs { b.Append(v) } arr := b.NewArray().(*array.Uint64) defer arr.Release() jsonBytes, err := json.Marshal(arr) if err != nil { t.Fatal(err) } got := string(jsonBytes) want := `[14697929703826477056]` if got != want { t.Fatalf("got=%s, want=%s", got, want) } } arrow-go-18.2.0/arrow/array/numericbuilder.gen.go000066400000000000000000002512331476434502500217370ustar00rootroot00000000000000// Code generated by array/numericbuilder.gen.go.tmpl. DO NOT EDIT. // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array import ( "bytes" "fmt" "reflect" "strconv" "strings" "sync/atomic" "time" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) type Int64Builder struct { builder data *memory.Buffer rawData []int64 } func NewInt64Builder(mem memory.Allocator) *Int64Builder { return &Int64Builder{builder: builder{refCount: 1, mem: mem}} } func (b *Int64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int64 } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Int64Builder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *Int64Builder) Append(v int64) { b.Reserve(1) b.UnsafeAppend(v) } func (b *Int64Builder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *Int64Builder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *Int64Builder) AppendEmptyValue() { b.Append(0) } func (b *Int64Builder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *Int64Builder) UnsafeAppend(v int64) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *Int64Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *Int64Builder) AppendValues(v []int64, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) arrow.Int64Traits.Copy(b.rawData[b.length:], v) b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *Int64Builder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.Int64Traits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = arrow.Int64Traits.CastFromBytes(b.data.Bytes()) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *Int64Builder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *Int64Builder) Resize(n int) { nBuilder := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(nBuilder, b.init) b.data.Resize(arrow.Int64Traits.BytesRequired(n)) b.rawData = arrow.Int64Traits.CastFromBytes(b.data.Bytes()) } } func (b *Int64Builder) Value(i int) int64 { return b.rawData[i] } // NewArray creates a Int64 array from the memory buffers used by the builder and resets the Int64Builder // so it can be used to build a new array. func (b *Int64Builder) NewArray() arrow.Array { return b.NewInt64Array() } // NewInt64Array creates a Int64 array from the memory buffers used by the builder and resets the Int64Builder // so it can be used to build a new array. 
func (b *Int64Builder) NewInt64Array() (a *Int64) { data := b.newData() a = NewInt64Data(data) data.Release() return } func (b *Int64Builder) newData() (data *Data) { bytesRequired := arrow.Int64Traits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(arrow.PrimitiveTypes.Int64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *Int64Builder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } v, err := strconv.ParseInt(s, 10, 8*8) if err != nil { b.AppendNull() return err } b.Append(int64(v)) return nil } func (b *Int64Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case nil: b.AppendNull() case string: f, err := strconv.ParseInt(v, 10, 8*8) if err != nil { return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf(int64(0)), Offset: dec.InputOffset(), } } b.Append(int64(f)) case float64: b.Append(int64(v)) case json.Number: f, err := strconv.ParseInt(v.String(), 10, 8*8) if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf(int64(0)), Offset: dec.InputOffset(), } } b.Append(int64(f)) default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf(int64(0)), Offset: dec.InputOffset(), } } return nil } func (b *Int64Builder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *Int64Builder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } type Uint64Builder struct { builder 
data *memory.Buffer rawData []uint64 } func NewUint64Builder(mem memory.Allocator) *Uint64Builder { return &Uint64Builder{builder: builder{refCount: 1, mem: mem}} } func (b *Uint64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint64 } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Uint64Builder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *Uint64Builder) Append(v uint64) { b.Reserve(1) b.UnsafeAppend(v) } func (b *Uint64Builder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *Uint64Builder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *Uint64Builder) AppendEmptyValue() { b.Append(0) } func (b *Uint64Builder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *Uint64Builder) UnsafeAppend(v uint64) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *Uint64Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *Uint64Builder) AppendValues(v []uint64, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) arrow.Uint64Traits.Copy(b.rawData[b.length:], v) b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *Uint64Builder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.Uint64Traits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = arrow.Uint64Traits.CastFromBytes(b.data.Bytes()) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *Uint64Builder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *Uint64Builder) Resize(n int) { nBuilder := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(nBuilder, b.init) b.data.Resize(arrow.Uint64Traits.BytesRequired(n)) b.rawData = arrow.Uint64Traits.CastFromBytes(b.data.Bytes()) } } func (b *Uint64Builder) Value(i int) uint64 { return b.rawData[i] } // NewArray creates a Uint64 array from the memory buffers used by the builder and resets the Uint64Builder // so it can be used to build a new array. func (b *Uint64Builder) NewArray() arrow.Array { return b.NewUint64Array() } // NewUint64Array creates a Uint64 array from the memory buffers used by the builder and resets the Uint64Builder // so it can be used to build a new array. 
func (b *Uint64Builder) NewUint64Array() (a *Uint64) { data := b.newData() a = NewUint64Data(data) data.Release() return } func (b *Uint64Builder) newData() (data *Data) { bytesRequired := arrow.Uint64Traits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(arrow.PrimitiveTypes.Uint64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *Uint64Builder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } v, err := strconv.ParseUint(s, 10, 8*8) if err != nil { b.AppendNull() return err } b.Append(uint64(v)) return nil } func (b *Uint64Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case nil: b.AppendNull() case string: f, err := strconv.ParseUint(v, 10, 8*8) if err != nil { return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf(uint64(0)), Offset: dec.InputOffset(), } } b.Append(uint64(f)) case float64: b.Append(uint64(v)) case json.Number: f, err := strconv.ParseUint(v.String(), 10, 8*8) if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf(uint64(0)), Offset: dec.InputOffset(), } } b.Append(uint64(f)) default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf(uint64(0)), Offset: dec.InputOffset(), } } return nil } func (b *Uint64Builder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *Uint64Builder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } type 
Float64Builder struct { builder data *memory.Buffer rawData []float64 } func NewFloat64Builder(mem memory.Allocator) *Float64Builder { return &Float64Builder{builder: builder{refCount: 1, mem: mem}} } func (b *Float64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Float64 } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Float64Builder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *Float64Builder) Append(v float64) { b.Reserve(1) b.UnsafeAppend(v) } func (b *Float64Builder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *Float64Builder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *Float64Builder) AppendEmptyValue() { b.Append(0) } func (b *Float64Builder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *Float64Builder) UnsafeAppend(v float64) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *Float64Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *Float64Builder) AppendValues(v []float64, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) arrow.Float64Traits.Copy(b.rawData[b.length:], v) b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *Float64Builder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.Float64Traits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = arrow.Float64Traits.CastFromBytes(b.data.Bytes()) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *Float64Builder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *Float64Builder) Resize(n int) { nBuilder := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(nBuilder, b.init) b.data.Resize(arrow.Float64Traits.BytesRequired(n)) b.rawData = arrow.Float64Traits.CastFromBytes(b.data.Bytes()) } } func (b *Float64Builder) Value(i int) float64 { return b.rawData[i] } // NewArray creates a Float64 array from the memory buffers used by the builder and resets the Float64Builder // so it can be used to build a new array. func (b *Float64Builder) NewArray() arrow.Array { return b.NewFloat64Array() } // NewFloat64Array creates a Float64 array from the memory buffers used by the builder and resets the Float64Builder // so it can be used to build a new array. 
func (b *Float64Builder) NewFloat64Array() (a *Float64) { data := b.newData() a = NewFloat64Data(data) data.Release() return } func (b *Float64Builder) newData() (data *Data) { bytesRequired := arrow.Float64Traits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(arrow.PrimitiveTypes.Float64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *Float64Builder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } v, err := strconv.ParseFloat(s, 8*8) if err != nil { b.AppendNull() return err } b.Append(float64(v)) return nil } func (b *Float64Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case nil: b.AppendNull() case string: f, err := strconv.ParseFloat(v, 8*8) if err != nil { return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf(float64(0)), Offset: dec.InputOffset(), } } b.Append(float64(f)) case float64: b.Append(float64(v)) case json.Number: f, err := strconv.ParseFloat(v.String(), 8*8) if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf(float64(0)), Offset: dec.InputOffset(), } } b.Append(float64(f)) default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf(float64(0)), Offset: dec.InputOffset(), } } return nil } func (b *Float64Builder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *Float64Builder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } type 
Int32Builder struct { builder data *memory.Buffer rawData []int32 } func NewInt32Builder(mem memory.Allocator) *Int32Builder { return &Int32Builder{builder: builder{refCount: 1, mem: mem}} } func (b *Int32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int32 } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Int32Builder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *Int32Builder) Append(v int32) { b.Reserve(1) b.UnsafeAppend(v) } func (b *Int32Builder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *Int32Builder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *Int32Builder) AppendEmptyValue() { b.Append(0) } func (b *Int32Builder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *Int32Builder) UnsafeAppend(v int32) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *Int32Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *Int32Builder) AppendValues(v []int32, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) arrow.Int32Traits.Copy(b.rawData[b.length:], v) b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *Int32Builder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.Int32Traits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = arrow.Int32Traits.CastFromBytes(b.data.Bytes()) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *Int32Builder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *Int32Builder) Resize(n int) { nBuilder := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(nBuilder, b.init) b.data.Resize(arrow.Int32Traits.BytesRequired(n)) b.rawData = arrow.Int32Traits.CastFromBytes(b.data.Bytes()) } } func (b *Int32Builder) Value(i int) int32 { return b.rawData[i] } // NewArray creates a Int32 array from the memory buffers used by the builder and resets the Int32Builder // so it can be used to build a new array. func (b *Int32Builder) NewArray() arrow.Array { return b.NewInt32Array() } // NewInt32Array creates a Int32 array from the memory buffers used by the builder and resets the Int32Builder // so it can be used to build a new array. 
func (b *Int32Builder) NewInt32Array() (a *Int32) { data := b.newData() a = NewInt32Data(data) data.Release() return } func (b *Int32Builder) newData() (data *Data) { bytesRequired := arrow.Int32Traits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(arrow.PrimitiveTypes.Int32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *Int32Builder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } v, err := strconv.ParseInt(s, 10, 4*8) if err != nil { b.AppendNull() return err } b.Append(int32(v)) return nil } func (b *Int32Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case nil: b.AppendNull() case string: f, err := strconv.ParseInt(v, 10, 4*8) if err != nil { return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf(int32(0)), Offset: dec.InputOffset(), } } b.Append(int32(f)) case float64: b.Append(int32(v)) case json.Number: f, err := strconv.ParseInt(v.String(), 10, 4*8) if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf(int32(0)), Offset: dec.InputOffset(), } } b.Append(int32(f)) default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf(int32(0)), Offset: dec.InputOffset(), } } return nil } func (b *Int32Builder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *Int32Builder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } type Uint32Builder struct { builder 
data *memory.Buffer rawData []uint32 } func NewUint32Builder(mem memory.Allocator) *Uint32Builder { return &Uint32Builder{builder: builder{refCount: 1, mem: mem}} } func (b *Uint32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint32 } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Uint32Builder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *Uint32Builder) Append(v uint32) { b.Reserve(1) b.UnsafeAppend(v) } func (b *Uint32Builder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *Uint32Builder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *Uint32Builder) AppendEmptyValue() { b.Append(0) } func (b *Uint32Builder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *Uint32Builder) UnsafeAppend(v uint32) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *Uint32Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *Uint32Builder) AppendValues(v []uint32, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) arrow.Uint32Traits.Copy(b.rawData[b.length:], v) b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *Uint32Builder) init(capacity int) { b.builder.init(capacity) b.data = memory.NewResizableBuffer(b.mem) bytesN := arrow.Uint32Traits.BytesRequired(capacity) b.data.Resize(bytesN) b.rawData = arrow.Uint32Traits.CastFromBytes(b.data.Bytes()) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *Uint32Builder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *Uint32Builder) Resize(n int) { nBuilder := n if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(nBuilder, b.init) b.data.Resize(arrow.Uint32Traits.BytesRequired(n)) b.rawData = arrow.Uint32Traits.CastFromBytes(b.data.Bytes()) } } func (b *Uint32Builder) Value(i int) uint32 { return b.rawData[i] } // NewArray creates a Uint32 array from the memory buffers used by the builder and resets the Uint32Builder // so it can be used to build a new array. func (b *Uint32Builder) NewArray() arrow.Array { return b.NewUint32Array() } // NewUint32Array creates a Uint32 array from the memory buffers used by the builder and resets the Uint32Builder // so it can be used to build a new array. 
func (b *Uint32Builder) NewUint32Array() (a *Uint32) { data := b.newData() a = NewUint32Data(data) data.Release() return } func (b *Uint32Builder) newData() (data *Data) { bytesRequired := arrow.Uint32Traits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(arrow.PrimitiveTypes.Uint32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *Uint32Builder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } v, err := strconv.ParseUint(s, 10, 4*8) if err != nil { b.AppendNull() return err } b.Append(uint32(v)) return nil } func (b *Uint32Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case nil: b.AppendNull() case string: f, err := strconv.ParseUint(v, 10, 4*8) if err != nil { return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf(uint32(0)), Offset: dec.InputOffset(), } } b.Append(uint32(f)) case float64: b.Append(uint32(v)) case json.Number: f, err := strconv.ParseUint(v.String(), 10, 4*8) if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf(uint32(0)), Offset: dec.InputOffset(), } } b.Append(uint32(f)) default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf(uint32(0)), Offset: dec.InputOffset(), } } return nil } func (b *Uint32Builder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *Uint32Builder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } type 
Float32Builder struct {
	builder

	data    *memory.Buffer
	rawData []float32
}

// NewFloat32Builder returns a builder that accumulates float32 values, allocating from mem.
func NewFloat32Builder(mem memory.Allocator) *Float32Builder {
	return &Float32Builder{builder: builder{refCount: 1, mem: mem}}
}

// Type returns the data type produced by this builder.
func (b *Float32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Float32 }

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
func (b *Float32Builder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		if b.data != nil {
			b.data.Release()
			b.data = nil
			b.rawData = nil
		}
	}
}

// Append appends v as a valid (non-null) value.
func (b *Float32Builder) Append(v float32) {
	b.Reserve(1)
	b.UnsafeAppend(v)
}

// AppendNull appends a null (invalid) entry.
func (b *Float32Builder) AppendNull() {
	b.Reserve(1)
	b.UnsafeAppendBoolToBitmap(false)
}

// AppendNulls appends n null entries.
func (b *Float32Builder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

// AppendEmptyValue appends a zero value as a valid entry.
func (b *Float32Builder) AppendEmptyValue() {
	b.Append(0)
}

// AppendEmptyValues appends n zero values as valid entries.
func (b *Float32Builder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// UnsafeAppend appends v without any capacity check; the caller must have
// reserved space beforehand (e.g. via Reserve).
func (b *Float32Builder) UnsafeAppend(v float32) {
	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	b.rawData[b.length] = v
	b.length++
}

// UnsafeAppendBoolToBitmap records validity of the next slot (sets the bit
// when valid, counts a null otherwise) and advances length; no capacity check.
func (b *Float32Builder) UnsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// AppendValues will append the values in the v slice. The valid slice determines which values
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
// all values in v are appended and considered valid.
func (b *Float32Builder) AppendValues(v []float32, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.Float32Traits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes rawData.
func (b *Float32Builder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.Float32Traits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.Float32Traits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *Float32Builder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *Float32Builder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.Float32Traits.BytesRequired(n))
		b.rawData = arrow.Float32Traits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value (no bounds check beyond the slice's own).
func (b *Float32Builder) Value(i int) float32 {
	return b.rawData[i]
}

// NewArray creates a Float32 array from the memory buffers used by the builder and resets the Float32Builder
// so it can be used to build a new array.
func (b *Float32Builder) NewArray() arrow.Array {
	return b.NewFloat32Array()
}

// NewFloat32Array creates a Float32 array from the memory buffers used by the builder and resets the Float32Builder
// so it can be used to build a new array.
func (b *Float32Builder) NewFloat32Array() (a *Float32) {
	data := b.newData()
	a = NewFloat32Data(data)
	data.Release()
	return
}

// newData snapshots the builder's buffers into a new Data (trimming the value
// buffer to the exact length used), then resets the builder for reuse.
func (b *Float32Builder) newData() (data *Data) {
	bytesRequired := arrow.Float32Traits.BytesRequired(b.length)
	if bytesRequired > 0 && bytesRequired < b.data.Len() {
		// trim buffers
		b.data.Resize(bytesRequired)
	}
	data = NewData(arrow.PrimitiveTypes.Float32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
	b.reset()

	if b.data != nil {
		b.data.Release()
		b.data = nil
		b.rawData = nil
	}

	return
}

// AppendValueFromString parses s as a float32 and appends it; the sentinel
// NullValueStr appends a null. On parse failure a null is appended and the
// parse error is returned.
func (b *Float32Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	v, err := strconv.ParseFloat(s, 4*8)
	if err != nil {
		b.AppendNull()
		return err
	}
	b.Append(float32(v))
	return nil
}

// UnmarshalOne decodes a single JSON token: null appends a null; strings and
// numbers are converted to float32; any other token yields an UnmarshalTypeError.
func (b *Float32Builder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}
	switch v := t.(type) {
	case nil:
		b.AppendNull()
	case string:
		f, err := strconv.ParseFloat(v, 4*8)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v,
				Type:   reflect.TypeOf(float32(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(float32(f))
	case float64:
		b.Append(float32(v))
	case json.Number:
		f, err := strconv.ParseFloat(v.String(), 4*8)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v.String(),
				Type:   reflect.TypeOf(float32(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(float32(f))
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Type:   reflect.TypeOf(float32(0)),
			Offset: dec.InputOffset(),
		}
	}
	return nil
}

// Unmarshal decodes the remaining JSON values from dec into the builder.
func (b *Float32Builder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON appends the elements of a JSON array to the builder.
// NOTE(review): the error text says "binary builder" (looks like a shared
// template artifact) and %s formats the zero-value Delim when the token is
// not a Delim — confirm against the code generator before changing.
func (b *Float32Builder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("binary builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

type
Int16Builder struct {
	builder

	data    *memory.Buffer
	rawData []int16
}

// NewInt16Builder returns a builder that accumulates int16 values, allocating from mem.
func NewInt16Builder(mem memory.Allocator) *Int16Builder {
	return &Int16Builder{builder: builder{refCount: 1, mem: mem}}
}

// Type returns the data type produced by this builder.
func (b *Int16Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int16 }

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
func (b *Int16Builder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		if b.data != nil {
			b.data.Release()
			b.data = nil
			b.rawData = nil
		}
	}
}

// Append appends v as a valid (non-null) value.
func (b *Int16Builder) Append(v int16) {
	b.Reserve(1)
	b.UnsafeAppend(v)
}

// AppendNull appends a null (invalid) entry.
func (b *Int16Builder) AppendNull() {
	b.Reserve(1)
	b.UnsafeAppendBoolToBitmap(false)
}

// AppendNulls appends n null entries.
func (b *Int16Builder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

// AppendEmptyValue appends a zero value as a valid entry.
func (b *Int16Builder) AppendEmptyValue() {
	b.Append(0)
}

// AppendEmptyValues appends n zero values as valid entries.
func (b *Int16Builder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// UnsafeAppend appends v without any capacity check; the caller must have
// reserved space beforehand (e.g. via Reserve).
func (b *Int16Builder) UnsafeAppend(v int16) {
	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	b.rawData[b.length] = v
	b.length++
}

// UnsafeAppendBoolToBitmap records validity of the next slot (sets the bit
// when valid, counts a null otherwise) and advances length; no capacity check.
func (b *Int16Builder) UnsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// AppendValues will append the values in the v slice. The valid slice determines which values
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
// all values in v are appended and considered valid.
func (b *Int16Builder) AppendValues(v []int16, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.Int16Traits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes rawData.
func (b *Int16Builder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.Int16Traits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.Int16Traits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *Int16Builder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *Int16Builder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.Int16Traits.BytesRequired(n))
		b.rawData = arrow.Int16Traits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value (no bounds check beyond the slice's own).
func (b *Int16Builder) Value(i int) int16 {
	return b.rawData[i]
}

// NewArray creates a Int16 array from the memory buffers used by the builder and resets the Int16Builder
// so it can be used to build a new array.
func (b *Int16Builder) NewArray() arrow.Array {
	return b.NewInt16Array()
}

// NewInt16Array creates a Int16 array from the memory buffers used by the builder and resets the Int16Builder
// so it can be used to build a new array.
func (b *Int16Builder) NewInt16Array() (a *Int16) {
	data := b.newData()
	a = NewInt16Data(data)
	data.Release()
	return
}

// newData snapshots the builder's buffers into a new Data (trimming the value
// buffer to the exact length used), then resets the builder for reuse.
func (b *Int16Builder) newData() (data *Data) {
	bytesRequired := arrow.Int16Traits.BytesRequired(b.length)
	if bytesRequired > 0 && bytesRequired < b.data.Len() {
		// trim buffers
		b.data.Resize(bytesRequired)
	}
	data = NewData(arrow.PrimitiveTypes.Int16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
	b.reset()

	if b.data != nil {
		b.data.Release()
		b.data = nil
		b.rawData = nil
	}

	return
}

// AppendValueFromString parses s as a base-10 int16 and appends it; the
// sentinel NullValueStr appends a null. On parse failure a null is appended
// and the parse error is returned.
func (b *Int16Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	v, err := strconv.ParseInt(s, 10, 2*8)
	if err != nil {
		b.AppendNull()
		return err
	}
	b.Append(int16(v))
	return nil
}

// UnmarshalOne decodes a single JSON token: null appends a null; strings and
// numbers are converted to int16; any other token yields an UnmarshalTypeError.
func (b *Int16Builder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}
	switch v := t.(type) {
	case nil:
		b.AppendNull()
	case string:
		f, err := strconv.ParseInt(v, 10, 2*8)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v,
				Type:   reflect.TypeOf(int16(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(int16(f))
	case float64:
		b.Append(int16(v))
	case json.Number:
		f, err := strconv.ParseInt(v.String(), 10, 2*8)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v.String(),
				Type:   reflect.TypeOf(int16(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(int16(f))
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Type:   reflect.TypeOf(int16(0)),
			Offset: dec.InputOffset(),
		}
	}
	return nil
}

// Unmarshal decodes the remaining JSON values from dec into the builder.
func (b *Int16Builder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON appends the elements of a JSON array to the builder.
// NOTE(review): the error text says "binary builder" (looks like a shared
// template artifact) and %s formats the zero-value Delim when the token is
// not a Delim — confirm against the code generator before changing.
func (b *Int16Builder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("binary builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

type Uint16Builder struct {
	builder
data    *memory.Buffer
	rawData []uint16
}

// NewUint16Builder returns a builder that accumulates uint16 values, allocating from mem.
func NewUint16Builder(mem memory.Allocator) *Uint16Builder {
	return &Uint16Builder{builder: builder{refCount: 1, mem: mem}}
}

// Type returns the data type produced by this builder.
func (b *Uint16Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint16 }

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
func (b *Uint16Builder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		if b.data != nil {
			b.data.Release()
			b.data = nil
			b.rawData = nil
		}
	}
}

// Append appends v as a valid (non-null) value.
func (b *Uint16Builder) Append(v uint16) {
	b.Reserve(1)
	b.UnsafeAppend(v)
}

// AppendNull appends a null (invalid) entry.
func (b *Uint16Builder) AppendNull() {
	b.Reserve(1)
	b.UnsafeAppendBoolToBitmap(false)
}

// AppendNulls appends n null entries.
func (b *Uint16Builder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

// AppendEmptyValue appends a zero value as a valid entry.
func (b *Uint16Builder) AppendEmptyValue() {
	b.Append(0)
}

// AppendEmptyValues appends n zero values as valid entries.
func (b *Uint16Builder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// UnsafeAppend appends v without any capacity check; the caller must have
// reserved space beforehand (e.g. via Reserve).
func (b *Uint16Builder) UnsafeAppend(v uint16) {
	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	b.rawData[b.length] = v
	b.length++
}

// UnsafeAppendBoolToBitmap records validity of the next slot (sets the bit
// when valid, counts a null otherwise) and advances length; no capacity check.
func (b *Uint16Builder) UnsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// AppendValues will append the values in the v slice. The valid slice determines which values
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
// all values in v are appended and considered valid.
func (b *Uint16Builder) AppendValues(v []uint16, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.Uint16Traits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes rawData.
func (b *Uint16Builder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.Uint16Traits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.Uint16Traits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *Uint16Builder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *Uint16Builder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.Uint16Traits.BytesRequired(n))
		b.rawData = arrow.Uint16Traits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value (no bounds check beyond the slice's own).
func (b *Uint16Builder) Value(i int) uint16 {
	return b.rawData[i]
}

// NewArray creates a Uint16 array from the memory buffers used by the builder and resets the Uint16Builder
// so it can be used to build a new array.
func (b *Uint16Builder) NewArray() arrow.Array {
	return b.NewUint16Array()
}

// NewUint16Array creates a Uint16 array from the memory buffers used by the builder and resets the Uint16Builder
// so it can be used to build a new array.
func (b *Uint16Builder) NewUint16Array() (a *Uint16) {
	data := b.newData()
	a = NewUint16Data(data)
	data.Release()
	return
}

// newData snapshots the builder's buffers into a new Data (trimming the value
// buffer to the exact length used), then resets the builder for reuse.
func (b *Uint16Builder) newData() (data *Data) {
	bytesRequired := arrow.Uint16Traits.BytesRequired(b.length)
	if bytesRequired > 0 && bytesRequired < b.data.Len() {
		// trim buffers
		b.data.Resize(bytesRequired)
	}
	data = NewData(arrow.PrimitiveTypes.Uint16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
	b.reset()

	if b.data != nil {
		b.data.Release()
		b.data = nil
		b.rawData = nil
	}

	return
}

// AppendValueFromString parses s as a base-10 uint16 and appends it; the
// sentinel NullValueStr appends a null. On parse failure a null is appended
// and the parse error is returned.
func (b *Uint16Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	v, err := strconv.ParseUint(s, 10, 2*8)
	if err != nil {
		b.AppendNull()
		return err
	}
	b.Append(uint16(v))
	return nil
}

// UnmarshalOne decodes a single JSON token: null appends a null; strings and
// numbers are converted to uint16; any other token yields an UnmarshalTypeError.
func (b *Uint16Builder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}
	switch v := t.(type) {
	case nil:
		b.AppendNull()
	case string:
		f, err := strconv.ParseUint(v, 10, 2*8)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v,
				Type:   reflect.TypeOf(uint16(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(uint16(f))
	case float64:
		b.Append(uint16(v))
	case json.Number:
		f, err := strconv.ParseUint(v.String(), 10, 2*8)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v.String(),
				Type:   reflect.TypeOf(uint16(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(uint16(f))
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Type:   reflect.TypeOf(uint16(0)),
			Offset: dec.InputOffset(),
		}
	}
	return nil
}

// Unmarshal decodes the remaining JSON values from dec into the builder.
func (b *Uint16Builder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON appends the elements of a JSON array to the builder.
// NOTE(review): the error text says "binary builder" (looks like a shared
// template artifact) and %s formats the zero-value Delim when the token is
// not a Delim — confirm against the code generator before changing.
func (b *Uint16Builder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("binary builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

type
Int8Builder struct {
	builder

	data    *memory.Buffer
	rawData []int8
}

// NewInt8Builder returns a builder that accumulates int8 values, allocating from mem.
func NewInt8Builder(mem memory.Allocator) *Int8Builder {
	return &Int8Builder{builder: builder{refCount: 1, mem: mem}}
}

// Type returns the data type produced by this builder.
func (b *Int8Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int8 }

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
func (b *Int8Builder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		if b.data != nil {
			b.data.Release()
			b.data = nil
			b.rawData = nil
		}
	}
}

// Append appends v as a valid (non-null) value.
func (b *Int8Builder) Append(v int8) {
	b.Reserve(1)
	b.UnsafeAppend(v)
}

// AppendNull appends a null (invalid) entry.
func (b *Int8Builder) AppendNull() {
	b.Reserve(1)
	b.UnsafeAppendBoolToBitmap(false)
}

// AppendNulls appends n null entries.
func (b *Int8Builder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

// AppendEmptyValue appends a zero value as a valid entry.
func (b *Int8Builder) AppendEmptyValue() {
	b.Append(0)
}

// AppendEmptyValues appends n zero values as valid entries.
func (b *Int8Builder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// UnsafeAppend appends v without any capacity check; the caller must have
// reserved space beforehand (e.g. via Reserve).
func (b *Int8Builder) UnsafeAppend(v int8) {
	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	b.rawData[b.length] = v
	b.length++
}

// UnsafeAppendBoolToBitmap records validity of the next slot (sets the bit
// when valid, counts a null otherwise) and advances length; no capacity check.
func (b *Int8Builder) UnsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// AppendValues will append the values in the v slice. The valid slice determines which values
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
// all values in v are appended and considered valid.
func (b *Int8Builder) AppendValues(v []int8, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.Int8Traits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes rawData.
func (b *Int8Builder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.Int8Traits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.Int8Traits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *Int8Builder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *Int8Builder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.Int8Traits.BytesRequired(n))
		b.rawData = arrow.Int8Traits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value (no bounds check beyond the slice's own).
func (b *Int8Builder) Value(i int) int8 {
	return b.rawData[i]
}

// NewArray creates a Int8 array from the memory buffers used by the builder and resets the Int8Builder
// so it can be used to build a new array.
func (b *Int8Builder) NewArray() arrow.Array {
	return b.NewInt8Array()
}

// NewInt8Array creates a Int8 array from the memory buffers used by the builder and resets the Int8Builder
// so it can be used to build a new array.
func (b *Int8Builder) NewInt8Array() (a *Int8) {
	data := b.newData()
	a = NewInt8Data(data)
	data.Release()
	return
}

// newData snapshots the builder's buffers into a new Data (trimming the value
// buffer to the exact length used), then resets the builder for reuse.
func (b *Int8Builder) newData() (data *Data) {
	bytesRequired := arrow.Int8Traits.BytesRequired(b.length)
	if bytesRequired > 0 && bytesRequired < b.data.Len() {
		// trim buffers
		b.data.Resize(bytesRequired)
	}
	data = NewData(arrow.PrimitiveTypes.Int8, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
	b.reset()

	if b.data != nil {
		b.data.Release()
		b.data = nil
		b.rawData = nil
	}

	return
}

// AppendValueFromString parses s as a base-10 int8 and appends it; the
// sentinel NullValueStr appends a null. On parse failure a null is appended
// and the parse error is returned.
func (b *Int8Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	v, err := strconv.ParseInt(s, 10, 1*8)
	if err != nil {
		b.AppendNull()
		return err
	}
	b.Append(int8(v))
	return nil
}

// UnmarshalOne decodes a single JSON token: null appends a null; strings and
// numbers are converted to int8; any other token yields an UnmarshalTypeError.
func (b *Int8Builder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}
	switch v := t.(type) {
	case nil:
		b.AppendNull()
	case string:
		f, err := strconv.ParseInt(v, 10, 1*8)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v,
				Type:   reflect.TypeOf(int8(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(int8(f))
	case float64:
		b.Append(int8(v))
	case json.Number:
		f, err := strconv.ParseInt(v.String(), 10, 1*8)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v.String(),
				Type:   reflect.TypeOf(int8(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(int8(f))
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Type:   reflect.TypeOf(int8(0)),
			Offset: dec.InputOffset(),
		}
	}
	return nil
}

// Unmarshal decodes the remaining JSON values from dec into the builder.
func (b *Int8Builder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON appends the elements of a JSON array to the builder.
// NOTE(review): the error text says "binary builder" (looks like a shared
// template artifact) and %s formats the zero-value Delim when the token is
// not a Delim — confirm against the code generator before changing.
func (b *Int8Builder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("binary builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

type Uint8Builder struct {
	builder
	data
*memory.Buffer
	rawData []uint8
}

// NewUint8Builder returns a builder that accumulates uint8 values, allocating from mem.
func NewUint8Builder(mem memory.Allocator) *Uint8Builder {
	return &Uint8Builder{builder: builder{refCount: 1, mem: mem}}
}

// Type returns the data type produced by this builder.
func (b *Uint8Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint8 }

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
func (b *Uint8Builder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		if b.data != nil {
			b.data.Release()
			b.data = nil
			b.rawData = nil
		}
	}
}

// Append appends v as a valid (non-null) value.
func (b *Uint8Builder) Append(v uint8) {
	b.Reserve(1)
	b.UnsafeAppend(v)
}

// AppendNull appends a null (invalid) entry.
func (b *Uint8Builder) AppendNull() {
	b.Reserve(1)
	b.UnsafeAppendBoolToBitmap(false)
}

// AppendNulls appends n null entries.
func (b *Uint8Builder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

// AppendEmptyValue appends a zero value as a valid entry.
func (b *Uint8Builder) AppendEmptyValue() {
	b.Append(0)
}

// AppendEmptyValues appends n zero values as valid entries.
func (b *Uint8Builder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// UnsafeAppend appends v without any capacity check; the caller must have
// reserved space beforehand (e.g. via Reserve).
func (b *Uint8Builder) UnsafeAppend(v uint8) {
	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	b.rawData[b.length] = v
	b.length++
}

// UnsafeAppendBoolToBitmap records validity of the next slot (sets the bit
// when valid, counts a null otherwise) and advances length; no capacity check.
func (b *Uint8Builder) UnsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// AppendValues will append the values in the v slice. The valid slice determines which values
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
// all values in v are appended and considered valid.
func (b *Uint8Builder) AppendValues(v []uint8, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.Uint8Traits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes rawData.
func (b *Uint8Builder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.Uint8Traits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.Uint8Traits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *Uint8Builder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *Uint8Builder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.Uint8Traits.BytesRequired(n))
		b.rawData = arrow.Uint8Traits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value (no bounds check beyond the slice's own).
func (b *Uint8Builder) Value(i int) uint8 {
	return b.rawData[i]
}

// NewArray creates a Uint8 array from the memory buffers used by the builder and resets the Uint8Builder
// so it can be used to build a new array.
func (b *Uint8Builder) NewArray() arrow.Array {
	return b.NewUint8Array()
}

// NewUint8Array creates a Uint8 array from the memory buffers used by the builder and resets the Uint8Builder
// so it can be used to build a new array.
func (b *Uint8Builder) NewUint8Array() (a *Uint8) {
	data := b.newData()
	a = NewUint8Data(data)
	data.Release()
	return
}

// newData snapshots the builder's buffers into a new Data (trimming the value
// buffer to the exact length used), then resets the builder for reuse.
func (b *Uint8Builder) newData() (data *Data) {
	bytesRequired := arrow.Uint8Traits.BytesRequired(b.length)
	if bytesRequired > 0 && bytesRequired < b.data.Len() {
		// trim buffers
		b.data.Resize(bytesRequired)
	}
	data = NewData(arrow.PrimitiveTypes.Uint8, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
	b.reset()

	if b.data != nil {
		b.data.Release()
		b.data = nil
		b.rawData = nil
	}

	return
}

// AppendValueFromString parses s as a base-10 uint8 and appends it; the
// sentinel NullValueStr appends a null. On parse failure a null is appended
// and the parse error is returned.
func (b *Uint8Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	v, err := strconv.ParseUint(s, 10, 1*8)
	if err != nil {
		b.AppendNull()
		return err
	}
	b.Append(uint8(v))
	return nil
}

// UnmarshalOne decodes a single JSON token: null appends a null; strings and
// numbers are converted to uint8; any other token yields an UnmarshalTypeError.
func (b *Uint8Builder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}
	switch v := t.(type) {
	case nil:
		b.AppendNull()
	case string:
		f, err := strconv.ParseUint(v, 10, 1*8)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v,
				Type:   reflect.TypeOf(uint8(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(uint8(f))
	case float64:
		b.Append(uint8(v))
	case json.Number:
		f, err := strconv.ParseUint(v.String(), 10, 1*8)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v.String(),
				Type:   reflect.TypeOf(uint8(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(uint8(f))
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Type:   reflect.TypeOf(uint8(0)),
			Offset: dec.InputOffset(),
		}
	}
	return nil
}

// Unmarshal decodes the remaining JSON values from dec into the builder.
func (b *Uint8Builder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON appends the elements of a JSON array to the builder.
// NOTE(review): the error text says "binary builder" (looks like a shared
// template artifact) and %s formats the zero-value Delim when the token is
// not a Delim — confirm against the code generator before changing.
func (b *Uint8Builder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("binary builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

type Time32Builder struct {
builder

	dtype   *arrow.Time32Type
	data    *memory.Buffer
	rawData []arrow.Time32
}

// NewTime32Builder returns a builder that accumulates arrow.Time32 values of
// the given dtype (which carries the time unit), allocating from mem.
func NewTime32Builder(mem memory.Allocator, dtype *arrow.Time32Type) *Time32Builder {
	return &Time32Builder{builder: builder{refCount: 1, mem: mem}, dtype: dtype}
}

// Type returns the data type produced by this builder.
func (b *Time32Builder) Type() arrow.DataType { return b.dtype }

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
func (b *Time32Builder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		if b.data != nil {
			b.data.Release()
			b.data = nil
			b.rawData = nil
		}
	}
}

// Append appends v as a valid (non-null) value.
func (b *Time32Builder) Append(v arrow.Time32) {
	b.Reserve(1)
	b.UnsafeAppend(v)
}

// AppendNull appends a null (invalid) entry.
func (b *Time32Builder) AppendNull() {
	b.Reserve(1)
	b.UnsafeAppendBoolToBitmap(false)
}

// AppendNulls appends n null entries.
func (b *Time32Builder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

// AppendEmptyValue appends a zero value as a valid entry.
func (b *Time32Builder) AppendEmptyValue() {
	b.Append(0)
}

// AppendEmptyValues appends n zero values as valid entries.
func (b *Time32Builder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// UnsafeAppend appends v without any capacity check; the caller must have
// reserved space beforehand (e.g. via Reserve).
func (b *Time32Builder) UnsafeAppend(v arrow.Time32) {
	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	b.rawData[b.length] = v
	b.length++
}

// UnsafeAppendBoolToBitmap records validity of the next slot (sets the bit
// when valid, counts a null otherwise) and advances length; no capacity check.
func (b *Time32Builder) UnsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// AppendValues will append the values in the v slice. The valid slice determines which values
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
// all values in v are appended and considered valid.
func (b *Time32Builder) AppendValues(v []arrow.Time32, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.Time32Traits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes rawData.
func (b *Time32Builder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.Time32Traits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.Time32Traits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *Time32Builder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *Time32Builder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.Time32Traits.BytesRequired(n))
		b.rawData = arrow.Time32Traits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value (no bounds check beyond the slice's own).
func (b *Time32Builder) Value(i int) arrow.Time32 {
	return b.rawData[i]
}

// NewArray creates a Time32 array from the memory buffers used by the builder and resets the Time32Builder
// so it can be used to build a new array.
func (b *Time32Builder) NewArray() arrow.Array {
	return b.NewTime32Array()
}

// NewTime32Array creates a Time32 array from the memory buffers used by the builder and resets the Time32Builder
// so it can be used to build a new array.
func (b *Time32Builder) NewTime32Array() (a *Time32) {
	data := b.newData()
	a = NewTime32Data(data)
	data.Release()
	return
}

// newData snapshots the builder's buffers into a new Data (trimming the value
// buffer to the exact length used), then resets the builder for reuse.
func (b *Time32Builder) newData() (data *Data) {
	bytesRequired := arrow.Time32Traits.BytesRequired(b.length)
	if bytesRequired > 0 && bytesRequired < b.data.Len() {
		// trim buffers
		b.data.Resize(bytesRequired)
	}
	data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
	b.reset()

	if b.data != nil {
		b.data.Release()
		b.data = nil
		b.rawData = nil
	}

	return
}

// AppendValueFromString parses s using the builder's time unit and appends it;
// the sentinel NullValueStr appends a null. On parse failure a null is
// appended and the parse error is returned.
func (b *Time32Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	val, err := arrow.Time32FromString(s, b.dtype.Unit)
	if err != nil {
		b.AppendNull()
		return err
	}
	b.Append(val)
	return nil
}

// UnmarshalOne decodes a single JSON token: null appends a null; strings are
// parsed with the builder's time unit; numbers are taken as raw Time32 ticks;
// any other token yields an UnmarshalTypeError.
func (b *Time32Builder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}
	switch v := t.(type) {
	case nil:
		b.AppendNull()
	case string:
		tm, err := arrow.Time32FromString(v, b.dtype.Unit)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v,
				Type:   reflect.TypeOf(arrow.Time32(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(tm)
	case json.Number:
		n, err := v.Int64()
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v.String(),
				Type:   reflect.TypeOf(arrow.Time32(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(arrow.Time32(n))
	case float64:
		b.Append(arrow.Time32(v))
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Type:   reflect.TypeOf(arrow.Time32(0)),
			Offset: dec.InputOffset(),
		}
	}
	return nil
}

// Unmarshal decodes the remaining JSON values from dec into the builder.
func (b *Time32Builder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON appends the elements of a JSON array to the builder.
// NOTE(review): the error text says "binary builder" (looks like a shared
// template artifact) and %s formats the zero-value Delim when the token is
// not a Delim — confirm against the code generator before changing.
func (b *Time32Builder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("binary builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

type Time64Builder
struct { builder dtype *arrow.Time64Type data *memory.Buffer rawData []arrow.Time64 } func NewTime64Builder(mem memory.Allocator, dtype *arrow.Time64Type) *Time64Builder { return &Time64Builder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} } func (b *Time64Builder) Type() arrow.DataType { return b.dtype } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Time64Builder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *Time64Builder) Append(v arrow.Time64) { b.Reserve(1) b.UnsafeAppend(v) } func (b *Time64Builder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *Time64Builder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *Time64Builder) AppendEmptyValue() { b.Append(0) } func (b *Time64Builder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *Time64Builder) UnsafeAppend(v arrow.Time64) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *Time64Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *Time64Builder) AppendValues(v []arrow.Time64, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.Time64Traits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes the
// typed slice view (rawData) over its bytes.
func (b *Time64Builder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.Time64Traits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.Time64Traits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *Time64Builder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *Time64Builder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.Time64Traits.BytesRequired(n))
		b.rawData = arrow.Time64Traits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value; it does not consult the validity bitmap.
func (b *Time64Builder) Value(i int) arrow.Time64 { return b.rawData[i] }

// NewArray creates a Time64 array from the memory buffers used by the builder and resets the Time64Builder
// so it can be used to build a new array.
func (b *Time64Builder) NewArray() arrow.Array {
	return b.NewTime64Array()
}

// NewTime64Array creates a Time64 array from the memory buffers used by the builder and resets the Time64Builder
// so it can be used to build a new array.
func (b *Time64Builder) NewTime64Array() (a *Time64) {
	data := b.newData()
	a = NewTime64Data(data)
	data.Release()
	return
}

// newData snapshots the builder's buffers into a Data and resets the builder
// so it can be reused for a new array.
func (b *Time64Builder) newData() (data *Data) {
	bytesRequired := arrow.Time64Traits.BytesRequired(b.length)
	if bytesRequired > 0 && bytesRequired < b.data.Len() {
		// trim buffers
		b.data.Resize(bytesRequired)
	}
	data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
	b.reset()

	if b.data != nil {
		b.data.Release()
		b.data = nil
		b.rawData = nil
	}

	return
}

// AppendValueFromString parses s using the builder's time unit and appends the
// result; on a parse failure a null is appended and the error is returned.
func (b *Time64Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	val, err := arrow.Time64FromString(s, b.dtype.Unit)
	if err != nil {
		b.AppendNull()
		return err
	}
	b.Append(val)
	return nil
}

// UnmarshalOne decodes a single JSON token from dec and appends it: JSON null
// appends a null, a string is parsed with Time64FromString, and a number is
// taken as a raw Time64 count in the builder's unit.
func (b *Time64Builder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}

	switch v := t.(type) {
	case nil:
		b.AppendNull()
	case string:
		tm, err := arrow.Time64FromString(v, b.dtype.Unit)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v,
				Type:   reflect.TypeOf(arrow.Time64(0)),
				Offset: dec.InputOffset(),
			}
		}

		b.Append(tm)
	case json.Number:
		n, err := v.Int64()
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v.String(),
				Type:   reflect.TypeOf(arrow.Time64(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(arrow.Time64(n))
	case float64:
		b.Append(arrow.Time64(v))
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Type:   reflect.TypeOf(arrow.Time64(0)),
			Offset: dec.InputOffset(),
		}
	}

	return nil
}

// Unmarshal appends values decoded from dec until the current JSON array is
// exhausted.
func (b *Time64Builder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON decodes a JSON array into the builder.
// NOTE(review): the error message below says "binary builder" — looks like a
// copy/paste from the binary builder; the fix belongs in numericbuilder.gen.go.tmpl.
func (b *Time64Builder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("binary builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

// Date32Builder builds Date32 arrays (days since the Unix epoch).
type Date32Builder struct {
	builder

	data    *memory.Buffer
	rawData []arrow.Date32
}

func NewDate32Builder(mem memory.Allocator) *Date32Builder {
	return &Date32Builder{builder: builder{refCount: 1, mem: mem}}
}

func (b *Date32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Date32 }

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
func (b *Date32Builder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		if b.data != nil {
			b.data.Release()
			b.data = nil
			b.rawData = nil
		}
	}
}

func (b *Date32Builder) Append(v arrow.Date32) {
	b.Reserve(1)
	b.UnsafeAppend(v)
}

func (b *Date32Builder) AppendNull() {
	b.Reserve(1)
	b.UnsafeAppendBoolToBitmap(false)
}

func (b *Date32Builder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

func (b *Date32Builder) AppendEmptyValue() {
	b.Append(0)
}

func (b *Date32Builder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// UnsafeAppend writes v without reserving space; the caller must have
// called Reserve first.
func (b *Date32Builder) UnsafeAppend(v arrow.Date32) {
	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	b.rawData[b.length] = v
	b.length++
}

func (b *Date32Builder) UnsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// AppendValues will append the values in the v slice. The valid slice determines which values
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
// all values in v are appended and considered valid.
func (b *Date32Builder) AppendValues(v []arrow.Date32, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.Date32Traits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes the
// typed slice view (rawData) over its bytes.
func (b *Date32Builder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.Date32Traits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.Date32Traits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *Date32Builder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *Date32Builder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.Date32Traits.BytesRequired(n))
		b.rawData = arrow.Date32Traits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value; it does not consult the validity bitmap.
func (b *Date32Builder) Value(i int) arrow.Date32 { return b.rawData[i] }

// NewArray creates a Date32 array from the memory buffers used by the builder and resets the Date32Builder
// so it can be used to build a new array.
func (b *Date32Builder) NewArray() arrow.Array {
	return b.NewDate32Array()
}

// NewDate32Array creates a Date32 array from the memory buffers used by the builder and resets the Date32Builder
// so it can be used to build a new array.
func (b *Date32Builder) NewDate32Array() (a *Date32) {
	data := b.newData()
	a = NewDate32Data(data)
	data.Release()
	return
}

// newData snapshots the builder's buffers into a Data and resets the builder
// so it can be reused for a new array.
func (b *Date32Builder) newData() (data *Data) {
	bytesRequired := arrow.Date32Traits.BytesRequired(b.length)
	if bytesRequired > 0 && bytesRequired < b.data.Len() {
		// trim buffers
		b.data.Resize(bytesRequired)
	}
	data = NewData(arrow.PrimitiveTypes.Date32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
	b.reset()

	if b.data != nil {
		b.data.Release()
		b.data = nil
		b.rawData = nil
	}

	return
}

// AppendValueFromString parses s as a "2006-01-02" date and appends it; on a
// parse failure a null is appended and the error is returned.
func (b *Date32Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	tm, err := time.Parse("2006-01-02", s)
	if err != nil {
		b.AppendNull()
		return err
	}
	b.Append(arrow.Date32FromTime(tm))
	return nil
}

// UnmarshalOne decodes a single JSON token from dec and appends it: JSON null
// appends a null, a string is parsed as a "2006-01-02" date, and a number is
// taken as a raw Date32 day count.
func (b *Date32Builder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}

	switch v := t.(type) {
	case nil:
		b.AppendNull()
	case string:
		tm, err := time.Parse("2006-01-02", v)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v,
				Type:   reflect.TypeOf(arrow.Date32(0)),
				Offset: dec.InputOffset(),
			}
		}

		b.Append(arrow.Date32FromTime(tm))
	case json.Number:
		n, err := v.Int64()
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v.String(),
				Type:   reflect.TypeOf(arrow.Date32(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(arrow.Date32(n))
	case float64:
		b.Append(arrow.Date32(v))
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Type:   reflect.TypeOf(arrow.Date32(0)),
			Offset: dec.InputOffset(),
		}
	}

	return nil
}

// Unmarshal appends values decoded from dec until the current JSON array is
// exhausted.
func (b *Date32Builder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON decodes a JSON array into the builder.
// NOTE(review): the error message below says "binary builder" — looks like a
// copy/paste from the binary builder; the fix belongs in numericbuilder.gen.go.tmpl.
func (b *Date32Builder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("binary builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

// Date64Builder builds Date64 arrays (milliseconds since the Unix epoch).
type Date64Builder struct {
	builder

	data    *memory.Buffer
	rawData []arrow.Date64
}

func NewDate64Builder(mem memory.Allocator) *Date64Builder {
	return &Date64Builder{builder: builder{refCount: 1, mem: mem}}
}

func (b *Date64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Date64 }

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
func (b *Date64Builder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		if b.data != nil {
			b.data.Release()
			b.data = nil
			b.rawData = nil
		}
	}
}

func (b *Date64Builder) Append(v arrow.Date64) {
	b.Reserve(1)
	b.UnsafeAppend(v)
}

func (b *Date64Builder) AppendNull() {
	b.Reserve(1)
	b.UnsafeAppendBoolToBitmap(false)
}

func (b *Date64Builder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

func (b *Date64Builder) AppendEmptyValue() {
	b.Append(0)
}

func (b *Date64Builder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// UnsafeAppend writes v without reserving space; the caller must have
// called Reserve first.
func (b *Date64Builder) UnsafeAppend(v arrow.Date64) {
	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	b.rawData[b.length] = v
	b.length++
}

func (b *Date64Builder) UnsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// AppendValues will append the values in the v slice. The valid slice determines which values
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
// all values in v are appended and considered valid.
func (b *Date64Builder) AppendValues(v []arrow.Date64, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.Date64Traits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes the
// typed slice view (rawData) over its bytes.
func (b *Date64Builder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.Date64Traits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.Date64Traits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *Date64Builder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *Date64Builder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.Date64Traits.BytesRequired(n))
		b.rawData = arrow.Date64Traits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value; it does not consult the validity bitmap.
func (b *Date64Builder) Value(i int) arrow.Date64 { return b.rawData[i] }

// NewArray creates a Date64 array from the memory buffers used by the builder and resets the Date64Builder
// so it can be used to build a new array.
func (b *Date64Builder) NewArray() arrow.Array {
	return b.NewDate64Array()
}

// NewDate64Array creates a Date64 array from the memory buffers used by the builder and resets the Date64Builder
// so it can be used to build a new array.
func (b *Date64Builder) NewDate64Array() (a *Date64) {
	data := b.newData()
	a = NewDate64Data(data)
	data.Release()
	return
}

// newData snapshots the builder's buffers into a Data and resets the builder
// so it can be reused for a new array.
func (b *Date64Builder) newData() (data *Data) {
	bytesRequired := arrow.Date64Traits.BytesRequired(b.length)
	if bytesRequired > 0 && bytesRequired < b.data.Len() {
		// trim buffers
		b.data.Resize(bytesRequired)
	}
	data = NewData(arrow.PrimitiveTypes.Date64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
	b.reset()

	if b.data != nil {
		b.data.Release()
		b.data = nil
		b.rawData = nil
	}

	return
}

// AppendValueFromString parses s as a "2006-01-02" date and appends it; on a
// parse failure a null is appended and the error is returned.
func (b *Date64Builder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	tm, err := time.Parse("2006-01-02", s)
	if err != nil {
		b.AppendNull()
		return err
	}
	b.Append(arrow.Date64FromTime(tm))
	return nil
}

// UnmarshalOne decodes a single JSON token from dec and appends it: JSON null
// appends a null, a string is parsed as a "2006-01-02" date, and a number is
// taken as a raw Date64 millisecond count.
func (b *Date64Builder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}

	switch v := t.(type) {
	case nil:
		b.AppendNull()
	case string:
		tm, err := time.Parse("2006-01-02", v)
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v,
				Type:   reflect.TypeOf(arrow.Date64(0)),
				Offset: dec.InputOffset(),
			}
		}

		b.Append(arrow.Date64FromTime(tm))
	case json.Number:
		n, err := v.Int64()
		if err != nil {
			return &json.UnmarshalTypeError{
				Value:  v.String(),
				Type:   reflect.TypeOf(arrow.Date64(0)),
				Offset: dec.InputOffset(),
			}
		}
		b.Append(arrow.Date64(n))
	case float64:
		b.Append(arrow.Date64(v))
	default:
		return &json.UnmarshalTypeError{
			Value:  fmt.Sprint(t),
			Type:   reflect.TypeOf(arrow.Date64(0)),
			Offset: dec.InputOffset(),
		}
	}

	return nil
}

// Unmarshal appends values decoded from dec until the current JSON array is
// exhausted.
func (b *Date64Builder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// UnmarshalJSON decodes a JSON array into the builder.
// NOTE(review): the error message below says "binary builder" — looks like a
// copy/paste from the binary builder; the fix belongs in numericbuilder.gen.go.tmpl.
func (b *Date64Builder) UnmarshalJSON(data []byte) error {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("binary builder must unpack from json array, found %s", delim)
	}

	return b.Unmarshal(dec)
}

// DurationBuilder builds Duration arrays; values are interpreted in the unit
// carried by dtype.
type DurationBuilder struct {
	builder

	dtype   *arrow.DurationType
	data    *memory.Buffer
	rawData []arrow.Duration
}

func NewDurationBuilder(mem memory.Allocator, dtype *arrow.DurationType) *DurationBuilder {
	return &DurationBuilder{builder: builder{refCount: 1, mem: mem}, dtype: dtype}
}

func (b *DurationBuilder) Type() arrow.DataType { return b.dtype }

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
func (b *DurationBuilder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		if b.data != nil {
			b.data.Release()
			b.data = nil
			b.rawData = nil
		}
	}
}

func (b *DurationBuilder) Append(v arrow.Duration) {
	b.Reserve(1)
	b.UnsafeAppend(v)
}

func (b *DurationBuilder) AppendNull() {
	b.Reserve(1)
	b.UnsafeAppendBoolToBitmap(false)
}

func (b *DurationBuilder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

func (b *DurationBuilder) AppendEmptyValue() {
	b.Append(0)
}

func (b *DurationBuilder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// UnsafeAppend writes v without reserving space; the caller must have
// called Reserve first.
func (b *DurationBuilder) UnsafeAppend(v arrow.Duration) {
	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	b.rawData[b.length] = v
	b.length++
}

func (b *DurationBuilder) UnsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// AppendValues will append the values in the v slice. The valid slice determines which values
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
// all values in v are appended and considered valid.
func (b *DurationBuilder) AppendValues(v []arrow.Duration, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.DurationTraits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes the
// typed slice view (rawData) over its bytes.
func (b *DurationBuilder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.DurationTraits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.DurationTraits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *DurationBuilder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *DurationBuilder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.DurationTraits.BytesRequired(n))
		b.rawData = arrow.DurationTraits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value; it does not consult the validity bitmap.
func (b *DurationBuilder) Value(i int) arrow.Duration { return b.rawData[i] }

// NewArray creates a Duration array from the memory buffers used by the builder and resets the DurationBuilder
// so it can be used to build a new array.
func (b *DurationBuilder) NewArray() arrow.Array {
	return b.NewDurationArray()
}

// NewDurationArray creates a Duration array from the memory buffers used by the builder and resets the DurationBuilder
// so it can be used to build a new array.
func (b *DurationBuilder) NewDurationArray() (a *Duration) { data := b.newData() a = NewDurationData(data) data.Release() return } func (b *DurationBuilder) newData() (data *Data) { bytesRequired := arrow.DurationTraits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *DurationBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } dur, err := time.ParseDuration(s) if err != nil { return err } b.Append(arrow.Duration(dur / b.dtype.Unit.Multiplier())) return nil } func (b *DurationBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case nil: b.AppendNull() case json.Number: n, err := v.Int64() if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf(arrow.Duration(0)), Offset: dec.InputOffset(), } } b.Append(arrow.Duration(n)) case float64: b.Append(arrow.Duration(v)) case string: // be flexible for specifying durations by accepting forms like // 3h2m0.5s regardless of the unit and converting it to the proper // precision. val, err := time.ParseDuration(v) if err != nil { // if we got an error, maybe it was because the attempt to create // a time.Duration (int64) in nanoseconds would overflow. 
check if // the string is just a large number followed by the unit suffix if strings.HasSuffix(v, b.dtype.Unit.String()) { value, err := strconv.ParseInt(v[:len(v)-len(b.dtype.Unit.String())], 10, 64) if err == nil { b.Append(arrow.Duration(value)) break } } return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf(arrow.Duration(0)), Offset: dec.InputOffset(), } } switch b.dtype.Unit { case arrow.Nanosecond: b.Append(arrow.Duration(val.Nanoseconds())) case arrow.Microsecond: b.Append(arrow.Duration(val.Microseconds())) case arrow.Millisecond: b.Append(arrow.Duration(val.Milliseconds())) case arrow.Second: b.Append(arrow.Duration(val.Seconds())) } default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf(arrow.Duration(0)), Offset: dec.InputOffset(), } } return nil } func (b *DurationBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *DurationBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var ( _ Builder = (*Int64Builder)(nil) _ Builder = (*Uint64Builder)(nil) _ Builder = (*Float64Builder)(nil) _ Builder = (*Int32Builder)(nil) _ Builder = (*Uint32Builder)(nil) _ Builder = (*Float32Builder)(nil) _ Builder = (*Int16Builder)(nil) _ Builder = (*Uint16Builder)(nil) _ Builder = (*Int8Builder)(nil) _ Builder = (*Uint8Builder)(nil) _ Builder = (*Time32Builder)(nil) _ Builder = (*Time64Builder)(nil) _ Builder = (*Date32Builder)(nil) _ Builder = (*Date64Builder)(nil) _ Builder = (*DurationBuilder)(nil) ) arrow-go-18.2.0/arrow/array/numericbuilder.gen.go.tmpl000066400000000000000000000274511476434502500227150ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // 
or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) {{range .In}} type {{.Name}}Builder struct { builder {{if .Opt.Parametric -}} dtype *arrow.{{.Name}}Type {{end -}} data *memory.Buffer rawData []{{or .QualifiedType .Type}} } {{if .Opt.Parametric}} func New{{.Name}}Builder(mem memory.Allocator, dtype *arrow.{{.Name}}Type) *{{.Name}}Builder { return &{{.Name}}Builder{builder: builder{refCount:1, mem: mem}, dtype: dtype} } func (b *{{.Name}}Builder) Type() arrow.DataType { return b.dtype } {{else}} func New{{.Name}}Builder(mem memory.Allocator) *{{.Name}}Builder { return &{{.Name}}Builder{builder: builder{refCount:1, mem: mem}} } func (b *{{.Name}}Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.{{.Name}} } {{end}} // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *{{.Name}}Builder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		if b.nullBitmap != nil {
			b.nullBitmap.Release()
			b.nullBitmap = nil
		}
		if b.data != nil {
			b.data.Release()
			b.data = nil
			b.rawData = nil
		}
	}
}

func (b *{{.Name}}Builder) Append(v {{or .QualifiedType .Type}}) {
	b.Reserve(1)
	b.UnsafeAppend(v)
}

func (b *{{.Name}}Builder) AppendNull() {
	b.Reserve(1)
	b.UnsafeAppendBoolToBitmap(false)
}

func (b *{{.Name}}Builder) AppendNulls(n int) {
	for i := 0; i < n; i++ {
		b.AppendNull()
	}
}

func (b *{{.Name}}Builder) AppendEmptyValue() {
	b.Append(0)
}

func (b *{{.Name}}Builder) AppendEmptyValues(n int) {
	for i := 0; i < n; i++ {
		b.AppendEmptyValue()
	}
}

// UnsafeAppend writes v without reserving space; the caller must have
// called Reserve first.
func (b *{{.Name}}Builder) UnsafeAppend(v {{or .QualifiedType .Type}}) {
	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	b.rawData[b.length] = v
	b.length++
}

func (b *{{.Name}}Builder) UnsafeAppendBoolToBitmap(isValid bool) {
	if isValid {
		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
	} else {
		b.nulls++
	}
	b.length++
}

// AppendValues will append the values in the v slice. The valid slice determines which values
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
// all values in v are appended and considered valid.
func (b *{{.Name}}Builder) AppendValues(v []{{or .QualifiedType .Type}}, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	arrow.{{.Name}}Traits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for capacity elements and refreshes the
// typed slice view (rawData) over its bytes.
func (b *{{.Name}}Builder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.{{.Name}}Traits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.{{.Name}}Traits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *{{.Name}}Builder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
func (b *{{.Name}}Builder) Resize(n int) {
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.{{.Name}}Traits.BytesRequired(n))
		b.rawData = arrow.{{.Name}}Traits.CastFromBytes(b.data.Bytes())
	}
}

// Value returns the i-th appended value; it does not consult the validity bitmap.
func (b *{{.Name}}Builder) Value(i int) {{or .QualifiedType .Type}} { return b.rawData[i] }

// NewArray creates a {{.Name}} array from the memory buffers used by the builder and resets the {{.Name}}Builder
// so it can be used to build a new array.
func (b *{{.Name}}Builder) NewArray() arrow.Array {
	return b.New{{.Name}}Array()
}

// New{{.Name}}Array creates a {{.Name}} array from the memory buffers used by the builder and resets the {{.Name}}Builder
// so it can be used to build a new array.
func (b *{{.Name}}Builder) New{{.Name}}Array() (a *{{.Name}}) { data := b.newData() a = New{{.Name}}Data(data) data.Release() return } func (b *{{.Name}}Builder) newData() (data *Data) { bytesRequired := arrow.{{.Name}}Traits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } {{if .Opt.Parametric -}} data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) {{else -}} data = NewData(arrow.PrimitiveTypes.{{.Name}}, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) {{end -}} b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *{{.Name}}Builder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } {{if or (eq .Name "Date32") -}} tm, err := time.Parse("2006-01-02", s) if err != nil { b.AppendNull() return err } b.Append(arrow.Date32FromTime(tm)) {{else if or (eq .Name "Date64") -}} tm, err := time.Parse("2006-01-02", s) if err != nil { b.AppendNull() return err } b.Append(arrow.Date64FromTime(tm)) {{else if or (eq .Name "Time32") -}} val, err := arrow.Time32FromString(s, b.dtype.Unit) if err != nil { b.AppendNull() return err } b.Append(val) {{else if or (eq .Name "Time64") -}} val, err := arrow.Time64FromString(s, b.dtype.Unit) if err != nil { b.AppendNull() return err } b.Append(val) {{else if (eq .Name "Duration") -}} dur, err := time.ParseDuration(s) if err != nil { return err } b.Append(arrow.Duration(dur / b.dtype.Unit.Multiplier())) {{else if or (eq .Name "Int8") (eq .Name "Int16") (eq .Name "Int32") (eq .Name "Int64") -}} v, err := strconv.ParseInt(s, 10, {{.Size}} * 8) if err != nil { b.AppendNull() return err } b.Append({{.name}}(v)) {{else if or (eq .Name "Uint8") (eq .Name "Uint16") (eq .Name "Uint32") (eq .Name "Uint64") -}} v, err := strconv.ParseUint(s, 10, {{.Size}} * 8) if err != nil { b.AppendNull() return err } b.Append({{.name}}(v)) {{else if or 
(eq .Name "Float32") (eq .Name "Float64") -}} v, err := strconv.ParseFloat(s, {{.Size}} * 8) if err != nil { b.AppendNull() return err } b.Append({{.name}}(v)) {{end -}} return nil } func (b *{{.Name}}Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case nil: b.AppendNull() {{if or (eq .Name "Date32") (eq .Name "Date64") -}} case string: tm, err := time.Parse("2006-01-02", v) if err != nil { return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf({{.QualifiedType}}(0)), Offset: dec.InputOffset(), } } b.Append({{.QualifiedType}}FromTime(tm)) case json.Number: n, err := v.Int64() if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf({{.QualifiedType}}(0)), Offset: dec.InputOffset(), } } b.Append({{.QualifiedType}}(n)) case float64: b.Append({{.QualifiedType}}(v)) {{else if or (eq .Name "Time32") (eq .Name "Time64") -}} case string: tm, err := {{.QualifiedType}}FromString(v, b.dtype.Unit) if err != nil { return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf({{.QualifiedType}}(0)), Offset: dec.InputOffset(), } } b.Append(tm) case json.Number: n, err := v.Int64() if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf({{.QualifiedType}}(0)), Offset: dec.InputOffset(), } } b.Append({{.QualifiedType}}(n)) case float64: b.Append({{.QualifiedType}}(v)) {{else if eq .Name "Duration" -}} case json.Number: n, err := v.Int64() if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf({{.QualifiedType}}(0)), Offset: dec.InputOffset(), } } b.Append({{.QualifiedType}}(n)) case float64: b.Append({{.QualifiedType}}(v)) case string: // be flexible for specifying durations by accepting forms like // 3h2m0.5s regardless of the unit and converting it to the proper // precision. 
val, err := time.ParseDuration(v) if err != nil { // if we got an error, maybe it was because the attempt to create // a time.Duration (int64) in nanoseconds would overflow. check if // the string is just a large number followed by the unit suffix if strings.HasSuffix(v, b.dtype.Unit.String()) { value, err := strconv.ParseInt(v[:len(v)-len(b.dtype.Unit.String())], 10, 64) if err == nil { b.Append(arrow.Duration(value)) break } } return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf({{.QualifiedType}}(0)), Offset: dec.InputOffset(), } } switch b.dtype.Unit { case arrow.Nanosecond: b.Append({{.QualifiedType}}(val.Nanoseconds())) case arrow.Microsecond: b.Append({{.QualifiedType}}(val.Microseconds())) case arrow.Millisecond: b.Append({{.QualifiedType}}(val.Milliseconds())) case arrow.Second: b.Append({{.QualifiedType}}(val.Seconds())) } {{else}} case string: {{if or (eq .Name "Float32") (eq .Name "Float64") -}} f, err := strconv.ParseFloat(v, {{.Size}}*8) {{else if eq (printf "%.1s" .Name) "U" -}} f, err := strconv.ParseUint(v, 10, {{.Size}}*8) {{else -}} f, err := strconv.ParseInt(v, 10, {{.Size}}*8) {{end -}} if err != nil { return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf({{.name}}(0)), Offset: dec.InputOffset(), } } b.Append({{.name}}(f)) case float64: b.Append({{.name}}(v)) case json.Number: {{if or (eq .Name "Float32") (eq .Name "Float64") -}} f, err := strconv.ParseFloat(v.String(), {{.Size}}*8) {{else if eq (printf "%.1s" .Name) "U" -}} f, err := strconv.ParseUint(v.String(), 10, {{.Size}}*8) {{else -}} f, err := strconv.ParseInt(v.String(), 10, {{.Size}}*8) {{end -}} if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf({{.name}}(0)), Offset: dec.InputOffset(), } } b.Append({{.name}}(f)) {{end}} default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf({{or .QualifiedType .Type}}(0)), Offset: dec.InputOffset(), } } return nil } func (b *{{.Name}}Builder) Unmarshal(dec 
*json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *{{.Name}}Builder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } {{end}} var ( {{- range .In}} _ Builder = (*{{.Name}}Builder)(nil) {{- end}} ) arrow-go-18.2.0/arrow/array/numericbuilder.gen_test.go000066400000000000000000002152421476434502500227760ustar00rootroot00000000000000// Code generated by array/numericbuilder.gen_test.go.tmpl. DO NOT EDIT. // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "math" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestInt64StringRoundTrip(t *testing.T) { // 1. 
create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewInt64Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Int64) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewInt64Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Int64) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewInt64Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt64Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewInt64Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewInt64Array() // check state of builder after NewInt64Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt64Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt64Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt64Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []int64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int64Values(), "unexpected Int64Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Int64Values(), 10, "unexpected length of Int64Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewInt64Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []int64{7, 8}, a.Int64Values()) assert.Len(t, a.Int64Values(), 2) a.Release() var ( 
want = []int64{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewInt64Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Int64); !ok { t.Fatalf("could not type-assert to array.Int64") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Int64) if !ok { t.Fatalf("could not type-assert to array.Int64") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestInt64Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt64Builder(mem) defer ab.Release() exp := []int64{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewInt64Array() assert.Equal(t, exp, a.Int64Values()) a.Release() } func TestInt64Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt64Builder(mem) defer ab.Release() exp := []int64{0, 1, 2, 3} ab.AppendValues([]int64{}, nil) a := ab.NewInt64Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewInt64Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]int64{}, nil) ab.AppendValues(exp, nil) a = ab.NewInt64Array() assert.Equal(t, exp, a.Int64Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]int64{}, nil) a = ab.NewInt64Array() assert.Equal(t, exp, a.Int64Values()) a.Release() } func TestInt64Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt64Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) 
assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestUint64StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewUint64Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Uint64) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewUint64Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Uint64) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewUint64Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint64Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewUint64Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewUint64Array() // check state of builder after NewUint64Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint64Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint64Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint64Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []uint64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint64Values(), "unexpected 
Uint64Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Uint64Values(), 10, "unexpected length of Uint64Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewUint64Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []uint64{7, 8}, a.Uint64Values()) assert.Len(t, a.Uint64Values(), 2) a.Release() var ( want = []uint64{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewUint64Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Uint64); !ok { t.Fatalf("could not type-assert to array.Uint64") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Uint64) if !ok { t.Fatalf("could not type-assert to array.Uint64") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestUint64Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint64Builder(mem) defer ab.Release() exp := []uint64{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewUint64Array() assert.Equal(t, exp, a.Uint64Values()) a.Release() } func TestUint64Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint64Builder(mem) defer ab.Release() exp := []uint64{0, 1, 2, 3} ab.AppendValues([]uint64{}, nil) a := ab.NewUint64Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewUint64Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]uint64{}, nil) ab.AppendValues(exp, nil) a = ab.NewUint64Array() 
assert.Equal(t, exp, a.Uint64Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]uint64{}, nil) a = ab.NewUint64Array() assert.Equal(t, exp, a.Uint64Values()) a.Release() } func TestUint64Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint64Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestFloat64StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewFloat64Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Float64) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewFloat64Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Float64) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewFloat64Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewFloat64Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewFloat64Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewFloat64Array() // check state of builder after NewFloat64Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat64Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat64Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewFloat64Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []float64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Float64Values(), "unexpected Float64Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Float64Values(), 10, "unexpected length of Float64Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewFloat64Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []float64{7, 8}, a.Float64Values()) assert.Len(t, a.Float64Values(), 2) a.Release() var ( want = []float64{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewFloat64Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, 
want) } if _, ok := sub.(*array.Float64); !ok { t.Fatalf("could not type-assert to array.Float64") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Float64) if !ok { t.Fatalf("could not type-assert to array.Float64") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestFloat64Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewFloat64Builder(mem) defer ab.Release() exp := []float64{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewFloat64Array() assert.Equal(t, exp, a.Float64Values()) a.Release() } func TestFloat64Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewFloat64Builder(mem) defer ab.Release() exp := []float64{0, 1, 2, 3} ab.AppendValues([]float64{}, nil) a := ab.NewFloat64Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewFloat64Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]float64{}, nil) ab.AppendValues(exp, nil) a = ab.NewFloat64Array() assert.Equal(t, exp, a.Float64Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]float64{}, nil) a = ab.NewFloat64Array() assert.Equal(t, exp, a.Float64Values()) a.Release() } func TestFloat64Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewFloat64Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) 
ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestFloat64BuilderUnmarshalJSON(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) bldr := array.NewFloat64Builder(mem) defer bldr.Release() jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` err := bldr.UnmarshalJSON([]byte(jsonstr)) assert.NoError(t, err) arr := bldr.NewFloat64Array() defer arr.Release() assert.NotNil(t, arr) assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) } func TestInt32StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewInt32Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Int32) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewInt32Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Int32) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewInt32Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt32Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewInt32Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewInt32Array() // check state of builder after NewInt32Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt32Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt32Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt32Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []int32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int32Values(), "unexpected Int32Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Int32Values(), 10, "unexpected length of Int32Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewInt32Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []int32{7, 8}, a.Int32Values()) assert.Len(t, a.Int32Values(), 2) a.Release() var ( want = []int32{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewInt32Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Int32); 
!ok { t.Fatalf("could not type-assert to array.Int32") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Int32) if !ok { t.Fatalf("could not type-assert to array.Int32") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestInt32Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt32Builder(mem) defer ab.Release() exp := []int32{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewInt32Array() assert.Equal(t, exp, a.Int32Values()) a.Release() } func TestInt32Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt32Builder(mem) defer ab.Release() exp := []int32{0, 1, 2, 3} ab.AppendValues([]int32{}, nil) a := ab.NewInt32Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewInt32Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]int32{}, nil) ab.AppendValues(exp, nil) a = ab.NewInt32Array() assert.Equal(t, exp, a.Int32Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]int32{}, nil) a = ab.NewInt32Array() assert.Equal(t, exp, a.Int32Values()) a.Release() } func TestInt32Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt32Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestUint32StringRoundTrip(t *testing.T) { // 
1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewUint32Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Uint32) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewUint32Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Uint32) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewUint32Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint32Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewUint32Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewUint32Array() // check state of builder after NewUint32Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint32Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint32Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint32Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []uint32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint32Values(), "unexpected Uint32Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Uint32Values(), 10, "unexpected length of Uint32Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewUint32Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []uint32{7, 8}, a.Uint32Values()) assert.Len(t, 
a.Uint32Values(), 2) a.Release() var ( want = []uint32{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewUint32Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Uint32); !ok { t.Fatalf("could not type-assert to array.Uint32") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Uint32) if !ok { t.Fatalf("could not type-assert to array.Uint32") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestUint32Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint32Builder(mem) defer ab.Release() exp := []uint32{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewUint32Array() assert.Equal(t, exp, a.Uint32Values()) a.Release() } func TestUint32Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint32Builder(mem) defer ab.Release() exp := []uint32{0, 1, 2, 3} ab.AppendValues([]uint32{}, nil) a := ab.NewUint32Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewUint32Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]uint32{}, nil) ab.AppendValues(exp, nil) a = ab.NewUint32Array() assert.Equal(t, exp, a.Uint32Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]uint32{}, nil) a = ab.NewUint32Array() assert.Equal(t, exp, a.Uint32Values()) a.Release() } func TestUint32Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := 
array.NewUint32Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestFloat32StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewFloat32Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Float32) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewFloat32Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Float32) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewFloat32Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewFloat32Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewFloat32Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewFloat32Array() // check state of builder after NewFloat32Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat32Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat32Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewFloat32Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") 
assert.Equal(t, []float32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Float32Values(), "unexpected Float32Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Float32Values(), 10, "unexpected length of Float32Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewFloat32Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []float32{7, 8}, a.Float32Values()) assert.Len(t, a.Float32Values(), 2) a.Release() var ( want = []float32{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewFloat32Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Float32); !ok { t.Fatalf("could not type-assert to array.Float32") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Float32) if !ok { t.Fatalf("could not type-assert to array.Float32") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestFloat32Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewFloat32Builder(mem) defer ab.Release() exp := []float32{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewFloat32Array() assert.Equal(t, exp, a.Float32Values()) a.Release() } func TestFloat32Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewFloat32Builder(mem) defer ab.Release() exp := []float32{0, 1, 2, 3} ab.AppendValues([]float32{}, nil) a := ab.NewFloat32Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewFloat32Array() 
assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]float32{}, nil) ab.AppendValues(exp, nil) a = ab.NewFloat32Array() assert.Equal(t, exp, a.Float32Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]float32{}, nil) a = ab.NewFloat32Array() assert.Equal(t, exp, a.Float32Values()) a.Release() } func TestFloat32Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewFloat32Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestFloat32BuilderUnmarshalJSON(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) bldr := array.NewFloat32Builder(mem) defer bldr.Release() jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` err := bldr.UnmarshalJSON([]byte(jsonstr)) assert.NoError(t, err) arr := bldr.NewFloat32Array() defer arr.Release() assert.NotNil(t, arr) assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) } func TestInt16StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewInt16Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Int16) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewInt16Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Int16) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewInt16Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt16Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewInt16Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewInt16Array() // check state of builder after NewInt16Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt16Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt16Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt16Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []int16{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int16Values(), "unexpected Int16Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Int16Values(), 10, "unexpected length of Int16Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewInt16Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []int16{7, 8}, a.Int16Values()) assert.Len(t, a.Int16Values(), 2) a.Release() var ( want = []int16{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewInt16Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Int16); 
!ok { t.Fatalf("could not type-assert to array.Int16") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Int16) if !ok { t.Fatalf("could not type-assert to array.Int16") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestInt16Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt16Builder(mem) defer ab.Release() exp := []int16{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewInt16Array() assert.Equal(t, exp, a.Int16Values()) a.Release() } func TestInt16Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt16Builder(mem) defer ab.Release() exp := []int16{0, 1, 2, 3} ab.AppendValues([]int16{}, nil) a := ab.NewInt16Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewInt16Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]int16{}, nil) ab.AppendValues(exp, nil) a = ab.NewInt16Array() assert.Equal(t, exp, a.Int16Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]int16{}, nil) a = ab.NewInt16Array() assert.Equal(t, exp, a.Int16Values()) a.Release() } func TestInt16Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt16Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestUint16StringRoundTrip(t *testing.T) { // 
1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewUint16Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Uint16) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewUint16Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Uint16) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewUint16Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint16Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewUint16Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewUint16Array() // check state of builder after NewUint16Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint16Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint16Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint16Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []uint16{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint16Values(), "unexpected Uint16Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Uint16Values(), 10, "unexpected length of Uint16Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewUint16Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []uint16{7, 8}, a.Uint16Values()) assert.Len(t, 
a.Uint16Values(), 2) a.Release() var ( want = []uint16{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewUint16Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Uint16); !ok { t.Fatalf("could not type-assert to array.Uint16") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Uint16) if !ok { t.Fatalf("could not type-assert to array.Uint16") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestUint16Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint16Builder(mem) defer ab.Release() exp := []uint16{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewUint16Array() assert.Equal(t, exp, a.Uint16Values()) a.Release() } func TestUint16Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint16Builder(mem) defer ab.Release() exp := []uint16{0, 1, 2, 3} ab.AppendValues([]uint16{}, nil) a := ab.NewUint16Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewUint16Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]uint16{}, nil) ab.AppendValues(exp, nil) a = ab.NewUint16Array() assert.Equal(t, exp, a.Uint16Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]uint16{}, nil) a = ab.NewUint16Array() assert.Equal(t, exp, a.Uint16Values()) a.Release() } func TestUint16Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := 
array.NewUint16Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestInt8StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewInt8Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Int8) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewInt8Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Int8) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewInt8Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt8Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewInt8Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewInt8Array() // check state of builder after NewInt8Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt8Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt8Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt8Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []int8{1, 2, 3, 0, 5, 6, 0, 8, 
9, 10}, a.Int8Values(), "unexpected Int8Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Int8Values(), 10, "unexpected length of Int8Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewInt8Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []int8{7, 8}, a.Int8Values()) assert.Len(t, a.Int8Values(), 2) a.Release() var ( want = []int8{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewInt8Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Int8); !ok { t.Fatalf("could not type-assert to array.Int8") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Int8) if !ok { t.Fatalf("could not type-assert to array.Int8") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestInt8Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt8Builder(mem) defer ab.Release() exp := []int8{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewInt8Array() assert.Equal(t, exp, a.Int8Values()) a.Release() } func TestInt8Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt8Builder(mem) defer ab.Release() exp := []int8{0, 1, 2, 3} ab.AppendValues([]int8{}, nil) a := ab.NewInt8Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewInt8Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]int8{}, nil) ab.AppendValues(exp, nil) a = ab.NewInt8Array() assert.Equal(t, exp, 
a.Int8Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]int8{}, nil) a = ab.NewInt8Array() assert.Equal(t, exp, a.Int8Values()) a.Release() } func TestInt8Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewInt8Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestUint8StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewUint8Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Uint8) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewUint8Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Uint8) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewUint8Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint8Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewUint8Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewUint8Array() // check state of builder after NewUint8Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint8Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint8Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint8Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []uint8{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint8Values(), "unexpected Uint8Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Uint8Values(), 10, "unexpected length of Uint8Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewUint8Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []uint8{7, 8}, a.Uint8Values()) assert.Len(t, a.Uint8Values(), 2) a.Release() var ( want = []uint8{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewUint8Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Uint8); 
!ok { t.Fatalf("could not type-assert to array.Uint8") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Uint8) if !ok { t.Fatalf("could not type-assert to array.Uint8") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestUint8Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint8Builder(mem) defer ab.Release() exp := []uint8{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewUint8Array() assert.Equal(t, exp, a.Uint8Values()) a.Release() } func TestUint8Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint8Builder(mem) defer ab.Release() exp := []uint8{0, 1, 2, 3} ab.AppendValues([]uint8{}, nil) a := ab.NewUint8Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewUint8Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]uint8{}, nil) ab.AppendValues(exp, nil) a = ab.NewUint8Array() assert.Equal(t, exp, a.Uint8Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]uint8{}, nil) a = ab.NewUint8Array() assert.Equal(t, exp, a.Uint8Values()) a.Release() } func TestUint8Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewUint8Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestTime32StringRoundTrip(t *testing.T) { // 
1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dt := &arrow.Time32Type{Unit: arrow.Second} b := array.NewTime32Builder(mem, dt) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Time32) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewTime32Builder(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Time32) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewTime32Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Time32Type{Unit: arrow.Second} ab := array.NewTime32Builder(mem, dtype) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewTime32Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewTime32Array() // check state of builder after NewTime32Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTime32Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTime32Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTime32Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []arrow.Time32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Time32Values(), "unexpected Time32Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Time32Values(), 10, "unexpected length of Time32Values") a.Release() ab.Append(7) ab.Append(8) a = 
ab.NewTime32Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []arrow.Time32{7, 8}, a.Time32Values()) assert.Len(t, a.Time32Values(), 2) a.Release() var ( want = []arrow.Time32{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewTime32Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Time32); !ok { t.Fatalf("could not type-assert to array.Time32") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Time32) if !ok { t.Fatalf("could not type-assert to array.Time32") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestTime32Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Time32Type{Unit: arrow.Second} ab := array.NewTime32Builder(mem, dtype) defer ab.Release() exp := []arrow.Time32{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewTime32Array() assert.Equal(t, exp, a.Time32Values()) a.Release() } func TestTime32Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Time32Type{Unit: arrow.Second} ab := array.NewTime32Builder(mem, dtype) defer ab.Release() exp := []arrow.Time32{0, 1, 2, 3} ab.AppendValues([]arrow.Time32{}, nil) a := ab.NewTime32Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewTime32Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]arrow.Time32{}, nil) ab.AppendValues(exp, nil) a = ab.NewTime32Array() assert.Equal(t, exp, a.Time32Values()) a.Release() ab.AppendValues(exp, nil) 
ab.AppendValues([]arrow.Time32{}, nil) a = ab.NewTime32Array() assert.Equal(t, exp, a.Time32Values()) a.Release() } func TestTime32Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Time32Type{Unit: arrow.Second} ab := array.NewTime32Builder(mem, dtype) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestTime64StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dt := &arrow.Time64Type{Unit: arrow.Microsecond} b := array.NewTime64Builder(mem, dt) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Time64) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewTime64Builder(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Time64) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewTime64Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Time64Type{Unit: arrow.Second} ab := array.NewTime64Builder(mem, dtype) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewTime64Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewTime64Array() // check state of builder after NewTime64Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTime64Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTime64Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTime64Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []arrow.Time64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Time64Values(), "unexpected Time64Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Time64Values(), 10, "unexpected length of Time64Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewTime64Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []arrow.Time64{7, 8}, a.Time64Values()) assert.Len(t, a.Time64Values(), 2) a.Release() var ( want = []arrow.Time64{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewTime64Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got 
!= want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Time64); !ok { t.Fatalf("could not type-assert to array.Time64") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Time64) if !ok { t.Fatalf("could not type-assert to array.Time64") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestTime64Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Time64Type{Unit: arrow.Second} ab := array.NewTime64Builder(mem, dtype) defer ab.Release() exp := []arrow.Time64{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewTime64Array() assert.Equal(t, exp, a.Time64Values()) a.Release() } func TestTime64Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Time64Type{Unit: arrow.Second} ab := array.NewTime64Builder(mem, dtype) defer ab.Release() exp := []arrow.Time64{0, 1, 2, 3} ab.AppendValues([]arrow.Time64{}, nil) a := ab.NewTime64Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewTime64Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]arrow.Time64{}, nil) ab.AppendValues(exp, nil) a = ab.NewTime64Array() assert.Equal(t, exp, a.Time64Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]arrow.Time64{}, nil) a = ab.NewTime64Array() assert.Equal(t, exp, a.Time64Values()) a.Release() } func TestTime64Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.Time64Type{Unit: arrow.Second} ab := array.NewTime64Builder(mem, dtype) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) 
ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestDate32StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewDate32Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Date32) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewDate32Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Date32) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewDate32Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDate32Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewDate32Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewDate32Array() // check state of builder after NewDate32Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDate32Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDate32Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDate32Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []arrow.Date32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Date32Values(), "unexpected Date32Values") assert.Equal(t, 
[]byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Date32Values(), 10, "unexpected length of Date32Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewDate32Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []arrow.Date32{7, 8}, a.Date32Values()) assert.Len(t, a.Date32Values(), 2) a.Release() var ( want = []arrow.Date32{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewDate32Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Date32); !ok { t.Fatalf("could not type-assert to array.Date32") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Date32) if !ok { t.Fatalf("could not type-assert to array.Date32") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestDate32Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDate32Builder(mem) defer ab.Release() exp := []arrow.Date32{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewDate32Array() assert.Equal(t, exp, a.Date32Values()) a.Release() } func TestDate32Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDate32Builder(mem) defer ab.Release() exp := []arrow.Date32{0, 1, 2, 3} ab.AppendValues([]arrow.Date32{}, nil) a := ab.NewDate32Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewDate32Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]arrow.Date32{}, nil) ab.AppendValues(exp, nil) a = ab.NewDate32Array() 
assert.Equal(t, exp, a.Date32Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]arrow.Date32{}, nil) a = ab.NewDate32Array() assert.Equal(t, exp, a.Date32Values()) a.Release() } func TestDate32Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDate32Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestDate64StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := array.NewDate64Builder(mem) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Date64) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewDate64Builder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Date64) defer arr1.Release() assert.Exactly(t, arr.Len(), arr1.Len()) for i := 0; i < arr.Len(); i++ { assert.Exactly(t, arr.IsValid(i), arr1.IsValid(i)) assert.Exactly(t, arr.ValueStr(i), arr1.ValueStr(i)) if arr.IsValid(i) { assert.Exactly(t, arr.Value(i).ToTime(), arr1.Value(i).ToTime()) } } } func TestNewDate64Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDate64Builder(mem) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewDate64Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewDate64Array() // check state of builder after NewDate64Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDate64Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDate64Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDate64Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []arrow.Date64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Date64Values(), "unexpected Date64Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.Date64Values(), 10, "unexpected length of Date64Values") a.Release() ab.Append(7) ab.Append(8) a = ab.NewDate64Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []arrow.Date64{7, 8}, a.Date64Values()) assert.Len(t, a.Date64Values(), 2) a.Release() var ( want = []arrow.Date64{1, 2, 3, 4} valids = []bool{true, true, false, true} ) 
ab.AppendValues(want, valids) a = ab.NewDate64Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Date64); !ok { t.Fatalf("could not type-assert to array.Date64") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Date64) if !ok { t.Fatalf("could not type-assert to array.Date64") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestDate64Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDate64Builder(mem) defer ab.Release() exp := []arrow.Date64{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewDate64Array() assert.Equal(t, exp, a.Date64Values()) a.Release() } func TestDate64Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDate64Builder(mem) defer ab.Release() exp := []arrow.Date64{0, 1, 2, 3} ab.AppendValues([]arrow.Date64{}, nil) a := ab.NewDate64Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewDate64Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]arrow.Date64{}, nil) ab.AppendValues(exp, nil) a = ab.NewDate64Array() assert.Equal(t, exp, a.Date64Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]arrow.Date64{}, nil) a = ab.NewDate64Array() assert.Equal(t, exp, a.Date64Values()) a.Release() } func TestDate64Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) ab := array.NewDate64Builder(mem) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, 
ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestDurationStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dt := &arrow.DurationType{Unit: arrow.Second} b := array.NewDurationBuilder(mem, dt) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Duration) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewDurationBuilder(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Duration) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewDurationBuilder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.DurationType{Unit: arrow.Second} ab := array.NewDurationBuilder(mem, dtype) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before NewDurationArray assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewDurationArray() // check state of builder after NewDurationArray assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDurationArray did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDurationArray did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDurationArray did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), 
"unexpected null count") assert.Equal(t, []arrow.Duration{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.DurationValues(), "unexpected DurationValues") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.DurationValues(), 10, "unexpected length of DurationValues") a.Release() ab.Append(7) ab.Append(8) a = ab.NewDurationArray() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []arrow.Duration{7, 8}, a.DurationValues()) assert.Len(t, a.DurationValues(), 2) a.Release() var ( want = []arrow.Duration{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.NewDurationArray() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Duration); !ok { t.Fatalf("could not type-assert to array.Duration") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Duration) if !ok { t.Fatalf("could not type-assert to array.Duration") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestDurationBuilder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.DurationType{Unit: arrow.Second} ab := array.NewDurationBuilder(mem, dtype) defer ab.Release() exp := []arrow.Duration{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewDurationArray() assert.Equal(t, exp, a.DurationValues()) a.Release() } func TestDurationBuilder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.DurationType{Unit: arrow.Second} ab := array.NewDurationBuilder(mem, dtype) defer ab.Release() exp 
:= []arrow.Duration{0, 1, 2, 3} ab.AppendValues([]arrow.Duration{}, nil) a := ab.NewDurationArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewDurationArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]arrow.Duration{}, nil) ab.AppendValues(exp, nil) a = ab.NewDurationArray() assert.Equal(t, exp, a.DurationValues()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]arrow.Duration{}, nil) a = ab.NewDurationArray() assert.Equal(t, exp, a.DurationValues()) a.Release() } func TestDurationBuilder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.DurationType{Unit: arrow.Second} ab := array.NewDurationBuilder(mem, dtype) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } arrow-go-18.2.0/arrow/array/numericbuilder.gen_test.go.tmpl000066400000000000000000000171141476434502500237470ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) {{range .In}} func Test{{.Name}}StringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) {{if .Opt.Parametric -}} {{ if or (eq .Name "Time64") -}} dt := &arrow.{{.Name}}Type{Unit: arrow.Microsecond} {{else -}} dt := &arrow.{{.Name}}Type{Unit: arrow.Second} {{end -}} b := array.New{{.Name}}Builder(mem, dt) {{else -}} b := array.New{{.Name}}Builder(mem) {{end -}} defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.{{.Name}}) defer arr.Release() // 2. create array via AppendValueFromString {{if .Opt.Parametric -}} b1 := array.New{{.Name}}Builder(mem, dt) {{else -}} b1 := array.New{{.Name}}Builder(mem) {{end -}} defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.{{.Name}}) defer arr1.Release() {{ if or (eq .Name "Date64") -}} assert.Exactly(t, arr.Len(), arr1.Len()) for i := 0; i < arr.Len(); i++ { assert.Exactly(t, arr.IsValid(i), arr1.IsValid(i)) assert.Exactly(t, arr.ValueStr(i), arr1.ValueStr(i)) if arr.IsValid(i) { assert.Exactly(t, arr.Value(i).ToTime(), arr1.Value(i).ToTime()) } } {{else -}} assert.True(t, array.Equal(arr, arr1)) {{end -}} } func TestNew{{.Name}}Builder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) {{if .Opt.Parametric -}} dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} ab := array.New{{.Name}}Builder(mem, dtype) {{else}} ab := array.New{{.Name}}Builder(mem) {{end -}} defer ab.Release() 
ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) // check state of builder before New{{.Name}}Array assert.Equal(t, 10, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.New{{.Name}}Array() // check state of builder after New{{.Name}}Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), New{{.Name}}Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), New{{.Name}}Array did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), New{{.Name}}Array did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []{{or .QualifiedType .Type}}{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.{{.Name}}Values(), "unexpected {{.Name}}Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.{{.Name}}Values(), 10, "unexpected length of {{.Name}}Values") a.Release() ab.Append(7) ab.Append(8) a = ab.New{{.Name}}Array() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []{{or .QualifiedType .Type}}{7, 8}, a.{{.Name}}Values()) assert.Len(t, a.{{.Name}}Values(), 2) a.Release() var ( want = []{{or .QualifiedType .Type}}{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, valids) a = ab.New{{.Name}}Array() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.{{.Name}}); !ok { t.Fatalf("could not type-assert to array.{{.Name}}") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.{{.Name}}) if !ok { t.Fatalf("could 
not type-assert to array.{{.Name}}") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func Test{{.Name}}Builder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) {{if .Opt.Parametric -}} dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} ab := array.New{{.Name}}Builder(mem, dtype) {{else}} ab := array.New{{.Name}}Builder(mem) {{end -}} defer ab.Release() exp := []{{or .QualifiedType .Type}}{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.New{{.Name}}Array() assert.Equal(t, exp, a.{{.Name}}Values()) a.Release() } func Test{{.Name}}Builder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) {{if .Opt.Parametric -}} dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} ab := array.New{{.Name}}Builder(mem, dtype) {{else}} ab := array.New{{.Name}}Builder(mem) {{end -}} defer ab.Release() exp := []{{or .QualifiedType .Type}}{0, 1, 2, 3} ab.AppendValues([]{{or .QualifiedType .Type}}{}, nil) a := ab.New{{.Name}}Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.New{{.Name}}Array() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]{{or .QualifiedType .Type}}{}, nil) ab.AppendValues(exp, nil) a = ab.New{{.Name}}Array() assert.Equal(t, exp, a.{{.Name}}Values()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]{{or .QualifiedType .Type}}{}, nil) a = ab.New{{.Name}}Array() assert.Equal(t, exp, a.{{.Name}}Values()) a.Release() } func Test{{.Name}}Builder_Resize(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) {{if .Opt.Parametric -}} dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} ab := array.New{{.Name}}Builder(mem, dtype) {{else}} ab := array.New{{.Name}}Builder(mem) {{end -}} defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 
0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func Test{{.Name}}BuilderUnmarshalJSON(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) bldr := array.New{{.Name}}Builder(mem) defer bldr.Release() jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` err := bldr.UnmarshalJSON([]byte(jsonstr)) assert.NoError(t, err) arr := bldr.New{{.Name}}Array() defer arr.Release() assert.NotNil(t, arr) assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) } {{end}} arrow-go-18.2.0/arrow/array/record.go000066400000000000000000000247431476434502500174400ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array import ( "bytes" "fmt" "strings" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) // RecordReader reads a stream of records. type RecordReader interface { Retain() Release() Schema() *arrow.Schema Next() bool Record() arrow.Record Err() error } // simpleRecords is a simple iterator over a collection of records. type simpleRecords struct { refCount int64 schema *arrow.Schema recs []arrow.Record cur arrow.Record } // NewRecordReader returns a simple iterator over the given slice of records. func NewRecordReader(schema *arrow.Schema, recs []arrow.Record) (RecordReader, error) { rs := &simpleRecords{ refCount: 1, schema: schema, recs: recs, cur: nil, } for _, rec := range rs.recs { rec.Retain() } for _, rec := range recs { if !rec.Schema().Equal(rs.schema) { rs.Release() return nil, fmt.Errorf("arrow/array: mismatch schema") } } return rs, nil } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. func (rs *simpleRecords) Retain() { atomic.AddInt64(&rs.refCount, 1) } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. 
func (rs *simpleRecords) Release() { debug.Assert(atomic.LoadInt64(&rs.refCount) > 0, "too many releases") if atomic.AddInt64(&rs.refCount, -1) == 0 { if rs.cur != nil { rs.cur.Release() } for _, rec := range rs.recs { rec.Release() } rs.recs = nil } } func (rs *simpleRecords) Schema() *arrow.Schema { return rs.schema } func (rs *simpleRecords) Record() arrow.Record { return rs.cur } func (rs *simpleRecords) Next() bool { if len(rs.recs) == 0 { return false } if rs.cur != nil { rs.cur.Release() } rs.cur = rs.recs[0] rs.recs = rs.recs[1:] return true } func (rs *simpleRecords) Err() error { return nil } // simpleRecord is a basic, non-lazy in-memory record batch. type simpleRecord struct { refCount int64 schema *arrow.Schema rows int64 arrs []arrow.Array } // NewRecord returns a basic, non-lazy in-memory record batch. // // NewRecord panics if the columns and schema are inconsistent. // NewRecord panics if rows is larger than the height of the columns. func NewRecord(schema *arrow.Schema, cols []arrow.Array, nrows int64) arrow.Record { rec := &simpleRecord{ refCount: 1, schema: schema, rows: nrows, arrs: make([]arrow.Array, len(cols)), } copy(rec.arrs, cols) for _, arr := range rec.arrs { arr.Retain() } if rec.rows < 0 { switch len(rec.arrs) { case 0: rec.rows = 0 default: rec.rows = int64(rec.arrs[0].Len()) } } err := rec.validate() if err != nil { rec.Release() panic(err) } return rec } func (rec *simpleRecord) SetColumn(i int, arr arrow.Array) (arrow.Record, error) { if i < 0 || i >= len(rec.arrs) { return nil, fmt.Errorf("arrow/array: column index out of range [0, %d): got=%d", len(rec.arrs), i) } if arr.Len() != int(rec.rows) { return nil, fmt.Errorf("arrow/array: mismatch number of rows in column %q: got=%d, want=%d", rec.schema.Field(i).Name, arr.Len(), rec.rows, ) } f := rec.schema.Field(i) if !arrow.TypeEqual(f.Type, arr.DataType()) { return nil, fmt.Errorf("arrow/array: column %q type mismatch: got=%v, want=%v", f.Name, arr.DataType(), f.Type, ) } arrs := 
make([]arrow.Array, len(rec.arrs)) copy(arrs, rec.arrs) arrs[i] = arr return NewRecord(rec.schema, arrs, rec.rows), nil } func (rec *simpleRecord) validate() error { if rec.rows == 0 && len(rec.arrs) == 0 { return nil } if len(rec.arrs) != rec.schema.NumFields() { return fmt.Errorf("arrow/array: number of columns/fields mismatch") } for i, arr := range rec.arrs { f := rec.schema.Field(i) if int64(arr.Len()) < rec.rows { return fmt.Errorf("arrow/array: mismatch number of rows in column %q: got=%d, want=%d", f.Name, arr.Len(), rec.rows, ) } if !arrow.TypeEqual(f.Type, arr.DataType()) { return fmt.Errorf("arrow/array: column %q type mismatch: got=%v, want=%v", f.Name, arr.DataType(), f.Type, ) } } return nil } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. func (rec *simpleRecord) Retain() { atomic.AddInt64(&rec.refCount, 1) } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. func (rec *simpleRecord) Release() { debug.Assert(atomic.LoadInt64(&rec.refCount) > 0, "too many releases") if atomic.AddInt64(&rec.refCount, -1) == 0 { for _, arr := range rec.arrs { arr.Release() } rec.arrs = nil } } func (rec *simpleRecord) Schema() *arrow.Schema { return rec.schema } func (rec *simpleRecord) NumRows() int64 { return rec.rows } func (rec *simpleRecord) NumCols() int64 { return int64(len(rec.arrs)) } func (rec *simpleRecord) Columns() []arrow.Array { return rec.arrs } func (rec *simpleRecord) Column(i int) arrow.Array { return rec.arrs[i] } func (rec *simpleRecord) ColumnName(i int) string { return rec.schema.Field(i).Name } // NewSlice constructs a zero-copy slice of the record with the indicated // indices i and j, corresponding to array[i:j]. // The returned record must be Release()'d after use. // // NewSlice panics if the slice is outside the valid range of the record array. 
// NewSlice panics if j < i. func (rec *simpleRecord) NewSlice(i, j int64) arrow.Record { arrs := make([]arrow.Array, len(rec.arrs)) for ii, arr := range rec.arrs { arrs[ii] = NewSlice(arr, i, j) } defer func() { for _, arr := range arrs { arr.Release() } }() return NewRecord(rec.schema, arrs, j-i) } func (rec *simpleRecord) String() string { o := new(strings.Builder) fmt.Fprintf(o, "record:\n %v\n", rec.schema) fmt.Fprintf(o, " rows: %d\n", rec.rows) for i, col := range rec.arrs { fmt.Fprintf(o, " col[%d][%s]: %v\n", i, rec.schema.Field(i).Name, col) } return o.String() } func (rec *simpleRecord) MarshalJSON() ([]byte, error) { arr := RecordToStructArray(rec) defer arr.Release() return arr.MarshalJSON() } // RecordBuilder eases the process of building a Record, iteratively, from // a known Schema. type RecordBuilder struct { refCount int64 mem memory.Allocator schema *arrow.Schema fields []Builder } // NewRecordBuilder returns a builder, using the provided memory allocator and a schema. func NewRecordBuilder(mem memory.Allocator, schema *arrow.Schema) *RecordBuilder { b := &RecordBuilder{ refCount: 1, mem: mem, schema: schema, fields: make([]Builder, schema.NumFields()), } for i := 0; i < schema.NumFields(); i++ { b.fields[i] = NewBuilder(b.mem, schema.Field(i).Type) } return b } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. func (b *RecordBuilder) Retain() { atomic.AddInt64(&b.refCount, 1) } // Release decreases the reference count by 1. 
func (b *RecordBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { for _, f := range b.fields { f.Release() } b.fields = nil } } func (b *RecordBuilder) Schema() *arrow.Schema { return b.schema } func (b *RecordBuilder) Fields() []Builder { return b.fields } func (b *RecordBuilder) Field(i int) Builder { return b.fields[i] } func (b *RecordBuilder) Reserve(size int) { for _, f := range b.fields { f.Reserve(size) } } // NewRecord creates a new record from the memory buffers and resets the // RecordBuilder so it can be used to build a new record. // // The returned Record must be Release()'d after use. // // NewRecord panics if the fields' builder do not have the same length. func (b *RecordBuilder) NewRecord() arrow.Record { cols := make([]arrow.Array, len(b.fields)) rows := int64(0) defer func(cols []arrow.Array) { for _, col := range cols { if col == nil { continue } col.Release() } }(cols) for i, f := range b.fields { cols[i] = f.NewArray() irow := int64(cols[i].Len()) if i > 0 && irow != rows { panic(fmt.Errorf("arrow/array: field %d has %d rows. want=%d", i, irow, rows)) } rows = irow } return NewRecord(b.schema, cols, rows) } // UnmarshalJSON for record builder will read in a single object and add the values // to each field in the recordbuilder, missing fields will get a null and unexpected // keys will be ignored. If reading in an array of records as a single batch, then use // a structbuilder and use RecordFromStruct. 
func (b *RecordBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) // should start with a '{' t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '{' { return fmt.Errorf("record should start with '{', not %s", t) } keylist := make(map[string]bool) for dec.More() { keyTok, err := dec.Token() if err != nil { return err } key := keyTok.(string) if keylist[key] { return fmt.Errorf("key %s shows up twice in row to be decoded", key) } keylist[key] = true indices := b.schema.FieldIndices(key) if len(indices) == 0 { var extra interface{} if err := dec.Decode(&extra); err != nil { return err } continue } if err := b.fields[indices[0]].UnmarshalOne(dec); err != nil { return err } } for i := 0; i < b.schema.NumFields(); i++ { if !keylist[b.schema.Field(i).Name] { b.fields[i].AppendNull() } } return nil } var ( _ arrow.Record = (*simpleRecord)(nil) _ RecordReader = (*simpleRecords)(nil) ) arrow-go-18.2.0/arrow/array/record_test.go000066400000000000000000000453151476434502500204750ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "fmt" "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestRecord(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) schema := arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, }, nil, ) col1 := func() arrow.Array { ib := array.NewInt32Builder(mem) defer ib.Release() ib.AppendValues([]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) return ib.NewInt32Array() }() defer col1.Release() col2 := func() arrow.Array { b := array.NewFloat64Builder(mem) defer b.Release() b.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) return b.NewFloat64Array() }() defer col2.Release() col2_1 := func() arrow.Array { b := array.NewFloat64Builder(mem) defer b.Release() b.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) return b.NewFloat64Array() }() defer col2_1.Release() cols := []arrow.Array{col1, col2} rec := array.NewRecord(schema, cols, -1) defer rec.Release() rec.Retain() rec.Release() if got, want := rec.Schema(), schema; !got.Equal(want) { t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) } if got, want := rec.NumRows(), int64(10); got != want { t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) } if got, want := rec.NumCols(), int64(2); got != want { t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) } if got, want := rec.Columns()[0], cols[0]; got != want { t.Fatalf("invalid column: got=%q, want=%q", got, want) } if got, want := rec.Column(0), cols[0]; got != want { t.Fatalf("invalid column: got=%q, want=%q", got, want) } if got, want := rec.ColumnName(0), schema.Field(0).Name; got != want { t.Fatalf("invalid column name: got=%q, want=%q", got, want) } if _, err := rec.SetColumn(0, col2_1); err == nil { t.Fatalf("expected an 
error") } newRec, err := rec.SetColumn(1, col2_1) if err != nil { t.Fatalf("unexpected error: %v", err) } defer newRec.Release() if !reflect.DeepEqual(newRec.Column(1), col2_1) { t.Fatalf("invalid column: got=%q, want=%q", rec.Column(1), col2_1) } for _, tc := range []struct { i, j int64 err error }{ {i: 0, j: 10, err: nil}, {i: 1, j: 10, err: nil}, {i: 1, j: 9, err: nil}, {i: 0, j: 0, err: nil}, {i: 1, j: 1, err: nil}, {i: 10, j: 10, err: nil}, {i: 1, j: 0, err: fmt.Errorf("arrow/array: index out of range")}, {i: 1, j: 11, err: fmt.Errorf("arrow/array: index out of range")}, } { t.Run(fmt.Sprintf("slice-%02d-%02d", tc.i, tc.j), func(t *testing.T) { if tc.err != nil { defer func() { e := recover() if e == nil { t.Fatalf("expected an error %q", tc.err) } switch err := e.(type) { case string: if err != tc.err.Error() { t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) } case error: if err.Error() != tc.err.Error() { t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) } default: t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) } }() } sub := rec.NewSlice(tc.i, tc.j) defer sub.Release() if got, want := sub.NumRows(), tc.j-tc.i; got != want { t.Fatalf("invalid rec-slice number of rows: got=%d, want=%d", got, want) } }) } for _, tc := range []struct { schema *arrow.Schema cols []arrow.Array rows int64 err error }{ { schema: schema, cols: nil, rows: 0, }, { schema: schema, cols: cols[:1], rows: 0, err: fmt.Errorf("arrow/array: number of columns/fields mismatch"), }, { schema: arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, }, nil, ), cols: cols, rows: 0, err: fmt.Errorf("arrow/array: number of columns/fields mismatch"), }, { schema: arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-f64", Type: arrow.PrimitiveTypes.Int32}, }, nil, ), cols: cols, rows: 0, err: fmt.Errorf(`arrow/array: column "f2-f64" type mismatch: got=float64, want=int32`), }, { 
schema: schema, cols: cols, rows: 11, err: fmt.Errorf(`arrow/array: mismatch number of rows in column "f1-i32": got=10, want=11`), }, { schema: schema, cols: cols, rows: 10, err: nil, }, { schema: schema, cols: cols, rows: 3, err: nil, }, { schema: schema, cols: cols, rows: 0, err: nil, }, } { t.Run("", func(t *testing.T) { if tc.err != nil { defer func() { e := recover() if e == nil { t.Fatalf("expected an error %q", tc.err) } switch err := e.(type) { case string: if err != tc.err.Error() { t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) } case error: if err.Error() != tc.err.Error() { t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) } default: t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) } }() } rec := array.NewRecord(tc.schema, tc.cols, tc.rows) defer rec.Release() if got, want := rec.NumRows(), tc.rows; got != want { t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) } }) } } func TestRecordReader(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) schema := arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, }, nil, ) rec1 := func() arrow.Record { col1 := func() arrow.Array { ib := array.NewInt32Builder(mem) defer ib.Release() ib.AppendValues([]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) return ib.NewInt32Array() }() defer col1.Release() col2 := func() arrow.Array { b := array.NewFloat64Builder(mem) defer b.Release() b.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) return b.NewFloat64Array() }() defer col2.Release() cols := []arrow.Array{col1, col2} return array.NewRecord(schema, cols, -1) }() defer rec1.Release() rec2 := func() arrow.Record { col1 := func() arrow.Array { ib := array.NewInt32Builder(mem) defer ib.Release() ib.AppendValues([]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) return ib.NewInt32Array() }() defer col1.Release() col2 := 
func() arrow.Array { b := array.NewFloat64Builder(mem) defer b.Release() b.AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) return b.NewFloat64Array() }() defer col2.Release() cols := []arrow.Array{col1, col2} return array.NewRecord(schema, cols, -1) }() defer rec2.Release() recs := []arrow.Record{rec1, rec2} itr, err := array.NewRecordReader(schema, recs) if err != nil { t.Fatal(err) } defer itr.Release() itr.Retain() itr.Release() if got, want := itr.Schema(), schema; !got.Equal(want) { t.Fatalf("invalid schema. got=%#v, want=%#v", got, want) } n := 0 for itr.Next() { n++ if got, want := itr.Record(), recs[n-1]; !reflect.DeepEqual(got, want) { t.Fatalf("itr[%d], invalid record. got=%#v, want=%#v", n-1, got, want) } } if err := itr.Err(); err != nil { t.Fatalf("itr error: %#v", err) } if n != len(recs) { t.Fatalf("invalid number of iterations. got=%d, want=%d", n, len(recs)) } for _, tc := range []struct { name string schema *arrow.Schema err error }{ { name: "mismatch-name", schema: arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-XXX", Type: arrow.PrimitiveTypes.Float64}, }, nil, ), err: fmt.Errorf("arrow/array: mismatch schema"), }, { name: "mismatch-type", schema: arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-f64", Type: arrow.PrimitiveTypes.Int64}, }, nil, ), err: fmt.Errorf("arrow/array: mismatch schema"), }, } { t.Run(tc.name, func(t *testing.T) { itr, err := array.NewRecordReader(tc.schema, recs) if itr != nil { itr.Release() } if err == nil { t.Fatalf("expected an error: %v", tc.err) } if !assert.Equal(t, tc.err, err) { t.Fatalf("invalid error: got=%v, want=%v", err, tc.err) } }) } } func TestRecordBuilderRespectsFixedSizeArrayNullability(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) cases := []struct { assertion string fields []arrow.Field }{ { "nullable", []arrow.Field{{Name: "data", 
Type: arrow.FixedSizeListOf(1, arrow.PrimitiveTypes.Int32)}}, }, { "not nullable", []arrow.Field{{Name: "data", Type: arrow.FixedSizeListOfNonNullable(1, arrow.PrimitiveTypes.Int32)}}, }, } for _, c := range cases { t.Run(c.assertion, func(t *testing.T) { schema := arrow.NewSchema(c.fields, nil) b := array.NewRecordBuilder(mem, schema) defer b.Release() lb := b.Field(0).(*array.FixedSizeListBuilder) lb.Append(true) vb := lb.ValueBuilder().(*array.Int32Builder) vb.Append(10) rec := b.NewRecord() defer rec.Release() if got, want := rec.Column(0).String(), "[[10]]"; got != want { t.Fatalf("invalid record: got=%q, want=%q", got, want) } }) } } func TestRecordBuilder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) mapDt := arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.String) mapDt.KeysSorted = true mapDt.SetItemNullable(false) schema := arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, {Name: "map", Type: mapDt}, }, nil, ) b := array.NewRecordBuilder(mem, schema) defer b.Release() b.Retain() b.Release() b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3}, nil) b.Field(0).(*array.Int32Builder).AppendValues([]int32{4, 5}, nil) b.Field(1).(*array.Float64Builder).AppendValues([]float64{1, 2, 3, 4, 5}, nil) mb := b.Field(2).(*array.MapBuilder) for i := 0; i < 5; i++ { mb.Append(true) if i%3 == 0 { mb.KeyBuilder().(*array.StringBuilder).AppendValues([]string{fmt.Sprint(i), "2", "3"}, nil) mb.ItemBuilder().(*array.StringBuilder).AppendValues([]string{"a", "b", "c"}, nil) } } rec := b.NewRecord() defer rec.Release() if got, want := rec.Schema(), schema; !got.Equal(want) { t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) } if got, want := rec.NumRows(), int64(5); got != want { t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) } if got, want := rec.NumCols(), int64(3); got != want { 
t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) } if got, want := rec.ColumnName(0), schema.Field(0).Name; got != want { t.Fatalf("invalid column name: got=%q, want=%q", got, want) } if got, want := rec.Column(2).String(), `[{["0" "2" "3"] ["a" "b" "c"]} {[] []} {[] []} {["3" "2" "3"] ["a" "b" "c"]} {[] []}]`; got != want { t.Fatalf("invalid column name: got=%q, want=%q", got, want) } } type testMessage struct { Foo *testMessageFoo Bars []*testMessageBar } func (m *testMessage) Reset() { *m = testMessage{} } func (m *testMessage) GetFoo() *testMessageFoo { if m != nil { return m.Foo } return nil } func (m *testMessage) GetBars() []*testMessageBar { if m != nil { return m.Bars } return nil } type testMessageFoo struct { A int32 B []uint32 } func (m *testMessageFoo) Reset() { *m = testMessageFoo{} } func (m *testMessageFoo) GetA() int32 { if m != nil { return m.A } return 0 } func (m *testMessageFoo) GetB() []uint32 { if m != nil { return m.B } return nil } type testMessageBar struct { C int64 D []uint64 } func (m *testMessageBar) Reset() { *m = testMessageBar{} } func (m *testMessageBar) GetC() int64 { if m != nil { return m.C } return 0 } func (m *testMessageBar) GetD() []uint64 { if m != nil { return m.D } return nil } var testMessageSchema = arrow.NewSchema( []arrow.Field{ {Name: "foo", Type: arrow.StructOf( arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int32}, arrow.Field{Name: "b", Type: arrow.ListOf( arrow.PrimitiveTypes.Uint32, )}, )}, {Name: "bars", Type: arrow.ListOf( arrow.StructOf( arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int64}, arrow.Field{Name: "d", Type: arrow.ListOf( arrow.PrimitiveTypes.Uint64, )}, ), )}, }, nil, ) func (m *testMessage) Fill(rec arrow.Record, row int) error { m.Reset() // foo if 0 < rec.NumCols() { src0 := rec.Column(0).Data() typedSrc0 := array.NewStructData(src0) defer typedSrc0.Release() if typedSrc0.IsValid(row) { m0 := &testMessageFoo{} { // a if 0 < typedSrc0.NumField() { src0_0 := 
typedSrc0.Field(0).Data() typedSrc0_0 := array.NewInt32Data(src0_0) defer typedSrc0_0.Release() m0.A = typedSrc0_0.Value(row) } // b if 1 < typedSrc0.NumField() { src0_1 := typedSrc0.Field(1).Data() listSrc0_1 := array.NewListData(src0_1) defer listSrc0_1.Release() if listSrc0_1.IsValid(row) { typedSrc0_1 := array.NewUint32Data(listSrc0_1.ListValues().Data()) typedSrc0_1.Release() start0_1 := int(listSrc0_1.Offsets()[row]) end0_1 := int(listSrc0_1.Offsets()[row+1]) for row := start0_1; row < end0_1; row++ { m0.B = append(m0.B, typedSrc0_1.Value(row)) } } } } m.Foo = m0 } } // bars if 1 < rec.NumCols() { src1 := rec.Column(1).Data() listSrc1 := array.NewListData(src1) defer listSrc1.Release() if listSrc1.IsValid(row) { typedSrc1 := array.NewStructData(listSrc1.ListValues().Data()) defer typedSrc1.Release() start1 := int(listSrc1.Offsets()[row]) end1 := int(listSrc1.Offsets()[row+1]) for row := start1; row < end1; row++ { if typedSrc1.IsValid(row) { m1 := &testMessageBar{} { // c if 0 < typedSrc1.NumField() { src1_0 := typedSrc1.Field(0).Data() typedSrc1_0 := array.NewInt64Data(src1_0) defer typedSrc1_0.Release() m1.C = typedSrc1_0.Value(row) } // d if 1 < typedSrc1.NumField() { src1_1 := typedSrc1.Field(1).Data() listSrc1_1 := array.NewListData(src1_1) defer listSrc1_1.Release() if listSrc1_1.IsValid(row) { typedSrc1_1 := array.NewUint64Data(listSrc1_1.ListValues().Data()) defer typedSrc1_1.Release() start1_1 := int(listSrc1_1.Offsets()[row]) end1_1 := int(listSrc1_1.Offsets()[row+1]) for row := start1_1; row < end1_1; row++ { m1.D = append(m1.D, typedSrc1_1.Value(row)) } } } } m.Bars = append(m.Bars, m1) } else { m.Bars = append(m.Bars, nil) } } } } return nil } func newTestMessageArrowRecordBuilder(mem memory.Allocator) *testMessageArrowRecordBuilder { return &testMessageArrowRecordBuilder{ rb: array.NewRecordBuilder(mem, testMessageSchema), } } type testMessageArrowRecordBuilder struct { rb *array.RecordBuilder } func (b *testMessageArrowRecordBuilder) Build() 
arrow.Record { return b.rb.NewRecord() } func (b *testMessageArrowRecordBuilder) Release() { b.rb.Release() } func (b *testMessageArrowRecordBuilder) Append(m *testMessage) { // foo { builder0 := b.rb.Field(0) v0 := m.GetFoo() valueBuilder0 := builder0.(*array.StructBuilder) if v0 == nil { valueBuilder0.AppendNull() } else { valueBuilder0.Append(true) // a { v0_0 := v0.GetA() builder0_0 := valueBuilder0.FieldBuilder(0) valueBuilder0_0 := builder0_0.(*array.Int32Builder) valueBuilder0_0.Append(v0_0) } // b { v0_1 := v0.GetB() builder0_1 := valueBuilder0.FieldBuilder(1) listBuilder0_1 := builder0_1.(*array.ListBuilder) if len(v0_1) == 0 { listBuilder0_1.AppendNull() } else { listBuilder0_1.Append(true) valueBuilder0_1 := listBuilder0_1.ValueBuilder().(*array.Uint32Builder) for _, item := range v0_1 { valueBuilder0_1.Append(item) } } } } } // bars { builder1 := b.rb.Field(1) v1 := m.GetBars() listBuilder1 := builder1.(*array.ListBuilder) if len(v1) == 0 { listBuilder1.AppendNull() } else { listBuilder1.Append(true) valueBuilder1 := listBuilder1.ValueBuilder().(*array.StructBuilder) for _, item := range v1 { if item == nil { valueBuilder1.AppendNull() } else { valueBuilder1.Append(true) // c { v1_0 := item.GetC() builder1_0 := valueBuilder1.FieldBuilder(0) valueBuilder1_0 := builder1_0.(*array.Int64Builder) valueBuilder1_0.Append(v1_0) } // d { v1_1 := item.GetD() builder1_1 := valueBuilder1.FieldBuilder(1) listBuilder1_1 := builder1_1.(*array.ListBuilder) if len(v1_1) == 0 { listBuilder1_1.AppendNull() } else { listBuilder1_1.Append(true) valueBuilder1_1 := listBuilder1_1.ValueBuilder().(*array.Uint64Builder) for _, item := range v1_1 { valueBuilder1_1.Append(item) } } } } } } } } func TestRecordBuilderMessages(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) b := newTestMessageArrowRecordBuilder(mem) defer b.Release() var msgs []*testMessage for i := 0; i < 1000; i++ { msg := &testMessage{ Foo: &testMessageFoo{ A: 
int32(i), B: []uint32{2, 3, 4, 5, 6, 7, 8, 9}, }, Bars: []*testMessageBar{ { C: 11, D: []uint64{12, 13, 14}, }, { C: 15, D: []uint64{16, 17, 18, 19}, }, nil, { C: 20, D: []uint64{21}, }, }, } msgs = append(msgs, msg) b.Append(msg) } rec := b.Build() defer rec.Release() var got testMessage for i := 0; i < 1000; i++ { got.Fill(rec, i) if !reflect.DeepEqual(&got, msgs[i]) { t.Fatalf("row[%d], invalid record. got=%#v, want=%#v", i, &got, msgs[i]) } } } arrow-go-18.2.0/arrow/array/string.go000066400000000000000000000414561476434502500174700ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "fmt" "reflect" "strings" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) type StringLike interface { arrow.Array Value(int) string ValueLen(int) int } // String represents an immutable sequence of variable-length UTF-8 strings. type String struct { array offsets []int32 values string } // NewStringData constructs a new String array from data. 
func NewStringData(data arrow.ArrayData) *String { a := &String{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the String with a different set of Data. func (a *String) Reset(data arrow.ArrayData) { a.setData(data.(*Data)) } // Value returns the slice at index i. This value should not be mutated. func (a *String) Value(i int) string { i = i + a.array.data.offset return a.values[a.offsets[i]:a.offsets[i+1]] } func (a *String) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return a.Value(i) } // ValueOffset returns the offset of the value at index i. func (a *String) ValueOffset(i int) int { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } return int(a.offsets[i+a.array.data.offset]) } func (a *String) ValueOffset64(i int) int64 { return int64(a.ValueOffset(i)) } func (a *String) ValueLen(i int) int { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } beg := a.array.data.offset + i return int(a.offsets[beg+1] - a.offsets[beg]) } func (a *String) ValueOffsets() []int32 { beg := a.array.data.offset end := beg + a.array.data.length + 1 return a.offsets[beg:end] } func (a *String) ValueBytes() []byte { beg := a.array.data.offset end := beg + a.array.data.length if a.array.data.buffers[2] != nil { return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]] } return nil } func (a *String) String() string { o := new(strings.Builder) o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { o.WriteString(" ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%q", a.Value(i)) } } o.WriteString("]") return o.String() } func (a *String) setData(data *Data) { if len(data.buffers) != 3 { panic("arrow/array: len(data.buffers) != 3") } a.array.setData(data) if vdata := data.buffers[2]; vdata != nil { b := vdata.Bytes() a.values = *(*string)(unsafe.Pointer(&b)) } if offsets := data.buffers[1]; offsets != nil { a.offsets = 
arrow.Int32Traits.CastFromBytes(offsets.Bytes()) } if a.array.data.length < 1 { return } expNumOffsets := a.array.data.offset + a.array.data.length + 1 if len(a.offsets) < expNumOffsets { panic(fmt.Errorf("arrow/array: string offset buffer must have at least %d values", expNumOffsets)) } if int(a.offsets[expNumOffsets-1]) > len(a.values) { panic("arrow/array: string offsets out of bounds of data buffer") } } func (a *String) GetOneForMarshal(i int) interface{} { if a.IsValid(i) { return a.Value(i) } return nil } func (a *String) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { if a.IsValid(i) { vals[i] = a.Value(i) } else { vals[i] = nil } } return json.Marshal(vals) } func arrayEqualString(left, right *String) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } // String represents an immutable sequence of variable-length UTF-8 strings. type LargeString struct { array offsets []int64 values string } // NewStringData constructs a new String array from data. func NewLargeStringData(data arrow.ArrayData) *LargeString { a := &LargeString{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the String with a different set of Data. func (a *LargeString) Reset(data arrow.ArrayData) { a.setData(data.(*Data)) } // Value returns the slice at index i. This value should not be mutated. func (a *LargeString) Value(i int) string { i = i + a.array.data.offset return a.values[a.offsets[i]:a.offsets[i+1]] } func (a *LargeString) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return a.Value(i) } // ValueOffset returns the offset of the value at index i. 
func (a *LargeString) ValueOffset(i int) int64 { if i < 0 || i > a.array.data.length { panic("arrow/array: index out of range") } return a.offsets[i+a.array.data.offset] } func (a *LargeString) ValueOffset64(i int) int64 { return a.ValueOffset(i) } func (a *LargeString) ValueLen(i int) int { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } beg := a.array.data.offset + i return int(a.offsets[beg+1] - a.offsets[beg]) } func (a *LargeString) ValueOffsets() []int64 { beg := a.array.data.offset end := beg + a.array.data.length + 1 return a.offsets[beg:end] } func (a *LargeString) ValueBytes() []byte { beg := a.array.data.offset end := beg + a.array.data.length if a.array.data.buffers[2] != nil { return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]] } return nil } func (a *LargeString) String() string { o := new(strings.Builder) o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { o.WriteString(" ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%q", a.Value(i)) } } o.WriteString("]") return o.String() } func (a *LargeString) setData(data *Data) { if len(data.buffers) != 3 { panic("arrow/array: len(data.buffers) != 3") } a.array.setData(data) if vdata := data.buffers[2]; vdata != nil { b := vdata.Bytes() a.values = *(*string)(unsafe.Pointer(&b)) } if offsets := data.buffers[1]; offsets != nil { a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes()) } if a.array.data.length < 1 { return } expNumOffsets := a.array.data.offset + a.array.data.length + 1 if len(a.offsets) < expNumOffsets { panic(fmt.Errorf("arrow/array: string offset buffer must have at least %d values", expNumOffsets)) } if int(a.offsets[expNumOffsets-1]) > len(a.values) { panic("arrow/array: string offsets out of bounds of data buffer") } } func (a *LargeString) GetOneForMarshal(i int) interface{} { if a.IsValid(i) { return a.Value(i) } return nil } func (a *LargeString) MarshalJSON() ([]byte, error) { vals := 
make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { vals[i] = a.GetOneForMarshal(i) } return json.Marshal(vals) } func arrayEqualLargeString(left, right *LargeString) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } type StringView struct { array values []arrow.ViewHeader dataBuffers []*memory.Buffer } func NewStringViewData(data arrow.ArrayData) *StringView { a := &StringView{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the String with a different set of Data. func (a *StringView) Reset(data arrow.ArrayData) { a.setData(data.(*Data)) } func (a *StringView) setData(data *Data) { if len(data.buffers) < 2 { panic("len(data.buffers) < 2") } a.array.setData(data) if valueData := data.buffers[1]; valueData != nil { a.values = arrow.ViewHeaderTraits.CastFromBytes(valueData.Bytes()) } a.dataBuffers = data.buffers[2:] } func (a *StringView) ValueHeader(i int) *arrow.ViewHeader { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") } return &a.values[a.array.data.offset+i] } func (a *StringView) Value(i int) string { s := a.ValueHeader(i) if s.IsInline() { return s.InlineString() } start := s.BufferOffset() buf := a.dataBuffers[s.BufferIndex()] value := buf.Bytes()[start : start+int32(s.Len())] return *(*string)(unsafe.Pointer(&value)) } func (a *StringView) ValueLen(i int) int { s := a.ValueHeader(i) return s.Len() } func (a *StringView) String() string { var o strings.Builder o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { o.WriteString(" ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(&o, "%q", a.Value(i)) } } o.WriteString("]") return o.String() } func (a *StringView) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return a.Value(i) } func (a *StringView) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } return a.Value(i) } func (a *StringView) 
MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := 0; i < a.Len(); i++ { vals[i] = a.GetOneForMarshal(i) } return json.Marshal(vals) } func arrayEqualStringView(left, right *StringView) bool { leftBufs, rightBufs := left.dataBuffers, right.dataBuffers for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if !left.ValueHeader(i).Equals(leftBufs, right.ValueHeader(i), rightBufs) { return false } } return true } // A StringBuilder is used to build a String array using the Append methods. type StringBuilder struct { *BinaryBuilder } // NewStringBuilder creates a new StringBuilder. func NewStringBuilder(mem memory.Allocator) *StringBuilder { b := &StringBuilder{ BinaryBuilder: NewBinaryBuilder(mem, arrow.BinaryTypes.String), } return b } func (b *StringBuilder) Type() arrow.DataType { return arrow.BinaryTypes.String } // Append appends a string to the builder. func (b *StringBuilder) Append(v string) { b.BinaryBuilder.Append([]byte(v)) } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. func (b *StringBuilder) AppendValues(v []string, valid []bool) { b.BinaryBuilder.AppendStringValues(v, valid) } // Value returns the string at index i. func (b *StringBuilder) Value(i int) string { return string(b.BinaryBuilder.Value(i)) } // NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder // so it can be used to build a new array. func (b *StringBuilder) NewArray() arrow.Array { return b.NewStringArray() } // NewStringArray creates a String array from the memory buffers used by the builder and resets the StringBuilder // so it can be used to build a new array. 
func (b *StringBuilder) NewStringArray() (a *String) { data := b.newData() a = NewStringData(data) data.Release() return } func (b *StringBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case nil: b.AppendNull() case string: b.Append(v) default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(v), Type: reflect.TypeOf(string("")), Offset: dec.InputOffset(), } } return nil } func (b *StringBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *StringBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("string builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } // A LargeStringBuilder is used to build a LargeString array using the Append methods. // LargeString is for when you need the offset buffer to be 64-bit integers // instead of 32-bit integers. type LargeStringBuilder struct { *BinaryBuilder } // NewStringBuilder creates a new StringBuilder. func NewLargeStringBuilder(mem memory.Allocator) *LargeStringBuilder { b := &LargeStringBuilder{ BinaryBuilder: NewBinaryBuilder(mem, arrow.BinaryTypes.LargeString), } return b } func (b *LargeStringBuilder) Type() arrow.DataType { return arrow.BinaryTypes.LargeString } // Append appends a string to the builder. func (b *LargeStringBuilder) Append(v string) { b.BinaryBuilder.Append([]byte(v)) } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *LargeStringBuilder) AppendValues(v []string, valid []bool) { b.BinaryBuilder.AppendStringValues(v, valid) } // Value returns the string at index i. func (b *LargeStringBuilder) Value(i int) string { return string(b.BinaryBuilder.Value(i)) } // NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder // so it can be used to build a new array. func (b *LargeStringBuilder) NewArray() arrow.Array { return b.NewLargeStringArray() } // NewStringArray creates a String array from the memory buffers used by the builder and resets the StringBuilder // so it can be used to build a new array. func (b *LargeStringBuilder) NewLargeStringArray() (a *LargeString) { data := b.newData() a = NewLargeStringData(data) data.Release() return } func (b *LargeStringBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case nil: b.AppendNull() case string: b.Append(v) default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(v), Type: reflect.TypeOf(string("")), Offset: dec.InputOffset(), } } return nil } func (b *LargeStringBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *LargeStringBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("string builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } type StringViewBuilder struct { *BinaryViewBuilder } func NewStringViewBuilder(mem memory.Allocator) *StringViewBuilder { bldr := &StringViewBuilder{ BinaryViewBuilder: NewBinaryViewBuilder(mem), } bldr.dtype = arrow.BinaryTypes.StringView return bldr } func (b *StringViewBuilder) Append(v string) { b.BinaryViewBuilder.AppendString(v) } func (b *StringViewBuilder) AppendValues(v []string, valid 
[]bool) { b.BinaryViewBuilder.AppendStringValues(v, valid) } func (b *StringViewBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case string: b.Append(v) case []byte: b.BinaryViewBuilder.Append(v) case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf([]byte{}), Offset: dec.InputOffset(), } } return nil } func (b *StringViewBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *StringViewBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary view builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } func (b *StringViewBuilder) NewArray() arrow.Array { return b.NewStringViewArray() } func (b *StringViewBuilder) NewStringViewArray() (a *StringView) { data := b.newData() a = NewStringViewData(data) data.Release() return } type StringLikeBuilder interface { Builder Append(string) AppendValues([]string, []bool) UnsafeAppend([]byte) ReserveData(int) } var ( _ arrow.Array = (*String)(nil) _ arrow.Array = (*LargeString)(nil) _ arrow.Array = (*StringView)(nil) _ Builder = (*StringBuilder)(nil) _ Builder = (*LargeStringBuilder)(nil) _ Builder = (*StringViewBuilder)(nil) _ StringLikeBuilder = (*StringBuilder)(nil) _ StringLikeBuilder = (*LargeStringBuilder)(nil) _ StringLikeBuilder = (*StringViewBuilder)(nil) _ StringLike = (*String)(nil) _ StringLike = (*LargeString)(nil) _ StringLike = (*StringView)(nil) ) arrow-go-18.2.0/arrow/array/string_test.go000066400000000000000000000540461476434502500205260ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "bytes" "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestStringArray(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( want = []string{"hello", "世界", "", "bye"} valids = []bool{true, true, false, true} offsets = []int32{0, 5, 11, 11, 14} ) sb := array.NewStringBuilder(mem) defer sb.Release() sb.Retain() sb.Release() assert.NoError(t, sb.AppendValueFromString(want[0])) sb.AppendValues(want[1:2], nil) sb.AppendNull() sb.Append(want[3]) if got, want := sb.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := sb.NullN(), 1; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } arr := sb.NewStringArray() defer arr.Release() arr.Retain() arr.Release() assert.Equal(t, "hello", arr.ValueStr(0)) if got, want := arr.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := arr.NullN(), 1; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } for i := range want { if arr.IsNull(i) != !valids[i] { 
t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) } switch { case arr.IsNull(i): default: got := arr.Value(i) if got != want[i] { t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) } } if got, want := arr.ValueOffset(i), int(offsets[i]); got != want { t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got, want) } } if !reflect.DeepEqual(offsets, arr.ValueOffsets()) { t.Fatalf("ValueOffsets got=%v, want=%v", arr.ValueOffsets(), offsets) } sub := array.MakeFromData(arr.Data()) defer sub.Release() if sub.DataType().ID() != arrow.STRING { t.Fatalf("invalid type: got=%q, want=string", sub.DataType().Name()) } if _, ok := sub.(*array.String); !ok { t.Fatalf("could not type-assert to array.String") } if got, want := arr.String(), `["hello" "世界" (null) "bye"]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } if !bytes.Equal([]byte(`hello世界bye`), arr.ValueBytes()) { t.Fatalf("got=%q, want=%q", string(arr.ValueBytes()), `hello世界bye`) } slice := array.NewSliceData(arr.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.String) if !ok { t.Fatalf("could not type-assert to array.String") } if got, want := v.String(), `[(null) "bye"]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } if !bytes.Equal(v.ValueBytes(), []byte("bye")) { t.Fatalf("got=%q, want=%q", string(v.ValueBytes()), "bye") } for i := 0; i < v.Len(); i++ { if got, want := v.ValueOffset(0), int(offsets[i+slice.Offset()]); got != want { t.Fatalf("val-offset-with-offset[%d]: got=%q, want=%q", i, got, want) } } if !reflect.DeepEqual(offsets[2:5], v.ValueOffsets()) { t.Fatalf("ValueOffsets got=%v, want=%v", v.ValueOffsets(), offsets[2:5]) } } func TestStringBuilder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) want := []string{"hello", "世界", "", "bye"} ab := array.NewStringBuilder(mem) defer ab.Release() stringValues := func(a *array.String) []string { vs 
:= make([]string, a.Len()) for i := range vs { vs[i] = a.Value(i) } return vs } ab.AppendValues([]string{}, nil) a := ab.NewStringArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewStringArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]string{}, nil) ab.AppendValues(want, nil) a = ab.NewStringArray() assert.Equal(t, want, stringValues(a)) a.Release() ab.AppendValues(want, nil) ab.AppendValues([]string{}, nil) a = ab.NewStringArray() assert.Equal(t, want, stringValues(a)) a.Release() } // TestStringReset tests the Reset() method on the String type by creating two different Strings and then // resetting the contents of string2 with the values from string1. func TestStringReset(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) sb1 := array.NewStringBuilder(mem) sb2 := array.NewStringBuilder(mem) defer sb1.Release() defer sb2.Release() sb1.Append("string1") sb1.AppendNull() var ( string1 = sb1.NewStringArray() string2 = sb2.NewStringArray() string1Data = string1.Data() ) string2.Reset(string1Data) assert.Equal(t, "string1", string2.Value(0)) } func TestStringInvalidOffsets(t *testing.T) { const expectedPanic = "arrow/array: string offsets out of bounds of data buffer" makeBuffers := func(valids []bool, offsets []int32, data string) []*memory.Buffer { offsetBuf := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets)) var nullBufBytes []byte var nullBuf *memory.Buffer if valids != nil { nullBufBytes = make([]byte, bitutil.BytesForBits(int64(len(valids)))) for i, v := range valids { bitutil.SetBitTo(nullBufBytes, i, v) } nullBuf = memory.NewBufferBytes(nullBufBytes) } return []*memory.Buffer{nullBuf, offsetBuf, memory.NewBufferBytes([]byte(data))} } assert.NotPanics(t, func() { buffers := makeBuffers(nil, []int32{}, "") array.NewStringData(array.NewData(arrow.BinaryTypes.String, 0, buffers, nil, 0, 0)) }, "empty array with no offsets") assert.NotPanics(t, func() { buffers := makeBuffers(nil, 
[]int32{0, 5}, "") array.NewStringData(array.NewData(arrow.BinaryTypes.String, 0, buffers, nil, 0, 0)) }, "empty array, offsets ignored") assert.NotPanics(t, func() { buffers := makeBuffers(nil, []int32{0, 3, 4, 9}, "oooabcdef") array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 2)) }, "data has offset and value offsets are valid") assert.NotPanics(t, func() { buffers := makeBuffers(nil, []int32{0, 3, 6, 9, 9}, "012345678") arr := array.NewStringData(array.NewData(arrow.BinaryTypes.String, 4, buffers, nil, 0, 0)) if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) { assert.Equal(t, "012", arr.Value(0)) assert.Equal(t, "345", arr.Value(1)) assert.Equal(t, "678", arr.Value(2)) assert.Equal(t, "", arr.Value(3), "trailing empty string value will have offset past end") } }, "simple valid case") assert.NotPanics(t, func() { buffers := makeBuffers([]bool{true, false, true, false}, []int32{0, 3, 4, 9, 9}, "oooabcdef") arr := array.NewStringData(array.NewData(arrow.BinaryTypes.String, 4, buffers, nil, 2, 0)) if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, arr.NullN()) { assert.Equal(t, "ooo", arr.Value(0)) assert.True(t, arr.IsNull(1)) assert.Equal(t, "bcdef", arr.Value(2)) assert.True(t, arr.IsNull(3)) } }, "simple valid case with nulls") assert.PanicsWithValue(t, expectedPanic, func() { buffers := makeBuffers(nil, []int32{0, 5}, "abc") array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 0)) }, "last offset is overflowing") assert.PanicsWithError(t, "arrow/array: string offset buffer must have at least 2 values", func() { buffers := makeBuffers(nil, []int32{0}, "abc") array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 0)) }, "last offset is missing") assert.PanicsWithValue(t, expectedPanic, func() { buffers := makeBuffers(nil, []int32{0, 3, 10, 15}, "oooabcdef") array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 2)) }, "data has offset and 
value offset is overflowing") } func TestStringStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( values = []string{"hello", "世界", "", "bye"} valid = []bool{true, true, false, true} ) b := array.NewStringBuilder(mem) defer b.Release() b.AppendValues(values, valid) arr := b.NewArray().(*array.String) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewStringBuilder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.String) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestLargeStringArray(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( want = []string{"hello", "世界", "", "bye"} valids = []bool{true, true, false, true} offsets = []int64{0, 5, 11, 11, 14} ) sb := array.NewLargeStringBuilder(mem) defer sb.Release() sb.Retain() sb.Release() sb.AppendValues(want[:2], nil) sb.AppendNull() sb.Append(want[3]) if got, want := sb.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := sb.NullN(), 1; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } arr := sb.NewLargeStringArray() defer arr.Release() arr.Retain() arr.Release() if got, want := arr.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := arr.NullN(), 1; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } for i := range want { if arr.IsNull(i) != !valids[i] { t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) } switch { case arr.IsNull(i): default: got := arr.Value(i) if got != want[i] { t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) } } if got, want := arr.ValueOffset(i), offsets[i]; got != want { t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got, want) } } 
if !reflect.DeepEqual(offsets, arr.ValueOffsets()) { t.Fatalf("ValueOffsets got=%v, want=%v", arr.ValueOffsets(), offsets) } sub := array.MakeFromData(arr.Data()) defer sub.Release() if sub.DataType().ID() != arrow.LARGE_STRING { t.Fatalf("invalid type: got=%q, want=large_string", sub.DataType().Name()) } if _, ok := sub.(*array.LargeString); !ok { t.Fatalf("could not type-assert to array.LargeString") } if got, want := arr.String(), `["hello" "世界" (null) "bye"]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } if !bytes.Equal([]byte(`hello世界bye`), arr.ValueBytes()) { t.Fatalf("got=%q, want=%q", string(arr.ValueBytes()), `hello世界bye`) } slice := array.NewSliceData(arr.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.LargeString) if !ok { t.Fatalf("could not type-assert to array.LargeString") } if got, want := v.String(), `[(null) "bye"]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } if !bytes.Equal(v.ValueBytes(), []byte("bye")) { t.Fatalf("got=%q, want=%q", string(v.ValueBytes()), "bye") } for i := 0; i < v.Len(); i++ { if got, want := v.ValueOffset(0), offsets[i+slice.Offset()]; got != want { t.Fatalf("val-offset-with-offset[%d]: got=%q, want=%q", i, got, want) } } if !reflect.DeepEqual(offsets[2:5], v.ValueOffsets()) { t.Fatalf("ValueOffsets got=%v, want=%v", v.ValueOffsets(), offsets[2:5]) } } func TestLargeStringBuilder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) want := []string{"hello", "世界", "", "bye"} ab := array.NewLargeStringBuilder(mem) defer ab.Release() stringValues := func(a *array.LargeString) []string { vs := make([]string, a.Len()) for i := range vs { vs[i] = a.Value(i) } return vs } ab.AppendValues([]string{}, nil) a := ab.NewLargeStringArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewLargeStringArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]string{}, nil) 
ab.AppendValues(want, nil) a = ab.NewLargeStringArray() assert.Equal(t, want, stringValues(a)) a.Release() ab.AppendValues(want, nil) ab.AppendValues([]string{}, nil) a = ab.NewLargeStringArray() assert.Equal(t, want, stringValues(a)) a.Release() } // TestStringReset tests the Reset() method on the String type by creating two different Strings and then // resetting the contents of string2 with the values from string1. func TestLargeStringReset(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) sb1 := array.NewLargeStringBuilder(mem) sb2 := array.NewLargeStringBuilder(mem) defer sb1.Release() defer sb2.Release() sb1.Append("string1") sb1.AppendNull() var ( string1 = sb1.NewLargeStringArray() string2 = sb2.NewLargeStringArray() string1Data = string1.Data() ) string2.Reset(string1Data) assert.Equal(t, "string1", string2.Value(0)) } func TestLargeStringInvalidOffsets(t *testing.T) { const expectedPanic = "arrow/array: string offsets out of bounds of data buffer" makeBuffers := func(valids []bool, offsets []int64, data string) []*memory.Buffer { offsetBuf := memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(offsets)) var nullBufBytes []byte var nullBuf *memory.Buffer if valids != nil { nullBufBytes = make([]byte, bitutil.BytesForBits(int64(len(valids)))) for i, v := range valids { bitutil.SetBitTo(nullBufBytes, i, v) } nullBuf = memory.NewBufferBytes(nullBufBytes) } return []*memory.Buffer{nullBuf, offsetBuf, memory.NewBufferBytes([]byte(data))} } assert.NotPanics(t, func() { buffers := makeBuffers(nil, []int64{}, "") array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 0, buffers, nil, 0, 0)) }, "empty array with no offsets") assert.NotPanics(t, func() { buffers := makeBuffers(nil, []int64{0, 5}, "") array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 0, buffers, nil, 0, 0)) }, "empty array, offsets ignored") assert.NotPanics(t, func() { buffers := makeBuffers(nil, []int64{0, 3, 4, 9}, "oooabcdef") 
array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 2)) }, "data has offset and value offsets are valid") assert.NotPanics(t, func() { buffers := makeBuffers(nil, []int64{0, 3, 6, 9, 9}, "012345678") arr := array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 4, buffers, nil, 0, 0)) if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) { assert.Equal(t, "012", arr.Value(0)) assert.Equal(t, "345", arr.Value(1)) assert.Equal(t, "678", arr.Value(2)) assert.Equal(t, "", arr.Value(3), "trailing empty string value will have offset past end") } }, "simple valid case") assert.NotPanics(t, func() { buffers := makeBuffers([]bool{true, false, true, false}, []int64{0, 3, 4, 9, 9}, "oooabcdef") arr := array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 4, buffers, nil, 2, 0)) if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, arr.NullN()) { assert.Equal(t, "ooo", arr.Value(0)) assert.True(t, arr.IsNull(1)) assert.Equal(t, "bcdef", arr.Value(2)) assert.True(t, arr.IsNull(3)) } }, "simple valid case with nulls") assert.PanicsWithValue(t, expectedPanic, func() { buffers := makeBuffers(nil, []int64{0, 5}, "abc") array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 0)) }, "last offset is overflowing") assert.PanicsWithError(t, "arrow/array: string offset buffer must have at least 2 values", func() { buffers := makeBuffers(nil, []int64{0}, "abc") array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 0)) }, "last offset is missing") assert.PanicsWithValue(t, expectedPanic, func() { buffers := makeBuffers(nil, []int64{0, 3, 10, 15}, "oooabcdef") array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 2)) }, "data has offset and value offset is overflowing") } func TestLargeStringStringRoundTrip(t *testing.T) { // 1. 
create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( values = []string{"hello", "世界", "", "bye"} valid = []bool{true, true, false, true} ) b := array.NewLargeStringBuilder(mem) defer b.Release() b.AppendValues(values, valid) arr := b.NewArray().(*array.LargeString) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewLargeStringBuilder(mem) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.LargeString) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestStringValueLen(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} valids := []bool{true, true, false, false, true, true, true, true, false, true} b := array.NewStringBuilder(mem) defer b.Release() b.AppendStringValues(values, valids) arr := b.NewArray().(*array.String) defer arr.Release() slice := array.NewSlice(arr, 2, 9).(*array.String) defer slice.Release() vs := values[2:9] for i, v := range vs { assert.Equal(t, len(v), slice.ValueLen(i)) } } func TestStringViewArray(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) var ( // only the last string is long enough to not get inlined want = []string{"hello", "世界", "", "say goodbye daffy"} valids = []bool{true, true, false, true} ) sb := array.NewStringViewBuilder(mem) defer sb.Release() sb.Retain() sb.Release() assert.NoError(t, sb.AppendValueFromString(want[0])) sb.AppendValues(want[1:2], nil) sb.AppendNull() sb.Append(want[3]) if got, want := sb.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := sb.NullN(), 1; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } arr := sb.NewStringViewArray() defer arr.Release() arr.Retain() 
arr.Release() assert.Equal(t, "hello", arr.ValueStr(0)) if got, want := arr.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } if got, want := arr.NullN(), 1; got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } for i := range want { if arr.IsNull(i) != !valids[i] { t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) } switch { case arr.IsNull(i): default: got := arr.Value(i) if got != want[i] { t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) } } } sub := array.MakeFromData(arr.Data()) defer sub.Release() if sub.DataType().ID() != arrow.STRING_VIEW { t.Fatalf("invalid type: got=%q, want=string view", sub.DataType().Name()) } if _, ok := sub.(*array.StringView); !ok { t.Fatalf("could not type-assert to array.String") } if got, want := arr.String(), `["hello" "世界" (null) "say goodbye daffy"]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } // only the last string gets stuck into a buffer the rest are inlined // in the headers. 
if !bytes.Equal([]byte(`say goodbye daffy`), arr.Data().Buffers()[2].Bytes()) { t.Fatalf("got=%q, want=%q", string(arr.Data().Buffers()[2].Bytes()), `say goodbye daffy`) } // check the prefix for the non-inlined value if [4]byte{'s', 'a', 'y', ' '} != arr.ValueHeader(3).Prefix() { t.Fatalf("got=%q, want=%q", arr.ValueHeader(3).Prefix(), `say `) } slice := array.NewSliceData(arr.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.StringView) if !ok { t.Fatalf("could not type-assert to array.StringView") } if got, want := v.String(), `[(null) "say goodbye daffy"]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } if !bytes.Equal([]byte(`say goodbye daffy`), v.Data().Buffers()[2].Bytes()) { t.Fatalf("got=%q, want=%q", string(v.Data().Buffers()[2].Bytes()), `say goodbye daffy`) } // check the prefix for the non-inlined value if [4]byte{'s', 'a', 'y', ' '} != v.ValueHeader(1).Prefix() { t.Fatalf("got=%q, want=%q", v.ValueHeader(1).Prefix(), `say `) } } func TestStringViewBuilder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) want := []string{"hello", "世界", "", "say goodbye daffy"} ab := array.NewStringViewBuilder(mem) defer ab.Release() stringValues := func(a *array.StringView) []string { vs := make([]string, a.Len()) for i := range vs { vs[i] = a.Value(i) } return vs } ab.AppendValues([]string{}, nil) a := ab.NewStringViewArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewStringViewArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]string{}, nil) ab.AppendValues(want, nil) a = ab.NewStringViewArray() assert.Equal(t, want, stringValues(a)) a.Release() ab.AppendValues(want, nil) ab.AppendValues([]string{}, nil) a = ab.NewStringViewArray() assert.Equal(t, want, stringValues(a)) a.Release() } // TestStringReset tests the Reset() method on the String type by creating two different Strings and then // 
resetting the contents of string2 with the values from string1. func TestStringViewReset(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) sb1 := array.NewStringViewBuilder(mem) sb2 := array.NewStringViewBuilder(mem) defer sb1.Release() defer sb2.Release() sb1.Append("string1") sb1.AppendNull() var ( string1 = sb1.NewStringViewArray() string2 = sb2.NewStringViewArray() string1Data = string1.Data() ) string2.Reset(string1Data) assert.Equal(t, "string1", string2.Value(0)) } arrow-go-18.2.0/arrow/array/struct.go000066400000000000000000000333151476434502500175010ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "errors" "fmt" "strings" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) // Struct represents an ordered sequence of relative types. type Struct struct { array fields []arrow.Array } // NewStructArray constructs a new Struct Array out of the columns passed // in and the field names. 
The length of all cols must be the same and // there should be the same number of columns as names. func NewStructArray(cols []arrow.Array, names []string) (*Struct, error) { return NewStructArrayWithNulls(cols, names, nil, 0, 0) } // NewStructArrayWithFields builds a new Struct Array using the passed columns // and provided fields. As opposed to NewStructArray, this allows you to provide // the full fields to utilize for the struct column instead of just the names. func NewStructArrayWithFields(cols []arrow.Array, fields []arrow.Field) (*Struct, error) { if len(cols) != len(fields) { return nil, fmt.Errorf("%w: mismatching number of fields and child arrays", arrow.ErrInvalid) } if len(cols) == 0 { return nil, fmt.Errorf("%w: can't infer struct array length with 0 child arrays", arrow.ErrInvalid) } length := cols[0].Len() children := make([]arrow.ArrayData, len(cols)) for i, c := range cols { if length != c.Len() { return nil, fmt.Errorf("%w: mismatching child array lengths", arrow.ErrInvalid) } if !arrow.TypeEqual(fields[i].Type, c.DataType()) { return nil, fmt.Errorf("%w: mismatching data type for child #%d, field says '%s', got '%s'", arrow.ErrInvalid, i, fields[i].Type, c.DataType()) } if !fields[i].Nullable && c.NullN() > 0 { return nil, fmt.Errorf("%w: field says not-nullable, child #%d has nulls", arrow.ErrInvalid, i) } children[i] = c.Data() } data := NewData(arrow.StructOf(fields...), length, []*memory.Buffer{nil}, children, 0, 0) defer data.Release() return NewStructData(data), nil } // NewStructArrayWithNulls is like NewStructArray as a convenience function, // but also takes in a null bitmap, the number of nulls, and an optional offset // to use for creating the Struct Array. 
func NewStructArrayWithNulls(cols []arrow.Array, names []string, nullBitmap *memory.Buffer, nullCount int, offset int) (*Struct, error) { if len(cols) != len(names) { return nil, fmt.Errorf("%w: mismatching number of fields and child arrays", arrow.ErrInvalid) } if len(cols) == 0 { return nil, fmt.Errorf("%w: can't infer struct array length with 0 child arrays", arrow.ErrInvalid) } length := cols[0].Len() children := make([]arrow.ArrayData, len(cols)) fields := make([]arrow.Field, len(cols)) for i, c := range cols { if length != c.Len() { return nil, fmt.Errorf("%w: mismatching child array lengths", arrow.ErrInvalid) } children[i] = c.Data() fields[i].Name = names[i] fields[i].Type = c.DataType() fields[i].Nullable = true } data := NewData(arrow.StructOf(fields...), length, []*memory.Buffer{nullBitmap}, children, nullCount, offset) defer data.Release() return NewStructData(data), nil } // NewStructData returns a new Struct array value from data. func NewStructData(data arrow.ArrayData) *Struct { a := &Struct{} a.refCount = 1 a.setData(data.(*Data)) return a } func (a *Struct) NumField() int { return len(a.fields) } func (a *Struct) Field(i int) arrow.Array { return a.fields[i] } // ValueStr returns the string representation (as json) of the value at index i. 
func (a *Struct) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } data, err := json.Marshal(a.GetOneForMarshal(i)) if err != nil { panic(err) } return string(data) } func (a *Struct) String() string { o := new(strings.Builder) o.WriteString("{") structBitmap := a.NullBitmapBytes() for i, v := range a.fields { if i > 0 { o.WriteString(" ") } if arrow.IsUnion(v.DataType().ID()) { fmt.Fprintf(o, "%v", v) continue } else if !bytes.Equal(structBitmap, v.NullBitmapBytes()) { masked := a.newStructFieldWithParentValidityMask(i) fmt.Fprintf(o, "%v", masked) masked.Release() continue } fmt.Fprintf(o, "%v", v) } o.WriteString("}") return o.String() } // newStructFieldWithParentValidityMask returns the Interface at fieldIndex // with a nullBitmapBytes adjusted according on the parent struct nullBitmapBytes. // From the docs: // // "When reading the struct array the parent validity bitmap takes priority." func (a *Struct) newStructFieldWithParentValidityMask(fieldIndex int) arrow.Array { field := a.Field(fieldIndex) nullBitmapBytes := field.NullBitmapBytes() maskedNullBitmapBytes := make([]byte, len(nullBitmapBytes)) copy(maskedNullBitmapBytes, nullBitmapBytes) for i := 0; i < field.Len(); i++ { if a.IsNull(i) { bitutil.ClearBit(maskedNullBitmapBytes, i) } } data := NewSliceData(field.Data(), 0, int64(field.Len())).(*Data) defer data.Release() bufs := make([]*memory.Buffer, len(data.Buffers())) copy(bufs, data.buffers) bufs[0].Release() bufs[0] = memory.NewBufferBytes(maskedNullBitmapBytes) data.buffers = bufs maskedField := MakeFromData(data) return maskedField } func (a *Struct) setData(data *Data) { a.array.setData(data) a.fields = make([]arrow.Array, len(data.childData)) for i, child := range data.childData { if data.offset != 0 || child.Len() != data.length { sub := NewSliceData(child, int64(data.offset), int64(data.offset+data.length)) a.fields[i] = MakeFromData(sub) sub.Release() } else { a.fields[i] = MakeFromData(child) } } } func (a *Struct) 
GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } tmp := make(map[string]interface{}) fieldList := a.data.dtype.(*arrow.StructType).Fields() for j, d := range a.fields { tmp[fieldList[j].Name] = d.GetOneForMarshal(i) } return tmp } func (a *Struct) MarshalJSON() ([]byte, error) { var buf bytes.Buffer enc := json.NewEncoder(&buf) buf.WriteByte('[') for i := 0; i < a.Len(); i++ { if i != 0 { buf.WriteByte(',') } if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { return nil, err } } buf.WriteByte(']') return buf.Bytes(), nil } func arrayEqualStruct(left, right *Struct) bool { for i, lf := range left.fields { rf := right.fields[i] if !Equal(lf, rf) { return false } } return true } func (a *Struct) Retain() { a.array.Retain() for _, f := range a.fields { f.Retain() } } func (a *Struct) Release() { a.array.Release() for _, f := range a.fields { f.Release() } } type StructBuilder struct { builder dtype arrow.DataType fields []Builder } // NewStructBuilder returns a builder, using the provided memory allocator. func NewStructBuilder(mem memory.Allocator, dtype *arrow.StructType) *StructBuilder { b := &StructBuilder{ builder: builder{refCount: 1, mem: mem}, dtype: dtype, fields: make([]Builder, dtype.NumFields()), } for i, f := range dtype.Fields() { b.fields[i] = NewBuilder(b.mem, f.Type) } return b } func (b *StructBuilder) Type() arrow.DataType { fields := make([]arrow.Field, len(b.fields)) copy(fields, b.dtype.(*arrow.StructType).Fields()) for i, b := range b.fields { fields[i].Type = b.Type() } return arrow.StructOf(fields...) } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *StructBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } for _, f := range b.fields { f.Release() } } } func (b *StructBuilder) Append(v bool) { // Intentionally not calling `Reserve` as it will recursively call // `Reserve` on the child builders, which during profiling has shown to be // very expensive due to iterating over children, dynamic dispatch and all // other code that gets executed even if previously `Reserve` was called to // preallocate. Not calling `Reserve` has no downsides as when appending to // the underlying children they already ensure they have enough space // reserved. The only thing we must do is ensure we have enough space in // the validity bitmap of the struct builder itself. b.builder.reserve(1, b.resizeHelper) b.unsafeAppendBoolToBitmap(v) if !v { for _, f := range b.fields { f.AppendNull() } } } func (b *StructBuilder) AppendValues(valids []bool) { b.Reserve(len(valids)) b.builder.unsafeAppendBoolsToBitmap(valids, len(valids)) } func (b *StructBuilder) AppendNull() { b.Append(false) } func (b *StructBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *StructBuilder) AppendEmptyValue() { b.Append(true) for _, f := range b.fields { f.AppendEmptyValue() } } func (b *StructBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *StructBuilder) unsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } func (b *StructBuilder) init(capacity int) { b.builder.init(capacity) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. 
func (b *StructBuilder) Reserve(n int) { b.builder.reserve(n, b.resizeHelper) for _, f := range b.fields { f.Reserve(n) } } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *StructBuilder) Resize(n int) { b.resizeHelper(n) for _, f := range b.fields { f.Resize(n) } } func (b *StructBuilder) resizeHelper(n int) { if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(n, b.builder.init) } } func (b *StructBuilder) NumField() int { return len(b.fields) } func (b *StructBuilder) FieldBuilder(i int) Builder { return b.fields[i] } // NewArray creates a Struct array from the memory buffers used by the builder and resets the StructBuilder // so it can be used to build a new array. func (b *StructBuilder) NewArray() arrow.Array { return b.NewStructArray() } // NewStructArray creates a Struct array from the memory buffers used by the builder and resets the StructBuilder // so it can be used to build a new array. 
func (b *StructBuilder) NewStructArray() (a *Struct) { data := b.newData() a = NewStructData(data) data.Release() return } func (b *StructBuilder) newData() (data *Data) { fields := make([]arrow.ArrayData, len(b.fields)) for i, f := range b.fields { arr := f.NewArray() defer arr.Release() fields[i] = arr.Data() } data = NewData( b.Type(), b.length, []*memory.Buffer{ b.nullBitmap, }, fields, b.nulls, 0, ) b.reset() return } func (b *StructBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } if !strings.HasPrefix(s, "{") && !strings.HasSuffix(s, "}") { return fmt.Errorf("%w: invalid string for struct should be be of form: {*}", arrow.ErrInvalid) } dec := json.NewDecoder(strings.NewReader(s)) return b.UnmarshalOne(dec) } func (b *StructBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch t { case json.Delim('{'): b.Append(true) keylist := make(map[string]bool) for dec.More() { keyTok, err := dec.Token() if err != nil { return err } key, ok := keyTok.(string) if !ok { return errors.New("missing key") } if keylist[key] { return fmt.Errorf("key %s is specified twice", key) } keylist[key] = true idx, ok := b.dtype.(*arrow.StructType).FieldIdx(key) if !ok { var extra interface{} dec.Decode(&extra) continue } if err := b.fields[idx].UnmarshalOne(dec); err != nil { return err } } // Append null values to all optional fields that were not presented in the json input for _, field := range b.dtype.(*arrow.StructType).Fields() { if !field.Nullable { continue } idx, _ := b.dtype.(*arrow.StructType).FieldIdx(field.Name) if _, hasKey := keylist[field.Name]; !hasKey { b.fields[idx].AppendNull() } } // consume '}' _, err := dec.Token() return err case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Offset: dec.InputOffset(), Struct: fmt.Sprint(b.dtype), } } return nil } func (b *StructBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := 
b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *StructBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("struct builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var ( _ arrow.Array = (*Struct)(nil) _ Builder = (*StructBuilder)(nil) ) arrow-go-18.2.0/arrow/array/struct_test.go000066400000000000000000000322001476434502500205300ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestStructArray(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( f1s = []byte{'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'} f2s = []int32{1, 2, 3, 4} f1Lengths = []int{3, 0, 3, 4} f1Offsets = []int32{0, 3, 3, 6, 10} f1Valids = []bool{true, false, true, true} isValid = []bool{true, true, true, true} fields = []arrow.Field{ {Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)}, {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, } dtype = arrow.StructOf(fields...) ) sb := array.NewStructBuilder(pool, dtype) defer sb.Release() for i := 0; i < 10; i++ { f1b := sb.FieldBuilder(0).(*array.ListBuilder) f1vb := f1b.ValueBuilder().(*array.Uint8Builder) f2b := sb.FieldBuilder(1).(*array.Int32Builder) if got, want := sb.NumField(), 2; got != want { t.Fatalf("got=%d, want=%d", got, want) } sb.Resize(len(f1Lengths)) f1vb.Resize(len(f1s)) f2b.Resize(len(f2s)) pos := 0 for i, length := range f1Lengths { f1b.Append(f1Valids[i]) for j := 0; j < length; j++ { f1vb.Append(f1s[pos]) pos++ } f2b.Append(f2s[i]) } for _, valid := range isValid { sb.Append(valid) } arr := sb.NewArray().(*array.Struct) defer arr.Release() arr.Retain() arr.Release() if got, want := arr.DataType().ID(), arrow.STRUCT; got != want { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.Len(), len(isValid); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i, valid := range isValid { if got, want := arr.IsValid(i), valid; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } { f1arr := arr.Field(0).(*array.List) if got, want := f1arr.Len(), len(f1Lengths); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range f1Lengths { if got, want := f1arr.IsValid(i), f1Valids[i]; got != want 
{ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } if got, want := f1arr.IsNull(i), f1Lengths[i] == 0; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } if got, want := f1arr.Offsets(), f1Offsets; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } varr := f1arr.ListValues().(*array.Uint8) if got, want := varr.Uint8Values(), f1s; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } { f2arr := arr.Field(1).(*array.Int32) if got, want := f2arr.Len(), len(f2s); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := f2arr.Int32Values(), f2s; !reflect.DeepEqual(got, want) { t.Fatalf("got=%d, want=%d", got, want) } } } } func TestStructStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dt := arrow.StructOf( arrow.Field{Name: "nullable_bool", Type: new(arrow.BooleanType), Nullable: true}, arrow.Field{Name: "non_nullable_bool", Type: new(arrow.BooleanType)}, ) builder := array.NewStructBuilder(memory.DefaultAllocator, dt) nullableBld := builder.FieldBuilder(0).(*array.BooleanBuilder) nonNullableBld := builder.FieldBuilder(1).(*array.BooleanBuilder) builder.Append(true) nullableBld.Append(true) nonNullableBld.Append(true) builder.Append(true) nullableBld.AppendNull() nonNullableBld.Append(true) builder.AppendNull() arr := builder.NewArray().(*array.Struct) // 2. 
create array via AppendValueFromString b1 := array.NewStructBuilder(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Struct) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestStructArrayEmpty(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) sb := array.NewStructBuilder(pool, arrow.StructOf()) defer sb.Release() if got, want := sb.NumField(), 0; got != want { t.Fatalf("got=%d, want=%d", got, want) } arr := sb.NewArray().(*array.Struct) if got, want := arr.Len(), 0; got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := arr.NumField(), 0; got != want { t.Fatalf("got=%d, want=%d", got, want) } } func TestStructArrayBulkAppend(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( f1s = []byte{'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'} f2s = []int32{1, 2, 3, 4} f1Lengths = []int{3, 0, 3, 4} f1Offsets = []int32{0, 3, 3, 6, 10} f1Valids = []bool{true, false, true, true} isValid = []bool{true, true, true, true} fields = []arrow.Field{ {Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)}, {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, } dtype = arrow.StructOf(fields...) 
) sb := array.NewStructBuilder(pool, dtype) defer sb.Release() for i := 0; i < 10; i++ { f1b := sb.FieldBuilder(0).(*array.ListBuilder) f1vb := f1b.ValueBuilder().(*array.Uint8Builder) f2b := sb.FieldBuilder(1).(*array.Int32Builder) if got, want := sb.NumField(), 2; got != want { t.Fatalf("got=%d, want=%d", got, want) } sb.Resize(len(f1Lengths)) f1vb.Resize(len(f1s)) f2b.Resize(len(f2s)) sb.AppendValues(isValid) f1b.AppendValues(f1Offsets, f1Valids) f1vb.AppendValues(f1s, nil) f2b.AppendValues(f2s, nil) arr := sb.NewArray().(*array.Struct) defer arr.Release() if got, want := arr.DataType().ID(), arrow.STRUCT; got != want { t.Fatalf("got=%v, want=%v", got, want) } if got, want := arr.Len(), len(isValid); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i, valid := range isValid { if got, want := arr.IsValid(i), valid; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } { f1arr := arr.Field(0).(*array.List) if got, want := f1arr.Len(), len(f1Lengths); got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range f1Lengths { if got, want := f1arr.IsValid(i), f1Valids[i]; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } if got, want := f1arr.IsNull(i), f1Lengths[i] == 0; got != want { t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) } } if got, want := f1arr.Offsets(), f1Offsets; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } varr := f1arr.ListValues().(*array.Uint8) if got, want := varr.Uint8Values(), f1s; !reflect.DeepEqual(got, want) { t.Fatalf("got=%v, want=%v", got, want) } } { f2arr := arr.Field(1).(*array.Int32) if got, want := f2arr.Len(), len(f2s); got != want { t.Fatalf("got=%d, want=%d", got, want) } if got, want := f2arr.Int32Values(), f2s; !reflect.DeepEqual(got, want) { t.Fatalf("got=%d, want=%d", got, want) } } } } func TestStructArrayStringer(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( f1s = 
[]float64{1.1, 1.2, 1.3, 1.4} f2s = []int32{1, 2, 3, 4} fields = []arrow.Field{ {Name: "f1", Type: arrow.PrimitiveTypes.Float64}, {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, } dtype = arrow.StructOf(fields...) ) sb := array.NewStructBuilder(pool, dtype) defer sb.Release() f1b := sb.FieldBuilder(0).(*array.Float64Builder) f2b := sb.FieldBuilder(1).(*array.Int32Builder) if got, want := sb.NumField(), 2; got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range f1s { sb.Append(true) switch i { case 1: f1b.AppendNull() f2b.Append(f2s[i]) case 2: f1b.Append(f1s[i]) f2b.AppendNull() default: f1b.Append(f1s[i]) f2b.Append(f2s[i]) } } assert.NoError(t, sb.AppendValueFromString(`{"f1": 1.1, "f2": 1}`)) arr := sb.NewArray().(*array.Struct) defer arr.Release() assert.Equal(t, `{"f1":1.1,"f2":1}`, arr.ValueStr(4)) want := "{[1.1 (null) 1.3 1.4 1.1] [1 2 (null) 4 1]}" got := arr.String() if got != want { t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) } } func TestStructArraySlice(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( f1s = []float64{1.1, 1.2, 1.3, 1.4} f2s = []int32{1, 2, 3, 4} valids = []bool{true, true, true, true} fields = []arrow.Field{ {Name: "f1", Type: arrow.PrimitiveTypes.Float64}, {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, } dtype = arrow.StructOf(fields...) 
) sb := array.NewStructBuilder(pool, dtype) defer sb.Release() f1b := sb.FieldBuilder(0).(*array.Float64Builder) f2b := sb.FieldBuilder(1).(*array.Int32Builder) if got, want := sb.NumField(), 2; got != want { t.Fatalf("got=%d, want=%d", got, want) } for i := range f1s { sb.Append(valids[i]) switch i { case 1: f1b.AppendNull() f2b.Append(f2s[i]) case 2: f1b.Append(f1s[i]) f2b.AppendNull() default: f1b.Append(f1s[i]) f2b.Append(f2s[i]) } } arr := sb.NewArray().(*array.Struct) defer arr.Release() // Slice arrSlice := array.NewSlice(arr, 2, 4).(*array.Struct) defer arrSlice.Release() want := "{[1.3 1.4] [(null) 4]}" got := arrSlice.String() if got != want { t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) } } func TestStructArrayNullBitmap(t *testing.T) { pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer pool.AssertSize(t, 0) var ( f1s = []float64{1.1, 1.2, 1.3, 1.4} f2s = []int32{1, 2, 3, 4} valids = []bool{true, true, true, false} fields = []arrow.Field{ {Name: "f1", Type: arrow.PrimitiveTypes.Float64}, {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, } dtype = arrow.StructOf(fields...) 
) sb := array.NewStructBuilder(pool, dtype) defer sb.Release() f1b := sb.FieldBuilder(0).(*array.Float64Builder) f2b := sb.FieldBuilder(1).(*array.Int32Builder) if got, want := sb.NumField(), 2; got != want { t.Fatalf("got=%d, want=%d", got, want) } sb.AppendValues(valids) for i := range f1s { f1b.Append(f1s[i]) switch i { case 1: f2b.AppendNull() default: f2b.Append(f2s[i]) } } arr := sb.NewArray().(*array.Struct) defer arr.Release() want := "{[1.1 1.2 1.3 (null)] [1 (null) 3 (null)]}" got := arr.String() if got != want { t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) } } func TestStructArrayUnmarshalJSONMissingFields(t *testing.T) { pool := memory.NewGoAllocator() var ( fields = []arrow.Field{ {Name: "f1", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, { Name: "f3", Type: arrow.StructOf( []arrow.Field{ {Name: "f3_1", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "f3_2", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "f3_3", Type: arrow.BinaryTypes.String, Nullable: false}, }..., ), }, } dtype = arrow.StructOf(fields...) ) tests := []struct { name string jsonInput string want string panic bool }{ { name: "missing required field", jsonInput: `[{"f2": 3, "f3": {"f3_1": "test"}}]`, panic: true, want: "", }, { name: "missing optional fields", jsonInput: `[{"f2": 3, "f3": {"f3_3": "test"}}]`, panic: false, want: `{[(null)] [3] {[(null)] [(null)] ["test"]}}`, }, } for _, tc := range tests { t.Run( tc.name, func(t *testing.T) { var val bool sb := array.NewStructBuilder(pool, dtype) defer sb.Release() if tc.panic { defer func() { e := recover() if e == nil { t.Fatalf("this should have panicked, but did not; slice value %v", val) } if got, want := e.(string), "arrow/array: index out of range"; got != want { t.Fatalf("invalid error. 
got=%q, want=%q", got, want) } }() } else { defer func() { if e := recover(); e != nil { t.Fatalf("unexpected panic: %v", e) } }() } err := sb.UnmarshalJSON([]byte(tc.jsonInput)) if err != nil { t.Fatal(err) } arr := sb.NewArray().(*array.Struct) defer arr.Release() got := arr.String() if got != tc.want { t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, tc.want) } }, ) } } arrow-go-18.2.0/arrow/array/table.go000066400000000000000000000257501476434502500172500ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "errors" "fmt" "math" "strings" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/internal/debug" ) // NewColumnSlice returns a new zero-copy slice of the column with the indicated // indices i and j, corresponding to the column's array[i:j]. // The returned column must be Release()'d after use. // // NewColSlice panics if the slice is outside the valid range of the column's array. // NewColSlice panics if j < i. 
func NewColumnSlice(col *arrow.Column, i, j int64) *arrow.Column { slice := NewChunkedSlice(col.Data(), i, j) defer slice.Release() return arrow.NewColumn(col.Field(), slice) } // NewChunkedSlice constructs a zero-copy slice of the chunked array with the indicated // indices i and j, corresponding to array[i:j]. // The returned chunked array must be Release()'d after use. // // NewSlice panics if the slice is outside the valid range of the input array. // NewSlice panics if j < i. func NewChunkedSlice(a *arrow.Chunked, i, j int64) *arrow.Chunked { if j > int64(a.Len()) || i > j || i > int64(a.Len()) { panic("arrow/array: index out of range") } var ( cur = 0 beg = i sz = j - i chunks = make([]arrow.Array, 0, len(a.Chunks())) ) for cur < len(a.Chunks()) && beg >= int64(a.Chunks()[cur].Len()) { beg -= int64(a.Chunks()[cur].Len()) cur++ } for cur < len(a.Chunks()) && sz > 0 { arr := a.Chunks()[cur] end := beg + sz if end > int64(arr.Len()) { end = int64(arr.Len()) } chunks = append(chunks, NewSlice(arr, beg, end)) sz -= int64(arr.Len()) - beg beg = 0 cur++ } chunks = chunks[:len(chunks):len(chunks)] defer func() { for _, chunk := range chunks { chunk.Release() } }() return arrow.NewChunked(a.DataType(), chunks) } // simpleTable is a basic, non-lazy in-memory table. type simpleTable struct { refCount int64 rows int64 cols []arrow.Column schema *arrow.Schema } // NewTable returns a new basic, non-lazy in-memory table. // If rows is negative, the number of rows will be inferred from the height // of the columns. // // NewTable panics if the columns and schema are inconsistent. // NewTable panics if rows is larger than the height of the columns. func NewTable(schema *arrow.Schema, cols []arrow.Column, rows int64) arrow.Table { tbl := simpleTable{ refCount: 1, rows: rows, cols: cols, schema: schema, } if tbl.rows < 0 { switch len(tbl.cols) { case 0: tbl.rows = 0 default: tbl.rows = int64(tbl.cols[0].Len()) } } // validate the table and its constituents. 
// note we retain the columns after having validated the table // in case the validation fails and panics (and would otherwise leak // a ref-count on the columns.) tbl.validate() for i := range tbl.cols { tbl.cols[i].Retain() } return &tbl } // NewTableFromSlice is a convenience function to create a table from a slice // of slices of arrow.Array. // // Like other NewTable functions this can panic if: // - len(schema.Fields) != len(data) // - the total length of each column's array slice (ie: number of rows // in the column) aren't the same for all columns. func NewTableFromSlice(schema *arrow.Schema, data [][]arrow.Array) arrow.Table { if len(data) != schema.NumFields() { panic("array/table: mismatch in number of columns and data for creating a table") } cols := make([]arrow.Column, schema.NumFields()) for i, arrs := range data { field := schema.Field(i) chunked := arrow.NewChunked(field.Type, arrs) cols[i] = *arrow.NewColumn(field, chunked) chunked.Release() } tbl := simpleTable{ refCount: 1, schema: schema, cols: cols, rows: int64(cols[0].Len()), } defer func() { if r := recover(); r != nil { // if validate panics, let's release the columns // so that we don't leak them, then propagate the panic for _, c := range cols { c.Release() } panic(r) } }() // validate the table and its constituents. tbl.validate() return &tbl } // NewTableFromRecords returns a new basic, non-lazy in-memory table. // // NewTableFromRecords panics if the records and schema are inconsistent. 
func NewTableFromRecords(schema *arrow.Schema, recs []arrow.Record) arrow.Table { arrs := make([]arrow.Array, len(recs)) cols := make([]arrow.Column, schema.NumFields()) defer func(cols []arrow.Column) { for i := range cols { cols[i].Release() } }(cols) for i := range cols { field := schema.Field(i) for j, rec := range recs { arrs[j] = rec.Column(i) } chunk := arrow.NewChunked(field.Type, arrs) cols[i] = *arrow.NewColumn(field, chunk) chunk.Release() } return NewTable(schema, cols, -1) } func (tbl *simpleTable) Schema() *arrow.Schema { return tbl.schema } func (tbl *simpleTable) AddColumn(i int, field arrow.Field, column arrow.Column) (arrow.Table, error) { if int64(column.Len()) != tbl.rows { return nil, fmt.Errorf("arrow/array: column length mismatch: %d != %d", column.Len(), tbl.rows) } if field.Type != column.DataType() { return nil, fmt.Errorf("arrow/array: column type mismatch: %v != %v", field.Type, column.DataType()) } newSchema, err := tbl.schema.AddField(i, field) if err != nil { return nil, err } cols := make([]arrow.Column, len(tbl.cols)+1) copy(cols[:i], tbl.cols[:i]) cols[i] = column copy(cols[i+1:], tbl.cols[i:]) newTable := NewTable(newSchema, cols, tbl.rows) return newTable, nil } func (tbl *simpleTable) NumRows() int64 { return tbl.rows } func (tbl *simpleTable) NumCols() int64 { return int64(len(tbl.cols)) } func (tbl *simpleTable) Column(i int) *arrow.Column { return &tbl.cols[i] } func (tbl *simpleTable) validate() { if len(tbl.cols) != tbl.schema.NumFields() { panic(errors.New("arrow/array: table schema mismatch")) } for i, col := range tbl.cols { if !col.Field().Equal(tbl.schema.Field(i)) { panic(fmt.Errorf("arrow/array: column field %q is inconsistent with schema", col.Name())) } if int64(col.Len()) < tbl.rows { panic(fmt.Errorf("arrow/array: column %q expected length >= %d but got length %d", col.Name(), tbl.rows, col.Len())) } } } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. 
func (tbl *simpleTable) Retain() { atomic.AddInt64(&tbl.refCount, 1) } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. func (tbl *simpleTable) Release() { debug.Assert(atomic.LoadInt64(&tbl.refCount) > 0, "too many releases") if atomic.AddInt64(&tbl.refCount, -1) == 0 { for i := range tbl.cols { tbl.cols[i].Release() } tbl.cols = nil } } func (tbl *simpleTable) String() string { o := new(strings.Builder) o.WriteString(tbl.Schema().String()) o.WriteString("\n") for i := 0; i < int(tbl.NumCols()); i++ { col := tbl.Column(i) o.WriteString(col.Field().Name + ": [") for j, chunk := range col.Data().Chunks() { if j != 0 { o.WriteString(", ") } o.WriteString(chunk.String()) } o.WriteString("]\n") } return o.String() } // TableReader is a Record iterator over a (possibly chunked) Table type TableReader struct { refCount int64 tbl arrow.Table cur int64 // current row max int64 // total number of rows rec arrow.Record // current Record chksz int64 // chunk size chunks []*arrow.Chunked slots []int // chunk indices offsets []int64 // chunk offsets } // NewTableReader returns a new TableReader to iterate over the (possibly chunked) Table. // if chunkSize is <= 0, the biggest possible chunk will be selected. 
func NewTableReader(tbl arrow.Table, chunkSize int64) *TableReader { ncols := tbl.NumCols() tr := &TableReader{ refCount: 1, tbl: tbl, cur: 0, max: int64(tbl.NumRows()), chksz: chunkSize, chunks: make([]*arrow.Chunked, ncols), slots: make([]int, ncols), offsets: make([]int64, ncols), } tr.tbl.Retain() if tr.chksz <= 0 { tr.chksz = math.MaxInt64 } for i := range tr.chunks { col := tr.tbl.Column(i) tr.chunks[i] = col.Data() tr.chunks[i].Retain() } return tr } func (tr *TableReader) Schema() *arrow.Schema { return tr.tbl.Schema() } func (tr *TableReader) Record() arrow.Record { return tr.rec } func (tr *TableReader) Next() bool { if tr.cur >= tr.max { return false } if tr.rec != nil { tr.rec.Release() } // determine the minimum contiguous slice across all columns chunksz := imin64(tr.max, tr.chksz) chunks := make([]arrow.Array, len(tr.chunks)) for i := range chunks { j := tr.slots[i] chunk := tr.chunks[i].Chunk(j) remain := int64(chunk.Len()) - tr.offsets[i] if remain < chunksz { chunksz = remain } chunks[i] = chunk } // slice the chunks, advance each chunk slot as appropriate. batch := make([]arrow.Array, len(tr.chunks)) for i, chunk := range chunks { var slice arrow.Array offset := tr.offsets[i] switch int64(chunk.Len()) - offset { case chunksz: tr.slots[i]++ tr.offsets[i] = 0 if offset > 0 { // need to slice slice = NewSlice(chunk, offset, offset+chunksz) } else { // no need to slice slice = chunk slice.Retain() } default: tr.offsets[i] += chunksz slice = NewSlice(chunk, offset, offset+chunksz) } batch[i] = slice } tr.cur += chunksz tr.rec = NewRecord(tr.tbl.Schema(), batch, chunksz) for _, arr := range batch { arr.Release() } return true } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. func (tr *TableReader) Retain() { atomic.AddInt64(&tr.refCount, 1) } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
// Release may be called simultaneously from multiple goroutines. func (tr *TableReader) Release() { debug.Assert(atomic.LoadInt64(&tr.refCount) > 0, "too many releases") if atomic.AddInt64(&tr.refCount, -1) == 0 { tr.tbl.Release() for _, chk := range tr.chunks { chk.Release() } if tr.rec != nil { tr.rec.Release() } tr.tbl = nil tr.chunks = nil tr.slots = nil tr.offsets = nil } } func (tr *TableReader) Err() error { return nil } func imin64(a, b int64) int64 { if a < b { return a } return b } var ( _ arrow.Table = (*simpleTable)(nil) _ RecordReader = (*TableReader)(nil) ) arrow-go-18.2.0/arrow/array/table_test.go000066400000000000000000000527231476434502500203070ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "errors" "fmt" "reflect" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" ) func TestChunked(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) c1 := arrow.NewChunked(arrow.PrimitiveTypes.Int32, nil) c1.Retain() c1.Release() if got, want := c1.Len(), 0; got != want { t.Fatalf("len differ. 
got=%d, want=%d", got, want) } if got, want := c1.NullN(), 0; got != want { t.Fatalf("nulls: got=%d, want=%d", got, want) } if got, want := c1.DataType(), arrow.PrimitiveTypes.Int32; got != want { t.Fatalf("dtype: got=%v, want=%v", got, want) } c1.Release() fb := array.NewFloat64Builder(mem) defer fb.Release() fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) f1 := fb.NewFloat64Array() defer f1.Release() fb.AppendValues([]float64{6, 7}, nil) f2 := fb.NewFloat64Array() defer f2.Release() fb.AppendValues([]float64{8, 9, 10}, nil) f3 := fb.NewFloat64Array() defer f3.Release() c2 := arrow.NewChunked( arrow.PrimitiveTypes.Float64, []arrow.Array{f1, f2, f3}, ) defer c2.Release() if got, want := c2.Len(), 10; got != want { t.Fatalf("len: got=%d, want=%d", got, want) } if got, want := c2.NullN(), 0; got != want { t.Fatalf("nulls: got=%d, want=%d", got, want) } if got, want := c2.DataType(), arrow.PrimitiveTypes.Float64; got != want { t.Fatalf("dtype: got=%v, want=%v", got, want) } if got, want := c2.Chunk(0), c2.Chunks()[0]; !reflect.DeepEqual(got, want) { t.Fatalf("chunk: got=%v, want=%v", got, want) } for _, tc := range []struct { i, j int64 len int nulls int chunks int }{ {i: 0, j: 10, len: 10, nulls: 0, chunks: 3}, {i: 2, j: 3, len: 1, nulls: 0, chunks: 1}, {i: 9, j: 10, len: 1, nulls: 0, chunks: 1}, {i: 0, j: 5, len: 5, nulls: 0, chunks: 1}, {i: 5, j: 7, len: 2, nulls: 0, chunks: 1}, {i: 7, j: 10, len: 3, nulls: 0, chunks: 1}, {i: 10, j: 10, len: 0, nulls: 0, chunks: 0}, } { t.Run("", func(t *testing.T) { sub := array.NewChunkedSlice(c2, tc.i, tc.j) defer sub.Release() if got, want := sub.Len(), tc.len; got != want { t.Fatalf("len: got=%d, want=%d", got, want) } if got, want := sub.NullN(), tc.nulls; got != want { t.Fatalf("nulls: got=%d, want=%d", got, want) } if got, want := sub.DataType(), arrow.PrimitiveTypes.Float64; got != want { t.Fatalf("dtype: got=%v, want=%v", got, want) } if got, want := len(sub.Chunks()), tc.chunks; got != want { t.Fatalf("chunks: got=%d, 
want=%d", got, want) } }) } } func TestChunkedEqualDataType(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) lb1 := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) defer lb1.Release() v1 := lb1.NewArray() defer v1.Release() lb2 := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) defer lb2.Release() v2 := lb2.NewArray() defer v2.Release() c1 := arrow.NewChunked(arrow.ListOf(arrow.PrimitiveTypes.Int32), []arrow.Array{ v1, v2, }) defer c1.Release() } func TestChunkedInvalid(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) fb := array.NewFloat64Builder(mem) defer fb.Release() fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) f1 := fb.NewFloat64Array() defer f1.Release() ib := array.NewInt32Builder(mem) defer ib.Release() ib.AppendValues([]int32{6, 7}, nil) f2 := ib.NewInt32Array() defer f2.Release() defer func() { e := recover() if e == nil { t.Fatalf("expected a panic") } err, ok := e.(error) if !ok { t.Fatalf("expected an error") } if !errors.Is(err, arrow.ErrInvalid) { t.Fatalf("should be an ErrInvalid") } if got, want := err.Error(), fmt.Sprintf("%s: arrow/array: mismatch data type float64 vs int32", arrow.ErrInvalid); got != want { t.Fatalf("invalid error. 
got=%q, want=%q", got, want) } }() c1 := arrow.NewChunked(arrow.PrimitiveTypes.Int32, []arrow.Array{ f1, f2, }) defer c1.Release() } func TestChunkedSliceInvalid(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) fb := array.NewFloat64Builder(mem) defer fb.Release() fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) f1 := fb.NewFloat64Array() defer f1.Release() fb.AppendValues([]float64{6, 7}, nil) f2 := fb.NewFloat64Array() defer f2.Release() fb.AppendValues([]float64{8, 9, 10}, nil) f3 := fb.NewFloat64Array() defer f3.Release() c := arrow.NewChunked( arrow.PrimitiveTypes.Float64, []arrow.Array{f1, f2, f3}, ) defer c.Release() for _, tc := range []struct { i, j int64 }{ {i: 2, j: 1}, {i: 10, j: 11}, {i: 11, j: 11}, } { t.Run("", func(t *testing.T) { defer func() { e := recover() if e == nil { t.Fatalf("expected a panic") } if got, want := e.(string), "arrow/array: index out of range"; got != want { t.Fatalf("invalid error. got=%q, want=%q", got, want) } }() sub := array.NewChunkedSlice(c, tc.i, tc.j) defer sub.Release() }) } } func TestColumn(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) type slice struct { i, j int64 len int nulls int chunks int } for _, tc := range []struct { chunk *arrow.Chunked field arrow.Field err error slices []slice }{ { chunk: func() *arrow.Chunked { ib := array.NewInt32Builder(mem) defer ib.Release() ib.AppendValues([]int32{1, 2, 3}, nil) i1 := ib.NewInt32Array() defer i1.Release() ib.AppendValues([]int32{4, 5, 6, 7, 8, 9, 10}, nil) i2 := ib.NewInt32Array() defer i2.Release() c := arrow.NewChunked( arrow.PrimitiveTypes.Int32, []arrow.Array{i1, i2}, ) return c }(), field: arrow.Field{Name: "i32", Type: arrow.PrimitiveTypes.Int32}, slices: []slice{ {i: 0, j: 10, len: 10, nulls: 0, chunks: 2}, {i: 2, j: 3, len: 1, nulls: 0, chunks: 1}, {i: 9, j: 10, len: 1, nulls: 0, chunks: 1}, {i: 0, j: 5, len: 5, nulls: 0, chunks: 2}, {i: 5, j: 7, len: 
2, nulls: 0, chunks: 1}, {i: 7, j: 10, len: 3, nulls: 0, chunks: 1}, {i: 10, j: 10, len: 0, nulls: 0, chunks: 0}, }, }, { chunk: func() *arrow.Chunked { fb := array.NewFloat64Builder(mem) defer fb.Release() fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) f1 := fb.NewFloat64Array() defer f1.Release() fb.AppendValues([]float64{6, 7}, nil) f2 := fb.NewFloat64Array() defer f2.Release() fb.AppendValues([]float64{8, 9, 10}, nil) f3 := fb.NewFloat64Array() defer f3.Release() c := arrow.NewChunked( arrow.PrimitiveTypes.Float64, []arrow.Array{f1, f2, f3}, ) return c }(), field: arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64}, slices: []slice{ {i: 0, j: 10, len: 10, nulls: 0, chunks: 3}, {i: 2, j: 3, len: 1, nulls: 0, chunks: 1}, {i: 9, j: 10, len: 1, nulls: 0, chunks: 1}, {i: 0, j: 5, len: 5, nulls: 0, chunks: 1}, {i: 5, j: 7, len: 2, nulls: 0, chunks: 1}, {i: 7, j: 10, len: 3, nulls: 0, chunks: 1}, {i: 10, j: 10, len: 0, nulls: 0, chunks: 0}, }, }, { chunk: func() *arrow.Chunked { fb := array.NewFloat64Builder(mem) defer fb.Release() fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) f1 := fb.NewFloat64Array() defer f1.Release() c := arrow.NewChunked( arrow.PrimitiveTypes.Float64, []arrow.Array{f1}, ) return c }(), field: arrow.Field{Name: "f32", Type: arrow.PrimitiveTypes.Float32}, err: fmt.Errorf("%w: arrow/array: inconsistent data type float64 vs float32", arrow.ErrInvalid), }, } { t.Run("", func(t *testing.T) { defer tc.chunk.Release() if tc.err != nil { defer func() { e := recover() if e == nil { t.Fatalf("expected an error %q", tc.err) } switch err := e.(type) { case string: if err != tc.err.Error() { t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) } case error: if err.Error() != tc.err.Error() { t.Fatalf("invalid panic message. 
got=%q, want=%q", err, tc.err) } default: t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) } }() } col := arrow.NewColumn(tc.field, tc.chunk) defer col.Release() if got, want := col.Len(), tc.chunk.Len(); got != want { t.Fatalf("invalid length: got=%d, want=%d", got, want) } if got, want := col.NullN(), tc.chunk.NullN(); got != want { t.Fatalf("invalid nulls: got=%d, want=%d", got, want) } if got, want := col.Data(), tc.chunk; got != want { t.Fatalf("invalid chunked: got=%#v, want=%#v", got, want) } if got, want := col.Field(), tc.field; !got.Equal(want) { t.Fatalf("invalid field: got=%#v, want=%#v", got, want) } if got, want := col.Name(), tc.field.Name; got != want { t.Fatalf("invalid name: got=%q, want=%q", got, want) } if got, want := col.DataType(), tc.field.Type; !reflect.DeepEqual(got, want) { t.Fatalf("invalid data type: got=%#v, want=%#v", got, want) } col.Retain() col.Release() for _, slice := range tc.slices { t.Run("", func(t *testing.T) { sub := array.NewColumnSlice(col, slice.i, slice.j) defer sub.Release() if got, want := sub.Len(), slice.len; got != want { t.Fatalf("len: got=%d, want=%d", got, want) } if got, want := sub.NullN(), slice.nulls; got != want { t.Fatalf("nulls: got=%d, want=%d", got, want) } if got, want := sub.DataType(), col.DataType(); got != want { t.Fatalf("dtype: got=%v, want=%v", got, want) } if got, want := len(sub.Data().Chunks()), slice.chunks; got != want { t.Fatalf("chunks: got=%d, want=%d", got, want) } }) } }) } } func TestTable(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) preSchema := arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, }, nil, ) schema := arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, }, nil, ) col1 := func() *arrow.Column { chunk := func() *arrow.Chunked { ib := array.NewInt32Builder(mem) defer ib.Release() 
ib.AppendValues([]int32{1, 2, 3}, nil) i1 := ib.NewInt32Array() defer i1.Release() ib.AppendValues([]int32{4, 5, 6, 7, 8, 9, 10}, nil) i2 := ib.NewInt32Array() defer i2.Release() c := arrow.NewChunked( arrow.PrimitiveTypes.Int32, []arrow.Array{i1, i2}, ) return c }() defer chunk.Release() return arrow.NewColumn(schema.Field(0), chunk) }() defer col1.Release() col2 := func() *arrow.Column { chunk := func() *arrow.Chunked { fb := array.NewFloat64Builder(mem) defer fb.Release() fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) f1 := fb.NewFloat64Array() defer f1.Release() fb.AppendValues([]float64{6, 7}, nil) f2 := fb.NewFloat64Array() defer f2.Release() fb.AppendValues([]float64{8, 9, 10}, nil) f3 := fb.NewFloat64Array() defer f3.Release() c := arrow.NewChunked( arrow.PrimitiveTypes.Float64, []arrow.Array{f1, f2, f3}, ) return c }() defer chunk.Release() return arrow.NewColumn(schema.Field(1), chunk) }() defer col2.Release() cols := []arrow.Column{*col1, *col2} slices := [][]arrow.Array{col1.Data().Chunks(), col2.Data().Chunks()} preTbl := array.NewTable(preSchema, []arrow.Column{*col1}, -1) defer preTbl.Release() tbl, err := preTbl.AddColumn( 1, arrow.Field{Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, *col2, ) defer tbl.Release() if err != nil { t.Fatalf("could not add column: %+v", err) } tbl2 := array.NewTableFromSlice(schema, slices) defer tbl2.Release() tbl.Retain() tbl.Release() if got, want := tbl.Schema(), schema; !got.Equal(want) { t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) } if got, want := tbl.NumRows(), int64(10); got != want { t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) } if got, want := tbl.NumCols(), int64(2); got != want { t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) } if got, want := tbl.Column(0).Name(), col1.Name(); got != want { t.Fatalf("invalid column: got=%q, want=%q", got, want) } if got, want := tbl2.NumRows(), int64(10); got != want { t.Fatalf("invalid number of rows: got=%d, 
want=%d", got, want) } if got, want := tbl2.NumCols(), int64(2); got != want { t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) } if got, want := tbl2.Column(0).Name(), col1.Name(); got != want { t.Fatalf("invalid column: got=%q, want=%q", got, want) } for _, tc := range []struct { schema *arrow.Schema cols []arrow.Column rows int64 err error }{ { schema: schema, cols: nil, rows: -1, err: fmt.Errorf("arrow/array: table schema mismatch"), }, { schema: schema, cols: cols[:1], rows: 0, err: fmt.Errorf("arrow/array: table schema mismatch"), }, { schema: arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, }, nil, ), cols: cols, rows: 0, err: fmt.Errorf("arrow/array: table schema mismatch"), }, { schema: arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-f64", Type: arrow.PrimitiveTypes.Int32}, }, nil, ), cols: cols, rows: 0, err: fmt.Errorf(`arrow/array: column field "f2-f64" is inconsistent with schema`), }, { schema: arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-f32", Type: arrow.PrimitiveTypes.Float64}, }, nil, ), cols: cols, rows: 0, err: fmt.Errorf(`arrow/array: column field "f2-f64" is inconsistent with schema`), }, { schema: schema, cols: cols, rows: 11, err: fmt.Errorf(`arrow/array: column "f1-i32" expected length >= 11 but got length 10`), }, { schema: schema, cols: cols, rows: 3, err: nil, }, } { t.Run("", func(t *testing.T) { if tc.err != nil { defer func() { e := recover() if e == nil { t.Fatalf("expected an error %q", tc.err) } switch err := e.(type) { case string: if err != tc.err.Error() { t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) } case error: if err.Error() != tc.err.Error() { t.Fatalf("invalid panic message. 
got=%q, want=%q", err, tc.err) } default: t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) } }() } tbl := array.NewTable(tc.schema, tc.cols, tc.rows) defer tbl.Release() if got, want := tbl.NumRows(), tc.rows; got != want { t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) } }) } } func TestTableFromRecords(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) schema := arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, }, nil, ) b := array.NewRecordBuilder(mem, schema) defer b.Release() b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil) b.Field(0).(*array.Int32Builder).AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true}) b.Field(1).(*array.Float64Builder).AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) rec1 := b.NewRecord() defer rec1.Release() b.Field(0).(*array.Int32Builder).AppendValues([]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) b.Field(1).(*array.Float64Builder).AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) rec2 := b.NewRecord() defer rec2.Release() tbl := array.NewTableFromRecords(schema, []arrow.Record{rec1, rec2}) defer tbl.Release() if got, want := tbl.Schema(), schema; !got.Equal(want) { t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) } if got, want := tbl.NumRows(), int64(20); got != want { t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) } if got, want := tbl.NumCols(), int64(2); got != want { t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) } if got, want := tbl.Column(0).Name(), schema.Field(0).Name; got != want { t.Fatalf("invalid column: got=%q, want=%q", got, want) } } func TestTableReader(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) schema := arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: 
arrow.PrimitiveTypes.Int32}, {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, }, nil, ) col1 := func() *arrow.Column { chunk := func() *arrow.Chunked { ib := array.NewInt32Builder(mem) defer ib.Release() ib.AppendValues([]int32{1, 2, 3}, nil) i1 := ib.NewInt32Array() defer i1.Release() ib.AppendValues([]int32{4, 5, 6, 7, 8, 9, 10}, nil) i2 := ib.NewInt32Array() defer i2.Release() c := arrow.NewChunked( arrow.PrimitiveTypes.Int32, []arrow.Array{i1, i2}, ) return c }() defer chunk.Release() return arrow.NewColumn(schema.Field(0), chunk) }() defer col1.Release() col2 := func() *arrow.Column { chunk := func() *arrow.Chunked { fb := array.NewFloat64Builder(mem) defer fb.Release() fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) f1 := fb.NewFloat64Array() defer f1.Release() fb.AppendValues([]float64{6, 7}, nil) f2 := fb.NewFloat64Array() defer f2.Release() fb.AppendValues([]float64{8, 9, 10}, nil) f3 := fb.NewFloat64Array() defer f3.Release() c := arrow.NewChunked( arrow.PrimitiveTypes.Float64, []arrow.Array{f1, f2, f3}, ) return c }() defer chunk.Release() return arrow.NewColumn(schema.Field(1), chunk) }() defer col2.Release() cols := []arrow.Column{*col1, *col2} tbl := array.NewTable(schema, cols, -1) defer tbl.Release() tr := array.NewTableReader(tbl, 1) defer tr.Release() tr.Retain() tr.Release() for tr.Next() { } if err := tr.Err(); err != nil { t.Fatalf("tr err: %#v", err) } for _, tc := range []struct { sz int64 n int64 rows []int64 }{ {sz: -1, n: 4, rows: []int64{3, 2, 2, 3}}, {sz: +0, n: 4, rows: []int64{3, 2, 2, 3}}, {sz: +1, n: 10, rows: []int64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, {sz: +2, n: 6, rows: []int64{2, 1, 2, 2, 2, 1}}, } { t.Run(fmt.Sprintf("chunksz=%d", tc.sz), func(t *testing.T) { tr := array.NewTableReader(tbl, tc.sz) defer tr.Release() if got, want := tr.Schema(), tbl.Schema(); !got.Equal(want) { t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) } var ( n int64 sum int64 ) for tr.Next() { rec := tr.Record() if got, want := rec.Schema(), 
tbl.Schema(); !got.Equal(want) { t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) } if got, want := rec.NumRows(), tc.rows[n]; got != want { t.Fatalf("invalid number of rows[%d]: got=%d, want=%d", n, got, want) } n++ sum += rec.NumRows() } if err := tr.Err(); err != nil { t.Fatalf("tr err: %#v", err) } if got, want := n, tc.n; got != want { t.Fatalf("invalid number of iterations: got=%d, want=%d", got, want) } if sum != tbl.NumRows() { t.Fatalf("invalid number of rows iterated over: got=%d, want=%d", sum, tbl.NumRows()) } }) } } func TestTableToString(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) schema := arrow.NewSchema( []arrow.Field{ {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, }, nil, ) b := array.NewRecordBuilder(mem, schema) defer b.Release() b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil) b.Field(0).(*array.Int32Builder).AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true}) b.Field(1).(*array.Float64Builder).AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) rec1 := b.NewRecord() defer rec1.Release() b.Field(0).(*array.Int32Builder).AppendValues([]int32{111, 112, 113, 114, 115, 116, 117, 118, 119, 120}, nil) b.Field(1).(*array.Float64Builder).AppendValues([]float64{211, 212, 213, 214, 215, 216, 217, 218, 219, 220}, nil) rec2 := b.NewRecord() defer rec2.Release() tbl := array.NewTableFromRecords(schema, []arrow.Record{rec1, rec2}) defer tbl.Release() table_str := tbl.String() expected_str := `schema: fields: 2 - f1-i32: type=int32 - f2-f64: type=float64 f1-i32: [[1 2 3 4 5 6 7 8 (null) 10], [111 112 113 114 115 116 117 118 119 120]] f2-f64: [[11 12 13 14 15 16 17 18 19 20], [211 212 213 214 215 216 217 218 219 220]] ` if got, want := table_str, expected_str; table_str != expected_str { t.Fatalf("invalid String: got=%#v, want=%#v", got, want) } } 
arrow-go-18.2.0/arrow/array/timestamp.go000066400000000000000000000221011476434502500201470ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "fmt" "reflect" "strings" "sync/atomic" "time" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) // Timestamp represents an immutable sequence of arrow.Timestamp values. type Timestamp struct { array values []arrow.Timestamp } // NewTimestampData creates a new Timestamp from Data. func NewTimestampData(data arrow.ArrayData) *Timestamp { a := &Timestamp{} a.refCount = 1 a.setData(data.(*Data)) return a } // Reset resets the array for re-use. func (a *Timestamp) Reset(data *Data) { a.setData(data) } // Value returns the value at the specified index. func (a *Timestamp) Value(i int) arrow.Timestamp { return a.values[i] } // TimestampValues returns the values. func (a *Timestamp) TimestampValues() []arrow.Timestamp { return a.values } // String returns a string representation of the array. 
func (a *Timestamp) String() string { o := new(strings.Builder) o.WriteString("[") for i, v := range a.values { if i > 0 { fmt.Fprintf(o, " ") } switch { case a.IsNull(i): o.WriteString(NullValueStr) default: fmt.Fprintf(o, "%v", v) } } o.WriteString("]") return o.String() } func (a *Timestamp) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] if vals != nil { a.values = arrow.TimestampTraits.CastFromBytes(vals.Bytes()) beg := a.array.data.offset end := beg + a.array.data.length a.values = a.values[beg:end] } } func (a *Timestamp) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } toTime, _ := a.DataType().(*arrow.TimestampType).GetToTimeFunc() return toTime(a.values[i]).Format("2006-01-02 15:04:05.999999999Z0700") } func (a *Timestamp) GetOneForMarshal(i int) interface{} { if val := a.ValueStr(i); val != NullValueStr { return val } return nil } func (a *Timestamp) MarshalJSON() ([]byte, error) { vals := make([]interface{}, a.Len()) for i := range a.values { vals[i] = a.GetOneForMarshal(i) } return json.Marshal(vals) } func arrayEqualTimestamp(left, right *Timestamp) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } if left.Value(i) != right.Value(i) { return false } } return true } type TimestampBuilder struct { builder dtype *arrow.TimestampType data *memory.Buffer rawData []arrow.Timestamp } func NewTimestampBuilder(mem memory.Allocator, dtype *arrow.TimestampType) *TimestampBuilder { return &TimestampBuilder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} } func (b *TimestampBuilder) Type() arrow.DataType { return b.dtype } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *TimestampBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } } } func (b *TimestampBuilder) AppendTime(t time.Time) { ts, err := arrow.TimestampFromTime(t, b.dtype.Unit) if err != nil { panic(err) } b.Append(ts) } func (b *TimestampBuilder) Append(v arrow.Timestamp) { b.Reserve(1) b.UnsafeAppend(v) } func (b *TimestampBuilder) AppendNull() { b.Reserve(1) b.UnsafeAppendBoolToBitmap(false) } func (b *TimestampBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *TimestampBuilder) AppendEmptyValue() { b.Append(0) } func (b *TimestampBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *TimestampBuilder) UnsafeAppend(v arrow.Timestamp) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v b.length++ } func (b *TimestampBuilder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
func (b *TimestampBuilder) AppendValues(v []arrow.Timestamp, valid []bool) {
	if len(v) != len(valid) && len(valid) != 0 {
		panic("len(v) != len(valid) && len(valid) != 0")
	}

	if len(v) == 0 {
		return
	}

	b.Reserve(len(v))
	// Bulk-copy the raw values, then set the validity bits in one pass.
	arrow.TimestampTraits.Copy(b.rawData[b.length:], v)
	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
}

// init allocates the value buffer for the requested capacity and refreshes
// the typed view over it.
func (b *TimestampBuilder) init(capacity int) {
	b.builder.init(capacity)

	b.data = memory.NewResizableBuffer(b.mem)
	bytesN := arrow.TimestampTraits.BytesRequired(capacity)
	b.data.Resize(bytesN)
	b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes())
}

// Reserve ensures there is enough space for appending n elements
// by checking the capacity and calling Resize if necessary.
func (b *TimestampBuilder) Reserve(n int) {
	b.builder.reserve(n, b.Resize)
}

// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
// additional memory will be allocated. If n is smaller, the allocated memory may reduced.
func (b *TimestampBuilder) Resize(n int) {
	// The bitmap is resized to the caller's n, but the value buffer is never
	// sized below the minimum builder capacity.
	nBuilder := n
	if n < minBuilderCapacity {
		n = minBuilderCapacity
	}

	if b.capacity == 0 {
		b.init(n)
	} else {
		b.builder.resize(nBuilder, b.init)
		b.data.Resize(arrow.TimestampTraits.BytesRequired(n))
		b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes())
	}
}

// NewArray creates a Timestamp array from the memory buffers used by the builder and resets the TimestampBuilder
// so it can be used to build a new array.
func (b *TimestampBuilder) NewArray() arrow.Array { return b.NewTimestampArray() }

// NewTimestampArray creates a Timestamp array from the memory buffers used by the builder and resets the TimestampBuilder
// so it can be used to build a new array.
func (b *TimestampBuilder) NewTimestampArray() (a *Timestamp) { data := b.newData() a = NewTimestampData(data) data.Release() return } func (b *TimestampBuilder) newData() (data *Data) { bytesRequired := arrow.TimestampTraits.BytesRequired(b.length) if bytesRequired > 0 && bytesRequired < b.data.Len() { // trim buffers b.data.Resize(bytesRequired) } data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) b.reset() if b.data != nil { b.data.Release() b.data = nil b.rawData = nil } return } func (b *TimestampBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } loc, err := b.dtype.GetZone() if err != nil { return err } v, _, err := arrow.TimestampFromStringInLocation(s, b.dtype.Unit, loc) if err != nil { b.AppendNull() return err } b.Append(v) return nil } func (b *TimestampBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch v := t.(type) { case nil: b.AppendNull() case string: loc, _ := b.dtype.GetZone() tm, _, err := arrow.TimestampFromStringInLocation(v, b.dtype.Unit, loc) if err != nil { return &json.UnmarshalTypeError{ Value: v, Type: reflect.TypeOf(arrow.Timestamp(0)), Offset: dec.InputOffset(), } } b.Append(tm) case json.Number: n, err := v.Int64() if err != nil { return &json.UnmarshalTypeError{ Value: v.String(), Type: reflect.TypeOf(arrow.Timestamp(0)), Offset: dec.InputOffset(), } } b.Append(arrow.Timestamp(n)) case float64: b.Append(arrow.Timestamp(v)) default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf(arrow.Timestamp(0)), Offset: dec.InputOffset(), } } return nil } func (b *TimestampBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *TimestampBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := 
t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var ( _ arrow.Array = (*Timestamp)(nil) _ Builder = (*TimestampBuilder)(nil) ) arrow-go-18.2.0/arrow/array/timestamp_test.go000066400000000000000000000200621476434502500212120ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array_test import ( "testing" "time" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestTimestampStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dt := &arrow.TimestampType{Unit: arrow.Second} b := array.NewTimestampBuilder(mem, dt) defer b.Release() b.Append(1) b.Append(2) b.Append(3) b.AppendNull() b.Append(5) b.Append(6) b.AppendNull() b.Append(8) b.Append(9) b.Append(10) arr := b.NewArray().(*array.Timestamp) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewTimestampBuilder(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.Timestamp) defer arr1.Release() assert.True(t, array.Equal(arr, arr1)) } func TestNewTimestampBuilder(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) timestamp := time.Now() dtype := &arrow.TimestampType{Unit: arrow.Second} ab := array.NewTimestampBuilder(mem, dtype) defer ab.Release() ab.Retain() ab.Release() ab.Append(1) ab.Append(2) ab.Append(3) ab.AppendNull() ab.Append(5) ab.Append(6) ab.AppendNull() ab.Append(8) ab.Append(9) ab.Append(10) ab.AppendTime(timestamp) // check state of builder before NewTimestampArray assert.Equal(t, 11, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewTimestampArray() // check state of builder after NewTimestampArray assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTimestampArray did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTimestampArray did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTimestampArray did not reset state") // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") assert.Equal(t, []arrow.Timestamp{1, 2, 3, 0, 5, 6, 0, 8, 9, 10, arrow.Timestamp(timestamp.Unix())}, a.TimestampValues(), "unexpected TimestampValues") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.TimestampValues(), 11, "unexpected length of TimestampValues") a.Release() ab.Append(7) ab.Append(8) a = ab.NewTimestampArray() assert.Equal(t, 0, a.NullN()) assert.Equal(t, []arrow.Timestamp{7, 8}, a.TimestampValues()) assert.Len(t, a.TimestampValues(), 2) a.Release() var ( want = []arrow.Timestamp{1, 2, 3, 4} valids = []bool{true, true, false, true} ) ab.AppendValues(want, 
valids) a = ab.NewTimestampArray() sub := array.MakeFromData(a.Data()) defer sub.Release() if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { t.Fatalf("invalid type: got=%q, want=%q", got, want) } if _, ok := sub.(*array.Timestamp); !ok { t.Fatalf("could not type-assert to array.Timestamp") } if got, want := a.String(), `[1 2 (null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } slice := array.NewSliceData(a.Data(), 2, 4) defer slice.Release() sub1 := array.MakeFromData(slice) defer sub1.Release() v, ok := sub1.(*array.Timestamp) if !ok { t.Fatalf("could not type-assert to array.Timestamp") } if got, want := v.String(), `[(null) 4]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } a.Release() } func TestTimestampBuilder_AppendValues(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.TimestampType{Unit: arrow.Second} ab := array.NewTimestampBuilder(mem, dtype) defer ab.Release() exp := []arrow.Timestamp{0, 1, 2, 3} ab.AppendValues(exp, nil) a := ab.NewTimestampArray() assert.Equal(t, exp, a.TimestampValues()) a.Release() } func TestTimestampBuilder_Empty(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.TimestampType{Unit: arrow.Second} ab := array.NewTimestampBuilder(mem, dtype) defer ab.Release() exp := []arrow.Timestamp{0, 1, 2, 3} ab.AppendValues([]arrow.Timestamp{}, nil) a := ab.NewTimestampArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues(nil, nil) a = ab.NewTimestampArray() assert.Zero(t, a.Len()) a.Release() ab.AppendValues([]arrow.Timestamp{}, nil) ab.AppendValues(exp, nil) a = ab.NewTimestampArray() assert.Equal(t, exp, a.TimestampValues()) a.Release() ab.AppendValues(exp, nil) ab.AppendValues([]arrow.Timestamp{}, nil) a = ab.NewTimestampArray() assert.Equal(t, exp, a.TimestampValues()) a.Release() } func TestTimestampBuilder_Resize(t *testing.T) { mem := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dtype := &arrow.TimestampType{Unit: arrow.Second} ab := array.NewTimestampBuilder(mem, dtype) defer ab.Release() assert.Equal(t, 0, ab.Cap()) assert.Equal(t, 0, ab.Len()) ab.Reserve(63) assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 0, ab.Len()) for i := 0; i < 63; i++ { ab.Append(0) } assert.Equal(t, 64, ab.Cap()) assert.Equal(t, 63, ab.Len()) ab.Resize(5) assert.Equal(t, 5, ab.Len()) ab.Resize(32) assert.Equal(t, 5, ab.Len()) } func TestTimestampValueStr(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) dt := &arrow.TimestampType{Unit: arrow.Second, TimeZone: "America/Phoenix"} b := array.NewTimestampBuilder(mem, dt) defer b.Release() b.Append(-34226955) b.Append(1456767743) arr := b.NewArray() defer arr.Release() assert.Equal(t, "1968-11-30 13:30:45-0700", arr.ValueStr(0)) assert.Equal(t, "2016-02-29 10:42:23-0700", arr.ValueStr(1)) } func TestTimestampEquality(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) tsDatatypes := []*arrow.TimestampType{ {Unit: arrow.Second}, {Unit: arrow.Second, TimeZone: "UTC"}, {Unit: arrow.Second, TimeZone: "America/Phoenix"}, } arrs := make([]*array.Timestamp, 0, len(tsDatatypes)) for _, dt := range tsDatatypes { bldr := array.NewTimestampBuilder(mem, dt) defer bldr.Release() bldr.Append(-34226955) bldr.Append(1456767743) arr := bldr.NewTimestampArray() defer arr.Release() arrs = append(arrs, arr) } // No timezone, "wall clock" semantics // These timestamps have no actual timezone, but we still represent as UTC per Go conventions assert.Equal(t, "1968-11-30 20:30:45Z", arrs[0].ValueStr(0)) assert.Equal(t, "2016-02-29 17:42:23Z", arrs[0].ValueStr(1)) // UTC timezone, "instant" semantics assert.Equal(t, "1968-11-30 20:30:45Z", arrs[1].ValueStr(0)) assert.Equal(t, "2016-02-29 17:42:23Z", arrs[1].ValueStr(1)) // America/Phoenix timezone, "instant" 
semantics assert.Equal(t, "1968-11-30 13:30:45-0700", arrs[2].ValueStr(0)) assert.Equal(t, "2016-02-29 10:42:23-0700", arrs[2].ValueStr(1)) // Despite timezone and semantics, the physical values are equivalent assert.Equal(t, arrs[0].Value(0), arrs[1].Value(0)) assert.Equal(t, arrs[0].Value(0), arrs[2].Value(0)) assert.Equal(t, arrs[1].Value(0), arrs[2].Value(0)) assert.Equal(t, arrs[0].Value(1), arrs[1].Value(1)) assert.Equal(t, arrs[0].Value(1), arrs[2].Value(1)) assert.Equal(t, arrs[1].Value(1), arrs[2].Value(1)) } arrow-go-18.2.0/arrow/array/union.go000066400000000000000000001242321476434502500173040ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "errors" "fmt" "math" "reflect" "strings" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/bitutils" "github.com/apache/arrow-go/v18/internal/json" ) // Union is a convenience interface to encompass both Sparse and Dense // union array types. type Union interface { arrow.Array // NumFields returns the number of child fields in this union. 
// Equivalent to len(UnionType().Fields()) NumFields() int // Validate returns an error if there are any issues with the lengths // or types of the children arrays mismatching with the Type of the // Union Array. nil is returned if there are no problems. Validate() error // ValidateFull runs the same checks that Validate() does, but additionally // checks that all childIDs are valid (>= 0 || ==InvalidID) and for // dense unions validates that all offsets are within the bounds of their // respective child. ValidateFull() error // TypeCodes returns the type id buffer for the union Array, equivalent to // Data().Buffers()[1]. Note: This will not account for any slice offset. TypeCodes() *memory.Buffer // RawTypeCodes returns a slice of UnionTypeCodes properly accounting for // any slice offset. RawTypeCodes() []arrow.UnionTypeCode // TypeCode returns the logical type code of the value at the requested index TypeCode(i int) arrow.UnionTypeCode // ChildID returns the index of the physical child containing the value // at the requested index. Equivalent to: // // arr.UnionType().ChildIDs()[arr.RawTypeCodes()[i+arr.Data().Offset()]] ChildID(i int) int // UnionType is a convenience function to retrieve the properly typed UnionType // instead of having to call DataType() and manually assert the type. UnionType() arrow.UnionType // Mode returns the union mode of the underlying Array, either arrow.SparseMode // or arrow.DenseMode. Mode() arrow.UnionMode // Field returns the requested child array for this union. Returns nil if a // nonexistent position is passed in. 
// // The appropriate child for an index can be retrieved with Field(ChildID(index)) Field(pos int) arrow.Array } const kMaxElems = math.MaxInt32 type union struct { array unionType arrow.UnionType typecodes []arrow.UnionTypeCode children []arrow.Array } func (a *union) Retain() { a.array.Retain() for _, c := range a.children { c.Retain() } } func (a *union) Release() { a.array.Release() for _, c := range a.children { c.Release() } } func (a *union) NumFields() int { return len(a.unionType.Fields()) } func (a *union) Mode() arrow.UnionMode { return a.unionType.Mode() } func (a *union) UnionType() arrow.UnionType { return a.unionType } func (a *union) TypeCodes() *memory.Buffer { return a.data.buffers[1] } func (a *union) RawTypeCodes() []arrow.UnionTypeCode { if a.data.length > 0 { return a.typecodes[a.data.offset:] } return []arrow.UnionTypeCode{} } func (a *union) TypeCode(i int) arrow.UnionTypeCode { return a.typecodes[i+a.data.offset] } func (a *union) ChildID(i int) int { return a.unionType.ChildIDs()[a.typecodes[i+a.data.offset]] } func (a *union) setData(data *Data) { a.unionType = data.dtype.(arrow.UnionType) debug.Assert(len(data.buffers) >= 2, "arrow/array: invalid number of union array buffers") if data.length > 0 { a.typecodes = arrow.Int8Traits.CastFromBytes(data.buffers[1].Bytes()) } else { a.typecodes = []int8{} } a.children = make([]arrow.Array, len(data.childData)) for i, child := range data.childData { if a.unionType.Mode() == arrow.SparseMode && (data.offset != 0 || child.Len() != data.length) { child = NewSliceData(child, int64(data.offset), int64(data.offset+data.length)) defer child.Release() } a.children[i] = MakeFromData(child) } a.array.setData(data) } func (a *union) Field(pos int) (result arrow.Array) { if pos < 0 || pos >= len(a.children) { return nil } return a.children[pos] } func (a *union) Validate() error { fields := a.unionType.Fields() for i, f := range fields { fieldData := a.data.childData[i] if a.unionType.Mode() == 
arrow.SparseMode && fieldData.Len() < a.data.length+a.data.offset { return fmt.Errorf("arrow/array: sparse union child array #%d has length smaller than expected for union array (%d < %d)", i, fieldData.Len(), a.data.length+a.data.offset) } if !arrow.TypeEqual(f.Type, fieldData.DataType()) { return fmt.Errorf("arrow/array: union child array #%d does not match type field %s vs %s", i, fieldData.DataType(), f.Type) } } return nil } func (a *union) ValidateFull() error { if err := a.Validate(); err != nil { return err } childIDs := a.unionType.ChildIDs() codesMap := a.unionType.TypeCodes() codes := a.RawTypeCodes() for i := 0; i < a.data.length; i++ { code := codes[i] if code < 0 || childIDs[code] == arrow.InvalidUnionChildID { return fmt.Errorf("arrow/array: union value at position %d has invalid type id %d", i, code) } } if a.unionType.Mode() == arrow.DenseMode { // validate offsets // map logical typeid to child length var childLengths [256]int64 for i := range a.unionType.Fields() { childLengths[codesMap[i]] = int64(a.data.childData[i].Len()) } // check offsets are in bounds var lastOffsets [256]int64 offsets := arrow.Int32Traits.CastFromBytes(a.data.buffers[2].Bytes())[a.data.offset:] for i := int64(0); i < int64(a.data.length); i++ { code := codes[i] offset := offsets[i] switch { case offset < 0: return fmt.Errorf("arrow/array: union value at position %d has negative offset %d", i, offset) case offset >= int32(childLengths[code]): return fmt.Errorf("arrow/array: union value at position %d has offset larger than child length (%d >= %d)", i, offset, childLengths[code]) case offset < int32(lastOffsets[code]): return fmt.Errorf("arrow/array: union value at position %d has non-monotonic offset %d", i, offset) } lastOffsets[code] = int64(offset) } } return nil } // SparseUnion represents an array where each logical value is taken from // a single child. A buffer of 8-bit type ids indicates which child a given // logical value is to be taken from. 
This is represented as the ChildID, // which is the index into the list of children. // // In a sparse union, each child array will have the same length as the // union array itself, regardless of how many values in the union actually // refer to it. // // Unlike most other arrays, unions do not have a top-level validity bitmap. type SparseUnion struct { union } // NewSparseUnion constructs a union array using the given type, length, list of // children and buffer of typeIDs with the given offset. func NewSparseUnion(dt *arrow.SparseUnionType, length int, children []arrow.Array, typeIDs *memory.Buffer, offset int) *SparseUnion { childData := make([]arrow.ArrayData, len(children)) for i, c := range children { childData[i] = c.Data() } data := NewData(dt, length, []*memory.Buffer{nil, typeIDs}, childData, 0, offset) defer data.Release() return NewSparseUnionData(data) } // NewSparseUnionData constructs a SparseUnion array from the given ArrayData object. func NewSparseUnionData(data arrow.ArrayData) *SparseUnion { a := &SparseUnion{} a.refCount = 1 a.setData(data.(*Data)) return a } // NewSparseUnionFromArrays constructs a new SparseUnion array with the provided // values. // // typeIDs *must* be an INT8 array with no nulls // len(codes) *must* be either 0 or equal to len(children). If len(codes) is 0, // the type codes used will be sequentially numeric starting at 0. func NewSparseUnionFromArrays(typeIDs arrow.Array, children []arrow.Array, codes ...arrow.UnionTypeCode) (*SparseUnion, error) { return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, []string{}, codes) } // NewSparseUnionFromArrayWithFields constructs a new SparseUnion array like // NewSparseUnionFromArrays, but allows specifying the field names. Type codes // will be auto-generated sequentially starting at 0. // // typeIDs *must* be an INT8 array with no nulls. // len(fields) *must* either be 0 or equal to len(children). 
If len(fields) is 0, // then the fields will be named sequentially starting at "0". func NewSparseUnionFromArraysWithFields(typeIDs arrow.Array, children []arrow.Array, fields []string) (*SparseUnion, error) { return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, fields, []arrow.UnionTypeCode{}) } // NewSparseUnionFromArraysWithFieldCodes combines the other constructors // for constructing a new SparseUnion array with the provided field names // and type codes, along with children and type ids. // // All the requirements mentioned in NewSparseUnionFromArrays and // NewSparseUnionFromArraysWithFields apply. func NewSparseUnionFromArraysWithFieldCodes(typeIDs arrow.Array, children []arrow.Array, fields []string, codes []arrow.UnionTypeCode) (*SparseUnion, error) { switch { case typeIDs.DataType().ID() != arrow.INT8: return nil, errors.New("arrow/array: union array type ids must be signed int8") case typeIDs.NullN() != 0: return nil, errors.New("arrow/array: union type ids may not have nulls") case len(fields) > 0 && len(fields) != len(children): return nil, errors.New("arrow/array: field names must have the same length as children") case len(codes) > 0 && len(codes) != len(children): return nil, errors.New("arrow/array: type codes must have same length as children") } buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1]} ty := arrow.SparseUnionFromArrays(children, fields, codes) childData := make([]arrow.ArrayData, len(children)) for i, c := range children { childData[i] = c.Data() if c.Len() != typeIDs.Len() { return nil, errors.New("arrow/array: sparse union array must have len(child) == len(typeids) for all children") } } data := NewData(ty, typeIDs.Len(), buffers, childData, 0, typeIDs.Data().Offset()) defer data.Release() return NewSparseUnionData(data), nil } func (a *SparseUnion) setData(data *Data) { a.union.setData(data) debug.Assert(a.data.dtype.ID() == arrow.SPARSE_UNION, "arrow/array: invalid data type for SparseUnion") 
debug.Assert(len(a.data.buffers) == 2, "arrow/array: sparse unions should have exactly 2 buffers") debug.Assert(a.data.buffers[0] == nil, "arrow/array: validity bitmap for sparse unions should be nil") } func (a *SparseUnion) GetOneForMarshal(i int) interface{} { typeID := a.RawTypeCodes()[i] childID := a.ChildID(i) data := a.Field(childID) if data.IsNull(i) { return nil } return []interface{}{typeID, data.GetOneForMarshal(i)} } func (a *SparseUnion) MarshalJSON() ([]byte, error) { var buf bytes.Buffer enc := json.NewEncoder(&buf) buf.WriteByte('[') for i := 0; i < a.Len(); i++ { if i != 0 { buf.WriteByte(',') } if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { return nil, err } } buf.WriteByte(']') return buf.Bytes(), nil } func (a *SparseUnion) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } val := a.GetOneForMarshal(i) if val == nil { // child is nil return NullValueStr } data, err := json.Marshal(val) if err != nil { panic(err) } return string(data) } func (a *SparseUnion) String() string { var b strings.Builder b.WriteByte('[') fieldList := a.unionType.Fields() for i := 0; i < a.Len(); i++ { if i > 0 { b.WriteString(" ") } field := fieldList[a.ChildID(i)] f := a.Field(a.ChildID(i)) fmt.Fprintf(&b, "{%s=%v}", field.Name, f.GetOneForMarshal(i)) } b.WriteByte(']') return b.String() } // GetFlattenedField returns a child array, adjusting its validity bitmap // where the union array type codes don't match. // // ie: the returned array will have a null in every index that it is // not referenced by union. 
func (a *SparseUnion) GetFlattenedField(mem memory.Allocator, index int) (arrow.Array, error) { if index < 0 || index >= a.NumFields() { return nil, fmt.Errorf("arrow/array: index out of range: %d", index) } childData := a.data.childData[index] if a.data.offset != 0 || a.data.length != childData.Len() { childData = NewSliceData(childData, int64(a.data.offset), int64(a.data.offset+a.data.length)) // NewSliceData doesn't break the slice reference for buffers // since we're going to replace the null bitmap buffer we need to break the // slice reference so that we don't affect a.children's references newBufs := make([]*memory.Buffer, len(childData.Buffers())) copy(newBufs, childData.(*Data).buffers) childData.(*Data).buffers = newBufs } else { childData = childData.(*Data).Copy() } defer childData.Release() // synthesize a null bitmap based on the union discriminant // make sure the bitmap has extra bits corresponding to the child's offset flattenedNullBitmap := memory.NewResizableBuffer(mem) flattenedNullBitmap.Resize(childData.Len() + childData.Offset()) var ( childNullBitmap = childData.Buffers()[0] childOffset = childData.Offset() typeCode = a.unionType.TypeCodes()[index] codes = a.RawTypeCodes() offset int64 = 0 ) bitutils.GenerateBitsUnrolled(flattenedNullBitmap.Bytes(), int64(childOffset), int64(a.data.length), func() bool { b := codes[offset] == typeCode offset++ return b }) if childNullBitmap != nil { defer childNullBitmap.Release() bitutil.BitmapAnd(flattenedNullBitmap.Bytes(), childNullBitmap.Bytes(), int64(childOffset), int64(childOffset), flattenedNullBitmap.Bytes(), int64(childOffset), int64(childData.Len())) } childData.(*Data).buffers[0] = flattenedNullBitmap childData.(*Data).nulls = childData.Len() - bitutil.CountSetBits(flattenedNullBitmap.Bytes(), childOffset, childData.Len()) return MakeFromData(childData), nil } func arraySparseUnionEqual(l, r *SparseUnion) bool { childIDs := l.unionType.ChildIDs() leftCodes, rightCodes := l.RawTypeCodes(), 
r.RawTypeCodes() for i := 0; i < l.data.length; i++ { typeID := leftCodes[i] if typeID != rightCodes[i] { return false } childNum := childIDs[typeID] eq := SliceEqual(l.children[childNum], int64(i), int64(i+1), r.children[childNum], int64(i), int64(i+1)) if !eq { return false } } return true } func arraySparseUnionApproxEqual(l, r *SparseUnion, opt equalOption) bool { childIDs := l.unionType.ChildIDs() leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() for i := 0; i < l.data.length; i++ { typeID := leftCodes[i] if typeID != rightCodes[i] { return false } childNum := childIDs[typeID] eq := sliceApproxEqual(l.children[childNum], int64(i+l.data.offset), int64(i+l.data.offset+1), r.children[childNum], int64(i+r.data.offset), int64(i+r.data.offset+1), opt) if !eq { return false } } return true } // DenseUnion represents an array where each logical value is taken from // a single child, at a specific offset. A buffer of 8-bit type ids // indicates which child a given logical value is to be taken from and // a buffer of 32-bit offsets indicating which physical position in the // given child array has the logical value for that index. // // Unlike a sparse union, a dense union allows encoding only the child values // which are actually referred to by the union array. This is counterbalanced // by the additional footprint of the offsets buffer, and the additional // indirection cost when looking up values. // // Unlike most other arrays, unions do not have a top-level validity bitmap. type DenseUnion struct { union offsets []int32 } // NewDenseUnion constructs a union array using the given type, length, list of // children and buffers of typeIDs and offsets, with the given array offset. 
func NewDenseUnion(dt *arrow.DenseUnionType, length int, children []arrow.Array, typeIDs, valueOffsets *memory.Buffer, offset int) *DenseUnion { childData := make([]arrow.ArrayData, len(children)) for i, c := range children { childData[i] = c.Data() } data := NewData(dt, length, []*memory.Buffer{nil, typeIDs, valueOffsets}, childData, 0, offset) defer data.Release() return NewDenseUnionData(data) } // NewDenseUnionData constructs a DenseUnion array from the given ArrayData object. func NewDenseUnionData(data arrow.ArrayData) *DenseUnion { a := &DenseUnion{} a.refCount = 1 a.setData(data.(*Data)) return a } // NewDenseUnionFromArrays constructs a new DenseUnion array with the provided // values. // // typeIDs *must* be an INT8 array with no nulls // offsets *must* be an INT32 array with no nulls // len(codes) *must* be either 0 or equal to len(children). If len(codes) is 0, // the type codes used will be sequentially numeric starting at 0. func NewDenseUnionFromArrays(typeIDs, offsets arrow.Array, children []arrow.Array, codes ...arrow.UnionTypeCode) (*DenseUnion, error) { return NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets, children, []string{}, codes) } // NewDenseUnionFromArrayWithFields constructs a new DenseUnion array like // NewDenseUnionFromArrays, but allows specifying the field names. Type codes // will be auto-generated sequentially starting at 0. // // typeIDs *must* be an INT8 array with no nulls. // offsets *must* be an INT32 array with no nulls. // len(fields) *must* either be 0 or equal to len(children). If len(fields) is 0, // then the fields will be named sequentially starting at "0". 
func NewDenseUnionFromArraysWithFields(typeIDs, offsets arrow.Array, children []arrow.Array, fields []string) (*DenseUnion, error) {
	return NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets, children, fields, []arrow.UnionTypeCode{})
}

// NewDenseUnionFromArraysWithFieldCodes combines the other constructors
// for constructing a new DenseUnion array with the provided field names
// and type codes, along with children and type ids.
//
// All the requirements mentioned in NewDenseUnionFromArrays and
// NewDenseUnionFromArraysWithFields apply.
func NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets arrow.Array, children []arrow.Array, fields []string, codes []arrow.UnionTypeCode) (*DenseUnion, error) {
	// Validate all invariants up front before constructing anything.
	switch {
	case offsets.DataType().ID() != arrow.INT32:
		return nil, errors.New("arrow/array: union offsets must be signed int32")
	case typeIDs.DataType().ID() != arrow.INT8:
		return nil, errors.New("arrow/array: union type_ids must be signed int8")
	case typeIDs.NullN() != 0:
		return nil, errors.New("arrow/array: union typeIDs may not have nulls")
	case offsets.NullN() != 0:
		return nil, errors.New("arrow/array: nulls are not allowed in offsets for NewDenseUnionFromArrays*")
	case len(fields) > 0 && len(fields) != len(children):
		return nil, errors.New("arrow/array: fields must be the same length as children")
	case len(codes) > 0 && len(codes) != len(children):
		return nil, errors.New("arrow/array: typecodes must have the same length as children")
	}

	ty := arrow.DenseUnionFromArrays(children, fields, codes)
	// Dense unions carry no validity bitmap (buffer 0 is nil); buffer 1 is the
	// type ids and buffer 2 is the value offsets, both borrowed from the inputs.
	buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1], offsets.Data().Buffers()[1]}

	childData := make([]arrow.ArrayData, len(children))
	for i, c := range children {
		childData[i] = c.Data()
	}

	data := NewData(ty, typeIDs.Len(), buffers, childData, 0, typeIDs.Data().Offset())
	defer data.Release()
	return NewDenseUnionData(data), nil
}

// ValueOffsets returns the buffer holding the 32-bit value offsets.
func (a *DenseUnion) ValueOffsets() *memory.Buffer { return a.data.buffers[2] }

// ValueOffset returns the offset into the selected child for element i.
func (a *DenseUnion) ValueOffset(i int) int32 { return a.offsets[i+a.data.offset] }

// RawValueOffsets returns the offsets slice adjusted for this array's offset.
func (a *DenseUnion) RawValueOffsets() []int32 { return a.offsets[a.data.offset:] }

func (a *DenseUnion) setData(data *Data) {
	a.union.setData(data)
	debug.Assert(a.data.dtype.ID() == arrow.DENSE_UNION, "arrow/array: invalid data type for DenseUnion")
	debug.Assert(len(a.data.buffers) == 3, "arrow/array: dense unions should have exactly 3 buffers")
	debug.Assert(a.data.buffers[0] == nil, "arrow/array: validity bitmap for dense unions should be nil")

	if data.length > 0 {
		a.offsets = arrow.Int32Traits.CastFromBytes(a.data.buffers[2].Bytes())
	} else {
		a.offsets = []int32{}
	}
}

// GetOneForMarshal returns the JSON representation of element i as a
// [typeID, value] pair, or nil when the referenced child value is null.
func (a *DenseUnion) GetOneForMarshal(i int) interface{} {
	typeID := a.RawTypeCodes()[i]

	childID := a.ChildID(i)
	data := a.Field(childID)

	offset := int(a.RawValueOffsets()[i])
	if data.IsNull(offset) {
		return nil
	}

	return []interface{}{typeID, data.GetOneForMarshal(offset)}
}

// MarshalJSON encodes the union as a JSON array of [typeID, value] pairs.
func (a *DenseUnion) MarshalJSON() ([]byte, error) {
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)

	buf.WriteByte('[')
	for i := 0; i < a.Len(); i++ {
		if i != 0 {
			buf.WriteByte(',')
		}
		if err := enc.Encode(a.GetOneForMarshal(i)); err != nil {
			return nil, err
		}
	}
	buf.WriteByte(']')

	return buf.Bytes(), nil
}

// ValueStr returns the string form of element i, NullValueStr for nulls.
func (a *DenseUnion) ValueStr(i int) string {
	if a.IsNull(i) {
		return NullValueStr
	}

	val := a.GetOneForMarshal(i)
	if val == nil {
		// child value is null
		return NullValueStr
	}

	data, err := json.Marshal(val)
	if err != nil {
		panic(err)
	}
	return string(data)
}

// String returns a human-readable representation of the array.
func (a *DenseUnion) String() string {
	var b strings.Builder
	b.WriteByte('[')

	offsets := a.RawValueOffsets()

	fieldList := a.unionType.Fields()
	for i := 0; i < a.Len(); i++ {
		if i > 0 {
			b.WriteString(" ")
		}

		field := fieldList[a.ChildID(i)]
		f := a.Field(a.ChildID(i))
		fmt.Fprintf(&b, "{%s=%v}", field.Name, f.GetOneForMarshal(int(offsets[i])))
	}
	b.WriteByte(']')
	return b.String()
}

// arrayDenseUnionEqual reports whether two dense unions of equal length are
// elementwise equal: matching type codes and equal child values at the
// respective offsets.
func arrayDenseUnionEqual(l, r *DenseUnion) bool {
	childIDs := l.unionType.ChildIDs()
	leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes()
	leftOffsets, rightOffsets := l.RawValueOffsets(), r.RawValueOffsets()

	for i := 0; i < l.data.length; i++ {
		typeID := leftCodes[i]
		if typeID != rightCodes[i] {
			return false
		}

		childNum := childIDs[typeID]
		// compare the single referenced value in each corresponding child
		eq := SliceEqual(l.children[childNum], int64(leftOffsets[i]), int64(leftOffsets[i]+1),
			r.children[childNum], int64(rightOffsets[i]), int64(rightOffsets[i]+1))
		if !eq {
			return false
		}
	}
	return true
}

// arrayDenseUnionApproxEqual is arrayDenseUnionEqual with tolerance options
// (opt) applied to the child comparisons.
func arrayDenseUnionApproxEqual(l, r *DenseUnion, opt equalOption) bool {
	childIDs := l.unionType.ChildIDs()
	leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes()
	leftOffsets, rightOffsets := l.RawValueOffsets(), r.RawValueOffsets()

	for i := 0; i < l.data.length; i++ {
		typeID := leftCodes[i]
		if typeID != rightCodes[i] {
			return false
		}

		childNum := childIDs[typeID]
		eq := sliceApproxEqual(l.children[childNum], int64(leftOffsets[i]), int64(leftOffsets[i]+1),
			r.children[childNum], int64(rightOffsets[i]), int64(rightOffsets[i]+1), opt)
		if !eq {
			return false
		}
	}
	return true
}

// UnionBuilder is a convenience interface for building Union arrays of
// either Dense or Sparse mode.
type UnionBuilder interface {
	Builder

	// AppendChild allows constructing the union type on the fly by making new
	// new array builder available to the union builder. The type code (index)
	// of the new child is returned, which should be passed to the Append method
	// when adding a new element to the union array.
	AppendChild(newChild Builder, fieldName string) (newCode arrow.UnionTypeCode)
	// Append adds an element to the UnionArray indicating which typecode the
	// new element should use. This *must* be followed up by an append to the
	// appropriate child builder.
	Append(arrow.UnionTypeCode)
	// Mode returns what kind of Union is being built, either arrow.SparseMode
	// or arrow.DenseMode
	Mode() arrow.UnionMode
	// Child returns the builder for the requested child index.
	// If an invalid index is requested then this will panic.
Child(idx int) Builder } type unionBuilder struct { builder childFields []arrow.Field codes []arrow.UnionTypeCode mode arrow.UnionMode children []Builder typeIDtoBuilder []Builder typeIDtoChildID []int // for all typeID < denseTypeID, typeIDtoBuilder[typeID] != nil denseTypeID arrow.UnionTypeCode typesBuilder *int8BufferBuilder } func newUnionBuilder(mem memory.Allocator, children []Builder, typ arrow.UnionType) unionBuilder { if children == nil { children = make([]Builder, 0) } b := unionBuilder{ builder: builder{refCount: 1, mem: mem}, mode: typ.Mode(), codes: typ.TypeCodes(), children: children, typeIDtoChildID: make([]int, int(typ.MaxTypeCode())+1), // convert to int as int8(127) +1 panics typeIDtoBuilder: make([]Builder, int(typ.MaxTypeCode())+1), // convert to int as int8(127) +1 panics childFields: make([]arrow.Field, len(children)), typesBuilder: newInt8BufferBuilder(mem), } b.typeIDtoChildID[0] = arrow.InvalidUnionChildID for i := 1; i < len(b.typeIDtoChildID); i *= 2 { copy(b.typeIDtoChildID[i:], b.typeIDtoChildID[:i]) } debug.Assert(len(children) == len(typ.TypeCodes()), "mismatched typecodes and children") debug.Assert(len(b.typeIDtoBuilder)-1 <= int(arrow.MaxUnionTypeCode), "too many typeids") copy(b.childFields, typ.Fields()) for i, c := range children { c.Retain() typeID := typ.TypeCodes()[i] b.typeIDtoChildID[typeID] = i b.typeIDtoBuilder[typeID] = c } return b } func (b *unionBuilder) NumChildren() int { return len(b.children) } func (b *unionBuilder) Child(idx int) Builder { if idx < 0 || idx > len(b.children) { panic("arrow/array: invalid child index for union builder") } return b.children[idx] } // Len returns the current number of elements in the builder. 
func (b *unionBuilder) Len() int { return b.typesBuilder.Len() }

func (b *unionBuilder) Mode() arrow.UnionMode { return b.mode }

// reserve grows capacity (via the provided resize callback) so that
// `elements` more values fit without reallocation.
func (b *unionBuilder) reserve(elements int, resize func(int)) {
	// union has no null bitmap, ever so we can skip that handling
	if b.length+elements > b.capacity {
		b.capacity = bitutil.NextPowerOf2(b.length + elements)
		resize(b.capacity)
	}
}

// Release decrements the reference count; when it reaches zero the child
// builders and the type-id buffer are released.
func (b *unionBuilder) Release() {
	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")

	if atomic.AddInt64(&b.refCount, -1) == 0 {
		for _, c := range b.children {
			c.Release()
		}
		b.typesBuilder.Release()
	}
}

// Type reconstructs the current union data type, picking up any children
// added via AppendChild and the children's current (possibly refined) types.
func (b *unionBuilder) Type() arrow.DataType {
	fields := make([]arrow.Field, len(b.childFields))
	for i, f := range b.childFields {
		fields[i] = f
		// use the child builder's live type, not the originally declared one
		fields[i].Type = b.children[i].Type()
	}

	switch b.mode {
	case arrow.SparseMode:
		return arrow.SparseUnionOf(fields, b.codes)
	case arrow.DenseMode:
		return arrow.DenseUnionOf(fields, b.codes)
	default:
		panic("invalid union builder mode")
	}
}

// AppendChild registers a new child builder under a freshly allocated type
// code, which is returned for use with Append.
func (b *unionBuilder) AppendChild(newChild Builder, fieldName string) arrow.UnionTypeCode {
	newChild.Retain()
	b.children = append(b.children, newChild)
	newType := b.nextTypeID()

	b.typeIDtoChildID[newType] = len(b.children) - 1
	b.typeIDtoBuilder[newType] = newChild
	b.childFields = append(b.childFields, arrow.Field{Name: fieldName, Nullable: true})
	b.codes = append(b.codes, newType)

	return newType
}

// nextTypeID allocates the next unused union type code.
func (b *unionBuilder) nextTypeID() arrow.UnionTypeCode {
	// find typeID such that typeIDtoBuilder[typeID] == nil
	// use that for the new child. Start searching at denseTypeID
	// since typeIDtoBuilder is densely packed up at least to denseTypeID
	for ; int(b.denseTypeID) < len(b.typeIDtoBuilder); b.denseTypeID++ {
		if b.typeIDtoBuilder[b.denseTypeID] == nil {
			id := b.denseTypeID
			b.denseTypeID++
			return id
		}
	}

	debug.Assert(len(b.typeIDtoBuilder) < int(arrow.MaxUnionTypeCode), "too many children typeids")

	// typeIDtoBuilder is already densely packed, so just append the new child
	b.typeIDtoBuilder = append(b.typeIDtoBuilder, nil)
	b.typeIDtoChildID = append(b.typeIDtoChildID, arrow.InvalidUnionChildID)
	id := b.denseTypeID
	b.denseTypeID++
	return id
}

// newData finishes the type-id buffer and the child builders into a new
// ArrayData (dense builders append the offsets buffer afterwards).
func (b *unionBuilder) newData() *Data {
	length := b.typesBuilder.Len()
	typesBuffer := b.typesBuilder.Finish()
	defer typesBuffer.Release()
	childData := make([]arrow.ArrayData, len(b.children))
	// NOTE: the loop variable deliberately shadows the receiver `b` here;
	// each iteration finalizes one child builder.
	for i, b := range b.children {
		childData[i] = b.newData()
		defer childData[i].Release()
	}

	return NewData(b.Type(), length, []*memory.Buffer{nil, typesBuffer}, childData, 0, 0)
}

// SparseUnionBuilder is used to build a Sparse Union array using the Append
// methods. You can also add new types to the union on the fly by using
// AppendChild.
//
// Keep in mind: All children of a SparseUnion should be the same length
// as the union itself. If you add new children with AppendChild, ensure
// that they have the correct number of preceding elements that have been
// added to the builder beforehand.
type SparseUnionBuilder struct {
	unionBuilder
}

// NewEmptySparseUnionBuilder is a helper to construct a SparseUnionBuilder
// without having to predefine the union types. It creates a builder with no
// children and AppendChild will have to be called before appending any
// elements to this builder.
func NewEmptySparseUnionBuilder(mem memory.Allocator) *SparseUnionBuilder {
	return &SparseUnionBuilder{
		unionBuilder: newUnionBuilder(mem, nil, arrow.SparseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})),
	}
}

// NewSparseUnionBuilder constructs a new SparseUnionBuilder with the provided
// children and type codes. Builders will be constructed for each child
// using the fields in typ
func NewSparseUnionBuilder(mem memory.Allocator, typ *arrow.SparseUnionType) *SparseUnionBuilder {
	children := make([]Builder, typ.NumFields())
	for i, f := range typ.Fields() {
		children[i] = NewBuilder(mem, f.Type)
		// drop our local reference once the union builder (which retains each
		// child in newUnionBuilder) owns it; deferred so it runs after return
		defer children[i].Release()
	}
	return NewSparseUnionBuilderWithBuilders(mem, typ, children)
}

// NewSparseUnionBuilderWithBuilders returns a new SparseUnionBuilder using the
// provided type and builders.
func NewSparseUnionBuilderWithBuilders(mem memory.Allocator, typ *arrow.SparseUnionType, children []Builder) *SparseUnionBuilder {
	return &SparseUnionBuilder{
		unionBuilder: newUnionBuilder(mem, children, typ),
	}
}

// Reserve ensures space for at least n more elements.
func (b *SparseUnionBuilder) Reserve(n int) { b.reserve(n, b.Resize) }

// Resize adjusts the capacity of the type-id buffer to n elements.
func (b *SparseUnionBuilder) Resize(n int) { b.typesBuilder.resize(n) }

// AppendNull will append a null to the first child and an empty value
// (implementation-defined) to the rest of the children.
func (b *SparseUnionBuilder) AppendNull() {
	firstChildCode := b.codes[0]
	b.typesBuilder.AppendValue(firstChildCode)
	b.typeIDtoBuilder[firstChildCode].AppendNull()
	for _, c := range b.codes[1:] {
		b.typeIDtoBuilder[c].AppendEmptyValue()
	}
}

// AppendNulls is identical to calling AppendNull() n times, except
// it will pre-allocate with reserve for all the nulls beforehand.
func (b *SparseUnionBuilder) AppendNulls(n int) { firstChildCode := b.codes[0] b.Reserve(n) for _, c := range b.codes { b.typeIDtoBuilder[c].Reserve(n) } for i := 0; i < n; i++ { b.typesBuilder.AppendValue(firstChildCode) b.typeIDtoBuilder[firstChildCode].AppendNull() for _, c := range b.codes[1:] { b.typeIDtoBuilder[c].AppendEmptyValue() } } } // AppendEmptyValue appends an empty value (implementation defined) // to each child, and appends the type of the first typecode to the typeid // buffer. func (b *SparseUnionBuilder) AppendEmptyValue() { b.typesBuilder.AppendValue(b.codes[0]) for _, c := range b.codes { b.typeIDtoBuilder[c].AppendEmptyValue() } } // AppendEmptyValues is identical to calling AppendEmptyValue() n times, // except it pre-allocates first so it is more efficient. func (b *SparseUnionBuilder) AppendEmptyValues(n int) { b.Reserve(n) firstChildCode := b.codes[0] for _, c := range b.codes { b.typeIDtoBuilder[c].Reserve(n) } for i := 0; i < n; i++ { b.typesBuilder.AppendValue(firstChildCode) for _, c := range b.codes { b.typeIDtoBuilder[c].AppendEmptyValue() } } } // Append appends an element to the UnionArray and must be followed up // by an append to the appropriate child builder. The parameter should // be the type id of the child to which the next value will be appended. // // After appending to the corresponding child builder, all other child // builders should have a null or empty value appended to them (although // this is not enforced and any value is theoretically allowed and will be // ignored). 
func (b *SparseUnionBuilder) Append(nextType arrow.UnionTypeCode) { b.typesBuilder.AppendValue(nextType) } func (b *SparseUnionBuilder) NewArray() arrow.Array { return b.NewSparseUnionArray() } func (b *SparseUnionBuilder) NewSparseUnionArray() (a *SparseUnion) { data := b.newData() a = NewSparseUnionData(data) data.Release() return } func (b *SparseUnionBuilder) UnmarshalJSON(data []byte) (err error) { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("sparse union builder must unpack from json array, found %s", t) } return b.Unmarshal(dec) } func (b *SparseUnionBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *SparseUnionBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } dec := json.NewDecoder(strings.NewReader(s)) return b.UnmarshalOne(dec) } func (b *SparseUnionBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch t { case json.Delim('['): // should be [type_id, Value] typeID, err := dec.Token() if err != nil { return err } var typeCode int8 switch tid := typeID.(type) { case json.Number: id, err := tid.Int64() if err != nil { return err } typeCode = int8(id) case float64: if tid != float64(int64(tid)) { return &json.UnmarshalTypeError{ Offset: dec.InputOffset(), Type: reflect.TypeOf(int8(0)), Struct: fmt.Sprint(b.Type()), Value: "float", } } typeCode = int8(tid) } childNum := b.typeIDtoChildID[typeCode] if childNum == arrow.InvalidUnionChildID { return &json.UnmarshalTypeError{ Offset: dec.InputOffset(), Value: "invalid type code", } } for i, c := range b.children { if i != childNum { c.AppendNull() } } b.Append(typeCode) if err := b.children[childNum].UnmarshalOne(dec); err != nil { return err } endArr, err := dec.Token() if err != nil { return err } 
if endArr != json.Delim(']') { return &json.UnmarshalTypeError{ Offset: dec.InputOffset(), Value: "union value array should have exactly 2 elements", } } case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Offset: dec.InputOffset(), Value: fmt.Sprint(t), Struct: fmt.Sprint(b.Type()), } } return nil } // DenseUnionBuilder is used to build a Dense Union array using the Append // methods. You can also add new types to the union on the fly by using // AppendChild. type DenseUnionBuilder struct { unionBuilder offsetsBuilder *int32BufferBuilder } // NewEmptyDenseUnionBuilder is a helper to construct a DenseUnionBuilder // without having to predefine the union types. It creates a builder with no // children and AppendChild will have to be called before appending any // elements to this builder. func NewEmptyDenseUnionBuilder(mem memory.Allocator) *DenseUnionBuilder { return &DenseUnionBuilder{ unionBuilder: newUnionBuilder(mem, nil, arrow.DenseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})), offsetsBuilder: newInt32BufferBuilder(mem), } } // NewDenseUnionBuilder constructs a new DenseUnionBuilder with the provided // children and type codes. Builders will be constructed for each child // using the fields in typ func NewDenseUnionBuilder(mem memory.Allocator, typ *arrow.DenseUnionType) *DenseUnionBuilder { children := make([]Builder, 0, typ.NumFields()) defer func() { for _, child := range children { child.Release() } }() for _, f := range typ.Fields() { children = append(children, NewBuilder(mem, f.Type)) } return NewDenseUnionBuilderWithBuilders(mem, typ, children) } // NewDenseUnionWithBuilders returns a new DenseUnionBuilder using the // provided type and builders. 
func NewDenseUnionBuilderWithBuilders(mem memory.Allocator, typ *arrow.DenseUnionType, children []Builder) *DenseUnionBuilder { return &DenseUnionBuilder{ unionBuilder: newUnionBuilder(mem, children, typ), offsetsBuilder: newInt32BufferBuilder(mem), } } func (b *DenseUnionBuilder) Reserve(n int) { b.reserve(n, b.Resize) } func (b *DenseUnionBuilder) Resize(n int) { b.typesBuilder.resize(n) b.offsetsBuilder.resize(n * arrow.Int32SizeBytes) } // AppendNull will only append a null value arbitrarily to the first child // and use that offset for this element of the array. func (b *DenseUnionBuilder) AppendNull() { firstChildCode := b.codes[0] childBuilder := b.typeIDtoBuilder[firstChildCode] b.typesBuilder.AppendValue(firstChildCode) b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) childBuilder.AppendNull() } // AppendNulls will only append a single null arbitrarily to the first child // and use the same offset multiple times to point to it. The result is that // for a DenseUnion this is more efficient than calling AppendNull multiple // times in a loop func (b *DenseUnionBuilder) AppendNulls(n int) { // only append 1 null to the child builder, use the same offset twice firstChildCode := b.codes[0] childBuilder := b.typeIDtoBuilder[firstChildCode] b.Reserve(n) for i := 0; i < n; i++ { b.typesBuilder.AppendValue(firstChildCode) b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) } // only append a single null to the child builder, the offsets all refer to the same value childBuilder.AppendNull() } // AppendEmptyValue only appends an empty value arbitrarily to the first child, // and then uses that offset to identify the value. 
func (b *DenseUnionBuilder) AppendEmptyValue() { firstChildCode := b.codes[0] childBuilder := b.typeIDtoBuilder[firstChildCode] b.typesBuilder.AppendValue(firstChildCode) b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) childBuilder.AppendEmptyValue() } // AppendEmptyValues, like AppendNulls, will only append a single empty value // (implementation defined) to the first child arbitrarily, and then point // at that value using the offsets n times. That makes this more efficient // than calling AppendEmptyValue multiple times. func (b *DenseUnionBuilder) AppendEmptyValues(n int) { // only append 1 null to the child builder, use the same offset twice firstChildCode := b.codes[0] childBuilder := b.typeIDtoBuilder[firstChildCode] b.Reserve(n) for i := 0; i < n; i++ { b.typesBuilder.AppendValue(firstChildCode) b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) } // only append a single empty value to the child builder, the offsets all // refer to the same value childBuilder.AppendEmptyValue() } // Append appends the necessary offset and type code to the builder // and must be followed up with an append to the appropriate child builder func (b *DenseUnionBuilder) Append(nextType arrow.UnionTypeCode) { b.typesBuilder.AppendValue(nextType) bldr := b.typeIDtoBuilder[nextType] if bldr.Len() == kMaxElems { panic("a dense UnionArray cannot contain more than 2^31 - 1 elements from a single child") } b.offsetsBuilder.AppendValue(int32(bldr.Len())) } func (b *DenseUnionBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { for _, c := range b.children { c.Release() } b.typesBuilder.Release() b.offsetsBuilder.Release() } } func (b *DenseUnionBuilder) newData() *Data { data := b.unionBuilder.newData() data.buffers = append(data.buffers, b.offsetsBuilder.Finish()) return data } func (b *DenseUnionBuilder) NewArray() arrow.Array { return b.NewDenseUnionArray() } func (b *DenseUnionBuilder) 
NewDenseUnionArray() (a *DenseUnion) { data := b.newData() a = NewDenseUnionData(data) data.Release() return } func (b *DenseUnionBuilder) UnmarshalJSON(data []byte) (err error) { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("dense union builder must unpack from json array, found %s", t) } return b.Unmarshal(dec) } func (b *DenseUnionBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (d *DenseUnionBuilder) AppendValueFromString(s string) error { if s == NullValueStr { d.AppendNull() return nil } dec := json.NewDecoder(strings.NewReader(s)) return d.UnmarshalOne(dec) } func (b *DenseUnionBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch t { case json.Delim('['): // should be [type_id, Value] typeID, err := dec.Token() if err != nil { return err } var typeCode int8 switch tid := typeID.(type) { case json.Number: id, err := tid.Int64() if err != nil { return err } typeCode = int8(id) case float64: if tid != float64(int64(tid)) { return &json.UnmarshalTypeError{ Offset: dec.InputOffset(), Type: reflect.TypeOf(int8(0)), Struct: fmt.Sprint(b.Type()), Value: "float", } } typeCode = int8(tid) } childNum := b.typeIDtoChildID[typeCode] if childNum == arrow.InvalidUnionChildID { return &json.UnmarshalTypeError{ Offset: dec.InputOffset(), Value: "invalid type code", } } b.Append(typeCode) if err := b.children[childNum].UnmarshalOne(dec); err != nil { return err } endArr, err := dec.Token() if err != nil { return err } if endArr != json.Delim(']') { return &json.UnmarshalTypeError{ Offset: dec.InputOffset(), Value: "union value array should have exactly 2 elements", } } case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Offset: dec.InputOffset(), Value: fmt.Sprint(t), Struct: fmt.Sprint(b.Type()), 
} } return nil } var ( _ arrow.Array = (*SparseUnion)(nil) _ arrow.Array = (*DenseUnion)(nil) _ Union = (*SparseUnion)(nil) _ Union = (*DenseUnion)(nil) _ Builder = (*SparseUnionBuilder)(nil) _ Builder = (*DenseUnionBuilder)(nil) _ UnionBuilder = (*SparseUnionBuilder)(nil) _ UnionBuilder = (*DenseUnionBuilder)(nil) ) arrow-go-18.2.0/arrow/array/union_test.go000066400000000000000000001137541476434502500203520ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "fmt" "strings" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) func uint8ArrFromSlice(ids ...uint8) arrow.Array { data := array.NewData(arrow.PrimitiveTypes.Uint8, len(ids), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes(ids))}, nil, 0, 0) defer data.Release() return array.MakeFromData(data) } func int32ArrFromSlice(offsets ...int32) arrow.Array { data := array.NewData(arrow.PrimitiveTypes.Int32, len(offsets), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))}, nil, 0, 0) defer data.Release() return array.MakeFromData(data) } func TestUnionSliceEquals(t *testing.T) { unionFields := []arrow.Field{ {Name: "u0", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, {Name: "u1", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}, } typeCodes := []arrow.UnionTypeCode{5, 10} sparseType := arrow.SparseUnionOf(unionFields, typeCodes) denseType := arrow.DenseUnionOf(unionFields, typeCodes) schema := arrow.NewSchema([]arrow.Field{ {Name: "sparse", Type: sparseType, Nullable: true}, {Name: "dense", Type: denseType, Nullable: true}, }, nil) sparseChildren := make([]arrow.Array, 2) denseChildren := make([]arrow.Array, 2) const length = 7 typeIDsBuffer := memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes([]uint8{5, 10, 5, 5, 10, 10, 5})) sparseChildren[0] = int32ArrFromSlice(0, 1, 2, 3, 4, 5, 6) defer sparseChildren[0].Release() sparseChildren[1] = uint8ArrFromSlice(10, 11, 12, 13, 14, 15, 16) defer sparseChildren[1].Release() denseChildren[0] = int32ArrFromSlice(0, 2, 3, 7) defer denseChildren[0].Release() denseChildren[1] = uint8ArrFromSlice(11, 14, 15) defer denseChildren[1].Release() offsetsBuffer := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, 0, 1, 2, 1, 2, 3})) sparse := array.NewSparseUnion(sparseType, 
length, sparseChildren, typeIDsBuffer, 0) dense := array.NewDenseUnion(denseType, length, denseChildren, typeIDsBuffer, offsetsBuffer, 0) defer sparse.Release() defer dense.Release() batch := array.NewRecord(schema, []arrow.Array{sparse, dense}, -1) defer batch.Release() checkUnion := func(arr arrow.Array) { size := arr.Len() slice := array.NewSlice(arr, 2, int64(size)) defer slice.Release() assert.EqualValues(t, size-2, slice.Len()) slice2 := array.NewSlice(arr, 2, int64(arr.Len())) defer slice2.Release() assert.EqualValues(t, size-2, slice2.Len()) assert.True(t, array.Equal(slice, slice2)) assert.True(t, array.SliceEqual(arr, 2, int64(arr.Len()), slice, 0, int64(slice.Len()))) // chain slices slice2 = array.NewSlice(arr, 1, int64(arr.Len())) defer slice2.Release() slice2 = array.NewSlice(slice2, 1, int64(slice2.Len())) defer slice2.Release() assert.True(t, array.Equal(slice, slice2)) slice, slice2 = array.NewSlice(arr, 1, 6), array.NewSlice(arr, 1, 6) defer slice.Release() defer slice2.Release() assert.EqualValues(t, 5, slice.Len()) assert.True(t, array.Equal(slice, slice2)) assert.True(t, array.SliceEqual(arr, 1, 6, slice, 0, 5)) } checkUnion(batch.Column(0)) checkUnion(batch.Column(1)) } func TestSparseUnionGetFlattenedField(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) ty := arrow.SparseUnionOf([]arrow.Field{ {Name: "ints", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, {Name: "strs", Type: arrow.BinaryTypes.String, Nullable: true}, }, []arrow.UnionTypeCode{2, 7}) ints, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[0, 1, 2, 3]`)) defer ints.Release() strs, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["a", null, "c", "d"]`)) defer strs.Release() idsArr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[2, 7, 2, 7]`)) defer idsArr.Release() ids := idsArr.Data().Buffers()[1] const length = 4 t.Run("flattened", func(t 
*testing.T) { scoped := memory.NewCheckedAllocatorScope(mem) defer scoped.CheckSize(t) arr := array.NewSparseUnion(ty, length, []arrow.Array{ints, strs}, ids, 0) defer arr.Release() flattened, err := arr.GetFlattenedField(mem, 0) assert.NoError(t, err) defer flattened.Release() expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[0, null, 2, null]`)) defer expected.Release() assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) flattened, err = arr.GetFlattenedField(mem, 1) assert.NoError(t, err) defer flattened.Release() expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, null, null, "d"]`)) defer expected.Release() assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) sliced := array.NewSlice(arr, 1, 3).(*array.SparseUnion) defer sliced.Release() flattened, err = sliced.GetFlattenedField(mem, 0) assert.NoError(t, err) defer flattened.Release() expected, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[null, 2]`)) defer expected.Release() assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) flattened, err = sliced.GetFlattenedField(mem, 1) assert.NoError(t, err) defer flattened.Release() expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, null]`)) defer expected.Release() assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) _, err = arr.GetFlattenedField(mem, -1) assert.Error(t, err) _, err = arr.GetFlattenedField(mem, 2) assert.Error(t, err) }) t.Run("offset children", func(t *testing.T) { scoped := memory.NewCheckedAllocatorScope(mem) defer scoped.CheckSize(t) strSlice, intSlice := array.NewSlice(strs, 1, 3), array.NewSlice(ints, 1, 3) defer strSlice.Release() defer intSlice.Release() arr := array.NewSparseUnion(ty, length-2, []arrow.Array{intSlice, strSlice}, ids, 
0) defer arr.Release() flattened, err := arr.GetFlattenedField(mem, 0) assert.NoError(t, err) defer flattened.Release() expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[1, null]`)) defer expected.Release() assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) flattened, err = arr.GetFlattenedField(mem, 1) assert.NoError(t, err) defer flattened.Release() expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, "c"]`)) defer expected.Release() assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) sliced := array.NewSlice(arr, 1, 2).(*array.SparseUnion) defer sliced.Release() flattened, err = sliced.GetFlattenedField(mem, 0) assert.NoError(t, err) defer flattened.Release() expected, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[null]`)) defer expected.Release() assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) flattened, err = sliced.GetFlattenedField(mem, 1) assert.NoError(t, err) defer flattened.Release() expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["c"]`)) defer expected.Release() assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) }) t.Run("empty flattened", func(t *testing.T) { scoped := memory.NewCheckedAllocatorScope(mem) defer scoped.CheckSize(t) strSlice, intSlice := array.NewSlice(strs, length, length), array.NewSlice(ints, length, length) defer strSlice.Release() defer intSlice.Release() arr := array.NewSparseUnion(ty, 0, []arrow.Array{intSlice, strSlice}, ids, 0) defer arr.Release() flattened, err := arr.GetFlattenedField(mem, 0) assert.NoError(t, err) defer flattened.Release() expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[]`)) defer expected.Release() assert.Truef(t, array.Equal(flattened, expected), "expected: 
%s, got: %s", expected, flattened) flattened, err = arr.GetFlattenedField(mem, 1) assert.NoError(t, err) defer flattened.Release() expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[]`)) defer expected.Release() assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) }) } func TestSparseUnionValidate(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) a, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[4, 5]`)) defer a.Release() dt := arrow.SparseUnionOf([]arrow.Field{{Name: "a", Type: arrow.PrimitiveTypes.Int32, Nullable: true}}, []arrow.UnionTypeCode{0}) children := []arrow.Array{a} typeIDsArr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, 0, 0]`)) defer typeIDsArr.Release() typeIDs := typeIDsArr.Data().Buffers()[1] arr := array.NewSparseUnion(dt, 2, children, typeIDs, 0) assert.NoError(t, arr.ValidateFull()) arr.Release() arr = array.NewSparseUnion(dt, 1, children, typeIDs, 1) assert.NoError(t, arr.ValidateFull()) arr.Release() arr = array.NewSparseUnion(dt, 0, children, typeIDs, 2) assert.NoError(t, arr.ValidateFull()) arr.Release() // length + offset < child length but that's ok! arr = array.NewSparseUnion(dt, 1, children, typeIDs, 0) assert.NoError(t, arr.ValidateFull()) arr.Release() // length + offset > child length! BAD! 
assert.Panics(t, func() { arr = array.NewSparseUnion(dt, 1, children, typeIDs, 2) }) // offset > child length assert.Panics(t, func() { arr = array.NewSparseUnion(dt, 0, children, typeIDs, 3) }) } type UnionFactorySuite struct { suite.Suite mem *memory.CheckedAllocator codes []arrow.UnionTypeCode typeIDs arrow.Array logicalTypeIDs arrow.Array invalidTypeIDs arrow.Array invalidTypeIDs2 arrow.Array } func (s *UnionFactorySuite) typeidsFromSlice(ids ...int8) arrow.Array { data := array.NewData(arrow.PrimitiveTypes.Int8, len(ids), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int8Traits.CastToBytes(ids))}, nil, 0, 0) defer data.Release() return array.MakeFromData(data) } func (s *UnionFactorySuite) offsetsFromSlice(offsets ...int32) arrow.Array { data := array.NewData(arrow.PrimitiveTypes.Int32, len(offsets), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))}, nil, 0, 0) defer data.Release() return array.MakeFromData(data) } func (s *UnionFactorySuite) SetupTest() { s.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) s.codes = []arrow.UnionTypeCode{1, 2, 4, 127} s.typeIDs = s.typeidsFromSlice(0, 1, 2, 0, 1, 3, 2, 0, 2, 1) s.logicalTypeIDs = s.typeidsFromSlice(1, 2, 4, 1, 2, 127, 4, 1, 4, 2) s.invalidTypeIDs = s.typeidsFromSlice(1, 2, 4, 1, -2, 127, 4, 1, 4, 2) s.invalidTypeIDs2 = s.typeidsFromSlice(1, 2, 4, 1, 3, 127, 4, 1, 4, 2) } func (s *UnionFactorySuite) TearDownTest() { s.typeIDs.Release() s.logicalTypeIDs.Release() s.invalidTypeIDs.Release() s.invalidTypeIDs2.Release() s.mem.AssertSize(s.T(), 0) } func (s *UnionFactorySuite) checkFields(arr array.Union, fields []string) { ty := arr.DataType().(arrow.UnionType) s.Len(ty.Fields(), len(fields)) for i, f := range ty.Fields() { s.Equal(fields[i], f.Name) } } func (s *UnionFactorySuite) checkCodes(arr array.Union, codes []arrow.UnionTypeCode) { ty := arr.DataType().(arrow.UnionType) s.Equal(codes, ty.TypeCodes()) } func (s *UnionFactorySuite) checkUnion(arr array.Union, 
mode arrow.UnionMode, fields []string, codes []arrow.UnionTypeCode) { s.Equal(mode, arr.Mode()) s.checkFields(arr, fields) s.checkCodes(arr, codes) typeIDs := s.typeIDs.(*array.Int8) for i := 0; i < typeIDs.Len(); i++ { s.EqualValues(typeIDs.Value(i), arr.ChildID(i)) } s.Nil(arr.Field(-1)) s.Nil(arr.Field(typeIDs.Len())) } func (s *UnionFactorySuite) TestMakeDenseUnions() { // typeIDs: {0, 1, 2, 0, 1, 3, 2, 0, 2, 1} offsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 0, 1, 2, 1, 2) defer offsets.Release() children := make([]arrow.Array, 4) children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "def", "xyz"]`)) defer children[0].Release() children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, strings.NewReader(`[10, 20, 30]`)) defer children[1].Release() children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.618, 2.718, 3.142]`)) defer children[2].Release() children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[-12]`)) defer children[3].Release() fieldNames := []string{"str", "int1", "real", "int2"} s.Run("without fields and codes", func() { result, err := array.NewDenseUnionFromArrays(s.typeIDs, offsets, children) s.NoError(err) defer result.Release() s.NoError(result.ValidateFull()) s.checkUnion(result, arrow.DenseMode, []string{"0", "1", "2", "3"}, []arrow.UnionTypeCode{0, 1, 2, 3}) }) s.Run("with fields", func() { _, err := array.NewDenseUnionFromArraysWithFields(s.typeIDs, offsets, children, []string{"one"}) s.Error(err) result, err := array.NewDenseUnionFromArraysWithFields(s.typeIDs, offsets, children, fieldNames) s.NoError(err) defer result.Release() s.NoError(result.ValidateFull()) s.checkUnion(result, arrow.DenseMode, fieldNames, []arrow.UnionTypeCode{0, 1, 2, 3}) }) s.Run("with codes", func() { _, err := array.NewDenseUnionFromArrays(s.logicalTypeIDs, offsets, children, 0) s.Error(err) result, err := 
array.NewDenseUnionFromArrays(s.logicalTypeIDs, offsets, children, s.codes...) s.NoError(err) defer result.Release() s.NoError(result.ValidateFull()) s.checkUnion(result, arrow.DenseMode, []string{"0", "1", "2", "3"}, s.codes) }) s.Run("with fields and codes", func() { _, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, []string{"one"}, s.codes) s.Error(err) result, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, fieldNames, s.codes) s.NoError(err) defer result.Release() s.NoError(result.ValidateFull()) s.checkUnion(result, arrow.DenseMode, fieldNames, s.codes) }) s.Run("invalid type codes", func() { result, err := array.NewDenseUnionFromArrays(s.invalidTypeIDs, offsets, children, s.codes...) s.NoError(err) defer result.Release() s.Error(result.ValidateFull()) result, err = array.NewDenseUnionFromArrays(s.invalidTypeIDs2, offsets, children, s.codes...) s.NoError(err) defer result.Release() s.Error(result.ValidateFull()) }) s.Run("invalid offsets", func() { // offset out of bounds at index 5 invalidOffsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 1, 1, 2, 1, 2) defer invalidOffsets.Release() result, err := array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) s.NoError(err) defer result.Release() s.Error(result.ValidateFull()) // negative offset at index 5 invalidOffsets = s.offsetsFromSlice(0, 0, 0, 1, 1, -1, 1, 2, 1, 2) defer invalidOffsets.Release() result, err = array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) s.NoError(err) defer result.Release() s.Error(result.ValidateFull()) // non-monotonic offset at index 3 invalidOffsets = s.offsetsFromSlice(1, 0, 0, 0, 1, 0, 1, 2, 1, 2) defer invalidOffsets.Release() result, err = array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) s.NoError(err) defer result.Release() s.Error(result.ValidateFull()) }) } func (s *UnionFactorySuite) TestDenseUnionStringRoundTrip() { // typeIDs: {0, 1, 2, 0, 1, 3, 2, 0, 
2, 1} offsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 0, 1, 2, 1, 2) defer offsets.Release() children := make([]arrow.Array, 4) children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "def", "xyz"]`)) defer children[0].Release() children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, strings.NewReader(`[10, 20, 30]`)) defer children[1].Release() children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.618, 2.718, 3.142]`)) defer children[2].Release() children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[-12]`)) defer children[3].Release() fields := []string{"str", "int1", "real", "int2"} // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(s.T(), 0) dt := arrow.DenseUnionFromArrays(children, fields, s.codes) arr, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, fields, s.codes) s.NoError(err) defer arr.Release() // 2. 
create array via AppendValueFromString b1 := array.NewDenseUnionBuilder(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { s.NoError(b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.DenseUnion) defer arr1.Release() s.True(array.Equal(arr, arr1)) } func (s *UnionFactorySuite) TestMakeSparse() { children := make([]arrow.Array, 4) children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "", "", "def", "", "", "", "xyz", "", ""]`)) children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, strings.NewReader(`[0, 10, 0, 0, 20, 0, 0, 0, 0, 30]`)) children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[0.0, 0.0, 1.618, 0.0, 0.0, 0.0, 2.718, 0.0, 3.142, 0.0]`)) children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0, 0]`)) for _, c := range children { defer c.Release() } fieldNames := []string{"str", "int1", "real", "int2"} s.Run("without fields and codes", func() { result, err := array.NewSparseUnionFromArrays(s.typeIDs, children) s.NoError(err) defer result.Release() s.NoError(result.ValidateFull()) s.checkUnion(result, arrow.SparseMode, []string{"0", "1", "2", "3"}, []arrow.UnionTypeCode{0, 1, 2, 3}) }) s.Run("with fields", func() { _, err := array.NewSparseUnionFromArraysWithFields(s.typeIDs, children, []string{"one"}) s.Error(err) result, err := array.NewSparseUnionFromArraysWithFields(s.typeIDs, children, fieldNames) s.NoError(err) defer result.Release() s.NoError(result.ValidateFull()) s.checkUnion(result, arrow.SparseMode, fieldNames, []arrow.UnionTypeCode{0, 1, 2, 3}) }) s.Run("with codes", func() { _, err := array.NewSparseUnionFromArrays(s.logicalTypeIDs, children, 0) s.Error(err) result, err := array.NewSparseUnionFromArrays(s.logicalTypeIDs, children, s.codes...) 
s.NoError(err) defer result.Release() s.NoError(result.ValidateFull()) s.checkUnion(result, arrow.SparseMode, []string{"0", "1", "2", "3"}, s.codes) }) s.Run("with fields and codes", func() { _, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, []string{"one"}, s.codes) s.Error(err) result, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, fieldNames, s.codes) s.NoError(err) defer result.Release() s.NoError(result.ValidateFull()) s.checkUnion(result, arrow.SparseMode, fieldNames, s.codes) }) s.Run("invalid type codes", func() { result, err := array.NewSparseUnionFromArrays(s.invalidTypeIDs, children, s.codes...) s.NoError(err) defer result.Release() s.Error(result.ValidateFull()) result, err = array.NewSparseUnionFromArrays(s.invalidTypeIDs2, children, s.codes...) s.NoError(err) defer result.Release() s.Error(result.ValidateFull()) }) s.Run("invalid child length", func() { children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0]`)) defer children[3].Release() _, err := array.NewSparseUnionFromArrays(s.typeIDs, children) s.Error(err) }) } func (s *UnionFactorySuite) TestSparseUnionStringRoundTrip() { children := make([]arrow.Array, 4) children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "", "", "def", "", "", "", "xyz", "", ""]`)) defer children[0].Release() children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, strings.NewReader(`[0, 10, 0, 0, 20, 0, 0, 0, 0, 30]`)) defer children[1].Release() children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[0.0, 0.0, 1.618, 0.0, 0.0, 0.0, 2.718, 0.0, 3.142, 0.0]`)) defer children[2].Release() children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0, 0]`)) defer children[3].Release() fields := []string{"str", "int1", "real", "int2"} // 1. 
create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(s.T(), 0) dt := arrow.SparseUnionFromArrays(children, fields, s.codes) arr, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, fields, s.codes) s.NoError(err) defer arr.Release() // 2. create array via AppendValueFromString b1 := array.NewSparseUnionBuilder(mem, dt) defer b1.Release() for i := 0; i < arr.Len(); i++ { s.NoError(b1.AppendValueFromString(arr.ValueStr(i))) } arr1 := b1.NewArray().(*array.SparseUnion) defer arr1.Release() s.True(array.Equal(arr, arr1)) } type UnionBuilderSuite struct { suite.Suite I8 arrow.UnionTypeCode STR arrow.UnionTypeCode DBL arrow.UnionTypeCode mem *memory.CheckedAllocator expectedTypes []arrow.UnionTypeCode expectedTypesArr arrow.Array i8Bldr *array.Int8Builder strBldr *array.StringBuilder dblBldr *array.Float64Builder unionBldr array.UnionBuilder actual array.Union } func (s *UnionBuilderSuite) SetupTest() { s.I8, s.STR, s.DBL = 8, 13, 7 s.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) s.expectedTypes = make([]arrow.UnionTypeCode, 0) s.i8Bldr = array.NewInt8Builder(s.mem) s.strBldr = array.NewStringBuilder(s.mem) s.dblBldr = array.NewFloat64Builder(s.mem) } func (s *UnionBuilderSuite) TearDownTest() { if s.expectedTypesArr != nil { s.expectedTypesArr.Release() s.expectedTypesArr = nil } s.i8Bldr.Release() s.strBldr.Release() s.dblBldr.Release() if s.actual != nil { s.actual.Release() s.actual = nil } s.mem.AssertSize(s.T(), 0) } func (s *UnionBuilderSuite) createExpectedTypesArr() { data := array.NewData(arrow.PrimitiveTypes.Int8, len(s.expectedTypes), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int8Traits.CastToBytes(s.expectedTypes))}, nil, 0, 0) defer data.Release() s.expectedTypesArr = array.MakeFromData(data) } func (s *UnionBuilderSuite) appendInt(i int8) { s.expectedTypes = append(s.expectedTypes, s.I8) s.unionBldr.Append(s.I8) s.i8Bldr.Append(i) if s.unionBldr.Mode() == 
arrow.SparseMode { s.strBldr.AppendEmptyValue() s.dblBldr.AppendEmptyValue() } } func (s *UnionBuilderSuite) appendString(str string) { s.expectedTypes = append(s.expectedTypes, s.STR) s.unionBldr.Append(s.STR) s.strBldr.Append(str) if s.unionBldr.Mode() == arrow.SparseMode { s.i8Bldr.AppendEmptyValue() s.dblBldr.AppendEmptyValue() } } func (s *UnionBuilderSuite) appendDbl(dbl float64) { s.expectedTypes = append(s.expectedTypes, s.DBL) s.unionBldr.Append(s.DBL) s.dblBldr.Append(dbl) if s.unionBldr.Mode() == arrow.SparseMode { s.strBldr.AppendEmptyValue() s.i8Bldr.AppendEmptyValue() } } func (s *UnionBuilderSuite) appendBasics() { s.appendInt(33) s.appendString("abc") s.appendDbl(1.0) s.appendDbl(-1.0) s.appendString("") s.appendInt(10) s.appendString("def") s.appendInt(-10) s.appendDbl(0.5) s.Equal(9, s.unionBldr.Len()) s.actual = s.unionBldr.NewArray().(array.Union) s.NoError(s.actual.ValidateFull()) s.createExpectedTypesArr() } func (s *UnionBuilderSuite) appendNullsAndEmptyValues() { s.appendString("abc") s.unionBldr.AppendNull() s.unionBldr.AppendEmptyValue() s.expectedTypes = append(s.expectedTypes, s.I8, s.I8, s.I8) s.appendInt(42) s.unionBldr.AppendNulls(2) s.unionBldr.AppendEmptyValues(2) s.expectedTypes = append(s.expectedTypes, s.I8, s.I8, s.I8) s.Equal(8, s.unionBldr.Len()) s.actual = s.unionBldr.NewArray().(array.Union) s.NoError(s.actual.ValidateFull()) s.createExpectedTypesArr() } func (s *UnionBuilderSuite) appendInferred() { s.I8 = s.unionBldr.AppendChild(s.i8Bldr, "i8") s.EqualValues(0, s.I8) s.appendInt(33) s.appendInt(10) s.STR = s.unionBldr.AppendChild(s.strBldr, "str") s.EqualValues(1, s.STR) s.appendString("abc") s.appendString("") s.appendString("def") s.appendInt(-10) s.DBL = s.unionBldr.AppendChild(s.dblBldr, "dbl") s.EqualValues(2, s.DBL) s.appendDbl(1.0) s.appendDbl(-1.0) s.appendDbl(0.5) s.Equal(9, s.unionBldr.Len()) s.actual = s.unionBldr.NewArray().(array.Union) s.NoError(s.actual.ValidateFull()) s.createExpectedTypesArr() 
s.EqualValues(0, s.I8) s.EqualValues(1, s.STR) s.EqualValues(2, s.DBL) } func (s *UnionBuilderSuite) appendListOfInferred(utyp arrow.UnionType) *array.List { listBldr := array.NewListBuilder(s.mem, utyp) defer listBldr.Release() s.unionBldr = listBldr.ValueBuilder().(array.UnionBuilder) listBldr.Append(true) s.I8 = s.unionBldr.AppendChild(s.i8Bldr, "i8") s.EqualValues(0, s.I8) s.appendInt(10) listBldr.Append(true) s.STR = s.unionBldr.AppendChild(s.strBldr, "str") s.EqualValues(1, s.STR) s.appendString("abc") s.appendInt(-10) listBldr.Append(true) s.DBL = s.unionBldr.AppendChild(s.dblBldr, "dbl") s.EqualValues(2, s.DBL) s.appendDbl(0.5) s.Equal(4, s.unionBldr.Len()) s.createExpectedTypesArr() return listBldr.NewListArray() } func (s *UnionBuilderSuite) assertArraysEqual(expected, actual arrow.Array) { s.Truef(array.Equal(expected, actual), "expected: %s, got: %s", expected, actual) } func (s *UnionBuilderSuite) TestDenseUnionBasics() { s.unionBldr = array.NewDenseUnionBuilderWithBuilders(s.mem, arrow.DenseUnionOf([]arrow.Field{ {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) defer s.unionBldr.Release() s.appendBasics() expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[33, 10, -10]`)) expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "", "def"]`)) expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.0, -1.0, 0.5]`)) expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 0, 0, 1, 1, 1, 2, 2, 2]`)) defer func() { expectedI8.Release() expectedStr.Release() expectedDbl.Release() expectedOffsets.Release() }() expected, err := 
array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, expectedOffsets, []arrow.Array{expectedI8, expectedStr, expectedDbl}, []string{"i8", "str", "dbl"}, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) s.NoError(err) defer expected.Release() s.Equal(expected.DataType().String(), s.actual.DataType().String()) s.assertArraysEqual(expected, s.actual) } func (s *UnionBuilderSuite) TestDenseBuilderNullsAndEmpty() { s.unionBldr = array.NewDenseUnionBuilderWithBuilders(s.mem, arrow.DenseUnionOf([]arrow.Field{ {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) defer s.unionBldr.Release() s.appendNullsAndEmptyValues() // four null / empty values (the latter implementation-defined) appended to I8 expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[null, 0, 42, null, 0]`)) expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc"]`)) expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[]`)) expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 0, 1, 2, 3, 3, 4, 4]`)) defer func() { expectedI8.Release() expectedStr.Release() expectedDbl.Release() expectedOffsets.Release() }() expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, expectedOffsets, []arrow.Array{expectedI8, expectedStr, expectedDbl}, []string{"i8", "str", "dbl"}, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) s.NoError(err) defer expected.Release() s.Equal(expected.DataType().String(), s.actual.DataType().String()) s.assertArraysEqual(expected, s.actual) // physical arrays must be as expected s.assertArraysEqual(expectedI8, s.actual.Field(0)) s.assertArraysEqual(expectedStr, s.actual.Field(1)) 
s.assertArraysEqual(expectedDbl, s.actual.Field(2)) } func (s *UnionBuilderSuite) TestDenseUnionInferredTyped() { s.unionBldr = array.NewEmptyDenseUnionBuilder(s.mem) defer s.unionBldr.Release() s.appendInferred() expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[33, 10, -10]`)) expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "", "def"]`)) expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.0, -1.0, 0.5]`)) expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 1, 0, 1, 2, 2, 0, 1, 2]`)) defer func() { expectedI8.Release() expectedStr.Release() expectedDbl.Release() expectedOffsets.Release() }() expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, expectedOffsets, []arrow.Array{expectedI8, expectedStr, expectedDbl}, []string{"i8", "str", "dbl"}, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) s.NoError(err) defer expected.Release() s.Equal(expected.DataType().String(), s.actual.DataType().String()) s.assertArraysEqual(expected, s.actual) } func (s *UnionBuilderSuite) TestDenseUnionListOfInferredType() { actual := s.appendListOfInferred(arrow.DenseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})) defer actual.Release() expectedType := arrow.ListOf(arrow.DenseUnionOf( []arrow.Field{ {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}}, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL})) s.Equal(expectedType.String(), actual.DataType().String()) } func (s *UnionBuilderSuite) TestSparseUnionBasics() { s.unionBldr = array.NewSparseUnionBuilderWithBuilders(s.mem, arrow.SparseUnionOf([]arrow.Field{ {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "dbl", Type: 
arrow.PrimitiveTypes.Float64, Nullable: true}, }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) defer s.unionBldr.Release() s.appendBasics() expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[33, null, null, null, null, 10, null, -10, null]`)) expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`[null, "abc", null, null, "", null, "def", null, null]`)) expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[null, null, 1.0, -1.0, null, null, null, null, 0.5]`)) defer func() { expectedI8.Release() expectedStr.Release() expectedDbl.Release() }() expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, []arrow.Array{expectedI8, expectedStr, expectedDbl}, []string{"i8", "str", "dbl"}, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) s.NoError(err) defer expected.Release() s.Equal(expected.DataType().String(), s.actual.DataType().String()) s.assertArraysEqual(expected, s.actual) } func (s *UnionBuilderSuite) TestSparseBuilderNullsAndEmpty() { s.unionBldr = array.NewSparseUnionBuilderWithBuilders(s.mem, arrow.SparseUnionOf([]arrow.Field{ {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) defer s.unionBldr.Release() s.appendNullsAndEmptyValues() // "abc", null, 0, 42, null, null, 0, 0 // getting 0 for empty values is implementation-defined expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, null, 0, 42, null, null, 0, 0]`)) expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "", "", "", "", "", "", ""]`)) expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, 
strings.NewReader(`[0, 0, 0, 0, 0, 0, 0, 0]`)) defer func() { expectedI8.Release() expectedStr.Release() expectedDbl.Release() }() expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, []arrow.Array{expectedI8, expectedStr, expectedDbl}, []string{"i8", "str", "dbl"}, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) s.NoError(err) defer expected.Release() s.Equal(expected.DataType().String(), s.actual.DataType().String()) s.assertArraysEqual(expected, s.actual) // physical arrays must be as expected s.assertArraysEqual(expectedI8, s.actual.Field(0)) s.assertArraysEqual(expectedStr, s.actual.Field(1)) s.assertArraysEqual(expectedDbl, s.actual.Field(2)) } func (s *UnionBuilderSuite) TestSparseUnionInferredType() { s.unionBldr = array.NewEmptySparseUnionBuilder(s.mem) defer s.unionBldr.Release() s.appendInferred() expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[33, 10, null, null, null, -10, null, null, null]`)) expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`[null, null, "abc", "", "def", null, null, null, null]`)) expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[null, null, null, null, null, null,1.0, -1.0, 0.5]`)) defer func() { expectedI8.Release() expectedStr.Release() expectedDbl.Release() }() expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, []arrow.Array{expectedI8, expectedStr, expectedDbl}, []string{"i8", "str", "dbl"}, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) s.NoError(err) defer expected.Release() s.Equal(expected.DataType().String(), s.actual.DataType().String()) s.assertArraysEqual(expected, s.actual) } func (s *UnionBuilderSuite) TestSparseUnionStructWithUnion() { bldr := array.NewStructBuilder(s.mem, arrow.StructOf(arrow.Field{Name: "u", Type: arrow.SparseUnionFromArrays(nil, nil, nil)})) defer bldr.Release() unionBldr := bldr.FieldBuilder(0).(array.UnionBuilder) int32Bldr := 
array.NewInt32Builder(s.mem) defer int32Bldr.Release() s.EqualValues(0, unionBldr.AppendChild(int32Bldr, "i")) expectedType := arrow.StructOf(arrow.Field{Name: "u", Type: arrow.SparseUnionOf([]arrow.Field{{Name: "i", Type: arrow.PrimitiveTypes.Int32, Nullable: true}}, []arrow.UnionTypeCode{0})}) s.Truef(arrow.TypeEqual(expectedType, bldr.Type()), "expected: %s, got: %s", expectedType, bldr.Type()) } func ExampleSparseUnionBuilder() { dt1 := arrow.SparseUnionOf([]arrow.Field{ {Name: "c", Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String}}, }, []arrow.UnionTypeCode{0}) dt2 := arrow.StructOf(arrow.Field{Name: "a", Type: dt1}) pool := memory.DefaultAllocator bldr := array.NewStructBuilder(pool, dt2) defer bldr.Release() bldrDt1 := bldr.FieldBuilder(0).(*array.SparseUnionBuilder) binDictBldr := bldrDt1.Child(0).(*array.BinaryDictionaryBuilder) bldr.Append(true) bldrDt1.Append(0) binDictBldr.AppendString("foo") bldr.Append(true) bldrDt1.Append(0) binDictBldr.AppendString("bar") out := bldr.NewArray().(*array.Struct) defer out.Release() fmt.Println(out) // Output: // {[{c=foo} {c=bar}]} } func TestUnions(t *testing.T) { suite.Run(t, new(UnionFactorySuite)) suite.Run(t, new(UnionBuilderSuite)) } func TestNestedUnionStructDict(t *testing.T) { // ARROW-18274 dt1 := arrow.SparseUnionOf([]arrow.Field{ {Name: "c", Type: &arrow.DictionaryType{ IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String, Ordered: false, }}, }, []arrow.UnionTypeCode{0}) dt2 := arrow.StructOf( arrow.Field{Name: "b", Type: dt1}, ) dt3 := arrow.SparseUnionOf([]arrow.Field{ {Name: "a", Type: dt2}, }, []arrow.UnionTypeCode{0}) pool := memory.NewGoAllocator() builder := array.NewSparseUnionBuilder(pool, dt3) defer builder.Release() arr := builder.NewArray() defer arr.Release() assert.Equal(t, 0, arr.Len()) } func TestNestedUnionDictUnion(t *testing.T) { dt1 := arrow.SparseUnionOf([]arrow.Field{ {Name: "c", Type: &arrow.DictionaryType{ 
IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String, Ordered: false, }}, }, []arrow.UnionTypeCode{0}) dt2 := arrow.SparseUnionOf([]arrow.Field{ {Name: "a", Type: dt1}, }, []arrow.UnionTypeCode{0}) pool := memory.NewGoAllocator() builder := array.NewSparseUnionBuilder(pool, dt2) defer builder.Release() arr := builder.NewArray() defer arr.Release() assert.Equal(t, 0, arr.Len()) } arrow-go-18.2.0/arrow/array/util.go000066400000000000000000000411111476434502500171230ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "errors" "fmt" "io" "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/hashing" "github.com/apache/arrow-go/v18/internal/json" ) func min(a, b int) int { if a < b { return a } return b } type fromJSONCfg struct { multiDocument bool startOffset int64 useNumber bool } type FromJSONOption func(*fromJSONCfg) func WithMultipleDocs() FromJSONOption { return func(c *fromJSONCfg) { c.multiDocument = true } } // WithStartOffset attempts to start decoding from the reader at the offset // passed in. 
If using this option the reader must fulfill the io.ReadSeeker // interface, or else an error will be returned. // // It will call Seek(off, io.SeekStart) on the reader func WithStartOffset(off int64) FromJSONOption { return func(c *fromJSONCfg) { c.startOffset = off } } // WithUseNumber enables the 'UseNumber' option on the json decoder, using // the json.Number type instead of assuming float64 for numbers. This is critical // if you have numbers that are larger than what can fit into the 53 bits of // an IEEE float64 mantissa and want to preserve its value. func WithUseNumber() FromJSONOption { return func(c *fromJSONCfg) { c.useNumber = true } } // FromJSON creates an arrow.Array from a corresponding JSON stream and defined data type. If the types in the // json do not match the type provided, it will return errors. This is *not* the integration test format // and should not be used as such. This intended to be used by consumers more similarly to the current exposing of // the csv reader/writer. It also returns the input offset in the reader where it finished decoding since buffering // by the decoder could leave the reader's cursor past where the parsing finished if attempting to parse multiple json // arrays from one stream. // // All the Array types implement json.Marshaller and thus can be written to json // using the json.Marshal function // // The JSON provided must be formatted in one of two ways: // // Default: the top level of the json must be a list which matches the type specified exactly // Example: `[1, 2, 3, 4, 5]` for any integer type or `[[...], null, [], .....]` for a List type // Struct arrays are represented a list of objects: `[{"foo": 1, "bar": "moo"}, {"foo": 5, "bar": "baz"}]` // // Using WithMultipleDocs: // If the JSON provided is multiple newline separated json documents, then use this option // and each json document will be treated as a single row of the array. 
This is most useful for record batches // and interacting with other processes that use json. For example: // `{"col1": 1, "col2": "row1", "col3": ...}\n{"col1": 2, "col2": "row2", "col3": ...}\n.....` // // Duration values get formated upon marshalling as a string consisting of their numeric // value followed by the unit suffix such as "10s" for a value of 10 and unit of Seconds. // with "ms" for millisecond, "us" for microsecond, and "ns" for nanosecond as the suffixes. // Unmarshalling duration values is more permissive since it first tries to use Go's // time.ParseDuration function which means it allows values in the form 3h25m0.3s in addition // to the same values which are output. // // Interval types are marshalled / unmarshalled as follows: // // MonthInterval is marshalled as an object with the format: // { "months": #} // DayTimeInterval is marshalled using Go's regular marshalling of structs: // { "days": #, "milliseconds": # } // MonthDayNanoInterval values are marshalled the same as DayTime using Go's struct marshalling: // { "months": #, "days": #, "nanoseconds": # } // // Times use a format of HH:MM or HH:MM:SS[.zzz] where the fractions of a second cannot // exceed the precision allowed by the time unit, otherwise unmarshalling will error. // // # Dates use YYYY-MM-DD format // // Timestamps use RFC3339Nano format except without a timezone, all of the following are valid: // // YYYY-MM-DD // YYYY-MM-DD[T]HH // YYYY-MM-DD[T]HH:MM // YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzzzz] // // The fractions of a second cannot exceed the precision allowed by the timeunit of the datatype. // // When processing structs as objects order of keys does not matter, but keys cannot be repeated. 
func FromJSON(mem memory.Allocator, dt arrow.DataType, r io.Reader, opts ...FromJSONOption) (arr arrow.Array, offset int64, err error) { var cfg fromJSONCfg for _, o := range opts { o(&cfg) } if cfg.startOffset != 0 { seeker, ok := r.(io.ReadSeeker) if !ok { return nil, 0, errors.New("using StartOffset option requires reader to be a ReadSeeker, cannot seek") } seeker.Seek(cfg.startOffset, io.SeekStart) } bldr := NewBuilder(mem, dt) defer bldr.Release() dec := json.NewDecoder(r) defer func() { if errors.Is(err, io.EOF) { err = fmt.Errorf("failed parsing json: %w", io.ErrUnexpectedEOF) } }() if cfg.useNumber { dec.UseNumber() } if !cfg.multiDocument { t, err := dec.Token() if err != nil { return nil, dec.InputOffset(), err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return nil, dec.InputOffset(), fmt.Errorf("json doc must be an array, found %s", delim) } } if err = bldr.Unmarshal(dec); err != nil { return nil, dec.InputOffset(), err } if !cfg.multiDocument { // consume the last ']' if _, err = dec.Token(); err != nil { return nil, dec.InputOffset(), err } } return bldr.NewArray(), dec.InputOffset(), nil } // RecordToStructArray constructs a struct array from the columns of the record batch // by referencing them, zero-copy. func RecordToStructArray(rec arrow.Record) *Struct { cols := make([]arrow.ArrayData, rec.NumCols()) for i, c := range rec.Columns() { cols[i] = c.Data() } data := NewData(arrow.StructOf(rec.Schema().Fields()...), int(rec.NumRows()), []*memory.Buffer{nil}, cols, 0, 0) defer data.Release() return NewStructData(data) } // RecordFromStructArray is a convenience function for converting a struct array into // a record batch without copying the data. If the passed in schema is nil, the fields // of the struct will be used to define the record batch. Otherwise the passed in // schema will be used to create the record batch. If passed in, the schema must match // the fields of the struct column. 
func RecordFromStructArray(in *Struct, schema *arrow.Schema) arrow.Record { if schema == nil { schema = arrow.NewSchema(in.DataType().(*arrow.StructType).Fields(), nil) } return NewRecord(schema, in.fields, int64(in.Len())) } // RecordFromJSON creates a record batch from JSON data. See array.FromJSON for the details // of formatting and logic. // // A record batch from JSON is equivalent to reading a struct array in from json and then // converting it to a record batch. func RecordFromJSON(mem memory.Allocator, schema *arrow.Schema, r io.Reader, opts ...FromJSONOption) (arrow.Record, int64, error) { st := arrow.StructOf(schema.Fields()...) arr, off, err := FromJSON(mem, st, r, opts...) if err != nil { return nil, off, err } defer arr.Release() return RecordFromStructArray(arr.(*Struct), schema), off, nil } // RecordToJSON writes out the given record following the format of each row is a single object // on a single line of the output. func RecordToJSON(rec arrow.Record, w io.Writer) error { enc := json.NewEncoder(w) fields := rec.Schema().Fields() cols := make(map[string]interface{}) for i := 0; int64(i) < rec.NumRows(); i++ { for j, c := range rec.Columns() { cols[fields[j].Name] = c.GetOneForMarshal(i) } if err := enc.Encode(cols); err != nil { return err } } return nil } func TableFromJSON(mem memory.Allocator, sc *arrow.Schema, recJSON []string, opt ...FromJSONOption) (arrow.Table, error) { batches := make([]arrow.Record, len(recJSON)) for i, batchJSON := range recJSON { batch, _, err := RecordFromJSON(mem, sc, strings.NewReader(batchJSON), opt...) 
if err != nil { return nil, err } defer batch.Release() batches[i] = batch } return NewTableFromRecords(sc, batches), nil } func GetDictArrayData(mem memory.Allocator, valueType arrow.DataType, memoTable hashing.MemoTable, startOffset int) (*Data, error) { dictLen := memoTable.Size() - startOffset buffers := []*memory.Buffer{nil, nil} buffers[1] = memory.NewResizableBuffer(mem) defer buffers[1].Release() switch tbl := memoTable.(type) { case hashing.NumericMemoTable: nbytes := tbl.TypeTraits().BytesRequired(dictLen) buffers[1].Resize(nbytes) tbl.WriteOutSubset(startOffset, buffers[1].Bytes()) case *hashing.BinaryMemoTable: switch valueType.ID() { case arrow.BINARY, arrow.STRING: buffers = append(buffers, memory.NewResizableBuffer(mem)) defer buffers[2].Release() buffers[1].Resize(arrow.Int32Traits.BytesRequired(dictLen + 1)) offsets := arrow.Int32Traits.CastFromBytes(buffers[1].Bytes()) tbl.CopyOffsetsSubset(startOffset, offsets) valuesz := offsets[len(offsets)-1] - offsets[0] buffers[2].Resize(int(valuesz)) tbl.CopyValuesSubset(startOffset, buffers[2].Bytes()) case arrow.LARGE_BINARY, arrow.LARGE_STRING: buffers = append(buffers, memory.NewResizableBuffer(mem)) defer buffers[2].Release() buffers[1].Resize(arrow.Int64Traits.BytesRequired(dictLen + 1)) offsets := arrow.Int64Traits.CastFromBytes(buffers[1].Bytes()) tbl.CopyLargeOffsetsSubset(startOffset, offsets) valuesz := offsets[len(offsets)-1] - offsets[0] buffers[2].Resize(int(valuesz)) tbl.CopyValuesSubset(startOffset, buffers[2].Bytes()) default: // fixed size bw := int(bitutil.BytesForBits(int64(valueType.(arrow.FixedWidthDataType).BitWidth()))) buffers[1].Resize(dictLen * bw) tbl.CopyFixedWidthValues(startOffset, bw, buffers[1].Bytes()) } default: return nil, fmt.Errorf("arrow/array: dictionary unifier unimplemented type: %s", valueType) } var nullcount int if idx, ok := memoTable.GetNull(); ok && idx >= startOffset { buffers[0] = memory.NewResizableBuffer(mem) defer buffers[0].Release() nullcount = 1 
buffers[0].Resize(int(bitutil.BytesForBits(int64(dictLen)))) memory.Set(buffers[0].Bytes(), 0xFF) bitutil.ClearBit(buffers[0].Bytes(), idx) } return NewData(valueType, dictLen, buffers, nil, nullcount, 0), nil } func DictArrayFromJSON(mem memory.Allocator, dt *arrow.DictionaryType, indicesJSON, dictJSON string) (arrow.Array, error) { indices, _, err := FromJSON(mem, dt.IndexType, strings.NewReader(indicesJSON)) if err != nil { return nil, err } defer indices.Release() dict, _, err := FromJSON(mem, dt.ValueType, strings.NewReader(dictJSON)) if err != nil { return nil, err } defer dict.Release() return NewDictionaryArray(dt, indices, dict), nil } func ChunkedFromJSON(mem memory.Allocator, dt arrow.DataType, chunkStrs []string, opts ...FromJSONOption) (*arrow.Chunked, error) { chunks := make([]arrow.Array, len(chunkStrs)) defer func() { for _, c := range chunks { if c != nil { c.Release() } } }() var err error for i, c := range chunkStrs { chunks[i], _, err = FromJSON(mem, dt, strings.NewReader(c), opts...) 
if err != nil { return nil, err } } return arrow.NewChunked(dt, chunks), nil } func getMaxBufferLen(dt arrow.DataType, length int) int { bufferLen := int(bitutil.BytesForBits(int64(length))) maxOf := func(bl int) int { if bl > bufferLen { return bl } return bufferLen } switch dt := dt.(type) { case *arrow.DictionaryType: bufferLen = maxOf(getMaxBufferLen(dt.ValueType, length)) return maxOf(getMaxBufferLen(dt.IndexType, length)) case *arrow.FixedSizeBinaryType: return maxOf(dt.ByteWidth * length) case arrow.FixedWidthDataType: return maxOf(int(bitutil.BytesForBits(int64(dt.BitWidth()))) * length) case *arrow.StructType: for _, f := range dt.Fields() { bufferLen = maxOf(getMaxBufferLen(f.Type, length)) } return bufferLen case *arrow.SparseUnionType: // type codes bufferLen = maxOf(length) // creates children of the same length of the union for _, f := range dt.Fields() { bufferLen = maxOf(getMaxBufferLen(f.Type, length)) } return bufferLen case *arrow.DenseUnionType: // type codes bufferLen = maxOf(length) // offsets bufferLen = maxOf(arrow.Int32SizeBytes * length) // create children of length 1 for _, f := range dt.Fields() { bufferLen = maxOf(getMaxBufferLen(f.Type, 1)) } return bufferLen case arrow.OffsetsDataType: return maxOf(dt.OffsetTypeTraits().BytesRequired(length + 1)) case *arrow.FixedSizeListType: return maxOf(getMaxBufferLen(dt.Elem(), int(dt.Len())*length)) case arrow.ExtensionType: return maxOf(getMaxBufferLen(dt.StorageType(), length)) default: panic(fmt.Errorf("arrow/array: arrayofnull not implemented for type %s", dt)) } } type nullArrayFactory struct { mem memory.Allocator dt arrow.DataType len int buf *memory.Buffer } func (n *nullArrayFactory) create() *Data { if n.buf == nil { bufLen := getMaxBufferLen(n.dt, n.len) n.buf = memory.NewResizableBuffer(n.mem) n.buf.Resize(bufLen) defer n.buf.Release() } var ( dt = n.dt bufs = []*memory.Buffer{memory.SliceBuffer(n.buf, 0, int(bitutil.BytesForBits(int64(n.len))))} childData []arrow.ArrayData dictData 
arrow.ArrayData ) defer bufs[0].Release() if ex, ok := dt.(arrow.ExtensionType); ok { dt = ex.StorageType() } if nf, ok := dt.(arrow.NestedType); ok { childData = make([]arrow.ArrayData, nf.NumFields()) } switch dt := dt.(type) { case *arrow.NullType: case *arrow.DictionaryType: bufs = append(bufs, n.buf) arr := MakeArrayOfNull(n.mem, dt.ValueType, 0) defer arr.Release() dictData = arr.Data() case arrow.FixedWidthDataType: bufs = append(bufs, n.buf) case arrow.BinaryDataType: bufs = append(bufs, n.buf, n.buf) case arrow.OffsetsDataType: bufs = append(bufs, n.buf) childData[0] = n.createChild(dt, 0, 0) defer childData[0].Release() case *arrow.FixedSizeListType: childData[0] = n.createChild(dt, 0, n.len*int(dt.Len())) defer childData[0].Release() case *arrow.StructType: for i := range dt.Fields() { childData[i] = n.createChild(dt, i, n.len) defer childData[i].Release() } case *arrow.RunEndEncodedType: bldr := NewBuilder(n.mem, dt.RunEnds()) defer bldr.Release() switch b := bldr.(type) { case *Int16Builder: b.Append(int16(n.len)) case *Int32Builder: b.Append(int32(n.len)) case *Int64Builder: b.Append(int64(n.len)) } childData[0] = bldr.newData() defer childData[0].Release() childData[1] = n.createChild(dt.Encoded(), 1, 1) defer childData[1].Release() case arrow.UnionType: bufs[0].Release() bufs[0] = nil bufs = append(bufs, n.buf) // buffer is zeroed, but 0 may not be a valid type code if dt.TypeCodes()[0] != 0 { bufs[1] = memory.NewResizableBuffer(n.mem) bufs[1].Resize(n.len) defer bufs[1].Release() memory.Set(bufs[1].Bytes(), byte(dt.TypeCodes()[0])) } // for sparse unions we create children with the same length childLen := n.len if dt.Mode() == arrow.DenseMode { // for dense unions, offsets are all 0 and make children // with length 1 bufs = append(bufs, n.buf) childLen = 1 } for i := range dt.Fields() { childData[i] = n.createChild(dt, i, childLen) defer childData[i].Release() } } out := NewData(n.dt, n.len, bufs, childData, n.len, 0) if dictData != nil { 
out.SetDictionary(dictData) } return out } func (n *nullArrayFactory) createChild(_ arrow.DataType, i, length int) *Data { childFactory := &nullArrayFactory{ mem: n.mem, dt: n.dt.(arrow.NestedType).Fields()[i].Type, len: length, buf: n.buf} return childFactory.create() } // MakeArrayOfNull creates an array of size length which is all null of the given data type. func MakeArrayOfNull(mem memory.Allocator, dt arrow.DataType, length int) arrow.Array { if dt.ID() == arrow.NULL { return NewNull(length) } data := (&nullArrayFactory{mem: mem, dt: dt, len: length}).create() defer data.Release() return MakeFromData(data) } func stripNulls(s string) string { return strings.TrimRight(s, "\x00") } arrow-go-18.2.0/arrow/array/util_test.go000066400000000000000000000445341476434502500201760ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package array_test import ( "bufio" "bytes" "fmt" "io" "reflect" "strings" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" "github.com/apache/arrow-go/v18/arrow/internal/arrdata" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) var typemap = map[arrow.DataType]reflect.Type{ arrow.PrimitiveTypes.Int8: reflect.TypeOf(int8(0)), arrow.PrimitiveTypes.Uint8: reflect.TypeOf(uint8(0)), arrow.PrimitiveTypes.Int16: reflect.TypeOf(int16(0)), arrow.PrimitiveTypes.Uint16: reflect.TypeOf(uint16(0)), arrow.PrimitiveTypes.Int32: reflect.TypeOf(int32(0)), arrow.PrimitiveTypes.Uint32: reflect.TypeOf(uint32(0)), arrow.PrimitiveTypes.Int64: reflect.TypeOf(int64(0)), arrow.PrimitiveTypes.Uint64: reflect.TypeOf(uint64(0)), } func TestIntegerArrsJSON(t *testing.T) { const N = 10 types := []arrow.DataType{ arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64, } for _, tt := range types { t.Run(fmt.Sprint(tt), func(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) jsontest := make([]int, N) vals := reflect.MakeSlice(reflect.SliceOf(typemap[tt]), N, N) for i := 0; i < N; i++ { vals.Index(i).Set(reflect.ValueOf(i).Convert(typemap[tt])) jsontest[i] = i } data, _ := json.Marshal(jsontest) arr, _, err := array.FromJSON(mem, tt, bytes.NewReader(data)) assert.NoError(t, err) defer arr.Release() assert.EqualValues(t, N, arr.Len()) assert.Zero(t, arr.NullN()) output, err := json.Marshal(arr) assert.NoError(t, err) assert.JSONEq(t, string(data), string(output)) }) t.Run(fmt.Sprint(tt)+" errors", func(t *testing.T) { 
_, _, err := array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("")) assert.Error(t, err) _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("[")) assert.ErrorIs(t, err, io.ErrUnexpectedEOF) _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("0")) assert.Error(t, err) _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("{}")) assert.Error(t, err) _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("[[0]]")) assert.EqualError(t, err, "json: cannot unmarshal [ into Go value of type "+tt.Name()) }) } } func TestStringsJSON(t *testing.T) { tests := []struct { jsonstring string values []string valids []bool }{ {"[]", []string{}, []bool{}}, {`["", "foo"]`, []string{"", "foo"}, nil}, {`["", null]`, []string{"", ""}, []bool{true, false}}, // NUL character in string {`["", "some\u0000char"]`, []string{"", "some\x00char"}, nil}, // utf8 sequence in string {"[\"\xc3\xa9\"]", []string{"\xc3\xa9"}, nil}, // bytes < 0x20 can be represented as JSON unicode escapes {`["\u0000\u001f"]`, []string{"\x00\x1f"}, nil}, } for _, tt := range tests { t.Run("json "+tt.jsonstring, func(t *testing.T) { bldr := array.NewStringBuilder(memory.DefaultAllocator) defer bldr.Release() bldr.AppendValues(tt.values, tt.valids) expected := bldr.NewStringArray() defer expected.Release() arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(tt.jsonstring)) assert.NoError(t, err) defer arr.Release() assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) data, err := json.Marshal(arr) assert.NoError(t, err) assert.JSONEq(t, tt.jsonstring, string(data)) }) } for _, tt := range tests { t.Run("large json "+tt.jsonstring, func(t *testing.T) { bldr := array.NewLargeStringBuilder(memory.DefaultAllocator) defer bldr.Release() bldr.AppendValues(tt.values, tt.valids) expected := bldr.NewLargeStringArray() defer expected.Release() arr, _, err 
:= array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.LargeString, strings.NewReader(tt.jsonstring)) assert.NoError(t, err) defer arr.Release() assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) data, err := json.Marshal(arr) assert.NoError(t, err) assert.JSONEq(t, tt.jsonstring, string(data)) }) } t.Run("errors", func(t *testing.T) { _, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader("[0]")) assert.Error(t, err) _, _, err = array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader("[[]]")) assert.Error(t, err) }) } func TestStructArrayFromJSON(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) jsonStr := `[{"hello": 3.5, "world": true, "yo": "foo"},{"hello": 3.25, "world": false, "yo": "bar"}]` arr, _, err := array.FromJSON(mem, arrow.StructOf( arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64}, arrow.Field{Name: "world", Type: arrow.FixedWidthTypes.Boolean}, arrow.Field{Name: "yo", Type: arrow.BinaryTypes.String}, ), strings.NewReader(jsonStr)) assert.NoError(t, err) defer arr.Release() output, err := json.Marshal(arr) assert.NoError(t, err) assert.JSONEq(t, jsonStr, string(output)) } func TestArrayFromJSONMulti(t *testing.T) { arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.StructOf( arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64}, arrow.Field{Name: "world", Type: arrow.FixedWidthTypes.Boolean}, arrow.Field{Name: "yo", Type: arrow.BinaryTypes.String}, ), strings.NewReader("{\"hello\": 3.5, \"world\": true, \"yo\": \"foo\"}\n{\"hello\": 3.25, \"world\": false, \"yo\": \"bar\"}\n"), array.WithMultipleDocs()) assert.NoError(t, err) defer arr.Release() assert.EqualValues(t, 2, arr.Len()) assert.Zero(t, arr.NullN()) } func TestNestedJSONArrs(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) jsonStr := `[{"hello": 
1.5, "world": [1, 2, 3, 4], "yo": [{"foo": "2005-05-06", "bar": "15:02:04.123"},{"foo": "1956-01-02", "bar": "02:10:00"}]}]` arr, _, err := array.FromJSON(mem, arrow.StructOf( arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64}, arrow.Field{Name: "world", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32)}, arrow.Field{Name: "yo", Type: arrow.FixedSizeListOf(2, arrow.StructOf( arrow.Field{Name: "foo", Type: arrow.FixedWidthTypes.Date32}, arrow.Field{Name: "bar", Type: arrow.FixedWidthTypes.Time32ms}, ))}, ), strings.NewReader(jsonStr)) assert.NoError(t, err) defer arr.Release() v, err := json.Marshal(arr) assert.NoError(t, err) assert.JSONEq(t, jsonStr, string(v)) } func TestGetNullsFromJSON(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) jsonStr := `[ {"yo": "thing", "arr": null, "nuf": {"ps": "今日ã¯"}}, {"yo": null, "nuf": {"ps": null}, "arr": []}, { "nuf": null, "yo": "今日ã¯", "arr": [1,2,3]} ]` rec, _, err := array.RecordFromJSON(mem, arrow.NewSchema([]arrow.Field{ {Name: "yo", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "arr", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}, {Name: "nuf", Type: arrow.StructOf(arrow.Field{Name: "ps", Type: arrow.BinaryTypes.String, Nullable: true}), Nullable: true}, }, nil), strings.NewReader(jsonStr)) assert.NoError(t, err) defer rec.Release() assert.EqualValues(t, 3, rec.NumCols()) assert.EqualValues(t, 3, rec.NumRows()) data, err := json.Marshal(rec) assert.NoError(t, err) assert.JSONEq(t, jsonStr, string(data)) } func TestDurationsJSON(t *testing.T) { tests := []struct { unit arrow.TimeUnit jsonstr string values []arrow.Duration }{ {arrow.Second, `["1s", "2s", "3s", "4s", "5s"]`, []arrow.Duration{1, 2, 3, 4, 5}}, {arrow.Millisecond, `["1ms", "2ms", "3ms", "4ms", "5ms"]`, []arrow.Duration{1, 2, 3, 4, 5}}, {arrow.Microsecond, `["1us", "2us", "3us", "4us", "5us"]`, []arrow.Duration{1, 2, 3, 4, 5}}, {arrow.Nanosecond, `["1ns", "2ns", "3ns", 
"4ns", "5ns"]`, []arrow.Duration{1, 2, 3, 4, 5}}, } for _, tt := range tests { dtype := &arrow.DurationType{Unit: tt.unit} bldr := array.NewDurationBuilder(memory.DefaultAllocator, dtype) defer bldr.Release() bldr.AppendValues(tt.values, nil) expected := bldr.NewArray() defer expected.Release() arr, _, err := array.FromJSON(memory.DefaultAllocator, dtype, strings.NewReader(tt.jsonstr)) assert.NoError(t, err) defer arr.Release() assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) } } func TestTimestampsJSON(t *testing.T) { tests := []struct { unit arrow.TimeUnit jsonstr string values []arrow.Timestamp }{ {arrow.Second, `["1970-01-01", "2000-02-29", "3989-07-14", "1900-02-28"]`, []arrow.Timestamp{0, 951782400, 63730281600, -2203977600}}, {arrow.Nanosecond, `["1970-01-01", "2000-02-29", "1900-02-28"]`, []arrow.Timestamp{0, 951782400000000000, -2203977600000000000}}, } for _, tt := range tests { dtype := &arrow.TimestampType{Unit: tt.unit} bldr := array.NewTimestampBuilder(memory.DefaultAllocator, dtype) defer bldr.Release() bldr.AppendValues(tt.values, nil) expected := bldr.NewArray() defer expected.Release() arr, _, err := array.FromJSON(memory.DefaultAllocator, dtype, strings.NewReader(tt.jsonstr)) assert.NoError(t, err) defer arr.Release() assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) } } func TestDateJSON(t *testing.T) { t.Run("date32", func(t *testing.T) { bldr := array.NewDate32Builder(memory.DefaultAllocator) defer bldr.Release() jsonstr := `["1970-01-06", null, "1970-02-12", 0]` jsonExp := `["1970-01-06", null, "1970-02-12", "1970-01-01"]` bldr.AppendValues([]arrow.Date32{5, 0, 42, 0}, []bool{true, false, true, true}) expected := bldr.NewArray() defer expected.Release() arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.FixedWidthTypes.Date32, strings.NewReader(jsonstr)) assert.NoError(t, err) defer arr.Release() assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: 
%s\n", expected, arr) data, err := json.Marshal(arr) assert.NoError(t, err) assert.JSONEq(t, jsonExp, string(data)) }) t.Run("date64", func(t *testing.T) { bldr := array.NewDate64Builder(memory.DefaultAllocator) defer bldr.Release() jsonstr := `["1970-01-02", null, "2286-11-20", 86400000]` jsonExp := `["1970-01-02", null, "2286-11-20", "1970-01-02"]` bldr.AppendValues([]arrow.Date64{86400000, 0, 9999936000000, 86400000}, []bool{true, false, true, true}) expected := bldr.NewArray() defer expected.Release() arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.FixedWidthTypes.Date64, strings.NewReader(jsonstr)) assert.NoError(t, err) defer arr.Release() assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) data, err := json.Marshal(arr) assert.NoError(t, err) assert.JSONEq(t, jsonExp, string(data)) }) } func TestTimeJSON(t *testing.T) { tententen := 60*(60*(10)+10) + 10 tests := []struct { dt arrow.DataType jsonstr string jsonexp string valueadd int }{ {arrow.FixedWidthTypes.Time32s, `[null, "10:10:10", 36610]`, `[null, "10:10:10", "10:10:10"]`, 123}, {arrow.FixedWidthTypes.Time32ms, `[null, "10:10:10.123", 36610123]`, `[null, "10:10:10.123", "10:10:10.123"]`, 456}, {arrow.FixedWidthTypes.Time64us, `[null, "10:10:10.123456", 36610123456]`, `[null, "10:10:10.123456", "10:10:10.123456"]`, 789}, {arrow.FixedWidthTypes.Time64ns, `[null, "10:10:10.123456789", 36610123456789]`, `[null, "10:10:10.123456789", "10:10:10.123456789"]`, 0}, } for _, tt := range tests { t.Run(fmt.Sprint(tt.dt), func(t *testing.T) { defer func() { tententen = 1000*tententen + tt.valueadd }() bldr := array.NewBuilder(memory.DefaultAllocator, tt.dt) defer bldr.Release() switch tt.dt.ID() { case arrow.TIME32: bldr.(*array.Time32Builder).AppendValues([]arrow.Time32{0, arrow.Time32(tententen), arrow.Time32(tententen)}, []bool{false, true, true}) case arrow.TIME64: bldr.(*array.Time64Builder).AppendValues([]arrow.Time64{0, arrow.Time64(tententen), 
arrow.Time64(tententen)}, []bool{false, true, true}) } expected := bldr.NewArray() defer expected.Release() arr, _, err := array.FromJSON(memory.DefaultAllocator, tt.dt, strings.NewReader(tt.jsonstr)) assert.NoError(t, err) defer arr.Release() assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) data, err := json.Marshal(arr) assert.NoError(t, err) assert.JSONEq(t, tt.jsonexp, string(data)) }) } } func TestDecimal128JSON(t *testing.T) { dt := &arrow.Decimal128Type{Precision: 10, Scale: 4} bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dt) defer bldr.Release() bldr.AppendValues([]decimal128.Num{decimal128.FromU64(1234567), {}, decimal128.FromI64(-789000)}, []bool{true, false, true}) expected := bldr.NewArray() defer expected.Release() arr, _, err := array.FromJSON(memory.DefaultAllocator, dt, strings.NewReader(`["123.4567", null, "-78.9000"]`)) assert.NoError(t, err) defer arr.Release() assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) data, err := json.Marshal(arr) assert.NoError(t, err) assert.JSONEq(t, `["123.4567", null, "-78.9"]`, string(data)) } func TestDecimal256JSON(t *testing.T) { dt := &arrow.Decimal256Type{Precision: 10, Scale: 4} bldr := array.NewDecimal256Builder(memory.DefaultAllocator, dt) defer bldr.Release() bldr.AppendValues([]decimal256.Num{decimal256.FromU64(1234567), {}, decimal256.FromI64(-789000)}, []bool{true, false, true}) expected := bldr.NewArray() defer expected.Release() arr, _, err := array.FromJSON(memory.DefaultAllocator, dt, strings.NewReader(`["123.4567", null, "-78.9000"]`)) assert.NoError(t, err) defer arr.Release() assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) data, err := json.Marshal(arr) assert.NoError(t, err) assert.JSONEq(t, `["123.4567", null, "-78.9"]`, string(data)) } func TestArrRecordsJSONRoundTrip(t *testing.T) { for k, v := range arrdata.Records { if k == "decimal128" || k == "decimal256" || k == 
"fixed_width_types" { // test these separately since the sample data in the arrdata // records doesn't lend itself to exactness when going to/from // json. The fixed_width_types one uses negative values for // time32 and time64 which correctly get interpreted into times, // but re-encoding them in json produces the normalized positive // values instead of re-creating negative ones. // the decimal128/decimal256 values don't get parsed *exactly* due to fun // float weirdness due to their size, so smaller tests will work fine. continue } t.Run(k, func(t *testing.T) { var buf bytes.Buffer assert.NotPanics(t, func() { enc := json.NewEncoder(&buf) for _, r := range v { if err := enc.Encode(r); err != nil { panic(err) } } }) rdr := bytes.NewReader(buf.Bytes()) var cur int64 mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) for _, r := range v { rec, off, err := array.RecordFromJSON(mem, r.Schema(), rdr, array.WithStartOffset(cur)) assert.NoError(t, err) defer rec.Release() assert.Truef(t, array.RecordApproxEqual(r, rec), "expected: %s\ngot: %s\n", r, rec) cur += off } }) } } func TestStructBuilderJSONUnknownNested(t *testing.T) { dt := arrow.StructOf( arrow.Field{Name: "region", Type: arrow.BinaryTypes.String}, arrow.Field{Name: "model", Type: arrow.PrimitiveTypes.Int32}, arrow.Field{Name: "sales", Type: arrow.PrimitiveTypes.Float32}) const data = `[ {"region": "NY", "model": "3", "sales": 742.0}, {"region": "CT", "model": "5", "sales": 742.0} ]` const dataWithExtra = `[ {"region": "NY", "model": "3", "sales": 742.0, "extra": 1234}, {"region": "CT", "model": "5", "sales": 742.0, "extra_array": [1234], "extra_obj": {"nested": ["deeply"]}} ]` mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) arr, _, err := array.FromJSON(mem, dt, strings.NewReader(data)) require.NoError(t, err) require.NotNil(t, arr) defer arr.Release() arr2, _, err := array.FromJSON(mem, dt, strings.NewReader(dataWithExtra)) 
require.NoError(t, err) require.NotNil(t, arr2) defer arr2.Release() assert.Truef(t, array.Equal(arr, arr2), "expected: %s\n actual: %s", arr, arr2) } func TestRecordBuilderUnmarshalJSONExtraFields(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) schema := arrow.NewSchema([]arrow.Field{ {Name: "region", Type: arrow.BinaryTypes.String}, {Name: "model", Type: arrow.PrimitiveTypes.Int32}, {Name: "sales", Type: arrow.PrimitiveTypes.Float32}, }, nil) bldr := array.NewRecordBuilder(mem, schema) defer bldr.Release() const data = `{"region": "NY", "model": "3", "sales": 742.0, "extra": 1234} {"region": "NY", "model": "3", "sales": 742.0, "extra_array": [1234], "extra_obj": {"nested": ["deeply"]}}` s := bufio.NewScanner(strings.NewReader(data)) require.True(t, s.Scan()) require.NoError(t, bldr.UnmarshalJSON(s.Bytes())) rec1 := bldr.NewRecord() defer rec1.Release() require.True(t, s.Scan()) require.NoError(t, bldr.UnmarshalJSON(s.Bytes())) rec2 := bldr.NewRecord() defer rec2.Release() assert.Truef(t, array.RecordEqual(rec1, rec2), "expected: %s\nactual: %s", rec1, rec2) } arrow-go-18.2.0/arrow/arrio/000077500000000000000000000000001476434502500156175ustar00rootroot00000000000000arrow-go-18.2.0/arrow/arrio/arrio.go000066400000000000000000000054241476434502500172670ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package arrio exposes functions to manipulate records, exposing and using // interfaces not unlike the ones defined in the stdlib io package. package arrio import ( "errors" "io" "github.com/apache/arrow-go/v18/arrow" ) // Reader is the interface that wraps the Read method. type Reader interface { // Read reads the current record from the underlying stream and an error, if any. // When the Reader reaches the end of the underlying stream, it returns (nil, io.EOF). Read() (arrow.Record, error) } // ReaderAt is the interface that wraps the ReadAt method. type ReaderAt interface { // ReadAt reads the i-th record from the underlying stream and an error, if any. ReadAt(i int64) (arrow.Record, error) } // Writer is the interface that wraps the Write method. type Writer interface { Write(rec arrow.Record) error } // Copy copies all the records available from src to dst. // Copy returns the number of records copied and the first error // encountered while copying, if any. // // A successful Copy returns err == nil, not err == EOF. Because Copy is // defined to read from src until EOF, it does not treat an EOF from Read as an // error to be reported. func Copy(dst Writer, src Reader) (n int64, err error) { for { rec, err := src.Read() if err != nil { if errors.Is(err, io.EOF) { return n, nil } return n, err } err = dst.Write(rec) if err != nil { return n, err } n++ } } // CopyN copies n records (or until an error) from src to dst. It returns the // number of records copied and the earliest error encountered while copying. 
On // return, written == n if and only if err == nil. func CopyN(dst Writer, src Reader, n int64) (written int64, err error) { for ; written < n; written++ { rec, err := src.Read() if err != nil { if errors.Is(err, io.EOF) && written == n { return written, nil } return written, err } err = dst.Write(rec) if err != nil { return written, err } } if written != n && err == nil { err = io.EOF } return written, err } arrow-go-18.2.0/arrow/arrio/arrio_test.go000066400000000000000000000121531476434502500203230ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package arrio_test import ( "fmt" "io" "os" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/arrio" "github.com/apache/arrow-go/v18/arrow/internal/arrdata" "github.com/apache/arrow-go/v18/arrow/ipc" "github.com/apache/arrow-go/v18/arrow/memory" ) type copyKind int const ( fileKind copyKind = iota streamKind ) func (k copyKind) write(t *testing.T, f *os.File, mem memory.Allocator, schema *arrow.Schema, recs []arrow.Record) { t.Helper() switch k { case fileKind: arrdata.WriteFile(t, f, mem, schema, recs) case streamKind: arrdata.WriteStream(t, f, mem, schema, recs) default: panic("invalid copyKind") } } func (k copyKind) check(t *testing.T, f *os.File, mem memory.Allocator, schema *arrow.Schema, recs []arrow.Record) { t.Helper() switch k { case fileKind: arrdata.CheckArrowFile(t, f, mem, schema, recs) case streamKind: arrdata.CheckArrowStream(t, f, mem, schema, recs) default: panic("invalid copyKind") } } func TestCopy(t *testing.T) { tempDir := t.TempDir() for _, tc := range []struct { name string src, dst copyKind }{ {name: "file2file", src: fileKind, dst: fileKind}, {name: "file2stream", src: fileKind, dst: streamKind}, {name: "stream2file", src: streamKind, dst: fileKind}, {name: "stream2stream", src: streamKind, dst: streamKind}, } { t.Run(tc.name, func(t *testing.T) { for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { for _, tcopy := range []struct { n int want int err error }{ {-1, len(recs), nil}, {1, 1, nil}, {0, 0, nil}, {len(recs), len(recs), nil}, {len(recs) + 1, len(recs), io.EOF}, } { t.Run(fmt.Sprintf("-copy-n=%d", tcopy.n), func(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) f, err := os.CreateTemp(tempDir, "go-arrow-copy-") if err != nil { t.Fatal(err) } defer f.Close() o, err := os.CreateTemp(tempDir, "go-arrow-copy-") if err != nil { t.Fatal(err) } defer o.Close() tc.src.write(t, f, mem, recs[0].Schema(), recs) tc.src.check(t, f, 
mem, recs[0].Schema(), recs) _, err = f.Seek(0, io.SeekStart) if err != nil { t.Fatal(err) } var r arrio.Reader switch tc.src { case fileKind: rr, err := ipc.NewFileReader(f, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) if err != nil { t.Fatal(err) } defer rr.Close() r = rr case streamKind: rr, err := ipc.NewReader(f, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) if err != nil { t.Fatal(err) } defer rr.Release() r = rr default: t.Fatalf("invalid src type %v", tc.src) } var w interface { arrio.Writer io.Closer } switch tc.dst { case fileKind: w, err = ipc.NewFileWriter(o, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) if err != nil { t.Fatal(err) } case streamKind: w = ipc.NewWriter(o, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) default: t.Fatalf("invalid dst type %v", tc.dst) } defer w.Close() var ( n int64 ) switch tcopy.n { case -1: n, err = arrio.Copy(w, r) case len(recs) + 1: n, err = arrio.CopyN(w, r, int64(tcopy.n)) default: n, err = arrio.CopyN(w, r, int64(tcopy.n)) } switch err { case nil: if tcopy.err != nil { t.Fatalf("got a nil error, want=%v", tcopy.err) } default: switch tcopy.err { case nil: t.Fatalf("invalid error: got=%v, want=%v", err, tcopy.err) default: if tcopy.err.Error() != err.Error() { t.Fatalf("invalid error: got=%v, want=%v", err, tcopy.err) } } } if got, want := n, int64(tcopy.want); got != want { t.Fatalf("invalid number of records copied: got=%d, want=%d", got, want) } err = w.Close() if err != nil { t.Fatal(err) } tc.dst.check(t, o, mem, recs[0].Schema(), recs[:tcopy.want]) }) } }) } }) } } arrow-go-18.2.0/arrow/avro/000077500000000000000000000000001476434502500154525ustar00rootroot00000000000000arrow-go-18.2.0/arrow/avro/avro2parquet/000077500000000000000000000000001476434502500201055ustar00rootroot00000000000000arrow-go-18.2.0/arrow/avro/avro2parquet/main.go000066400000000000000000000063221476434502500213630ustar00rootroot00000000000000// Licensed to the Apache Software Foundation 
(ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "bufio" "bytes" "flag" "fmt" "log" "os" "runtime/pprof" "time" "github.com/apache/arrow-go/v18/arrow/avro" "github.com/apache/arrow-go/v18/parquet" "github.com/apache/arrow-go/v18/parquet/compress" pq "github.com/apache/arrow-go/v18/parquet/pqarrow" ) var ( cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") filepath = flag.String("file", "", "avro ocf to convert") ) func main() { flag.Parse() if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal("could not create CPU profile: ", err) } defer f.Close() // error handling omitted for example if err := pprof.StartCPUProfile(f); err != nil { log.Fatal("could not start CPU profile: ", err) } defer pprof.StopCPUProfile() } if *filepath == "" { fmt.Println("no file specified") } chunk := 1024 * 8 ts := time.Now() log.Println("starting:") info, err := os.Stat(*filepath) if err != nil { fmt.Println(err) os.Exit(1) } filesize := info.Size() data, err := os.ReadFile(*filepath) if err != nil { fmt.Println(err) os.Exit(2) } fmt.Printf("file : %v\nsize: %v MB\n", filepath, float64(filesize)/1024/1024) r := bytes.NewReader(data) ior := bufio.NewReaderSize(r, 4096*8) av2arReader, err := avro.NewOCFReader(ior, 
avro.WithChunk(chunk)) if err != nil { fmt.Println(err) os.Exit(3) } fp, err := os.OpenFile(*filepath+".parquet", os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644) if err != nil { fmt.Println(err) os.Exit(4) } defer fp.Close() pwProperties := parquet.NewWriterProperties(parquet.WithDictionaryDefault(true), parquet.WithVersion(parquet.V2_LATEST), parquet.WithCompression(compress.Codecs.Snappy), parquet.WithBatchSize(1024*32), parquet.WithDataPageSize(1024*1024), parquet.WithMaxRowGroupLength(64*1024*1024), ) awProperties := pq.NewArrowWriterProperties(pq.WithStoreSchema()) pr, err := pq.NewFileWriter(av2arReader.Schema(), fp, pwProperties, awProperties) if err != nil { fmt.Println(err) os.Exit(5) } defer pr.Close() fmt.Printf("parquet version: %v\n", pwProperties.Version()) for av2arReader.Next() { if av2arReader.Err() != nil { fmt.Println(err) os.Exit(6) } recs := av2arReader.Record() err = pr.WriteBuffered(recs) if err != nil { fmt.Println(err) os.Exit(7) } recs.Release() } if av2arReader.Err() != nil { fmt.Println(av2arReader.Err()) } pr.Close() log.Printf("time to convert: %v\n", time.Since(ts)) } arrow-go-18.2.0/arrow/avro/loader.go000066400000000000000000000036741476434502500172610ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package avro import ( "errors" "fmt" "io" ) func (r *OCFReader) decodeOCFToChan() { defer close(r.avroChan) for r.r.HasNext() { select { case <-r.readerCtx.Done(): r.err = fmt.Errorf("avro decoding cancelled, %d records read", r.avroDatumCount) return default: var datum any err := r.r.Decode(&datum) if err != nil { if errors.Is(err, io.EOF) { r.err = nil return } r.err = err return } r.avroChan <- datum r.avroDatumCount++ } } } func (r *OCFReader) recordFactory() { defer close(r.recChan) r.primed = true recChunk := 0 switch { case r.chunk < 1: for data := range r.avroChan { err := r.ldr.loadDatum(data) if err != nil { r.err = err return } } r.recChan <- r.bld.NewRecord() r.bldDone <- struct{}{} case r.chunk >= 1: for data := range r.avroChan { if recChunk == 0 { r.bld.Reserve(r.chunk) } err := r.ldr.loadDatum(data) if err != nil { r.err = err return } recChunk++ if recChunk >= r.chunk { r.recChan <- r.bld.NewRecord() recChunk = 0 } } if recChunk != 0 { r.recChan <- r.bld.NewRecord() } r.bldDone <- struct{}{} } } arrow-go-18.2.0/arrow/avro/reader.go000066400000000000000000000227111476434502500172460ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package avro import ( "context" "errors" "fmt" "io" "sync/atomic" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/hamba/avro/v2/ocf" "github.com/tidwall/sjson" avro "github.com/hamba/avro/v2" ) var ErrMismatchFields = errors.New("arrow/avro: number of records mismatch") // Option configures an Avro reader/writer. type ( Option func(config) config *OCFReader ) type schemaEdit struct { method string path string value any } // Reader wraps goavro/OCFReader and creates array.Records from a schema. type OCFReader struct { r *ocf.Decoder avroSchema string avroSchemaEdits []schemaEdit schema *arrow.Schema refs int64 bld *array.RecordBuilder bldMap *fieldPos ldr *dataLoader cur arrow.Record err error primed bool readerCtx context.Context readCancel func() maxOCF int maxRec int avroChan chan any avroDatumCount int64 avroChanSize int recChan chan arrow.Record bldDone chan struct{} recChanSize int chunk int mem memory.Allocator } // NewReader returns a reader that reads from an Avro OCF file and creates // arrow.Records from the converted avro data. 
func NewOCFReader(r io.Reader, opts ...Option) (*OCFReader, error) { ocfr, err := ocf.NewDecoder(r) if err != nil { return nil, fmt.Errorf("%w: could not create avro ocfreader", arrow.ErrInvalid) } rr := &OCFReader{ r: ocfr, refs: 1, chunk: 1, avroChanSize: 500, recChanSize: 10, } for _, opt := range opts { opt(rr) } rr.avroChan = make(chan any, rr.avroChanSize) rr.recChan = make(chan arrow.Record, rr.recChanSize) rr.bldDone = make(chan struct{}) schema, err := avro.Parse(string(ocfr.Metadata()["avro.schema"])) if err != nil { return nil, fmt.Errorf("%w: could not parse avro header", arrow.ErrInvalid) } rr.avroSchema = schema.String() if len(rr.avroSchemaEdits) > 0 { // execute schema edits for _, e := range rr.avroSchemaEdits { err := rr.editAvroSchema(e) if err != nil { return nil, fmt.Errorf("%w: could not edit avro schema", arrow.ErrInvalid) } } // validate edited schema schema, err = avro.Parse(rr.avroSchema) if err != nil { return nil, fmt.Errorf("%w: could not parse modified avro schema", arrow.ErrInvalid) } } rr.schema, err = ArrowSchemaFromAvro(schema) if err != nil { return nil, fmt.Errorf("%w: could not convert avro schema", arrow.ErrInvalid) } if rr.mem == nil { rr.mem = memory.DefaultAllocator } rr.readerCtx, rr.readCancel = context.WithCancel(context.Background()) go rr.decodeOCFToChan() rr.bld = array.NewRecordBuilder(rr.mem, rr.schema) rr.bldMap = newFieldPos() rr.ldr = newDataLoader() for idx, fb := range rr.bld.Fields() { mapFieldBuilders(fb, rr.schema.Field(idx), rr.bldMap) } rr.ldr.drawTree(rr.bldMap) go rr.recordFactory() return rr, nil } // Reuse allows the OCFReader to be reused to read another Avro file provided the // new Avro file has an identical schema. 
func (rr *OCFReader) Reuse(r io.Reader, opts ...Option) error { rr.Close() rr.err = nil ocfr, err := ocf.NewDecoder(r) if err != nil { return fmt.Errorf("%w: could not create avro ocfreader", arrow.ErrInvalid) } schema, err := avro.Parse(string(ocfr.Metadata()["avro.schema"])) if err != nil { return fmt.Errorf("%w: could not parse avro header", arrow.ErrInvalid) } if rr.avroSchema != schema.String() { return fmt.Errorf("%w: avro schema mismatch", arrow.ErrInvalid) } rr.r = ocfr for _, opt := range opts { opt(rr) } rr.maxOCF = 0 rr.maxRec = 0 rr.avroDatumCount = 0 rr.primed = false rr.avroChan = make(chan any, rr.avroChanSize) rr.recChan = make(chan arrow.Record, rr.recChanSize) rr.bldDone = make(chan struct{}) rr.readerCtx, rr.readCancel = context.WithCancel(context.Background()) go rr.decodeOCFToChan() go rr.recordFactory() return nil } // Err returns the last error encountered during the iteration over the // underlying Avro file. func (r *OCFReader) Err() error { return r.err } // AvroSchema returns the Avro schema of the Avro OCF func (r *OCFReader) AvroSchema() string { return r.avroSchema } // Schema returns the converted Arrow schema of the Avro OCF func (r *OCFReader) Schema() *arrow.Schema { return r.schema } // Record returns the current record that has been extracted from the // underlying Avro OCF file. // It is valid until the next call to Next. func (r *OCFReader) Record() arrow.Record { return r.cur } // Metrics returns the maximum queue depth of the Avro record read cache and of the // converted Arrow record cache. func (r *OCFReader) Metrics() string { return fmt.Sprintf("Max. OCF queue depth: %d/%d Max. record queue depth: %d/%d", r.maxOCF, r.avroChanSize, r.maxRec, r.recChanSize) } // OCFRecordsReadCount returns the number of Avro datum that were read from the Avro file. func (r *OCFReader) OCFRecordsReadCount() int64 { return r.avroDatumCount } // Close closes the OCFReader's Avro record read cache and converted Arrow record cache. 
OCFReader must // be closed if the Avro OCF's records have not been read to completion. func (r *OCFReader) Close() { r.readCancel() r.err = r.readerCtx.Err() } func (r *OCFReader) editAvroSchema(e schemaEdit) error { var err error switch e.method { case "set": r.avroSchema, err = sjson.Set(r.avroSchema, e.path, e.value) if err != nil { return fmt.Errorf("%w: schema edit 'set %s = %v' failure - %v", arrow.ErrInvalid, e.path, e.value, err) } case "delete": r.avroSchema, err = sjson.Delete(r.avroSchema, e.path) if err != nil { return fmt.Errorf("%w: schema edit 'delete' failure - %v", arrow.ErrInvalid, err) } default: return fmt.Errorf("%w: schema edit method must be 'set' or 'delete'", arrow.ErrInvalid) } return nil } // Next returns whether a Record can be received from the converted record queue. // The user should check Err() after call to Next that return false to check // if an error took place. func (r *OCFReader) Next() bool { if r.cur != nil { r.cur.Release() r.cur = nil } if r.maxOCF < len(r.avroChan) { r.maxOCF = len(r.avroChan) } if r.maxRec < len(r.recChan) { r.maxRec = len(r.recChan) } select { case r.cur = <-r.recChan: case <-r.bldDone: if len(r.recChan) > 0 { r.cur = <-r.recChan } } if r.err != nil { return false } return r.cur != nil } // WithAllocator specifies the Arrow memory allocator used while building records. func WithAllocator(mem memory.Allocator) Option { return func(cfg config) { cfg.mem = mem } } // WithReadCacheSize specifies the size of the OCF record decode queue, default value // is 500. func WithReadCacheSize(n int) Option { return func(cfg config) { if n < 1 { cfg.avroChanSize = 500 } else { cfg.avroChanSize = n } } } // WithRecordCacheSize specifies the size of the converted Arrow record queue, default // value is 1. func WithRecordCacheSize(n int) Option { return func(cfg config) { if n < 1 { cfg.recChanSize = 1 } else { cfg.recChanSize = n } } } // WithSchemaEdit specifies modifications to the Avro schema. 
Supported methods are 'set' and // 'delete'. Set sets the value for the specified path. Delete deletes the value for the specified path. // A path is in dot syntax, such as "fields.1" or "fields.0.type". The modified Avro schema is // validated before conversion to Arrow schema - NewOCFReader will return an error if the modified schema // cannot be parsed. func WithSchemaEdit(method, path string, value any) Option { return func(cfg config) { var e schemaEdit e.method = method e.path = path e.value = value cfg.avroSchemaEdits = append(cfg.avroSchemaEdits, e) } } // WithChunk specifies the chunk size used while reading Avro OCF files. // // If n is zero or 1, no chunking will take place and the reader will create // one record per row. // If n is greater than 1, chunks of n rows will be read. // If n is negative, the reader will load the whole Avro OCF file into memory and // create one big record with all the rows. func WithChunk(n int) Option { return func(cfg config) { cfg.chunk = n } } // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. func (r *OCFReader) Retain() { atomic.AddInt64(&r.refs, 1) } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. func (r *OCFReader) Release() { debug.Assert(atomic.LoadInt64(&r.refs) > 0, "too many releases") if atomic.AddInt64(&r.refs, -1) == 0 { if r.cur != nil { r.cur.Release() } } } var _ array.RecordReader = (*OCFReader)(nil) arrow-go-18.2.0/arrow/avro/reader_test.go000066400000000000000000000200001476434502500202720ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package avro import ( "fmt" "testing" "github.com/apache/arrow-go/v18/arrow" hamba "github.com/hamba/avro/v2" ) func TestEditSchemaStringEqual(t *testing.T) { tests := []struct { avroSchema string arrowSchema []arrow.Field }{ { avroSchema: `{ "fields": [ { "name": "inheritNull", "type": { "name": "Simple", "symbols": [ "a", "b" ], "type": "enum" } }, { "name": "explicitNamespace", "type": { "name": "test", "namespace": "org.hamba.avro", "size": 12, "type": "fixed" } }, { "name": "fullName", "type": { "type": "record", "name": "fullName_data", "namespace": "ignored", "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", "fields": [{ "name": "inheritNamespace", "type": { "type": "enum", "name": "Understanding", "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. 
The fullname is 'a.full.Understanding'.", "symbols": ["d", "e"] } }, { "name": "md5", "type": { "name": "md5_data", "type": "fixed", "size": 16, "namespace": "ignored" } } ] } }, { "name": "id", "type": "int" }, { "name": "bigId", "type": "long" }, { "name": "temperature", "type": [ "null", "float" ] }, { "name": "fraction", "type": [ "null", "double" ] }, { "name": "is_emergency", "type": "boolean" }, { "name": "remote_ip", "type": [ "null", "bytes" ] }, { "name": "person", "type": { "fields": [ { "name": "lastname", "type": "string" }, { "name": "address", "type": { "fields": [ { "name": "streetaddress", "type": "string" }, { "name": "city", "type": "string" } ], "name": "AddressUSRecord", "type": "record" } }, { "name": "mapfield", "type": { "default": { }, "type": "map", "values": "long" } }, { "name": "arrayField", "type": { "default": [ ], "items": "string", "type": "array" } } ], "name": "person_data", "type": "record" } }, { "name": "decimalField", "type": { "logicalType": "decimal", "precision": 4, "scale": 2, "type": "bytes" } }, { "logicalType": "uuid", "name": "uuidField", "type": "string" }, { "name": "timemillis", "type": { "type": "int", "logicalType": "time-millis" } }, { "name": "timemicros", "type": { "type": "long", "logicalType": "time-micros" } }, { "name": "timestampmillis", "type": { "type": "long", "logicalType": "timestamp-millis" } }, { "name": "timestampmicros", "type": { "type": "long", "logicalType": "timestamp-micros" } }, { "name": "duration", "type": { "name": "duration", "namespace": "whyowhy", "logicalType": "duration", "size": 12, "type": "fixed" } }, { "name": "date", "type": { "logicalType": "date", "type": "int" } } ], "name": "Example", "type": "record" }`, arrowSchema: []arrow.Field{ { Name: "explicitNamespace", Type: &arrow.FixedSizeBinaryType{ByteWidth: 12}, }, { Name: "fullName", Type: arrow.StructOf( arrow.Field{ Name: "inheritNamespace", Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: 
arrow.BinaryTypes.String, Ordered: false}, }, arrow.Field{ Name: "md5", Type: &arrow.FixedSizeBinaryType{ByteWidth: 16}, }, ), }, { Name: "id", Type: arrow.PrimitiveTypes.Int32, }, { Name: "bigId", Type: arrow.PrimitiveTypes.Int64, }, { Name: "temperature", Type: arrow.PrimitiveTypes.Float32, Nullable: true, }, { Name: "fraction", Type: arrow.PrimitiveTypes.Float64, Nullable: true, }, { Name: "is_emergency", Type: arrow.FixedWidthTypes.Boolean, }, { Name: "remote_ip", Type: arrow.BinaryTypes.Binary, Nullable: true, }, { Name: "person", Type: arrow.StructOf( arrow.Field{ Name: "lastname", Type: arrow.BinaryTypes.String, }, arrow.Field{ Name: "address", Type: arrow.StructOf( arrow.Field{ Name: "streetaddress", Type: arrow.BinaryTypes.String, }, arrow.Field{ Name: "city", Type: arrow.BinaryTypes.String, }, ), }, arrow.Field{ Name: "mapfield", Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64), Nullable: true, }, arrow.Field{ Name: "arrayField", Type: arrow.ListOfNonNullable(arrow.BinaryTypes.String), }, ), }, { Name: "decimalField", Type: &arrow.Decimal128Type{Precision: 4, Scale: 2}, }, { Name: "uuidField", Type: arrow.BinaryTypes.String, }, { Name: "timemillis", Type: arrow.FixedWidthTypes.Time32ms, }, { Name: "timemicros", Type: arrow.FixedWidthTypes.Time64us, }, { Name: "timestampmillis", Type: arrow.FixedWidthTypes.Timestamp_ms, }, { Name: "timestampmicros", Type: arrow.FixedWidthTypes.Timestamp_us, }, { Name: "duration", Type: arrow.FixedWidthTypes.MonthDayNanoInterval, }, { Name: "date", Type: arrow.FixedWidthTypes.Date32, }, }, }, } for _, test := range tests { t.Run("", func(t *testing.T) { want := arrow.NewSchema(test.arrowSchema, nil) schema, err := hamba.ParseBytes([]byte(test.avroSchema)) if err != nil { t.Fatalf("%v", err) } r := new(OCFReader) r.avroSchema = schema.String() r.editAvroSchema(schemaEdit{method: "delete", path: "fields.0"}) schema, err = hamba.Parse(r.avroSchema) if err != nil { t.Fatalf("%v: could not parse modified 
avro schema", arrow.ErrInvalid) } got, err := ArrowSchemaFromAvro(schema) if err != nil { t.Fatalf("%v", err) } if !(fmt.Sprintf("%+v", want.String()) == fmt.Sprintf("%+v", got.String())) { t.Fatalf("got=%v,\n want=%v", got.String(), want.String()) } else { t.Logf("schema.String() comparison passed") } }) } } arrow-go-18.2.0/arrow/avro/reader_types.go000066400000000000000000000500431476434502500204710ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package avro import ( "bytes" "encoding/binary" "errors" "fmt" "math/big" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/decimal256" "github.com/apache/arrow-go/v18/arrow/extensions" "github.com/apache/arrow-go/v18/arrow/memory" ) type dataLoader struct { idx, depth int32 list *fieldPos item *fieldPos mapField *fieldPos mapKey *fieldPos mapValue *fieldPos fields []*fieldPos children []*dataLoader } var ( ErrNullStructData = errors.New("null struct data") ) func newDataLoader() *dataLoader { return &dataLoader{idx: 0, depth: 0} } // drawTree takes the tree of field builders produced by mapFieldBuilders() // and produces another tree structure and aggregates fields whose values can // be retrieved from a `map[string]any` into a slice of builders, and creates a hierarchy to // deal with nested types (lists and maps). func (d *dataLoader) drawTree(field *fieldPos) { for _, f := range field.children() { if f.isList || f.isMap { if f.isList { c := d.newListChild(f) if !f.childrens[0].isList { c.item = f.childrens[0] c.drawTree(f.childrens[0]) } else { c.drawTree(f.childrens[0].childrens[0]) } } if f.isMap { c := d.newMapChild(f) if !arrow.IsNested(f.childrens[1].builder.Type().ID()) { c.mapKey = f.childrens[0] c.mapValue = f.childrens[1] } else { c.mapKey = f.childrens[0] m := c.newChild() m.mapValue = f.childrens[1] m.drawTree(f.childrens[1]) } } } else { d.fields = append(d.fields, f) if len(f.children()) > 0 { d.drawTree(f) } } } } // loadDatum loads decoded Avro data to the schema fields' builder functions. // Since array.StructBuilder.AppendNull() will recursively append null to all of the // struct's fields, in the case of nil being passed to a struct's builderFunc it will // return a ErrNullStructData error to signal that all its sub-fields can be skipped. 
func (d *dataLoader) loadDatum(data any) error { if d.list == nil && d.mapField == nil { if d.mapValue != nil { d.mapValue.appendFunc(data) } var NullParent *fieldPos for _, f := range d.fields { if f.parent == NullParent { continue } if d.mapValue == nil { err := f.appendFunc(f.getValue(data)) if err != nil { if err == ErrNullStructData { NullParent = f continue } return err } } else { switch dt := data.(type) { case nil: err := f.appendFunc(dt) if err != nil { if err == ErrNullStructData { NullParent = f continue } return err } case []any: if len(d.children) < 1 { for _, e := range dt { err := f.appendFunc(e) if err != nil { if err == ErrNullStructData { NullParent = f continue } return err } } } else { for _, e := range dt { d.children[0].loadDatum(e) } } case map[string]any: err := f.appendFunc(f.getValue(dt)) if err != nil { if err == ErrNullStructData { NullParent = f continue } return err } } } } for _, c := range d.children { if c.list != nil { c.loadDatum(c.list.getValue(data)) } if c.mapField != nil { switch dt := data.(type) { case nil: c.loadDatum(dt) case map[string]any: c.loadDatum(c.mapField.getValue(dt)) default: c.loadDatum(c.mapField.getValue(data)) } } } } else { if d.list != nil { switch dt := data.(type) { case nil: d.list.appendFunc(dt) case []any: d.list.appendFunc(dt) for _, e := range dt { if d.item != nil { d.item.appendFunc(e) } var NullParent *fieldPos for _, f := range d.fields { if f.parent == NullParent { continue } err := f.appendFunc(f.getValue(e)) if err != nil { if err == ErrNullStructData { NullParent = f continue } return err } } for _, c := range d.children { if c.list != nil { c.loadDatum(c.list.getValue(e)) } if c.mapField != nil { c.loadDatum(c.mapField.getValue(e)) } } } case map[string]any: d.list.appendFunc(dt["array"]) for _, e := range dt["array"].([]any) { if d.item != nil { d.item.appendFunc(e) } var NullParent *fieldPos for _, f := range d.fields { if f.parent == NullParent { continue } err := 
f.appendFunc(f.getValue(e)) if err != nil { if err == ErrNullStructData { NullParent = f continue } return err } } for _, c := range d.children { c.loadDatum(c.list.getValue(e)) } } default: d.list.appendFunc(data) d.item.appendFunc(dt) } } if d.mapField != nil { switch dt := data.(type) { case nil: d.mapField.appendFunc(dt) case map[string]any: d.mapField.appendFunc(dt) for k, v := range dt { d.mapKey.appendFunc(k) if d.mapValue != nil { d.mapValue.appendFunc(v) } else { d.children[0].loadDatum(v) } } } } } return nil } func (d *dataLoader) newChild() *dataLoader { var child *dataLoader = &dataLoader{ depth: d.depth + 1, } d.children = append(d.children, child) return child } func (d *dataLoader) newListChild(list *fieldPos) *dataLoader { var child *dataLoader = &dataLoader{ list: list, item: list.childrens[0], depth: d.depth + 1, } d.children = append(d.children, child) return child } func (d *dataLoader) newMapChild(mapField *fieldPos) *dataLoader { var child *dataLoader = &dataLoader{ mapField: mapField, depth: d.depth + 1, } d.children = append(d.children, child) return child } type fieldPos struct { parent *fieldPos fieldName string builder array.Builder path []string isList bool isItem bool isStruct bool isMap bool typeName string appendFunc func(val interface{}) error metadatas arrow.Metadata childrens []*fieldPos index, depth int32 } func newFieldPos() *fieldPos { return &fieldPos{index: -1} } func (f *fieldPos) children() []*fieldPos { return f.childrens } func (f *fieldPos) newChild(childName string, childBuilder array.Builder, meta arrow.Metadata) *fieldPos { var child fieldPos = fieldPos{ parent: f, fieldName: childName, builder: childBuilder, metadatas: meta, index: int32(len(f.childrens)), depth: f.depth + 1, } if f.isList { child.isItem = true } child.path = child.buildNamePath() f.childrens = append(f.childrens, &child) return &child } func (f *fieldPos) buildNamePath() []string { var path []string var listPath []string cur := f for i := f.depth - 
1; i >= 0; i-- { if cur.typeName == "" { path = append([]string{cur.fieldName}, path...) } else { path = append([]string{cur.fieldName, cur.typeName}, path...) } if !cur.parent.isMap { cur = cur.parent } } if f.parent.parent != nil && f.parent.parent.isList { for i := len(path) - 1; i >= 0; i-- { if path[i] != "item" { listPath = append([]string{path[i]}, listPath...) } else { return listPath } } } if f.parent != nil && f.parent.fieldName == "value" { for i := len(path) - 1; i >= 0; i-- { if path[i] != "value" { listPath = append([]string{path[i]}, listPath...) } else { return listPath } } } return path } // NamePath returns a slice of keys making up the path to the field func (f *fieldPos) namePath() []string { return f.path } // GetValue retrieves the value from the map[string]any // by following the field's key path func (f *fieldPos) getValue(m any) any { if _, ok := m.(map[string]any); !ok { return m } for _, key := range f.namePath() { valueMap, ok := m.(map[string]any) if !ok { if key == "item" { return m } return nil } m, ok = valueMap[key] if !ok { return nil } } return m } // Avro data is loaded to Arrow arrays using the following type mapping: // // Avro Go Arrow // null nil Null // boolean bool Boolean // bytes []byte Binary // float float32 Float32 // double float64 Float64 // long int64 Int64 // int int32 Int32 // string string String // array []interface{} List // enum string Dictionary // fixed []byte FixedSizeBinary // map and record map[string]any Struct // // mapFieldBuilders builds a tree of field builders matching the Arrow schema func mapFieldBuilders(b array.Builder, field arrow.Field, parent *fieldPos) { f := parent.newChild(field.Name, b, field.Metadata) switch bt := b.(type) { case *array.BinaryBuilder: f.appendFunc = func(data interface{}) error { appendBinaryData(bt, data) return nil } case *array.BinaryDictionaryBuilder: // has metadata for Avro enum symbols f.appendFunc = func(data interface{}) error { appendBinaryDictData(bt, data) 
return nil } // add Avro enum symbols to builder sb := array.NewStringBuilder(memory.DefaultAllocator) for _, v := range field.Metadata.Values() { sb.Append(v) } sa := sb.NewStringArray() bt.InsertStringDictValues(sa) case *array.BooleanBuilder: f.appendFunc = func(data interface{}) error { appendBoolData(bt, data) return nil } case *array.Date32Builder: f.appendFunc = func(data interface{}) error { appendDate32Data(bt, data) return nil } case *array.Decimal128Builder: f.appendFunc = func(data interface{}) error { err := appendDecimal128Data(bt, data) if err != nil { return err } return nil } case *array.Decimal256Builder: f.appendFunc = func(data interface{}) error { err := appendDecimal256Data(bt, data) if err != nil { return err } return nil } case *extensions.UUIDBuilder: f.appendFunc = func(data interface{}) error { switch dt := data.(type) { case nil: bt.AppendNull() case string: err := bt.AppendValueFromString(dt) if err != nil { return err } case []byte: err := bt.AppendValueFromString(string(dt)) if err != nil { return err } } return nil } case *array.FixedSizeBinaryBuilder: f.appendFunc = func(data interface{}) error { appendFixedSizeBinaryData(bt, data) return nil } case *array.Float32Builder: f.appendFunc = func(data interface{}) error { appendFloat32Data(bt, data) return nil } case *array.Float64Builder: f.appendFunc = func(data interface{}) error { appendFloat64Data(bt, data) return nil } case *array.Int32Builder: f.appendFunc = func(data interface{}) error { appendInt32Data(bt, data) return nil } case *array.Int64Builder: f.appendFunc = func(data interface{}) error { appendInt64Data(bt, data) return nil } case *array.LargeListBuilder: vb := bt.ValueBuilder() f.isList = true mapFieldBuilders(vb, field.Type.(*arrow.LargeListType).ElemField(), f) f.appendFunc = func(data interface{}) error { switch dt := data.(type) { case nil: bt.AppendNull() case []interface{}: if len(dt) == 0 { bt.AppendEmptyValue() } else { bt.Append(true) } default: bt.Append(true) 
} return nil } case *array.ListBuilder: vb := bt.ValueBuilder() f.isList = true mapFieldBuilders(vb, field.Type.(*arrow.ListType).ElemField(), f) f.appendFunc = func(data interface{}) error { switch dt := data.(type) { case nil: bt.AppendNull() case []interface{}: if len(dt) == 0 { bt.AppendEmptyValue() } else { bt.Append(true) } default: bt.Append(true) } return nil } case *array.MapBuilder: // has metadata for objects in values f.isMap = true kb := bt.KeyBuilder() ib := bt.ItemBuilder() mapFieldBuilders(kb, field.Type.(*arrow.MapType).KeyField(), f) mapFieldBuilders(ib, field.Type.(*arrow.MapType).ItemField(), f) f.appendFunc = func(data interface{}) error { switch data.(type) { case nil: bt.AppendNull() default: bt.Append(true) } return nil } case *array.MonthDayNanoIntervalBuilder: f.appendFunc = func(data interface{}) error { appendDurationData(bt, data) return nil } case *array.StringBuilder: f.appendFunc = func(data interface{}) error { appendStringData(bt, data) return nil } case *array.StructBuilder: // has metadata for Avro Union named types f.typeName, _ = field.Metadata.GetValue("typeName") f.isStruct = true // create children for i, p := range field.Type.(*arrow.StructType).Fields() { mapFieldBuilders(bt.FieldBuilder(i), p, f) } f.appendFunc = func(data interface{}) error { switch data.(type) { case nil: bt.AppendNull() return ErrNullStructData default: bt.Append(true) } return nil } case *array.Time32Builder: f.appendFunc = func(data interface{}) error { appendTime32Data(bt, data) return nil } case *array.Time64Builder: f.appendFunc = func(data interface{}) error { appendTime64Data(bt, data) return nil } case *array.TimestampBuilder: f.appendFunc = func(data interface{}) error { appendTimestampData(bt, data) return nil } } } func appendBinaryData(b *array.BinaryBuilder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case map[string]any: switch ct := dt["bytes"].(type) { case nil: b.AppendNull() default: b.Append(ct.([]byte)) } 
default: b.Append(fmt.Append([]byte{}, data)) } } func appendBinaryDictData(b *array.BinaryDictionaryBuilder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case string: b.AppendString(dt) case map[string]any: switch v := dt["string"].(type) { case nil: b.AppendNull() case string: b.AppendString(v) } } } func appendBoolData(b *array.BooleanBuilder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case bool: b.Append(dt) case map[string]any: switch v := dt["boolean"].(type) { case nil: b.AppendNull() case bool: b.Append(v) } } } func appendDate32Data(b *array.Date32Builder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case int32: b.Append(arrow.Date32(dt)) case map[string]any: switch v := dt["int"].(type) { case nil: b.AppendNull() case int32: b.Append(arrow.Date32(v)) } } } func appendDecimal128Data(b *array.Decimal128Builder, data interface{}) error { switch dt := data.(type) { case nil: b.AppendNull() case []byte: buf := bytes.NewBuffer(dt) if len(dt) <= 38 { var intData int64 err := binary.Read(buf, binary.BigEndian, &intData) if err != nil { return err } b.Append(decimal128.FromI64(intData)) } else { var bigIntData big.Int b.Append(decimal128.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) } case map[string]any: buf := bytes.NewBuffer(dt["bytes"].([]byte)) if len(dt["bytes"].([]byte)) <= 38 { var intData int64 err := binary.Read(buf, binary.BigEndian, &intData) if err != nil { return err } b.Append(decimal128.FromI64(intData)) } else { var bigIntData big.Int b.Append(decimal128.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) } } return nil } func appendDecimal256Data(b *array.Decimal256Builder, data interface{}) error { switch dt := data.(type) { case nil: b.AppendNull() case []byte: var bigIntData big.Int buf := bytes.NewBuffer(dt) b.Append(decimal256.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) case map[string]any: var bigIntData big.Int buf := bytes.NewBuffer(dt["bytes"].([]byte)) 
b.Append(decimal256.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) } return nil } // Avro duration logical type annotates Avro fixed type of size 12, which stores three little-endian // unsigned integers that represent durations at different granularities of time. The first stores // a number in months, the second stores a number in days, and the third stores a number in milliseconds. func appendDurationData(b *array.MonthDayNanoIntervalBuilder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case []byte: dur := new(arrow.MonthDayNanoInterval) dur.Months = int32(binary.LittleEndian.Uint16(dt[:3])) dur.Days = int32(binary.LittleEndian.Uint16(dt[4:7])) dur.Nanoseconds = int64(binary.LittleEndian.Uint32(dt[8:]) * 1000000) b.Append(*dur) case map[string]any: switch dtb := dt["bytes"].(type) { case nil: b.AppendNull() case []byte: dur := new(arrow.MonthDayNanoInterval) dur.Months = int32(binary.LittleEndian.Uint16(dtb[:3])) dur.Days = int32(binary.LittleEndian.Uint16(dtb[4:7])) dur.Nanoseconds = int64(binary.LittleEndian.Uint32(dtb[8:]) * 1000000) b.Append(*dur) } } } func appendFixedSizeBinaryData(b *array.FixedSizeBinaryBuilder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case []byte: b.Append(dt) case map[string]any: switch v := dt["bytes"].(type) { case nil: b.AppendNull() case []byte: b.Append(v) } } } func appendFloat32Data(b *array.Float32Builder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case float32: b.Append(dt) case map[string]any: switch v := dt["float"].(type) { case nil: b.AppendNull() case float32: b.Append(v) } } } func appendFloat64Data(b *array.Float64Builder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case float64: b.Append(dt) case map[string]any: switch v := dt["double"].(type) { case nil: b.AppendNull() case float64: b.Append(v) } } } func appendInt32Data(b *array.Int32Builder, data interface{}) { switch dt := data.(type) { case nil: 
b.AppendNull() case int: b.Append(int32(dt)) case int32: b.Append(dt) case map[string]any: switch v := dt["int"].(type) { case nil: b.AppendNull() case int: b.Append(int32(v)) case int32: b.Append(v) } } } func appendInt64Data(b *array.Int64Builder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case int: b.Append(int64(dt)) case int64: b.Append(dt) case map[string]any: switch v := dt["long"].(type) { case nil: b.AppendNull() case int: b.Append(int64(v)) case int64: b.Append(v) } } } func appendStringData(b *array.StringBuilder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case string: b.Append(dt) case map[string]any: switch v := dt["string"].(type) { case nil: b.AppendNull() case string: b.Append(v) } default: b.Append(fmt.Sprint(data)) } } func appendTime32Data(b *array.Time32Builder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case int32: b.Append(arrow.Time32(dt)) case map[string]any: switch v := dt["int"].(type) { case nil: b.AppendNull() case int32: b.Append(arrow.Time32(v)) } } } func appendTime64Data(b *array.Time64Builder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case int64: b.Append(arrow.Time64(dt)) case map[string]any: switch v := dt["long"].(type) { case nil: b.AppendNull() case int64: b.Append(arrow.Time64(v)) } } } func appendTimestampData(b *array.TimestampBuilder, data interface{}) { switch dt := data.(type) { case nil: b.AppendNull() case int64: b.Append(arrow.Timestamp(dt)) case map[string]any: switch v := dt["long"].(type) { case nil: b.AppendNull() case int64: b.Append(arrow.Timestamp(v)) } } } arrow-go-18.2.0/arrow/avro/schema.go000066400000000000000000000401561476434502500172470ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package avro reads Avro OCF files and presents the extracted data as records package avro import ( "fmt" "math" "strconv" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/extensions" "github.com/apache/arrow-go/v18/internal/utils" avro "github.com/hamba/avro/v2" ) type schemaNode struct { name string parent *schemaNode schema avro.Schema union bool nullable bool childrens []*schemaNode arrowField arrow.Field schemaCache *avro.SchemaCache index, depth int32 } func newSchemaNode() *schemaNode { var schemaCache avro.SchemaCache return &schemaNode{name: "", index: -1, schemaCache: &schemaCache} } func (node *schemaNode) schemaPath() string { var path string n := node for n.parent != nil { path = "." 
+ n.name + path n = n.parent } return path } func (node *schemaNode) newChild(n string, s avro.Schema) *schemaNode { child := &schemaNode{ name: n, parent: node, schema: s, schemaCache: node.schemaCache, index: int32(len(node.childrens)), depth: node.depth + 1, } node.childrens = append(node.childrens, child) return child } func (node *schemaNode) children() []*schemaNode { return node.childrens } // func (node *schemaNode) nodeName() string { return node.name } // ArrowSchemaFromAvro returns a new Arrow schema from an Avro schema func ArrowSchemaFromAvro(schema avro.Schema) (s *arrow.Schema, err error) { defer func() { if r := recover(); r != nil { s = nil err = utils.FormatRecoveredError("invalid avro schema", r) } }() n := newSchemaNode() n.schema = schema c := n.newChild(n.schema.(avro.NamedSchema).Name(), n.schema) arrowSchemafromAvro(c) var fields []arrow.Field for _, g := range c.children() { fields = append(fields, g.arrowField) } s = arrow.NewSchema(fields, nil) return s, nil } func arrowSchemafromAvro(n *schemaNode) { if ns, ok := n.schema.(avro.NamedSchema); ok { n.schemaCache.Add(ns.Name(), ns) } switch st := n.schema.Type(); st { case "record": iterateFields(n) case "enum": n.schemaCache.Add(n.schema.(avro.NamedSchema).Name(), n.schema.(*avro.EnumSchema)) symbols := make(map[string]string) for index, symbol := range n.schema.(avro.PropertySchema).(*avro.EnumSchema).Symbols() { k := strconv.FormatInt(int64(index), 10) symbols[k] = symbol } var dt arrow.DictionaryType = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false} sl := int64(len(symbols)) switch { case sl <= math.MaxUint8: dt.IndexType = arrow.PrimitiveTypes.Uint8 case sl > math.MaxUint8 && sl <= math.MaxUint16: dt.IndexType = arrow.PrimitiveTypes.Uint16 case sl > math.MaxUint16 && sl <= math.MaxUint32: dt.IndexType = arrow.PrimitiveTypes.Uint32 } n.arrowField = buildArrowField(n, &dt, arrow.MetadataFrom(symbols)) case "array": // 
logical items type c := n.newChild(n.name, n.schema.(*avro.ArraySchema).Items()) if isLogicalSchemaType(n.schema.(*avro.ArraySchema).Items()) { avroLogicalToArrowField(c) } else { arrowSchemafromAvro(c) } switch c.arrowField.Nullable { case true: n.arrowField = arrow.Field{Name: n.name, Type: arrow.ListOfField(c.arrowField), Metadata: c.arrowField.Metadata} case false: n.arrowField = arrow.Field{Name: n.name, Type: arrow.ListOfNonNullable(c.arrowField.Type), Metadata: c.arrowField.Metadata} } case "map": n.schemaCache.Add(n.schema.(*avro.MapSchema).Values().(avro.NamedSchema).Name(), n.schema.(*avro.MapSchema).Values()) c := n.newChild(n.name, n.schema.(*avro.MapSchema).Values()) arrowSchemafromAvro(c) n.arrowField = buildArrowField(n, arrow.MapOf(arrow.BinaryTypes.String, c.arrowField.Type), c.arrowField.Metadata) case "union": if n.schema.(*avro.UnionSchema).Nullable() { if len(n.schema.(*avro.UnionSchema).Types()) > 1 { n.schema = n.schema.(*avro.UnionSchema).Types()[1] n.union = true n.nullable = true arrowSchemafromAvro(n) } } // Avro "fixed" field type = Arrow FixedSize Primitive BinaryType case "fixed": n.schemaCache.Add(n.schema.(avro.NamedSchema).Name(), n.schema.(*avro.FixedSchema)) if isLogicalSchemaType(n.schema) { avroLogicalToArrowField(n) } else { n.arrowField = buildArrowField(n, &arrow.FixedSizeBinaryType{ByteWidth: n.schema.(*avro.FixedSchema).Size()}, arrow.Metadata{}) } case "string", "bytes", "int", "long": if isLogicalSchemaType(n.schema) { avroLogicalToArrowField(n) } else { n.arrowField = buildArrowField(n, avroPrimitiveToArrowType(string(st)), arrow.Metadata{}) } case "float", "double", "boolean": n.arrowField = arrow.Field{Name: n.name, Type: avroPrimitiveToArrowType(string(st)), Nullable: n.nullable} case "": refSchema := n.schemaCache.Get(string(n.schema.(*avro.RefSchema).Schema().Name())) if refSchema == nil { panic(fmt.Errorf("could not find schema for '%v' in schema cache - %v", n.schemaPath(), 
n.schema.(*avro.RefSchema).Schema().Name())) } n.schema = refSchema arrowSchemafromAvro(n) case "null": n.schemaCache.Add(n.schema.(*avro.MapSchema).Values().(avro.NamedSchema).Name(), &avro.NullSchema{}) n.nullable = true n.arrowField = buildArrowField(n, arrow.Null, arrow.Metadata{}) } } // iterate record Fields() func iterateFields(n *schemaNode) { for _, f := range n.schema.(*avro.RecordSchema).Fields() { switch ft := f.Type().(type) { // Avro "array" field type case *avro.ArraySchema: n.schemaCache.Add(f.Name(), ft.Items()) // logical items type c := n.newChild(f.Name(), ft.Items()) if isLogicalSchemaType(ft.Items()) { avroLogicalToArrowField(c) } else { arrowSchemafromAvro(c) } switch c.arrowField.Nullable { case true: c.arrowField = arrow.Field{Name: c.name, Type: arrow.ListOfField(c.arrowField), Metadata: c.arrowField.Metadata} case false: c.arrowField = arrow.Field{Name: c.name, Type: arrow.ListOfNonNullable(c.arrowField.Type), Metadata: c.arrowField.Metadata} } // Avro "enum" field type = Arrow dictionary type case *avro.EnumSchema: n.schemaCache.Add(f.Type().(*avro.EnumSchema).Name(), f.Type()) c := n.newChild(f.Name(), f.Type()) symbols := make(map[string]string) for index, symbol := range ft.Symbols() { k := strconv.FormatInt(int64(index), 10) symbols[k] = symbol } var dt arrow.DictionaryType = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false} sl := len(symbols) switch { case sl <= math.MaxUint8: dt.IndexType = arrow.PrimitiveTypes.Uint8 case sl > math.MaxUint8 && sl <= math.MaxUint16: dt.IndexType = arrow.PrimitiveTypes.Uint16 case sl > math.MaxUint16 && sl <= math.MaxInt: dt.IndexType = arrow.PrimitiveTypes.Uint32 } c.arrowField = buildArrowField(c, &dt, arrow.MetadataFrom(symbols)) // Avro "fixed" field type = Arrow FixedSize Primitive BinaryType case *avro.FixedSchema: n.schemaCache.Add(f.Name(), f.Type()) c := n.newChild(f.Name(), f.Type()) if isLogicalSchemaType(f.Type()) { 
avroLogicalToArrowField(c) } else { arrowSchemafromAvro(c) } case *avro.RecordSchema: n.schemaCache.Add(f.Name(), f.Type()) c := n.newChild(f.Name(), f.Type()) iterateFields(c) // Avro "map" field type - KVP with value of one type - keys are strings case *avro.MapSchema: n.schemaCache.Add(f.Name(), ft.Values()) c := n.newChild(f.Name(), ft.Values()) arrowSchemafromAvro(c) c.arrowField = buildArrowField(c, arrow.MapOf(arrow.BinaryTypes.String, c.arrowField.Type), c.arrowField.Metadata) case *avro.UnionSchema: if ft.Nullable() { if len(ft.Types()) > 1 { n.schemaCache.Add(f.Name(), ft.Types()[1]) c := n.newChild(f.Name(), ft.Types()[1]) c.union = true c.nullable = true arrowSchemafromAvro(c) } } default: n.schemaCache.Add(f.Name(), f.Type()) if isLogicalSchemaType(f.Type()) { c := n.newChild(f.Name(), f.Type()) avroLogicalToArrowField(c) } else { c := n.newChild(f.Name(), f.Type()) arrowSchemafromAvro(c) } } } var fields []arrow.Field for _, child := range n.children() { fields = append(fields, child.arrowField) } namedSchema, ok := isNamedSchema(n.schema) var md arrow.Metadata if ok && namedSchema != n.name+"_data" && n.union { md = arrow.NewMetadata([]string{"typeName"}, []string{namedSchema}) } n.arrowField = buildArrowField(n, arrow.StructOf(fields...), md) } func isLogicalSchemaType(s avro.Schema) bool { lts, ok := s.(avro.LogicalTypeSchema) if !ok { return false } if lts.Logical() != nil { return true } return false } func isNamedSchema(s avro.Schema) (string, bool) { if ns, ok := s.(avro.NamedSchema); ok { return ns.FullName(), ok } return "", false } func buildArrowField(n *schemaNode, t arrow.DataType, m arrow.Metadata) arrow.Field { return arrow.Field{ Name: n.name, Type: t, Metadata: m, Nullable: n.nullable, } } // Avro primitive type. // // NOTE: Arrow Binary type is used as a catchall to avoid potential data loss. 
func avroPrimitiveToArrowType(avroFieldType string) arrow.DataType { switch avroFieldType { // int: 32-bit signed integer case "int": return arrow.PrimitiveTypes.Int32 // long: 64-bit signed integer case "long": return arrow.PrimitiveTypes.Int64 // float: single precision (32-bit) IEEE 754 floating-point number case "float": return arrow.PrimitiveTypes.Float32 // double: double precision (64-bit) IEEE 754 floating-point number case "double": return arrow.PrimitiveTypes.Float64 // bytes: sequence of 8-bit unsigned bytes case "bytes": return arrow.BinaryTypes.Binary // boolean: a binary value case "boolean": return arrow.FixedWidthTypes.Boolean // string: unicode character sequence case "string": return arrow.BinaryTypes.String } return nil } func avroLogicalToArrowField(n *schemaNode) { var dt arrow.DataType // Avro logical types switch lt := n.schema.(avro.LogicalTypeSchema).Logical(); lt.Type() { // The decimal logical type represents an arbitrary-precision signed decimal number of the form unscaled × 10-scale. // A decimal logical type annotates Avro bytes or fixed types. The byte array must contain the two’s-complement // representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified // using an attribute. // // The following attributes are supported: // scale, a JSON integer representing the scale (optional). If not specified the scale is 0. // precision, a JSON integer representing the (maximum) precision of decimals stored in this type (required). case "decimal": id := arrow.DECIMAL128 if lt.(*avro.DecimalLogicalSchema).Precision() > decimal128.MaxPrecision { id = arrow.DECIMAL256 } dt, _ = arrow.NewDecimalType(id, int32(lt.(*avro.DecimalLogicalSchema).Precision()), int32(lt.(*avro.DecimalLogicalSchema).Scale())) // The uuid logical type represents a random generated universally unique identifier (UUID). // A uuid logical type annotates an Avro string. 
The string has to conform with RFC-4122 case "uuid": dt = extensions.NewUUIDType() // The date logical type represents a date within the calendar, with no reference to a particular // time zone or time of day. // A date logical type annotates an Avro int, where the int stores the number of days from the unix epoch, // 1 January 1970 (ISO calendar). case "date": dt = arrow.FixedWidthTypes.Date32 // The time-millis logical type represents a time of day, with no reference to a particular calendar, // time zone or date, with a precision of one millisecond. // A time-millis logical type annotates an Avro int, where the int stores the number of milliseconds // after midnight, 00:00:00.000. case "time-millis": dt = arrow.FixedWidthTypes.Time32ms // The time-micros logical type represents a time of day, with no reference to a particular calendar, // time zone or date, with a precision of one microsecond. // A time-micros logical type annotates an Avro long, where the long stores the number of microseconds // after midnight, 00:00:00.000000. case "time-micros": dt = arrow.FixedWidthTypes.Time64us // The timestamp-millis logical type represents an instant on the global timeline, independent of a // particular time zone or calendar, with a precision of one millisecond. Please note that time zone // information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, // but not the original representation. In practice, such timestamps are typically displayed to users in // their local time zones, therefore they may be displayed differently depending on the execution environment. // A timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds // from the unix epoch, 1 January 1970 00:00:00.000 UTC. 
case "timestamp-millis": dt = arrow.FixedWidthTypes.Timestamp_ms // The timestamp-micros logical type represents an instant on the global timeline, independent of a // particular time zone or calendar, with a precision of one microsecond. Please note that time zone // information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, // but not the original representation. In practice, such timestamps are typically displayed to users // in their local time zones, therefore they may be displayed differently depending on the execution environment. // A timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds // from the unix epoch, 1 January 1970 00:00:00.000000 UTC. case "timestamp-micros": dt = arrow.FixedWidthTypes.Timestamp_us // The local-timestamp-millis logical type represents a timestamp in a local timezone, regardless of // what specific time zone is considered local, with a precision of one millisecond. // A local-timestamp-millis logical type annotates an Avro long, where the long stores the number of // milliseconds, from 1 January 1970 00:00:00.000. // Note: not implemented in hamba/avro // case "local-timestamp-millis": // dt = &arrow.TimestampType{Unit: arrow.Millisecond} // The local-timestamp-micros logical type represents a timestamp in a local timezone, regardless of // what specific time zone is considered local, with a precision of one microsecond. // A local-timestamp-micros logical type annotates an Avro long, where the long stores the number of // microseconds, from 1 January 1970 00:00:00.000000. // case "local-timestamp-micros": // Note: not implemented in hamba/avro // dt = &arrow.TimestampType{Unit: arrow.Microsecond} // The duration logical type represents an amount of time defined by a number of months, days and milliseconds. 
// This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the // duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other // standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. // A duration logical type annotates Avro fixed type of size 12, which stores three little-endian unsigned integers // that represent durations at different granularities of time. The first stores a number in months, the second // stores a number in days, and the third stores a number in milliseconds. case "duration": dt = arrow.FixedWidthTypes.MonthDayNanoInterval } n.arrowField = buildArrowField(n, dt, arrow.Metadata{}) } arrow-go-18.2.0/arrow/avro/schema_test.go000066400000000000000000000200061476434502500202760ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package avro import ( "fmt" "testing" "github.com/apache/arrow-go/v18/arrow" hamba "github.com/hamba/avro/v2" ) func TestSchemaStringEqual(t *testing.T) { tests := []struct { avroSchema string arrowSchema []arrow.Field }{ { avroSchema: `{ "fields": [ { "name": "inheritNull", "type": { "name": "Simple", "symbols": [ "a", "b" ], "type": "enum" } }, { "name": "explicitNamespace", "type": { "name": "test", "namespace": "org.hamba.avro", "size": 12, "type": "fixed" } }, { "name": "fullName", "type": { "type": "record", "name": "fullName_data", "namespace": "ignored", "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", "fields": [{ "name": "inheritNamespace", "type": { "type": "enum", "name": "Understanding", "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.", "symbols": ["d", "e"] } }, { "name": "md5", "type": { "name": "md5_data", "type": "fixed", "size": 16, "namespace": "ignored" } } ] } }, { "name": "id", "type": "int" }, { "name": "bigId", "type": "long" }, { "name": "temperature", "type": [ "null", "float" ] }, { "name": "fraction", "type": [ "null", "double" ] }, { "name": "is_emergency", "type": "boolean" }, { "name": "remote_ip", "type": [ "null", "bytes" ] }, { "name": "person", "type": { "fields": [ { "name": "lastname", "type": "string" }, { "name": "address", "type": { "fields": [ { "name": "streetaddress", "type": "string" }, { "name": "city", "type": "string" } ], "name": "AddressUSRecord", "type": "record" } }, { "name": "mapfield", "type": { "default": { }, "type": "map", "values": "long" } }, { "name": "arrayField", "type": { "default": [ ], "items": "string", "type": "array" } } ], "name": "person_data", "type": "record" } }, { "name": "decimalField", "type": { "logicalType": "decimal", "precision": 4, "scale": 2, "type": "bytes" } }, { "logicalType": 
"uuid", "name": "uuidField", "type": "string" }, { "name": "timemillis", "type": { "type": "int", "logicalType": "time-millis" } }, { "name": "timemicros", "type": { "type": "long", "logicalType": "time-micros" } }, { "name": "timestampmillis", "type": { "type": "long", "logicalType": "timestamp-millis" } }, { "name": "timestampmicros", "type": { "type": "long", "logicalType": "timestamp-micros" } }, { "name": "duration", "type": { "name": "duration", "namespace": "whyowhy", "logicalType": "duration", "size": 12, "type": "fixed" } }, { "name": "date", "type": { "logicalType": "date", "type": "int" } } ], "name": "Example", "type": "record" }`, arrowSchema: []arrow.Field{ { Name: "inheritNull", Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String, Ordered: false}, Metadata: arrow.MetadataFrom(map[string]string{"0": "a", "1": "b"}), }, { Name: "explicitNamespace", Type: &arrow.FixedSizeBinaryType{ByteWidth: 12}, }, { Name: "fullName", Type: arrow.StructOf( arrow.Field{ Name: "inheritNamespace", Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String, Ordered: false}, }, arrow.Field{ Name: "md5", Type: &arrow.FixedSizeBinaryType{ByteWidth: 16}, }, ), }, { Name: "id", Type: arrow.PrimitiveTypes.Int32, }, { Name: "bigId", Type: arrow.PrimitiveTypes.Int64, }, { Name: "temperature", Type: arrow.PrimitiveTypes.Float32, Nullable: true, }, { Name: "fraction", Type: arrow.PrimitiveTypes.Float64, Nullable: true, }, { Name: "is_emergency", Type: arrow.FixedWidthTypes.Boolean, }, { Name: "remote_ip", Type: arrow.BinaryTypes.Binary, Nullable: true, }, { Name: "person", Type: arrow.StructOf( arrow.Field{ Name: "lastname", Type: arrow.BinaryTypes.String, Nullable: true, }, arrow.Field{ Name: "address", Type: arrow.StructOf( arrow.Field{ Name: "streetaddress", Type: arrow.BinaryTypes.String, }, arrow.Field{ Name: "city", Type: arrow.BinaryTypes.String, }, ), }, arrow.Field{ Name: 
"mapfield", Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64), Nullable: true, }, arrow.Field{ Name: "arrayField", Type: arrow.ListOfNonNullable(arrow.BinaryTypes.String), }, ), }, { Name: "decimalField", Type: &arrow.Decimal128Type{Precision: 4, Scale: 2}, }, { Name: "uuidField", Type: arrow.BinaryTypes.String, }, { Name: "timemillis", Type: arrow.FixedWidthTypes.Time32ms, }, { Name: "timemicros", Type: arrow.FixedWidthTypes.Time64us, }, { Name: "timestampmillis", Type: arrow.FixedWidthTypes.Timestamp_ms, }, { Name: "timestampmicros", Type: arrow.FixedWidthTypes.Timestamp_us, }, { Name: "duration", Type: arrow.FixedWidthTypes.MonthDayNanoInterval, }, { Name: "date", Type: arrow.FixedWidthTypes.Date32, }, }, }, } for _, test := range tests { t.Run("", func(t *testing.T) { want := arrow.NewSchema(test.arrowSchema, nil) schema, err := hamba.ParseBytes([]byte(test.avroSchema)) if err != nil { t.Fatalf("%v", err) } got, err := ArrowSchemaFromAvro(schema) if err != nil { t.Fatalf("%v", err) } if !(fmt.Sprintf("%+v", want.String()) == fmt.Sprintf("%+v", got.String())) { t.Fatalf("got=%v,\n want=%v", got.String(), want.String()) } else { t.Logf("schema.String() comparison passed") } }) } } arrow-go-18.2.0/arrow/avro/testdata/000077500000000000000000000000001476434502500172635ustar00rootroot00000000000000arrow-go-18.2.0/arrow/avro/testdata/arrayrecordmap.avro000066400000000000000000000011061476434502500231650ustar00rootroot00000000000000ObjÊavro.schemaŽ{"name":"org.hamba.avro.simple","type":"record","fields":[{"name":"array","type":{"type":"array","items":{"name":"org.hamba.avro.arrayrecord","type":"record","fields":[{"name":"a","type":"int"},{"name":"b","type":{"type":"array","items":"string"}},{"name":"map","type":{"type":"map","values":{"name":"org.hamba.avro.arrayrecordmap","type":"record","fields":[{"name":"number","type":"int"},{"name":"name","type":{"type":"array","items":"string"}}]}}}]}}}]}avro.codecnulloU*â²8×› §c·ã¥–LnhT 
bacontofuDarrayrecordmapÚÿ¢ jenny jennyoU*â²8×› §c·ã¥–Larrow-go-18.2.0/arrow/avro/testdata/githubsamplecommits.avro000066400000000000000000002716331476434502500242500ustar00rootroot00000000000000Objavro.codec snappyavro.schemaì#{"type":"record","name":"Root","fields":[{"name":"commit","type":["null","string"],"default":null},{"name":"tree","type":["null","string"],"default":null},{"name":"parent","type":{"type":"array","items":"string"}},{"name":"author","type":["null",{"type":"record","namespace":"root","name":"Author","fields":[{"name":"name","type":["null","string"],"default":null},{"name":"email","type":["null","string"],"default":null},{"name":"time_sec","type":["null","long"],"default":null},{"name":"tz_offset","type":["null","long"],"default":null},{"name":"date","type":["null",{"type":"long","logicalType":"timestamp-micros"}],"default":null}]}],"default":null},{"name":"committer","type":["null",{"type":"record","namespace":"root","name":"Committer","fields":[{"name":"name","type":["null","string"],"default":null},{"name":"email","type":["null","string"],"default":null},{"name":"time_sec","type":["null","long"],"default":null},{"name":"tz_offset","type":["null","long"],"default":null},{"name":"date","type":["null",{"type":"long","logicalType":"timestamp-micros"}],"default":null}]}],"default":null},{"name":"subject","type":["null","string"],"default":null},{"name":"message","type":["null","string"],"default":null},{"name":"trailer","type":{"type":"array","items":{"type":"record","namespace":"root","name":"Trailer","fields":[{"name":"key","type":["null","string"],"default":null},{"name":"value","type":["null","string"],"default":null},{"name":"email","type":["null","string"],"default":null}]}}},{"name":"difference","type":{"type":"array","items":{"type":"record","namespace":"root","name":"Difference","fields":[{"name":"old_mode","type":["null","long"],"default":null},{"name":"new_mode","type":["null","long"],"default":null},{"name":"old_path","type":["null","st
ring"],"default":null},{"name":"new_path","type":["null","string"],"default":null},{"name":"old_sha1","type":["null","string"],"default":null},{"name":"new_sha1","type":["null","string"],"default":null},{"name":"old_repo","type":["null","string"],"default":null},{"name":"new_repo","type":["null","string"],"default":null}]}}},{"name":"difference_truncated","type":["null","boolean"],"default":null},{"name":"repo_name","type":["null","string"],"default":null},{"name":"encoding","type":["null","string"],"default":null}]}·#…ÖS VagôY?¤Ô©€‘ðÐP02ad66db3c4e0acfa136de7e2675e264a91fd15bPd467d8ace9970a648fc02f51e449159083ab62d9P652062a2ec52d970b487e0d451dedd471806a655SpringsUp^2994a0e86f63326b94cf83a146dfd21a0876892d@me.comÌîÅõ ð€ÖÍÔ¶³šêOÒþÒõ OðXñÃâ×¹š–[build-script] Infer --android platform flag by presence of a stdlib-target˜[þNN( Ú‡$utils/.aRˆP4adbb720225cd7d12bd13e08ff75ac5bff!ÄÈ75Pbf8a7afa10a701263c817277d824dffc1cc24fd5Ȇp †swift_Œ_supportRt% s.pyê:ðUPeb0a9e498a4c690b5fcd31a43ac3b9f5562ba38bPc12e01d2b4423a0e8784e4a5ff58bd889ca916ddÎ^fÒtests_ÉÂ1ðRP0ef3a19506ee6a5ea56dcab2efa8ae1456e239e0P4dc669b641cb2989db06892889f507c3c2b4118eAapple)xðÐP456135eb047793a2cc04a75444cf01eae22b86a8P79f78188d371290d56ba8d6ae8feb5c28141b86bPd19aa7a363e8724dd42f7cc72b75212c355f78cbAlex Smith\a3a515f22faec38a51c8b07d501f45b523f5ac5e@ya.ru¤ÚæØ ¸€¢ªÑˆÙŒþO6O<rename folder2ijIå Nexamples/basic-click-counter/Index.htmlž)¨Pb2978d524bb15d404dec3a36740e9d80e1e50392I–B„b~Š#ªwDfacebook/reacAQðý516b623121997c3fc091e8af178e706df2883650P2aa13c8ef31a5b50d4ad2de70cd60df4bcbce5dcPf1cfd29e7ea00299a3d476c451494729d71c424f&Christopher Chedeau^06130990ee5aa0933318308bd3d0435f0fd03d6b@fb.com¦“üÕ Ç€ë䇘¬‹&Christopher Chedeau^06130990ee5aa093ÆY¨P[Blog post] React Native Release ProcessRž*I›I›ldocs/_`0s/2015-05-22-%°-nk-r k-p k.mdÞ8€P615a878020107a1a5b1a1b130fc44bÁ39275b3ZAhFŽ'4 Fixes #1398.I7 
ÅàÁä¨bsrc/browser/ui/dom/components/LinkedValueUe.jsÎ3ðRP727af8584417b8b7cdf76034cf8ce858aaa54ae5P84d3f8a9aed77188ae543670a1fe44ab2348238e©Ä\vÄepDOMInputÁ¾0ðRP618894f9131836112fb2f988ccfc4ea32c8ab351P49f783c1e1264ec219067fa463169d09236b83c3¾^–¾Option¿Â1ÐP946e6ed4204d25deaf725729348dc502d01311f3Pdcc593afccaCh16d49e780360fda5e91b0f4d3c‰xABšSeAµªÀ1ðUP9ad78ba868dafbb26581b7216717fcdb83e9d9bbP1573d64a2b5581d34ab0aead1fbd2ca7de15fdc4¼zQ>TextareaÂÊ3(Paf4a460effñdd6886e0cc90504f620eb7b0a3b– @61005c40ca4e5b52d} (9ccdd492ed0aMceÄ|vD__° __Î)]-Ñþ@ðRP1b369ec6276d43455d94dc3152f07bbc99bd6b7aP3166d913a51aa51b4c48a25c0caf5d96657fc704Z9ð†27996377e0da502b6d9fccd2018d7919c3de6733P41ea21568347aca834d73ad12c112560082084d2P10ab0c8915f4bb2c4bdc69e12f824e004814ada0Ben AlpÒ9 Ð’ÏÞ©éè£Ô…¼þ9Êãú P°ââìÐBFetch DOM node lazily for updatesŽ‚$D Previously, we fVed the XoçVÐevery changed attribute. In my createElement diff, I /it to ][(\D) regardless of if! re w @ s -- =áit • only/are.is®n betterí …ZaCéhersåsharedu4Cña2º/ðFP7abfcea3ce3a165fa2ad497a6889fe1ee0673216P07a313d9ec10a32deff2d42c368e¡Ç1d593a9cZ!bfcad96A²p6bebe6bbfbaa75fb121e1bad20f64iL75a8962e49f6e3ecb157a²àd339e4f86e56e7Pba81b60ad8e93b747be42a03b797065932c49c96úZ ´é×i!ª¾©Šó‹þ!¨Ê® PP´ÖÆ¢ŒnWarn when acq A<0.props, .setP  oneD& pÚ9p EcAgnAgHisomorphic/children½¤sliceC µ£â9ðIP265f5b0f78b87afb481d1460127c25e0b875dfb3P385e4ea1425a275d0c34fc04cdefebaÞ 2b462ÝzfÐ67clientÑñCÌÒ5 P1ba170e01fa7865edb0e1d2c280b2c620bfaed1eD fdccb99eafbc85076b8f24e41815559172d7b.B„bÉwrappFGButto9¤þEEhP3e28a9b35f734a8da45bd33334«Ød7527b5725P757b8cd97f019a69507fb8a33abee972d91b3bbfA|‚Òè; 5¿þDDP469b= pb2cd6d9899c105946f9a5fdcf0f90Œ dc38ba0d1c7559a05fb8277a88541ce423f50b54æÖ‰: þEEàP8ce03017766a0836b66c5f4344a665e43eca1ee1P30ffd98e2944f2IPf8d8bdc6e4e29829fb2c8.| ˆÒÎË 5ÑþG GðRP11e879ee36ef7b67aed998457c7b96c53c2569a6P6a81746137190c468c7b2861763ef9f8290ef535ju 6j2¡(efaultInjec* Ê3XP6a8bb1e934073153b031beA¬bW ldda1cd49cP6f08bcbb70038766du De25f46f769354b06a4..xFe 
ÄF\"o5§ö>dPf66e5e96805481082e510165cáð846ef1d53d70P96«ˆ67989c5073bbf09d57d74d92acf77e1a3ayrt6Ú Ö$reconcilerÉÔ site cL î<4P499ed30d01744ð>a11d241826670a340006ebPf0e532441fcb796d607e6c20f9308a59074dd99.€:`F×:¸Ø5µîC CðRP929279bfe429a7bb3ab4e91cb619a49703ffd1c8P77179a75ae0608ef63435baca50712574d12a5c3}X’âä LifeCycleíþL.L8P12b2e78c5415caáþð>9b9ad1c090fd15f999b137Pa43f0a75303b088f25b734342055988434dedfa.Öª2»úöVLàP6525bb2e45b21f9bccdc62d6dd5b2459076df85bPc23ea8c98f899e3P5b72873211979a2e4771b.0оìMultiŒ Á5òþHH`P319c5a4226e8197dbc65b135Áñ$42adec0ea4¥t98d13c33b010f5f7ee56a650391278Fbb85b.4”ã‰}TestUÅnðRP15b0d8a4392cb71b247ac3f291c9fe0532ff06fdPab1d75d0b5e86fb99a262b2d93fe397bba369879Z­ ð}ca663994022415897358e0fa1eb60003d737b268P5dfeb592f6ec7ed2640b363425cec5b2ef80df5fPb32fbef7c51462287b4e56ec989398d994b093eeöÎ ‚ò¡Ñ­ ‰»ƒ‰þ­ FP,[× ] Up» a1 nav.Vk ë 4>(/_layouts/d!£n Pe0b1b18b‰ 25ef47aa}|8983b951c198f7792P1c2bab2f0ec01áTa3f1ac7e05cc7887249222.ƒ&–cssM.scssVðRP53028c7acc9cba952231f0f3d462a7fc25793c0ePa0f33182352560bd695ede4c57deb4bac802c1a0Z‡dš:àPa2271ef203e37496014c67cb502375f717c426d5P5a3bda983b729é \70f632467e75617a5cabf97ú4¦ïõM‡ëÂ׆µî‡E‡´Pª™ÞPJMo• typeˆ 4IntoDocumenth’' Ž 3581B…8E 10.4¥p-u…´mdzpPc7cb97392632b963cc36998df4d7\ d‡ c3AáV`ae646194e8b52676690704e47O23Z/"ðya4792e250a107d09768c91a8baafb8e30087Pe0ff1a7e32ae48ecf92e6da6f6949d269a21bd2aP4d41cf740a629dccc719bdbac73270fbc088bc2dö¶ èÁÍ≶”°‹´¯‘þ¶þÕÓ PP÷Èõ«²‘TPerf: Insert/hs top-down in IE and EdgeV¦,‰Þ&¹LÁÝI‡œ¸yoM— ] ž(ðRPbdde2483c4f5afaf7ead01cc752084276e1098a1P3948e9958c4e6a7e9058cda80474ffef617826aa*e ¾R®EÞ /DOMfrenOpera mâ9P3fd79= ðI4bf631b55424e9bb30f50c7568eb16bP255e6346883ff8190b15c6c0cb57f41de01cfba9 ÑJü-{ÍLazyTree!‚¶/¨P8fd6a67becf8751b1cc01922493dc250640fe6fa ÁìDF¬ D›rˆŽ$ swiiefrom u$a}( to staêLet's hŸtht) 8line 10 as well=make taÀclear.Î-&‰*ÿ —ý ZPs P17c0c6becaf1dac30033fá¶a :#¬Æ5 Current¯%tÐa b&o spammyB¿ >AˆE8#¡7Esáö‚!ðRPf02a919ef0092f9ac0d0edb9330d6aa9d3f69da5P28595325b2b9410aa933ac9aeb6dd8a5987a1bc4}ŸW,˜ 
>µ…£¯¾0XP0fe36f968512e0870672c1K-$ea98134ab3¨P563171a0f4bd2d7480fced36cc5f055694fbf442aäFß1 9c3e2d833 & b885f1a15Þ 0043e768bdb209(6 05f7 52f¥@0ef818cb01419b867~/9fZ45Q4p1b080c5c5960c24389333870bfe59¦þ“í“ œ·ÇÆi3¾äéÂÿƒþ3q3FXXWrap\(ry6&z)in a å† site n'þ®/ ...un) they"ð have a wœ$. Also,  tagNamž)$í×2¶ refs&)onsistenåâi8ways look like  'Í  Íseffu!vely hi3¸i5¦ i65$× detå½of real%4S si]*Xyou can no longer get a±íone. Ini futu* m! wanZ* drop« 1†%:ø(refer direc‡Ø e!Ã` c•¥us!p hacky way auto-%^>in«1of Äà3E!+ s6 at‰ given str8æbe £d. Be * suggesè )¡welcome­$åšážàbrÁ1:pDO&t'ž(ðRPb839089d2b87a1feaca1ddb41397a7c2912539efP64328ee179ba98fab8869e701d0e78903406daf9½$F¡Ä®ui©Á>ø’%ˆP3f57ec4f92561c05a781ef54219b655ce1u09bP71f752e9e300a027'4 739f67bfeÝc7.o8N¨"Tv0P7124d59e3fb0{0082b0e582c0637¯154•)$Pa6f74256›@5226b913467608261"d517f2.Õšo1š(ÁgÂ1P€'T34caf6d1977748cabc1c8c$8b66ac8337¡Õ€206afe9fc5e08697abad356f1bfc550d6°.Å …]¹öíÒDOMForm¾º/P8h102e7b10730e972È5 a70c4e5ceŽ'P429eeP20c2b038ccd656q 486bãi#îi4765d023ˆA4e3c66846d6ca42 (68bc6ff4929,f7afee60ec1e+5df5ab2a&(,19c715ed2df9„7# core©ºCq¹!onPff3ðL440818755e679544bcfc749cc6ab3cc454P564c7311cc41dc8e4b33f66deaef435ba7d839b7–@¡®–:{œ†"˜P9f4be86474926412903eef72c7d73c3e3a737fe6H3f03fa1c5123233569b!E 2PzòPÂÑ …CGbDocuä…‰ ÔŽtP. Relies heavily on P5'sºZ . C’enteþ /½ cal ç8. WriGto "/ne  tak”hform: A.B.C.D LABEL or./N  w…ž,iÏ -F( address, Nn3Dger between 0-32, ²d&zSma;bel¬be[ d. If /KLomitted /32 is assum!N design-JnetmaskÖ ’. Entr!lare mded by$m‡specific2/Gpair. 0./0 will ?©zdthing, while 192.168.1.117ª. m exact%ãe A~ . A%ÃsŠ%° "@",¾ 4nounced "web",!&definò@Processes can not!.as!Á)he web O . AnÏB# rEbe wr!`n%}y any†hãPpackets coming from ai-`JHEso8 . Us"ƒž E:vi*ý3Ptrict MAC policy, but%¨;< has been requesAm=times.åA nlå|i# has 1×dÃm. 
It did%M…rightE¦!Pq3EÍrkîus!soMw:þ <77bÖþ>>[¹è <0fd®ú> >Ñ–þ««êå:ixîç^ºÒ  ,securityé .hbP3F p559595ad48e732a306c8d759a5ce5X80Pb79582e4fbfd6e82dc2Ê @90731953f5ba1926f*æ :NŽ_ac¡a$vðRP247cec3b5a43abfab3939148c6a8ac97faffee5cP2e0b83e77ffea727375a23e0abe29e894573a251œ4R}lsm.crP1b5551dTf74ab2119db364ef4dab08ƒc373P6´ a6177c25e¢490e2bf44711326V 4502–0N–f!-fðRP594e934f1385b04d97c79f6edea4c16c0d6b3eccPbf107a389ac14c66b8c25df297bef6d56cf08c87Z– @efeedcf2a9137c9be!vP3cb76b05315fe231197Pe!fe7t,8fe75e8ac081Ÿˆ0eaf59c038826P5a8095e9d02da19f3babÕ882c0520ecd62d6aLèAnilKumar Ch^11d1de7b71ccfe0dc742cdf48cef5550211c9da9@ti.‰ ðO —„„ ”€Ðˆ‘£äBenoit Cousson^1f57ac750b0b4fd69f24c35d4e23093b91dc3e77@ti.TDÒœõˆ x€ñÊâ–ÍæVì`¨ XX: # ñáe OPP t^Ȫ. l DT@í f famJ f ksçisšis deÇû OFÉö,of_init_opp_() hel›F® lsoÁ®s cpu0’ ly ÏÁúá corresponÈû fil1- 1;…cpufreq-$ÃP×REH112485e9c543b17fc08à@56c7a558b415d06afÞðRffd99e96d4c54a177ec251026a1cd5e1122af4P1cbb3a9a132969ed1ffeaecff2f910619d4470aeîá¥áŽôÇÆ ‚¿ îŸûÆþ5Á5ºØÎ Á5H… ™³ÏÊŠOMAP: muxÉ/¡5orRco6€% plit s al parti3 sþþHH StUng¤ S4,¡– pin¤“ figu is l†C($two differt Ž :Ï (CODE_PAD– WKUP 0). The first s insid»coFower doÏ œòsecond F7 wakeup. -å“Å» apabilityÁ} add Nnumbe? ¿ dur! boar*)it t depeÉøof Soc2ingjŠ)ák flag“ well!Åorš&@to avoid explicitKJ check.ÿmu!ƒre áÓea¥b%éÝ mux0Å !Ãtop ™ mux/Æ/<Þ > if)ur%·bm÷is!,defaul%i anksº,Tony Lindgreá00102733cced25434X3c88aaa47a4f5de@atomideú(> ¯efollow!jim'R*:-³Á_'3 gett-;Îôso platæ level „ !5^Æ( - FixC rx51-ã&t&Â'new API.Do%st!ƒ6z” each%A‚. 
LookK¤ *debugfEÚtea¿1VÐDan Murphy <50b72235ef2106735bf2282950a69d11e6d56bc7@• -Nes%oni }reWH"A/uplŸ bugsaq|An,ÈGadiyar <91e19066a3bfb79f58158f6450f17226f60f5cee@th R3 zoom„ bug ˆF^þ˜ : þœAœ Ti-)-’!– Cc:ÿLWalmsley ƒˆþFAFhÎU}xî}^ºECc„Èò°dÆå}–þæ]æxî%†î&m&^ºX{~úX^ºÄ NRX" *~'A-á/-*h/.cº.P3fec4d6Qpadae012bff4a250a2eb7f2e11a56d*\da20d73233db6750796edd3dl!$7a49e010ff*Å 2Nºmu5(jhP979e9d1c4659577f4820a5e5c0Ü4 8c6e792f2«422157ê&t718d676756feaae059e2cffd152b6eŽ”hf”XhP350c04f27383963c581f{ 246a'Pecc691e6P86549bedd52¹"cd3099³ 30ce36053$$7.”R§(muxÍ$¦ZG P414afå @456e84dc8c8623e2f!¥827b66b9ë¨8231f0ae4856c925fa49660df11dd62c79866e3bAzf}3vœdP84d2c5a7ecd7ed729da67a18eAAd9a4973a18d3Pcb6b40436e192›#@2216c569005422bca!ö9‚þ)mux™*^» \P574e54ea3ab7f5f5947701d™-(bdc902bc0a9ž.p4113c93d3f2787da9b1d629338bdf!ù65e099^b)ðO385a64bbc2d2c3ef864190b010c74afc5b850195Pe9e48a309a724100c507a86dbc3f5aca2be7fcT e3e5a92db430689fe918041e99cb04b8761f5a50îddôöÑä ƒŠûÅŠ§Õþƒƒ €„ðõƒ €’¬„ÃÝ<2 dts:. 3‡gpiì desÚv!ž 6 GPIO4¸leró3sÑ-1i’ 3þÿ Jÿ c èGrant Likely <9069e6f5a2b566e2674a0ba1e2bf39c12c195fad@secr$!.caJ´þ4Ê4ÅûˆþÐÐjÒ*/%&…."pŽ3 izð>Pc6121357c1ebc95c29dd29e76f78d9972441e216P4615ebb6cb93f03d2bc1Å806c46e1ebd78307Z¡|476b679a5d785d1244f6b43ad26877acAÅ þ¿ο1þÅöÅ¥G|öQ^¢? 
)ƒ Ä dD¦ø9dsp39Ê4Ñ90830a32ce08596 T3638e05ddd15877f7925dŽ ®fivaš 4ƒ4˜629517135880cc5519280acc84768b076cbaa9Êšmpuš 4XP1a5a42ce21bb0ac7480b5<9aced96914c33910 ™‘ òðRd558785c8b2c622d497ac30e7e294e7fb27e38b2Pd202bb5ec7efe3912cc9ea285bde04479741e4efIgjš4r¢ hPb85a39debbeaf0b592a705eb49L02a 2127p,61c8290”"4a3™9f791ad99X a48d5bAh–š0NB pi"fTP9e78261fbfba8f36df93am!84ca299d219da2fe•-pb7a9e17fe2683889762494249f356A# beabV ‚a`r·#…ÖS VagôY?¤Ð’Š’ô¬Pb3431f5ba402a98a89b78a9408b4972d8870df4dP416a752054267e07759f9fd011a791906a06373fP7498176803ca57200ce658da3850abcabb5cff84Rajendra Nayak^4802c9324e296bd4f067f90ad87878b5665d33d2@ti.comÖî¦ô ”€ƒðŽãÜBenoit Cousson^1f57ac750b0b4fd69f24c35d4e23093b91dc3e77@ti.com–ˆö x€©Ÿ‚ÆÎÝnarm/dts: OMAP3: Add mmc controller nodes and board dataðarm/dts: OMAP3: Add mmc controller nodes and board data Add OMAP mmc related device tree data for OMAP3. Currenly limited to only omap3-beagle board. Signed-off-by: Rajendra Nayak <4802c9324e296bd4f067f90ad87878b5665d33d2@ti.com> Signed-off-by: Benoit Cousson <1f57ac750b0b4fd69f24c35d4e23093b91dc3e77@ti.com> Signed-off-by€6+ <4802ª+>Â]$Signed-of!&€Be.Z <ºZ>^º2Ȇ 4Darch/arm/bootA@/omap3-beagle.dts’$ðUP54556b19c97d625e6c461e418692ca2c9e83ddd9P8c756be4d7adbf4b15bfc83ae6dcda1324f68453¦8Z¦Ÿizô[Pdc9425c48ed9ad7fede60d007f04de4dad51d23eP99474fa5fac4aa65585625cb77b19338fcdb84cftorvalds/linuxP9a6423621df893512fd3aeb0e11ca9015c8fba15P9ce99ee5f7171aad84e09c92322ca902112693a5P8e80f66069d54fd22e1a8e452a760f511f501b48"Sebastien Guiriec^6dee47aebf9f7c1a58387845aa7f92b0e456dd61@ti.comðܲˆ ð€øÑ…¼­æBenoit Cousson^1f57ac750b0b4fd69f24c35d4e23093b91dc3e77@ti.comæœõˆ x€Ëì–ÍæzARM: dts: omap5: Update MMC with address space and interruptsÔARM: dts: omap5: Update MMC with address space and interrupts Add base address and interrupt line inside Device Tree data for OMAP5. 
Si}È: Seba2 <6¶>þñ†ñ†î§ §^ºÙþôþôô•ôZN5rN ðRP7cc47ad849b382b0e1ef09b62765e6b746cb0ec2P930dbfe3bcff0b3b6d67ad62d88abbf88477401aZNðf4b224f2b48e3a5203a25fe64ef46cc5c54e1604P96263244045aa51a0bea0f17b29759e9b4327ae0P510c0ffdd408ced2654f073d0397f0fec410a235"Se:ÏÆöDêܲˆ ð€Ýゼ­æîN‰Nâ‘N$¹›ê–Íæ|AZ GPIO withrÖöA AdþPþPþPþPþPþPþPþPêPô461a4f2ea7d3ca43337ea7968bb2472c1b9e1a50cP4cd8acf32c6f84c584f5292d67e53302095c6757torvalds/linuxP11c27069cf963f7445a7b515bcb703d90ae0c162Pcd7b831f2a50597d34fd9176770767b79a86cb6dP6bc9c66e5657a33d19559fce730bfa36112cb1ccAneesh V^a184aa3b466e8073ca99327f8d24823e52c6b8d2@ti.comìŠÌñ ”€æì«à½ÛîG‰G à±Ñ„‰› ðÿÈùÇäv™XEMIF and LPDDR2 device F! 4— s² ê> DNathe Šsdram* s inú 4 ¥memory ¬0s attached to , «ðX. Reviewed-by: Grant Likely <9069e6f5a2b566e2674a0ba1e2bf39c12c195fad@secretlab.ca> Test PØLokesh Vutla [5b4d8dc9ea337fff5fd1729321eda9873ade0b09@ti.com: RÁ Ded against 3.6-rc]>• HSantosh Shilimkar <ºk> [º² (: Use label!ýÓ to accessI'nodesB®þ] ]1ˆþ2A2jÒjTQj|öj^ºœ:¸tæœ^ºÎ>|†î…M…^º·>†þºκÇ RFDelpida_ecb240abacnÐ ¦+LPf97f70f83374cd4ba± Heaccb99624bb0bdc205*œBF‹B4-panda„Ž#àP9880c12877b3f5347ef4d5c84cdfadeb028a67d1P20b966ee1bb3cfFTdfbb4d07fb0391bc41dc42¤>^¤sdp¢‚!ðRP8869c1e209fe9c1d2050eaadcbc83ed96e3556c0P7e83a61de0b25779d05bf4cecda3f8efd5ef525d Z’ 4œbà ðRP7fc45bb8a951b3b614a1c1e5e34043d15a835a2aPc7dc11feb9da5ed55aa2e67ef721bf9102ce0d06Z’ 1cb8f9776Hhadc57885c6653943511d282633bãðªa9bb2c2e241511cdb93398dcadd245480801fePbe186cc4de35fcfe766d2938c13ddf3935d6c159$Richard Weinberger^320bca71fc381a4a025636043ca86e734e31cf8b@nod.at £ÓÜ ð€ÐþÜøÃŽþXX ”Ÿ‚áP ؚж½ÎTubi: fastmap: Implement produce_free_peb()„u¢- IfW, requests a O8 PEB for a poolÄ ,UBI is busy erasing,s we ne& offe:functioná` waitQ@one. We can reuseJÇ from· non- ú, WL code butŽdii! lockŠðesemantics. 
Cc: 4fbacc2fa0ffdbb11bf1ad6925b886ebd08dd15f@vger.kernel.org # 4.1.x- Reported-and-tested-î Jörg KraÂx™ <32²™$> Cc‚îýýþDD,RRBŒþC)CnÚ}>…ˆþ~!~ÂI.Ogetattr()è©°^ $ Directly¡ åi   fields Te2V Í¡`c…ž0problems when¥VunderlyM\filesystem does not have(default.² iÑa¡Û. So å 0ead of obtainZvba©˜ ³via d_ _() ¡A vfs_5Åm H whatÕ[Õ,kstat struct­ê Al Viro <”L9eb4d5d70b1d38ec6642#Hc33a2781f63c@zeniv.s¡ó.uk> •¬¡ãþS S2¤: þdd­âŠþ¢¢vê3Ópoå!byþ’þ’ ’:^þ×Þ×_ x.>×build.cfðRP7091fca0fb445d95cbcec3024021e216ac05038aPef3618299494f4973fd528ae72325490eb622b2c*$ ,>kapi.cbðRPcc6fa0115c9ad86995fb0bc577df9dd474493099Pa9e2cef7c95c24c0deb4d6500d7da7f00424db7bZ[ð^af6aa1b9cad7350d675fe3523ab062e0e4c829cbP75bb30ea47d2d222b50d7bf5b458ecd3b127a326Pb86b413a321 `b75373b48fd7ba53fcbc7ec4cþ[Ñ[8Þ»Ï x€çì‰üžˆþZÑZ ÒŸ¤ÑW8±˜íމ2hostfsа(shared/os.hvðLP021104d98cb351d66f9f44353a6ddfa2da71b23dP75298d3358e7f3d2c7ff4e2b2c5ed8b81aÕ6d˜,˜i¦/sysrq©ZðRP33cc72e26c6ef63860fe04bc6e61b4daff053202P7122bf9c753e52c8acc073ba3b7494dbf9f28bddŽ*:vtrap.c^$P5c3aef7423ðFda72a0b252d4ee4b118d14a969aP974b87474a9900f1909f845d449eba90dc9b8338E^Ab2Œ os-L—/‰_.cn˜P905924b773d345b8f49de2905e62a546f6461c[( 7b605e4dfffa91305e763e7698832edcbf80d3bdV#d832b52a15085d04039b01cec56Ù39ÁÊlf301Pab3d3e9887c06d69d92c44á417e2a¥ ¸b5aeP9aa272b492e7551a9ee0e2c83c720ea013698485þSÍS ˆ‰ÈÌÅSäí„î†þÉ É BWXUBI: BÃ8: Explain usageP0blk_rq_map_sgá®/ þ¸ þ¸ þ¸ ¢¸ >Ô b!P¸ ^P00)60)"ðF2eb81630299d12093fa96c323890P995e61c38f24f8a19a7ef4f455bb91cb4b017450V ð}bc1d72e73be63a7c4a07eb10cf51e91f20bf6076P12c51927d7fd92b9585071c988a3b8e61ffd7d20P805f11a0d515658106bfbfadceff0eb30bd90ad2þÙÙ þÀ…¡¸ ·êÔäòþ¸ ¸ ®©×¢ X ðãêñòF~ ubd: Intrá submit_¤()ÜŠ&X Just a clean-up patchŽ remoä openXd variannî!ensur at all*$r”tò(he same wayþåþåþå~åÅÁƒ4уqaubd- nð@Pd27c703be1a13eba5f0ca91a5561e24fbd26b494P1812bc81715b75ec47dcddU#4e9971c4fe03412Z0 t9e6a57d2cdee36a6d9c5d5cc3db857L416719Pcf8fc0074fd5 ðmb6eb552010326ecc58aa7671P671312304ec73059a0128ff3ac7a5ced459098aeHonggang 
Liddaea1c6c75b10639714b46b7098…87ac389ac1@gmail<3<Šöê¸ À€­õƒú¹ýþtqt ú±ßÃit å²ÔëÓ‚`aN(delete unne© ary Ê0memjf arrayê¾3 1) umls \manageFrough_%.->ë_ +á, notl þ”, s± ¢ªË. 2)8 MFÔ has been~ 4d by a *local* r,  s *map,é init s"E. j%.ŒbeÃ$ed only in6T's scope. As a result,./ wastUbout 1%total m®&FxHY~ <Æ~þ›V›:ž€þ¨dÆÝ>†þ$Þ$¥J¡NB±N3ƒ $mem_user.hŽ#ðCP46384acd547b7590bd75e4f564cf203327e2e522Pcb84414e3e6663eb280c555d8‡(47c5c6cad27* 0¤ physmem.cj˜P30fdd5d0067b26c91fb8c831da5a1d4008c79e’549ecfõ(57441c95e43 8aabc99ce4723f7fj’um_¤J’ðRP016adf0985d522ccfb5f22e6d287bb60dfdb4e61P9274eae6ae7b81fb976f059ea9d8afb6b420722aZ€ð×d8f8b8445648c267a24f30a72533e77cb6543f21P771077fdccf7ec1b8b1f09324210867796a52dcdP4a0b88070406323487bad730d8945f482151a145 Mickaël Salaünhb6edf9c902cda42a2b91f8be7643dbd9a2bf61f4@digikod.netĺ—è x€²ûÛ냔þR R Ú–é_ Õ¸ážÀ”DÁR0full 30syscall.h supfʆ%G0subA @itecture-independd6áKasm—v allowu*to aŒ sy0 œ$ parameterñd­º s: *&"Ø_nr() roll2get_erro6+(return_valu1[se Z: argu+$s2j7V 4 ch()6vidíÛ8x86Ô2™Ois,såa&?-À_s $Qt HAVE_ARCH_SECCOMP_FILTER plus8Ž-. jÔnspired8Meredydd Luff'sã ` (https://gerrit.chromiumé /21425Y:–Mi67 €$TJeff Dike Ø Anvin <8a453bad9912ffe59bc0f0b8abe03df9be19379e@zytor.5$xKees Cook <50cb10d2be062154b17a†@6a4eab34f1cf1c84@. Ž Andy Lutomirski <2cc0d0d76de2e6f37b92deeb~d5cb306fc5dc1@amacapital.ne-ýÐWill Drewry <72416dde44ea762d19e3a7a555e3b59ce98f8eeaJ–2ã <68a628d’8317525340826b04á ŽþÖmÖÖDCc€JúhÎE}þâ Úâ |þy‘ylÖ²‡‚î³…³fÊé}†îêêdÆ~‚îTeTj5ÎX’þYµYnÚ“‰†î”­”jÒÌþͱÍpÞ‰ˆþ Á fÊ?>œþ›Þ›ÑrþêÿJb :2? 
™ .hš'\P9fb9cf8cd39a3b29f45a8…"44281d2abd82a26.Ú2SNå n8P81d6562ce01d5c\(94691e03555¸+Àcf3e10fP11ab90dc5f14e95b521ee389e800f8abcfba0ca6RNZ+èå·#…ÖS VagôY?¤Œ«ˆô P549e78db9438374cb6a58201cc5f2194b222688fPf3861cd67f320e47b09b4aab321a6432b1a60c2dP0acdbbeb6dd435f5f3f1648fc3a2ab5fd07b5545Al Virorde609eb4d5d70b1d38ec6642adbfc33a2781f63c@ftp.linux.org.uk¦‹ëä x€ë€“гÕ$Richard Weinberger^320bca71fc381a4a025636043ca86e734e31cf8b@nod.at´‰Šë WÐªŽ«ˆ°Øfum: make load_initrd() static, kill shared/i.h¼Ê6@ Signed-off-by: -& >UF% <32²%> 2¬Šþ­­vêë>”ˆþìì^º`Ȇ>arch/um/include/>%‚!¸P22673bcc273d1747c618d864e9d660d344ac1f4fȆx.wkernelQ”cfðåPd386c75c88eb2ee117c3cc1c84ff18100e0705f4P10cc18f729fdde317b4fea334f0d8604ae3c5baftorvalds/linuxP2014d01878a8c38111eba3333f0d70ceb91f0bb7P12ef1cec8ca70acaa3cdebafe243ea73d3da9679P09e129a6038ead049b3ee509475420963f052a80þwwÒëä $±‡¢‹³Õ$FfÂ4ƉŠëWûس…w8…A`erge host_ldt_{32,64}.hމ`ZþIþIþIþIþIþIþI‘IIHeÕx86PHsysdep/Q.hš&ðUP94518b3e0da544bc5801a3e7cdd34d2862ed5eb4P246ff7ace5e5bcd66a65666deb634f22e55224f3…óN†§_32.h¦) P0953cc4df65284bc61745abb2fdb487935aef03b­’[64š„)8Pe8b1be1e154f44÷T7b5b78d1c7fe120784e7e2öN÷ð×72f83af99838bb663f85b65386db5b875748f379P612ddc67b783a82a40650acbf8a5a2e40f712307P2dc983c565e03f6f6f96c5fe7449b65d86af4dee(Tarun Kanti DebBarma^466c3e31e36c5952b62acbdc5f3c0bc7bd450d87@ti.comКëì ”€èǺµ›ÙþZZ ÎÕýò Zȟכ·’ÜLgpio/omap: fix debounce clock handlingä g’)ð< The dbck_enable_mask indicates which all GPIOs within a banpve„ Bd andS is  /dis_ld based upon this. But there/Pno mechanism to track Q\ (e. 
In order"manage:#\ we need additional flag›llogic so that turning off/on g Reviewedø hSantosh Shilimkar <5b4d8dc9mˆ7fff5fd1729321eda9873ade0b09@ti.com:QØKevin Hilman ¨ ŒþE)E^ºw=w†îx-x^ºª6„|öª^ºÜ>‹þ€¾€åß¼ 0drivers/…û-¡.cjPd483cc9f0c638e485476d54ec8c6c9148106u pP69e61aecab719511a45c8714305Añ 2727f796fÁÄtor6º =ðy752f291763bd6971521fa44c76ad9e937f7bPb8d934798fbc29b0bac9c81642eb24feec31f5afP81b279d80a63628e580c71a31d30a8c3b3047ad4þiÑiD’Àòô ”€ÑêË–‡ÝþZZ ÄÓÂö Z ò«“·êÝ|!ÃÝØwakeup_en register update in _set_gpio_ '()˜ öAÅóXre are two ways throughÍï l2“Lcan be programmed usÁ7Œsuspend_ œX)&Pd correctly, its valuíLt°} <.Ø. Fixé‰ irq%ptype()->Ü_$triggering)5¥ ()Ls#6 hO up()þF’FöKþõ:õ>jþõþõÍõþñ¾ñ>þ€¾€þq˜7cbad8569268fb18b2d28e2d7c144b93235ec3dÂa1Ÿ€3b34ec4e7025c4d652deffd4076bc0d0bZq4ba805be53cb911Éwà1d368f859af5e20d695fPcfcb6050d18f108b1c8cfad673ebf5ea709ðOcPae547354a8ed59f19b57f7e1de9c7816edfc3537Nishanth Menon^50c3c575ce2bb12c4‚H736ce59e86fe81915b4 Ø¿ÜåÉÌ–”é)(Nç Æµ ¬ã. †ûá½. €wÍlŠ  irq ÷ $he end of Ý configura- ¡+ restore’.I ÒC Setup¥ú interruptKÑó s only afÁÿ we hY •ed= requi edgeÊ *¿4s, not before,$ prÁP$t spuriousÅ_tsÁ° part! 
ð routineFÐ 6ø <ºøB8Nôþ þ þ þ v >$€þ•^ºÇ>ƒþ®þ®Í®þ£ þ£ þ£ þ£ þ£ þ£ Σ ðRf6b2c51b29354bcd19bcc286ec2bff79ac6ae8a3P41265e823b23a7908782ee002490557352e3a2b6ìF£ ð442a4da7cad64cf7270cbbbed9ad69dc4a1bc263Pe92dac2072d807dc03eedeaf0acb1eb0a610c96bPa0ef78f353d6edc9f88d3247601f1dc5ad8f4b84Pe¡Õ|Ujfalusi^e5c0b4cdf99ae1d408b9c4889e74b54e02e008@gHÊ÷Õ³ è€üÀâþúî†é†LŽ¿¶ ×€ÿ˜Œ†«ü†h: bus: d$_l3: Remova platform + 's r  funcá:ì òF 9 NOPít¥evm_*ÁëionF¤Pe.j <ºj:îöö Ack¼ð>Tony Lindgren <1001e8702733cced254345e193c88aaa47a4f5de@atomide½:qþÿ Test  ÐSekhar Nori <946cfb81282bb55c59f600e7ee3d3b2c6973f7a3M£>NœþÆ)þ¶þ¶¶1òˆþó!óhÎ*>§þ*Î*U]zò]^º*Ì 2 /bus _l3_noc.cn P25b< be880318Gð>1a5d06279a8b86efc1215P0eff48585ae36b3051fcbec27e8f82c416892fe0ZÎ ð×2b6bac9ee99fa7d60dfa0debd82ccf4217931b1eP6d6e60a24a7c2eafd80cfaf59fe09a6ba7aa9f5cPea99b1adf22abd62bdcf14b1c9a0a4d3664eefd8Helge Deller^5b1e7895819cf7d6bc77091b09bbfd6d330a4e74@gmx.de®âºš €Ï°ø÷ûîHelge DÚQxþ«Ïš ð€÷‹àß…ïbMAINTAINERS:óµ isc UPitecture file list‚Â4Bx.Çe1®> >|öQ›&@=6 P7714c3c363c3f1243e12dca70853ce16716cˆ €Pf35a259a6564a4bc31a7dabbafd8f3fz#c6a03Zçc6fe6b0½ðqfd923d11dd0388cbd561ff15bdf1P88b84c436d6c3e1e763645cb0f41174132e16c88Pb10ff54f9f58adfb708b53e6e56ed3d7804ade74þçòì§ E–ÁÛµŠ¸þRFRvpaAÔ`: hp_sdc_mlc.c - check re§ÛLof down_trylock()–ê>þüþüþünü@±ÿ,input/serio/.bŠ"ÀPb587e2d576ac2983393b5a49791c183250ceb28dP820e51›&t262eea190daba8a04f33c96989a54dZ&h784c2213e79c094ffd9c1118722Å$5fce5e7D€61827348b112ad6b750886e64cb8153c3I!@acP5fece5ad24ab5×51€*D8bc9332545ea8705aîÔEÔ Â­­ô©¼鯤Úê™þRJRNpm&W¡ùspaú eanups¶unisth*Þuz* C@ upüt Ts^mark unu° syscè3 suchF| öá þ2þ2.2JáY/)k/¥) uapi/asm/5dš'pPcc0ce92c93c78905c01ce06caaedq˜41588db2Pa9b9407f38f7c63a3ad9f42b0dcd7Á31fb8d9P$Jéð^ec758f98328da3eb933a25dc7a2eed01ef44d849P9c6e20e3926fc8eabdca18b7b6ce54908dff8c81P1c4c6597b67¨$\6a09b34deb337a8b14f4adf3î@m@ø–× M œ¿ÿ§êîeÉe¢‘ª’ ’)$ÙçÚÜ‹ë¤)Ôd: add CONFIG_MLONGCALLS opÄ1é link]#$of huge vm±* executÐ# s–þUFU, When builde$a 64bit keº+a©ch M™sÀ necessary›e  systemâÀ H$often gets$â,$hK 
4nker won't be î!resol€branch stubs. This patch overcomew@is limit by provi Ón=o comp¿ x œs$-mlong-‰~ )r )«þ†þ†þ†J†×)ø!&.†KâV,Pb77feffbadeC<1f46d85be47d35a0·$4b2¨-3df1cÌ x17a3c2f34c85bc3825c02ce1c937451¹)ˆ(.ˆ MakeAVZ¨P87f64d6b5dcb5569acf4dffcba4641ad9abdbd3dP 14ccd066e5550225d1ee3566b3ea669159a2FÕ 6998. it rü3 ‰urÅÏvia¥X/procy8(/debug/exceM-Á4e vari1©& should"K 2Q|öQ^ºƒȆBarch/IÆ4/include/asm/u)Ö.hŽ#tP63f4dd0b49c29c758807b68bbf9b7ðÂ1911c3P4006964d8e12646761d954b9f73ff0b503e736b6torvalds/linuxP70ef5578dd8011eeafa999a18194d9b548a4d889P9d6c403711e784ccca4a38761435424e44c27306P17fdfd0851617b6c18c0913364caf2a54171ce85™@¬á¶˜ €†²¨úÿíþ™€¦·þ™ ð€ë†¦•ßîvMPILIB: disablañaŽof floataT4point registere4IÎê>¤Xe umul_ppmm() macro for `t uses the xmpyu assembler stat¥? which…8D calculation via a^Ã. But u‚êinside‚²: <0¾>>Sö´ : †î¥ ¥Êë:ˆþ ê 0dri:  m.cjðRPede614616f8efbb4b3ca20149ed73c0b877c37baP3aeb3279c92ab976d62fad84223442e441225963ZÖðÉca0ad83da17b6ba07f9eb5902e69daac90c4fa61P70f7a317a22e276721535f652624e2d6c6c3e4ddP0fbd06761f5c17cc9b20e02af60fd7ee9c895996"John David Anglinbd3a7a82a6ff309c7c46b71cf01386d84d6ca7c2a@bell.net¤½—— ‚ âçÀ´î‚ ‚ þÎÌ—‚ $·éê«Íí–‘± Provp __ucmpdi2a÷hresolve undefined referenceá° 32 bitåÓds.” þNN¡%8e Debian experi´al ` ` source package (3.8.5-1) Ž( fails with¸ fo/ ª¥s: ... MODPOST 2016 modules ERROR: "_1 " [fs/btr .ko]9!R2q,md/dm-verity>8¥»attached­“-s{ Á#(roblem. Itìbased9¡ås390 "ÍÕ-À.cFJ Jo:— Cc: "James E.J. Bottomley" <78abc16a0671a4bb8f94993f5c9afbb002942fb1@I*-%Ì .orgþ´>´>,ŠþþþÎÉCc¤þ3F3râo>þJÂJÓÛ2×å ÍU_ksyms.cŽ#ðUP6795dc6c995f7fbbcef5d6128e4ae707eb39d748P568b2c61ea0208de80bcd1f5fdc54f7aafa95a30 0.{Ä MakefilejP5fÁot04d14aecb897773538b1eadd001fdea’D51536ac733929b44efÁª<131562eab125b215h2>uôj¨P149c016f32c5cbb287f23761ecfaaa5f0fdf349b^VðÞca8e9026041544c0103b3037d8f03c1d2f4ae02P4b23cc0d2e54414d2ef91509fb66c895651797e4P6d2439d9558e259822fb487ec274cc9e362e6a81"John David Anglinbd3a7a82a6ff309c7c46b71cf01386d84d6ca7c2a@bell.net²²÷ €áŒ›¿¶êHelge DeÖ. 
Dú“ª’ x€åëþÝ‹ën‰Ü- s­ cleanupå5rc¡ô flus9  (4/4)¾õ@g ª:| CONFIG_PARISC_TMPALIAS enables€r_P_highƒÇN 6P se gXessentially alternativeBºof.a] ],. They don'Ù vËy8 kdo å­ x86 £%'s,£ y 8Õû0infrastructurâsQ fewIions. R·heB in cleFs iåevemportant>6i. ForÔ  reason,&re isàany gaæxD1˜/â! approachþ¬^¬2½î: þUêU:ŠþÁÂÁ&˜4.ŽÍÁeà.cr@Pec63de95cbd98088‡ ð>bbe4d34875e31dbb04dbP1c61b8245650ba8ef031b3500cbdd251f1d84a2aÁ"FŠðe38057477b98712bd9c735bf9d26b83c25d9d3279P681dc06615614aa94605404a58e31620d856e3caP45b6eff2a60c8bec12¡ë2dfdbÁÊð^b0afe04a2Kautuk Consuldfa1d3322ebd37bfdf69c45980e7dc5a46def8d67@gmail.comº–Äö ߀…·Œ”ëÝîÆ•­· ûŽàÞ¡·h©· /mm/à,.c: Port OOM}s do…Ð_%ª­´¶7 CÈd065bd810b6deb67d4897a14bfe21f8eb526ba99 (mm: retry¥ ~ wb bloc…@on disk transfer)¡äçÐ37b23e0525d393d48a7d59f870b3bc061a30ccdb (x86,mm: mak` ge l killable)¸ bov©TitÁØtroduced5+ into…ö¡Óí Ghandler kma¸¡! Õ " îr as well† seoreLLmmap_sem hold time, ¬G$rucial dur¡—!ÝT¸nvG. %úth.bto I#F­2á <Æáþÿ >ÿ Bª„î¤ ¤Òºþ¨þ¨¹¨,.¨yæbèP18162ce4261ef63eae30818c2eb5ffa4608ba728Pf247a3480e8e0cd7K,f4fd04004cbb* 3e11ZY âðÉ2e7b774c5849f694693fcd17cba0e78f2629P215b7d9ed5d42f8251e102d343711a5842d86047P876b2a008371663cb66637fde5274f67afef1780$Michael S. 
Tsirkinf255103e50249e3d658441816e0597170ebfc16ef@redhat.com¼ŒàÊ ãŽ„©¶ÿ…þ5 „±˜Î] ÒМ¯Ñ‡ anL!a¨it upFŠMi>Ë <2ÆËþ‚þªªfÊàþ—þ——2R€Ž#xP6c79311acdafe4f929f888eb3cdd07ÏÀ7ce579P0abdd4c607ed9dc22dce8610d8f8d1d016656553VM ðOa4e4f67f41d9bfcc2a00849a75712f09a7d43798Pd60de2e18f28af66c910e79a561e56c96d7782“˜27cf3a16b2535a490f8cf1d29a6634f1c70f783ðUScott Woodlb77916c2feab81a70245e7b5671f4f02730ebf64@freescale.com°¤Ð ׀ؓôÉLjþ¨ ä³ÑÓ[ ‚ý„ÞŠR‘ˆ%pfaÊonly õ funcœÜ ers’D :†, Use %pÙr actual%r¬s,f$hwise you'll get bad output ïes likÞ &64I rXf kct‘© descripto¾ Ý%waæ norm seen¡K ô64.ëî$!SÆht unless DEBUG_SUPERIO_INIT' manuUt *¡ifiedž 2Ò <407b36959ca09543ccda8f8e06721c791bc53435@HansenPartnership.coþ¨B¨>†î7-7â©:pÞpê:cc þ¬>¬|öí>œþôêô>ÿsu¡oJÿ€P8be2096c842390f5ecefc24186267e9cÁX1e2€+˜eaed54422246dceb236f0a604696d5cddb2549V£ð226d1c5b1d4c226429d58d992941a89588cce708P06229f0e1012822f1f38bf3a8b11a47d57e1cffdPc2dce2cf488ca8061df54733803ed5d508e8445b"Santosh Shilimkar÷(04d8dc9ea337ffn\1729321eda9873ade0b09@tiÅ£D¶øþŸ ߀óû¬ÏÍñþWWœ¦ÿ W þ—˜åW8hARM: keystone:°%#‡¡Z "a:{õ*²{Jõ*†îW WÂ*c_ zò$Æç:*þuÚuc">•ÕAŽa/e -evms~!¨P200c39ad1c8225508f369dbdecfb3f75b18395d1Å ×"8Ztpiv P57e527083746ed93a69624c0009af7ad548bcab",c4082d499fa2 *L089830700e34b64e3b92Ñ&q8ðR44ec9fffafdf79b258aa928892bc1408822107P283f708ca846903ee045e9f9374d627f7b47a711ö" ä›ïü‰Ë‚‘“òíàþ"b"‚³A $2+: board-Û1:"n u±6e irq¤ f¸.*¥Bœ÷îD M~¡]irq_m arr 7Ø"  iniæ8nqs—í 2,3 '4 Q)s$#Õ.cµValsÊ‹ the â2b¡½ Œ(nterrupt co_$Ø U#…"the SOCsËisüa paratŸ8p×toáR½5 evm ¤'sþƒ sup±"I ÝiF… òÖ>Q"þú¡ú>úþyþyþyÖy¡Fµ*Q :––%P2f2abfb82d848532668503aed30291ae3442>TP716e6b1a41deef7cd40cœ"(9a14c265dbb¿*)¹°¨¤ on.hz)ðRf98451a2ef55d0f4724b9d0cc39b3aa5d6d93P1f65b1871c231eb35abd289b1b1e4457367dad42È+:!)¨ š irq.!)RPd5b3| ðId82d70b2afdccc6741ed0e401ce02638P8467beb122b5a73d9c9f8d335635346371eb49ac=.DNÖ¡4--4c’$ðRPa8161e5f3204b6f41cbf94963af91fd6fc931a2dPf38d659869daf52943c7b233d01243cd2fef6c5bZ5 8e2fa61d40919555)"0ˆ*P3d7715bc67b0e855P3c6£He043c982d3071d5570fÎ 
Ë*c91e3a}cf6185ebÿ=ðRe00a821513802e56200378a6aFelipe Balbi^94dddeeef08b001e003cce128ddc162a4e2c6cd2ò À˜§×é îÆŸùÎþíª©» W ýû£ì‚Ït¡—3: l3: I¡é‰"l3-Åconnect x0Ü"x" 0²æ=ˆ# I pÀ1sÅ(inO>0regar"‹ ocp Šs Ê(gets loggedÉ6Á“.¶.Thò5Tfżdete1:bmaª; o)e tar‚l 'aÊ6g%, typeáuåf correspon¹adà›*;ck dumpå1á“ ü*:ð(suÊä [º : Enhac<s, majo«ù+Ù *al]>°þOáO[5b²ð: DË5çig?Aangesm%mA 4+%BB±.£ <º£> [º2aa itia2¶BA1i] Ackþ11B vêNþþÚ:”|öðÆÅ1ÑþÎ*¥2Rë"2vðRP1c3635d7f4cfded1e61086535b8729ea9c7b032aPe4c9bb3ad894616e9116d51da95a8d751d0745522<":ÍT _l3_smx.cŠ#ªˆ65bff3acb9e7c09dc19f7cf75ffea573aa4>¢x´"j›0hPba2ed9a85õ ,7df280a9ef88$b4dbf3859cVÖöú¯d·#…ÖS VagôY?¤ä’’†ô P513cfad3631e823f7fb5e713c1e3fb5dac47c7ddP0b57e29c4c9b8e471f68442c88f4cfc789105d68Pb4222e07d88715705a0227852e38734a0d7e94e5"Grygorii Strashko^bfa0a1520768723a1c60e18250fb841c342eb8d8@ti.com²ñ° ×€¡èÐÞ¦ù"Santosh Shilimkar^5b4d8dc9ea337fff5fd1729321eda9873ade0b09@ti.com¼ó° WÀÎÓÏߦù^ARM: dts: k2hk-evm: rename clock node to  s˜º20 Fix typo in F(s) Wg : "ci "--> Ls". Signed-off-by: BT >SSantos=P <5b²P> 2©†îª ª^ºÜF†îÝ Ý^ºȆH Tested-and-Ack TðCShawn Guo <912cf7eb7d8018e2943586ae6657d21fd4e38239@linaro.org> Test KØVishwanath BS <7c2638ac2d023b35cefbf2db9606a7de3366dec2í> >Ú„î•)•ÖhBêBfÆAÂd=ÿ|öÿ^º1&J1~ú1fÊg ‹ by~úg^º™Ýê*¡Åê,common/gic.c^ðRP66c7c482010899884d213be0b995eb60feead07cP734db99eaee768bb060ac684fd6cff19d33b1472ZØôi64792f253508268eb390a86f42f128d877b40776P7daf4e281e32054614337266f57797b1b02c5ceaP5ed7f9ff15e6ea56bcb78f69e9503dc1a587caf0David Sterba`a2ce9d316ca04d17b520237d2846a218b8284e52@suse.czòÂþ® x€×©ùàøJosef Bacik^631dfb3d07694fdcf26abc7aac2c6c2b641f8bde@fb.comÀ½ð± ߀àÔá­‘úxbtrfs: send: replace check with an assert in gen_unique_nameÊbtrfs: seÊ?, The bufferÝ’snprintfÅç holdŤðŸfully expanded format string, 64 = 3x largest ULL + 3x char + trailing null. I don't think that removing the check entirely is a good idea, hence the ASSERT. 2 : Da9ù B , a ¿nÐ anoj¡G.+X. 
For both cases this e¥dup beâ,pointless as‹would%# ÷,ing an entry#`already had before repeatPt )Fn FilA“‰¡Borba MEŸ 6ILChristoph Hellwig <9F 7205ˆ7a44b32e59bbfbea59d27f1ae8e@lst.de>.î5-5JîV VfÊÁþsús6þvêó`¾&6|†î'M'^ºY.õ„þÞãëD"fs/fs-writeback.cNðUPf45bf876579f39a0db10a154da1dd428c28a1cf5P3c974442bdf0edb2f7614c6ec2869d03e26a2193€Ðá¬.c2 ðRPc808183554a22b16fbdce88293c4ffa9b2f03933Pfd427ec0b372df1231b2a4b017d2ffe59a7284eav $include/H/fs.hRðUP09bbd38485f98759c316330b143a9760c8faadfbP82dfc5519b4b5f3245f5ac0b946fe270ca38ac6cNÉða9db2036744676e0a9bf01522ef24140a591c0f0Pcee53e63be6cff89a741f0b5117d0b0da5bfbf19P89346f950014f2c615ed96c630be2a9c8576743c Fe¿ PContrerasdde77a8b9cdd04c19c52ed62e48ab47243362@&UðOÐÓÖÍ Ï€¨ˆ³Ã­Ê"Omar Ramirez Luna^700dccb8971bf4d24fb97b326a3291f67c622519@è ”èÙ WÚØ¥„¯Ê‚staging: tidspbridge: hardcò8SCM macros whilyDx is upstreamed®îDDd On 2.6.37-rc1, omap platç$ internalsk“hრchanged, DIildXbroken again. drivers/ §/ê/core/tiph3430.c:26: fatal error:‹/@4rol.h: No such¦ “ directoryÓis…a tota´ugly  r viol> , butƒ ed until „4_ctrl_set_dsp_5*() areO 6Fe6r <ÆrB’Bn <ºnJvŠþ¬¬ÒS:%†îâ â‚*†Z²–¾/ðRPf22bc12bc0d343294a65eb364c6d3c62c4d5005cPd302e30443a4518819b5cf1ec9b0be5389f28af4ZpL3fc089e7c5ff914b2660ˆ $1a8d032498p$801P52fd19c7512ÁÜa7480fdcf4bee8b0c929a6dPc9fb8094(he57c75fb26c27fd77961f9ba257ÂðIEzequiel Garciav7248f52d8926fd82a5eb079a13afe9dd79e34c7a@free-electrons.ÍdÒÙÔ‘ 瀱™þâêJon Hu‚¨^843c43c6ca6ac047d393694d733847b0a7f0f436@­qþ–Ï• µ ·­ž¿Ôìhÿaë\2: gpmc: Mark local scopÜunc§ (s staticüÎ7Œpi markÅ bunch of et€ d… to™.cÞ– a‹F : <ê:ƒ 9{ <º{BÑòL>?šþ÷2÷ö¶2 xîéÆâ:Wþ~¶~f%ÿ 4÷€mach-ea/a`.cr P98c5d413209e505674fcb66b9f8c74cc3b49e218ë 37621788abd5ae52f996e225c6801ee0b5a48f’f–hrtPf79cbde767381eb5720932e4f439b¡mÀ353ec42P697ff4225d5db3281ccac5c3c3d811079a01d62bZ¿b{ðF737cf25bd0084ed85ccb1ab6c902e600a3cP8818413b24673e32ccda57b0bfd9746dd8/ °11406c2e69281b0e498d25a42902817299b6b3dAnñ,Gadiyar^91e‘€6a3bfb79f58158f6450f17226f60f5cee#ÊòÊΟ%Í¥žúä) úS Ú¢«Ï S0•ÎÇô’Ë`arm:¥´4: usb:ë ž it c- (for 
EHCIÔ¾3 - AdrVâOMAP4+adµ figu’€PHYùTLL mBD*2 Ü$²s> &"%ðCTony Lindgren <1001e8702733cced254345e193c88aaa47a4f5de@atomide.com>F<*~ú¡ÆFÓˆþÔÔhÎ * ƒþ“Ö“*‹Š!™sŠ þÙÖÙ6^ˆþÜÜhÎ* ."= csi/_lib.cf P9db097a28a74588c793c0521c7f80f8540820f61§\16b5932a454f57082527ace6$$270461d00cZõ14d3a' @98edadec344cb11e6! ðR091f5daf63P2f6f6b383a94e98f4832061e3bfc66587377a9faP839a301dc2c007ec942b73a002569¶6ô9bw7Ix*¸Æž<¾¦Ùæ ¿€—³½•©“þ  Æ§Ú W ûß‚ÓWZIBM aàÔ*è queue abs5 oà²0adds an.Jó3s ULP simply 3 a‰ object ©Ÿµ"í @each submitted WR/le "RDMA k h…þ½ nitty grdetails¡¯how¡a *r  ruptÏ d poØ"CQ.¡ð D\re‚v(,ib_cqe struc‡  which jO Òai°4he.c!ÜD( 1D" be u74g×( Þ F(-+us¡Ç $er_of. It•K(¡šou!Á -,WC¡¢an alî tive!#,wr_id field,!¨ ilar!+ many)ÄÑus^#0'stç"a |rŸastÞ!A ‰N p!89y5s\ catÁÝt's CQs 6Î#6!n,reate_cq API;$!oinAˆ6 ’eumber!òCQE%ÓFxvTvâakeA™Õ¸ üweIyE . Th+$sÓ$available:¤, ñì)a…ver$CQ>eE'pAjdth%softirqÈ ‚6!õ ext Iè!êá†æ:d blk-io8infra>˜ çcoS+armALa} budg9&,sa work‰%œ(consumer whÔ%nt y AÌT¥ user)¢.an!Õ lo.Sagi Gri!˜gHhel‘rl6ˆÑ)×rotIgc8 dion!Ñ&žbeÇ% my tw8,evious attemaÓ suckeo m[òconvere/ ReviewedQð@Hannes Reinecke Ack ˜ðCJens Axboe Test KðCBart Van Assche <89ed62d80e76c0eb24ee0d6433b48a91c2273b5e@acm.org> TNðCRobert Elliott 2Õ†îÖ-Ö^º]„î I `¾<6„zò<^ºnAMn~úndÆ£TQU„î¤I¤`¾×.‚€þØ^º  Ȇ|>drivers/message/fusion/mptsas.c†!ðUP711fcb5cec8750556de23248aa424af5f4deb084Pd636dbe172a3cf4a34cfc8b00cedd4f96582c6ea . 
¡Þ_lib.cf8Pd0bd7e0ab7a887‚ðC9d2d74eb1720b869c16208P1ddf0fb43b595a75fa2313a4776684b96824150fÈ-02F sysf!*jðRPde57b8bca7be111abc02874e034ff538c3ee1e1cP79df9847edefa6537430aa1ab8e12001e48cdccb=$"6”g.cN˜P7a291f5c7227b4a30880d0b88f3159b0643734„01cf8797898!ÐP1bc9a8dac484be598ee6a„4include%£í‹.hrðuP4e078b63a9e5f655eda6b1e71c885e96f5aed6b5P3329901c72431d5da127d57231365fde3391286ftorvalds/linuxP8f04c47aa971!|ðFaf2c8816c2ca2a332cba80e4P56f76e7d1443759ed68c6720e7f242950e220f8cP857Áf€8d86ccba7d7b42c9d8aeecde794ec8a6bþ„ „ ´å·á „ ªìäÁãÓþ„ „ FWˆ>xfs: split xfs_itruncate_finish„z" S>Y guts ofRJ thã oop over0@existing extents  callsC$bunmapi on. m in , new helper,'´G rns. Make0attr_inactive[l 6ý ð6-î!!þ-þ-V-ˆþ,!,hÎc>9îd)dÆ…*ˆT2ÄIÿ_MÿOh_alua.c²,(P9115c31f26[ðC47ebdb67d08d473fc806d2f890Pd9781045c4ee2e22e2f4f148ce4c88c7439debe6ÝR޶emc.c®+TP153b4c3547a2c36878061Ôà8ef00ca43900285Pc2e26cdef21a26102bbda6b5e5ac8e740381e352´VŽ´hp_sw.c¶-`Pcf36557a1d4daa915842434eÅíd6e88e64d326P37dedcae0aa41Ág8411626f39a7b769é 6e7c¸’"rda!m®,ØPb850954c4e22aed273a34b7ba23d129f07f5dce0Pef8caaaad76fN\81d2a38f215ca3f025e4cf61¶â\0b18a097c1ba953a14940a22Ax31f8f94426fdP04cd5ad8289eee17a³2acd8á³ü36c314Z¸|48379270fe6808cf4612ee094adc8da2G3baaPS X95622d2f422c127b88ec30aÓd7fu°55P206c5f60a3d902bc4b56dab2de3e88de5eb06108þaéa ð¬¾Ååaøù‰”¾ƒþ·í·¬©†é· †¶ö¼àƒvéxÏ8re-lock door af8EH“‰»s Ø wereÐet ê> Setup Vusl$blk-mq I/O‰hò •,up if a host“ aR gle Ÿ žÊ(Ä9 ed eês EH. /surjösepheµm–tI) êJto6"ƒ9 <(hus might hl®Ptheir state. Otherwi ðEH 4be- b ¹ on!8_get_request as£ se non-reæ) sNin!Z˜. Cc: 4fbacc2fa0ffdbb11bf1ad6925b886ebÁA,d15f@vger.keþorgþ6V6 portS@Meelis Roos <6123¡6,e3855934c13dS 4cf892514I66@ut.ee2MòH*À0: Martin K. 
P@Dsen <0384aaef27f06…tdbdb09a807bb339f4aff9fd@oracleÑ CcpÞ{ê:þIþI"I5éxî¡\¶.þy¶y2¿þLQLfÊ‚ÁȆNÌ erroý22@ ¡ÕP9%ˆ468225f372bae07f36d4779f8c19f27c6e3¹ 01e53071600c7d- Pbd3d71a16f4d67bd829a8ZµžyÀP8e336f66e4e712da3f6d3f9a2f87bd2e0a8ca781P681b1cp8e3c7e2680c93e8188c5ee34215dfþm"m¬"͆„áþÍ&ÍFW:«k$Ä&ÖstartøG:b >5 i©aÁŸ0 length wrappÂat ev¬ to ab ×oend_waié“ tosspagesèåkátwo;er·I&–u;ár>i›dË&s leftMÊ IRIXE re we caná¡ÐD{ca # justr6— (aka4‚_6s)&5as we w]%toå›riCall¹ ' i_Ø ê‡.Sõ<cor’ alignme @too large offsets ¸finá¾2þ·%J·%òá6:î­­ þ¼þ¼Ù¼"Û&þþþâÅ<Á@vOv Pd48b7a579ae14bc4f90290535a0f938213571edb¦—*‘FÅZKDa098a20ca63e29bbd0d<6e287d87ca796fd8!šur†ZK(964cfea7768X4afb26f818b8a761ce652b1"žu†*¶K a4f56a42ef90586438effd244f4697e86fafd312¢uZÞx635d98b1d0cfc2ba3426a701725d31aèðXc059aP2a553dbd044567d3c76a3864dacf1bea62159358Pe6c11dbb8da81c599ca09ef2f6311220e068acd8þ«"«"ŠÀôº «"­‚Ñ˸þþêê š·Á¼ W µêͧšÿp‚ : "Í (nr_phys_segÅ2 ass6*&",(_init_io–Î; l, I shouldé·bs! lledn"{T ferå , so¶ ¥'a7ÈåÍåó`rá to2‰þ Š þ    6î||þþVþÐ úÐ 6] þh,Öh,*T ÊË)ð^03076b2b5debf07915a0e3635b2becdf473d17d9P150d7bb675a1ce505e475edcf2cdbb7c721f4d76torvald¥ |P9cb78c16f5dadefd8dc5ba0ae5a2fbD59419b3P576b86c3e™ 4d7176285ec2d27Ý a55f77bcç¨55f516bbb983915d6cbfb5aa592cc0a5a99fd00:ÆK ¹Ö­qˆÑ¡¡ªþq±q’´±q Ñô’¦¡q*©6…464-bit LUNsÔR$ The SCSIÕndard de« s?+ ežJ à 0arrays employ.% 8or hierarchical7 numb¡becomó&fü&¯on¡ o upd=%he £"ck¹ 6ØW;62:²î¾1¾1Chris.£5ª_2 infradead‚:\DEwan Milne <2c554dÃx6cf1e2efb352de0d4bd0940eeebcf1@.g%:øî¨ lst.·2>µ þœÖœ6 ”ò 26ƒl¢2296’€þ½fÊó>†ò-ôÂ%3¬J N "s(ata/libata-n.cn¨P72691fd939483af322e4460d8d168b2bf5899b46PÏ ˆf66d70fad3a52e37e2cc87bc6c5de5e8ec7.(”.hZ 5ab3aú$ðC1158c9ef0e618d6c8ae0fdeb373bP5f4e0cca56ec5fdc2cf539e3052bd9e2d51bcaÁÈ%!@ŠFÃ1basÝ&†"P585bc24cb7ab27b97ý050cc0436810770`Pf37ea6fdc6ae8f4187cb230á•07327df345c266*h h: dP1d31d7284cbd0b3e0de425ff5k Ð22677fb24aPe7de92c67cf693f04ef96135c73b819d175f527ey&8~ s390·(/zfcp_dbf.czPˆ4484cfa3328bd33d5e5b0cb028e7d499142<2$d7fbe4e907D\464c63e8f3278c78edcb838c.S4:Zšunit.c~ 
P39f5446f721657b10ab684972c091cfdcaf51a9cW"7d3d203ba1db0ade520c8f225acf04ff6fd8a..*œ,csi/53c700.c^Pa3adP5P57f54fab69a12fad742e1¹X328cb50Pfabd4be2c98552à0b8c87A,91086b60dbf4]`,2ŒNCR538^P93d1(a2937ý@bbe549a539a9add2c}"7c¡ (da3c8233226F,T49e7a2609707f9eba43d3b.Õ”%Žc406a.”F4Pc91888a0a23ca,"H354f0a3f1fdd1f3a08c8|P10c3374d759fc23b7b85d42d2374c6.b042”6 a100u2Æ#^,P0163457c12bî 8be5859ddc67a99aD+5338fAØ422570d297ca370AÜ 92279d734É76701492=°yrA<aacraid.E×vðRP4921ed19a027f819b731271c4804be74d2426e5aP63f576c9300a7d317a0b7b5f0b8baa3090dd070dš:ha152x.cbPeóx6a921fcef1fa54593fa8bcff612d44f#š47b72f780067153!NHe4bf8dd5cb3b6efedf1.µ)(ic7xxx/9xx.h‚ 4P113874c1284b102f10ò, 75bbÁM¸73cf82Pdf2e0e5367d2f7b292c3b522372206255acae19.zD2v–ž_osm.c’$lP69d5c43a65e590e8b91651e2944‰È282837ad5Ped333669a7dccf811be095a270414974c37b3836=ÒFr¦prog’%ðRPe9778b4f7e323fa0331d564aac80707e24081c43P27dbfccea77473c3cb68699abb24752b9e8e2f84¨fNxx†N$P114ff0L1¡ÝðC7dc23a5fc4d9238c79c2bd4a1Pd2c9bf39033d2dfd2d427ef1be833d96167ad94eù,f)‚‚N%|P383a3d11652d67f292a09d624b5f081~),e80d6P64eecÏ-0a83a2f32328f6K.1c6ˆ6c5e9d.]'"Ø i:rcmsr/ _hb¡‹†"PA(41b4ddbd541!ð>0593354ec0fd0ad0d0617Pb13764ca23fdfc9d272fd780c0bd6dc121ac9a24=ð:ærm/acorn°2v¥":ðFP2e797a36760879a685949fbdcb7671e9bb496036Pd89b9b4deb3cdd659b615cd87054S 6bb61a4.ž6À|fas216BÀ2Pb46a6f¼A 34913afc31af580b4b194678e994bP71cfb1e504ÃFÁóT412b99cd09d51e969414a0=.06Ærm/D.cjdPcb11ccef54e5104d2825d4e6b!¥Èefa8260672cP3441ce3ebabfed136f79560d734e37874837172u*:Àtari_åìzv$P1814aa20b²<f0f¡4h2b065e91b8a2172d9a94P79e6f{,D2a911348a99dfd02a8Ž 603b7168.€"¹?E<bnx2fc _io.¹?^!TP7bc47fc7c686afe9d4b88 ¨475b39e8eeba2a8Pe0bb209f3af14a5a28d443b08bóa 41dde3=Ì6•>cF J P2a3Ѩ2ce0471957ff4190b51e6acfe7766b2c2P7e952cad¡b2d83ace„ 2770802Ý3 244c.¤ "6 %csiostor • Š#DP7494e4bc69ccd6a19é*D8e8d8b8f77dbb6abaa  6103c8475d8ed649b456b4e9f83b954a3b04e31º¾6b dc395‹Z\Pad7bee069d089769b25d033F1,3b8ee81bd7a7áìË+ˆf2338e3ac8c733235f8a2af24d6ce525ba¹ø6 dpt_i2AK^Pc0a57a3bdÓ 
xc2be1c99376c86514472333P67283e/Tac86fcc6d169033c7b68ae3 9d0b=&2dpti.hV,Paeb046186c8n. 43e204736;-05f856fú$7P1fa345a7ba1Š2a5264d— (dae15a49b1d.6ˆeatÁYR@Pebf57364df912a7bè(078a9775fc1d2d˜˜d7eP0f27ac19cf0164f5c0d62054bc1d6b2fd1¶ a.ˆ&(Á. fnicQÊz˜Pea28b5ca4c734a10da01237fcb880e2a5874c8 13f88f565Øhec302a85b28beaf00a92f499779.^6ºg½$f,Pa1bc8ca958eõA895b291eA456141afbcdPb3þ 2e93bc4dauDa51aadd184fb49ec7c?Bb>hpsF¶ HP31184b35370fe47209õ/l2cdd7979b428fd07fcPa59e1e02 ,16cb13caf2deG'(9d82636ad20.Þ 6Éhptiop.>B4Pee196b363d81ccT2c85702bab0bb929d43d1b¯2hedb62c21b296f08c86cb9be52f9þ f7bde25.”6Œ in20JÎPb1!m31137d5Á…`a1c86a04a27d2be47af822ePHE694d87f0¯$04d7e62cdcC 32da10e9b.À6 libiA×.>ÙEPù8c67bac9dc58ac11§85694e568b3c3ab23|f2db82beb6468c4f061c80ce767bf284ƒ6259.06ô $libsas/sase_ _hosŒ’%P¡“f12742J=`ce9155c9e21c6b3828c193135$ áã$19419a746776t11d581acÑ 2049]Ø6ç lpfc§2.%BdP2df11daad85ba2e64f6f4284a‚<3384ed8a2e7P786=$086193aea9š 0e ,d1f87eca1df5œ6Ô megaóBÔ|Pb7770516f4c2dc5ebfda41727fec541B= ff6c +¡¬P6b466e59392d62c168de3‹#fac9ffÔw _comh:ÔZ%4P5ead1283a8442Dd432a6247e4020899eaqdÆ037ed52cÆbb L64d10d7ce2c8c9b010eb.¨V2ê2ƒ!A_sas_˜I.c¶-P22600A`e9" 439be2aaf68cc91„ec!81P3ed03dfab76c¡áHeadb43aed51be2dbad3¡.P6mesæ RPe88 e327Ê$3a!Dc33d987624ef13d4e4ô;P7a6160f172ce1b44b9807– 2ce528c5665.È%6dncr53c8xG f8P7d014b11df62fcre5d8de7 Xad4facaa4499Pa7305ffc3IJdf098647253be030ae33243e631.¬"e‡nsp32.cZ0P0665f9cfdb02?°5c4fc415f969ee309c37690eP50b086aef17836b75c8Ï<8b990e938b3f2360.\6¤ pcmcia‘_c[@v@P987fbb1b244e9661z(01d6de906f40a1ea\ 0cefv/2370a27UD7f376bd9357d70896a.à 6  š sym53c500  Ž$8Pf5b52731abd946$@768c224f950fee2b6&145f9573021f08afü H3b9abcf91db2c151361.Ú6ü pmÆB,(Pbe8ce54f99bcba427ahJ4f4873b987329c8~7f8b95ºD3de6c64ae49f73d79e 7dfd.þ6Âps3roZlPe6e2a30493e69d6baeda15585a8¡Ø5fd26c6fðfí@be3924ea138db50d1†:d0fb6a55ò½î6¸ qla2qla_defB¸F Md0p8d83ce995701183eaT586d528aeaa3Pb6439915á7d9e6a4ed4eb842561f1ffaa11ce2Ì fžgblrž †+4dea8fab1ba50a6g)c6j58ff73cb584ec2bPÍ7=1,da64a4b09606d1 4c02òRd.<6¾.iocßN‚!TP76093152959203227e93eØ 
019c687f31f711û)8150529d98db47f1sNLc73db49a4a1cca76747c.Úf>is3~ Pa5682z:018f31e570e8c0M dcbec6f09Ô50a–Tð 01605a6c6ec785^d50d5af.Øf~mb¡ö~ PÁ{aº5c23582h=284f1Dfc6407d‡@Pd9aafc003be2384‚ 3a45,2b461aa4d441.Ø6¢ .Üm!;zPabeb39M8‘H714e99284d3c4a1e7dbá/X45P4775baa8b6a00dd6a9f\&<49efcdf9d61dddcf.Øf}oÁz(P5269aee1df(36f1f1a2956’%PP0102a2d70dd85912db42¡ÏØb1808cd6297f6babP126bf260500f8411c9a7585895eee3959f1c66(6S:¨sahŠ#˜Pc341f855fadcd433a6730db0d941317615a7feB T9a058194b9bdb2ae370796á(021a9f4dad3350.L6‹b3Y,2e01a9dd26faa‰ c6c62160eôE$af4ec6384dKZe183dVçFD8213017521ecebc75cA5a.Ð$66Pun3*7r0P88220794cc983a&89ã*,2ef1bf190092F$,a2367a1b1f29R%Pf1e5065d947516502ce792: :Cy‘8xx_2 _glu‘’%8P6d3ee1ab636218¾99í>,60fe9b4b8754@ ×) 253dß@e51da5401bb9f9731eú*Rvƒ hipdBÞ>Í %(P5a80cbac3fáC0a493c531789d7 ? 87470089f´41b1758O!02bb9c1fÛ,cf1587b63cea.ž6Btmaµ^ Pb00á–|89ba1186ac3bac5a69c9ca71a17da883¤T64575726c856702e7fd3fd¦d988b7J[23.N6vu14-34)^•S€3bb3bcfef1ce815cf11ad7224e586268513Pú fe863fc4553e09a6™ 30382d62, 6vwd33c93B^ P418… 7931d1d686478bÁ{c£b8f96j (0506de4f3b6j$1a¥%5äI,ea2fe0c70329.&B 4taging/rts52086XV l0e6010372ad7f77390b072bbb0b3åÁÄ06P77020905a7 b45ce7a3åKcbcÊ a25523.ð" tü>8t/loopback/tcm_ .*Š%j$ P8c6a{P76a9634e34027098ef3caUe ,91eÖde9d92b 28a5bF¦ ‰_core_pAûJ¦N$4P94d00df28f395Aw@23c43b8ba6ae4e702 c7*˜3b1dbe859afc1c34ef6c9a7c9cbb2caa091527&ê"ø usb/ û`sddr09.˜#R P073˜MD2ccc4eab336dd064e3ß5 1406$V9f,38a4504ce450(e8a2f71ab306ef73fo1a72," .|usjXPf1c96261a5019669468968øe411c41a8aedù'x52a92228119a6afa849c4853e87eb3b2&&i>Ý_.hV.dbef1172ò™Oe2eTDLba0cb91377601909c69dó'3e6![½òâMM027ab31017f090¯$f3 03 ´0a44a793ÂEa38f7b3s# b733Ó`4e278f5570636c2.x:B&: .h~P!À4f4bba53cdfd4447535a14255c8f81e117adP81292392adbà142de2— b14b44f33ORV>T‰¨·#…ÖS VagôY?¤¤‡›‚ô P22ffeb48b7584d6cd50f2a595ed6065d86a87459P3c5d6e68592cc599210fef2582cf09182412fd62Pc309b35171ddb5384cc3f2f9dc82a96dccc6b7f6Hannes Reinecke`b0d1e9e4a4e27620745ff49be9000da3174a4cc6@suse.deš¹ì¸ ð€µ³‰×ºý"Christoph 
Hellwig^923f7720577207a44b32e59bbfbea59d27f1ae8e@lst.deŽ´Á¼ Wè¿€‘¦šÿ^scsi_scan: Restrict sequential scan to 256 LUNs¼ º2 S:O0for more thanZ¸ is very fragile as LUNs might not be numbered ¤ly afterOdt point. SAM revisions la !tn SCSI-3 impose a structure on q larg Ož, making! €s betwee L and 16384 illegal. e<, however allowsú,plain 64-bitO4with no intern!x–Ëo rJ›LUNBŸ Œ|add a new blacklist flag 'BLIST_3LUN' to BPup to max_lun devicesz4igned-off-by: :Ø ReviewedPð@Ewan Milne <2c554d25b86cf1e2efb352de0d4bd0940eeebcf1@redhat.com> : B" <923®"> 2V„î÷ ÷Ê=*€þ+fÊa> †îb-b^º”Ȇ  0drivers/„…».cjðUP8564bdcd22879673990dfc4174ad13bc61aa3d5aPa02f7b0976ed832550c7b22431e4d99f0d9cb849’6include_a?nfo.hvôSP447d2d7466fccbe93f4e75d7773d982f67e22584P8670c04e199e5cfb1090a531f9a12edc8d230ce4torvalds/linuxPecc3bc982596cb66bd7f951e2f36503f4841c0c8Pd2169a160d6dd4ffbdb18748f2b726e50a46f879P049b3e81d79b1957c0be231d66a98b5715b347e4Matthew Wilcoxded1245e46949b7b8f2bca25d41520ebc8aeefcf8@intel.com†³¤³ ߀ÛÓ“çú"Christoph Hellwig^ºZȬ°¸ á(Äð÷‚žýrfuÁ24: Add free msgÁ“mes¡/ the head,Å‘tail of ¡cè â< ReusÁdapð< quickly means it's still cache-hot. This yields a small butŸDicable performanceÁö¨rovement in a well-known database benchmark_im64s already pres I!mpt3sas i¤F> MattY AckedÅîSreekanth Reddy <5d8ef29afd278d20bb09Áæ@2ee4a503e91e9208@M2š:þðÅð Fåî©-©dÆÞ6óšþß2ßpÞ6—’þU`¾N AcIN„îOIO`¾‚F,þÒ"@‰m$s/message/©ƒ /mptË.cŠ"ðRP570b18a113ffca813d2588dfc02c879e626fae9ePebc0af7d769c01699aa8077b4231a5f61fc86ec2Z‰ðå5565461e30c15525c431814dd612118a78d05992P6bc1fd956899655443cc607e0b367bead8a222a7Pfd10ccfa8dfe740f61471b212c200fbdb5de82f5Chad Dupuisf2981c6370b0db855ea005c1823de8ec941ffd48d@qlogic.com¤·°À ߀⌆¬†"Christoph HellwigÂá ð<̃ÃÁ Ç€–ìÈŸÌ„bnx2fc: fix incorrect DMA memory mappingÁÇ ,$_unmap_sg_á()ê îE Táapatch¢AVd oá;problems solution from Maurizio Lombardi where ¨4 isn't consistíAwhichz “ we (for DMA mapné operaus. 
MakPem.Xby Adma!8 in ‰ !1 like)I.˜v*° èddie Wai n‚Cþ0Ò÷:þÚ¹ö>±ö¾ /iå _io.c†!eb8bd7a137f597977934aP0679782d9d15e05b5d536c95b25dd643249f142eZôð†acafd0b920e1a4a94a03f1911b4fb87e2081f235P703713ff121b345a46ddd061593992dc007a6aecP19c8ead7252bb66147011a361bb0c74dcb18e213Ewan D.gÖ<°Â„È ×€Øšå×Ù„þö­öDŠ¥ÿÊ x€íÈ’¦Ž†T!Ï,_debug: Impl¨ XWRITE BUFFER command´¦- Accept¦ NN¥Udo noth… oth se 7`appropriate "microcode ha 4en changed" UAÁ LU. >FÁ$an earlierÍJ4by Doug Gilber°6kðD.)·Ú- :l) oÈ <54dff2264884c355ed5584a3b7f1530b4b26921e@interlog©¿ Test¼¿þSS:øîP P  F±îO-OÖ<&² Žþ3-3jÒ¾5¾þŒîŒ ŒþRþR>R22R™[.cnˆP1435b15029751f6334079a8fbfc8dc6cb9C¸f3P8bcf6ad0d765a0a7c2cc10980706f4f9419c8750NÃð¼cb2fb68d064c16a559483651132815cc378fd1f9Pbe186c470b27cfa9af56d464423cb3655bdacf1aP6bb5e6e772f5f71413e290eb9c6a475e9a6d39e2Vaughan Caof4153aaee9564c8d7cd638e908a6e859a1825f40e@oracle.: D´Ýì¸ À€ªˆ«èºýþÁÁ †´Á¼Á›˜Ñ€sd:¡ÊLify block layer when temporary­·ÙT_type¦ þC6 ,Á Á ÁºÈit 39c60a0948cc06139e2fbfe084f83cb7e7deae3b "sd:@array £ flusŦbug cau7.äp˜ $s" We mus=!Á–.via q->R_"sJaB# of?9'!5 writw 4rough. 
Withouá2\is, a SYNCHRONIZE CACHE !¤wÈÎbe genÌ edrp factors Sa helpé—t –be calle¨Pom sd_revalidate_diskå×±_storP6kVaUÜ <4153ºÜ:;þÂÁÂ:¢þSSBØ‚îùùÒ 2^ þL ÖL þ€þ€B€"6€d.cNðRP14ec8f521ae897af35eb592c72a949b384e8096aP4056004102ae4a274e6e16ceffaee5be2ae2348dZ¶ ðU029165acfa611a3a8838723f6978586ae35ff53dP0c7b4af43ff9e96a66817ca63413b0c2138a6d71Pe8ôò,9b6da1cefc63áðR590231c300059"Alexander Gordeevf840ef8418a045e2a597676027ea56f79bb2faf20@redhat± ¶ûµÍ³ÉÛë”ÿþvÍvPº×¼½ ߀ÅÛ§ŠÕÿflp­ Remove su¡luous…£¡+pci•’$_msix()œÊ6Á[ereÁ\no need^¡ JccÓ¡úp!>“en’ failed >€BŽ <8ÆŽ>*‘ JPØSmart <2b041da073ece526f0738329b43a4ebfc495c725@emulex.Xþ#Ž#Žþú úÒ¾&< ‚î1%1fÊgþ(þ(B(:2(a?_init.c~P06f9a5bÔP6d226c8c2e447b924f7d2˜4aa6Pa5769a9960ac0be0154bfa542144205f9m8e9Z@ð}fc4fff82104fa096eada73943fe5249500acd5faP5fefc3512525a08d9a77723706cde96d001c167fP6af502de224c3742936d54eee7e3690c09822934N[ f¢tuxera¥@ÚãªÊ  Õ²‚ÀáÈþ@­@FW8 >uŽòe[Ò*w $fs/-]/Åã.cR¨Pa7bf89e85b3bf279c5b3ae2be816eb5a742278a1PÛ% ea23Hl410e8186af0020c5003962eb0766^gð6089ff87d309a8ddb7b0d4dd92a570f1b0f689bP9b2c6b43885899e055e7b64b88e12bf3759e3794P13571a6977f821fab7d9c3cc5f75da52b7732e40"Chr:üÒg¨®¸Ë P´ØÆ¥ÉþgmgFW8hqg fix link ZupG¼n HFS i—s hardC"gTindirect catalog entri9#hat ref¿ oË idden .lyö e t})tߺ dÃa¬dev fiel HFS+i fic e¼, Gxlso uso*eAicer)… ©%ce file®n[side 3 pass‘(nçvalueî#ë‰Y-=_cat_‰v ò½ func!l . Now ifåhappef+7ïFdwhile e’is creat¨.w0y we'll get a)[p+,o` DidÒ currí%˜eÀ silyu$reproduced!úa/+ en loopR8local git-clone.­ Stop ab»êVÝ%Ô%ª\short term storage by re!Óhe wayNpermisÇ% "G+QG6#isáup,,renam©ò’to%>ih avoi/ygl . W!›!p re at it Eq !R5¤a2MK&Ñ~Q€s¦ E×~A-¢sha.-e s spaË%níon-ìê!`6áþšþšþš^š""(Ýš-â.cZ¨P48979c4e8fa5f273475a8d6abd81fcbf0dc13c7cPó˜94b0a07af0d01386ce600620b22e64c67ad4* Šdiá"FðP5cda96366acf2b2823465ff89d11f72bf8162abdPd92f590d6633c265cefbçV m8ontains multipl¡ÙstÜ*s £©$ kb (Bso ed "¶ "). ¥o ¨$_l2_rcv th áXaÉʹ$extracts aòã#[6#, modifå(¡²d©1oys) itsÁ%á#t€ proc³B0next one. 
TheQ$is xisËis-t,8remainÁøiþ R orÑ ed.+&›$W R_% 0_check() befoÁ½d& kbÁû the .ÛH. [ 6286.808725] -[ cut  ]-24,9] WARNING: á..A/4fc_frame.h:1736ž_)ž4+0x425/0x450 [)À]().c(48] ModulesåCF :N[Hardwk  ¨: HP ProLiant DL120 G7, BIOS J01 07/01/20132H2] 0:\b36e715 ffff8800deba1e00815ec0ba2T3] !2388105dee1a05618c T,8801e4c818882T4 Te8E66386 T2f402b18Cf56bc00 ¹4BT(Call Trace:2o9] [<kæX>] dump_stack+0x19/0x1b.:6!Q.: î,>] warn_slowaM×$+0x61/0x802D3:~05e00~6D null†a/0x26B52B$a054f415>]¥VfEyrcr¤2‘7BOeff0>] ?Qdi-+0x90/µí2I>X$085aef>] keÁ/cf/0xe2Ï702€872 € 9_· e_on_¶ !4‰146J>Ÿ05fc76c>] ret_ _for!Ü7c/0xb6=B˜85Ú‡4åŠ[ end tA@ c6cdb939184ccb4eå“BÜB  <Ê .Üî¥*¥* SþÀ&ÀðFlst.de> Cc: 4fbacc2fa0ffdbb11bf1ad6925b886ebd08dd15f@vger.kernel.org :°$Žþ7-7Öú .în%nþ,þ,ê,CcpÞ׿:*B2‘Ž _: .cŽ#P7Ù9l94107a9cc44fe8269f55ab72e815°e0bP72533c58c1f3bc0d6a18412a197651399abbf6a2R2)&%Å·#…ÖS VagôY?¤ò‘ñŒôiP93f2bd67b34b4f6b35b2300d668d92e3fd01163aP34bd6990d22a871772af97b612344f667167dd99P9d35894d338abc351cad8b0c0d5fb3e992f5cea9Quinn Tranfa02f5c47ee09ecfb205631316fdc727ed52eb140@qlogic.comŠöžÂ ߀­õ­ˆø"Christoph Hellwig^923f7720577207a44b32e59bbfbea59d27f1ae8e@lst.deÀ¦ Â ð€ í½Üødqla2xxx: Declaration error cause stack corruption.èqla2xxx: ¢5 F+ðFof mb array in qla2x00_iidma_fcport cause data to be written beyond the B4. 
This ends up8(ing stack c¾@ Signed-off-by: 9« >PàSaurav Kashyap <88d6fd94e71a9ac276fc44f696256f466171a3c0@9ÿBTCh:û <923f7¦û> 2ú€þûÖÜ:‡ˆþ2!2fÊh>‹†îi-i^º›Ȇ4>drivers/scsi/exx_init.c†!ð¤Pd5b10ecde4a03e4a9b312839b577bf43442c7627Pab22ccf4c7d3ad16db819facb88a7890f2f32663torvalds/linuxP5533abca06e07121697ed1d30863ce03e7c518e5Pdbd493cbf4a526f5aïð•72e5cac2475763fe598Pef300544723b3be25c877e520b23831eaf7830e8Tomas Henzlfed03cdd404c1c501a81ee6cc756c3c81b1d33f83@redhat.com²²é» ”€áŒœªðþ"BÂÎÀ ¥–½ ߀ÐÇ„ãÂÿ4pm8001: honor return valueòf The I ignores ñrB¡:a lot¡O places, fix it at least somewhere (and ree LsourcesIpsuch cases), to avoid that ba (ings happenFJ=’ Acked¥–Suresh¡ÔØagarajan <1c59eff352937ea4511bbac0a612db009801ddc9@pmcsáG.QJÁØWang ‚îB%BÒ 1xŒþy)yb­†|ö­dÆâ>Œþ Ú ÉÁ@iøsÉ ‰% _hwi.cŠ"pP92943797d86209abe0cf8ac34095aG64548c1  ef7b504d8c12db970dc88879b8f83db2bab1ed1žr¢xx~¢"ðRPd70587f961845c2a868f4fd596ef1a624c5d8a30Pc711a769d23ed55a4521db82fe6add2765bc6976Z¯ð¼1aee383d5912de15af3045a63a07e98f760f041cP6e58b144d1354008d497e3cd0003e9cb6e69e090P9ab9b134a86ed9f897a29d2ba5abb93f7b162dcaJoe Perchesh16a9a54ddf4259952e3c118c763138e83693d7fd@perchesÍØ ºÀÉYŒ‹Ðƒ‹þ  `¼ƒÃÁ ǀΛÁŸÌ:lpfc: ý Š zalloc_coÁAnt²r  UÍ[ zero„ ReviewÍaðî© © >6B þââÅ U·ÞùÞï:@î2%2ÖÛ :m!îKKÂåTP83c0a7dbfef3b5d874bafJà468091a9209d1d5P799393ede1ccf6c8fd8dcdb0033290ddbe5cef4aÝB2@2|_attr.cŽ#tPacdae33de52188f6ee16c3cbb55ef[ |6e7c101P16422adcc531269bb6666ba± 050dbfc333eadc¤66¤hbaBN"ðRPfc0dfbc70febbb6ad29b83804ed9616552f51205P1576805efc7d3b83bdb0ddee890cf3dead280929Z´ðÂ16070cc189c5e343696c29c8cff779e692cfcb8dP00edc84093c62557235b118fe8e3e28441c07dceP65c26a0f39695ba01d9693754f27ca76cc8a3ab5Douglas Gilbertj54dff2264884c355ed5584a3b7f1530b4b26921e@interlog.( ¬Ðù¸x Æ¥ÈûÀýþ¸ɸЌ´Á¼ ð€¶†¦šÿÜŽþQ-QjÒ‰6òˆþŠ!ŠhÎÁ6ІîÂ-ÂfÊø>¢þ~ Ú~ &U"‰KU³JðRP37fb44b0074b6222c1fc777e7e139be3b3c98584P32425ac61096a934ff5aa932faaf814ffda8fdeb*Ä"includeÙ sg.hN€Pd8c0c4307fcaa084d2e3d418f869fbfaÀ4b72P9859355a7cf9944a0fd27726b602733b2eea243bNÈ$26cf591e6d 
ðU07495b7bcf20a557b316811f00Paaebbfe8c9764f981f2fa540b6ae20c79dce55afP678e27573237a0b0ªfdf99e5 (9b0c403c3þ¦"¦ Ò¥–ÄO±³Ü‚î‚þ^^D´Í™Æ x€ª€åÕéƒ~›"§áŸH_RESET_NO_ESCALATE ÅåtoÁ]"ÂúB Furth´Ho a January 2013 th;@ titled: "[PATCH]N‰h should only perform reques·opera¶0" by Jeremy LÁñn aí~4(v3) is presen6+expñá…exist´to Mÿ "no_esca{ " ö ions"28[ts.-Ÿino1!d$ low level0$s (LLDs);  ldds several more finely tuneset opðs $ `user space. For example: áÝ/* ’Ê remai¼he same,È ç!semanticP * if'â (LU) ò f{ T{is: onurçX‡ * target 6(Sa Bs, , bu‚ set,F$I­ hostˆ LLD Ž . */&val =6M(_DEVICE; „ =)ê(,67, &val);1MW²ollowsü a new-’g G by ÿ ,series. Only * a1LE9K(ttempted. I!n1!y8n an appropriatë  *codÓ provid@N.B!”I   e1àon†) |6 .8æE2Ú$:þZ VZ 6È> þYRYþ1lÖ>6„þ?1?fÊuþþí$å‰áF2mg_base.c–%P88c80efá“ðFbec5cd147ff54ece05741c26ff135Pee8ba5c18d6dcf1b74631900f1070a5a1fed831cÝ‰Š¨h–%lP3faad454cba0b933d4e724ed5a7ä(<330b94691P72bff!$303d2b2be9(@a4848f04e15b50b83¨JrPcxg.cž'ðCP41a8f84ec8a48148037fc1a3193d4f055549ce87Pc43815b1a48513515aaf68b6ej/(78c6bb90d96¬Dv¬tþŽ$àPca4e563c01ddaf12c44663e292db596b021e05ebP4e509604b57164¦c52a7|5776a637bP2²¦hЦðXhP7f842c88abd274d037d4dd63a2d8e19181f6daacP46b2fc5b74afe2cd22e5d89f5db554bc9796e31cÈ1HrL debu¿–&,Pcc57ef31d0fÆ.`958374d54e7567c27b29f1f93,$$7120d45648œ\c9710b6673ff08fc1bddc617=Pv„ã h.cš&LPacfaa9f3913f9cabc03ç#@a23aa6666da4fd677avL26147bbc646535c643adÏ+ 001855a17÷ . 
PrTtransú5.c®*e689b³T3ea541b93f94cb50cc900eë€a751Pff2500ab9ba47b084ab7d256620J 6f9525f10=\B¦å¡NbN>%ÈPa428fdd43df5182b4ed8b2dda1ac54f115a73d3eP549b5003i bca01bì0 5931P13a13!(5¶¨JNF¨ hP4fef8f38cae478cf2991c902a8aaab77137c4aA¬˜7e9f55baa1bca136fb7f163f3a70094668f2d37.üBN2ƒfNV'P6582¡:ðI3ccfd2b97280d4067803447342b7564aPe45c4613ef0c3a4a61aca89f5e18d3ba9f4d19d6=TB*6…VNJ$8Pdca14877d5ab38 95A D91959aef5c999bffP:(P8a76d23d65c688f1a3e8fÿ A090c4ac8²¦JNB¦hP5f3x.H7c2f813ad2a659c8d61¡ÇT9d075e0ae9Paee99ce67e d8355e51eedf98039a4488d399e¦B¤2‚bNR&ˆP4778e7dd98bd4d4b5b1510b572244d70cdå/76Áž˜8a63fdb304f9c2c732b95cf8e89f48d5783f31jø2„bNR&@P5aa2ee96d0e6c672MLf83b70611e308508c8d7*€97e3286719d8150a6dbf24bcd062f2eff$.šBN2„rNf* 3637<*X0171190f34c0050e6a1682c¬72d523±7:ˆ igger_diaR ²-XP8a2dd113f401098410d80a¡:\121e9ce5a73525Pb60fd7a3²Db403869b09f9f0427báce560a.dª‹h¶-˜Pf681db56c53b801ed3a72ecaf837eec5b1aca5t6586a463bea9730f14820ccc5bf291a05bb94^’%b3e89>|19f1e0422230915dcaf23a262628b7PF f234Sca066ߨb2cba3a5d852a04c53dPc6d4a83177d1c45ec42d65ff48d85fb5b7á‰26´9ðUDolev Ravivn3d75773ad3271f97975e35fbf4c96b1bb9f3e9d6@codeaurora.org¦Óýº è€ëÄŽú¼þþ%%š¥¸9µÙ¸9DTufs: Logical Unit¡#Í$ depth¾ ¦-3$oe UFS³ s may sup "q%t number0 zs hca? ‡dà LU. AÝ curr$i´menta‡, SW ébK eachX¹.ˆ LU's`,ordµd controller capability. 
Ih%Ë,Lç%)$ available ¢ iŠad ¬up)™& &x(SW structurR5$UC <ÚCB5E’X Shvili <3f4cb7ebd81402aU948a2Ç( 013e30c87:™.n: Sant&Y <”*6d67415f4e0797c43b30d17c4ef5283c3fbb@þn:ºn:Š}ŽæKæÈ:›,Œþ†)†nÚÀ&;|öÀdÆõ>þ,$î,$,$,2¨ufs%ZP1545cd7ä 07Pbe1e1caeb449845bd7dc6Á3fafcf¡íD4c6d1043ff8c91a678›9c402465b.~2NvhcdBË © P, 8ec25c¼4 303ef4d75.05974bc6eePb3áyf5t0a67dbf79f3d28324371cf1d483a1aRZÓuœ;·#…ÖS VagôY?¤ä$¸#ô Pb6b41424f0ec28e9a167fa29b003327860b4b71bP9e17a235542259f82a3a1e2c3d2a1906f2ac4749Pee52716245877b821f5ddbb3ace85b73084fb450Al Virorde609eb4d5d70b1d38ec6642adbfc33a2781f63c@ftp.linux.org.ukÌ­¸Ë ߀–隥É"Christoph Hellwig^923f7720577207a44b32e59bbfbea59d27f1ae8e@lst.deÌ­¸Ë 2WÈ–hfsplus: hfs_bnode_find() can fail, resulting in (Hsplit() breakage¤þNN oops and fs corruption; the latter ¤Hhappen even on vali3hin case of oom. [hch: portðIcommit 3d10a15d6919488204bdb264050d156ced20d9aa from hfs] Signed-off-by: -ñ :U0Christoph Hel!ï <92šï8tuxera.com> 2ZŠþ°°vêî>”Žþï ïfÊ%Ȇ "fs/MÄ/brec.cNôSPc88e5d72a402ae2d29a8905cdccf7b59ccd4337dPfa903641992fb2d3dcae0787dd0bda4f53d80499torvalds/linuxPd98164461c885ef2655d8edceec4f8f403c0b41eP2309dcd9774a24bfa2a8a1b91a02a7ed4cb2836bP5b4ce882d56e5356ea38ab86f6da91df4ac57842&Rickard Strandqvistvb2870f70e324b62b54eced4cf50284b23ec5baab@spectrumdigital.se¤±é» ”€â±Ø©ðþ"Chris.ë^¢ÆlstÚ¢¥–½ …Ú ÙÁ…ãÂÿ‚pm8001: Fix to remove null poin…?Thecks that could neverS’îDD Rxalsª|Be Rick:Š Acked-by: Suresh Thiagarajan <1c59eff352937ea4511bbac0a612db009801ddc9@pmcs‰kQàJack Wang >s¢þRBRò1?Œþ‘)‘bÂņ|öÅdÆú>¤†ò-ûÂMÝ4@drivers/scsi/‰ _hwi.cŠ"P3eÁð^04d8c12db970dc88879b8f83db2bab1ed1Pcc89d18e1ae68bd4bc630254666a9297c61df8f8torvalds/linuÁ-ðå67955ba36eccb5733fd289839a5a5d1dfc33ccf7P08d046a9244227992325b3c73e0bfa1aa19ccdf1Pa84d794d5c12a4c118bcdc4fecc2ca76ed02f643$Stephen M. 
Cameronvfcc9df7f1d62c98ed1e7091f4fa112f895d521f5@beardog.cce.hp.comâຸ ×€¹—þ¢ý"Ch: ÂßðC¬·á¸ ð€†µæ¶µýlhpsa: fix handling of hpsa_volume_offline returne ueäÖ9 Make2L8 an int insteadÁ.an unsik  char soŪt we do not lose negative error2Vs.BHSt>¥ ReviewÍJxWebb Scales <9e4adae5467e71301dÁªL6765c60dd734d04ca7@h%ðK K$Dan CarpenáªÀSvê‘=‘zò‘^ºÃ}5ÆîÄ-ÄfÊúþIþIÍIT \ &2I6.cVðUPaf51e7d4e2621065a91e6aae900dd22054f37285P31184b35370fe472099f072cdd7979b428fd07fc„.Bˆ_cmd.hfð@P649b463951c6b46d1c362b22eba28bbd51d22b19Pb5125dc3143912233213ffÁå4c0d86ddd78efafRì €‚d¾·#…ÖS VagôY?¤arrow-go-18.2.0/arrow/bitutil/000077500000000000000000000000001476434502500161575ustar00rootroot00000000000000arrow-go-18.2.0/arrow/bitutil/Makefile000066400000000000000000000047001476434502500176200ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# this converts rotate instructions from "ro[lr] " -> "ro[lr] , 1" for yasm compatibility PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/' C2GOASM=c2goasm CC=clang-11 C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 \ -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib ASM_FLAGS_AVX2=-mavx2 -mfma ASM_FLAGS_SSE4=-msse4 ASM_FLAGS_BMI2=-mbmi2 ASM_FLAGS_POPCNT=-mpopcnt C_FLAGS_NEON=-O3 -fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions \ -fno-rtti -fno-builtin -ffast-math -fno-jump-tables -I_lib GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') ALL_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') .PHONEY: assembly INTEL_SOURCES := \ bitmap_ops_avx2_amd64.s bitmap_ops_sse4_amd64.s # # ARROW-15336: DO NOT add the assembly target for Arm64 (ARM_SOURCES) until c2goasm added the Arm64 support. # min_max_neon_arm64.s was generated by asm2plan9s. # And manually formatted it as the Arm64 Plan9. 
# assembly: $(INTEL_SOURCES) _lib/bitmap_ops_avx2_amd64.s: _lib/bitmap_ops.c $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/bitmap_ops_sse4_amd64.s: _lib/bitmap_ops.c $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ bitmap_ops_avx2_amd64.s: _lib/bitmap_ops_avx2_amd64.s $(C2GOASM) -a -f $^ $@ bitmap_ops_sse4_amd64.s: _lib/bitmap_ops_sse4_amd64.s $(C2GOASM) -a -f $^ $@ clean: rm -f $(INTEL_SOURCES) rm -f $(addprefix _lib/,$(INTEL_SOURCES)) arrow-go-18.2.0/arrow/bitutil/_lib/000077500000000000000000000000001476434502500170645ustar00rootroot00000000000000arrow-go-18.2.0/arrow/bitutil/_lib/bitmap_ops.c000066400000000000000000000035571476434502500213770ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "../../../internal/utils/_lib/arch.h" #include // like elsewhere in this repo, this .c file gets compiled into optimized // assembly and then converted to go plan9 assembly via c2goasm so we can // call these functions. see the Makefile in the parent directory. 
void FULL_NAME(bitmap_aligned_and)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { for (int64_t i = 0; i < nbytes; ++i) { out[i] = left[i] & right[i]; } } void FULL_NAME(bitmap_aligned_or)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { for (int64_t i = 0; i < nbytes; ++i) { out[i] = left[i] | right[i]; } } void FULL_NAME(bitmap_aligned_and_not)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { for (int64_t i = 0; i < nbytes; ++i) { out[i] = left[i] & ~right[i]; } } void FULL_NAME(bitmap_aligned_xor)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { for (int64_t i = 0; i < nbytes; ++i) { out[i] = left[i] ^ right[i]; } } arrow-go-18.2.0/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s000066400000000000000000000225721476434502500233500ustar00rootroot00000000000000 .text .intel_syntax noprefix .file "bitmap_ops.c" .globl bitmap_aligned_and_avx2 # -- Begin function bitmap_aligned_and_avx2 .p2align 4, 0x90 .type bitmap_aligned_and_avx2,@function bitmap_aligned_and_avx2: # @bitmap_aligned_and_avx2 # %bb.0: push rbp mov rbp, rsp push rbx and rsp, -8 test rcx, rcx jle .LBB0_12 # %bb.1: cmp rcx, 127 ja .LBB0_7 # %bb.2: xor r10d, r10d jmp .LBB0_3 .LBB0_7: lea r9, [rdx + rcx] lea rax, [rdi + rcx] cmp rax, rdx seta r11b lea rax, [rsi + rcx] cmp r9, rdi seta bl cmp rax, rdx seta r8b cmp r9, rsi seta r9b xor r10d, r10d test r11b, bl jne .LBB0_3 # %bb.8: and r8b, r9b jne .LBB0_3 # %bb.9: mov r10, rcx and r10, -128 xor r8d, r8d .p2align 4, 0x90 .LBB0_10: # =>This Inner Loop Header: Depth=1 vmovups ymm0, ymmword ptr [rsi + r8] vmovups ymm1, ymmword ptr [rsi + r8 + 32] vmovups ymm2, ymmword ptr [rsi + r8 + 64] vmovups ymm3, ymmword ptr [rsi + r8 + 96] vandps ymm0, ymm0, ymmword ptr [rdi + r8] vandps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] vandps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] vandps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] vmovups ymmword ptr [rdx + r8], 
ymm0 vmovups ymmword ptr [rdx + r8 + 32], ymm1 vmovups ymmword ptr [rdx + r8 + 64], ymm2 vmovups ymmword ptr [rdx + r8 + 96], ymm3 sub r8, -128 cmp r10, r8 jne .LBB0_10 # %bb.11: cmp r10, rcx je .LBB0_12 .LBB0_3: mov r8, r10 not r8 add r8, rcx mov r9, rcx and r9, 3 je .LBB0_5 .p2align 4, 0x90 .LBB0_4: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r10] and al, byte ptr [rdi + r10] mov byte ptr [rdx + r10], al add r10, 1 add r9, -1 jne .LBB0_4 .LBB0_5: cmp r8, 3 jb .LBB0_12 .p2align 4, 0x90 .LBB0_6: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r10] and al, byte ptr [rdi + r10] mov byte ptr [rdx + r10], al movzx eax, byte ptr [rsi + r10 + 1] and al, byte ptr [rdi + r10 + 1] mov byte ptr [rdx + r10 + 1], al movzx eax, byte ptr [rsi + r10 + 2] and al, byte ptr [rdi + r10 + 2] mov byte ptr [rdx + r10 + 2], al movzx eax, byte ptr [rsi + r10 + 3] and al, byte ptr [rdi + r10 + 3] mov byte ptr [rdx + r10 + 3], al add r10, 4 cmp rcx, r10 jne .LBB0_6 .LBB0_12: lea rsp, [rbp - 8] pop rbx pop rbp vzeroupper ret .Lfunc_end0: .size bitmap_aligned_and_avx2, .Lfunc_end0-bitmap_aligned_and_avx2 # -- End function .globl bitmap_aligned_or_avx2 # -- Begin function bitmap_aligned_or_avx2 .p2align 4, 0x90 .type bitmap_aligned_or_avx2,@function bitmap_aligned_or_avx2: # @bitmap_aligned_or_avx2 # %bb.0: push rbp mov rbp, rsp push rbx and rsp, -8 test rcx, rcx jle .LBB1_12 # %bb.1: cmp rcx, 127 ja .LBB1_7 # %bb.2: xor r10d, r10d jmp .LBB1_3 .LBB1_7: lea r9, [rdx + rcx] lea rax, [rdi + rcx] cmp rax, rdx seta r11b lea rax, [rsi + rcx] cmp r9, rdi seta bl cmp rax, rdx seta r8b cmp r9, rsi seta r9b xor r10d, r10d test r11b, bl jne .LBB1_3 # %bb.8: and r8b, r9b jne .LBB1_3 # %bb.9: mov r10, rcx and r10, -128 xor r8d, r8d .p2align 4, 0x90 .LBB1_10: # =>This Inner Loop Header: Depth=1 vmovups ymm0, ymmword ptr [rsi + r8] vmovups ymm1, ymmword ptr [rsi + r8 + 32] vmovups ymm2, ymmword ptr [rsi + r8 + 64] vmovups ymm3, ymmword ptr [rsi + r8 + 96] vorps ymm0, ymm0, 
ymmword ptr [rdi + r8] vorps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] vorps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] vorps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] vmovups ymmword ptr [rdx + r8], ymm0 vmovups ymmword ptr [rdx + r8 + 32], ymm1 vmovups ymmword ptr [rdx + r8 + 64], ymm2 vmovups ymmword ptr [rdx + r8 + 96], ymm3 sub r8, -128 cmp r10, r8 jne .LBB1_10 # %bb.11: cmp r10, rcx je .LBB1_12 .LBB1_3: mov r8, r10 not r8 add r8, rcx mov r9, rcx and r9, 3 je .LBB1_5 .p2align 4, 0x90 .LBB1_4: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r10] or al, byte ptr [rdi + r10] mov byte ptr [rdx + r10], al add r10, 1 add r9, -1 jne .LBB1_4 .LBB1_5: cmp r8, 3 jb .LBB1_12 .p2align 4, 0x90 .LBB1_6: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r10] or al, byte ptr [rdi + r10] mov byte ptr [rdx + r10], al movzx eax, byte ptr [rsi + r10 + 1] or al, byte ptr [rdi + r10 + 1] mov byte ptr [rdx + r10 + 1], al movzx eax, byte ptr [rsi + r10 + 2] or al, byte ptr [rdi + r10 + 2] mov byte ptr [rdx + r10 + 2], al movzx eax, byte ptr [rsi + r10 + 3] or al, byte ptr [rdi + r10 + 3] mov byte ptr [rdx + r10 + 3], al add r10, 4 cmp rcx, r10 jne .LBB1_6 .LBB1_12: lea rsp, [rbp - 8] pop rbx pop rbp vzeroupper ret .Lfunc_end1: .size bitmap_aligned_or_avx2, .Lfunc_end1-bitmap_aligned_or_avx2 # -- End function .globl bitmap_aligned_and_not_avx2 # -- Begin function bitmap_aligned_and_not_avx2 .p2align 4, 0x90 .type bitmap_aligned_and_not_avx2,@function bitmap_aligned_and_not_avx2: # @bitmap_aligned_and_not_avx2 # %bb.0: push rbp mov rbp, rsp push rbx and rsp, -8 test rcx, rcx jle .LBB2_12 # %bb.1: cmp rcx, 127 ja .LBB2_7 # %bb.2: xor r8d, r8d jmp .LBB2_3 .LBB2_7: lea r8, [rdx + rcx] lea rax, [rdi + rcx] cmp rax, rdx seta r11b lea rax, [rsi + rcx] cmp r8, rdi seta bl cmp rax, rdx seta r10b cmp r8, rsi seta r9b xor r8d, r8d test r11b, bl jne .LBB2_3 # %bb.8: and r10b, r9b jne .LBB2_3 # %bb.9: mov r8, rcx and r8, -128 xor eax, eax .p2align 4, 0x90 .LBB2_10: # =>This 
Inner Loop Header: Depth=1 vmovups ymm0, ymmword ptr [rsi + rax] vmovups ymm1, ymmword ptr [rsi + rax + 32] vmovups ymm2, ymmword ptr [rsi + rax + 64] vmovups ymm3, ymmword ptr [rsi + rax + 96] vandnps ymm0, ymm0, ymmword ptr [rdi + rax] vandnps ymm1, ymm1, ymmword ptr [rdi + rax + 32] vandnps ymm2, ymm2, ymmword ptr [rdi + rax + 64] vandnps ymm3, ymm3, ymmword ptr [rdi + rax + 96] vmovups ymmword ptr [rdx + rax], ymm0 vmovups ymmword ptr [rdx + rax + 32], ymm1 vmovups ymmword ptr [rdx + rax + 64], ymm2 vmovups ymmword ptr [rdx + rax + 96], ymm3 sub rax, -128 cmp r8, rax jne .LBB2_10 # %bb.11: cmp r8, rcx je .LBB2_12 .LBB2_3: mov r9, r8 not r9 test cl, 1 je .LBB2_5 # %bb.4: mov al, byte ptr [rsi + r8] not al and al, byte ptr [rdi + r8] mov byte ptr [rdx + r8], al or r8, 1 .LBB2_5: add r9, rcx je .LBB2_12 .p2align 4, 0x90 .LBB2_6: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r8] not al and al, byte ptr [rdi + r8] mov byte ptr [rdx + r8], al movzx eax, byte ptr [rsi + r8 + 1] not al and al, byte ptr [rdi + r8 + 1] mov byte ptr [rdx + r8 + 1], al add r8, 2 cmp rcx, r8 jne .LBB2_6 .LBB2_12: lea rsp, [rbp - 8] pop rbx pop rbp vzeroupper ret .Lfunc_end2: .size bitmap_aligned_and_not_avx2, .Lfunc_end2-bitmap_aligned_and_not_avx2 # -- End function .globl bitmap_aligned_xor_avx2 # -- Begin function bitmap_aligned_xor_avx2 .p2align 4, 0x90 .type bitmap_aligned_xor_avx2,@function bitmap_aligned_xor_avx2: # @bitmap_aligned_xor_avx2 # %bb.0: push rbp mov rbp, rsp push rbx and rsp, -8 test rcx, rcx jle .LBB3_12 # %bb.1: cmp rcx, 127 ja .LBB3_7 # %bb.2: xor r10d, r10d jmp .LBB3_3 .LBB3_7: lea r9, [rdx + rcx] lea rax, [rdi + rcx] cmp rax, rdx seta r11b lea rax, [rsi + rcx] cmp r9, rdi seta bl cmp rax, rdx seta r8b cmp r9, rsi seta r9b xor r10d, r10d test r11b, bl jne .LBB3_3 # %bb.8: and r8b, r9b jne .LBB3_3 # %bb.9: mov r10, rcx and r10, -128 xor r8d, r8d .p2align 4, 0x90 .LBB3_10: # =>This Inner Loop Header: Depth=1 vmovups ymm0, ymmword ptr [rsi + r8] vmovups 
ymm1, ymmword ptr [rsi + r8 + 32] vmovups ymm2, ymmword ptr [rsi + r8 + 64] vmovups ymm3, ymmword ptr [rsi + r8 + 96] vxorps ymm0, ymm0, ymmword ptr [rdi + r8] vxorps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] vxorps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] vxorps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] vmovups ymmword ptr [rdx + r8], ymm0 vmovups ymmword ptr [rdx + r8 + 32], ymm1 vmovups ymmword ptr [rdx + r8 + 64], ymm2 vmovups ymmword ptr [rdx + r8 + 96], ymm3 sub r8, -128 cmp r10, r8 jne .LBB3_10 # %bb.11: cmp r10, rcx je .LBB3_12 .LBB3_3: mov r8, r10 not r8 add r8, rcx mov r9, rcx and r9, 3 je .LBB3_5 .p2align 4, 0x90 .LBB3_4: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r10] xor al, byte ptr [rdi + r10] mov byte ptr [rdx + r10], al add r10, 1 add r9, -1 jne .LBB3_4 .LBB3_5: cmp r8, 3 jb .LBB3_12 .p2align 4, 0x90 .LBB3_6: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r10] xor al, byte ptr [rdi + r10] mov byte ptr [rdx + r10], al movzx eax, byte ptr [rsi + r10 + 1] xor al, byte ptr [rdi + r10 + 1] mov byte ptr [rdx + r10 + 1], al movzx eax, byte ptr [rsi + r10 + 2] xor al, byte ptr [rdi + r10 + 2] mov byte ptr [rdx + r10 + 2], al movzx eax, byte ptr [rsi + r10 + 3] xor al, byte ptr [rdi + r10 + 3] mov byte ptr [rdx + r10 + 3], al add r10, 4 cmp rcx, r10 jne .LBB3_6 .LBB3_12: lea rsp, [rbp - 8] pop rbx pop rbp vzeroupper ret .Lfunc_end3: .size bitmap_aligned_xor_avx2, .Lfunc_end3-bitmap_aligned_xor_avx2 # -- End function .ident "Ubuntu clang version 11.1.0-6" .section ".note.GNU-stack","",@progbits .addrsig arrow-go-18.2.0/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s000066400000000000000000000267431476434502500233520ustar00rootroot00000000000000 .text .intel_syntax noprefix .file "bitmap_ops.c" .globl bitmap_aligned_and_sse4 # -- Begin function bitmap_aligned_and_sse4 .p2align 4, 0x90 .type bitmap_aligned_and_sse4,@function bitmap_aligned_and_sse4: # @bitmap_aligned_and_sse4 # %bb.0: push rbp mov rbp, rsp push rbx and rsp, -8 test 
rcx, rcx jle .LBB0_16 # %bb.1: cmp rcx, 31 ja .LBB0_7 # %bb.2: xor r11d, r11d .LBB0_3: mov r8, r11 not r8 add r8, rcx mov r9, rcx and r9, 3 je .LBB0_5 .p2align 4, 0x90 .LBB0_4: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r11] and al, byte ptr [rdi + r11] mov byte ptr [rdx + r11], al add r11, 1 add r9, -1 jne .LBB0_4 .LBB0_5: cmp r8, 3 jb .LBB0_16 .p2align 4, 0x90 .LBB0_6: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r11] and al, byte ptr [rdi + r11] mov byte ptr [rdx + r11], al movzx eax, byte ptr [rsi + r11 + 1] and al, byte ptr [rdi + r11 + 1] mov byte ptr [rdx + r11 + 1], al movzx eax, byte ptr [rsi + r11 + 2] and al, byte ptr [rdi + r11 + 2] mov byte ptr [rdx + r11 + 2], al movzx eax, byte ptr [rsi + r11 + 3] and al, byte ptr [rdi + r11 + 3] mov byte ptr [rdx + r11 + 3], al add r11, 4 cmp rcx, r11 jne .LBB0_6 jmp .LBB0_16 .LBB0_7: lea r9, [rdx + rcx] lea rax, [rdi + rcx] cmp rax, rdx seta r10b lea rax, [rsi + rcx] cmp r9, rdi seta bl cmp rax, rdx seta r8b cmp r9, rsi seta r9b xor r11d, r11d test r10b, bl jne .LBB0_3 # %bb.8: and r8b, r9b jne .LBB0_3 # %bb.9: mov r11, rcx and r11, -32 lea rax, [r11 - 32] mov r9, rax shr r9, 5 add r9, 1 test rax, rax je .LBB0_10 # %bb.11: mov r10, r9 and r10, -2 neg r10 xor r8d, r8d .p2align 4, 0x90 .LBB0_12: # =>This Inner Loop Header: Depth=1 movups xmm0, xmmword ptr [rdi + r8] movups xmm1, xmmword ptr [rdi + r8 + 16] movups xmm2, xmmword ptr [rsi + r8] andps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 16] andps xmm0, xmm1 movups xmmword ptr [rdx + r8], xmm2 movups xmmword ptr [rdx + r8 + 16], xmm0 movups xmm0, xmmword ptr [rdi + r8 + 32] movups xmm1, xmmword ptr [rdi + r8 + 48] movups xmm2, xmmword ptr [rsi + r8 + 32] andps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 48] andps xmm0, xmm1 movups xmmword ptr [rdx + r8 + 32], xmm2 movups xmmword ptr [rdx + r8 + 48], xmm0 add r8, 64 add r10, 2 jne .LBB0_12 # %bb.13: test r9b, 1 je .LBB0_15 .LBB0_14: movups xmm0, xmmword ptr [rdi + r8] 
movups xmm1, xmmword ptr [rdi + r8 + 16] movups xmm2, xmmword ptr [rsi + r8] andps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 16] andps xmm0, xmm1 movups xmmword ptr [rdx + r8], xmm2 movups xmmword ptr [rdx + r8 + 16], xmm0 .LBB0_15: cmp r11, rcx jne .LBB0_3 .LBB0_16: lea rsp, [rbp - 8] pop rbx pop rbp ret .LBB0_10: xor r8d, r8d test r9b, 1 jne .LBB0_14 jmp .LBB0_15 .Lfunc_end0: .size bitmap_aligned_and_sse4, .Lfunc_end0-bitmap_aligned_and_sse4 # -- End function .globl bitmap_aligned_or_sse4 # -- Begin function bitmap_aligned_or_sse4 .p2align 4, 0x90 .type bitmap_aligned_or_sse4,@function bitmap_aligned_or_sse4: # @bitmap_aligned_or_sse4 # %bb.0: push rbp mov rbp, rsp push rbx and rsp, -8 test rcx, rcx jle .LBB1_16 # %bb.1: cmp rcx, 31 ja .LBB1_7 # %bb.2: xor r11d, r11d .LBB1_3: mov r8, r11 not r8 add r8, rcx mov r9, rcx and r9, 3 je .LBB1_5 .p2align 4, 0x90 .LBB1_4: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r11] or al, byte ptr [rdi + r11] mov byte ptr [rdx + r11], al add r11, 1 add r9, -1 jne .LBB1_4 .LBB1_5: cmp r8, 3 jb .LBB1_16 .p2align 4, 0x90 .LBB1_6: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r11] or al, byte ptr [rdi + r11] mov byte ptr [rdx + r11], al movzx eax, byte ptr [rsi + r11 + 1] or al, byte ptr [rdi + r11 + 1] mov byte ptr [rdx + r11 + 1], al movzx eax, byte ptr [rsi + r11 + 2] or al, byte ptr [rdi + r11 + 2] mov byte ptr [rdx + r11 + 2], al movzx eax, byte ptr [rsi + r11 + 3] or al, byte ptr [rdi + r11 + 3] mov byte ptr [rdx + r11 + 3], al add r11, 4 cmp rcx, r11 jne .LBB1_6 jmp .LBB1_16 .LBB1_7: lea r9, [rdx + rcx] lea rax, [rdi + rcx] cmp rax, rdx seta r10b lea rax, [rsi + rcx] cmp r9, rdi seta bl cmp rax, rdx seta r8b cmp r9, rsi seta r9b xor r11d, r11d test r10b, bl jne .LBB1_3 # %bb.8: and r8b, r9b jne .LBB1_3 # %bb.9: mov r11, rcx and r11, -32 lea rax, [r11 - 32] mov r9, rax shr r9, 5 add r9, 1 test rax, rax je .LBB1_10 # %bb.11: mov r10, r9 and r10, -2 neg r10 xor r8d, r8d .p2align 4, 0x90 
.LBB1_12: # =>This Inner Loop Header: Depth=1 movups xmm0, xmmword ptr [rdi + r8] movups xmm1, xmmword ptr [rdi + r8 + 16] movups xmm2, xmmword ptr [rsi + r8] orps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 16] orps xmm0, xmm1 movups xmmword ptr [rdx + r8], xmm2 movups xmmword ptr [rdx + r8 + 16], xmm0 movups xmm0, xmmword ptr [rdi + r8 + 32] movups xmm1, xmmword ptr [rdi + r8 + 48] movups xmm2, xmmword ptr [rsi + r8 + 32] orps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 48] orps xmm0, xmm1 movups xmmword ptr [rdx + r8 + 32], xmm2 movups xmmword ptr [rdx + r8 + 48], xmm0 add r8, 64 add r10, 2 jne .LBB1_12 # %bb.13: test r9b, 1 je .LBB1_15 .LBB1_14: movups xmm0, xmmword ptr [rdi + r8] movups xmm1, xmmword ptr [rdi + r8 + 16] movups xmm2, xmmword ptr [rsi + r8] orps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 16] orps xmm0, xmm1 movups xmmword ptr [rdx + r8], xmm2 movups xmmword ptr [rdx + r8 + 16], xmm0 .LBB1_15: cmp r11, rcx jne .LBB1_3 .LBB1_16: lea rsp, [rbp - 8] pop rbx pop rbp ret .LBB1_10: xor r8d, r8d test r9b, 1 jne .LBB1_14 jmp .LBB1_15 .Lfunc_end1: .size bitmap_aligned_or_sse4, .Lfunc_end1-bitmap_aligned_or_sse4 # -- End function .globl bitmap_aligned_and_not_sse4 # -- Begin function bitmap_aligned_and_not_sse4 .p2align 4, 0x90 .type bitmap_aligned_and_not_sse4,@function bitmap_aligned_and_not_sse4: # @bitmap_aligned_and_not_sse4 # %bb.0: push rbp mov rbp, rsp push rbx and rsp, -8 test rcx, rcx jle .LBB2_16 # %bb.1: cmp rcx, 31 ja .LBB2_7 # %bb.2: xor r11d, r11d .LBB2_3: mov r8, r11 not r8 test cl, 1 je .LBB2_5 # %bb.4: mov al, byte ptr [rsi + r11] not al and al, byte ptr [rdi + r11] mov byte ptr [rdx + r11], al or r11, 1 .LBB2_5: add r8, rcx je .LBB2_16 .p2align 4, 0x90 .LBB2_6: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r11] not al and al, byte ptr [rdi + r11] mov byte ptr [rdx + r11], al movzx eax, byte ptr [rsi + r11 + 1] not al and al, byte ptr [rdi + r11 + 1] mov byte ptr [rdx + r11 + 1], al add r11, 2 cmp rcx, r11 
jne .LBB2_6 jmp .LBB2_16 .LBB2_7: lea r9, [rdx + rcx] lea rax, [rdi + rcx] cmp rax, rdx seta r10b lea rax, [rsi + rcx] cmp r9, rdi seta bl cmp rax, rdx seta r8b cmp r9, rsi seta r9b xor r11d, r11d test r10b, bl jne .LBB2_3 # %bb.8: and r8b, r9b jne .LBB2_3 # %bb.9: mov r11, rcx and r11, -32 lea rax, [r11 - 32] mov r9, rax shr r9, 5 add r9, 1 test rax, rax je .LBB2_10 # %bb.11: mov r10, r9 and r10, -2 neg r10 xor r8d, r8d .p2align 4, 0x90 .LBB2_12: # =>This Inner Loop Header: Depth=1 movups xmm0, xmmword ptr [rdi + r8] movups xmm1, xmmword ptr [rdi + r8 + 16] movups xmm2, xmmword ptr [rsi + r8] andnps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 16] andnps xmm0, xmm1 movups xmmword ptr [rdx + r8], xmm2 movups xmmword ptr [rdx + r8 + 16], xmm0 movups xmm0, xmmword ptr [rdi + r8 + 32] movups xmm1, xmmword ptr [rdi + r8 + 48] movups xmm2, xmmword ptr [rsi + r8 + 32] andnps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 48] andnps xmm0, xmm1 movups xmmword ptr [rdx + r8 + 32], xmm2 movups xmmword ptr [rdx + r8 + 48], xmm0 add r8, 64 add r10, 2 jne .LBB2_12 # %bb.13: test r9b, 1 je .LBB2_15 .LBB2_14: movups xmm0, xmmword ptr [rdi + r8] movups xmm1, xmmword ptr [rdi + r8 + 16] movups xmm2, xmmword ptr [rsi + r8] andnps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 16] andnps xmm0, xmm1 movups xmmword ptr [rdx + r8], xmm2 movups xmmword ptr [rdx + r8 + 16], xmm0 .LBB2_15: cmp r11, rcx jne .LBB2_3 .LBB2_16: lea rsp, [rbp - 8] pop rbx pop rbp ret .LBB2_10: xor r8d, r8d test r9b, 1 jne .LBB2_14 jmp .LBB2_15 .Lfunc_end2: .size bitmap_aligned_and_not_sse4, .Lfunc_end2-bitmap_aligned_and_not_sse4 # -- End function .globl bitmap_aligned_xor_sse4 # -- Begin function bitmap_aligned_xor_sse4 .p2align 4, 0x90 .type bitmap_aligned_xor_sse4,@function bitmap_aligned_xor_sse4: # @bitmap_aligned_xor_sse4 # %bb.0: push rbp mov rbp, rsp push rbx and rsp, -8 test rcx, rcx jle .LBB3_16 # %bb.1: cmp rcx, 31 ja .LBB3_7 # %bb.2: xor r11d, r11d .LBB3_3: mov r8, r11 not r8 add r8, rcx mov 
r9, rcx and r9, 3 je .LBB3_5 .p2align 4, 0x90 .LBB3_4: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r11] xor al, byte ptr [rdi + r11] mov byte ptr [rdx + r11], al add r11, 1 add r9, -1 jne .LBB3_4 .LBB3_5: cmp r8, 3 jb .LBB3_16 .p2align 4, 0x90 .LBB3_6: # =>This Inner Loop Header: Depth=1 movzx eax, byte ptr [rsi + r11] xor al, byte ptr [rdi + r11] mov byte ptr [rdx + r11], al movzx eax, byte ptr [rsi + r11 + 1] xor al, byte ptr [rdi + r11 + 1] mov byte ptr [rdx + r11 + 1], al movzx eax, byte ptr [rsi + r11 + 2] xor al, byte ptr [rdi + r11 + 2] mov byte ptr [rdx + r11 + 2], al movzx eax, byte ptr [rsi + r11 + 3] xor al, byte ptr [rdi + r11 + 3] mov byte ptr [rdx + r11 + 3], al add r11, 4 cmp rcx, r11 jne .LBB3_6 jmp .LBB3_16 .LBB3_7: lea r9, [rdx + rcx] lea rax, [rdi + rcx] cmp rax, rdx seta r10b lea rax, [rsi + rcx] cmp r9, rdi seta bl cmp rax, rdx seta r8b cmp r9, rsi seta r9b xor r11d, r11d test r10b, bl jne .LBB3_3 # %bb.8: and r8b, r9b jne .LBB3_3 # %bb.9: mov r11, rcx and r11, -32 lea rax, [r11 - 32] mov r9, rax shr r9, 5 add r9, 1 test rax, rax je .LBB3_10 # %bb.11: mov r10, r9 and r10, -2 neg r10 xor r8d, r8d .p2align 4, 0x90 .LBB3_12: # =>This Inner Loop Header: Depth=1 movups xmm0, xmmword ptr [rdi + r8] movups xmm1, xmmword ptr [rdi + r8 + 16] movups xmm2, xmmword ptr [rsi + r8] xorps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 16] xorps xmm0, xmm1 movups xmmword ptr [rdx + r8], xmm2 movups xmmword ptr [rdx + r8 + 16], xmm0 movups xmm0, xmmword ptr [rdi + r8 + 32] movups xmm1, xmmword ptr [rdi + r8 + 48] movups xmm2, xmmword ptr [rsi + r8 + 32] xorps xmm2, xmm0 movups xmm0, xmmword ptr [rsi + r8 + 48] xorps xmm0, xmm1 movups xmmword ptr [rdx + r8 + 32], xmm2 movups xmmword ptr [rdx + r8 + 48], xmm0 add r8, 64 add r10, 2 jne .LBB3_12 # %bb.13: test r9b, 1 je .LBB3_15 .LBB3_14: movups xmm0, xmmword ptr [rdi + r8] movups xmm1, xmmword ptr [rdi + r8 + 16] movups xmm2, xmmword ptr [rsi + r8] xorps xmm2, xmm0 movups xmm0, xmmword ptr [rsi 
+ r8 + 16] xorps xmm0, xmm1 movups xmmword ptr [rdx + r8], xmm2 movups xmmword ptr [rdx + r8 + 16], xmm0 .LBB3_15: cmp r11, rcx jne .LBB3_3 .LBB3_16: lea rsp, [rbp - 8] pop rbx pop rbp ret .LBB3_10: xor r8d, r8d test r9b, 1 jne .LBB3_14 jmp .LBB3_15 .Lfunc_end3: .size bitmap_aligned_xor_sse4, .Lfunc_end3-bitmap_aligned_xor_sse4 # -- End function .ident "Ubuntu clang version 11.1.0-6" .section ".note.GNU-stack","",@progbits .addrsig arrow-go-18.2.0/arrow/bitutil/bitmap_ops.go000066400000000000000000000056201476434502500206460ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package bitutil func alignedBitAndGo(left, right, out []byte) { var ( nbytes = len(out) i = 0 ) if nbytes > uint64SizeBytes { // case where we have enough bytes to operate on words leftWords := bytesToUint64(left[i:]) rightWords := bytesToUint64(right[i:]) outWords := bytesToUint64(out[i:]) for w := range outWords { outWords[w] = leftWords[w] & rightWords[w] } i += len(outWords) * uint64SizeBytes } // grab any remaining bytes that were fewer than a word for ; i < nbytes; i++ { out[i] = left[i] & right[i] } } func alignedBitAndNotGo(left, right, out []byte) { var ( nbytes = len(out) i = 0 ) if nbytes > uint64SizeBytes { // case where we have enough bytes to operate on words leftWords := bytesToUint64(left[i:]) rightWords := bytesToUint64(right[i:]) outWords := bytesToUint64(out[i:]) for w := range outWords { outWords[w] = leftWords[w] &^ rightWords[w] } i += len(outWords) * uint64SizeBytes } // grab any remaining bytes that were fewer than a word for ; i < nbytes; i++ { out[i] = left[i] &^ right[i] } } func alignedBitOrGo(left, right, out []byte) { var ( nbytes = len(out) i = 0 ) if nbytes > uint64SizeBytes { // case where we have enough bytes to operate on words leftWords := bytesToUint64(left[i:]) rightWords := bytesToUint64(right[i:]) outWords := bytesToUint64(out[i:]) for w := range outWords { outWords[w] = leftWords[w] | rightWords[w] } i += len(outWords) * uint64SizeBytes } // grab any remaining bytes that were fewer than a word for ; i < nbytes; i++ { out[i] = left[i] | right[i] } } func alignedBitXorGo(left, right, out []byte) { var ( nbytes = len(out) i = 0 ) if nbytes > uint64SizeBytes { // case where we have enough bytes to operate on words leftWords := bytesToUint64(left[i:]) rightWords := bytesToUint64(right[i:]) outWords := bytesToUint64(out[i:]) for w := range outWords { outWords[w] = leftWords[w] ^ rightWords[w] } i += len(outWords) * uint64SizeBytes } // grab any remaining bytes that were fewer than a word for ; i < nbytes; i++ { out[i] = left[i] ^ 
right[i] } } arrow-go-18.2.0/arrow/bitutil/bitmap_ops_amd64.go000066400000000000000000000027111476434502500216370ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !noasm // +build !noasm package bitutil import "golang.org/x/sys/cpu" func init() { if cpu.X86.HasAVX2 { bitAndOp.opAligned = bitmapAlignedAndAVX2 bitOrOp.opAligned = bitmapAlignedOrAVX2 bitAndNotOp.opAligned = bitmapAlignedAndNotAVX2 bitXorOp.opAligned = bitmapAlignedXorAVX2 } else if cpu.X86.HasSSE42 { bitAndOp.opAligned = bitmapAlignedAndSSE4 bitOrOp.opAligned = bitmapAlignedOrSSE4 bitAndNotOp.opAligned = bitmapAlignedAndNotSSE4 bitXorOp.opAligned = bitmapAlignedXorSSE4 } else { bitAndOp.opAligned = alignedBitAndGo bitOrOp.opAligned = alignedBitOrGo bitAndNotOp.opAligned = alignedBitAndNotGo bitXorOp.opAligned = alignedBitXorGo } } arrow-go-18.2.0/arrow/bitutil/bitmap_ops_arm64.go000066400000000000000000000017751476434502500216660ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !noasm // +build !noasm package bitutil func init() { bitAndOp.opAligned = alignedBitAndGo bitOrOp.opAligned = alignedBitOrGo bitAndNotOp.opAligned = alignedBitAndNotGo bitXorOp.opAligned = alignedBitXorGo } arrow-go-18.2.0/arrow/bitutil/bitmap_ops_avx2_amd64.go000066400000000000000000000036421476434502500226030ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build !noasm // +build !noasm package bitutil import ( "unsafe" ) //go:noescape func _bitmap_aligned_and_avx2(left, right, out unsafe.Pointer, length int64) func bitmapAlignedAndAVX2(left, right, out []byte) { _bitmap_aligned_and_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) } //go:noescape func _bitmap_aligned_or_avx2(left, right, out unsafe.Pointer, length int64) func bitmapAlignedOrAVX2(left, right, out []byte) { _bitmap_aligned_or_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) } //go:noescape func _bitmap_aligned_and_not_avx2(left, right, out unsafe.Pointer, length int64) func bitmapAlignedAndNotAVX2(left, right, out []byte) { _bitmap_aligned_and_not_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) } //go:noescape func _bitmap_aligned_xor_avx2(left, right, out unsafe.Pointer, length int64) func bitmapAlignedXorAVX2(left, right, out []byte) { _bitmap_aligned_xor_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) } arrow-go-18.2.0/arrow/bitutil/bitmap_ops_avx2_amd64.s000066400000000000000000000354731476434502500224470ustar00rootroot00000000000000//+build !noasm !appengine // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT TEXT ·_bitmap_aligned_and_avx2(SB), $0-32 MOVQ left+0(FP), DI MOVQ right+8(FP), SI MOVQ out+16(FP), DX MOVQ length+24(FP), CX WORD $0x8548; BYTE $0xc9 // test rcx, rcx JLE LBB0_12 LONG $0x7ff98348 // cmp rcx, 127 JA LBB0_7 WORD $0x3145; BYTE $0xd2 // xor r10d, r10d JMP LBB0_3 LBB0_7: LONG $0x0a0c8d4c // lea r9, [rdx + rcx] LONG $0x0f048d48 // lea rax, [rdi + rcx] WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd3970f41 // seta r11b LONG $0x0e048d48 // lea rax, [rsi + rcx] WORD $0x3949; BYTE $0xf9 // cmp r9, rdi WORD $0x970f; BYTE $0xd3 // seta bl WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd0970f41 // seta r8b WORD $0x3949; BYTE $0xf1 // 
cmp r9, rsi LONG $0xd1970f41 // seta r9b WORD $0x3145; BYTE $0xd2 // xor r10d, r10d WORD $0x8441; BYTE $0xdb // test r11b, bl JNE LBB0_3 WORD $0x2045; BYTE $0xc8 // and r8b, r9b JNE LBB0_3 WORD $0x8949; BYTE $0xca // mov r10, rcx LONG $0x80e28349 // and r10, -128 WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LBB0_10: LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32] LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups ymm3, yword [rsi + r8 + 96] LONG $0x547ca1c4; WORD $0x0704 // vandps ymm0, ymm0, yword [rdi + r8] LONG $0x5474a1c4; WORD $0x074c; BYTE $0x20 // vandps ymm1, ymm1, yword [rdi + r8 + 32] LONG $0x546ca1c4; WORD $0x0754; BYTE $0x40 // vandps ymm2, ymm2, yword [rdi + r8 + 64] LONG $0x5464a1c4; WORD $0x075c; BYTE $0x60 // vandps ymm3, ymm3, yword [rdi + r8 + 96] LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 LONG $0x80e88349 // sub r8, -128 WORD $0x394d; BYTE $0xc2 // cmp r10, r8 JNE LBB0_10 WORD $0x3949; BYTE $0xca // cmp r10, rcx JE LBB0_12 LBB0_3: WORD $0x894d; BYTE $0xd0 // mov r8, r10 WORD $0xf749; BYTE $0xd0 // not r8 WORD $0x0149; BYTE $0xc8 // add r8, rcx WORD $0x8949; BYTE $0xc9 // mov r9, rcx LONG $0x03e18349 // and r9, 3 JE LBB0_5 LBB0_4: LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] LONG $0x17042242 // and al, byte [rdi + r10] LONG $0x12048842 // mov byte [rdx + r10], al LONG $0x01c28349 // add r10, 1 LONG $0xffc18349 // add r9, -1 JNE LBB0_4 LBB0_5: LONG $0x03f88349 // cmp r8, 3 JB LBB0_12 LBB0_6: LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] LONG $0x17042242 // and al, byte [rdi + r10] 
LONG $0x12048842 // mov byte [rdx + r10], al LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] LONG $0x17442242; BYTE $0x01 // and al, byte [rdi + r10 + 1] LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] LONG $0x17442242; BYTE $0x02 // and al, byte [rdi + r10 + 2] LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3] LONG $0x17442242; BYTE $0x03 // and al, byte [rdi + r10 + 3] LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al LONG $0x04c28349 // add r10, 4 WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 JNE LBB0_6 LBB0_12: VZEROUPPER RET TEXT ·_bitmap_aligned_or_avx2(SB), $0-32 MOVQ left+0(FP), DI MOVQ right+8(FP), SI MOVQ out+16(FP), DX MOVQ length+24(FP), CX WORD $0x8548; BYTE $0xc9 // test rcx, rcx JLE LBB1_12 LONG $0x7ff98348 // cmp rcx, 127 JA LBB1_7 WORD $0x3145; BYTE $0xd2 // xor r10d, r10d JMP LBB1_3 LBB1_7: LONG $0x0a0c8d4c // lea r9, [rdx + rcx] LONG $0x0f048d48 // lea rax, [rdi + rcx] WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd3970f41 // seta r11b LONG $0x0e048d48 // lea rax, [rsi + rcx] WORD $0x3949; BYTE $0xf9 // cmp r9, rdi WORD $0x970f; BYTE $0xd3 // seta bl WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd0970f41 // seta r8b WORD $0x3949; BYTE $0xf1 // cmp r9, rsi LONG $0xd1970f41 // seta r9b WORD $0x3145; BYTE $0xd2 // xor r10d, r10d WORD $0x8441; BYTE $0xdb // test r11b, bl JNE LBB1_3 WORD $0x2045; BYTE $0xc8 // and r8b, r9b JNE LBB1_3 WORD $0x8949; BYTE $0xca // mov r10, rcx LONG $0x80e28349 // and r10, -128 WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LBB1_10: LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32] LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups ymm3, yword [rsi + r8 + 96] LONG 
$0x567ca1c4; WORD $0x0704 // vorps ymm0, ymm0, yword [rdi + r8] LONG $0x5674a1c4; WORD $0x074c; BYTE $0x20 // vorps ymm1, ymm1, yword [rdi + r8 + 32] LONG $0x566ca1c4; WORD $0x0754; BYTE $0x40 // vorps ymm2, ymm2, yword [rdi + r8 + 64] LONG $0x5664a1c4; WORD $0x075c; BYTE $0x60 // vorps ymm3, ymm3, yword [rdi + r8 + 96] LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 LONG $0x80e88349 // sub r8, -128 WORD $0x394d; BYTE $0xc2 // cmp r10, r8 JNE LBB1_10 WORD $0x3949; BYTE $0xca // cmp r10, rcx JE LBB1_12 LBB1_3: WORD $0x894d; BYTE $0xd0 // mov r8, r10 WORD $0xf749; BYTE $0xd0 // not r8 WORD $0x0149; BYTE $0xc8 // add r8, rcx WORD $0x8949; BYTE $0xc9 // mov r9, rcx LONG $0x03e18349 // and r9, 3 JE LBB1_5 LBB1_4: LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] LONG $0x17040a42 // or al, byte [rdi + r10] LONG $0x12048842 // mov byte [rdx + r10], al LONG $0x01c28349 // add r10, 1 LONG $0xffc18349 // add r9, -1 JNE LBB1_4 LBB1_5: LONG $0x03f88349 // cmp r8, 3 JB LBB1_12 LBB1_6: LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] LONG $0x17040a42 // or al, byte [rdi + r10] LONG $0x12048842 // mov byte [rdx + r10], al LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] LONG $0x17440a42; BYTE $0x01 // or al, byte [rdi + r10 + 1] LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] LONG $0x17440a42; BYTE $0x02 // or al, byte [rdi + r10 + 2] LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3] LONG $0x17440a42; BYTE $0x03 // or al, byte [rdi + r10 + 3] LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al LONG $0x04c28349 // add r10, 
4 WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 JNE LBB1_6 LBB1_12: VZEROUPPER RET TEXT ·_bitmap_aligned_and_not_avx2(SB), $0-32 MOVQ left+0(FP), DI MOVQ right+8(FP), SI MOVQ out+16(FP), DX MOVQ length+24(FP), CX WORD $0x8548; BYTE $0xc9 // test rcx, rcx JLE LBB2_12 LONG $0x7ff98348 // cmp rcx, 127 JA LBB2_7 WORD $0x3145; BYTE $0xc0 // xor r8d, r8d JMP LBB2_3 LBB2_7: LONG $0x0a048d4c // lea r8, [rdx + rcx] LONG $0x0f048d48 // lea rax, [rdi + rcx] WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd3970f41 // seta r11b LONG $0x0e048d48 // lea rax, [rsi + rcx] WORD $0x3949; BYTE $0xf8 // cmp r8, rdi WORD $0x970f; BYTE $0xd3 // seta bl WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd2970f41 // seta r10b WORD $0x3949; BYTE $0xf0 // cmp r8, rsi LONG $0xd1970f41 // seta r9b WORD $0x3145; BYTE $0xc0 // xor r8d, r8d WORD $0x8441; BYTE $0xdb // test r11b, bl JNE LBB2_3 WORD $0x2045; BYTE $0xca // and r10b, r9b JNE LBB2_3 WORD $0x8949; BYTE $0xc8 // mov r8, rcx LONG $0x80e08349 // and r8, -128 WORD $0xc031 // xor eax, eax LBB2_10: LONG $0x0410fcc5; BYTE $0x06 // vmovups ymm0, yword [rsi + rax] LONG $0x4c10fcc5; WORD $0x2006 // vmovups ymm1, yword [rsi + rax + 32] LONG $0x5410fcc5; WORD $0x4006 // vmovups ymm2, yword [rsi + rax + 64] LONG $0x5c10fcc5; WORD $0x6006 // vmovups ymm3, yword [rsi + rax + 96] LONG $0x0455fcc5; BYTE $0x07 // vandnps ymm0, ymm0, yword [rdi + rax] LONG $0x4c55f4c5; WORD $0x2007 // vandnps ymm1, ymm1, yword [rdi + rax + 32] LONG $0x5455ecc5; WORD $0x4007 // vandnps ymm2, ymm2, yword [rdi + rax + 64] LONG $0x5c55e4c5; WORD $0x6007 // vandnps ymm3, ymm3, yword [rdi + rax + 96] LONG $0x0411fcc5; BYTE $0x02 // vmovups yword [rdx + rax], ymm0 LONG $0x4c11fcc5; WORD $0x2002 // vmovups yword [rdx + rax + 32], ymm1 LONG $0x5411fcc5; WORD $0x4002 // vmovups yword [rdx + rax + 64], ymm2 LONG $0x5c11fcc5; WORD $0x6002 // vmovups yword [rdx + rax + 96], ymm3 LONG $0x80e88348 // sub rax, -128 WORD $0x3949; BYTE $0xc0 // cmp r8, rax JNE LBB2_10 WORD $0x3949; BYTE $0xc8 
// cmp r8, rcx JE LBB2_12 LBB2_3: WORD $0x894d; BYTE $0xc1 // mov r9, r8 WORD $0xf749; BYTE $0xd1 // not r9 WORD $0xc1f6; BYTE $0x01 // test cl, 1 JE LBB2_5 LONG $0x06048a42 // mov al, byte [rsi + r8] WORD $0xd0f6 // not al LONG $0x07042242 // and al, byte [rdi + r8] LONG $0x02048842 // mov byte [rdx + r8], al LONG $0x01c88349 // or r8, 1 LBB2_5: WORD $0x0149; BYTE $0xc9 // add r9, rcx JE LBB2_12 LBB2_6: LONG $0x04b60f42; BYTE $0x06 // movzx eax, byte [rsi + r8] WORD $0xd0f6 // not al LONG $0x07042242 // and al, byte [rdi + r8] LONG $0x02048842 // mov byte [rdx + r8], al LONG $0x44b60f42; WORD $0x0106 // movzx eax, byte [rsi + r8 + 1] WORD $0xd0f6 // not al LONG $0x07442242; BYTE $0x01 // and al, byte [rdi + r8 + 1] LONG $0x02448842; BYTE $0x01 // mov byte [rdx + r8 + 1], al LONG $0x02c08349 // add r8, 2 WORD $0x394c; BYTE $0xc1 // cmp rcx, r8 JNE LBB2_6 LBB2_12: VZEROUPPER RET TEXT ·_bitmap_aligned_xor_avx2(SB), $0-32 MOVQ left+0(FP), DI MOVQ right+8(FP), SI MOVQ out+16(FP), DX MOVQ length+24(FP), CX WORD $0x8548; BYTE $0xc9 // test rcx, rcx JLE LBB3_12 LONG $0x7ff98348 // cmp rcx, 127 JA LBB3_7 WORD $0x3145; BYTE $0xd2 // xor r10d, r10d JMP LBB3_3 LBB3_7: LONG $0x0a0c8d4c // lea r9, [rdx + rcx] LONG $0x0f048d48 // lea rax, [rdi + rcx] WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd3970f41 // seta r11b LONG $0x0e048d48 // lea rax, [rsi + rcx] WORD $0x3949; BYTE $0xf9 // cmp r9, rdi WORD $0x970f; BYTE $0xd3 // seta bl WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd0970f41 // seta r8b WORD $0x3949; BYTE $0xf1 // cmp r9, rsi LONG $0xd1970f41 // seta r9b WORD $0x3145; BYTE $0xd2 // xor r10d, r10d WORD $0x8441; BYTE $0xdb // test r11b, bl JNE LBB3_3 WORD $0x2045; BYTE $0xc8 // and r8b, r9b JNE LBB3_3 WORD $0x8949; BYTE $0xca // mov r10, rcx LONG $0x80e28349 // and r10, -128 WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LBB3_10: LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 
32] LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups ymm3, yword [rsi + r8 + 96] LONG $0x577ca1c4; WORD $0x0704 // vxorps ymm0, ymm0, yword [rdi + r8] LONG $0x5774a1c4; WORD $0x074c; BYTE $0x20 // vxorps ymm1, ymm1, yword [rdi + r8 + 32] LONG $0x576ca1c4; WORD $0x0754; BYTE $0x40 // vxorps ymm2, ymm2, yword [rdi + r8 + 64] LONG $0x5764a1c4; WORD $0x075c; BYTE $0x60 // vxorps ymm3, ymm3, yword [rdi + r8 + 96] LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 LONG $0x80e88349 // sub r8, -128 WORD $0x394d; BYTE $0xc2 // cmp r10, r8 JNE LBB3_10 WORD $0x3949; BYTE $0xca // cmp r10, rcx JE LBB3_12 LBB3_3: WORD $0x894d; BYTE $0xd0 // mov r8, r10 WORD $0xf749; BYTE $0xd0 // not r8 WORD $0x0149; BYTE $0xc8 // add r8, rcx WORD $0x8949; BYTE $0xc9 // mov r9, rcx LONG $0x03e18349 // and r9, 3 JE LBB3_5 LBB3_4: LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] LONG $0x17043242 // xor al, byte [rdi + r10] LONG $0x12048842 // mov byte [rdx + r10], al LONG $0x01c28349 // add r10, 1 LONG $0xffc18349 // add r9, -1 JNE LBB3_4 LBB3_5: LONG $0x03f88349 // cmp r8, 3 JB LBB3_12 LBB3_6: LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] LONG $0x17043242 // xor al, byte [rdi + r10] LONG $0x12048842 // mov byte [rdx + r10], al LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] LONG $0x17443242; BYTE $0x01 // xor al, byte [rdi + r10 + 1] LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] LONG $0x17443242; BYTE $0x02 // xor al, byte [rdi + r10 + 2] LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al LONG $0x44b60f42; WORD $0x0316 // 
movzx eax, byte [rsi + r10 + 3] LONG $0x17443242; BYTE $0x03 // xor al, byte [rdi + r10 + 3] LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al LONG $0x04c28349 // add r10, 4 WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 JNE LBB3_6 LBB3_12: VZEROUPPER RET arrow-go-18.2.0/arrow/bitutil/bitmap_ops_noasm.go000066400000000000000000000017731476434502500220500ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build noasm // +build noasm package bitutil func init() { bitAndOp.opAligned = alignedBitAndGo bitOrOp.opAligned = alignedBitOrGo bitAndNotOp.opAligned = alignedBitAndNotGo bitXorOp.opAligned = alignedBitXorGo } arrow-go-18.2.0/arrow/bitutil/bitmap_ops_ppc64le.go000066400000000000000000000017751476434502500222120ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !noasm // +build !noasm package bitutil func init() { bitAndOp.opAligned = alignedBitAndGo bitOrOp.opAligned = alignedBitOrGo bitAndNotOp.opAligned = alignedBitAndNotGo bitXorOp.opAligned = alignedBitXorGo } arrow-go-18.2.0/arrow/bitutil/bitmap_ops_s390x.go000066400000000000000000000017751476434502500216230ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build !noasm // +build !noasm package bitutil func init() { bitAndOp.opAligned = alignedBitAndGo bitOrOp.opAligned = alignedBitOrGo bitAndNotOp.opAligned = alignedBitAndNotGo bitXorOp.opAligned = alignedBitXorGo } arrow-go-18.2.0/arrow/bitutil/bitmap_ops_sse4_amd64.go000066400000000000000000000036421476434502500226010ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build !noasm // +build !noasm package bitutil import ( "unsafe" ) //go:noescape func _bitmap_aligned_and_sse4(left, right, out unsafe.Pointer, length int64) func bitmapAlignedAndSSE4(left, right, out []byte) { _bitmap_aligned_and_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) } //go:noescape func _bitmap_aligned_or_sse4(left, right, out unsafe.Pointer, length int64) func bitmapAlignedOrSSE4(left, right, out []byte) { _bitmap_aligned_or_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) } //go:noescape func _bitmap_aligned_and_not_sse4(left, right, out unsafe.Pointer, length int64) func bitmapAlignedAndNotSSE4(left, right, out []byte) { _bitmap_aligned_and_not_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) } //go:noescape func _bitmap_aligned_xor_sse4(left, right, out unsafe.Pointer, length int64) func bitmapAlignedXorSSE4(left, right, out []byte) { _bitmap_aligned_xor_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) } arrow-go-18.2.0/arrow/bitutil/bitmap_ops_sse4_amd64.s000066400000000000000000000464301476434502500224400ustar00rootroot00000000000000//+build !noasm !appengine // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT TEXT ·_bitmap_aligned_and_sse4(SB), $0-32 MOVQ left+0(FP), DI MOVQ right+8(FP), SI MOVQ out+16(FP), DX MOVQ length+24(FP), CX WORD $0x8548; BYTE $0xc9 // test rcx, rcx JLE LBB0_16 LONG $0x1ff98348 // cmp rcx, 31 JA LBB0_7 WORD $0x3145; BYTE $0xdb // xor r11d, r11d LBB0_3: WORD $0x894d; BYTE $0xd8 // mov r8, r11 WORD $0xf749; BYTE $0xd0 // not r8 WORD $0x0149; BYTE $0xc8 // add r8, rcx WORD $0x8949; BYTE $0xc9 // mov r9, rcx LONG $0x03e18349 // and r9, 3 JE LBB0_5 LBB0_4: LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] LONG $0x1f042242 // and al, byte [rdi + r11] LONG $0x1a048842 // mov byte [rdx + r11], al LONG $0x01c38349 // add 
r11, 1 LONG $0xffc18349 // add r9, -1 JNE LBB0_4 LBB0_5: LONG $0x03f88349 // cmp r8, 3 JB LBB0_16 LBB0_6: LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] LONG $0x1f042242 // and al, byte [rdi + r11] LONG $0x1a048842 // mov byte [rdx + r11], al LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] LONG $0x1f442242; BYTE $0x01 // and al, byte [rdi + r11 + 1] LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] LONG $0x1f442242; BYTE $0x02 // and al, byte [rdi + r11 + 2] LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] LONG $0x1f442242; BYTE $0x03 // and al, byte [rdi + r11 + 3] LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al LONG $0x04c38349 // add r11, 4 WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 JNE LBB0_6 JMP LBB0_16 LBB0_7: LONG $0x0a0c8d4c // lea r9, [rdx + rcx] LONG $0x0f048d48 // lea rax, [rdi + rcx] WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd2970f41 // seta r10b LONG $0x0e048d48 // lea rax, [rsi + rcx] WORD $0x3949; BYTE $0xf9 // cmp r9, rdi WORD $0x970f; BYTE $0xd3 // seta bl WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd0970f41 // seta r8b WORD $0x3949; BYTE $0xf1 // cmp r9, rsi LONG $0xd1970f41 // seta r9b WORD $0x3145; BYTE $0xdb // xor r11d, r11d WORD $0x8441; BYTE $0xda // test r10b, bl JNE LBB0_3 WORD $0x2045; BYTE $0xc8 // and r8b, r9b JNE LBB0_3 WORD $0x8949; BYTE $0xcb // mov r11, rcx LONG $0xe0e38349 // and r11, -32 LONG $0xe0438d49 // lea rax, [r11 - 32] WORD $0x8949; BYTE $0xc1 // mov r9, rax LONG $0x05e9c149 // shr r9, 5 LONG $0x01c18349 // add r9, 1 WORD $0x8548; BYTE $0xc0 // test rax, rax JE LBB0_10 WORD $0x894d; BYTE $0xca // mov r10, r9 LONG $0xfee28349 // and r10, -2 WORD $0xf749; BYTE $0xda // neg r10 WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LBB0_12: LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] LONG $0x4c100f42; WORD $0x1007 // 
movups xmm1, oword [rdi + r8 + 16] LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 LONG $0x40c08349 // add r8, 64 LONG $0x02c28349 // add r10, 2 JNE LBB0_12 LONG $0x01c1f641 // test r9b, 1 JE LBB0_15 LBB0_14: LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 LBB0_15: WORD $0x3949; BYTE $0xcb // cmp r11, rcx JNE LBB0_3 LBB0_16: RET LBB0_10: WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LONG $0x01c1f641 // test r9b, 1 JNE LBB0_14 JMP LBB0_15 TEXT ·_bitmap_aligned_or_sse4(SB), $0-32 MOVQ left+0(FP), DI MOVQ right+8(FP), SI MOVQ out+16(FP), DX MOVQ length+24(FP), CX WORD $0x8548; BYTE $0xc9 // test rcx, rcx JLE LBB1_16 LONG $0x1ff98348 // cmp rcx, 31 JA LBB1_7 WORD $0x3145; BYTE $0xdb // xor r11d, r11d LBB1_3: WORD $0x894d; BYTE $0xd8 // mov r8, r11 WORD 
$0xf749; BYTE $0xd0 // not r8 WORD $0x0149; BYTE $0xc8 // add r8, rcx WORD $0x8949; BYTE $0xc9 // mov r9, rcx LONG $0x03e18349 // and r9, 3 JE LBB1_5 LBB1_4: LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] LONG $0x1f040a42 // or al, byte [rdi + r11] LONG $0x1a048842 // mov byte [rdx + r11], al LONG $0x01c38349 // add r11, 1 LONG $0xffc18349 // add r9, -1 JNE LBB1_4 LBB1_5: LONG $0x03f88349 // cmp r8, 3 JB LBB1_16 LBB1_6: LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] LONG $0x1f040a42 // or al, byte [rdi + r11] LONG $0x1a048842 // mov byte [rdx + r11], al LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] LONG $0x1f440a42; BYTE $0x01 // or al, byte [rdi + r11 + 1] LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] LONG $0x1f440a42; BYTE $0x02 // or al, byte [rdi + r11 + 2] LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] LONG $0x1f440a42; BYTE $0x03 // or al, byte [rdi + r11 + 3] LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al LONG $0x04c38349 // add r11, 4 WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 JNE LBB1_6 JMP LBB1_16 LBB1_7: LONG $0x0a0c8d4c // lea r9, [rdx + rcx] LONG $0x0f048d48 // lea rax, [rdi + rcx] WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd2970f41 // seta r10b LONG $0x0e048d48 // lea rax, [rsi + rcx] WORD $0x3949; BYTE $0xf9 // cmp r9, rdi WORD $0x970f; BYTE $0xd3 // seta bl WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd0970f41 // seta r8b WORD $0x3949; BYTE $0xf1 // cmp r9, rsi LONG $0xd1970f41 // seta r9b WORD $0x3145; BYTE $0xdb // xor r11d, r11d WORD $0x8441; BYTE $0xda // test r10b, bl JNE LBB1_3 WORD $0x2045; BYTE $0xc8 // and r8b, r9b JNE LBB1_3 WORD $0x8949; BYTE $0xcb // mov r11, rcx LONG $0xe0e38349 // and r11, -32 LONG $0xe0438d49 // lea rax, [r11 - 32] WORD $0x8949; BYTE $0xc1 // mov r9, rax LONG $0x05e9c149 // shr r9, 5 LONG 
$0x01c18349 // add r9, 1 WORD $0x8548; BYTE $0xc0 // test rax, rax JE LBB1_10 WORD $0x894d; BYTE $0xca // mov r10, r9 LONG $0xfee28349 // and r10, -2 WORD $0xf749; BYTE $0xda // neg r10 WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LBB1_12: LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 LONG $0x40c08349 // add r8, 64 LONG $0x02c28349 // add r10, 2 JNE LBB1_12 LONG $0x01c1f641 // test r9b, 1 JE LBB1_15 LBB1_14: LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 LBB1_15: WORD $0x3949; BYTE $0xcb // cmp r11, rcx JNE LBB1_3 LBB1_16: RET LBB1_10: WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LONG $0x01c1f641 // test r9b, 1 JNE LBB1_14 JMP 
LBB1_15 TEXT ·_bitmap_aligned_and_not_sse4(SB), $0-32 MOVQ left+0(FP), DI MOVQ right+8(FP), SI MOVQ out+16(FP), DX MOVQ length+24(FP), CX WORD $0x8548; BYTE $0xc9 // test rcx, rcx JLE LBB2_16 LONG $0x1ff98348 // cmp rcx, 31 JA LBB2_7 WORD $0x3145; BYTE $0xdb // xor r11d, r11d LBB2_3: WORD $0x894d; BYTE $0xd8 // mov r8, r11 WORD $0xf749; BYTE $0xd0 // not r8 WORD $0xc1f6; BYTE $0x01 // test cl, 1 JE LBB2_5 LONG $0x1e048a42 // mov al, byte [rsi + r11] WORD $0xd0f6 // not al LONG $0x1f042242 // and al, byte [rdi + r11] LONG $0x1a048842 // mov byte [rdx + r11], al LONG $0x01cb8349 // or r11, 1 LBB2_5: WORD $0x0149; BYTE $0xc8 // add r8, rcx JE LBB2_16 LBB2_6: LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] WORD $0xd0f6 // not al LONG $0x1f042242 // and al, byte [rdi + r11] LONG $0x1a048842 // mov byte [rdx + r11], al LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] WORD $0xd0f6 // not al LONG $0x1f442242; BYTE $0x01 // and al, byte [rdi + r11 + 1] LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al LONG $0x02c38349 // add r11, 2 WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 JNE LBB2_6 JMP LBB2_16 LBB2_7: LONG $0x0a0c8d4c // lea r9, [rdx + rcx] LONG $0x0f048d48 // lea rax, [rdi + rcx] WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd2970f41 // seta r10b LONG $0x0e048d48 // lea rax, [rsi + rcx] WORD $0x3949; BYTE $0xf9 // cmp r9, rdi WORD $0x970f; BYTE $0xd3 // seta bl WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd0970f41 // seta r8b WORD $0x3949; BYTE $0xf1 // cmp r9, rsi LONG $0xd1970f41 // seta r9b WORD $0x3145; BYTE $0xdb // xor r11d, r11d WORD $0x8441; BYTE $0xda // test r10b, bl JNE LBB2_3 WORD $0x2045; BYTE $0xc8 // and r8b, r9b JNE LBB2_3 WORD $0x8949; BYTE $0xcb // mov r11, rcx LONG $0xe0e38349 // and r11, -32 LONG $0xe0438d49 // lea rax, [r11 - 32] WORD $0x8949; BYTE $0xc1 // mov r9, rax LONG $0x05e9c149 // shr r9, 5 LONG $0x01c18349 // add r9, 1 WORD $0x8548; BYTE $0xc0 // test rax, rax JE LBB2_10 WORD $0x894d; BYTE $0xca 
// mov r10, r9 LONG $0xfee28349 // and r10, -2 WORD $0xf749; BYTE $0xda // neg r10 WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LBB2_12: LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0 LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1 LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0 LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1 LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 LONG $0x40c08349 // add r8, 64 LONG $0x02c28349 // add r10, 2 JNE LBB2_12 LONG $0x01c1f641 // test r9b, 1 JE LBB2_15 LBB2_14: LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0 LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1 LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 LBB2_15: WORD $0x3949; BYTE $0xcb // cmp r11, rcx JNE LBB2_3 LBB2_16: RET LBB2_10: WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LONG $0x01c1f641 // test r9b, 1 JNE LBB2_14 JMP LBB2_15 TEXT ·_bitmap_aligned_xor_sse4(SB), $0-32 MOVQ left+0(FP), DI MOVQ right+8(FP), SI 
MOVQ out+16(FP), DX MOVQ length+24(FP), CX WORD $0x8548; BYTE $0xc9 // test rcx, rcx JLE LBB3_16 LONG $0x1ff98348 // cmp rcx, 31 JA LBB3_7 WORD $0x3145; BYTE $0xdb // xor r11d, r11d LBB3_3: WORD $0x894d; BYTE $0xd8 // mov r8, r11 WORD $0xf749; BYTE $0xd0 // not r8 WORD $0x0149; BYTE $0xc8 // add r8, rcx WORD $0x8949; BYTE $0xc9 // mov r9, rcx LONG $0x03e18349 // and r9, 3 JE LBB3_5 LBB3_4: LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] LONG $0x1f043242 // xor al, byte [rdi + r11] LONG $0x1a048842 // mov byte [rdx + r11], al LONG $0x01c38349 // add r11, 1 LONG $0xffc18349 // add r9, -1 JNE LBB3_4 LBB3_5: LONG $0x03f88349 // cmp r8, 3 JB LBB3_16 LBB3_6: LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] LONG $0x1f043242 // xor al, byte [rdi + r11] LONG $0x1a048842 // mov byte [rdx + r11], al LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] LONG $0x1f443242; BYTE $0x01 // xor al, byte [rdi + r11 + 1] LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] LONG $0x1f443242; BYTE $0x02 // xor al, byte [rdi + r11 + 2] LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] LONG $0x1f443242; BYTE $0x03 // xor al, byte [rdi + r11 + 3] LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al LONG $0x04c38349 // add r11, 4 WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 JNE LBB3_6 JMP LBB3_16 LBB3_7: LONG $0x0a0c8d4c // lea r9, [rdx + rcx] LONG $0x0f048d48 // lea rax, [rdi + rcx] WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd2970f41 // seta r10b LONG $0x0e048d48 // lea rax, [rsi + rcx] WORD $0x3949; BYTE $0xf9 // cmp r9, rdi WORD $0x970f; BYTE $0xd3 // seta bl WORD $0x3948; BYTE $0xd0 // cmp rax, rdx LONG $0xd0970f41 // seta r8b WORD $0x3949; BYTE $0xf1 // cmp r9, rsi LONG $0xd1970f41 // seta r9b WORD $0x3145; BYTE $0xdb // xor r11d, r11d WORD $0x8441; BYTE $0xda // test r10b, bl JNE LBB3_3 WORD 
$0x2045; BYTE $0xc8 // and r8b, r9b JNE LBB3_3 WORD $0x8949; BYTE $0xcb // mov r11, rcx LONG $0xe0e38349 // and r11, -32 LONG $0xe0438d49 // lea rax, [r11 - 32] WORD $0x8949; BYTE $0xc1 // mov r9, rax LONG $0x05e9c149 // shr r9, 5 LONG $0x01c18349 // add r9, 1 WORD $0x8548; BYTE $0xc0 // test rax, rax JE LBB3_10 WORD $0x894d; BYTE $0xca // mov r10, r9 LONG $0xfee28349 // and r10, -2 WORD $0xf749; BYTE $0xda // neg r10 WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LBB3_12: LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0 LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1 LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0 LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1 LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 LONG $0x40c08349 // add r8, 64 LONG $0x02c28349 // add r10, 2 JNE LBB3_12 LONG $0x01c1f641 // test r9b, 1 JE LBB3_15 LBB3_14: LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0 LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1 LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 
LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 LBB3_15: WORD $0x3949; BYTE $0xcb // cmp r11, rcx JNE LBB3_3 LBB3_16: RET LBB3_10: WORD $0x3145; BYTE $0xc0 // xor r8d, r8d LONG $0x01c1f641 // test r9b, 1 JNE LBB3_14 JMP LBB3_15 arrow-go-18.2.0/arrow/bitutil/bitmaps.go000066400000000000000000000560501476434502500201530ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package bitutil import ( "bytes" "errors" "math/bits" "unsafe" "github.com/apache/arrow-go/v18/arrow/endian" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" ) // BitmapReader is a simple bitmap reader for a byte slice. 
type BitmapReader struct {
	bitmap []byte // backing bitmap bytes being read
	pos    int    // current bit position, in [0, len]
	len    int    // total number of bits readable

	current    byte // cached byte holding the bit at pos
	byteOffset int  // index into bitmap of the cached byte
	bitOffset  int  // bit index of pos within the cached byte
}

// NewBitmapReader creates and returns a new bitmap reader for the given bitmap,
// starting at bit offset `offset` and reading `length` bits.
func NewBitmapReader(bitmap []byte, offset, length int) *BitmapReader {
	curbyte := byte(0)
	// only touch the bitmap when there is actually something to read
	if length > 0 && bitmap != nil {
		curbyte = bitmap[offset/8]
	}
	return &BitmapReader{
		bitmap:     bitmap,
		byteOffset: offset / 8,
		bitOffset:  offset % 8,
		current:    curbyte,
		len:        length,
	}
}

// Set returns true if the current bit is set
func (b *BitmapReader) Set() bool {
	return (b.current & (1 << b.bitOffset)) != 0
}

// NotSet returns true if the current bit is not set
func (b *BitmapReader) NotSet() bool {
	return (b.current & (1 << b.bitOffset)) == 0
}

// Next advances the reader to the next bit in the bitmap.
func (b *BitmapReader) Next() {
	b.bitOffset++
	b.pos++
	if b.bitOffset == 8 {
		// crossed a byte boundary: cache the next byte, but only while
		// still in range so we never read past the end of the bitmap.
		b.bitOffset = 0
		b.byteOffset++
		if b.pos < b.len {
			b.current = b.bitmap[int(b.byteOffset)]
		}
	}
}

// Pos returns the current bit position in the bitmap that the reader is looking at
func (b *BitmapReader) Pos() int { return b.pos }

// Len returns the total number of bits in the bitmap
func (b *BitmapReader) Len() int { return b.len }

// BitmapWriter is a simple writer for writing bitmaps to byte slices
type BitmapWriter struct {
	buf    []byte // destination byte slice
	pos    int    // number of bits written so far
	length int    // total number of bits this writer may write

	curByte    uint8 // scratch byte accumulating bits before being flushed to buf
	bitMask    uint8 // single-bit mask selecting the current bit within curByte
	byteOffset int   // index into buf of the byte being accumulated
}

// NewBitmapWriter returns a sequential bitwise writer that preserves surrounding
// bit values as it writes.
func NewBitmapWriter(bitmap []byte, start, length int) *BitmapWriter {
	ret := &BitmapWriter{
		buf:        bitmap,
		length:     length,
		byteOffset: start / 8,
		bitMask:    BitMask[start%8],
	}
	if length > 0 {
		// seed curByte with the existing byte so surrounding bits survive
		ret.curByte = bitmap[int(ret.byteOffset)]
	}
	return ret
}

// Reset resets the position and view of the slice to restart writing a bitmap
// to the same byte slice.
func (b *BitmapWriter) Reset(start, length int) {
	b.pos = 0
	b.byteOffset = start / 8
	b.bitMask = BitMask[start%8]
	b.length = length
	if b.length > 0 {
		b.curByte = b.buf[int(b.byteOffset)]
	}
}

// Pos returns the number of bits written so far.
func (b *BitmapWriter) Pos() int { return b.pos }

// Set marks the current bit as 1 in the scratch byte.
func (b *BitmapWriter) Set() { b.curByte |= b.bitMask }

// Clear marks the current bit as 0 in the scratch byte.
func (b *BitmapWriter) Clear() { b.curByte &= ^b.bitMask }

// Next increments the writer to the next bit for writing.
func (b *BitmapWriter) Next() {
	b.bitMask = b.bitMask << 1
	b.pos++
	if b.bitMask == 0 {
		// wrapped past bit 7: flush the accumulated byte and cache the next one
		b.bitMask = 0x01
		b.buf[b.byteOffset] = b.curByte
		b.byteOffset++
		if b.pos < b.length {
			b.curByte = b.buf[int(b.byteOffset)]
		}
	}
}

// AppendBools writes a series of booleans to the bitmapwriter and returns
// the number of values that were written (bounded by the remaining space
// in the writer).
func (b *BitmapWriter) AppendBools(in []bool) int {
	space := min(b.length-b.pos, len(in))
	if space == 0 {
		return 0
	}

	bitOffset := bits.TrailingZeros32(uint32(b.bitMask))
	// location that the first byte needs to be written to for appending
	appslice := b.buf[int(b.byteOffset) : b.byteOffset+int(BytesForBits(int64(bitOffset+space)))]
	// update everything but curByte
	appslice[0] = b.curByte
	for i, b := range in[:space] {
		if b {
			SetBit(appslice, i+bitOffset)
		} else {
			ClearBit(appslice, i+bitOffset)
		}
	}
	b.pos += space
	b.bitMask = BitMask[(bitOffset+space)%8]
	b.byteOffset += (bitOffset + space) / 8
	b.curByte = appslice[len(appslice)-1]
	return space
}

// Finish flushes the final byte out to the byteslice in case it was not already
// on a byte aligned boundary.
func (b *BitmapWriter) Finish() { if b.length > 0 && (b.bitMask != 0x01 || b.pos < b.length) { b.buf[int(b.byteOffset)] = b.curByte } } // BitmapWordReader is a reader for bitmaps that reads a word at a time (a word being an 8 byte uint64) // and then provides functions to grab the individual trailing bytes after the last word type BitmapWordReader struct { bitmap []byte offset int nwords int trailingBits int trailingBytes int curword uint64 } // NewBitmapWordReader sets up a word reader, calculates the number of trailing bits and // number of trailing bytes, along with the number of words. func NewBitmapWordReader(bitmap []byte, offset, length int) *BitmapWordReader { bitoffset := offset % 8 byteOffset := offset / 8 bm := &BitmapWordReader{ offset: bitoffset, bitmap: bitmap[byteOffset : byteOffset+int(BytesForBits(int64(bitoffset+length)))], // decrement wordcount by 1 as we may touch two adjacent words in one iteration nwords: length/int(unsafe.Sizeof(uint64(0))*8) - 1, } if bm.nwords < 0 { bm.nwords = 0 } bm.trailingBits = length - bm.nwords*int(unsafe.Sizeof(uint64(0)))*8 bm.trailingBytes = int(BytesForBits(int64(bm.trailingBits))) if bm.nwords > 0 { bm.curword = toFromLEFunc(endian.Native.Uint64(bm.bitmap)) } else if length > 0 { setLSB(&bm.curword, bm.bitmap[0]) } return bm } // NextWord returns the next full word read from the bitmap, should not be called // if Words() is 0 as it will step outside of the bounds of the bitmap slice and panic. // // We don't perform the bounds checking in order to improve performance. 
func (bm *BitmapWordReader) NextWord() uint64 { bm.bitmap = bm.bitmap[unsafe.Sizeof(bm.curword):] word := bm.curword nextWord := toFromLEFunc(endian.Native.Uint64(bm.bitmap)) if bm.offset != 0 { // combine two adjacent words into one word // |<------ next ----->|<---- current ---->| // +-------------+-----+-------------+-----+ // | --- | A | B | --- | // +-------------+-----+-------------+-----+ // | | offset // v v // +-----+-------------+ // | A | B | // +-----+-------------+ // |<------ word ----->| word >>= uint64(bm.offset) word |= nextWord << (int64(unsafe.Sizeof(uint64(0))*8) - int64(bm.offset)) } bm.curword = nextWord return word } // NextTrailingByte returns the next trailing byte of the bitmap after the last word // along with the number of valid bits in that byte. When validBits < 8, that // is the last byte. // // If the bitmap ends on a byte alignment, then the last byte can also return 8 valid bits. // Thus the TrailingBytes function should be used to know how many trailing bytes to read. func (bm *BitmapWordReader) NextTrailingByte() (val byte, validBits int) { debug.Assert(bm.trailingBits > 0, "next trailing byte called with no trailing bits") if bm.trailingBits <= 8 { // last byte validBits = bm.trailingBits bm.trailingBits = 0 rdr := NewBitmapReader(bm.bitmap, bm.offset, validBits) for i := 0; i < validBits; i++ { val >>= 1 if rdr.Set() { val |= 0x80 } rdr.Next() } val >>= (8 - validBits) return } bm.bitmap = bm.bitmap[1:] nextByte := bm.bitmap[0] val = getLSB(bm.curword) if bm.offset != 0 { val >>= byte(bm.offset) val |= nextByte << (8 - bm.offset) } setLSB(&bm.curword, nextByte) bm.trailingBits -= 8 bm.trailingBytes-- validBits = 8 return } func (bm *BitmapWordReader) Words() int { return bm.nwords } func (bm *BitmapWordReader) TrailingBytes() int { return bm.trailingBytes } // BitmapWordWriter is a bitmap writer for writing a full word at a time (a word being // a uint64). 
After the last full word is written, PutNextTrailingByte can be used to // write the remaining trailing bytes. type BitmapWordWriter struct { bitmap []byte offset int len int bitMask uint64 currentWord uint64 } // NewBitmapWordWriter initializes a new bitmap word writer which will start writing // into the byte slice at bit offset start, expecting to write len bits. func NewBitmapWordWriter(bitmap []byte, start, len int) *BitmapWordWriter { ret := &BitmapWordWriter{ bitmap: bitmap[start/8:], len: len, offset: start % 8, bitMask: (uint64(1) << uint64(start%8)) - 1, } if ret.offset != 0 { if ret.len >= int(unsafe.Sizeof(uint64(0))*8) { ret.currentWord = toFromLEFunc(endian.Native.Uint64(ret.bitmap)) } else if ret.len > 0 { setLSB(&ret.currentWord, ret.bitmap[0]) } } return ret } // PutNextWord writes the given word to the bitmap, potentially splitting across // two adjacent words. func (bm *BitmapWordWriter) PutNextWord(word uint64) { sz := int(unsafe.Sizeof(word)) if bm.offset != 0 { // split one word into two adjacent words, don't touch unused bits // |<------ word ----->| // +-----+-------------+ // | A | B | // +-----+-------------+ // | | // v v offset // +-------------+-----+-------------+-----+ // | --- | A | B | --- | // +-------------+-----+-------------+-----+ // |<------ next ----->|<---- current ---->| word = (word << uint64(bm.offset)) | (word >> (int64(sz*8) - int64(bm.offset))) next := toFromLEFunc(endian.Native.Uint64(bm.bitmap[sz:])) bm.currentWord = (bm.currentWord & bm.bitMask) | (word &^ bm.bitMask) next = (next &^ bm.bitMask) | (word & bm.bitMask) endian.Native.PutUint64(bm.bitmap, toFromLEFunc(bm.currentWord)) endian.Native.PutUint64(bm.bitmap[sz:], toFromLEFunc(next)) bm.currentWord = next } else { endian.Native.PutUint64(bm.bitmap, toFromLEFunc(word)) } bm.bitmap = bm.bitmap[sz:] } // PutNextTrailingByte writes the number of bits indicated by validBits from b to // the bitmap. 
func (bm *BitmapWordWriter) PutNextTrailingByte(b byte, validBits int) {
	curbyte := getLSB(bm.currentWord)
	if validBits == 8 {
		if bm.offset != 0 {
			// byte-level analogue of PutNextWord's two-word split
			b = (b << bm.offset) | (b >> (8 - bm.offset))
			next := bm.bitmap[1]
			curbyte = (curbyte & byte(bm.bitMask)) | (b &^ byte(bm.bitMask))
			next = (next &^ byte(bm.bitMask)) | (b & byte(bm.bitMask))
			bm.bitmap[0] = curbyte
			bm.bitmap[1] = next
			bm.currentWord = uint64(next)
		} else {
			bm.bitmap[0] = b
		}
		bm.bitmap = bm.bitmap[1:]
	} else {
		// partial byte: fall back to the bit-by-bit writer, consuming the
		// low validBits bits of b in order.
		debug.Assert(validBits > 0 && validBits < 8, "invalid valid bits in bitmap word writer")
		debug.Assert(BytesForBits(int64(bm.offset+validBits)) <= int64(len(bm.bitmap)), "writing trailing byte outside of bounds of bitmap")
		wr := NewBitmapWriter(bm.bitmap, int(bm.offset), validBits)
		for i := 0; i < validBits; i++ {
			if b&0x01 != 0 {
				wr.Set()
			} else {
				wr.Clear()
			}
			wr.Next()
			b >>= 1
		}
		wr.Finish()
	}
}

// transferMode selects whether transferBitmap copies bits verbatim or
// inverts them while copying.
type transferMode int8

const (
	transferCopy transferMode = iota
	transferInvert
)

// transferBitmap copies (or inverts, per mode) length bits from src starting
// at bit srcOffset into dst starting at bit dstOffset, preserving all bits of
// dst outside the written range.
func transferBitmap(mode transferMode, src []byte, srcOffset, length int, dst []byte, dstOffset int) {
	if length == 0 {
		// if there's nothing to write, end early.
		return
	}

	bitOffset := srcOffset % 8
	destBitOffset := dstOffset % 8

	// slow path, one of the bitmaps are not byte aligned.
	if bitOffset != 0 || destBitOffset != 0 {
		rdr := NewBitmapWordReader(src, srcOffset, length)
		wr := NewBitmapWordWriter(dst, dstOffset, length)

		nwords := rdr.Words()
		for nwords > 0 {
			nwords--
			if mode == transferInvert {
				wr.PutNextWord(^rdr.NextWord())
			} else {
				wr.PutNextWord(rdr.NextWord())
			}
		}
		nbytes := rdr.TrailingBytes()
		for nbytes > 0 {
			nbytes--
			bt, validBits := rdr.NextTrailingByte()
			if mode == transferInvert {
				bt = ^bt
			}
			wr.PutNextTrailingByte(bt, validBits)
		}
		return
	}

	// fast path, both are starting with byte-aligned bitmaps
	nbytes := int(BytesForBits(int64(length)))

	// shift by its byte offset
	src = src[srcOffset/8:]
	dst = dst[dstOffset/8:]

	// Take care of the trailing bits in the last byte
	// E.g., if trailing_bits = 5, last byte should be
	// - low  3 bits: new bits from last byte of data buffer
	// - high 5 bits: old bits from last byte of dest buffer
	trailingBits := nbytes*8 - length
	trailMask := byte(uint(1)<<(8-trailingBits)) - 1
	var lastData byte
	if mode == transferInvert {
		for i, b := range src[:nbytes-1] {
			dst[i] = ^b
		}
		lastData = ^src[nbytes-1]
	} else {
		copy(dst, src[:nbytes-1])
		lastData = src[nbytes-1]
	}

	// merge the final byte: keep dst's high trailing bits, take src's low bits
	dst[nbytes-1] &= ^trailMask
	dst[nbytes-1] |= lastData & trailMask
}

// CopyBitmap copies the bitmap indicated by src, starting at bit offset srcOffset,
// and copying length bits into dst, starting at bit offset dstOffset.
func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) {
	transferBitmap(transferCopy, src, srcOffset, length, dst, dstOffset)
}

// InvertBitmap copies a bit range of a bitmap, inverting it as it copies
// over into the destination.
func InvertBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) { transferBitmap(transferInvert, src, srcOffset, length, dst, dstOffset) } type bitOp struct { opWord func(uint64, uint64) uint64 opByte func(byte, byte) byte opAligned func(l, r, o []byte) } var ( bitAndOp = bitOp{ opWord: func(l, r uint64) uint64 { return l & r }, opByte: func(l, r byte) byte { return l & r }, opAligned: alignedBitAndGo, } bitOrOp = bitOp{ opWord: func(l, r uint64) uint64 { return l | r }, opByte: func(l, r byte) byte { return l | r }, opAligned: alignedBitOrGo, } bitAndNotOp = bitOp{ opWord: func(l, r uint64) uint64 { return l &^ r }, opByte: func(l, r byte) byte { return l &^ r }, opAligned: alignedBitAndNotGo, } bitXorOp = bitOp{ opWord: func(l, r uint64) uint64 { return l ^ r }, opByte: func(l, r byte) byte { return l ^ r }, opAligned: alignedBitXorGo, } ) func alignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { debug.Assert(lOffset%8 == rOffset%8, "aligned bitmap op called with unaligned offsets") debug.Assert(lOffset%8 == outOffset%8, "aligned bitmap op called with unaligned output offset") nbytes := BytesForBits(length + lOffset%8) left = left[lOffset/8:] right = right[rOffset/8:] out = out[outOffset/8:] endMask := (lOffset + length%8) switch nbytes { case 0: return case 1: // everything within a single byte // (length+lOffset%8) <= 8 mask := PrecedingBitmask[lOffset%8] if endMask != 0 { mask |= TrailingBitmask[(lOffset+length)%8] } out[0] = (out[0] & mask) | (op.opByte(left[0], right[0]) &^ mask) case 2: // don't send zero length to opAligned firstByteMask := PrecedingBitmask[lOffset%8] out[0] = (out[0] & firstByteMask) | (op.opByte(left[0], right[0]) &^ firstByteMask) lastByteMask := byte(0) if endMask != 0 { lastByteMask = TrailingBitmask[(lOffset+length)%8] } out[1] = (out[1] & lastByteMask) | (op.opByte(left[1], right[1]) &^ lastByteMask) default: firstByteMask := PrecedingBitmask[lOffset%8] 
out[0] = (out[0] & firstByteMask) | (op.opByte(left[0], right[0]) &^ firstByteMask) op.opAligned(left[1:nbytes-1], right[1:nbytes-1], out[1:nbytes-1]) lastByteMask := byte(0) if endMask != 0 { lastByteMask = TrailingBitmask[(lOffset+length)%8] } out[nbytes-1] = (out[nbytes-1] & lastByteMask) | (op.opByte(left[nbytes-1], right[nbytes-1]) &^ lastByteMask) } } func unalignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { leftRdr := NewBitmapWordReader(left, int(lOffset), int(length)) rightRdr := NewBitmapWordReader(right, int(rOffset), int(length)) writer := NewBitmapWordWriter(out, int(outOffset), int(length)) for nwords := leftRdr.Words(); nwords > 0; nwords-- { writer.PutNextWord(op.opWord(leftRdr.NextWord(), rightRdr.NextWord())) } for nbytes := leftRdr.TrailingBytes(); nbytes > 0; nbytes-- { leftByte, leftValid := leftRdr.NextTrailingByte() rightByte, rightValid := rightRdr.NextTrailingByte() debug.Assert(leftValid == rightValid, "unexpected mismatch of valid bits") writer.PutNextTrailingByte(op.opByte(leftByte, rightByte), leftValid) } } func BitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset, length int64) { if (outOffset%8 == lOffset%8) && (outOffset%8 == rOffset%8) { // fastcase! 
alignedBitmapOp(op, left, right, lOffset, rOffset, out, outOffset, length) } else { unalignedBitmapOp(op, left, right, lOffset, rOffset, out, outOffset, length) } } func BitmapOpAlloc(mem memory.Allocator, op bitOp, left, right []byte, lOffset, rOffset int64, length int64, outOffset int64) *memory.Buffer { bits := length + outOffset buf := memory.NewResizableBuffer(mem) buf.Resize(int(BytesForBits(bits))) BitmapOp(op, left, right, lOffset, rOffset, buf.Bytes(), outOffset, length) return buf } func BitmapAnd(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { BitmapOp(bitAndOp, left, right, lOffset, rOffset, out, outOffset, length) } func BitmapOr(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { BitmapOp(bitOrOp, left, right, lOffset, rOffset, out, outOffset, length) } func BitmapAndAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { return BitmapOpAlloc(mem, bitAndOp, left, right, lOffset, rOffset, length, outOffset) } func BitmapOrAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { return BitmapOpAlloc(mem, bitOrOp, left, right, lOffset, rOffset, length, outOffset) } func BitmapAndNot(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { BitmapOp(bitAndNotOp, left, right, lOffset, rOffset, out, outOffset, length) } func BitmapAndNotAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { return BitmapOpAlloc(mem, bitAndNotOp, left, right, lOffset, rOffset, length, outOffset) } func BitmapXor(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { BitmapOp(bitXorOp, left, right, lOffset, rOffset, out, outOffset, length) } func BitmapXorAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer 
{ return BitmapOpAlloc(mem, bitXorOp, left, right, lOffset, rOffset, length, outOffset) } func BitmapEquals(left, right []byte, lOffset, rOffset int64, length int64) bool { if lOffset%8 == 0 && rOffset%8 == 0 { // byte aligned, fast path, can use bytes.Equal (memcmp) byteLen := length / 8 lStart := lOffset / 8 rStart := rOffset / 8 if !bytes.Equal(left[lStart:lStart+byteLen], right[rStart:rStart+byteLen]) { return false } // check trailing bits for i := (length / 8) * 8; i < length; i++ { if BitIsSet(left, int(lOffset+i)) != BitIsSet(right, int(rOffset+i)) { return false } } return true } lrdr := NewBitmapWordReader(left, int(lOffset), int(length)) rrdr := NewBitmapWordReader(right, int(rOffset), int(length)) nwords := lrdr.Words() for nwords > 0 { nwords-- if lrdr.NextWord() != rrdr.NextWord() { return false } } nbytes := lrdr.TrailingBytes() for nbytes > 0 { nbytes-- lbt, _ := lrdr.NextTrailingByte() rbt, _ := rrdr.NextTrailingByte() if lbt != rbt { return false } } return true } // OptionalBitIndexer is a convenience wrapper for getting bits from // a bitmap which may or may not be nil. type OptionalBitIndexer struct { Bitmap []byte Offset int } func (b *OptionalBitIndexer) GetBit(i int) bool { return b.Bitmap == nil || BitIsSet(b.Bitmap, b.Offset+i) } type Bitmap struct { Data []byte Offset, Len int64 } func bitLength(bitmaps []Bitmap) (int64, error) { for _, b := range bitmaps[1:] { if b.Len != bitmaps[0].Len { return -1, errors.New("bitmaps must be same length") } } return bitmaps[0].Len, nil } func runVisitWordsAndWriteLoop(bitLen int64, rdrs []*BitmapWordReader, wrs []*BitmapWordWriter, visitor func(in, out []uint64)) { const bitWidth int64 = int64(uint64SizeBits) visited := make([]uint64, len(rdrs)) output := make([]uint64, len(wrs)) // every reader will have same number of words, since they are same // length'ed. This will be inefficient in some cases. 
When there's // offsets beyond the Word boundary, every word would have to be // created from 2 adjoining words nwords := int64(rdrs[0].Words()) bitLen -= nwords * bitWidth for nwords > 0 { nwords-- for i := range visited { visited[i] = rdrs[i].NextWord() } visitor(visited, output) for i := range output { wrs[i].PutNextWord(output[i]) } } // every reader will have the same number of trailing bytes, because // we already confirmed they have the same length. Because // offsets beyond the Word boundary can cause adjoining words, the // tailing portion could be more than one word remaining full/partial // words to write. if bitLen == 0 { return } // convert the word visitor to a bytevisitor byteVisitor := func(in, out []byte) { for i, w := range in { visited[i] = uint64(w) } visitor(visited, output) for i, w := range output { out[i] = byte(w) } } visitedBytes := make([]byte, len(rdrs)) outputBytes := make([]byte, len(wrs)) nbytes := rdrs[0].trailingBytes for nbytes > 0 { nbytes-- memory.Set(visitedBytes, 0) memory.Set(outputBytes, 0) var validBits int for i := range rdrs { visitedBytes[i], validBits = rdrs[i].NextTrailingByte() } byteVisitor(visitedBytes, outputBytes) for i, w := range outputBytes { wrs[i].PutNextTrailingByte(w, validBits) } } } // VisitWordsAndWrite visits words of bits from each input bitmap and // collects outputs to a slice of output Bitmaps. // // All bitmaps must have identical lengths. The first bit in a visited // bitmap may be offset within the first visited word, but words will // otherwise contain densely packed bits loaded from the bitmap. That // offset within the first word is returned. // // NOTE: this function is efficient on 3+ sufficiently large bitmaps. // It also has a large prolog/epilog overhead and should be used // carefully in other cases. For 2 or fewer bitmaps, and/or smaller // bitmaps, try BitmapReader and or other utilities. 
func VisitWordsAndWrite(args []Bitmap, out []Bitmap, visitor func(in, out []uint64)) error { bitLen, err := bitLength(args) if err != nil { return err } rdrs, wrs := make([]*BitmapWordReader, len(args)), make([]*BitmapWordWriter, len(out)) for i, in := range args { rdrs[i] = NewBitmapWordReader(in.Data, int(in.Offset), int(in.Len)) } for i, o := range out { wrs[i] = NewBitmapWordWriter(o.Data, int(o.Offset), int(o.Len)) } runVisitWordsAndWriteLoop(bitLen, rdrs, wrs, visitor) return nil } arrow-go-18.2.0/arrow/bitutil/bitmaps_test.go000066400000000000000000000404051476434502500212070ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package bitutil_test

import (
	"fmt"
	"math/rand"
	"strconv"
	"testing"

	"github.com/apache/arrow-go/v18/arrow/bitutil"
	"github.com/apache/arrow-go/v18/arrow/memory"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/suite"
)

// bitmapFromSlice builds a bitmap from 0/1 values, starting at bitOffset.
func bitmapFromSlice(vals []int, bitOffset int) []byte {
	out := make([]byte, int(bitutil.BytesForBits(int64(len(vals)+bitOffset))))
	writer := bitutil.NewBitmapWriter(out, bitOffset, len(vals))
	for _, val := range vals {
		if val == 1 {
			writer.Set()
		} else {
			writer.Clear()
		}
		writer.Next()
	}
	writer.Finish()
	return out
}

// assertReaderVals checks the reader yields exactly vals, bit by bit.
func assertReaderVals(t *testing.T, reader *bitutil.BitmapReader, vals []bool) {
	for _, v := range vals {
		if v {
			assert.True(t, reader.Set())
			assert.False(t, reader.NotSet())
		} else {
			assert.True(t, reader.NotSet())
			assert.False(t, reader.Set())
		}
		reader.Next()
	}
}

func TestNormalOperation(t *testing.T) {
	// exercise a variety of non-byte-aligned start offsets
	for _, offset := range []int{0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120} {
		buf := bitmapFromSlice([]int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}, offset)
		reader := bitutil.NewBitmapReader(buf, offset, 14)
		assertReaderVals(t, reader, []bool{false, true, true, true, false, false, false, true, false, true, false, true, false, true})
	}
}

func TestDoesNotReadOutOfBounds(t *testing.T) {
	var bitmap [16]byte
	const length = 128

	reader := bitutil.NewBitmapReader(bitmap[:], 0, length)
	assert.EqualValues(t, length, reader.Len())
	assert.NotPanics(t, func() {
		for i := 0; i < length; i++ {
			assert.True(t, reader.NotSet())
			reader.Next()
		}
	})
	assert.EqualValues(t, length, reader.Pos())

	reader = bitutil.NewBitmapReader(bitmap[:], 5, length-5)
	assert.EqualValues(t, length-5, reader.Len())
	assert.NotPanics(t, func() {
		for i := 0; i < length-5; i++ {
			assert.True(t, reader.NotSet())
			reader.Next()
		}
	})
	assert.EqualValues(t, length-5, reader.Pos())

	// a reader over a nil bitmap with zero length must not panic
	assert.NotPanics(t, func() {
		reader = bitutil.NewBitmapReader(nil, 0, 0)
	})
}

// writeToWriter writes 0/1 values through the given writer and flushes it.
func writeToWriter(vals []int, wr *bitutil.BitmapWriter) {
	for _, v := range vals {
		if v != 0 {
			wr.Set()
		} else {
			wr.Clear()
		}
		wr.Next()
	}
	wr.Finish()
}

func TestBitmapWriter(t *testing.T) {
	// both fill values verify that bits outside the written range survive
	for _, fillByte := range []byte{0x00, 0xFF} {
		{
			bitmap := []byte{fillByte, fillByte, fillByte, fillByte}
			wr := bitutil.NewBitmapWriter(bitmap, 0, 12)
			writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr)
			// {0b00110110, 0b....1010, ........, ........}
			assert.Equal(t, []byte{0x36, (0x0A | (fillByte & 0xF0)), fillByte, fillByte}, bitmap)
		}
		{
			bitmap := []byte{fillByte, fillByte, fillByte, fillByte}
			wr := bitutil.NewBitmapWriter(bitmap, 0, 12)
			wr.AppendBools([]bool{false, true, true, false, true, true, false, false, false, true, false, true})
			assert.Equal(t, []byte{0x36, (0x0A | (fillByte & 0xF0)), fillByte, fillByte}, bitmap)
		}
		{
			bitmap := []byte{fillByte, fillByte, fillByte, fillByte}
			wr := bitutil.NewBitmapWriter(bitmap, 3, 12)
			writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr)
			// {0b10110..., 0b.1010001, ........, ........}
			assert.Equal(t, []byte{0xb0 | (fillByte & 0x07), 0x51 | (fillByte & 0x80), fillByte, fillByte}, bitmap)
		}
		{
			bitmap := []byte{fillByte, fillByte, fillByte, fillByte}
			wr := bitutil.NewBitmapWriter(bitmap, 3, 12)
			// split appends must produce the same result as one append
			wr.AppendBools([]bool{false, true, true, false})
			wr.AppendBools([]bool{true, true, false, false})
			wr.AppendBools([]bool{false, true, false, true})
			assert.Equal(t, []byte{0xb0 | (fillByte & 0x07), 0x51 | (fillByte & 0x80), fillByte, fillByte}, bitmap)
		}
		{
			bitmap := []byte{fillByte, fillByte, fillByte, fillByte}
			wr := bitutil.NewBitmapWriter(bitmap, 20, 12)
			writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr)
			// {........, ........, 0b0110...., 0b10100011}
			assert.Equal(t, []byte{fillByte, fillByte, 0x60 | (fillByte & 0x0f), 0xa3}, bitmap)
		}
	}
}

func TestBitmapReader(t *testing.T) {
	assertReaderVals := func(vals []int, rdr *bitutil.BitmapReader) {
		for _, v := range vals {
			if v != 0 {
				assert.True(t, rdr.Set())
				assert.False(t, rdr.NotSet())
			} else {
				assert.False(t, rdr.Set())
				assert.True(t, rdr.NotSet())
			}
			rdr.Next()
		}
	}

	vals := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}
	for _, offset := range []int{0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120} {
		bm := make([]byte, bitutil.BytesForBits(int64(len(vals)+offset)))
		wr := bitutil.NewBitmapWriter(bm, offset, len(vals))
		writeToWriter(vals, wr)

		rdr := bitutil.NewBitmapReader(bm, offset, 14)
		assertReaderVals(vals, rdr)
	}
}

func TestCopyBitmap(t *testing.T) {
	const bufsize = 1000
	lengths := []int{bufsize*8 - 4, bufsize * 8}
	offsets := []int{0, 12, 16, 32, 37, 63, 64, 128}

	buffer := make([]byte, bufsize)

	// random bytes
	r := rand.New(rand.NewSource(0))
	r.Read(buffer)

	// add 16 byte padding
	otherBuffer := make([]byte, bufsize+32)
	r.Read(otherBuffer)

	for _, nbits := range lengths {
		for _, offset := range offsets {
			for _, destOffset := range offsets {
				t.Run(fmt.Sprintf("bits %d off %d dst %d", nbits, offset, destOffset), func(t *testing.T) {
					copyLen := nbits - offset

					bmCopy := make([]byte, len(otherBuffer))
					copy(bmCopy, otherBuffer)

					bitutil.CopyBitmap(buffer, offset, copyLen, bmCopy, destOffset)

					// bits before the destination range are untouched
					for i := 0; i < int(destOffset); i++ {
						assert.Equalf(t, bitutil.BitIsSet(otherBuffer, i), bitutil.BitIsSet(bmCopy, i), "bit index: %d", i)
					}
					// the copied range matches the source bits
					for i := 0; i < int(copyLen); i++ {
						assert.Equalf(t, bitutil.BitIsSet(buffer, i+int(offset)), bitutil.BitIsSet(bmCopy, i+int(destOffset)), "bit index: %d", i)
					}
					// bits after the destination range are untouched
					for i := int(destOffset + copyLen); i < len(otherBuffer); i++ {
						assert.Equalf(t, bitutil.BitIsSet(otherBuffer, i), bitutil.BitIsSet(bmCopy, i), "bit index: %d", i)
					}
				})
			}
		}
	}
}

func benchmarkCopyBitmapN(b *testing.B, offsetSrc, offsetDest, n int) {
	nbits := n * 8
	// random bytes
	r := rand.New(rand.NewSource(0))
	src := make([]byte, n)
	r.Read(src)

	length := nbits - offsetSrc

	dest := make([]byte, bitutil.BytesForBits(int64(length+offsetDest)))

	b.ResetTimer()
	b.SetBytes(int64(n))
	for i := 0; i < b.N; i++ {
		bitutil.CopyBitmap(src, offsetSrc, length, dest, offsetDest)
	}
}

// Fast path which is just a memcopy
func BenchmarkCopyBitmapWithoutOffset(b *testing.B) {
	for _, sz := range []int{32, 128, 1000, 1024} {
		b.Run(strconv.Itoa(sz), func(b *testing.B) {
			benchmarkCopyBitmapN(b, 0, 0, sz)
		})
	}
}

// slow path where the source buffer is not byte aligned
func BenchmarkCopyBitmapWithOffset(b *testing.B) {
	for _, sz := range []int{32, 128, 1000, 1024} {
		b.Run(strconv.Itoa(sz), func(b *testing.B) {
			benchmarkCopyBitmapN(b, 4, 0, sz)
		})
	}
}

// slow path where both source and dest are not byte aligned
func BenchmarkCopyBitmapWithOffsetBoth(b *testing.B) {
	for _, sz := range []int{32, 128, 1000, 1024} {
		b.Run(strconv.Itoa(sz), func(b *testing.B) {
			benchmarkCopyBitmapN(b, 3, 7, sz)
		})
	}
}

const bufferSize = 1024 * 8

// a naive bitmap reader for a baseline
type NaiveBitmapReader struct {
	bitmap []byte
	pos    int
}

func (n *NaiveBitmapReader) IsSet() bool    { return bitutil.BitIsSet(n.bitmap, n.pos) }
func (n *NaiveBitmapReader) IsNotSet() bool { return !n.IsSet() }
func (n *NaiveBitmapReader) Next()          { n.pos++ }

// naive bitmap writer for a baseline
type NaiveBitmapWriter struct {
	bitmap []byte
	pos    int
}

func (n *NaiveBitmapWriter) Set() {
	byteOffset := n.pos / 8
	bitOffset := n.pos % 8
	bitSetMask := uint8(1 << bitOffset)
	n.bitmap[byteOffset] |= bitSetMask
}

func (n *NaiveBitmapWriter) Clear() {
	byteOffset := n.pos / 8
	bitOffset := n.pos % 8
	bitClearMask := uint8(0xFF ^ (1 << bitOffset))
	n.bitmap[byteOffset] &= bitClearMask
}

func (n *NaiveBitmapWriter) Next()   { n.pos++ }
func (n *NaiveBitmapWriter) Finish() {}

// randomBuffer returns nbytes of deterministic pseudo-random data.
func randomBuffer(nbytes int64) []byte {
	buf := make([]byte, nbytes)
	r := rand.New(rand.NewSource(0))
	r.Read(buf)
	return buf
}

func BenchmarkBitmapReader(b *testing.B) {
	buf := randomBuffer(bufferSize)
	nbits := bufferSize * 8

	b.Run("naive baseline", func(b *testing.B) {
		b.SetBytes(2 * bufferSize)
		for i := 0; i < b.N; i++ {
			{
				total := 0
				rdr := NaiveBitmapReader{buf, 0}
				for j := 0; j < nbits; j++ {
					if rdr.IsSet() {
						total++
					}
					rdr.Next()
				}
			}
			{
				total := 0
				rdr := NaiveBitmapReader{buf, 0}
				for j := 0; j < nbits; j++ {
					if rdr.IsSet() {
						total++
					}
					rdr.Next()
				}
			}
		}
	})
	b.Run("bitmap reader", func(b *testing.B) {
		b.SetBytes(2 * bufferSize)
		for i := 0; i < b.N; i++ {
			{
				total := 0
				rdr := bitutil.NewBitmapReader(buf, 0, nbits)
				for j := 0; j < nbits; j++ {
					if rdr.Set() {
						total++
					}
					rdr.Next()
				}
			}
			{
				total := 0
				rdr := bitutil.NewBitmapReader(buf, 0, nbits)
				for j := 0; j < nbits; j++ {
					if rdr.Set() {
						total++
					}
					rdr.Next()
				}
			}
		}
	})
}

type (
	// noAllocFn mirrors the signature of bitutil.BitmapAnd / BitmapOr etc.
	noAllocFn func(left, right []byte, lOffset, rOffset int64, out []byte, outOffset, length int64)
	// allocFn mirrors the signature of bitutil.BitmapAndAlloc / BitmapOrAlloc etc.
	allocFn   func(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer

	// bitmapOp pairs the allocating and non-allocating forms of one bitmap op.
	bitmapOp struct {
		noAlloc noAllocFn
		alloc   allocFn
	}
)

type BitmapOpSuite struct {
	suite.Suite
}

// testAligned exercises op where left/right/out offsets are congruent mod 8.
func (s *BitmapOpSuite) testAligned(op bitmapOp, leftBits, rightBits []int, resultBits []bool) {
	var (
		left, right []byte
		out         *memory.Buffer
		length      int64
	)
	for _, lOffset := range []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536} {
		s.Run(fmt.Sprintf("left offset %d", lOffset), func() {
			left = bitmapFromSlice(leftBits, int(lOffset))
			length = int64(len(leftBits))
			for _, rOffset := range []int64{lOffset, lOffset + 8, lOffset + 40} {
				s.Run(fmt.Sprintf("right offset %d", rOffset), func() {
					right = bitmapFromSlice(rightBits, int(rOffset))
					for _, outOffset := range []int64{lOffset, lOffset + 16, lOffset + 24} {
						s.Run(fmt.Sprintf("out offset %d", outOffset), func() {
							s.Run("zero-length", func() {
								out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, 0, outOffset)
								s.EqualValues(bitutil.BytesForBits(outOffset), out.Len())
								expected := make([]byte, out.Len())
								if out.Len() > 0 {
									s.Equal(expected, out.Bytes())
								} else {
									s.Nil(out.Bytes())
								}
								memory.Set(out.Bytes(), 0xFF)
								op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, 0)
								if out.Len() > 0 {
									memory.Set(expected, 0xFF)
									s.Equal(expected, out.Bytes())
								} else {
									s.Nil(out.Bytes())
								}
								out.Release()
							})
							out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, outOffset)
							defer out.Release()
							rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
							assertReaderVals(s.T(), rdr, resultBits)

							memory.Set(out.Bytes(), 0x00)
							op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length)
							rdr = bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
							assertReaderVals(s.T(), rdr, resultBits)
						})
					}
				})
			}
		})
	}
}

// testUnaligned exercises op over all combinations of mismatched offsets.
func (s *BitmapOpSuite) testUnaligned(op bitmapOp, leftBits, rightBits []int, resultBits []bool) {
	var (
		left, right []byte
		out         *memory.Buffer
		length      int64
		offsets     = []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536}
	)
	for _, lOffset := range offsets {
		s.Run(fmt.Sprintf("left offset %d", lOffset), func() {
			left = bitmapFromSlice(leftBits, int(lOffset))
			length = int64(len(leftBits))

			for _, rOffset := range offsets {
				s.Run(fmt.Sprintf("right offset %d", rOffset), func() {
					right = bitmapFromSlice(rightBits, int(rOffset))
					for _, outOffset := range offsets {
						s.Run(fmt.Sprintf("out offset %d", outOffset), func() {
							s.Run("zero-length", func() {
								out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, 0, outOffset)
								s.EqualValues(bitutil.BytesForBits(outOffset), out.Len())
								expected := make([]byte, out.Len())
								if out.Len() > 0 {
									s.Equal(expected, out.Bytes())
								} else {
									s.Nil(out.Bytes())
								}
								memory.Set(out.Bytes(), 0xFF)
								op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, 0)
								if out.Len() > 0 {
									memory.Set(expected, 0xFF)
									s.Equal(expected, out.Bytes())
								} else {
									s.Nil(out.Bytes())
								}
								out.Release()
							})
							s.Run("alloc", func() {
								out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, outOffset)
								rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
								assertReaderVals(s.T(), rdr, resultBits)
							})
							s.Run("noalloc", func() {
								memory.Set(out.Bytes(), 0x00)
								op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length)
								rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
								assertReaderVals(s.T(), rdr, resultBits)
							})
						})
					}
				})
			}
		})
	}
}

func (s *BitmapOpSuite)
TestBitmapAnd() { op := bitmapOp{ noAlloc: bitutil.BitmapAnd, alloc: bitutil.BitmapAndAlloc, } leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0} resultBits := []bool{false, false, true, false, false, false, false, false, false, true, false, false, false, false} s.Run("aligned", func() { s.testAligned(op, leftBits, rightBits, resultBits) }) s.Run("unaligned", func() { s.testUnaligned(op, leftBits, rightBits, resultBits) }) } func (s *BitmapOpSuite) TestBitmapOr() { op := bitmapOp{ noAlloc: bitutil.BitmapOr, alloc: bitutil.BitmapOrAlloc, } leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0} resultBits := []bool{false, true, true, true, true, true, false, true, true, true, true, true, true, true} s.Run("aligned", func() { s.testAligned(op, leftBits, rightBits, resultBits) }) s.Run("unaligned", func() { s.testUnaligned(op, leftBits, rightBits, resultBits) }) } func TestBitmapOps(t *testing.T) { suite.Run(t, new(BitmapOpSuite)) } func TestSmallBitmapOp(t *testing.T) { // 0b01111111 0b11001111 left := [2]byte{127, 207} // 0b11111110 0b01111111 right := [2]byte{254, 127} // 0b01111110 0b01001111 results := [2]byte{126, 79} var out [2]byte bitutil.BitmapAnd(left[:], right[:], 0, 0, out[:], 0, 8) assert.Equal(t, results[:1], out[:1]) bitutil.BitmapAnd(left[:], right[:], 0, 0, out[:], 0, 16) assert.Equal(t, results, out) } func createRandomBuffer(mem memory.Allocator, src *rand.Rand, nbytes int) []byte { buf := mem.Allocate(nbytes) src.Read(buf) return buf } func benchBitOpImpl(b *testing.B, nBytes, offset int, op noAllocFn) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) r := rand.New(rand.NewSource(0)) buf1 := createRandomBuffer(mem, r, nBytes) buf2 := createRandomBuffer(mem, r, nBytes) buf3 := createRandomBuffer(mem, r, nBytes) b.Cleanup(func() { mem.Free(buf1) mem.Free(buf2) mem.Free(buf3) }) numBits := nBytes*8 - offset 
b.ResetTimer() b.SetBytes(bitutil.BytesForBits(int64(numBits)) * 2) for i := 0; i < b.N; i++ { op(buf1, buf2, 0, int64(offset), buf3, 0, int64(numBits)) } } func BenchmarkBitmapAnd(b *testing.B) { sizes := []int{bufferSize * 4, bufferSize * 16} offsets := []int{0, 1, 2} for _, s := range sizes { b.Run(fmt.Sprintf("nbytes=%d", s), func(b *testing.B) { for _, o := range offsets { b.Run(fmt.Sprintf("%d", o), func(b *testing.B) { benchBitOpImpl(b, s, o, bitutil.BitmapAnd) }) } }) } } arrow-go-18.2.0/arrow/bitutil/bitutil.go000066400000000000000000000114631476434502500201670ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package bitutil import ( "math" "math/bits" "unsafe" "github.com/apache/arrow-go/v18/arrow/memory" ) var ( BitMask = [8]byte{1, 2, 4, 8, 16, 32, 64, 128} FlippedBitMask = [8]byte{254, 253, 251, 247, 239, 223, 191, 127} ) // IsMultipleOf8 returns whether v is a multiple of 8. func IsMultipleOf8(v int64) bool { return v&7 == 0 } // IsMultipleOf64 returns whether v is a multiple of 64 func IsMultipleOf64(v int64) bool { return v&63 == 0 } func BytesForBits(bits int64) int64 { return (bits + 7) >> 3 } // NextPowerOf2 rounds x to the next power of two. 
func NextPowerOf2(x int) int { return 1 << uint(bits.Len(uint(x))) } // CeilByte rounds size to the next multiple of 8. func CeilByte(size int) int { return (size + 7) &^ 7 } // CeilByte64 rounds size to the next multiple of 8. func CeilByte64(size int64) int64 { return (size + 7) &^ 7 } // BitIsSet returns true if the bit at index i in buf is set (1). func BitIsSet(buf []byte, i int) bool { return (buf[uint(i)/8] & BitMask[byte(i)%8]) != 0 } // BitIsNotSet returns true if the bit at index i in buf is not set (0). func BitIsNotSet(buf []byte, i int) bool { return (buf[uint(i)/8] & BitMask[byte(i)%8]) == 0 } // SetBit sets the bit at index i in buf to 1. func SetBit(buf []byte, i int) { buf[uint(i)/8] |= BitMask[byte(i)%8] } // ClearBit sets the bit at index i in buf to 0. func ClearBit(buf []byte, i int) { buf[uint(i)/8] &= FlippedBitMask[byte(i)%8] } // SetBitTo sets the bit at index i in buf to val. func SetBitTo(buf []byte, i int, val bool) { if val { SetBit(buf, i) } else { ClearBit(buf, i) } } // CountSetBits counts the number of 1's in buf up to n bits. 
func CountSetBits(buf []byte, offset, n int) int { if offset > 0 { return countSetBitsWithOffset(buf, offset, n) } count := 0 uint64Bytes := n / uint64SizeBits * 8 for _, v := range bytesToUint64(buf[:uint64Bytes]) { count += bits.OnesCount64(v) } for _, v := range buf[uint64Bytes : n/8] { count += bits.OnesCount8(v) } // tail bits for i := n &^ 0x7; i < n; i++ { if BitIsSet(buf, i) { count++ } } return count } func countSetBitsWithOffset(buf []byte, offset, n int) int { count := 0 beg := offset begU8 := roundUp(beg, uint64SizeBits) init := min(n, begU8-beg) for i := offset; i < beg+init; i++ { if BitIsSet(buf, i) { count++ } } begU64 := BytesForBits(int64(beg + init)) return count + CountSetBits(buf[begU64:], 0, n-init) } func roundUp(v, f int) int { return (v + (f - 1)) / f * f } func min(a, b int) int { if a < b { return a } return b } const ( uint64SizeBytes = int(unsafe.Sizeof(uint64(0))) uint64SizeBits = uint64SizeBytes * 8 ) var ( // PrecedingBitmask is a convenience set of values as bitmasks for checking // prefix bits of a byte PrecedingBitmask = [8]byte{0, 1, 3, 7, 15, 31, 63, 127} // TrailingBitmask is the bitwise complement version of kPrecedingBitmask TrailingBitmask = [8]byte{255, 254, 252, 248, 240, 224, 192, 128} ) // SetBitsTo is a convenience function to quickly set or unset all the bits // in a bitmap starting at startOffset for length bits. 
func SetBitsTo(bits []byte, startOffset, length int64, areSet bool) { if length == 0 { return } beg := startOffset end := startOffset + length var fill uint8 = 0 if areSet { fill = math.MaxUint8 } byteBeg := beg / 8 byteEnd := end/8 + 1 // don't modify bits before the startOffset by using this mask firstByteMask := PrecedingBitmask[beg%8] // don't modify bits past the length by using this mask lastByteMask := TrailingBitmask[end%8] if byteEnd == byteBeg+1 { // set bits within a single byte onlyByteMask := firstByteMask if end%8 != 0 { onlyByteMask = firstByteMask | lastByteMask } bits[byteBeg] &= onlyByteMask bits[byteBeg] |= fill &^ onlyByteMask return } // set/clear trailing bits of first byte bits[byteBeg] &= firstByteMask bits[byteBeg] |= fill &^ firstByteMask if byteEnd-byteBeg > 2 { memory.Set(bits[byteBeg+1:byteEnd-1], fill) } if end%8 == 0 { return } bits[byteEnd-1] &= lastByteMask bits[byteEnd-1] |= fill &^ lastByteMask } arrow-go-18.2.0/arrow/bitutil/bitutil_bytes.go000066400000000000000000000021071476434502500213700ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build go1.20 || tinygo package bitutil import ( "unsafe" ) func bytesToUint64(b []byte) []uint64 { if len(b) < uint64SizeBytes { return nil } ptr := unsafe.SliceData(b) if ptr == nil { return nil } return unsafe.Slice((*uint64)(unsafe.Pointer(ptr)), len(b)/uint64SizeBytes) } arrow-go-18.2.0/arrow/bitutil/bitutil_test.go000066400000000000000000000212241476434502500212220ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package bitutil_test import ( "fmt" "math/rand" "testing" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/testing/tools" "github.com/stretchr/testify/assert" ) func TestIsMultipleOf8(t *testing.T) { for _, tc := range []struct { v int64 want bool }{ {-16, true}, {-9, false}, {-8, true}, {-7, false}, {-4, false}, {-1, false}, {-0, true}, {0, true}, {1, false}, {4, false}, {7, false}, {8, true}, {9, false}, {16, true}, } { t.Run(fmt.Sprintf("v=%d", tc.v), func(t *testing.T) { got := bitutil.IsMultipleOf8(tc.v) if got != tc.want { t.Fatalf("IsMultipleOf8(%d): got=%v, want=%v", tc.v, got, tc.want) } }) } } func TestCeilByte(t *testing.T) { tests := []struct { name string in, exp int }{ {"zero", 0, 0}, {"five", 5, 8}, {"sixteen", 16, 16}, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { got := bitutil.CeilByte(test.in) assert.Equal(t, test.exp, got) }) } } func TestBitIsSet(t *testing.T) { buf := make([]byte, 2) buf[0] = 0xa1 buf[1] = 0xc2 exp := []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} var got []bool for i := 0; i < 0x10; i++ { got = append(got, bitutil.BitIsSet(buf, i)) } assert.Equal(t, exp, got) } func TestBitIsNotSet(t *testing.T) { buf := make([]byte, 2) buf[0] = 0xa1 buf[1] = 0xc2 exp := []bool{false, true, true, true, true, false, true, false, true, false, true, true, true, true, false, false} var got []bool for i := 0; i < 0x10; i++ { got = append(got, bitutil.BitIsNotSet(buf, i)) } assert.Equal(t, exp, got) } func TestClearBit(t *testing.T) { buf := make([]byte, 2) buf[0] = 0xff buf[1] = 0xff for i, v := range []bool{false, true, true, true, true, false, true, false, true, false, true, true, true, true, false, false} { if v { bitutil.ClearBit(buf, i) } } assert.Equal(t, []byte{0xa1, 0xc2}, buf) } func TestSetBit(t *testing.T) { buf := make([]byte, 2) for i, v := range []bool{true, false, false, false, false, true, false, 
true, false, true, false, false, false, false, true, true} { if v { bitutil.SetBit(buf, i) } } assert.Equal(t, []byte{0xa1, 0xc2}, buf) } func TestSetBitTo(t *testing.T) { buf := make([]byte, 2) for i, v := range []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} { bitutil.SetBitTo(buf, i, v) } assert.Equal(t, []byte{0xa1, 0xc2}, buf) } func TestCountSetBits(t *testing.T) { tests := []struct { name string buf []byte off int n int exp int }{ {"some 03 bits", bbits(0x11000000), 0, 3, 2}, {"some 11 bits", bbits(0x11000011, 0x01000000), 0, 11, 5}, {"some 72 bits", bbits(0x11001010, 0x11110000, 0x00001111, 0x11000011, 0x11001010, 0x11110000, 0x00001111, 0x11000011, 0x10001001), 0, 9 * 8, 35}, {"all 08 bits", bbits(0x11111110), 0, 8, 7}, {"all 03 bits", bbits(0x11100001), 0, 3, 3}, {"all 11 bits", bbits(0x11111111, 0x11111111), 0, 11, 11}, {"all 72 bits", bbits(0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111), 0, 9 * 8, 72}, {"none 03 bits", bbits(0x00000001), 0, 3, 0}, {"none 11 bits", bbits(0x00000000, 0x00000000), 0, 11, 0}, {"none 72 bits", bbits(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), 0, 9 * 8, 0}, {"some 03 bits - offset+1", bbits(0x11000000), 1, 3, 1}, {"some 03 bits - offset+2", bbits(0x11000000), 2, 3, 0}, {"some 11 bits - offset+1", bbits(0x11000011, 0x01000000, 0x00000000), 1, 11, 4}, {"some 11 bits - offset+2", bbits(0x11000011, 0x01000000, 0x00000000), 2, 11, 3}, {"some 11 bits - offset+3", bbits(0x11000011, 0x01000000, 0x00000000), 3, 11, 3}, {"some 11 bits - offset+6", bbits(0x11000011, 0x01000000, 0x00000000), 6, 11, 3}, {"some 11 bits - offset+7", bbits(0x11000011, 0x01000000, 0x00000000), 7, 11, 2}, {"some 11 bits - offset+8", bbits(0x11000011, 0x01000000, 0x00000000), 8, 11, 1}, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { got := 
bitutil.CountSetBits(test.buf, test.off, test.n) assert.Equal(t, test.exp, got) }) } } func TestCountSetBitsOffset(t *testing.T) { slowCountSetBits := func(buf []byte, offset, n int) int { count := 0 for i := offset; i < offset+n; i++ { if bitutil.BitIsSet(buf, i) { count++ } } return count } const ( bufSize = 1000 nbits = bufSize * 8 ) offsets := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 32, 37, 63, 64, 128, nbits - 30, nbits - 64} buf := make([]byte, bufSize) rng := rand.New(rand.NewSource(0)) _, err := rng.Read(buf) if err != nil { t.Fatal(err) } for i, offset := range offsets { want := slowCountSetBits(buf, offset, nbits-offset) got := bitutil.CountSetBits(buf, offset, nbits-offset) if got != want { t.Errorf("offset[%2d/%2d]=%5d. got=%5d, want=%5d", i+1, len(offsets), offset, got, want) } } } func TestSetBitsTo(t *testing.T) { for _, fillByte := range []byte{0x00, 0xFF} { { // set within a byte bm := []byte{fillByte, fillByte, fillByte, fillByte} bitutil.SetBitsTo(bm, 2, 2, true) bitutil.SetBitsTo(bm, 4, 2, false) assert.Equal(t, []byte{(fillByte &^ 0x3C) | 0xC}, bm[:1]) } { // test straddling a single byte boundary bm := []byte{fillByte, fillByte, fillByte, fillByte} bitutil.SetBitsTo(bm, 4, 7, true) bitutil.SetBitsTo(bm, 11, 7, false) assert.Equal(t, []byte{(fillByte & 0xF) | 0xF0, 0x7, fillByte &^ 0x3}, bm[:3]) } { // test byte aligned end bm := []byte{fillByte, fillByte, fillByte, fillByte} bitutil.SetBitsTo(bm, 4, 4, true) bitutil.SetBitsTo(bm, 8, 8, false) assert.Equal(t, []byte{(fillByte & 0xF) | 0xF0, 0x00, fillByte}, bm[:3]) } { // test byte aligned end, multiple bytes bm := []byte{fillByte, fillByte, fillByte, fillByte} bitutil.SetBitsTo(bm, 0, 24, false) falseByte := byte(0) assert.Equal(t, []byte{falseByte, falseByte, falseByte, fillByte}, bm) } } } func bbits(v ...int32) []byte { return tools.IntsToBitsLSB(v...) 
} func BenchmarkBitIsSet(b *testing.B) { buf := make([]byte, 32) b.ResetTimer() for i := 0; i < b.N; i++ { bitutil.BitIsSet(buf, (i%32)&0x1a) } } func BenchmarkSetBit(b *testing.B) { buf := make([]byte, 32) b.ResetTimer() for i := 0; i < b.N; i++ { bitutil.SetBit(buf, (i%32)&0x1a) } } func BenchmarkSetBitTo(b *testing.B) { vals := []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} buf := make([]byte, 32) b.ResetTimer() for i := 0; i < b.N; i++ { bitutil.SetBitTo(buf, i%32, vals[i%len(vals)]) } } var ( intval int ) func benchmarkCountSetBitsN(b *testing.B, offset, n int) { nn := n/8 + 1 buf := make([]byte, nn) //src := [4]byte{0x1f, 0xaa, 0xba, 0x11} src := [4]byte{0x01, 0x01, 0x01, 0x01} for i := 0; i < nn; i++ { buf[i] = src[i&0x3] } b.ResetTimer() var res int for i := 0; i < b.N; i++ { res = bitutil.CountSetBits(buf, offset, n-offset) } intval = res } func BenchmarkCountSetBits_3(b *testing.B) { benchmarkCountSetBitsN(b, 0, 3) } func BenchmarkCountSetBits_32(b *testing.B) { benchmarkCountSetBitsN(b, 0, 32) } func BenchmarkCountSetBits_128(b *testing.B) { benchmarkCountSetBitsN(b, 0, 128) } func BenchmarkCountSetBits_1000(b *testing.B) { benchmarkCountSetBitsN(b, 0, 1000) } func BenchmarkCountSetBits_1024(b *testing.B) { benchmarkCountSetBitsN(b, 0, 1024) } func BenchmarkCountSetBitsOffset_3(b *testing.B) { benchmarkCountSetBitsN(b, 1, 3) } func BenchmarkCountSetBitsOffset_32(b *testing.B) { benchmarkCountSetBitsN(b, 1, 32) } func BenchmarkCountSetBitsOffset_128(b *testing.B) { benchmarkCountSetBitsN(b, 1, 128) } func BenchmarkCountSetBitsOffset_1000(b *testing.B) { benchmarkCountSetBitsN(b, 1, 1000) } func BenchmarkCountSetBitsOffset_1024(b *testing.B) { benchmarkCountSetBitsN(b, 1, 1024) } arrow-go-18.2.0/arrow/bitutil/endian_default.go000066400000000000000000000020631476434502500214510ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor 
license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !s390x // +build !s390x package bitutil import ( "unsafe" ) var toFromLEFunc = func(in uint64) uint64 { return in } func getLSB(v uint64) byte { return (*[8]byte)(unsafe.Pointer(&v))[0] } func setLSB(v *uint64, b byte) { (*[8]byte)(unsafe.Pointer(v))[0] = b } arrow-go-18.2.0/arrow/bitutil/endian_s390x.go000066400000000000000000000020131476434502500207060ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package bitutil import ( "math/bits" "unsafe" ) var toFromLEFunc = bits.ReverseBytes64 func getLSB(v uint64) byte { return (*[8]byte)(unsafe.Pointer(&v))[7] } func setLSB(v *uint64, b byte) { (*[8]byte)(unsafe.Pointer(v))[7] = b } arrow-go-18.2.0/arrow/cdata/000077500000000000000000000000001476434502500155575ustar00rootroot00000000000000arrow-go-18.2.0/arrow/cdata/abi.h000066400000000000000000000453051476434502500164720ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. /// \file abi.h Arrow C Data Interface /// /// The Arrow C Data interface defines a very small, stable set /// of C definitions which can be easily copied into any project's /// source code and vendored to be used for columnar data interchange /// in the Arrow format. For non-C/C++ languages and runtimes, /// it should be almost as easy to translate the C definitions into /// the corresponding C FFI declarations. /// /// Applications and libraries can therefore work with Arrow memory /// without necessarily using the Arrow libraries or reinventing /// the wheel. Developers can choose between tight integration /// with the Arrow software project or minimal integration with /// the Arrow format only. 
#pragma once #include // Spec and documentation: https://arrow.apache.org/docs/format/CDataInterface.html #ifdef __cplusplus extern "C" { #endif #ifndef ARROW_C_DATA_INTERFACE # define ARROW_C_DATA_INTERFACE # define ARROW_FLAG_DICTIONARY_ORDERED 1 # define ARROW_FLAG_NULLABLE 2 # define ARROW_FLAG_MAP_KEYS_SORTED 4 struct ArrowSchema { // Array type description const char* format; const char* name; const char* metadata; int64_t flags; int64_t n_children; struct ArrowSchema** children; struct ArrowSchema* dictionary; // Release callback void (*release)(struct ArrowSchema*); // Opaque producer-specific data void* private_data; }; struct ArrowArray { // Array data description int64_t length; int64_t null_count; int64_t offset; int64_t n_buffers; int64_t n_children; const void** buffers; struct ArrowArray** children; struct ArrowArray* dictionary; // Release callback void (*release)(struct ArrowArray*); // Opaque producer-specific data void* private_data; }; #endif // ARROW_C_DATA_INTERFACE #ifndef ARROW_C_DEVICE_DATA_INTERFACE # define ARROW_C_DEVICE_DATA_INTERFACE // Spec and Documentation: https://arrow.apache.org/docs/format/CDeviceDataInterface.html // DeviceType for the allocated memory typedef int32_t ArrowDeviceType; // CPU device, same as using ArrowArray directly # define ARROW_DEVICE_CPU 1 // CUDA GPU Device # define ARROW_DEVICE_CUDA 2 // Pinned CUDA CPU memory by cudaMallocHost # define ARROW_DEVICE_CUDA_HOST 3 // OpenCL Device # define ARROW_DEVICE_OPENCL 4 // Vulkan buffer for next-gen graphics # define ARROW_DEVICE_VULKAN 7 // Metal for Apple GPU # define ARROW_DEVICE_METAL 8 // Verilog simulator buffer # define ARROW_DEVICE_VPI 9 // ROCm GPUs for AMD GPUs # define ARROW_DEVICE_ROCM 10 // Pinned ROCm CPU memory allocated by hipMallocHost # define ARROW_DEVICE_ROCM_HOST 11 // Reserved for extension # define ARROW_DEVICE_EXT_DEV 12 // CUDA managed/unified memory allocated by cudaMallocManaged # define ARROW_DEVICE_CUDA_MANAGED 13 // unified shared memory 
allocated on a oneAPI non-partitioned device. # define ARROW_DEVICE_ONEAPI 14 // GPU support for next-gen WebGPU standard # define ARROW_DEVICE_WEBGPU 15 // Qualcomm Hexagon DSP # define ARROW_DEVICE_HEXAGON 16 struct ArrowDeviceArray { // the Allocated Array // // the buffers in the array (along with the buffers of any // children) are what is allocated on the device. struct ArrowArray array; // The device id to identify a specific device int64_t device_id; // The type of device which can access this memory. ArrowDeviceType device_type; // An event-like object to synchronize on if needed. void* sync_event; // Reserved bytes for future expansion. int64_t reserved[3]; }; #endif // ARROW_C_DEVICE_DATA_INTERFACE #ifndef ARROW_C_STREAM_INTERFACE # define ARROW_C_STREAM_INTERFACE struct ArrowArrayStream { // Callback to get the stream type // (will be the same for all arrays in the stream). // // Return value: 0 if successful, an `errno`-compatible error code otherwise. // // If successful, the ArrowSchema must be released independently from the stream. int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); // Callback to get the next array // (if no error and the array is released, the stream has ended) // // Return value: 0 if successful, an `errno`-compatible error code otherwise. // // If successful, the ArrowArray must be released independently from the stream. int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); // Callback to get optional detailed error information. // This must only be called if the last stream operation failed // with a non-0 return code. // // Return value: pointer to a null-terminated character array describing // the last error, or NULL if no description is available. // // The returned pointer is only valid until the next operation on this stream // (including release). const char* (*get_last_error)(struct ArrowArrayStream*); // Release callback: release the stream's own resources. 
// Note that arrays returned by `get_next` must be individually released. void (*release)(struct ArrowArrayStream*); // Opaque producer-specific data void* private_data; }; #endif // ARROW_C_STREAM_INTERFACE #ifndef ARROW_C_DEVICE_STREAM_INTERFACE # define ARROW_C_DEVICE_STREAM_INTERFACE // Equivalent to ArrowArrayStream, but for ArrowDeviceArrays. // // This stream is intended to provide a stream of data on a single // device, if a producer wants data to be produced on multiple devices // then multiple streams should be provided. One per device. struct ArrowDeviceArrayStream { // The device that this stream produces data on. ArrowDeviceType device_type; // Callback to get the stream schema // (will be the same for all arrays in the stream). // // Return value 0 if successful, an `errno`-compatible error code otherwise. // // If successful, the ArrowSchema must be released independently from the stream. // The schema should be accessible via CPU memory. int (*get_schema)(struct ArrowDeviceArrayStream* self, struct ArrowSchema* out); // Callback to get the next array // (if no error and the array is released, the stream has ended) // // Return value: 0 if successful, an `errno`-compatible error code otherwise. // // If successful, the ArrowDeviceArray must be released independently from the stream. int (*get_next)(struct ArrowDeviceArrayStream* self, struct ArrowDeviceArray* out); // Callback to get optional detailed error information. // This must only be called if the last stream operation failed // with a non-0 return code. // // Return value: pointer to a null-terminated character array describing // the last error, or NULL if no description is available. // // The returned pointer is only valid until the next operation on this stream // (including release). const char* (*get_last_error)(struct ArrowDeviceArrayStream* self); // Release callback: release the stream's own resources. // Note that arrays returned by `get_next` must be individually released. 
void (*release)(struct ArrowDeviceArrayStream* self); // Opaque producer-specific data void* private_data; }; #endif // ARROW_C_DEVICE_STREAM_INTERFACE #ifndef ARROW_C_ASYNC_STREAM_INTERFACE # define ARROW_C_ASYNC_STREAM_INTERFACE // EXPERIMENTAL: ArrowAsyncTask represents available data from a producer that was passed // to an invocation of `on_next_task` on the ArrowAsyncDeviceStreamHandler. // // The reason for this Task approach instead of the Async interface returning // the Array directly is to allow for more complex thread handling and reducing // context switching and data transfers between CPU cores (e.g. from one L1/L2 // cache to another) if desired. // // For example, the `on_next_task` callback can be called when data is ready, while // the producer puts potential "decoding" logic in the `ArrowAsyncTask` object. This // allows for the producer to manage the I/O on one thread which calls `on_next_task` // and the consumer can determine when the decoding (producer logic in the `extract_data` // callback of the task) occurs and on which thread, to avoid a CPU core transfer // (data staying in the L2 cache). struct ArrowAsyncTask { // This callback should populate the ArrowDeviceArray associated with this task. // The order of ArrowAsyncTasks provided by the producer enables a consumer to // ensure the order of data to process. // // This function is expected to be synchronous, but should not perform any blocking // I/O. Ideally it should be as cheap as possible so as to not tie up the consumer // thread unnecessarily. // // Returns: 0 if successful, errno-compatible error otherwise. // // If a non-0 value is returned then it should be followed by a call to `on_error` // on the appropriate ArrowAsyncDeviceStreamHandler. This is because it's highly // likely that whatever is calling this function may be entirely disconnected from // the current control flow. 
Indicating an error here with a non-zero return allows // the current flow to be aware of the error occurring, while still allowing any // logging or error handling to still be centralized in the `on_error` callback of // the original Async handler. // // Rather than a release callback, any required cleanup should be performed as part // of the invocation of `extract_data`. Ownership of the Array is passed to the consumer // calling this, and so it must be released separately. // // It is only valid to call this method exactly once. int (*extract_data)(struct ArrowAsyncTask* self, struct ArrowDeviceArray* out); // opaque task-specific data void* private_data; }; // EXPERIMENTAL: ArrowAsyncProducer represents a 1-to-1 relationship between an async // producer and consumer. This object allows the consumer to perform backpressure and flow // control on the asynchronous stream processing. This object must be owned by the // producer who creates it, and thus is responsible for cleaning it up. struct ArrowAsyncProducer { // The device type that this stream produces data on. ArrowDeviceType device_type; // A consumer must call this function to start receiving on_next_task calls. // // It *must* be valid to call this synchronously from within `on_next_task` or // `on_schema`, but this function *must not* immediately call `on_next_task` so as // to avoid recursion and reentrant callbacks. // // After cancel has been called, additional calls to this function must be NOPs, // but allowed. While not cancelled, calling this function must register the // given number of additional arrays/batches to be produced with the producer. // The producer should only call `on_next_task` at most the registered number // of arrays before propagating backpressure. // // Any error encountered by calling request must be propagated by calling the `on_error` // callback of the ArrowAsyncDeviceStreamHandler. 
// // While not cancelled, any subsequent calls to `on_next_task`, `on_error` or // `release` should be scheduled by the producer to be called later. // // It is invalid for a consumer to call this with a value of n <= 0, producers should // error if given such a value. void (*request)(struct ArrowAsyncProducer* self, int64_t n); // This cancel callback signals a producer that it must eventually stop making calls // to on_next_task. It must be idempotent and thread-safe. After calling cancel once, // subsequent calls must be NOPs. This must not call any consumer-side handlers other // than `on_error`. // // It is not required that calling cancel affect the producer immediately, only that it // must eventually stop calling on_next_task and subsequently call release on the // async handler. As such, a consumer must be prepared to receive one or more calls to // `on_next_task` even after calling cancel if there are still requested arrays pending. // // Successful cancellation should *not* result in the producer calling `on_error`, it // should finish out any remaining tasks and eventually call `release`. // // Any error encountered during handling a call to cancel must be reported via the // on_error callback on the async stream handler. void (*cancel)(struct ArrowAsyncProducer* self); // Any additional metadata tied to a specific stream of data. This must either be NULL // or a valid pointer to metadata which is encoded in the same way schema metadata // would be. Non-null metadata must be valid for the lifetime of this object. As an // example a producer could use this to provide the total number of rows and/or batches // in the stream if known. const char* additional_metadata; // producer-specific opaque data. void* private_data; }; // EXPERIMENTAL: Similar to ArrowDeviceArrayStream, except designed for an asynchronous // style of interaction. 
While ArrowDeviceArrayStream provides producer // defined callbacks, this is intended to be created by the consumer instead. // The consumer passes this handler to the producer, which in turn uses the // callbacks to inform the consumer of events in the stream. struct ArrowAsyncDeviceStreamHandler { // Handler for receiving a schema. The passed in stream_schema must be // released or moved by the handler (producer is giving ownership of the schema to // the handler, but not ownership of the top level object itself). // // With the exception of an error occurring (on_error), this must be the first // callback function which is called by a producer and must only be called exactly // once. As such, the producer should provide a valid ArrowAsyncProducer instance // so the consumer can control the flow. See the documentation on ArrowAsyncProducer // for how it works. The ArrowAsyncProducer is owned by the producer who calls this // function and thus the producer is responsible for cleaning it up when calling // the release callback of this handler. // // If there is any additional metadata tied to this stream, it will be provided as // a non-null value for the `additional_metadata` field of the ArrowAsyncProducer // which will be valid at least until the release callback is called. // // Return value: 0 if successful, `errno`-compatible error otherwise // // A producer that receives a non-zero return here should stop producing and eventually // call release instead. int (*on_schema)(struct ArrowAsyncDeviceStreamHandler* self, struct ArrowSchema* stream_schema); // Handler for receiving data. This is called when data is available providing an // ArrowAsyncTask struct to signify it. The producer indicates the end of the stream // by passing NULL as the value for the task rather than a valid pointer to a task. 
// The task object is only valid for the lifetime of this function call, if a consumer // wants to utilize it after this function returns, it must copy or move the contents // of it to a new ArrowAsyncTask object. // // The `request` callback of a provided ArrowAsyncProducer must be called in order // to start receiving calls to this handler. // // The metadata argument can be null or can be used by a producer // to pass arbitrary extra information to the consumer (such as total number // of rows, context info, or otherwise). The data should be passed using the same // encoding as the metadata within the ArrowSchema struct itself (defined in // the spec at // https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.metadata) // // If metadata is non-null then it only needs to exist for the lifetime of this call, // a consumer who wants it to live after that must copy it to ensure lifetime. // // A producer *must not* call this concurrently from multiple different threads. // // A consumer must be prepared to receive one or more calls to this callback even // after calling cancel on the corresponding ArrowAsyncProducer, as cancel does not // guarantee it happens immediately. // // Return value: 0 if successful, `errno`-compatible error otherwise. // // If the consumer returns a non-zero return from this method, that indicates to the // producer that it should stop propagating data as an error occurred. After receiving // such a return, the only interaction with this object is for the producer to call // the `release` callback. int (*on_next_task)(struct ArrowAsyncDeviceStreamHandler* self, struct ArrowAsyncTask* task, const char* metadata); // Handler for encountering an error. The producer should call release after // this returns to clean up any resources. The `code` passed in can be any error // code that a producer wants, but should be errno-compatible for consistency. 
// // If the message or metadata are non-null, they will only last as long as this // function call. The consumer would need to perform a copy of the data if it is // necessary for them to live past the lifetime of this call. // // Error metadata should be encoded as with metadata in ArrowSchema, defined in // the spec at // https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.metadata // // It is valid for this to be called by a producer with or without a preceding call // to ArrowAsyncProducer.request. // // This callback must not call any methods of an ArrowAsyncProducer object. void (*on_error)(struct ArrowAsyncDeviceStreamHandler* self, int code, const char* message, const char* metadata); // Release callback to release any resources for the handler. Should always be // called by a producer when it is done utilizing a handler. No callbacks should // be called after this is called. // // It is valid for the release callback to be called by a producer with or without // a preceding call to ArrowAsyncProducer.request. // // The release callback must not call any methods of an ArrowAsyncProducer object. void (*release)(struct ArrowAsyncDeviceStreamHandler* self); // MUST be populated by the producer BEFORE calling any callbacks other than release. // This provides the connection between a handler and its producer, and must exist until // the release callback is called. struct ArrowAsyncProducer* producer; // Opaque handler-specific data void* private_data; }; #endif // ARROW_C_ASYNC_STREAM_INTERFACE #ifdef __cplusplus } #endif arrow-go-18.2.0/arrow/cdata/cdata.go000066400000000000000000000724371476434502500171770ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build cgo // +build cgo package cdata // implement handling of the Arrow C Data Interface. At least from a consuming side. // #include "abi.h" // #include "helpers.h" // #include // int stream_get_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { return st->get_schema(st, out); } // int stream_get_next(struct ArrowArrayStream* st, struct ArrowArray* out) { return st->get_next(st, out); } // const char* stream_get_last_error(struct ArrowArrayStream* st) { return st->get_last_error(st); } // struct ArrowArray* get_arr() { // struct ArrowArray* out = (struct ArrowArray*)(malloc(sizeof(struct ArrowArray))); // memset(out, 0, sizeof(struct ArrowArray)); // return out; // } // struct ArrowArrayStream* get_stream() { // struct ArrowArrayStream* out = (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream)); // memset(out, 0, sizeof(struct ArrowArrayStream)); // return out; // } // import "C" import ( "errors" "fmt" "io" "runtime" "strconv" "strings" "syscall" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" "golang.org/x/xerrors" ) type ( // CArrowSchema is the C Data Interface for ArrowSchemas defined in abi.h CArrowSchema = C.struct_ArrowSchema // CArrowArray is the C Data Interface object for Arrow Arrays as defined in abi.h CArrowArray = 
C.struct_ArrowArray // CArrowArrayStream is the C Stream Interface object for handling streams of record batches. CArrowArrayStream = C.struct_ArrowArrayStream CArrowAsyncDeviceStreamHandler = C.struct_ArrowAsyncDeviceStreamHandler CArrowAsyncProducer = C.struct_ArrowAsyncProducer CArrowAsyncTask = C.struct_ArrowAsyncTask CArrowDeviceArray = C.struct_ArrowDeviceArray ) // Map from the defined strings to their corresponding arrow.DataType interface // object instances, for types that don't require params. var formatToSimpleType = map[string]arrow.DataType{ "n": arrow.Null, "b": arrow.FixedWidthTypes.Boolean, "c": arrow.PrimitiveTypes.Int8, "C": arrow.PrimitiveTypes.Uint8, "s": arrow.PrimitiveTypes.Int16, "S": arrow.PrimitiveTypes.Uint16, "i": arrow.PrimitiveTypes.Int32, "I": arrow.PrimitiveTypes.Uint32, "l": arrow.PrimitiveTypes.Int64, "L": arrow.PrimitiveTypes.Uint64, "e": arrow.FixedWidthTypes.Float16, "f": arrow.PrimitiveTypes.Float32, "g": arrow.PrimitiveTypes.Float64, "z": arrow.BinaryTypes.Binary, "Z": arrow.BinaryTypes.LargeBinary, "u": arrow.BinaryTypes.String, "U": arrow.BinaryTypes.LargeString, "vz": arrow.BinaryTypes.BinaryView, "vu": arrow.BinaryTypes.StringView, "tdD": arrow.FixedWidthTypes.Date32, "tdm": arrow.FixedWidthTypes.Date64, "tts": arrow.FixedWidthTypes.Time32s, "ttm": arrow.FixedWidthTypes.Time32ms, "ttu": arrow.FixedWidthTypes.Time64us, "ttn": arrow.FixedWidthTypes.Time64ns, "tDs": arrow.FixedWidthTypes.Duration_s, "tDm": arrow.FixedWidthTypes.Duration_ms, "tDu": arrow.FixedWidthTypes.Duration_us, "tDn": arrow.FixedWidthTypes.Duration_ns, "tiM": arrow.FixedWidthTypes.MonthInterval, "tiD": arrow.FixedWidthTypes.DayTimeInterval, "tin": arrow.FixedWidthTypes.MonthDayNanoInterval, } // decode metadata from C which is encoded as // // [int32] -> number of metadata pairs // for 0..n // [int32] -> number of bytes in key // [n bytes] -> key value // [int32] -> number of bytes in value // [n bytes] -> value func decodeCMetadata(md *C.char) 
arrow.Metadata { if md == nil { return arrow.Metadata{} } // don't copy the bytes, just reference them directly const maxlen = 0x7fffffff data := (*[maxlen]byte)(unsafe.Pointer(md))[:] readint32 := func() int32 { v := *(*int32)(unsafe.Pointer(&data[0])) data = data[arrow.Int32SizeBytes:] return v } readstr := func() string { l := readint32() s := string(data[:l]) data = data[l:] return s } npairs := readint32() if npairs == 0 { return arrow.Metadata{} } keys := make([]string, npairs) vals := make([]string, npairs) for i := int32(0); i < npairs; i++ { keys[i] = readstr() vals[i] = readstr() } return arrow.NewMetadata(keys, vals) } // convert a C.ArrowSchema to an arrow.Field to maintain metadata with the schema func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { // always release, even on error defer C.ArrowSchemaRelease(schema) var childFields []arrow.Field if schema.n_children > 0 { // call ourselves recursively if there are children. // set up a slice to reference safely schemaChildren := unsafe.Slice(schema.children, schema.n_children) childFields = make([]arrow.Field, schema.n_children) for i, c := range schemaChildren { childFields[i], err = importSchema((*CArrowSchema)(c)) if err != nil { return } } } // copy the schema name from the c-string ret.Name = C.GoString(schema.name) ret.Nullable = (schema.flags & C.ARROW_FLAG_NULLABLE) != 0 ret.Metadata = decodeCMetadata(schema.metadata) // copies the c-string here, but it's very small f := C.GoString(schema.format) // handle our non-parameterized simple types. 
dt, ok := formatToSimpleType[f] if ok { ret.Type = dt if schema.dictionary != nil { valueField, err := importSchema(schema.dictionary) if err != nil { return ret, err } ret.Type = &arrow.DictionaryType{ IndexType: ret.Type, ValueType: valueField.Type, Ordered: schema.dictionary.flags&C.ARROW_FLAG_DICTIONARY_ORDERED != 0} } return } // handle types with params via colon typs := strings.Split(f, ":") defaulttz := "" switch typs[0] { case "tss": tz := typs[1] if len(typs[1]) == 0 { tz = defaulttz } dt = &arrow.TimestampType{Unit: arrow.Second, TimeZone: tz} case "tsm": tz := typs[1] if len(typs[1]) == 0 { tz = defaulttz } dt = &arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: tz} case "tsu": tz := typs[1] if len(typs[1]) == 0 { tz = defaulttz } dt = &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: tz} case "tsn": tz := typs[1] if len(typs[1]) == 0 { tz = defaulttz } dt = &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: tz} case "w": // fixed size binary is "w:##" where ## is the byteWidth byteWidth, err := strconv.Atoi(typs[1]) if err != nil { return ret, err } dt = &arrow.FixedSizeBinaryType{ByteWidth: byteWidth} case "d": // decimal types are d:,[,] size is assumed 128 if left out props := typs[1] propList := strings.Split(props, ",") bitwidth := 128 var precision, scale int if len(propList) < 2 || len(propList) > 3 { return ret, xerrors.Errorf("invalid decimal spec '%s': wrong number of properties", f) } else if len(propList) == 3 { bitwidth, err = strconv.Atoi(propList[2]) if err != nil { return ret, xerrors.Errorf("could not parse decimal bitwidth in '%s': %s", f, err.Error()) } } precision, err = strconv.Atoi(propList[0]) if err != nil { return ret, xerrors.Errorf("could not parse decimal precision in '%s': %s", f, err.Error()) } scale, err = strconv.Atoi(propList[1]) if err != nil { return ret, xerrors.Errorf("could not parse decimal scale in '%s': %s", f, err.Error()) } switch bitwidth { case 32: dt = &arrow.Decimal32Type{Precision: 
int32(precision), Scale: int32(scale)} case 64: dt = &arrow.Decimal64Type{Precision: int32(precision), Scale: int32(scale)} case 128: dt = &arrow.Decimal128Type{Precision: int32(precision), Scale: int32(scale)} case 256: dt = &arrow.Decimal256Type{Precision: int32(precision), Scale: int32(scale)} default: return ret, xerrors.Errorf("unsupported decimal bitwidth, got '%s'", f) } } if f[0] == '+' { // types with children switch f[1] { case 'l': // list dt = arrow.ListOfField(childFields[0]) case 'L': // large list dt = arrow.LargeListOfField(childFields[0]) case 'v': // list view/large list view if f[2] == 'l' { dt = arrow.ListViewOfField(childFields[0]) } else if f[2] == 'L' { dt = arrow.LargeListViewOfField(childFields[0]) } case 'w': // fixed size list is w:# where # is the list size. listSize, err := strconv.Atoi(strings.Split(f, ":")[1]) if err != nil { return ret, err } dt = arrow.FixedSizeListOfField(int32(listSize), childFields[0]) case 's': // struct dt = arrow.StructOf(childFields...) case 'r': // run-end encoded if len(childFields) != 2 { return ret, fmt.Errorf("%w: run-end encoded arrays must have 2 children", arrow.ErrInvalid) } dt = arrow.RunEndEncodedOf(childFields[0].Type, childFields[1].Type) case 'm': // map type is basically a list of structs. 
st := childFields[0].Type.(*arrow.StructType) dt = arrow.MapOf(st.Field(0).Type, st.Field(1).Type) dt.(*arrow.MapType).KeysSorted = (schema.flags & C.ARROW_FLAG_MAP_KEYS_SORTED) != 0 case 'u': // union var mode arrow.UnionMode switch f[2] { case 'd': mode = arrow.DenseMode case 's': mode = arrow.SparseMode default: err = fmt.Errorf("%w: invalid union type", arrow.ErrInvalid) return } codes := strings.Split(strings.Split(f, ":")[1], ",") typeCodes := make([]arrow.UnionTypeCode, 0, len(codes)) for _, i := range codes { v, e := strconv.ParseInt(i, 10, 8) if e != nil { err = fmt.Errorf("%w: invalid type code: %s", arrow.ErrInvalid, e) return } if v < 0 { err = fmt.Errorf("%w: negative type code in union: format string %s", arrow.ErrInvalid, f) return } typeCodes = append(typeCodes, arrow.UnionTypeCode(v)) } if len(childFields) != len(typeCodes) { err = fmt.Errorf("%w: ArrowArray struct number of children incompatible with format string", arrow.ErrInvalid) return } dt = arrow.UnionOf(mode, childFields, typeCodes) } } if dt == nil { // if we didn't find a type, then it's something we haven't implemented. err = xerrors.New("unimplemented type") } else { ret.Type = dt } return } // importer to keep track when importing C ArrowArray objects. type cimporter struct { dt arrow.DataType arr *CArrowArray data arrow.ArrayData parent *cimporter children []cimporter cbuffers []*C.void alloc *importAllocator } func (imp *cimporter) importChild(parent *cimporter, src *CArrowArray) error { imp.parent, imp.arr, imp.alloc = parent, src, parent.alloc return imp.doImport() } // import any child arrays for lists, structs, and so on. 
func (imp *cimporter) doImportChildren() error { children := unsafe.Slice(imp.arr.children, imp.arr.n_children) if len(children) > 0 { imp.children = make([]cimporter, len(children)) } // handle the cases switch imp.dt.ID() { case arrow.LIST: // only one child to import imp.children[0].dt = imp.dt.(*arrow.ListType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } case arrow.LARGE_LIST: // only one child to import imp.children[0].dt = imp.dt.(*arrow.LargeListType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } case arrow.LIST_VIEW: // only one child to import imp.children[0].dt = imp.dt.(*arrow.ListViewType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } case arrow.LARGE_LIST_VIEW: // only one child to import imp.children[0].dt = imp.dt.(*arrow.LargeListViewType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } case arrow.FIXED_SIZE_LIST: // only one child to import imp.children[0].dt = imp.dt.(*arrow.FixedSizeListType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } case arrow.STRUCT: // import all the children st := imp.dt.(*arrow.StructType) for i, c := range children { imp.children[i].dt = st.Field(i).Type imp.children[i].importChild(imp, c) } case arrow.RUN_END_ENCODED: // import run-ends and values st := imp.dt.(*arrow.RunEndEncodedType) imp.children[0].dt = st.RunEnds() if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } imp.children[1].dt = st.Encoded() if err := imp.children[1].importChild(imp, children[1]); err != nil { return err } case arrow.MAP: // only one child to import, it's a struct array imp.children[0].dt = imp.dt.(*arrow.MapType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } case arrow.DENSE_UNION: dt := imp.dt.(*arrow.DenseUnionType) for i, c := range children { 
imp.children[i].dt = dt.Fields()[i].Type imp.children[i].importChild(imp, c) } case arrow.SPARSE_UNION: dt := imp.dt.(*arrow.SparseUnionType) for i, c := range children { imp.children[i].dt = dt.Fields()[i].Type imp.children[i].importChild(imp, c) } } return nil } func (imp *cimporter) initarr() { imp.arr = C.get_arr() if imp.alloc == nil { imp.alloc = &importAllocator{arr: imp.arr} } } func (imp *cimporter) doImportArr(src *CArrowArray) error { imp.arr = C.get_arr() C.ArrowArrayMove(src, imp.arr) if imp.alloc == nil { imp.alloc = &importAllocator{arr: imp.arr} } // we tie the releasing of the array to when the buffers are // cleaned up, so if there are no buffers that we've imported // such as for a null array or a nested array with no bitmap // and only null columns, then we can release the CArrowArray // struct immediately after import, since we have no imported // memory that we have to track the lifetime of. defer func() { if imp.alloc.bufCount == 0 { C.ArrowArrayRelease(imp.arr) C.free(unsafe.Pointer(imp.arr)) } }() return imp.doImport() } // import is called recursively as needed for importing an array and its children // in order to generate array.Data objects func (imp *cimporter) doImport() error { // move the array from the src object passed in to the one referenced by // this importer. That way we can set up a finalizer on the created // arrow.ArrayData object so we clean up our Array's memory when garbage collected. defer func(arr *CArrowArray) { // this should only occur in the case of an error happening // during import, at which point we need to clean up the // ArrowArray struct we allocated. if imp.data == nil { C.free(unsafe.Pointer(arr)) } }(imp.arr) // import any children if err := imp.doImportChildren(); err != nil { return err } for _, c := range imp.children { if c.data != nil { defer c.data.Release() } } if imp.arr.n_buffers > 0 { // get a view of the buffers, zero-copy. 
we're just looking at the pointers imp.cbuffers = unsafe.Slice((**C.void)(unsafe.Pointer(imp.arr.buffers)), imp.arr.n_buffers) } // handle each of our type cases switch dt := imp.dt.(type) { case *arrow.NullType: if err := imp.checkNoChildren(); err != nil { return err } imp.data = array.NewData(dt, int(imp.arr.length), nil, nil, int(imp.arr.null_count), int(imp.arr.offset)) case arrow.FixedWidthDataType: return imp.importFixedSizePrimitive() case *arrow.StringType: return imp.importStringLike(int64(arrow.Int32SizeBytes)) case *arrow.BinaryType: return imp.importStringLike(int64(arrow.Int32SizeBytes)) case *arrow.LargeStringType: return imp.importStringLike(int64(arrow.Int64SizeBytes)) case *arrow.LargeBinaryType: return imp.importStringLike(int64(arrow.Int64SizeBytes)) case *arrow.StringViewType: return imp.importBinaryViewLike() case *arrow.BinaryViewType: return imp.importBinaryViewLike() case *arrow.ListType: return imp.importListLike() case *arrow.LargeListType: return imp.importListLike() case *arrow.ListViewType: return imp.importListViewLike() case *arrow.LargeListViewType: return imp.importListViewLike() case *arrow.MapType: return imp.importListLike() case *arrow.FixedSizeListType: if err := imp.checkNumChildren(1); err != nil { return err } if err := imp.checkNumBuffers(1); err != nil { return err } nulls, err := imp.importNullBitmap(0) if err != nil { return err } if nulls != nil { defer nulls.Release() } imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) case *arrow.StructType: if err := imp.checkNumBuffers(1); err != nil { return err } nulls, err := imp.importNullBitmap(0) if err != nil { return err } if nulls != nil { defer nulls.Release() } children := make([]arrow.ArrayData, len(imp.children)) for i := range imp.children { children[i] = imp.children[i].data } imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, 
children, int(imp.arr.null_count), int(imp.arr.offset)) case *arrow.RunEndEncodedType: if err := imp.checkNumBuffers(0); err != nil { return err } if len(imp.children) != 2 { return fmt.Errorf("%w: run-end encoded array should have 2 children", arrow.ErrInvalid) } children := []arrow.ArrayData{imp.children[0].data, imp.children[1].data} imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{}, children, int(imp.arr.null_count), int(imp.arr.offset)) case *arrow.DenseUnionType: if err := imp.checkNoNulls(); err != nil { return err } bufs := []*memory.Buffer{nil, nil, nil} var err error if imp.arr.n_buffers == 3 { // legacy format exported by older arrow c++ versions if bufs[1], err = imp.importFixedSizeBuffer(1, 1); err != nil { return err } defer bufs[1].Release() if bufs[2], err = imp.importFixedSizeBuffer(2, int64(arrow.Int32SizeBytes)); err != nil { return err } defer bufs[2].Release() } else { if err := imp.checkNumBuffers(2); err != nil { return err } if bufs[1], err = imp.importFixedSizeBuffer(0, 1); err != nil { return err } defer bufs[1].Release() if bufs[2], err = imp.importFixedSizeBuffer(1, int64(arrow.Int32SizeBytes)); err != nil { return err } defer bufs[2].Release() } children := make([]arrow.ArrayData, len(imp.children)) for i := range imp.children { children[i] = imp.children[i].data } imp.data = array.NewData(dt, int(imp.arr.length), bufs, children, 0, int(imp.arr.offset)) case *arrow.SparseUnionType: if err := imp.checkNoNulls(); err != nil { return err } var buf *memory.Buffer var err error if imp.arr.n_buffers == 2 { // legacy format exported by older Arrow C++ versions if buf, err = imp.importFixedSizeBuffer(1, 1); err != nil { return err } defer buf.Release() } else { if err := imp.checkNumBuffers(1); err != nil { return err } if buf, err = imp.importFixedSizeBuffer(0, 1); err != nil { return err } defer buf.Release() } children := make([]arrow.ArrayData, len(imp.children)) for i := range imp.children { children[i] = 
imp.children[i].data } imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nil, buf}, children, 0, int(imp.arr.offset)) default: return fmt.Errorf("unimplemented type %s", dt) } return nil } func (imp *cimporter) importStringLike(offsetByteWidth int64) (err error) { if err = imp.checkNoChildren(); err != nil { return } if err = imp.checkNumBuffers(3); err != nil { return } var ( nulls, offsets, values *memory.Buffer ) if nulls, err = imp.importNullBitmap(0); err != nil { return } if nulls != nil { defer nulls.Release() } if offsets, err = imp.importOffsetsBuffer(1, offsetByteWidth); err != nil { return } defer offsets.Release() var nvals int64 switch offsetByteWidth { case 4: typedOffsets := arrow.Int32Traits.CastFromBytes(offsets.Bytes()) nvals = int64(typedOffsets[imp.arr.offset+imp.arr.length]) case 8: typedOffsets := arrow.Int64Traits.CastFromBytes(offsets.Bytes()) nvals = typedOffsets[imp.arr.offset+imp.arr.length] } if values, err = imp.importVariableValuesBuffer(2, 1, nvals); err != nil { return } defer values.Release() imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, values}, nil, int(imp.arr.null_count), int(imp.arr.offset)) return } func (imp *cimporter) importBinaryViewLike() (err error) { if err = imp.checkNoChildren(); err != nil { return } buffers := make([]*memory.Buffer, len(imp.cbuffers)-1) defer memory.ReleaseBuffers(buffers) if buffers[0], err = imp.importNullBitmap(0); err != nil { return } if buffers[1], err = imp.importFixedSizeBuffer(1, int64(arrow.ViewHeaderSizeBytes)); err != nil { return } dataBufferSizes := unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])), len(buffers)-2) for i, size := range dataBufferSizes { if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); err != nil { return } } imp.data = array.NewData(imp.dt, int(imp.arr.length), buffers, nil, int(imp.arr.null_count), int(imp.arr.offset)) return } func (imp *cimporter) importListLike() (err 
error) { if err = imp.checkNumChildren(1); err != nil { return err } if err = imp.checkNumBuffers(2); err != nil { return err } var nulls, offsets *memory.Buffer if nulls, err = imp.importNullBitmap(0); err != nil { return } if nulls != nil { defer nulls.Release() } offsetSize := imp.dt.Layout().Buffers[1].ByteWidth if offsets, err = imp.importOffsetsBuffer(1, int64(offsetSize)); err != nil { return } if offsets != nil { defer offsets.Release() } imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) return } func (imp *cimporter) importListViewLike() (err error) { offsetSize := int64(imp.dt.Layout().Buffers[1].ByteWidth) if err = imp.checkNumChildren(1); err != nil { return err } if err = imp.checkNumBuffers(3); err != nil { return err } var nulls, offsets, sizes *memory.Buffer if nulls, err = imp.importNullBitmap(0); err != nil { return } if nulls != nil { defer nulls.Release() } if offsets, err = imp.importFixedSizeBuffer(1, offsetSize); err != nil { return } if offsets != nil { defer offsets.Release() } if sizes, err = imp.importFixedSizeBuffer(2, offsetSize); err != nil { return } if sizes != nil { defer sizes.Release() } imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, sizes}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) return } func (imp *cimporter) importFixedSizePrimitive() error { if err := imp.checkNoChildren(); err != nil { return err } if err := imp.checkNumBuffers(2); err != nil { return err } nulls, err := imp.importNullBitmap(0) if err != nil { return err } var values *memory.Buffer fw := imp.dt.(arrow.FixedWidthDataType) if bitutil.IsMultipleOf8(int64(fw.BitWidth())) { values, err = imp.importFixedSizeBuffer(1, bitutil.BytesForBits(int64(fw.BitWidth()))) } else { if fw.BitWidth() != 1 { return xerrors.New("invalid bitwidth") } values, err = 
imp.importBitsBuffer(1) } if err != nil { return err } var dict *array.Data if dt, ok := imp.dt.(*arrow.DictionaryType); ok { dictImp := &cimporter{dt: dt.ValueType} if err := dictImp.importChild(imp, imp.arr.dictionary); err != nil { return err } defer dictImp.data.Release() dict = dictImp.data.(*array.Data) } if nulls != nil { defer nulls.Release() } if values != nil { defer values.Release() } imp.data = array.NewDataWithDictionary(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, values}, int(imp.arr.null_count), int(imp.arr.offset), dict) return nil } func (imp *cimporter) checkNoChildren() error { return imp.checkNumChildren(0) } func (imp *cimporter) checkNoNulls() error { if imp.arr.null_count != 0 { return fmt.Errorf("%w: unexpected non-zero null count for imported type %s", arrow.ErrInvalid, imp.dt) } return nil } func (imp *cimporter) checkNumChildren(n int64) error { if int64(imp.arr.n_children) != n { return fmt.Errorf("expected %d children, for imported type %s, ArrowArray has %d", n, imp.dt, imp.arr.n_children) } return nil } func (imp *cimporter) checkNumBuffers(n int64) error { if int64(imp.arr.n_buffers) != n { return fmt.Errorf("expected %d buffers for imported type %s, ArrowArray has %d", n, imp.dt, imp.arr.n_buffers) } return nil } func (imp *cimporter) importBuffer(bufferID int, sz int64) (*memory.Buffer, error) { // this is not a copy, we're just having a slice which points at the data // it's still owned by the C.ArrowArray object and its backing C++ object. 
if imp.cbuffers[bufferID] == nil { if sz != 0 { return nil, errors.New("invalid buffer") } return memory.NewBufferBytes([]byte{}), nil } data := unsafe.Slice((*byte)(unsafe.Pointer(imp.cbuffers[bufferID])), sz) imp.alloc.addBuffer() return memory.NewBufferWithAllocator(data, imp.alloc), nil } func (imp *cimporter) importBitsBuffer(bufferID int) (*memory.Buffer, error) { bufsize := bitutil.BytesForBits(int64(imp.arr.length) + int64(imp.arr.offset)) return imp.importBuffer(bufferID, bufsize) } func (imp *cimporter) importNullBitmap(bufferID int) (*memory.Buffer, error) { if imp.arr.null_count > 0 && imp.cbuffers[bufferID] == nil { return nil, fmt.Errorf("arrowarray struct has null bitmap buffer, but non-zero null_count %d", imp.arr.null_count) } if imp.arr.null_count == 0 && imp.cbuffers[bufferID] == nil { return nil, nil } return imp.importBitsBuffer(bufferID) } func (imp *cimporter) importFixedSizeBuffer(bufferID int, byteWidth int64) (*memory.Buffer, error) { bufsize := byteWidth * int64(imp.arr.length+imp.arr.offset) return imp.importBuffer(bufferID, bufsize) } func (imp *cimporter) importOffsetsBuffer(bufferID int, offsetsize int64) (*memory.Buffer, error) { bufsize := offsetsize * int64((imp.arr.length + imp.arr.offset + 1)) return imp.importBuffer(bufferID, bufsize) } func (imp *cimporter) importVariableValuesBuffer(bufferID int, byteWidth, nvals int64) (*memory.Buffer, error) { bufsize := byteWidth * nvals return imp.importBuffer(bufferID, int64(bufsize)) } func importCArrayAsType(arr *CArrowArray, dt arrow.DataType) (imp *cimporter, err error) { imp = &cimporter{dt: dt} err = imp.doImportArr(arr) return } func initReader(rdr *nativeCRecordBatchReader, stream *CArrowArrayStream) error { rdr.stream = C.get_stream() C.ArrowArrayStreamMove(stream, rdr.stream) rdr.arr = C.get_arr() runtime.SetFinalizer(rdr, func(r *nativeCRecordBatchReader) { if r.cur != nil { r.cur.Release() } C.ArrowArrayStreamRelease(r.stream) C.ArrowArrayRelease(r.arr) 
C.free(unsafe.Pointer(r.stream)) C.free(unsafe.Pointer(r.arr)) }) var sc CArrowSchema errno := C.stream_get_schema(rdr.stream, &sc) if errno != 0 { return rdr.getError(int(errno)) } defer C.ArrowSchemaRelease(&sc) s, err := ImportCArrowSchema((*CArrowSchema)(&sc)) if err != nil { return err } rdr.schema = s return nil } // Record Batch reader that conforms to arrio.Reader for the ArrowArrayStream interface type nativeCRecordBatchReader struct { stream *CArrowArrayStream arr *CArrowArray schema *arrow.Schema cur arrow.Record err error } // No need to implement retain and release here as we used runtime.SetFinalizer when constructing // the reader to free up the ArrowArrayStream memory when the garbage collector cleans it up. func (n *nativeCRecordBatchReader) Retain() {} func (n *nativeCRecordBatchReader) Release() {} func (n *nativeCRecordBatchReader) Err() error { return n.err } func (n *nativeCRecordBatchReader) Record() arrow.Record { return n.cur } func (n *nativeCRecordBatchReader) Next() bool { err := n.next() switch { case err == nil: return true case err == io.EOF: return false } n.err = err return false } func (n *nativeCRecordBatchReader) next() error { if n.schema == nil { var sc CArrowSchema errno := C.stream_get_schema(n.stream, &sc) if errno != 0 { return n.getError(int(errno)) } defer C.ArrowSchemaRelease(&sc) s, err := ImportCArrowSchema((*CArrowSchema)(&sc)) if err != nil { return err } n.schema = s } if n.cur != nil { n.cur.Release() n.cur = nil } errno := C.stream_get_next(n.stream, n.arr) if errno != 0 { return n.getError(int(errno)) } if C.ArrowArrayIsReleased(n.arr) == 1 { return io.EOF } rec, err := ImportCRecordBatchWithSchema(n.arr, n.schema) if err != nil { return err } n.cur = rec return nil } func (n *nativeCRecordBatchReader) Schema() *arrow.Schema { return n.schema } func (n *nativeCRecordBatchReader) getError(errno int) error { return fmt.Errorf("%w: %s", syscall.Errno(errno), C.GoString(C.stream_get_last_error(n.stream))) } func (n 
*nativeCRecordBatchReader) Read() (arrow.Record, error) { if err := n.next(); err != nil { n.err = err return nil, err } return n.cur, nil } func releaseArr(arr *CArrowArray) { C.ArrowArrayRelease(arr) } func releaseSchema(schema *CArrowSchema) { C.ArrowSchemaRelease(schema) } arrow-go-18.2.0/arrow/cdata/cdata_allocate.go000066400000000000000000000035201476434502500210260ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build go1.20 || tinygo package cdata // #include // #include "abi.h" import "C" import ( "unsafe" ) func allocateArrowSchemaArr(n int) (out []CArrowSchema) { return unsafe.Slice((*CArrowSchema)(C.calloc(C.size_t(n), C.sizeof_struct_ArrowSchema)), n) } func allocateArrowSchemaPtrArr(n int) (out []*CArrowSchema) { return unsafe.Slice((**CArrowSchema)(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*CArrowSchema)(nil))))), n) } func allocateArrowArrayArr(n int) (out []CArrowArray) { return unsafe.Slice((*CArrowArray)(C.calloc(C.size_t(n), C.sizeof_struct_ArrowArray)), n) } func allocateArrowArrayPtrArr(n int) (out []*CArrowArray) { return unsafe.Slice((**CArrowArray)(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*CArrowArray)(nil))))), n) } func allocateBufferPtrArr(n int) (out []*C.void) { return unsafe.Slice((**C.void)(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*C.void)(nil))))), n) } func allocateBufferSizeArr(n int) (out []C.int64_t) { return unsafe.Slice((*C.int64_t)(C.calloc(C.size_t(n), C.sizeof_int64_t)), n) } arrow-go-18.2.0/arrow/cdata/cdata_exports.go000066400000000000000000000347471476434502500207650ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package cdata // #include // #include // #include // #include "abi.h" // #include "helpers.h" // // extern void releaseExportedSchema(struct ArrowSchema* schema); // extern void releaseExportedArray(struct ArrowArray* array); // // const uint8_t kGoCdataZeroRegion[8] = {0}; // // void goReleaseArray(struct ArrowArray* array) { // releaseExportedArray(array); // } // void goReleaseSchema(struct ArrowSchema* schema) { // releaseExportedSchema(schema); // } // // void goCallCancel(struct ArrowAsyncProducer* producer) { // producer->cancel(producer); // } // // int goExtractTaskData(struct ArrowAsyncTask* task, struct ArrowDeviceArray* out) { // return task->extract_data(task, out); // } // // static void goCallRequest(struct ArrowAsyncProducer* producer, int64_t n) { // producer->request(producer, n); // } import "C" import ( "bytes" "context" "encoding/binary" "fmt" "runtime/cgo" "strconv" "strings" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/endian" "github.com/apache/arrow-go/v18/arrow/internal" "github.com/apache/arrow-go/v18/arrow/ipc" ) func encodeCMetadata(keys, values []string) []byte { if len(keys) != len(values) { panic("unequal metadata key/values length") } npairs := int32(len(keys)) var b bytes.Buffer totalSize := 4 for i := range keys { totalSize += 8 + len(keys[i]) + len(values[i]) } b.Grow(totalSize) b.Write((*[4]byte)(unsafe.Pointer(&npairs))[:]) for i := range keys { binary.Write(&b, endian.Native, int32(len(keys[i]))) b.WriteString(keys[i]) binary.Write(&b, endian.Native, int32(len(values[i]))) b.WriteString(values[i]) } return b.Bytes() } type schemaExporter struct { format, name string extraMeta arrow.Metadata metadata []byte flags int64 children []schemaExporter dict *schemaExporter } func (exp *schemaExporter) handleExtension(dt arrow.DataType) arrow.DataType { if dt.ID() != arrow.EXTENSION { return dt } ext := dt.(arrow.ExtensionType) exp.extraMeta = 
arrow.NewMetadata([]string{ipc.ExtensionTypeKeyName, ipc.ExtensionMetadataKeyName}, []string{ext.ExtensionName(), ext.Serialize()}) return ext.StorageType() } func (exp *schemaExporter) exportMeta(m *arrow.Metadata) { var ( finalKeys []string finalValues []string ) if m == nil { if exp.extraMeta.Len() > 0 { finalKeys = exp.extraMeta.Keys() finalValues = exp.extraMeta.Values() } exp.metadata = encodeCMetadata(finalKeys, finalValues) return } finalKeys = m.Keys() finalValues = m.Values() if exp.extraMeta.Len() > 0 { for i, k := range exp.extraMeta.Keys() { if m.FindKey(k) != -1 { continue } finalKeys = append(finalKeys, k) finalValues = append(finalValues, exp.extraMeta.Values()[i]) } } exp.metadata = encodeCMetadata(finalKeys, finalValues) } func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { switch dt := dt.(type) { case *arrow.NullType: return "n" case *arrow.BooleanType: return "b" case *arrow.Int8Type: return "c" case *arrow.Uint8Type: return "C" case *arrow.Int16Type: return "s" case *arrow.Uint16Type: return "S" case *arrow.Int32Type: return "i" case *arrow.Uint32Type: return "I" case *arrow.Int64Type: return "l" case *arrow.Uint64Type: return "L" case *arrow.Float16Type: return "e" case *arrow.Float32Type: return "f" case *arrow.Float64Type: return "g" case *arrow.FixedSizeBinaryType: return fmt.Sprintf("w:%d", dt.ByteWidth) case *arrow.Decimal32Type: return fmt.Sprintf("d:%d,%d,32", dt.Precision, dt.Scale) case *arrow.Decimal64Type: return fmt.Sprintf("d:%d,%d,64", dt.Precision, dt.Scale) case *arrow.Decimal128Type: return fmt.Sprintf("d:%d,%d", dt.Precision, dt.Scale) case *arrow.Decimal256Type: return fmt.Sprintf("d:%d,%d,256", dt.Precision, dt.Scale) case *arrow.BinaryType: return "z" case *arrow.LargeBinaryType: return "Z" case *arrow.StringType: return "u" case *arrow.LargeStringType: return "U" case *arrow.BinaryViewType: return "vz" case *arrow.StringViewType: return "vu" case *arrow.Date32Type: return "tdD" case *arrow.Date64Type: 
return "tdm" case *arrow.Time32Type: switch dt.Unit { case arrow.Second: return "tts" case arrow.Millisecond: return "ttm" default: panic(fmt.Sprintf("invalid time unit for time32: %s", dt.Unit)) } case *arrow.Time64Type: switch dt.Unit { case arrow.Microsecond: return "ttu" case arrow.Nanosecond: return "ttn" default: panic(fmt.Sprintf("invalid time unit for time64: %s", dt.Unit)) } case *arrow.TimestampType: var b strings.Builder switch dt.Unit { case arrow.Second: b.WriteString("tss:") case arrow.Millisecond: b.WriteString("tsm:") case arrow.Microsecond: b.WriteString("tsu:") case arrow.Nanosecond: b.WriteString("tsn:") default: panic(fmt.Sprintf("invalid time unit for timestamp: %s", dt.Unit)) } b.WriteString(dt.TimeZone) return b.String() case *arrow.DurationType: switch dt.Unit { case arrow.Second: return "tDs" case arrow.Millisecond: return "tDm" case arrow.Microsecond: return "tDu" case arrow.Nanosecond: return "tDn" default: panic(fmt.Sprintf("invalid time unit for duration: %s", dt.Unit)) } case *arrow.MonthIntervalType: return "tiM" case *arrow.DayTimeIntervalType: return "tiD" case *arrow.MonthDayNanoIntervalType: return "tin" case *arrow.ListType: return "+l" case *arrow.LargeListType: return "+L" case *arrow.ListViewType: return "+vl" case *arrow.LargeListViewType: return "+vL" case *arrow.FixedSizeListType: return fmt.Sprintf("+w:%d", dt.Len()) case *arrow.StructType: return "+s" case *arrow.RunEndEncodedType: return "+r" case *arrow.MapType: if dt.KeysSorted { exp.flags |= C.ARROW_FLAG_MAP_KEYS_SORTED } return "+m" case *arrow.DictionaryType: if dt.Ordered { exp.flags |= C.ARROW_FLAG_DICTIONARY_ORDERED } return exp.exportFormat(dt.IndexType) case arrow.UnionType: var b strings.Builder if dt.Mode() == arrow.SparseMode { b.WriteString("+us:") } else { b.WriteString("+ud:") } for i, c := range dt.TypeCodes() { if i != 0 { b.WriteByte(',') } b.WriteString(strconv.Itoa(int(c))) } return b.String() } panic("unsupported data type for export") } func (exp 
*schemaExporter) export(field arrow.Field) { exp.name = field.Name exp.format = exp.exportFormat(exp.handleExtension(field.Type)) if field.Nullable { exp.flags |= C.ARROW_FLAG_NULLABLE } switch dt := field.Type.(type) { case *arrow.DictionaryType: exp.dict = new(schemaExporter) exp.dict.export(arrow.Field{Type: dt.ValueType}) case arrow.NestedType: exp.children = make([]schemaExporter, dt.NumFields()) for i, f := range dt.Fields() { exp.children[i].export(f) } } exp.exportMeta(&field.Metadata) } func (exp *schemaExporter) finish(out *CArrowSchema) { out.dictionary = nil if exp.dict != nil { out.dictionary = (*CArrowSchema)(C.calloc(C.sizeof_struct_ArrowSchema, C.size_t(1))) exp.dict.finish(out.dictionary) } out.name = C.CString(exp.name) out.format = C.CString(exp.format) out.metadata = (*C.char)(C.CBytes(exp.metadata)) out.flags = C.int64_t(exp.flags) out.n_children = C.int64_t(len(exp.children)) if len(exp.children) > 0 { children := allocateArrowSchemaArr(len(exp.children)) childPtrs := allocateArrowSchemaPtrArr(len(exp.children)) for i, c := range exp.children { c.finish(&children[i]) childPtrs[i] = &children[i] } out.children = (**CArrowSchema)(unsafe.Pointer(&childPtrs[0])) } else { out.children = nil } out.release = (*[0]byte)(C.goReleaseSchema) } func exportField(field arrow.Field, out *CArrowSchema) { var exp schemaExporter exp.export(field) exp.finish(out) } func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { if outSchema != nil { exportField(arrow.Field{Type: arr.DataType()}, outSchema) } buffers := arr.Data().Buffers() // Some types don't have validity bitmaps, but we keep them shifted // to make processing easier in other contexts. This means that // we have to adjust when exporting. 
has_validity_bitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID()) if len(buffers) > 0 && !has_validity_bitmap { buffers = buffers[1:] } nbuffers := len(buffers) has_buffer_sizes_buffer := internal.HasBufferSizesBuffer(arr.DataType().ID()) if has_buffer_sizes_buffer { nbuffers++ } out.dictionary = nil out.null_count = C.int64_t(arr.NullN()) out.length = C.int64_t(arr.Len()) out.offset = C.int64_t(arr.Data().Offset()) out.n_buffers = C.int64_t(nbuffers) out.buffers = nil if nbuffers > 0 { cBufs := allocateBufferPtrArr(nbuffers) for i, buf := range buffers { if buf == nil || buf.Len() == 0 { if i > 0 || !has_validity_bitmap { // apache/arrow#33936: export a dummy buffer to be friendly to // implementations that don't import NULL properly cBufs[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion)) } else { // null pointer permitted for the validity bitmap // (assuming null count is 0) cBufs[i] = nil } continue } cBufs[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0])) } if has_buffer_sizes_buffer { sizes := allocateBufferSizeArr(len(buffers[2:])) for i, buf := range buffers[2:] { sizes[i] = C.int64_t(buf.Len()) } if len(sizes) > 0 { cBufs[nbuffers-1] = (*C.void)(unsafe.Pointer(&sizes[0])) } } out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cBufs[0])) } arr.Data().Retain() h := cgo.NewHandle(arr.Data()) out.private_data = createHandle(h) out.release = (*[0]byte)(C.goReleaseArray) switch arr := arr.(type) { case array.ListLike: out.n_children = 1 childPtrs := allocateArrowArrayPtrArr(1) children := allocateArrowArrayArr(1) exportArray(arr.ListValues(), &children[0], nil) childPtrs[0] = &children[0] out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) case *array.Struct: out.n_children = C.int64_t(arr.NumField()) if arr.NumField() == 0 { return } childPtrs := allocateArrowArrayPtrArr(arr.NumField()) children := allocateArrowArrayArr(arr.NumField()) for i := 0; i < arr.NumField(); i++ { exportArray(arr.Field(i), &children[i], nil) childPtrs[i] = 
&children[i] } out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) case *array.RunEndEncoded: out.n_children = 2 childPtrs := allocateArrowArrayPtrArr(2) children := allocateArrowArrayArr(2) exportArray(arr.RunEndsArr(), &children[0], nil) exportArray(arr.Values(), &children[1], nil) childPtrs[0], childPtrs[1] = &children[0], &children[1] out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) case *array.Dictionary: out.dictionary = (*CArrowArray)(C.calloc(C.sizeof_struct_ArrowArray, C.size_t(1))) exportArray(arr.Dictionary(), out.dictionary, nil) case array.Union: out.n_children = C.int64_t(arr.NumFields()) if arr.NumFields() == 0 { return } childPtrs := allocateArrowArrayPtrArr(arr.NumFields()) children := allocateArrowArrayArr(arr.NumFields()) for i := 0; i < arr.NumFields(); i++ { exportArray(arr.Field(i), &children[i], nil) childPtrs[i] = &children[i] } out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) default: out.n_children = 0 out.children = nil } } type cRecordReader struct { rdr array.RecordReader err *C.char } func (rr cRecordReader) getSchema(out *CArrowSchema) int { schema := rr.rdr.Schema() if schema == nil { return rr.maybeError() } ExportArrowSchema(schema, out) return 0 } func (rr cRecordReader) next(out *CArrowArray) int { if rr.rdr.Next() { ExportArrowRecordBatch(rr.rdr.Record(), out, nil) return 0 } C.ArrowArrayMarkReleased(out) return rr.maybeError() } func (rr cRecordReader) maybeError() int { err := rr.rdr.Err() if err != nil { return C.EIO } return 0 } func (rr cRecordReader) getLastError() *C.char { err := rr.rdr.Err() if err != nil { if rr.err != nil { C.free(unsafe.Pointer(rr.err)) } rr.err = C.CString(err.Error()) } return rr.err } func (rr cRecordReader) release() { if rr.err != nil { C.free(unsafe.Pointer(rr.err)) } rr.rdr.Release() } type cAsyncStreamHandler struct { producer *CArrowAsyncProducer taskQueue chan taskState ctx context.Context } func asyncTaskQueue(ctx context.Context, schema *arrow.Schema, 
recordStream chan<- RecordMessage, taskQueue <-chan taskState, producer *CArrowAsyncProducer) { defer close(recordStream) for { select { case <-ctx.Done(): C.goCallCancel(producer) return case task, ok := <-taskQueue: // if the queue closes or we receive a nil task, we're done if !ok || (task.err == nil && task.task.extract_data == nil) { return } if task.err != nil { recordStream <- RecordMessage{Err: task.err} continue } // request another batch now that we've processed this one C.goCallRequest(producer, C.int64_t(1)) var out CArrowDeviceArray if C.goExtractTaskData(&task.task, &out) != C.int(0) { continue } rec, err := ImportCRecordBatchWithSchema(&out.array, schema) if err != nil { recordStream <- RecordMessage{Err: err} } else { recordStream <- RecordMessage{Record: rec, AdditionalMetadata: task.meta} } } } } func (h *cAsyncStreamHandler) onNextTask(task *CArrowAsyncTask, metadata *C.char) C.int { if task == nil { h.taskQueue <- taskState{} return 0 } ts := taskState{task: *task} if metadata != nil { ts.meta = decodeCMetadata(metadata) } h.taskQueue <- ts return 0 } func (h *cAsyncStreamHandler) onError(code C.int, message, metadata *C.char) { h.taskQueue <- taskState{err: AsyncStreamError{ Code: int(code), Msg: C.GoString(message), Metadata: C.GoString(metadata)}} } func (h *cAsyncStreamHandler) release() { close(h.taskQueue) h.taskQueue, h.producer = nil, nil h.producer = nil } arrow-go-18.2.0/arrow/cdata/cdata_fulltest.c000066400000000000000000000341331476434502500207250ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // +build cgo // +build test #include #include #include #include #include #include #include "abi.h" #include "helpers.h" #include "utils.h" int is_little_endian() { unsigned int x = 1; char *c = (char*) &x; return (int)*c; } static const int64_t kDefaultFlags = ARROW_FLAG_NULLABLE; extern void releaseTestArr(struct ArrowArray* array); void goReleaseTestArray(struct ArrowArray* array) { releaseTestArr(array); } static void release_int32_type(struct ArrowSchema* schema) { // mark released schema->release = NULL; } void export_int32_type(struct ArrowSchema* schema) { const char* encoded_metadata; if (is_little_endian() == 1) { encoded_metadata = kEncodedMeta1LE; } else { encoded_metadata = kEncodedMeta1BE; } *schema = (struct ArrowSchema) { // Type description .format = "i", .name = "", .metadata = encoded_metadata, .flags = 0, .n_children = 0, .children = NULL, .dictionary = NULL, // bookkeeping .release = &release_int32_type, }; } static bool test1_released = false; int test1_is_released() { return test1_released; } static void release_int32_array(struct ArrowArray* array) { assert(array->n_buffers == 2); // free the buffers and buffers array free((void *) array->buffers[1]); free(array->buffers); // mark released array->release = NULL; test1_released = true; } void export_int32_array(const int32_t* data, int64_t nitems, struct ArrowArray* array) { // initialize primitive fields *array = (struct ArrowArray) { .length = nitems, .offset = 0, .null_count = 0, .n_buffers = 2, .n_children = 0, .children = NULL, .dictionary = NULL, // bookkeeping .release = 
&release_int32_array }; // allocate list of buffers array->buffers = (const void**)malloc(sizeof(void*) * array->n_buffers); assert(array->buffers != NULL); array->buffers[0] = NULL; // no nulls, null bitmap can be omitted array->buffers[1] = data; } static void release_primitive(struct ArrowSchema* schema) { free((void *)schema->format); schema->release = NULL; } static void release_nested_internal(struct ArrowSchema* schema, int is_dynamic) { assert(!ArrowSchemaIsReleased(schema)); for (int i = 0; i < schema->n_children; ++i) { ArrowSchemaRelease(schema->children[i]); free(schema->children[i]); } if (is_dynamic) { free((void*)schema->format); free((void*)schema->name); } ArrowSchemaMarkReleased(schema); } static void release_nested_static(struct ArrowSchema* schema) { release_nested_internal(schema, /*is_dynamic=*/0); } static void release_nested_dynamic(struct ArrowSchema* schema) { release_nested_internal(schema, /*is_dynamic=*/1); } static void release_nested_dynamic_toplevel(struct ArrowSchema* schema) { assert(!ArrowSchemaIsReleased(schema)); for (int i = 0; i < schema->n_children; ++i) { ArrowSchemaRelease(schema->children[i]); free(schema->children[i]); } free((void*)schema->format); if (strlen(schema->name) > 0) { free((void*)schema->name); } ArrowSchemaMarkReleased(schema); } void test_primitive(struct ArrowSchema* schema, const char* fmt) { *schema = (struct ArrowSchema) { // Type description .format = fmt, .name = "", .metadata = NULL, .flags = 0, .n_children = 0, .children = NULL, .dictionary = NULL, // bookkeeping .release = &release_primitive, }; } // Since test_lists et al. allocate an entirely array of ArrowSchema pointers, // need to expose a function to free it. 
void free_malloced_schemas(struct ArrowSchema** schemas) { free(schemas); } struct ArrowSchema** test_lists(const char** fmts, const char** names, const int* nullflags, const int n) { struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); for (int i = 0; i < n; ++i) { schemas[i] = malloc(sizeof(struct ArrowSchema)); *schemas[i] = (struct ArrowSchema) { .format = fmts[i], .name = names[i], .metadata = NULL, .flags = 0, .children = NULL, .n_children = 0, .dictionary = NULL, .release = &release_nested_dynamic, }; if (i != 0) { schemas[i-1]->n_children = 1; schemas[i-1]->children = &schemas[i]; schemas[i]->flags = nullflags[i-1]; } } return schemas; } struct ArrowSchema** fill_structs(const char** fmts, const char** names, int64_t* flags, const int n) { struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); for (int i = 0; i < n; ++i) { schemas[i] = malloc(sizeof(struct ArrowSchema)); *schemas[i] = (struct ArrowSchema) { .format = fmts[i], .name = names[i], .metadata = NULL, .flags = flags[i], .children = NULL, .n_children = 0, .dictionary = NULL, .release = &release_nested_dynamic, }; } schemas[0]->children = &schemas[1]; schemas[0]->n_children = n-1; return schemas; } struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n) { struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); if (is_little_endian() == 1) { schemas[n-1]->metadata = kEncodedMeta2LE; } else { schemas[n-1]->metadata = kEncodedMeta2BE; } return schemas; } struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* flags, const int n) { struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); if (is_little_endian() == 1) { schemas[0]->metadata = kEncodedMeta2LE; schemas[n-1]->metadata = kEncodedMeta1LE; } else { schemas[0]->metadata = kEncodedMeta2BE; schemas[n-1]->metadata = kEncodedMeta1BE; } return schemas; } struct ArrowSchema** test_map(const char** fmts, const char** names, 
int64_t* flags, const int n) { struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); for (int i = 0; i < n; ++i) { schemas[i] = malloc(sizeof(struct ArrowSchema)); *schemas[i] = (struct ArrowSchema) { .format = fmts[i], .name = names[i], .metadata = NULL, .flags = flags[i], .children = NULL, .n_children = 0, .dictionary = NULL, .release = &release_nested_dynamic, }; } schemas[0]->n_children = 1; schemas[0]->children = &schemas[1]; schemas[1]->n_children = n-2; schemas[1]->children = &schemas[2]; return schemas; } struct ArrowSchema** test_union(const char** fmts, const char** names, int64_t* flags, const int n) { struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); for (int i = 0; i < n; ++i) { schemas[i] = malloc(sizeof(struct ArrowSchema)); *schemas[i] = (struct ArrowSchema) { .format = fmts[i], .name = names[i], .metadata = NULL, .flags = flags[i], .children = NULL, .n_children = 0, .dictionary = NULL, .release = &release_nested_dynamic, }; } schemas[0]->n_children = n-1; schemas[0]->children = &schemas[1]; return schemas; } struct streamcounter { int n; int max; }; static int stream_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { out->children = malloc(sizeof(struct ArrowSchema*)*2); out->n_children = 2; out->children[0] = malloc(sizeof(struct ArrowSchema)); *out->children[0] = (struct ArrowSchema) { .format = "i", .name = "a", .metadata = NULL, .flags = ARROW_FLAG_NULLABLE, .children = NULL, .n_children = 0, .dictionary = NULL, .release = &release_nested_static, }; out->children[1] = malloc(sizeof(struct ArrowSchema)); *out->children[1] = (struct ArrowSchema) { .format = "u", .name = "b", .metadata = NULL, .flags = ARROW_FLAG_NULLABLE, .children = NULL, .n_children = 0, .dictionary = NULL, .release = &release_nested_static, }; out->format = "+s"; out->release = &release_nested_static; return 0; } static void release_stream(struct ArrowArrayStream* st) { free(st->private_data); ArrowArrayStreamMarkReleased(st); } 
static void release_the_array(struct ArrowArray* out) { for (int i = 0; i < out->n_children; ++i) { ArrowArrayRelease(out->children[i]); } free((void*)out->children); free(out->buffers); out->release = NULL; } void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); static void release_str_array(struct ArrowArray* array) { assert(array->n_buffers == 3); free((void*) array->buffers[1]); free((void*) array->buffers[2]); free(array->buffers); array->release = NULL; } void export_str_array(const char* data, const int32_t* offsets, int64_t nitems, struct ArrowArray* out) { *out = (struct ArrowArray) { .length = nitems, .offset = 0, .null_count = 0, .n_buffers = 3, .n_children = 0, .children = NULL, .dictionary = NULL, // bookkeeping .release = &release_str_array }; out->buffers = (const void**)malloc(sizeof(void*) * out->n_buffers); assert(out->buffers != NULL); out->buffers[0] = NULL; out->buffers[1] = offsets; out->buffers[2] = data; } static int next_record(struct ArrowArrayStream* st, struct ArrowArray* out) { struct streamcounter* cnter = (struct streamcounter*)(st->private_data); if (cnter->n == cnter->max) { ArrowArrayMarkReleased(out); return 0; } cnter->n++; *out = (struct ArrowArray) { .offset = 0, .dictionary = NULL, .length = 3, .null_count = 0, .buffers = (const void**)malloc(sizeof(void*)), .n_children = 2, .n_buffers = 1, .release = &release_the_array }; out->buffers[0] = NULL; out->children = (struct ArrowArray**)malloc(sizeof(struct ArrowArray*)*2); int32_t* intdata = malloc(sizeof(int32_t)*3); for (int i = 0; i < 3; ++i) { intdata[i] = cnter->n * (i+1); } out->children[0] = malloc(sizeof(struct ArrowArray)); export_int32_array(intdata, 3, out->children[0]); out->children[1] = malloc(sizeof(struct ArrowArray)); char* strdata = strdup("foobarbaz"); int32_t* offsets = malloc(sizeof(int32_t)*4); offsets[0] = 0; offsets[1] = 3; offsets[2] = 6; offsets[3] = 9; export_str_array(strdata, offsets, 3, out->children[1]); return 0; } void 
setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out) { struct streamcounter* cnt = malloc(sizeof(struct streamcounter)); cnt->max = n_batches; cnt->n = 0; out->get_next = &next_record; out->get_schema = &stream_schema; out->release = &release_stream; out->private_data = cnt; } int test_exported_stream(struct ArrowArrayStream* stream) { while (1) { struct ArrowArray array; memset(&array, 0, sizeof(array)); // Garbage - implementation should not try to call it, though! array.release = (void*)0xDEADBEEF; int rc = stream->get_next(stream, &array); if (rc != 0) return rc; if (array.release == NULL) { stream->release(stream); break; } } return 0; } struct FallibleStream { // empty structs are a GNU extension int dummy; }; const char* FallibleGetLastError(struct ArrowArrayStream* stream) { return "Expected error message"; } int FallibleGetSchema(struct ArrowArrayStream* stream, struct ArrowSchema* schema) { return EINVAL; } int FallibleGetNext(struct ArrowArrayStream* stream, struct ArrowArray* array) { return EINVAL; } void FallibleRelease(struct ArrowArrayStream* stream) { memset(stream, 0, sizeof(*stream)); } static struct FallibleStream kFallibleStream; void test_stream_schema_fallible(struct ArrowArrayStream* stream) { stream->get_last_error = FallibleGetLastError; stream->get_schema = FallibleGetSchema; stream->get_next = FallibleGetNext; stream->private_data = &kFallibleStream; stream->release = FallibleRelease; } int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed) { struct ArrowSchema schema; // Try to confuse the Go GC by putting what looks like a Go pointer here. 
#ifdef _WIN32 // Thread-safe on Windows with the multithread CRT #define DORAND rand() #else #define DORAND rand_r(&seed) #endif schema.name = (char*)(0xc000000000L + (DORAND % 0x2000)); schema.format = (char*)(0xc000000000L + (DORAND % 0x2000)); int rc = stream->get_schema(stream, &schema); if (rc != 0) return rc; schema.release(&schema); while (1) { struct ArrowArray array; array.release = (void*)(0xc000000000L + (DORAND % 0x2000)); array.private_data = (void*)(0xc000000000L + (DORAND % 0x2000)); int rc = stream->get_next(stream, &array); if (rc != 0) return rc; if (array.release == NULL) { stream->release(stream); break; } array.release(&array); } return 0; #undef DORAND } arrow-go-18.2.0/arrow/cdata/cdata_test.go000066400000000000000000000772111476434502500202310ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build cgo && test // +build cgo,test // use test tag so that we only run these tests when the "test" tag is present // so that the .c and other framework infrastructure is only compiled in during // testing, and the .c files and symbols are not present in release builds. 
package cdata import ( "context" "encoding/json" "errors" "fmt" "io" "runtime" "runtime/cgo" "sync" "testing" "time" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/internal/arrdata" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/arrow/memory/mallocator" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestSchemaExport(t *testing.T) { sc := exportInt32TypeSchema() f, err := importSchema(&sc) assert.NoError(t, err) keys, _ := getMetadataKeys() vals, _ := getMetadataValues() assert.Equal(t, arrow.PrimitiveTypes.Int32, f.Type) assert.Equal(t, keys, f.Metadata.Keys()) assert.Equal(t, vals, f.Metadata.Values()) // schema was released when importing assert.True(t, schemaIsReleased(&sc)) } func TestSimpleArrayExport(t *testing.T) { assert.False(t, test1IsReleased()) testarr := exportInt32Array() arr, err := ImportCArrayWithType(testarr, arrow.PrimitiveTypes.Int32) assert.NoError(t, err) assert.False(t, test1IsReleased()) assert.True(t, isReleased(testarr)) arr.Release() runtime.GC() assert.Eventually(t, test1IsReleased, 1*time.Second, 10*time.Millisecond) } func TestSimpleArrayAndSchema(t *testing.T) { sc := exportInt32TypeSchema() testarr := exportInt32Array() // grab address of the buffer we stuck into the ArrowArray object buflist := (*[2]unsafe.Pointer)(unsafe.Pointer(testarr.buffers)) origvals := (*[10]int32)(unsafe.Pointer(buflist[1])) fld, arr, err := ImportCArray(testarr, &sc) assert.NoError(t, err) assert.Equal(t, arrow.PrimitiveTypes.Int32, fld.Type) assert.EqualValues(t, 10, arr.Len()) // verify that the address is the same of the first integer for the // slice that is being used by the arrow.Array and the original buffer vals := arr.(*array.Int32).Int32Values() assert.Same(t, &vals[0], &origvals[0]) // and that the values are correct for i, v := range vals { 
assert.Equal(t, int32(i+1), v) } } func TestPrimitiveSchemas(t *testing.T) { tests := []struct { typ arrow.DataType fmt string }{ {arrow.PrimitiveTypes.Int8, "c"}, {arrow.PrimitiveTypes.Int16, "s"}, {arrow.PrimitiveTypes.Int32, "i"}, {arrow.PrimitiveTypes.Int64, "l"}, {arrow.PrimitiveTypes.Uint8, "C"}, {arrow.PrimitiveTypes.Uint16, "S"}, {arrow.PrimitiveTypes.Uint32, "I"}, {arrow.PrimitiveTypes.Uint64, "L"}, {arrow.FixedWidthTypes.Boolean, "b"}, {arrow.Null, "n"}, {arrow.FixedWidthTypes.Float16, "e"}, {arrow.PrimitiveTypes.Float32, "f"}, {arrow.PrimitiveTypes.Float64, "g"}, {&arrow.FixedSizeBinaryType{ByteWidth: 3}, "w:3"}, {arrow.BinaryTypes.Binary, "z"}, {arrow.BinaryTypes.LargeBinary, "Z"}, {arrow.BinaryTypes.String, "u"}, {arrow.BinaryTypes.LargeString, "U"}, {&arrow.Decimal128Type{Precision: 16, Scale: 4}, "d:16,4"}, {&arrow.Decimal128Type{Precision: 15, Scale: 0}, "d:15,0"}, {&arrow.Decimal128Type{Precision: 15, Scale: -4}, "d:15,-4"}, {&arrow.Decimal256Type{Precision: 15, Scale: -4}, "d:15,-4,256"}, } for _, tt := range tests { t.Run(tt.typ.Name(), func(t *testing.T) { sc := testPrimitive(tt.fmt) f, err := ImportCArrowField(&sc) assert.NoError(t, err) assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) assert.True(t, schemaIsReleased(&sc)) }) } } func TestDecimalSchemaErrors(t *testing.T) { tests := []struct { fmt string errorMessage string }{ {"d:", "invalid decimal spec 'd:': wrong number of properties"}, {"d:1", "invalid decimal spec 'd:1': wrong number of properties"}, {"d:1,2,3,4", "invalid decimal spec 'd:1,2,3,4': wrong number of properties"}, {"d:a,2,3", "could not parse decimal precision in 'd:a,2,3':"}, {"d:1,a,3", "could not parse decimal scale in 'd:1,a,3':"}, {"d:1,2,a", "could not parse decimal bitwidth in 'd:1,2,a':"}, {"d:1,2,384", "unsupported decimal bitwidth, got 'd:1,2,384'"}, } for _, tt := range tests { t.Run(tt.fmt, func(t *testing.T) { sc := testPrimitive(tt.fmt) _, err := ImportCArrowField(&sc) assert.Error(t, err) assert.Contains(t, 
err.Error(), tt.errorMessage) }) } } func TestImportTemporalSchema(t *testing.T) { tests := []struct { typ arrow.DataType fmt string }{ {arrow.FixedWidthTypes.Date32, "tdD"}, {arrow.FixedWidthTypes.Date64, "tdm"}, {arrow.FixedWidthTypes.Time32s, "tts"}, {arrow.FixedWidthTypes.Time32ms, "ttm"}, {arrow.FixedWidthTypes.Time64us, "ttu"}, {arrow.FixedWidthTypes.Time64ns, "ttn"}, {arrow.FixedWidthTypes.Duration_s, "tDs"}, {arrow.FixedWidthTypes.Duration_ms, "tDm"}, {arrow.FixedWidthTypes.Duration_us, "tDu"}, {arrow.FixedWidthTypes.Duration_ns, "tDn"}, {arrow.FixedWidthTypes.MonthInterval, "tiM"}, {arrow.FixedWidthTypes.DayTimeInterval, "tiD"}, {arrow.FixedWidthTypes.MonthDayNanoInterval, "tin"}, {arrow.FixedWidthTypes.Timestamp_s, "tss:UTC"}, {&arrow.TimestampType{Unit: arrow.Second}, "tss:"}, {&arrow.TimestampType{Unit: arrow.Second, TimeZone: "Europe/Paris"}, "tss:Europe/Paris"}, {arrow.FixedWidthTypes.Timestamp_ms, "tsm:UTC"}, {&arrow.TimestampType{Unit: arrow.Millisecond}, "tsm:"}, {&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "Europe/Paris"}, "tsm:Europe/Paris"}, {arrow.FixedWidthTypes.Timestamp_us, "tsu:UTC"}, {&arrow.TimestampType{Unit: arrow.Microsecond}, "tsu:"}, {&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "Europe/Paris"}, "tsu:Europe/Paris"}, {arrow.FixedWidthTypes.Timestamp_ns, "tsn:UTC"}, {&arrow.TimestampType{Unit: arrow.Nanosecond}, "tsn:"}, {&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Europe/Paris"}, "tsn:Europe/Paris"}, } for _, tt := range tests { t.Run(tt.typ.Name(), func(t *testing.T) { sc := testPrimitive(tt.fmt) f, err := ImportCArrowField(&sc) assert.NoError(t, err) assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) assert.True(t, schemaIsReleased(&sc)) }) } } func TestListSchemas(t *testing.T) { tests := []struct { typ arrow.DataType fmts []string names []string isnull []bool }{ {arrow.ListOf(arrow.PrimitiveTypes.Int8), []string{"+l", "c"}, []string{"", "item"}, []bool{true}}, {arrow.FixedSizeListOfNonNullable(2, 
arrow.PrimitiveTypes.Int64), []string{"+w:2", "l"}, []string{"", "item"}, []bool{false}}, {arrow.ListOfNonNullable(arrow.ListOf(arrow.PrimitiveTypes.Int32)), []string{"+l", "+l", "i"}, []string{"", "item", "item"}, []bool{false, true}}, } for _, tt := range tests { t.Run(tt.typ.Name(), func(t *testing.T) { sc := testNested(tt.fmts, tt.names, tt.isnull) defer freeMallocedSchemas(sc) top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] f, err := ImportCArrowField(top) assert.NoError(t, err) assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) assert.True(t, schemaIsReleased(top)) }) } } func TestStructSchemas(t *testing.T) { tests := []struct { typ arrow.DataType fmts []string names []string flags []int64 }{ {arrow.StructOf( arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true, Metadata: metadata2}, ), []string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{flagIsNullable, flagIsNullable, flagIsNullable}}, } for _, tt := range tests { t.Run(tt.typ.Name(), func(t *testing.T) { sc := testStruct(tt.fmts, tt.names, tt.flags) defer freeMallocedSchemas(sc) top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] f, err := ImportCArrowField(top) assert.NoError(t, err) assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) assert.True(t, schemaIsReleased(top)) }) } } func TestMapSchemas(t *testing.T) { tests := []struct { typ *arrow.MapType keysSorted bool fmts []string names []string flags []int64 }{ {arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), false, []string{"+m", "+s", "c", "u"}, []string{"", "entries", "key", "value"}, []int64{flagIsNullable, 0, 0, flagIsNullable}}, {arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), true, []string{"+m", "+s", "c", "u"}, []string{"", "entries", "key", "value"}, []int64{flagIsNullable | flagMapKeysSorted, 0, 0, flagIsNullable}}, } for _, tt := range tests { t.Run(tt.typ.Name(), func(t *testing.T) { sc := testMap(tt.fmts, tt.names, 
tt.flags) defer freeMallocedSchemas(sc) top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] f, err := ImportCArrowField(top) assert.NoError(t, err) tt.typ.KeysSorted = tt.keysSorted assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) assert.True(t, schemaIsReleased(top)) }) } } func TestSchema(t *testing.T) { // schema is exported as an equivalent struct type (+ top-level metadata) sc := arrow.NewSchema([]arrow.Field{ {Name: "nulls", Type: arrow.Null, Nullable: false}, {Name: "values", Type: arrow.PrimitiveTypes.Int64, Nullable: true, Metadata: metadata1}, }, &metadata2) cst := testSchema([]string{"+s", "n", "l"}, []string{"", "nulls", "values"}, []int64{0, 0, flagIsNullable}) defer freeMallocedSchemas(cst) top := (*[1]*CArrowSchema)(unsafe.Pointer(cst))[0] out, err := ImportCArrowSchema(top) assert.NoError(t, err) assert.True(t, sc.Equal(out)) assert.True(t, sc.Metadata().Equal(out.Metadata())) assert.True(t, schemaIsReleased(top)) } func createTestInt8Arr() arrow.Array { bld := array.NewInt8Builder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]int8{1, 2, 0, -3}, []bool{true, true, false, true}) return bld.NewInt8Array() } func createTestInt16Arr() arrow.Array { bld := array.NewInt16Builder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]int16{1, 2, -3}, []bool{true, true, true}) return bld.NewInt16Array() } func createTestInt32Arr() arrow.Array { bld := array.NewInt32Builder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]int32{1, 2, 0, -3}, []bool{true, true, false, true}) return bld.NewInt32Array() } func createTestInt64Arr() arrow.Array { bld := array.NewInt64Builder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]int64{1, 2, -3}, []bool{true, true, true}) return bld.NewInt64Array() } func createTestUint8Arr() arrow.Array { bld := array.NewUint8Builder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]uint8{1, 2, 0, 3}, []bool{true, true, false, true}) return bld.NewUint8Array() } func 
createTestUint16Arr() arrow.Array { bld := array.NewUint16Builder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]uint16{1, 2, 3}, []bool{true, true, true}) return bld.NewUint16Array() } func createTestUint32Arr() arrow.Array { bld := array.NewUint32Builder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]uint32{1, 2, 0, 3}, []bool{true, true, false, true}) return bld.NewUint32Array() } func createTestUint64Arr() arrow.Array { bld := array.NewUint64Builder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]uint64{1, 2, 3}, []bool{true, true, true}) return bld.NewUint64Array() } func createTestBoolArr() arrow.Array { bld := array.NewBooleanBuilder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]bool{true, false, false}, []bool{true, true, false}) return bld.NewBooleanArray() } func createTestNullArr() arrow.Array { return array.NewNull(2) } func createTestFloat32Arr() arrow.Array { bld := array.NewFloat32Builder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]float32{1.5, 0}, []bool{true, false}) return bld.NewFloat32Array() } func createTestFloat64Arr() arrow.Array { bld := array.NewFloat64Builder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]float64{1.5, 0}, []bool{true, false}) return bld.NewFloat64Array() } func createTestFSBArr() arrow.Array { bld := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 3}) defer bld.Release() bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) return bld.NewFixedSizeBinaryArray() } func createTestBinaryArr() arrow.Array { bld := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) defer bld.Release() bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) return bld.NewBinaryArray() } func createTestStrArr() arrow.Array { bld := array.NewStringBuilder(memory.DefaultAllocator) defer bld.Release() 
bld.AppendValues([]string{"foo", "bar", ""}, []bool{true, true, false}) return bld.NewStringArray() } func createTestLargeBinaryArr() arrow.Array { bld := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.LargeBinary) defer bld.Release() bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) return bld.NewLargeBinaryArray() } func createTestLargeStrArr() arrow.Array { bld := array.NewLargeStringBuilder(memory.DefaultAllocator) defer bld.Release() bld.AppendValues([]string{"foo", "bar", ""}, []bool{true, true, false}) return bld.NewLargeStringArray() } func createTestDecimalArr() arrow.Array { bld := array.NewDecimal128Builder(memory.DefaultAllocator, &arrow.Decimal128Type{Precision: 16, Scale: 4}) defer bld.Release() bld.AppendValues([]decimal128.Num{decimal128.FromU64(12345670), decimal128.FromU64(0)}, []bool{true, false}) return bld.NewDecimal128Array() } func TestPrimitiveArrs(t *testing.T) { tests := []struct { name string fn func() arrow.Array }{ {"int8", createTestInt8Arr}, {"uint8", createTestUint8Arr}, {"int16", createTestInt16Arr}, {"uint16", createTestUint16Arr}, {"int32", createTestInt32Arr}, {"uint32", createTestUint32Arr}, {"int64", createTestInt64Arr}, {"uint64", createTestUint64Arr}, {"bool", createTestBoolArr}, {"null", createTestNullArr}, {"float32", createTestFloat32Arr}, {"float64", createTestFloat64Arr}, {"fixed size binary", createTestFSBArr}, {"binary", createTestBinaryArr}, {"utf8", createTestStrArr}, {"largebinary", createTestLargeBinaryArr}, {"largeutf8", createTestLargeStrArr}, {"decimal128", createTestDecimalArr}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { arr := tt.fn() defer arr.Release() mem := mallocator.NewMallocator() defer mem.AssertSize(t, 0) carr := createCArr(arr, mem) defer freeTestMallocatorArr(carr, mem) imported, err := ImportCArrayWithType(carr, arr.DataType()) assert.NoError(t, err) assert.True(t, array.Equal(arr, imported)) assert.True(t, 
isReleased(carr)) imported.Release() }) } } func TestPrimitiveSliced(t *testing.T) { arr := createTestInt16Arr() defer arr.Release() sl := array.NewSlice(arr, 1, 2) defer sl.Release() mem := mallocator.NewMallocator() defer mem.AssertSize(t, 0) carr := createCArr(sl, mem) defer freeTestMallocatorArr(carr, mem) imported, err := ImportCArrayWithType(carr, arr.DataType()) assert.NoError(t, err) assert.True(t, array.Equal(sl, imported)) assert.True(t, array.SliceEqual(arr, 1, 2, imported, 0, int64(imported.Len()))) assert.True(t, isReleased(carr)) imported.Release() } func createTestListArr() arrow.Array { bld := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) defer bld.Release() vb := bld.ValueBuilder().(*array.Int8Builder) bld.Append(true) vb.AppendValues([]int8{1, 2}, []bool{true, true}) bld.Append(true) vb.AppendValues([]int8{3, 0}, []bool{true, false}) bld.AppendNull() return bld.NewArray() } func createTestLargeListArr() arrow.Array { bld := array.NewLargeListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) defer bld.Release() vb := bld.ValueBuilder().(*array.Int8Builder) bld.Append(true) vb.AppendValues([]int8{1, 2}, []bool{true, true}) bld.Append(true) vb.AppendValues([]int8{3, 0}, []bool{true, false}) bld.AppendNull() return bld.NewArray() } func createTestFixedSizeList() arrow.Array { bld := array.NewFixedSizeListBuilder(memory.DefaultAllocator, 2, arrow.PrimitiveTypes.Int64) defer bld.Release() vb := bld.ValueBuilder().(*array.Int64Builder) bld.Append(true) vb.AppendValues([]int64{1, 2}, []bool{true, true}) bld.Append(true) vb.AppendValues([]int64{3, 0}, []bool{true, false}) bld.AppendNull() return bld.NewArray() } func createTestStructArr() arrow.Array { bld := array.NewStructBuilder(memory.DefaultAllocator, arrow.StructOf( arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true}, )) defer bld.Release() f1bld := 
bld.FieldBuilder(0).(*array.Int8Builder) f2bld := bld.FieldBuilder(1).(*array.StringBuilder) bld.Append(true) f1bld.Append(1) f2bld.Append("foo") bld.Append(true) f1bld.Append(2) f2bld.AppendNull() return bld.NewArray() } func createTestEmptyStructArr() arrow.Array { bld := array.NewStructBuilder(memory.DefaultAllocator, arrow.StructOf()) defer bld.Release() bld.AppendNull() return bld.NewArray() } func createTestEmptyDenseUnionArr() arrow.Array { bld := array.NewEmptyDenseUnionBuilder(memory.DefaultAllocator) defer bld.Release() return bld.NewArray() } func createTestEmptySparseUnionArr() arrow.Array { bld := array.NewEmptySparseUnionBuilder(memory.DefaultAllocator) defer bld.Release() return bld.NewArray() } func createTestRunEndsArr() arrow.Array { bld := array.NewRunEndEncodedBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int8) defer bld.Release() if err := json.Unmarshal([]byte(`[1, 2, 2, 3, null, null, null, 4]`), bld); err != nil { panic(err) } return bld.NewArray() } func createTestMapArr() arrow.Array { bld := array.NewMapBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String, false) defer bld.Release() kb := bld.KeyBuilder().(*array.Int8Builder) vb := bld.ItemBuilder().(*array.StringBuilder) bld.Append(true) kb.Append(1) vb.Append("foo") kb.Append(2) vb.AppendNull() bld.Append(true) kb.Append(3) vb.Append("bar") return bld.NewArray() } func createTestSparseUnion() arrow.Array { return createTestUnionArr(arrow.SparseMode) } func createTestDenseUnion() arrow.Array { return createTestUnionArr(arrow.DenseMode) } func createTestUnionArr(mode arrow.UnionMode) arrow.Array { fields := []arrow.Field{ arrow.Field{Name: "u0", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, arrow.Field{Name: "u1", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}, } typeCodes := []arrow.UnionTypeCode{5, 10} bld := array.NewBuilder(memory.DefaultAllocator, arrow.UnionOf(mode, fields, typeCodes)).(array.UnionBuilder) 
defer bld.Release() u0Bld := bld.Child(0).(*array.Int32Builder) u1Bld := bld.Child(1).(*array.Uint8Builder) bld.Append(5) if mode == arrow.SparseMode { u1Bld.AppendNull() } u0Bld.Append(128) bld.Append(5) if mode == arrow.SparseMode { u1Bld.AppendNull() } u0Bld.Append(256) bld.Append(10) if mode == arrow.SparseMode { u0Bld.AppendNull() } u1Bld.Append(127) bld.Append(10) if mode == arrow.SparseMode { u0Bld.AppendNull() } u1Bld.Append(25) return bld.NewArray() } func TestNestedArrays(t *testing.T) { tests := []struct { name string fn func() arrow.Array }{ {"list", createTestListArr}, {"large list", createTestLargeListArr}, {"fixed size list", createTestFixedSizeList}, {"struct", createTestStructArr}, {"map", createTestMapArr}, {"sparse union", createTestSparseUnion}, {"dense union", createTestDenseUnion}, {"run-end encoded", createTestRunEndsArr}, {"empty struct", createTestEmptyStructArr}, {"empty dense union", createTestEmptyDenseUnionArr}, {"empty sparse union", createTestEmptySparseUnionArr}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { arr := tt.fn() defer arr.Release() mem := mallocator.NewMallocator() defer mem.AssertSize(t, 0) carr := createCArr(arr, mem) defer freeTestMallocatorArr(carr, mem) imported, err := ImportCArrayWithType(carr, arr.DataType()) assert.NoError(t, err) assert.True(t, array.Equal(arr, imported)) assert.True(t, isReleased(carr)) imported.Release() }) } } func TestRecordBatch(t *testing.T) { mem := mallocator.NewMallocator() defer mem.AssertSize(t, 0) arr := createTestStructArr() defer arr.Release() carr := createCArr(arr, mem) defer freeTestMallocatorArr(carr, mem) sc := testStruct([]string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{0, flagIsNullable, flagIsNullable}) defer freeMallocedSchemas(sc) top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] rb, err := ImportCRecordBatch(carr, top) assert.NoError(t, err) defer rb.Release() assert.EqualValues(t, 2, rb.NumCols()) rbschema := rb.Schema() assert.Equal(t, "a", 
rbschema.Field(0).Name) assert.Equal(t, "b", rbschema.Field(1).Name) rec := array.NewRecord(rbschema, []arrow.Array{arr.(*array.Struct).Field(0), arr.(*array.Struct).Field(1)}, -1) defer rec.Release() assert.True(t, array.RecordEqual(rb, rec)) } func TestRecordReaderStream(t *testing.T) { stream := arrayStreamTest() defer releaseStream(stream) rdr := ImportCArrayStream(stream, nil) i := 0 for { rec, err := rdr.Read() if err != nil { if errors.Is(err, io.EOF) { break } assert.NoError(t, err) } assert.EqualValues(t, 2, rec.NumCols()) assert.Equal(t, "a", rec.ColumnName(0)) assert.Equal(t, "b", rec.ColumnName(1)) i++ for j := 0; j < int(rec.NumRows()); j++ { assert.Equal(t, int32((j+1)*i), rec.Column(0).(*array.Int32).Value(j)) } assert.Equal(t, "foo", rec.Column(1).(*array.String).Value(0)) assert.Equal(t, "bar", rec.Column(1).(*array.String).Value(1)) assert.Equal(t, "baz", rec.Column(1).(*array.String).Value(2)) } } func TestExportRecordReaderStream(t *testing.T) { reclist := arrdata.Records["primitives"] rdr, _ := array.NewRecordReader(reclist[0].Schema(), reclist) out := createTestStreamObj() ExportRecordReader(rdr, out) assert.NotNil(t, out.get_schema) assert.NotNil(t, out.get_next) assert.NotNil(t, out.get_last_error) assert.NotNil(t, out.release) assert.NotNil(t, out.private_data) h := *(*cgo.Handle)(out.private_data) assert.Same(t, rdr, h.Value().(cRecordReader).rdr) importedRdr := ImportCArrayStream(out, nil) i := 0 for { rec, err := importedRdr.Read() if err != nil { if errors.Is(err, io.EOF) { break } assert.NoError(t, err) } assert.Truef(t, array.RecordEqual(reclist[i], rec), "expected: %s\ngot: %s", reclist[i], rec) i++ } assert.EqualValues(t, len(reclist), i) } func TestExportRecordReaderStreamLifetime(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) schema := arrow.NewSchema([]arrow.Field{ {Name: "strings", Type: arrow.BinaryTypes.String, Nullable: false}, }, nil) bldr := array.NewBuilder(mem, 
&arrow.StringType{}) defer bldr.Release() arr := bldr.NewArray() defer arr.Release() rec := array.NewRecord(schema, []arrow.Array{arr}, 0) defer rec.Release() rdr, _ := array.NewRecordReader(schema, []arrow.Record{rec}) defer rdr.Release() out := createTestStreamObj() ExportRecordReader(rdr, out) // C Stream is holding on to memory assert.NotEqual(t, 0, mem.CurrentAlloc()) releaseStream(out) } func TestEmptyListExport(t *testing.T) { bldr := array.NewBuilder(memory.DefaultAllocator, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)) defer bldr.Release() arr := bldr.NewArray() defer arr.Release() var out CArrowArray ExportArrowArray(arr, &out, nil) assert.Zero(t, out.length) assert.Zero(t, out.null_count) assert.Zero(t, out.offset) assert.EqualValues(t, 2, out.n_buffers) assert.NotNil(t, out.buffers) assert.EqualValues(t, 1, out.n_children) assert.NotNil(t, out.children) } func TestEmptyDictExport(t *testing.T) { bldr := array.NewBuilder(memory.DefaultAllocator, &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String, Ordered: true}) defer bldr.Release() arr := bldr.NewArray() defer arr.Release() var out CArrowArray var sc CArrowSchema ExportArrowArray(arr, &out, &sc) assert.EqualValues(t, 'c', *sc.format) assert.NotZero(t, sc.flags&1) assert.Zero(t, sc.n_children) assert.NotNil(t, sc.dictionary) assert.EqualValues(t, 'u', *sc.dictionary.format) assert.Zero(t, out.length) assert.Zero(t, out.null_count) assert.Zero(t, out.offset) assert.EqualValues(t, 2, out.n_buffers) assert.Zero(t, out.n_children) assert.Nil(t, out.children) assert.NotNil(t, out.dictionary) assert.Zero(t, out.dictionary.length) assert.Zero(t, out.dictionary.null_count) assert.Zero(t, out.dictionary.offset) assert.EqualValues(t, 3, out.dictionary.n_buffers) assert.Zero(t, out.dictionary.n_children) assert.Nil(t, out.dictionary.children) assert.Nil(t, out.dictionary.dictionary) } func TestEmptyStringExport(t *testing.T) { // apache/arrow#33936: regression test bldr 
:= array.NewBuilder(memory.DefaultAllocator, &arrow.StringType{}) defer bldr.Release() arr := bldr.NewArray() defer arr.Release() var out CArrowArray var sc CArrowSchema ExportArrowArray(arr, &out, &sc) assert.EqualValues(t, 'u', *sc.format) assert.Zero(t, sc.n_children) assert.Nil(t, sc.dictionary) assert.EqualValues(t, 3, out.n_buffers) buffers := (*[3]unsafe.Pointer)(unsafe.Pointer(out.buffers)) assert.EqualValues(t, unsafe.Pointer(nil), buffers[0]) assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[1]) assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[2]) } func TestEmptyUnionExport(t *testing.T) { // apache/arrow#33936: regression test bldr := array.NewBuilder(memory.DefaultAllocator, arrow.SparseUnionOf([]arrow.Field{ {Name: "child", Type: &arrow.Int64Type{}}, }, []arrow.UnionTypeCode{0})) defer bldr.Release() arr := bldr.NewArray() defer arr.Release() var out CArrowArray var sc CArrowSchema ExportArrowArray(arr, &out, &sc) assert.EqualValues(t, 1, sc.n_children) assert.Nil(t, sc.dictionary) assert.EqualValues(t, 1, out.n_buffers) buffers := (*[1]unsafe.Pointer)(unsafe.Pointer(out.buffers)) assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[0]) } func TestRecordReaderExport(t *testing.T) { // Regression test for apache/arrow#33767 reclist := arrdata.Records["primitives"] rdr, _ := array.NewRecordReader(reclist[0].Schema(), reclist) if err := exportedStreamTest(rdr); err != nil { t.Fatalf("Failed to test exported stream: %#v", err) } } type failingReader struct { opCount int } func (r *failingReader) Retain() {} func (r *failingReader) Release() {} func (r *failingReader) Schema() *arrow.Schema { r.opCount -= 1 if r.opCount == 0 { return nil } return arrdata.Records["primitives"][0].Schema() } func (r *failingReader) Next() bool { r.opCount -= 1 return r.opCount > 0 } func (r *failingReader) Record() arrow.Record { arrdata.Records["primitives"][0].Retain() return arrdata.Records["primitives"][0] } func (r *failingReader) Err() error { if r.opCount == 
0 { return fmt.Errorf("Expected error message") } return nil } func TestRecordReaderError(t *testing.T) { // Regression test for apache/arrow#33789 err := roundTripStreamTest(&failingReader{opCount: 1}) if err == nil { t.Fatalf("Expected error but got none") } assert.Contains(t, err.Error(), "Expected error message") err = roundTripStreamTest(&failingReader{opCount: 2}) if err == nil { t.Fatalf("Expected error but got none") } assert.Contains(t, err.Error(), "Expected error message") err = roundTripStreamTest(&failingReader{opCount: 3}) if err == nil { t.Fatalf("Expected error but got none") } assert.Contains(t, err.Error(), "Expected error message") } func TestRecordReaderImportError(t *testing.T) { // Regression test for apache/arrow#35974 err := fallibleSchemaTestDeprecated() if err == nil { t.Fatalf("Expected error but got nil") } assert.Contains(t, err.Error(), "Expected error message") err = fallibleSchemaTest() if err == nil { t.Fatalf("Expected error but got nil") } assert.Contains(t, err.Error(), "Expected error message") } func TestConfuseGoGc(t *testing.T) { // Regression test for https://github.com/apache/arrow-adbc/issues/729 reclist := arrdata.Records["primitives"] var wg sync.WaitGroup concurrency := 32 wg.Add(concurrency) // XXX: this test is a bit expensive for i := 0; i < concurrency; i++ { go func() { for i := 0; i < 256; i++ { rdr, err := array.NewRecordReader(reclist[0].Schema(), reclist) assert.NoError(t, err) runtime.GC() assert.NoError(t, confuseGoGc(rdr)) runtime.GC() } wg.Done() }() } wg.Wait() } func TestAsyncInterfacesSimple(t *testing.T) { reclist := arrdata.Records["primitives"] handler := testAsyncHandler() defer freeAsyncHandler(handler) ctx := context.Background() ch := CreateAsyncDeviceStreamHandler(ctx, 1, handler) stream := make(chan RecordMessage, len(reclist)) go func() { defer close(stream) for _, r := range reclist { r.Retain() stream <- RecordMessage{Record: r} } }() wait := make(chan struct{}) go func() { defer close(wait) 
assert.NoError(t, ExportAsyncRecordBatchStream(reclist[0].Schema(), stream, handler)) }() asyncStream := <-ch require.NoError(t, asyncStream.Err) assert.True(t, reclist[0].Schema().Equal(asyncStream.Schema)) var idx int for r := range asyncStream.Stream { require.NoError(t, r.Err) assert.True(t, array.RecordEqual(reclist[idx], r.Record)) idx++ r.Record.Release() } <-wait } func TestAsyncSchemaError(t *testing.T) { handler := testAsyncHandler() defer freeAsyncHandler(handler) ctx := context.Background() ch := CreateAsyncDeviceStreamHandler(ctx, 1, handler) wait := make(chan struct{}) go func() { defer close(wait) err := ExportAsyncRecordBatchStream(nil, nil, handler) assert.ErrorIs(t, err, arrow.ErrInvalid) assert.ErrorContains(t, err, "must have non-nil schema") }() asyncStream := <-ch require.Error(t, asyncStream.Err) var asyncErr AsyncStreamError assert.ErrorAs(t, asyncStream.Err, &asyncErr) assert.Equal(t, 22, int(asyncErr.Code)) <-wait } func TestAsyncPropagateError(t *testing.T) { reclist := arrdata.Records["primitives"] handler := testAsyncHandler() defer freeAsyncHandler(handler) ctx := context.Background() ch := CreateAsyncDeviceStreamHandler(ctx, 1, handler) stream := make(chan RecordMessage, 2) go func() { defer close(stream) reclist[0].Retain() stream <- RecordMessage{Record: reclist[0]} stream <- RecordMessage{Err: assert.AnError} }() wait := make(chan struct{}) go func() { defer close(wait) err := ExportAsyncRecordBatchStream(reclist[0].Schema(), stream, handler) assert.ErrorIs(t, err, assert.AnError) }() asyncStream := <-ch require.NoError(t, asyncStream.Err) assert.True(t, reclist[0].Schema().Equal(asyncStream.Schema)) rec1 := <-asyncStream.Stream require.NoError(t, rec1.Err) assert.True(t, array.RecordEqual(reclist[0], rec1.Record)) rec1.Record.Release() rec2 := <-asyncStream.Stream var err AsyncStreamError assert.ErrorContains(t, rec2.Err, assert.AnError.Error()) assert.ErrorAs(t, rec2.Err, &err) assert.Equal(t, 22, int(err.Code)) <-wait } 
arrow-go-18.2.0/arrow/cdata/cdata_test_framework.go000066400000000000000000000322361476434502500223040ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build test // +build test package cdata // #include // #include // #include // #include "abi.h" // #include "helpers.h" // // void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out); // static struct ArrowArray* get_test_arr() { // struct ArrowArray* array = (struct ArrowArray*)malloc(sizeof(struct ArrowArray)); // memset(array, 0, sizeof(*array)); // return array; // } // static struct ArrowArrayStream* get_test_stream() { // struct ArrowArrayStream* out = (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream)); // memset(out, 0, sizeof(struct ArrowArrayStream)); // return out; // } // // static struct ArrowAsyncDeviceStreamHandler* get_test_async_handler() { // struct ArrowAsyncDeviceStreamHandler* handler = // (struct ArrowAsyncDeviceStreamHandler*)malloc(sizeof(struct ArrowAsyncDeviceStreamHandler)); // memset(handler, 0, sizeof(*handler)); // return handler; // } // // void release_test_arr(struct ArrowArray* arr); // // static int32_t* get_data() { // int32_t* data = malloc(sizeof(int32_t)*10); // for (int i = 0; i 
< 10; ++i) { data[i] = i+1; } // return data; // } // void export_int32_type(struct ArrowSchema* schema); // void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); // int test1_is_released(); // void test_primitive(struct ArrowSchema* schema, const char* fmt); // void free_malloced_schemas(struct ArrowSchema**); // struct ArrowSchema** test_lists(const char** fmts, const char** names, const int* nullflags, const int n); // struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n); // struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* flags, const int n); // struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* flags, const int n); // struct ArrowSchema** test_union(const char** fmts, const char** names, int64_t* flags, const int n); // int test_exported_stream(struct ArrowArrayStream* stream); // void test_stream_schema_fallible(struct ArrowArrayStream* stream); // int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed); // extern void releaseTestArr(struct ArrowArray* array); // extern void goReleaseTestArray(struct ArrowArray* array); import "C" import ( "errors" "fmt" "io" "math/rand" "runtime/cgo" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/internal" "github.com/apache/arrow-go/v18/arrow/memory/mallocator" ) const ( flagIsNullable = C.ARROW_FLAG_NULLABLE flagMapKeysSorted = C.ARROW_FLAG_MAP_KEYS_SORTED ) var ( metadata1 = arrow.NewMetadata([]string{"key1", "key2"}, []string{"", "bar"}) metadata2 = arrow.NewMetadata([]string{"key"}, []string{"abcde"}) ) func exportInt32TypeSchema() CArrowSchema { var s CArrowSchema C.export_int32_type(&s) return s } func releaseStream(s *CArrowArrayStream) { C.ArrowArrayStreamRelease(s) } func schemaIsReleased(s *CArrowSchema) bool { return C.ArrowSchemaIsReleased(s) == 1 } func getMetadataKeys() ([]string, []string) { 
return []string{"key1", "key2"}, []string{"key"} } func getMetadataValues() ([]string, []string) { return []string{"", "bar"}, []string{"abcde"} } func exportInt32Array() *CArrowArray { arr := C.get_test_arr() C.export_int32_array(C.get_data(), C.int64_t(10), arr) return arr } func isReleased(arr *CArrowArray) bool { return C.ArrowArrayIsReleased(arr) == 1 } func test1IsReleased() bool { return C.test1_is_released() == 1 } func testPrimitive(fmtstr string) CArrowSchema { var s CArrowSchema fmt := C.CString(fmtstr) C.test_primitive(&s, fmt) return s } func freeMallocedSchemas(schemas **CArrowSchema) { C.free_malloced_schemas(schemas) } func testAsyncHandler() *CArrowAsyncDeviceStreamHandler { return C.get_test_async_handler() } func freeAsyncHandler(h *CArrowAsyncDeviceStreamHandler) { C.free(unsafe.Pointer(h)) } func testNested(fmts, names []string, isnull []bool) **CArrowSchema { if len(fmts) != len(names) { panic("testing nested lists must have same size fmts and names") } cfmts := make([]*C.char, len(fmts)) cnames := make([]*C.char, len(names)) nulls := make([]C.int, len(isnull)) for i := range fmts { cfmts[i] = C.CString(fmts[i]) cnames[i] = C.CString(names[i]) } for i, v := range isnull { if v { nulls[i] = C.ARROW_FLAG_NULLABLE } else { nulls[i] = 0 } } return C.test_lists((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int)(unsafe.Pointer(&nulls[0])), C.int(len(fmts))) } func testStruct(fmts, names []string, flags []int64) **CArrowSchema { if len(fmts) != len(names) || len(names) != len(flags) { panic("testing structs must all have the same size slices in args") } cfmts := make([]*C.char, len(fmts)) cnames := make([]*C.char, len(names)) cflags := make([]C.int64_t, len(flags)) for i := range fmts { cfmts[i] = C.CString(fmts[i]) cnames[i] = C.CString(names[i]) cflags[i] = C.int64_t(flags[i]) } return C.test_struct((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), 
(*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) } func testMap(fmts, names []string, flags []int64) **CArrowSchema { if len(fmts) != len(names) || len(names) != len(flags) { panic("testing maps must all have the same size slices in args") } cfmts := make([]*C.char, len(fmts)) cnames := make([]*C.char, len(names)) cflags := make([]C.int64_t, len(flags)) for i := range fmts { cfmts[i] = C.CString(fmts[i]) cnames[i] = C.CString(names[i]) cflags[i] = C.int64_t(flags[i]) } return C.test_map((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) } func testUnion(fmts, names []string, flags []int64) **CArrowSchema { if len(fmts) != len(names) || len(names) != len(flags) { panic("testing unions must all have the same size slices in args") } cfmts := make([]*C.char, len(fmts)) cnames := make([]*C.char, len(names)) cflags := make([]C.int64_t, len(flags)) for i := range fmts { cfmts[i] = C.CString(fmts[i]) cnames[i] = C.CString(names[i]) cflags[i] = C.int64_t(flags[i]) } return C.test_union((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) } func testSchema(fmts, names []string, flags []int64) **CArrowSchema { if len(fmts) != len(names) || len(names) != len(flags) { panic("testing structs must all have the same size slices in args") } cfmts := make([]*C.char, len(fmts)) cnames := make([]*C.char, len(names)) cflags := make([]C.int64_t, len(flags)) for i := range fmts { cfmts[i] = C.CString(fmts[i]) cnames[i] = C.CString(names[i]) cflags[i] = C.int64_t(flags[i]) } return C.test_schema((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) } func freeAny[T any](alloc *mallocator.Mallocator, p *T, n int) { raw := unsafe.Slice((*byte)(unsafe.Pointer(p)), int(unsafe.Sizeof(*p))*n) alloc.Free(raw) } func 
freeTestMallocatorArr(carr *CArrowArray, alloc *mallocator.Mallocator) { freeAny(alloc, carr, 1) } func getTestArr(alloc *mallocator.Mallocator) *CArrowArray { raw := alloc.Allocate(C.sizeof_struct_ArrowArray) return (*CArrowArray)(unsafe.Pointer(&raw[0])) } type testReleaser struct { alloc *mallocator.Mallocator bufs [][]byte } //export releaseTestArr func releaseTestArr(arr *CArrowArray) { if C.ArrowArrayIsReleased(arr) == 1 { return } defer C.ArrowArrayMarkReleased(arr) h := getHandle(arr.private_data) tr := h.Value().(*testReleaser) alloc := tr.alloc for _, b := range tr.bufs { alloc.Free(b) } if arr.n_buffers > 0 { freeAny(alloc, arr.buffers, int(arr.n_buffers)) } if arr.dictionary != nil { C.ArrowArrayRelease(arr.dictionary) freeAny(alloc, arr.dictionary, 1) } if arr.n_children > 0 { children := unsafe.Slice(arr.children, arr.n_children) for _, c := range children { C.ArrowArrayRelease(c) freeTestMallocatorArr(c, alloc) } freeAny(alloc, arr.children, int(arr.n_children)) } h.Delete() C.free(unsafe.Pointer(arr.private_data)) } func allocateBufferMallocatorPtrArr(alloc *mallocator.Mallocator, n int) []*C.void { raw := alloc.Allocate(int(unsafe.Sizeof((*C.void)(nil))) * n) return unsafe.Slice((**C.void)(unsafe.Pointer(&raw[0])), n) } func allocateChildrenPtrArr(alloc *mallocator.Mallocator, n int) []*CArrowArray { raw := alloc.Allocate(int(unsafe.Sizeof((*CArrowArray)(nil))) * n) return unsafe.Slice((**CArrowArray)(unsafe.Pointer(&raw[0])), n) } func createCArr(arr arrow.Array, alloc *mallocator.Mallocator) *CArrowArray { var ( carr = getTestArr(alloc) children = (**CArrowArray)(nil) nchildren = C.int64_t(0) ) switch arr := arr.(type) { case array.ListLike: clist := allocateChildrenPtrArr(alloc, 1) clist[0] = createCArr(arr.ListValues(), alloc) children = (**CArrowArray)(unsafe.Pointer(&clist[0])) nchildren += 1 case *array.Struct: if arr.NumField() == 0 { break } clist := allocateChildrenPtrArr(alloc, arr.NumField()) for i := 0; i < arr.NumField(); i++ { 
clist[i] = createCArr(arr.Field(i), alloc) nchildren += 1 } children = (**CArrowArray)(unsafe.Pointer(&clist[0])) case *array.RunEndEncoded: clist := allocateChildrenPtrArr(alloc, 2) clist[0] = createCArr(arr.RunEndsArr(), alloc) clist[1] = createCArr(arr.Values(), alloc) children = (**CArrowArray)(unsafe.Pointer(&clist[0])) nchildren += 2 case array.Union: if arr.NumFields() == 0 { break } clist := allocateChildrenPtrArr(alloc, arr.NumFields()) for i := 0; i < arr.NumFields(); i++ { clist[i] = createCArr(arr.Field(i), alloc) nchildren += 1 } children = (**CArrowArray)(unsafe.Pointer(&clist[0])) } carr.children = children carr.n_children = nchildren carr.dictionary = nil carr.length = C.int64_t(arr.Len()) carr.null_count = C.int64_t(arr.NullN()) carr.offset = C.int64_t(arr.Data().Offset()) carr.release = (*[0]byte)(C.goReleaseTestArray) tr := &testReleaser{alloc: alloc} h := cgo.NewHandle(tr) carr.private_data = createHandle(h) buffers := arr.Data().Buffers() bufOffset, nbuffers := 0, len(buffers) hasValidityBitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID()) if nbuffers > 0 && !hasValidityBitmap { nbuffers-- bufOffset++ } if nbuffers == 0 { return carr } tr.bufs = make([][]byte, 0, nbuffers) cbufs := allocateBufferMallocatorPtrArr(alloc, nbuffers) for i, b := range buffers[bufOffset:] { if b != nil && b.Len() > 0 { raw := alloc.Allocate(b.Len()) copy(raw, b.Bytes()) tr.bufs = append(tr.bufs, raw) cbufs[i] = (*C.void)(unsafe.Pointer(&raw[0])) } else { cbufs[i] = nil } } carr.n_buffers = C.int64_t(len(cbufs)) if len(cbufs) > 0 { carr.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cbufs[0])) } return carr } func createTestStreamObj() *CArrowArrayStream { return C.get_test_stream() } func arrayStreamTest() *CArrowArrayStream { st := C.get_test_stream() C.setup_array_stream_test(2, st) return st } func exportedStreamTest(reader array.RecordReader) error { out := C.get_test_stream() ExportRecordReader(reader, out) rc := C.test_exported_stream(out) 
C.free(unsafe.Pointer(out)) if rc == 0 { return nil } return fmt.Errorf("Exported stream test failed with return code %d", int(rc)) } func roundTripStreamTest(reader array.RecordReader) error { out := C.get_test_stream() ExportRecordReader(reader, out) rdr, err := ImportCRecordReader(out, nil) if err != nil { return err } for { _, err = rdr.Read() if errors.Is(err, io.EOF) { break } else if err != nil { return err } } return nil } func fallibleSchemaTestDeprecated() (err error) { stream := CArrowArrayStream{} C.test_stream_schema_fallible(&stream) defer func() { if r := recover(); r != nil { err = fmt.Errorf("Panicked: %#v", r) } }() _ = ImportCArrayStream(&stream, nil) return nil } func fallibleSchemaTest() error { stream := CArrowArrayStream{} C.test_stream_schema_fallible(&stream) _, err := ImportCRecordReader(&stream, nil) if err != nil { return err } return nil } func confuseGoGc(reader array.RecordReader) error { out := C.get_test_stream() ExportRecordReader(reader, out) rc := C.confuse_go_gc(out, C.uint(rand.Int())) C.free(unsafe.Pointer(out)) if rc == 0 { return nil } return fmt.Errorf("Exported stream test failed with return code %d", int(rc)) } arrow-go-18.2.0/arrow/cdata/exports.go000066400000000000000000000306041476434502500176150ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package cdata import ( "context" "fmt" "runtime/cgo" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" ) // #include // #include // #include "abi.h" // #include "helpers.h" // // typedef const char cchar_t; // extern int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); // extern int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); // extern const char* streamGetError(struct ArrowArrayStream*); // extern void streamRelease(struct ArrowArrayStream*); // extern int asyncStreamOnSchema(struct ArrowAsyncDeviceStreamHandler*, struct ArrowSchema*); // extern int asyncStreamOnNextTask(struct ArrowAsyncDeviceStreamHandler*, struct ArrowAsyncTask*, char*); // extern void asyncStreamOnError(struct ArrowAsyncDeviceStreamHandler*, int, char*, char*); // extern void asyncStreamRelease(struct ArrowAsyncDeviceStreamHandler*); // extern void asyncProducerRequest(struct ArrowAsyncProducer*, int64_t); // extern void asyncProducerCancel(struct ArrowAsyncProducer*); // extern int asyncTaskExtract(struct ArrowAsyncTask*, struct ArrowDeviceArray*); // // XXX(https://github.com/apache/arrow-adbc/issues/729) // int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out); // int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out); // int asyncTaskExtractTrampoline(struct ArrowAsyncTask* task, struct ArrowDeviceArray* out); // // static void goCallRequest(struct ArrowAsyncProducer* producer, int64_t n) { // producer->request(producer, n); // } // static int goCallOnSchema(struct ArrowAsyncDeviceStreamHandler* handler, struct ArrowSchema* schema) { // return handler->on_schema(handler, schema); // } // static void goCallOnError(struct ArrowAsyncDeviceStreamHandler* handler, int code, char* message, char* metadata) { // handler->on_error(handler, code, message, 
metadata); // } // static int goCallOnNextTask(struct ArrowAsyncDeviceStreamHandler* handler, struct ArrowAsyncTask* task, char* metadata) { // return handler->on_next_task(handler, task, metadata); // } // // static struct ArrowAsyncProducer* get_producer() { // struct ArrowAsyncProducer* out = (struct ArrowAsyncProducer*)malloc(sizeof(struct ArrowAsyncProducer)); // memset(out, 0, sizeof(struct ArrowAsyncProducer)); // return out; // } // // static void goReleaseAsyncHandler(struct ArrowAsyncDeviceStreamHandler* handler) { // handler->release(handler); // } // import "C" //export releaseExportedSchema func releaseExportedSchema(schema *CArrowSchema) { if C.ArrowSchemaIsReleased(schema) == 1 { return } defer C.ArrowSchemaMarkReleased(schema) C.free(unsafe.Pointer(schema.name)) C.free(unsafe.Pointer(schema.format)) C.free(unsafe.Pointer(schema.metadata)) if schema.n_children == 0 { return } if schema.dictionary != nil { C.ArrowSchemaRelease(schema.dictionary) C.free(unsafe.Pointer(schema.dictionary)) } children := unsafe.Slice(schema.children, schema.n_children) for _, c := range children { C.ArrowSchemaRelease(c) } C.free(unsafe.Pointer(children[0])) C.free(unsafe.Pointer(schema.children)) } // apache/arrow#33864: allocate a new cgo.Handle and store its address // in a heap-allocated uintptr_t. 
func createHandle(hndl cgo.Handle) unsafe.Pointer { // uintptr_t* hptr = malloc(sizeof(uintptr_t)); hptr := (*C.uintptr_t)(C.malloc(C.sizeof_uintptr_t)) // *hptr = (uintptr)hndl; *hptr = C.uintptr_t(uintptr(hndl)) return unsafe.Pointer(hptr) } func getHandle(ptr unsafe.Pointer) cgo.Handle { // uintptr_t* hptr = (uintptr_t*)ptr; hptr := (*C.uintptr_t)(ptr) return cgo.Handle((uintptr)(*hptr)) } //export releaseExportedArray func releaseExportedArray(arr *CArrowArray) { if C.ArrowArrayIsReleased(arr) == 1 { return } defer C.ArrowArrayMarkReleased(arr) if arr.n_buffers > 0 { C.free(unsafe.Pointer(arr.buffers)) } if arr.dictionary != nil { C.ArrowArrayRelease(arr.dictionary) C.free(unsafe.Pointer(arr.dictionary)) } if arr.n_children > 0 { children := unsafe.Slice(arr.children, arr.n_children) for _, c := range children { C.ArrowArrayRelease(c) } C.free(unsafe.Pointer(children[0])) C.free(unsafe.Pointer(arr.children)) } h := getHandle(arr.private_data) h.Value().(arrow.ArrayData).Release() h.Delete() C.free(unsafe.Pointer(arr.private_data)) } //export streamGetSchema func streamGetSchema(handle *CArrowArrayStream, out *CArrowSchema) C.int { h := getHandle(handle.private_data) rdr := h.Value().(cRecordReader) return C.int(rdr.getSchema(out)) } //export streamGetNext func streamGetNext(handle *CArrowArrayStream, out *CArrowArray) C.int { h := getHandle(handle.private_data) rdr := h.Value().(cRecordReader) return C.int(rdr.next(out)) } //export streamGetError func streamGetError(handle *CArrowArrayStream) *C.cchar_t { h := getHandle(handle.private_data) rdr := h.Value().(cRecordReader) return rdr.getLastError() } //export streamRelease func streamRelease(handle *CArrowArrayStream) { h := getHandle(handle.private_data) h.Value().(cRecordReader).release() h.Delete() C.free(unsafe.Pointer(handle.private_data)) handle.release = nil handle.private_data = nil } func exportStream(rdr array.RecordReader, out *CArrowArrayStream) { out.get_schema = 
(*[0]byte)(C.streamGetSchemaTrampoline) out.get_next = (*[0]byte)(C.streamGetNextTrampoline) out.get_last_error = (*[0]byte)(C.streamGetError) out.release = (*[0]byte)(C.streamRelease) rdr.Retain() h := cgo.NewHandle(cRecordReader{rdr: rdr, err: nil}) out.private_data = createHandle(h) } type cAsyncState struct { ch chan AsyncRecordBatchStream queueSize uint64 ctx context.Context } type taskState struct { task CArrowAsyncTask meta arrow.Metadata err error } //export asyncStreamOnSchema func asyncStreamOnSchema(self *CArrowAsyncDeviceStreamHandler, schema *CArrowSchema) C.int { h := getHandle(self.private_data) handler := h.Value().(cAsyncState) defer close(handler.ch) if self.producer.device_type != C.ARROW_DEVICE_CPU { handler.ch <- AsyncRecordBatchStream{Err: fmt.Errorf("unsupported device type")} return C.EINVAL } sc, err := ImportCArrowSchema(schema) if err != nil { handler.ch <- AsyncRecordBatchStream{Err: err} return C.EINVAL } var meta arrow.Metadata if self.producer.additional_metadata != nil { meta = decodeCMetadata(self.producer.additional_metadata) } recordStream := make(chan RecordMessage, handler.queueSize) taskQueue := make(chan taskState, handler.queueSize) handler.ch <- AsyncRecordBatchStream{Schema: sc, AdditionalMetadata: meta, Stream: recordStream} self.private_data = createHandle(cgo.NewHandle(&cAsyncStreamHandler{ producer: self.producer, ctx: handler.ctx, taskQueue: taskQueue, })) defer h.Delete() C.goCallRequest(self.producer, C.int64_t(handler.queueSize)) go asyncTaskQueue(handler.ctx, sc, recordStream, taskQueue, self.producer) return 0 } //export asyncStreamOnNextTask func asyncStreamOnNextTask(self *CArrowAsyncDeviceStreamHandler, task *CArrowAsyncTask, metadata *C.char) C.int { h := getHandle(self.private_data) handler := h.Value().(*cAsyncStreamHandler) return handler.onNextTask(task, metadata) } //export asyncStreamOnError func asyncStreamOnError(self *CArrowAsyncDeviceStreamHandler, code C.int, message, metadata *C.char) { h := 
getHandle(self.private_data) switch handler := h.Value().(type) { case *cAsyncStreamHandler: handler.onError(code, message, metadata) case cAsyncState: handler.ch <- AsyncRecordBatchStream{Err: AsyncStreamError{ Code: int(code), Msg: C.GoString(message), Metadata: C.GoString(metadata), }} close(handler.ch) } } //export asyncStreamRelease func asyncStreamRelease(self *CArrowAsyncDeviceStreamHandler) { h := getHandle(self.private_data) if handler, ok := h.Value().(*cAsyncStreamHandler); ok { handler.release() } h.Delete() C.free(unsafe.Pointer(self.private_data)) self.release = nil self.private_data = nil } func exportAsyncHandler(state cAsyncState, out *CArrowAsyncDeviceStreamHandler) { out.on_schema = (*[0]byte)(C.asyncStreamOnSchema) out.on_next_task = (*[0]byte)(C.asyncStreamOnNextTask) out.on_error = (*[0]byte)(C.asyncStreamOnError) out.release = (*[0]byte)(C.asyncStreamRelease) out.private_data = createHandle(cgo.NewHandle(state)) } //export asyncProducerRequest func asyncProducerRequest(producer *CArrowAsyncProducer, n C.int64_t) { h := getHandle(producer.private_data) handler := h.Value().(*cAsyncProducer) if handler.reqChan != nil { handler.reqChan <- int64(n) } } //export asyncProducerCancel func asyncProducerCancel(producer *CArrowAsyncProducer) { h := getHandle(producer.private_data) handler := h.Value().(*cAsyncProducer) if handler.done != nil { close(handler.done) handler.done, handler.reqChan = nil, nil } } //export asyncTaskExtract func asyncTaskExtract(task *CArrowAsyncTask, out *CArrowDeviceArray) C.int { h := getHandle(task.private_data) rec := h.Value().(arrow.Record) defer rec.Release() out.device_id, out.device_type = C.int64_t(-1), C.ARROW_DEVICE_CPU ExportArrowRecordBatch(rec, &out.array, nil) return C.int(0) } type cAsyncProducer struct { reqChan chan int64 done chan error } func exportAsyncProducer(schema *arrow.Schema, stream <-chan RecordMessage, handler *CArrowAsyncDeviceStreamHandler) error { defer C.goReleaseAsyncHandler(handler) if 
schema == nil { err := fmt.Errorf("%w: must have non-nil schema", arrow.ErrInvalid) errmsg := C.CString(err.Error()) C.goCallOnError(handler, C.EINVAL, errmsg, nil) C.free(unsafe.Pointer(errmsg)) return err } reqChan, done := make(chan int64, 5), make(chan error, 1) prodHandle := cgo.NewHandle(&cAsyncProducer{reqChan: reqChan, done: done}) cproducer := prodHandle.Value().(*cAsyncProducer) defer func() { close(reqChan) cproducer.reqChan = nil if cproducer.done != nil { close(cproducer.done) cproducer.done = nil } prodHandle.Delete() }() producer := C.get_producer() defer C.free(unsafe.Pointer(producer)) producer.device_type = C.ARROW_DEVICE_CPU producer.request = (*[0]byte)(C.asyncProducerRequest) producer.cancel = (*[0]byte)(C.asyncProducerCancel) producer.private_data = createHandle(prodHandle) producer.additional_metadata = nil handler.producer = producer var s CArrowSchema ExportArrowSchema(schema, &s) if status := C.goCallOnSchema(handler, &s); status != C.int(0) { releaseExportedSchema(&s) return fmt.Errorf("on_schema failed with status %d", status) } var pending int64 = 0 for { select { case err, ok := <-done: if !ok { return nil } return err case req := <-reqChan: pending += req default: } if pending > 0 { select { case msg, ok := <-stream: if !ok { if status := C.goCallOnNextTask(handler, nil, nil); status != C.int(0) { return fmt.Errorf("on_next_task with nil task failed with status %d", status) } return nil } pending-- if msg.Err != nil { errmsg := C.CString(msg.Err.Error()) C.goCallOnError(handler, C.EINVAL, errmsg, nil) C.free(unsafe.Pointer(errmsg)) return msg.Err } var task CArrowAsyncTask task.extract_data = (*[0]byte)(C.asyncTaskExtractTrampoline) task.private_data = createHandle(cgo.NewHandle(msg.Record)) var encoded []byte if msg.AdditionalMetadata.Len() != 0 { encoded = encodeCMetadata(msg.AdditionalMetadata.Keys(), msg.AdditionalMetadata.Values()) } status := C.goCallOnNextTask(handler, &task, 
(*C.char)(unsafe.Pointer(unsafe.SliceData(encoded)))) if status != C.int(0) { msg.Record.Release() getHandle(task.private_data).Delete() return fmt.Errorf("on_next_task failed with status %d", status) } default: } } } } arrow-go-18.2.0/arrow/cdata/helpers.h000066400000000000000000000144131476434502500173750ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include #include #include "abi.h" #define ARROW_C_ASSERT(condition, msg) \ do { \ if (!(condition)) { \ fprintf(stderr, "%s:%d:: %s", __FILE__, __LINE__, (msg)); \ abort(); \ } \ } while (0) #ifdef __cplusplus extern "C" { #endif /// Query whether the C schema is released static inline int ArrowSchemaIsReleased(const struct ArrowSchema* schema) { return schema->release == NULL; } /// Mark the C schema released (for use in release callbacks) static inline void ArrowSchemaMarkReleased(struct ArrowSchema* schema) { schema->release = NULL; } /// Move the C schema from `src` to `dest` /// /// Note `dest` must *not* point to a valid schema already, otherwise there /// will be a memory leak. 
static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) { assert(dest != src); assert(!ArrowSchemaIsReleased(src)); memcpy(dest, src, sizeof(struct ArrowSchema)); ArrowSchemaMarkReleased(src); } /// Release the C schema, if necessary, by calling its release callback static inline void ArrowSchemaRelease(struct ArrowSchema* schema) { if (!ArrowSchemaIsReleased(schema)) { schema->release(schema); ARROW_C_ASSERT(ArrowSchemaIsReleased(schema), "ArrowSchemaRelease did not cleanup release callback"); } } /// Query whether the C array is released static inline int ArrowArrayIsReleased(const struct ArrowArray* array) { return array->release == NULL; } static inline int ArrowDeviceArrayIsReleased(const struct ArrowDeviceArray* array) { return ArrowArrayIsReleased(&array->array); } /// Mark the C array released (for use in release callbacks) static inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; } static inline void ArrowDeviceArrayMarkReleased(struct ArrowDeviceArray* array) { ArrowArrayMarkReleased(&array->array); } /// Move the C array from `src` to `dest` /// /// Note `dest` must *not* point to a valid array already, otherwise there /// will be a memory leak. 
static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) { assert(dest != src); assert(!ArrowArrayIsReleased(src)); memcpy(dest, src, sizeof(struct ArrowArray)); ArrowArrayMarkReleased(src); } static inline void ArrowDeviceArrayMove(struct ArrowDeviceArray* src, struct ArrowDeviceArray* dest) { assert(dest != src); assert(!ArrowDeviceArrayIsReleased(src)); memcpy(dest, src, sizeof(struct ArrowDeviceArray)); ArrowDeviceArrayMarkReleased(src); } /// Release the C array, if necessary, by calling its release callback static inline void ArrowArrayRelease(struct ArrowArray* array) { if (!ArrowArrayIsReleased(array)) { array->release(array); ARROW_C_ASSERT(ArrowArrayIsReleased(array), "ArrowArrayRelease did not cleanup release callback"); } } static inline void ArrowDeviceArrayRelease(struct ArrowDeviceArray* array) { if (!ArrowDeviceArrayIsReleased(array)) { array->array.release(&array->array); ARROW_C_ASSERT(ArrowDeviceArrayIsReleased(array), "ArrowDeviceArrayRelease did not cleanup release callback"); } } /// Query whether the C array stream is released static inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) { return stream->release == NULL; } static inline int ArrowDeviceArrayStreamIsReleased(const struct ArrowDeviceArrayStream* stream) { return stream->release == NULL; } /// Mark the C array stream released (for use in release callbacks) static inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) { stream->release = NULL; } static inline void ArrowDeviceArrayStreamMarkReleased(struct ArrowDeviceArrayStream* stream) { stream->release = NULL; } /// Move the C array stream from `src` to `dest` /// /// Note `dest` must *not* point to a valid stream already, otherwise there /// will be a memory leak. 
static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, struct ArrowArrayStream* dest) { assert(dest != src); assert(!ArrowArrayStreamIsReleased(src)); memcpy(dest, src, sizeof(struct ArrowArrayStream)); ArrowArrayStreamMarkReleased(src); } static inline void ArrowDeviceArrayStreamMove(struct ArrowDeviceArrayStream* src, struct ArrowDeviceArrayStream* dest) { assert(dest != src); assert(!ArrowDeviceArrayStreamIsReleased(src)); memcpy(dest, src, sizeof(struct ArrowDeviceArrayStream)); ArrowDeviceArrayStreamMarkReleased(src); } /// Release the C array stream, if necessary, by calling its release callback static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { if (!ArrowArrayStreamIsReleased(stream)) { stream->release(stream); ARROW_C_ASSERT(ArrowArrayStreamIsReleased(stream), "ArrowArrayStreamRelease did not cleanup release callback"); } } static inline void ArrowDeviceArrayStreamRelease(struct ArrowDeviceArrayStream* stream) { if (!ArrowDeviceArrayStreamIsReleased(stream)) { stream->release(stream); ARROW_C_ASSERT(ArrowDeviceArrayStreamIsReleased(stream), "ArrowDeviceArrayStreamRelease did not cleanup release callback"); } } #ifdef __cplusplus } #endif arrow-go-18.2.0/arrow/cdata/import_allocator.go000066400000000000000000000031251476434502500214610ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package cdata import ( "sync/atomic" "unsafe" "github.com/apache/arrow-go/v18/arrow/internal/debug" ) // #include "helpers.h" // #include import "C" type importAllocator struct { bufCount int64 arr *CArrowArray } func (i *importAllocator) addBuffer() { atomic.AddInt64(&i.bufCount, 1) } func (*importAllocator) Allocate(int) []byte { panic("cannot allocate from importAllocator") } func (*importAllocator) Reallocate(int, []byte) []byte { panic("cannot reallocate from importAllocator") } func (i *importAllocator) Free([]byte) { debug.Assert(atomic.LoadInt64(&i.bufCount) > 0, "too many releases") if atomic.AddInt64(&i.bufCount, -1) == 0 { defer C.free(unsafe.Pointer(i.arr)) C.ArrowArrayRelease(i.arr) if C.ArrowArrayIsReleased(i.arr) != 1 { panic("did not release C mem") } } } arrow-go-18.2.0/arrow/cdata/interface.go000066400000000000000000000404051476434502500200510ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build cgo // +build cgo package cdata import ( "context" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/arrio" "github.com/apache/arrow-go/v18/arrow/memory" "golang.org/x/xerrors" ) // SchemaFromPtr is a simple helper function to cast a uintptr to a *CArrowSchema func SchemaFromPtr(ptr uintptr) *CArrowSchema { return (*CArrowSchema)(unsafe.Pointer(ptr)) } // ArrayFromPtr is a simple helper function to cast a uintptr to a *CArrowArray func ArrayFromPtr(ptr uintptr) *CArrowArray { return (*CArrowArray)(unsafe.Pointer(ptr)) } // ImportCArrowField takes in an ArrowSchema from the C Data interface, it // will copy the metadata and type definitions rather than keep direct references // to them. It is safe to call C.ArrowSchemaRelease after receiving the field // from this function. func ImportCArrowField(out *CArrowSchema) (arrow.Field, error) { return importSchema(out) } // ImportCArrowSchema takes in the ArrowSchema from the C Data Interface, it // will copy the metadata and schema definitions over from the C object rather // than keep direct references to them. This function will call ArrowSchemaRelease // on the passed in schema regardless of whether or not there is an error returned. // // This version is intended to take in a schema for a record batch, which means // that the top level of the schema should be a struct of the schema fields. If // importing a single array's schema, then use ImportCArrowField instead. 
func ImportCArrowSchema(out *CArrowSchema) (*arrow.Schema, error) { ret, err := importSchema(out) if err != nil { return nil, err } return arrow.NewSchema(ret.Type.(*arrow.StructType).Fields(), &ret.Metadata), nil } // ImportCArrayWithType takes a pointer to a C Data ArrowArray and interprets the values // as an array with the given datatype. If err is not nil, then ArrowArrayRelease must still // be called on arr to release the memory. // // The underlying buffers will not be copied, but will instead be referenced directly // by the resulting array interface object. The passed in ArrowArray will have it's ownership // transferred to the resulting arrow.Array via ArrowArrayMove. The underlying array.Data // object that is owned by the Array will now be the owner of the memory pointer and // will call ArrowArrayRelease when it is released and garbage collected via runtime.SetFinalizer. // // NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, // it does not take ownership of the actual arr object itself. func ImportCArrayWithType(arr *CArrowArray, dt arrow.DataType) (arrow.Array, error) { imp, err := importCArrayAsType(arr, dt) if err != nil { return nil, err } defer imp.data.Release() return array.MakeFromData(imp.data), nil } // ImportCArray takes a pointer to both a C Data ArrowArray and C Data ArrowSchema in order // to import them into usable Go Objects. If err is not nil, then ArrowArrayRelease must still // be called on arr to release the memory. The ArrowSchemaRelease will be called on the passed in // schema regardless of whether there is an error or not. // // The Schema will be copied with the information used to populate the returned Field, complete // with metadata. The array will reference the same memory that is referred to by the ArrowArray // object and take ownership of it as per ImportCArrayWithType. 
The returned arrow.Array will // own the C memory and call ArrowArrayRelease when the array.Data object is cleaned up. // // NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, // it does not take ownership of the actual arr object itself. func ImportCArray(arr *CArrowArray, schema *CArrowSchema) (arrow.Field, arrow.Array, error) { field, err := importSchema(schema) if err != nil { return field, nil, err } ret, err := ImportCArrayWithType(arr, field.Type) return field, ret, err } // ImportCRecordBatchWithSchema is used for importing a Record Batch array when the schema // is already known such as when receiving record batches through a stream. // // All of the semantics regarding memory ownership are the same as when calling // ImportCRecordBatch directly with a schema. // // NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, // it does not take ownership of the actual arr object itself. func ImportCRecordBatchWithSchema(arr *CArrowArray, sc *arrow.Schema) (arrow.Record, error) { imp, err := importCArrayAsType(arr, arrow.StructOf(sc.Fields()...)) if err != nil { return nil, err } defer imp.data.Release() st := array.NewStructData(imp.data) defer st.Release() // now that we have our fields, we can split them out into the slice of arrays // and construct a record batch from them to return. cols := make([]arrow.Array, st.NumField()) for i := 0; i < st.NumField(); i++ { cols[i] = st.Field(i) } return array.NewRecord(sc, cols, int64(st.Len())), nil } // ImportCRecordBatch imports an ArrowArray from C as a record batch. If err is not nil, // then ArrowArrayRelease must still be called to release the memory. // // A record batch is represented in the C Data Interface as a Struct Array whose fields // are the columns of the record batch. Thus after importing the schema passed in here, // if it is not a Struct type, this will return an error. 
As with ImportCArray, the // columns in the record batch will take ownership of the CArrowArray memory if successful. // Since ArrowArrayMove is used, it's still safe to call ArrowArrayRelease on the source // regardless. But if there is an error, it *MUST* be called to ensure there is no memory leak. // // NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, // it does not take ownership of the actual arr object itself. func ImportCRecordBatch(arr *CArrowArray, sc *CArrowSchema) (arrow.Record, error) { field, err := importSchema(sc) if err != nil { return nil, err } if field.Type.ID() != arrow.STRUCT { return nil, xerrors.New("recordbatch array import must be of struct type") } return ImportCRecordBatchWithSchema(arr, arrow.NewSchema(field.Type.(*arrow.StructType).Fields(), &field.Metadata)) } // ImportCArrayStream creates an arrio.Reader from an ArrowArrayStream taking ownership // of the underlying stream object via ArrowArrayStreamMove. // // The records returned by this reader must be released manually after they are returned. // The reader itself will release the stream via SetFinalizer when it is garbage collected. // It will return (nil, io.EOF) from the Read function when there are no more records to return. // // NOTE: The reader takes ownership of the underlying memory buffers via ArrowArrayStreamMove, // it does not take ownership of the actual stream object itself. // // Deprecated: This will panic if importing the schema fails (which is possible). // Prefer ImportCRecordReader instead. func ImportCArrayStream(stream *CArrowArrayStream, schema *arrow.Schema) arrio.Reader { reader, err := ImportCRecordReader(stream, schema) if err != nil { panic(err) } return reader } // ImportCStreamReader creates an arrio.Reader from an ArrowArrayStream taking ownership // of the underlying stream object via ArrowArrayStreamMove. // // The records returned by this reader must be released manually after they are returned. 
// The reader itself will release the stream via SetFinalizer when it is garbage collected. // It will return (nil, io.EOF) from the Read function when there are no more records to return. // // NOTE: The reader takes ownership of the underlying memory buffers via ArrowArrayStreamMove, // it does not take ownership of the actual stream object itself. func ImportCRecordReader(stream *CArrowArrayStream, schema *arrow.Schema) (arrio.Reader, error) { out := &nativeCRecordBatchReader{schema: schema} if err := initReader(out, stream); err != nil { return nil, err } return out, nil } // ExportArrowSchema populates the passed in CArrowSchema with the schema passed in so // that it can be passed to some consumer of the C Data Interface. The `release` function // is tied to a callback in order to properly release any memory that was allocated during // the populating of the struct. Any memory allocated will be allocated using malloc // which means that it is invisible to the Go Garbage Collector and must be freed manually // using the callback on the CArrowSchema object. // // WARNING: the output ArrowSchema MUST BE ZERO INITIALIZED, or the Go garbage collector // may error at runtime, due to CGO rules ("the current implementation may sometimes // cause a runtime error if the contents of the C memory appear to be a Go pointer"). // You have been warned! func ExportArrowSchema(schema *arrow.Schema, out *CArrowSchema) { dummy := arrow.Field{Type: arrow.StructOf(schema.Fields()...), Metadata: schema.Metadata()} exportField(dummy, out) } // ExportArrowRecordBatch populates the passed in CArrowArray (and optionally the schema too) // by sharing the memory used for the buffers of each column's arrays. It does not // copy the data, and will internally increment the reference counters so that releasing // the record will not free the memory prematurely. 
// // When using CGO, memory passed to C is pinned so that the Go garbage collector won't // move where it is allocated out from under the C pointer locations, ensuring the C pointers // stay valid. This is only true until the CGO call returns, at which point the garbage collector // is free to move things around again. As a result, if the function you're calling is going to // hold onto the pointers or otherwise continue to reference the memory *after* the call returns, // you should use the CgoArrowAllocator rather than the GoAllocator (or DefaultAllocator) so that // the memory which is allocated for the record batch in the first place is allocated in C, // not by the Go runtime and is therefore not subject to the Garbage collection. // // The release function on the populated CArrowArray will properly decrease the reference counts, // and release the memory if the record has already been released. But since this must be explicitly // done, make sure it is released so that you do not create a memory leak. // // WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector // may error at runtime, due to CGO rules ("the current implementation may sometimes // cause a runtime error if the contents of the C memory appear to be a Go pointer"). // You have been warned! 
func ExportArrowRecordBatch(rb arrow.Record, out *CArrowArray, outSchema *CArrowSchema) { children := make([]arrow.ArrayData, rb.NumCols()) for i := range rb.Columns() { children[i] = rb.Column(i).Data() } data := array.NewData(arrow.StructOf(rb.Schema().Fields()...), int(rb.NumRows()), []*memory.Buffer{nil}, children, 0, 0) defer data.Release() arr := array.NewStructData(data) defer arr.Release() if outSchema != nil { ExportArrowSchema(rb.Schema(), outSchema) } exportArray(arr, out, nil) } // ExportArrowArray populates the CArrowArray that is passed in with the pointers to the memory // being used by the arrow.Array passed in, in order to share with zero-copy across the C // Data Interface. See the documentation for ExportArrowRecordBatch for details on how to ensure // you do not leak memory and prevent unwanted, undefined or strange behaviors. // // WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector // may error at runtime, due to CGO rules ("the current implementation may sometimes // cause a runtime error if the contents of the C memory appear to be a Go pointer"). // You have been warned! func ExportArrowArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { exportArray(arr, out, outSchema) } // ExportRecordReader populates the CArrowArrayStream that is passed in with the appropriate // callbacks to be a working ArrowArrayStream utilizing the passed in RecordReader. The // CArrowArrayStream takes ownership of the RecordReader until the consumer calls the release // callback, as such it is unnecessary to call Release on the passed in reader unless it has // previously been retained. // // WARNING: the output ArrowArrayStream MUST BE ZERO INITIALIZED, or the Go garbage // collector may error at runtime, due to CGO rules ("the current implementation may // sometimes cause a runtime error if the contents of the C memory appear to be a Go // pointer"). You have been warned! 
func ExportRecordReader(reader array.RecordReader, out *CArrowArrayStream) { exportStream(reader, out) } // ReleaseCArrowArray calls ArrowArrayRelease on the passed in cdata array func ReleaseCArrowArray(arr *CArrowArray) { releaseArr(arr) } // ReleaseCArrowSchema calls ArrowSchemaRelease on the passed in cdata schema func ReleaseCArrowSchema(schema *CArrowSchema) { releaseSchema(schema) } // RecordMessage is a simple container for a record batch channel to stream for // using the Async C Data Interface via ExportAsyncRecordBatchStream. type RecordMessage struct { Record arrow.Record AdditionalMetadata arrow.Metadata Err error } // AsyncRecordBatchStream represents a stream of record batches being read in // from an ArrowAsyncDeviceStreamHandler's callbacks. If an error was encountered // before the call to on_schema, then this will contain the error as Err. Otherwise // the Schema will be valid and the Stream is a channel of RecordMessages being // propagated via on_next_task and extract_data. type AsyncRecordBatchStream struct { Schema *arrow.Schema AdditionalMetadata arrow.Metadata Err error Stream <-chan RecordMessage } // AsyncStreamError represents an error encountered via a call to the on_error // callback of an ArrowAsyncDeviceStreamHandler. The Code is the error code that // should be errno compatible. type AsyncStreamError struct { Code int Msg string Metadata string } func (e AsyncStreamError) Error() string { return e.Msg } // CreateAsyncDeviceStreamHandler populates a given ArrowAsyncDeviceStreamHandler's callbacks // and waits for the on_schema callback to be called before passing the AsyncRecordBatchStream // object across the returned channel. // // The provided queueSize is the number of records that will be requested at a time to be passed // along the Stream in the returned AsyncRecordBatchStream. 
See the documentation on // https://arrow.apache.org/docs/format/CDeviceDataInterface.html for more information as to the // expected semantics of that size. // // The populated ArrowAsyncDeviceStreamHandler can then be given to any compatible provider for // async record batch streams via the C Device interface. func CreateAsyncDeviceStreamHandler(ctx context.Context, queueSize uint64, out *CArrowAsyncDeviceStreamHandler) <-chan AsyncRecordBatchStream { ch := make(chan AsyncRecordBatchStream) exportAsyncHandler(cAsyncState{ctx: ctx, ch: ch, queueSize: queueSize}, out) return ch } // ExportAsyncRecordBatchStream takes in a schema and a channel of RecordMessages along with a // ArrowAsyncDeviceStreamHandler to export the records as they come across the channel and call // the appropriate callbacks on the handler. This function will block until the stream is closed // or a message containing an error comes across the channel. // // The returned error will be nil if everything is successful, otherwise it will be the error which // is encountered on the stream or an AsyncError if one of the handler callbacks returns an error. func ExportAsyncRecordBatchStream(schema *arrow.Schema, stream <-chan RecordMessage, handler *CArrowAsyncDeviceStreamHandler) error { return exportAsyncProducer(schema, stream, handler) } arrow-go-18.2.0/arrow/cdata/test/000077500000000000000000000000001476434502500165365ustar00rootroot00000000000000arrow-go-18.2.0/arrow/cdata/test/test_cimport.go000066400000000000000000000115301476434502500216010ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build cdata_test // +build cdata_test package main import ( "fmt" "runtime" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/cdata" "github.com/apache/arrow-go/v18/arrow/memory" ) // #include import "C" var alloc = memory.NewCheckedAllocator(memory.NewGoAllocator()) //export totalAllocated func totalAllocated() int64 { return int64(alloc.CurrentAlloc()) } //export runGC func runGC() { runtime.GC() } //export importSchema func importSchema(ptr uintptr) { schema, err := cdata.ImportCArrowSchema(cdata.SchemaFromPtr(ptr)) if err != nil { panic(err) } expectedMetadata := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) expectedSchema := arrow.NewSchema([]arrow.Field{{Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}}, &expectedMetadata) if !schema.Equal(expectedSchema) { panic(fmt.Sprintf("schema didn't match: expected %s, got %s", expectedSchema, schema)) } if !schema.Metadata().Equal(expectedMetadata) { panic(fmt.Sprintf("metadata didn't match: expected %s, got %s", expectedMetadata, schema.Metadata())) } fmt.Println("schema matches! 
Huzzah!") } //export importRecordBatch func importRecordBatch(scptr, rbptr uintptr) { sc := cdata.SchemaFromPtr(scptr) rb := cdata.ArrayFromPtr(rbptr) rec, err := cdata.ImportCRecordBatch(rb, sc) if err != nil { panic(err) } defer rec.Release() expectedMetadata := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) expectedSchema := arrow.NewSchema([]arrow.Field{{Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}}, &expectedMetadata) bldr := array.NewRecordBuilder(alloc, expectedSchema) defer bldr.Release() lb := bldr.Field(0).(*array.ListBuilder) vb := lb.ValueBuilder().(*array.Int32Builder) // [[[1], [], None [2, 42]]] lb.Append(true) vb.Append(int32(1)) lb.Append(true) lb.Append(false) lb.Append(true) vb.AppendValues([]int32{2, 42}, nil) expectedRec := bldr.NewRecord() defer expectedRec.Release() if !array.RecordEqual(expectedRec, rec) { panic(fmt.Sprintf("records didn't match: expected %s\n got %s", expectedRec, rec)) } fmt.Println("record batch matches huzzah!") } func makeSchema() *arrow.Schema { meta := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) return arrow.NewSchema([]arrow.Field{ {Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}, }, &meta) } func makeBatch() arrow.Record { bldr := array.NewRecordBuilder(alloc, makeSchema()) defer bldr.Release() fbldr := bldr.Field(0).(*array.ListBuilder) valbldr := fbldr.ValueBuilder().(*array.Int32Builder) fbldr.Append(true) valbldr.Append(1) fbldr.Append(true) fbldr.AppendNull() fbldr.Append(true) valbldr.Append(2) valbldr.Append(42) return bldr.NewRecord() } //export exportSchema func exportSchema(schema uintptr) { cdata.ExportArrowSchema(makeSchema(), cdata.SchemaFromPtr(schema)) } //export exportRecordBatch func exportRecordBatch(schema, record uintptr) { batch := makeBatch() defer batch.Release() cdata.ExportArrowRecordBatch(batch, cdata.ArrayFromPtr(record), cdata.SchemaFromPtr(schema)) } //export importThenExportSchema func 
importThenExportSchema(input, output uintptr) { schema, err := cdata.ImportCArrowSchema(cdata.SchemaFromPtr(input)) if err != nil { panic(err) } cdata.ExportArrowSchema(schema, cdata.SchemaFromPtr(output)) } //export importThenExportRecord func importThenExportRecord(schemaIn, arrIn uintptr, schemaOut, arrOut uintptr) { rec, err := cdata.ImportCRecordBatch(cdata.ArrayFromPtr(arrIn), cdata.SchemaFromPtr(schemaIn)) if err != nil { panic(err) } defer rec.Release() cdata.ExportArrowRecordBatch(rec, cdata.ArrayFromPtr(arrOut), cdata.SchemaFromPtr(schemaOut)) } //export roundtripArray func roundtripArray(arrIn, schema, arrOut uintptr) { _, arr, err := cdata.ImportCArray(cdata.ArrayFromPtr(arrIn), cdata.SchemaFromPtr(schema)) if err != nil { panic(err) } defer arr.Release() outArr := cdata.ArrayFromPtr(arrOut) cdata.ExportArrowArray(arr, outArr, nil) } func main() {} arrow-go-18.2.0/arrow/cdata/test/test_export_to_cgo.py000066400000000000000000000201471476434502500230260ustar00rootroot00000000000000#!/usr/bin/env python3 # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import contextlib import gc import os import unittest import pyarrow as pa from pyarrow.cffi import ffi def load_cgotest(): # XXX what about Darwin? 
libext = 'so' if os.name == 'nt': libext = 'dll' ffi.cdef( """ long long totalAllocated(); void importSchema(uintptr_t ptr); void importRecordBatch(uintptr_t scptr, uintptr_t rbptr); void runGC(); void exportSchema(uintptr_t ptr); void exportRecordBatch(uintptr_t schema, uintptr_t record); void importThenExportSchema(uintptr_t input, uintptr_t output); void importThenExportRecord(uintptr_t schemaIn, uintptr_t arrIn, uintptr_t schemaOut, uintptr_t arrOut); void roundtripArray(uintptr_t arrIn, uintptr_t schema, uintptr_t arrOut); """) return ffi.dlopen(f'./cgotest.{libext}') cgotest = load_cgotest() class BaseTestGoPython(unittest.TestCase): def setUp(self): self.c_schema = ffi.new("struct ArrowSchema*") self.ptr_schema = int(ffi.cast("uintptr_t", self.c_schema)) self.c_array = ffi.new("struct ArrowArray*") self.ptr_array = int(ffi.cast("uintptr_t", self.c_array)) def make_schema(self): return pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'}) def make_batch(self): return pa.record_batch([[[1], [], None, [2, 42]]], self.make_schema()) def run_gc(self): # Several Go GC runs can be required to run all finalizers for i in range(5): cgotest.runGC() gc.collect() @contextlib.contextmanager def assert_pyarrow_memory_released(self): self.run_gc() old_allocated = pa.total_allocated_bytes() old_go_allocated = cgotest.totalAllocated() yield self.run_gc() diff = pa.total_allocated_bytes() - old_allocated godiff = cgotest.totalAllocated() - old_go_allocated self.assertEqual( pa.total_allocated_bytes(), old_allocated, f"PyArrow memory was not adequately released: {diff} bytes lost") self.assertEqual( cgotest.totalAllocated(), old_go_allocated, f"Go memory was not properly released: {godiff} bytes lost") class TestPythonToGo(BaseTestGoPython): def test_schema(self): with self.assert_pyarrow_memory_released(): self.make_schema()._export_to_c(self.ptr_schema) # Will panic if expectations are not met cgotest.importSchema(self.ptr_schema) def 
test_record_batch(self): with self.assert_pyarrow_memory_released(): self.make_schema()._export_to_c(self.ptr_schema) self.make_batch()._export_to_c(self.ptr_array) # Will panic if expectations are not met cgotest.importRecordBatch(self.ptr_schema, self.ptr_array) class TestGoToPython(BaseTestGoPython): def test_get_schema(self): with self.assert_pyarrow_memory_released(): cgotest.exportSchema(self.ptr_schema) sc = pa.Schema._import_from_c(self.ptr_schema) assert sc == self.make_schema() def test_get_batch(self): with self.assert_pyarrow_memory_released(): cgotest.exportRecordBatch(self.ptr_schema, self.ptr_array) arrnew = pa.RecordBatch._import_from_c(self.ptr_array, self.ptr_schema) assert arrnew == self.make_batch() del arrnew class TestRoundTrip(BaseTestGoPython): def test_schema_roundtrip(self): with self.assert_pyarrow_memory_released(): # make sure that Python -> Go -> Python ends up with # the same exact schema schema = self.make_schema() schema._export_to_c(self.ptr_schema) del schema c_schema = ffi.new("struct ArrowSchema*") ptr_schema = int(ffi.cast("uintptr_t", c_schema)) cgotest.importThenExportSchema(self.ptr_schema, ptr_schema) schema_new = pa.Schema._import_from_c(ptr_schema) assert schema_new == self.make_schema() del c_schema def test_batch_roundtrip(self): with self.assert_pyarrow_memory_released(): # make sure that Python -> Go -> Python for record # batches works correctly and gets the same data in the end schema = self.make_schema() batch = self.make_batch() schema._export_to_c(self.ptr_schema) batch._export_to_c(self.ptr_array) del schema del batch c_schema = ffi.new("struct ArrowSchema*") c_batch = ffi.new("struct ArrowArray*") ptr_schema = int(ffi.cast("uintptr_t", c_schema)) ptr_batch = int(ffi.cast("uintptr_t", c_batch)) cgotest.importThenExportRecord(self.ptr_schema, self.ptr_array, ptr_schema, ptr_batch) batch_new = pa.RecordBatch._import_from_c(ptr_batch, ptr_schema) assert batch_new == self.make_batch() del batch_new del c_schema del 
c_batch # commented out types can be uncommented after # GH-14875 is addressed _test_pyarrow_types = [ pa.null(), pa.bool_(), pa.int32(), pa.time32("s"), pa.time64("us"), pa.date32(), pa.timestamp("us"), pa.timestamp("us", tz="UTC"), pa.timestamp("us", tz="Europe/Paris"), pa.duration("s"), pa.duration("ms"), pa.duration("us"), pa.duration("ns"), pa.float16(), pa.float32(), pa.float64(), pa.decimal128(19, 4), pa.string(), pa.binary(), pa.binary(10), pa.large_string(), pa.large_binary(), pa.list_(pa.int32()), pa.list_(pa.int32(), 2), pa.large_list(pa.uint16()), pa.struct([ pa.field("a", pa.int32()), pa.field("b", pa.int8()), pa.field("c", pa.string()), ]), pa.struct([ pa.field("a", pa.int32(), nullable=False), pa.field("b", pa.int8(), nullable=False), pa.field("c", pa.string()), ]), pa.dictionary(pa.int8(), pa.int64()), pa.dictionary(pa.int8(), pa.string()), pa.map_(pa.string(), pa.int32()), pa.map_(pa.int64(), pa.int32()), # pa.run_end_encoded(pa.int16(), pa.int64()), ] def test_empty_roundtrip(self): for typ in self._test_pyarrow_types: with self.subTest(typ=typ): with self.assert_pyarrow_memory_released(): a = pa.array([], typ) a._export_to_c(self.ptr_array) typ._export_to_c(self.ptr_schema) c_arr = ffi.new("struct ArrowArray*") ptr_arr = int(ffi.cast("uintptr_t", c_arr)) cgotest.roundtripArray(self.ptr_array, self.ptr_schema, ptr_arr) b = pa.Array._import_from_c(ptr_arr, typ) b.validate(full=True) assert a.to_pylist() == b.to_pylist() assert a.type == b.type del a del b if __name__ == '__main__': unittest.main(verbosity=2) arrow-go-18.2.0/arrow/cdata/trampoline.c000066400000000000000000000031541476434502500201000ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include "abi.h" int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); int asyncTaskExtract(struct ArrowAsyncTask*, struct ArrowDeviceArray*); int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out) { // XXX(https://github.com/apache/arrow-adbc/issues/729) memset(out, 0, sizeof(*out)); return streamGetSchema(stream, out); } int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out) { // XXX(https://github.com/apache/arrow-adbc/issues/729) memset(out, 0, sizeof(*out)); return streamGetNext(stream, out); } int asyncTaskExtractTrampoline(struct ArrowAsyncTask* task, struct ArrowDeviceArray* out) { memset(out, 0, sizeof(*out)); return asyncTaskExtract(task, out); } arrow-go-18.2.0/arrow/cdata/utils.h000066400000000000000000000031571476434502500170760ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // +build cgo // +build test // metadata keys 1: {"key1", "key2"} // metadata values 1: {"", "bar"} static const char kEncodedMeta1LE[] = { 2, 0, 0, 0, 4, 0, 0, 0, 'k', 'e', 'y', '1', 0, 0, 0, 0, 4, 0, 0, 0, 'k', 'e', 'y', '2', 3, 0, 0, 0, 'b', 'a', 'r'}; static const char kEncodedMeta1BE[] = { 0, 0, 0, 2, 0, 0, 0, 4, 'k', 'e', 'y', '1', 0, 0, 0, 0, 0, 0, 0, 4, 'k', 'e', 'y', '2', 0, 0, 0, 3, 'b', 'a', 'r'}; static const char* kMetadataKeys2[] = {"key"}; static const char* kMetadataValues2[] = {"abcde"}; // metadata keys 2: {"key"} // metadata values 2: {"abcde"} static const char kEncodedMeta2LE[] = { 1, 0, 0, 0, 3, 0, 0, 0, 'k', 'e', 'y', 5, 0, 0, 0, 'a', 'b', 'c', 'd', 'e'}; static const char kEncodedMeta2BE[] = { 0, 0, 0, 1, 0, 0, 0, 3, 'k', 'e', 'y', 0, 0, 0, 5, 'a', 'b', 'c', 'd', 'e'}; arrow-go-18.2.0/arrow/compare.go000066400000000000000000000106521476434502500164640ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package arrow import ( "reflect" ) type typeEqualsConfig struct { metadata bool } // TypeEqualOption is a functional option type used for configuring type // equality checks. type TypeEqualOption func(*typeEqualsConfig) // CheckMetadata is an option for TypeEqual that allows checking for metadata // equality besides type equality. It only makes sense for types with metadata. func CheckMetadata() TypeEqualOption { return func(cfg *typeEqualsConfig) { cfg.metadata = true } } // TypeEqual checks if two DataType are the same, optionally checking metadata // equality for STRUCT types. func TypeEqual(left, right DataType, opts ...TypeEqualOption) bool { var cfg typeEqualsConfig for _, opt := range opts { opt(&cfg) } switch { case left == nil || right == nil: return left == nil && right == nil case left.ID() != right.ID(): return false } switch l := left.(type) { case ExtensionType: return l.ExtensionEquals(right.(ExtensionType)) case *ListType: if !TypeEqual(l.Elem(), right.(*ListType).Elem(), opts...) { return false } if cfg.metadata && !l.elem.Metadata.Equal(right.(*ListType).elem.Metadata) { return false } return l.elem.Nullable == right.(*ListType).elem.Nullable case *FixedSizeListType: if !TypeEqual(l.Elem(), right.(*FixedSizeListType).Elem(), opts...) { return false } if cfg.metadata && !l.elem.Metadata.Equal(right.(*FixedSizeListType).elem.Metadata) { return false } return l.n == right.(*FixedSizeListType).n && l.elem.Nullable == right.(*FixedSizeListType).elem.Nullable case *MapType: if !TypeEqual(l.KeyType(), right.(*MapType).KeyType(), opts...) 
{ return false } if !TypeEqual(l.ItemType(), right.(*MapType).ItemType(), opts...) { return false } if l.KeyField().Nullable != right.(*MapType).KeyField().Nullable { return false } if l.ItemField().Nullable != right.(*MapType).ItemField().Nullable { return false } if cfg.metadata { if !l.KeyField().Metadata.Equal(right.(*MapType).KeyField().Metadata) { return false } if !l.ItemField().Metadata.Equal(right.(*MapType).ItemField().Metadata) { return false } } return true case *StructType: r := right.(*StructType) switch { case len(l.fields) != len(r.fields): return false case !reflect.DeepEqual(l.index, r.index): return false } for i := range l.fields { leftField, rightField := l.fields[i], r.fields[i] switch { case leftField.Name != rightField.Name: return false case leftField.Nullable != rightField.Nullable: return false case !TypeEqual(leftField.Type, rightField.Type, opts...): return false case cfg.metadata && !leftField.Metadata.Equal(rightField.Metadata): return false } } return true case UnionType: r := right.(UnionType) if l.Mode() != r.Mode() { return false } if !reflect.DeepEqual(l.ChildIDs(), r.ChildIDs()) { return false } for i := range l.Fields() { leftField, rightField := l.Fields()[i], r.Fields()[i] switch { case leftField.Name != rightField.Name: return false case leftField.Nullable != rightField.Nullable: return false case !TypeEqual(leftField.Type, rightField.Type, opts...): return false case cfg.metadata && !leftField.Metadata.Equal(rightField.Metadata): return false case l.TypeCodes()[i] != r.TypeCodes()[i]: return false } } return true case *TimestampType: r := right.(*TimestampType) return l.Unit == r.Unit && l.TimeZone == r.TimeZone case *RunEndEncodedType: r := right.(*RunEndEncodedType) return TypeEqual(l.Encoded(), r.Encoded(), opts...) && TypeEqual(l.runEnds, r.runEnds, opts...) 
default: return reflect.DeepEqual(left, right) } } arrow-go-18.2.0/arrow/compare_test.go000066400000000000000000000304711476434502500175240ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package arrow import ( "testing" "time" ) func TestTypeEqual(t *testing.T) { tests := []struct { left, right DataType want bool checkMetadata bool }{ { nil, nil, true, false, }, { nil, PrimitiveTypes.Uint8, false, false, }, { PrimitiveTypes.Float32, nil, false, false, }, { PrimitiveTypes.Float64, PrimitiveTypes.Int32, false, false, }, { Null, Null, true, false, }, { Null, new(NullType), true, false, }, { &BinaryType{}, &StringType{}, false, false, }, { &LargeBinaryType{}, &LargeStringType{}, false, false, }, { BinaryTypes.LargeBinary, &LargeBinaryType{}, true, false, }, { BinaryTypes.LargeString, &LargeStringType{}, true, false, }, { &Time32Type{Unit: Second}, &Time32Type{Unit: Second}, true, false, }, { &Time32Type{Unit: Millisecond}, &Time32Type{Unit: Second}, false, false, }, { &Time64Type{Unit: Nanosecond}, &Time64Type{Unit: Nanosecond}, true, false, }, { &Time64Type{Unit: Nanosecond}, &Time64Type{Unit: Microsecond}, false, false, }, { &TimestampType{Unit: Second, TimeZone: "UTC"}, &TimestampType{Unit: Second, TimeZone: 
"UTC"}, true, false, }, { &TimestampType{Unit: Microsecond, TimeZone: "UTC"}, &TimestampType{Unit: Millisecond, TimeZone: "UTC"}, false, false, }, { &TimestampType{Unit: Second, TimeZone: "UTC"}, &TimestampType{Unit: Second, TimeZone: "CET"}, false, false, }, { &TimestampType{Unit: Second, TimeZone: "UTC"}, &TimestampType{Unit: Nanosecond, TimeZone: "CET"}, false, false, }, { &ListType{elem: Field{Type: PrimitiveTypes.Uint64}}, &ListType{elem: Field{Type: PrimitiveTypes.Uint64}}, true, false, }, { &ListType{elem: Field{Type: PrimitiveTypes.Uint64}}, &ListType{elem: Field{Type: PrimitiveTypes.Uint32}}, false, false, }, { &ListType{elem: Field{Type: &Time32Type{Unit: Millisecond}}}, &ListType{elem: Field{Type: &Time32Type{Unit: Millisecond}}}, true, false, }, { &ListType{elem: Field{Type: &Time32Type{Unit: Millisecond}}}, &ListType{elem: Field{Type: &Time32Type{Unit: Second}}}, false, false, }, { &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}, &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}, true, false, }, { &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}, &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint8}}}}, false, false, }, { &ListType{elem: Field{Type: &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}}}, &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint8}}}}, false, false, }, { &ListType{elem: Field{Type: PrimitiveTypes.Uint64, Nullable: true}}, &ListType{elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, false, true, }, { &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, &FixedSizeListType{n: 3, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, false, true, }, { &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: 
false}}, true, true, }, { &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: true}}, false, true, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, }, index: map[string][]int{"f1": {0}}, }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0}}, }, false, true, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: false}, }, index: map[string][]int{"f1": {0}}, }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0}}, }, false, false, }, { &StructType{ fields: []Field{ {Name: "f0", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f0": {0}}, }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0}}, }, false, false, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0}}, }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, {Name: "f2", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0}, "f2": {1}}, }, false, true, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0}}, }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, {Name: "f2", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0}, "f2": {1}}, }, false, false, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0}}, }, &StructType{ fields: []Field{ {Name: "f2", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: 
map[string][]int{"f2": {0}}, }, false, false, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, }, index: map[string][]int{"f1": {0}, "f2": {1}}, }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, }, index: map[string][]int{"f1": {0}, "f2": {1}}, }, true, false, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, }, index: map[string][]int{"f1": {0}, "f2": {1}}, }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, }, index: map[string][]int{"f1": {0}, "f2": {1}}, }, true, false, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, }, index: map[string][]int{"f1": {0}, "f2": {1}}, meta: MetadataFrom(map[string]string{"k1": "v1", "k2": "v2"}), }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, }, index: map[string][]int{"f1": {0}, "f2": {1}}, meta: MetadataFrom(map[string]string{"k2": "v2", "k1": "v1"}), }, true, true, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0}}, meta: MetadataFrom(map[string]string{"k1": "v1"}), }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0}}, meta: MetadataFrom(map[string]string{"k1": "v2"}), }, true, false, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true, Metadata: MetadataFrom(map[string]string{"k1": "v1"})}, {Name: "f2", Type: 
PrimitiveTypes.Float32, Nullable: false}, }, index: map[string][]int{"f1": {0}, "f2": {1}}, }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true, Metadata: MetadataFrom(map[string]string{"k1": "v2"})}, {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, }, index: map[string][]int{"f1": {0}, "f2": {1}}, }, false, true, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0, 1}}, }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0, 1}}, }, true, true, }, { &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, }, index: map[string][]int{"f1": {0, 1}}, }, &StructType{ fields: []Field{ {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, }, index: map[string][]int{"f1": {0, 1}}, }, false, true, }, { MapOf(BinaryTypes.String, PrimitiveTypes.Int32), MapOf(BinaryTypes.String, PrimitiveTypes.Int32), true, false, }, { MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), true, false, }, { MapOf(BinaryTypes.String, &TimestampType{ Unit: 0, TimeZone: "UTC", loc: time.UTC, }), MapOf(BinaryTypes.String, &TimestampType{ Unit: 0, TimeZone: "UTC", }), true, false, }, { MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_us), false, false, }, { MapOf(BinaryTypes.String, FixedWidthTypes.Timestamp_ns), MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), false, false, }, { MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, 
MetadataFrom(map[string]string{"item": "v1"})), MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), true, true, }, { MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v2"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v2"})), true, false, }, { MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v2"})), false, true, }, { MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v2"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), false, true, }, } for _, test := range tests { t.Run("", func(t *testing.T) { var got bool if test.checkMetadata { got = TypeEqual(test.left, test.right, CheckMetadata()) } else { got = TypeEqual(test.left, test.right) } if got != test.want { t.Fatalf("TypeEqual(%v, %v, %v): got=%v, want=%v", test.left, test.right, test.checkMetadata, got, test.want) } }) } } arrow-go-18.2.0/arrow/compute/000077500000000000000000000000001476434502500161575ustar00rootroot00000000000000arrow-go-18.2.0/arrow/compute/arithmetic.go000066400000000000000000001231511476434502500206420ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build go1.18

package compute

import (
	"context"
	"fmt"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/compute/exec"
	"github.com/apache/arrow-go/v18/arrow/compute/internal/kernels"
	"github.com/apache/arrow-go/v18/arrow/decimal128"
	"github.com/apache/arrow-go/v18/arrow/decimal256"
	"github.com/apache/arrow-go/v18/arrow/scalar"
)

// Re-exported option/mode types from the internal kernels package so callers
// of this package do not need to import internal paths.
type (
	RoundOptions           = kernels.RoundOptions
	RoundMode              = kernels.RoundMode
	RoundToMultipleOptions = kernels.RoundToMultipleOptions
)

const (
	// Round to nearest integer less than or equal in magnitude (aka "floor")
	RoundDown = kernels.RoundDown
	// Round to nearest integer greater than or equal in magnitude (aka "ceil")
	RoundUp = kernels.RoundUp
	// Get integral part without fractional digits (aka "trunc")
	RoundTowardsZero = kernels.TowardsZero
	// Round negative values with DOWN and positive values with UP
	RoundTowardsInfinity = kernels.AwayFromZero
	// Round ties with DOWN (aka "round half towards negative infinity")
	RoundHalfDown = kernels.HalfDown
	// Round ties with UP (aka "round half towards positive infinity")
	RoundHalfUp = kernels.HalfUp
	// Round ties with TowardsZero (aka "round half away from infinity")
	RoundHalfTowardsZero = kernels.HalfTowardsZero
	// Round ties with AwayFromZero (aka "round half towards infinity")
	RoundHalfTowardsInfinity = kernels.HalfAwayFromZero
	// Round ties to nearest even integer
	RoundHalfToEven = kernels.HalfToEven
	// Round ties to nearest odd integer
	RoundHalfToOdd = kernels.HalfToOdd
)

var (
	// DefaultRoundOptions rounds to 0 digits, breaking ties to even.
	DefaultRoundOptions = RoundOptions{NDigits: 0, Mode: RoundHalfToEven}
	// DefaultRoundToMultipleOptions rounds to the nearest multiple of 1,
	// breaking ties to even.
	DefaultRoundToMultipleOptions = RoundToMultipleOptions{
		Multiple: scalar.NewFloat64Scalar(1), Mode: RoundHalfToEven}
)

// arithmeticFunction is a ScalarFunction with implicit type promotion for
// numeric/temporal arguments; promote controls how decimal args are handled.
type arithmeticFunction struct {
	ScalarFunction

	promote decimalPromotion
}

// Execute runs the function against args via the shared execInternal
// machinery (-1 = no preallocated chunk size hint).
func (fn *arithmeticFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) {
	return execInternal(ctx, fn, opts, -1, args...)
}

// checkDecimals validates/casts decimal arguments for binary functions.
// Non-decimal or non-binary argument lists pass through untouched; a decimal
// argument with promote == decPromoteNone is an error.
func (fn *arithmeticFunction) checkDecimals(vals ...arrow.DataType) error {
	if !hasDecimal(vals...) {
		return nil
	}

	if len(vals) != 2 {
		return nil
	}

	if fn.promote == decPromoteNone {
		return fmt.Errorf("%w: invalid decimal function: %s", arrow.ErrInvalid, fn.name)
	}

	return castBinaryDecimalArgs(fn.promote, vals...)
}

// DispatchBest finds a kernel for vals, first trying an exact match and then
// applying dictionary decoding, null replacement, and common temporal/numeric
// promotion (binary functions only) before retrying.
func (fn *arithmeticFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) {
	if err := fn.checkArity(len(vals)); err != nil {
		return nil, err
	}

	if err := fn.checkDecimals(vals...); err != nil {
		return nil, err
	}

	if kn, err := fn.DispatchExact(vals...); err == nil {
		return kn, nil
	}

	ensureDictionaryDecoded(vals...)

	// only promote types for binary funcs
	if len(vals) == 2 {
		replaceNullWithOtherType(vals...)
		if unit, istime := commonTemporalResolution(vals...); istime {
			replaceTemporalTypes(unit, vals...)
		} else {
			if dt := commonNumeric(vals...); dt != nil {
				replaceTypes(dt, vals...)
			}
		}
	}

	return fn.DispatchExact(vals...)
}

// an arithmetic function which promotes integers and decimal
// arguments to doubles.
type arithmeticFloatingPointFunc struct {
	arithmeticFunction
}

// Execute runs the function via the shared execInternal machinery.
func (fn *arithmeticFloatingPointFunc) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) {
	return execInternal(ctx, fn, opts, -1, args...)
}

// DispatchBest is like arithmeticFunction.DispatchBest, except all integer
// and decimal inputs are first promoted to float64.
func (fn *arithmeticFloatingPointFunc) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) {
	if err := fn.checkArity(len(vals)); err != nil {
		return nil, err
	}

	if kn, err := fn.DispatchExact(vals...); err == nil {
		return kn, nil
	}

	ensureDictionaryDecoded(vals...)

	if len(vals) == 2 {
		replaceNullWithOtherType(vals...)
	}

	for i, v := range vals {
		if arrow.IsInteger(v.ID()) || arrow.IsDecimal(v.ID()) {
			vals[i] = arrow.PrimitiveTypes.Float64
		}
	}

	if dt := commonNumeric(vals...); dt != nil {
		replaceTypes(dt, vals...)
	}

	return fn.DispatchExact(vals...)
}

// function that promotes only decimal arguments to float64
type arithmeticDecimalToFloatingPointFunc struct {
	arithmeticFunction
}

// Execute runs the function via the shared execInternal machinery.
func (fn *arithmeticDecimalToFloatingPointFunc) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) {
	return execInternal(ctx, fn, opts, -1, args...)
}

// DispatchBest promotes decimal inputs (only) to float64 before the usual
// common-numeric promotion and kernel lookup.
func (fn *arithmeticDecimalToFloatingPointFunc) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) {
	if err := fn.checkArity(len(vals)); err != nil {
		return nil, err
	}

	if kn, err := fn.DispatchExact(vals...); err == nil {
		return kn, nil
	}

	ensureDictionaryDecoded(vals...)

	if len(vals) == 2 {
		replaceNullWithOtherType(vals...)
	}

	for i, t := range vals {
		if arrow.IsDecimal(t.ID()) {
			vals[i] = arrow.PrimitiveTypes.Float64
		}
	}

	if dt := commonNumeric(vals...); dt != nil {
		replaceTypes(dt, vals...)
	}

	return fn.DispatchExact(vals...)
}

// function that promotes only integer arguments to float64
type arithmeticIntegerToFloatingPointFunc struct {
	arithmeticFunction
}

// Execute runs the function via the shared execInternal machinery.
func (fn *arithmeticIntegerToFloatingPointFunc) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) {
	return execInternal(ctx, fn, opts, -1, args...)
}

// DispatchBest validates/casts decimal args, then promotes integer inputs
// (only) to float64 before the usual promotion and kernel lookup.
func (fn *arithmeticIntegerToFloatingPointFunc) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) {
	if err := fn.checkArity(len(vals)); err != nil {
		return nil, err
	}

	if err := fn.checkDecimals(vals...); err != nil {
		return nil, err
	}

	if kn, err := fn.DispatchExact(vals...); err == nil {
		return kn, nil
	}

	ensureDictionaryDecoded(vals...)

	if len(vals) == 2 {
		replaceNullWithOtherType(vals...)
	}

	for i, t := range vals {
		if arrow.IsInteger(t.ID()) {
			vals[i] = arrow.PrimitiveTypes.Float64
		}
	}

	if dt := commonNumeric(vals...); dt != nil {
		replaceTypes(dt, vals...)
	}

	return fn.DispatchExact(vals...)
}

// FunctionDoc values describing the scalar arithmetic functions registered by
// RegisterScalarArithmetic below.
var (
	absoluteValueUncheckedDoc = FunctionDoc{
		Summary: "Calculate the absolute value of the argument, element-wise",
		Description: `Results will wrap around on integer overflow
Use function "abs" if you want overflows to return an error`,
		ArgNames: []string{"x"},
	}
	absoluteValueDoc = FunctionDoc{
		Summary: "Calculate the absolute value of the argument element-wise",
		Description: `This function returns an error on overflow.
For a variant that won't fail on overflow, use function "abs_unchecked"`,
		ArgNames: []string{"x"},
	}
	addUncheckedDoc = FunctionDoc{
		Summary: "Add the arguments element-wise",
		Description: `Results will wrap around on integer overflow
Use the function "add" if you want overflow to return an error`,
		ArgNames: []string{"x", "y"},
	}
	addDoc = FunctionDoc{
		Summary: "Add the arguments element-wise",
		Description: `This function returns an error on overflow.
For a variant that won't fail on overflow, use function "add_unchecked"`,
		ArgNames: []string{"x", "y"},
	}
	subUncheckedDoc = FunctionDoc{
		Summary: "Subtract the arguments element-wise",
		Description: `This Results will wrap around on integer overflow.
Use the function "sub" if you want overflow to return an error`,
		ArgNames: []string{"x", "y"},
	}
	subDoc = FunctionDoc{
		Summary: "Subtract the arguments element-wise",
		Description: `This function returns an error on overflow.
For a variant that won't fail on overflow, use the function "sub_unchecked"`, ArgNames: []string{"x", "y"}, } mulUncheckedDoc = FunctionDoc{ Summary: "Multiply the arguments element-wise", Description: `Results will wrap around on integer overflow. Use function "multiply" if you want overflow to return an error`, ArgNames: []string{"x", "y"}, } mulDoc = FunctionDoc{ Summary: "Multiply the arguments element-wise", Description: `This function returns an error on overflow. For a variant that won't fail on overflow, use the function "multiply_unchecked"`, ArgNames: []string{"x", "y"}, } divUncheckedDoc = FunctionDoc{ Summary: "Divide the arguments element-wise", Description: `Integer division by zero returns an error. However integer overflow wraps around, and floating-point division by zero returns Inf. Use the function "divide" if you want to get an error in all the aforementioned cases.`, ArgNames: []string{"dividend", "divisor"}, } divDoc = FunctionDoc{ Summary: "Divide the arguments element-wise", Description: `An error is returned when trying to divide by zero, or when integer overflow is encountered.`, ArgNames: []string{"dividend", "divisor"}, } negateUncheckedDoc = FunctionDoc{ Summary: "Negate the argument element-wise", Description: `Results will wrap around on integer overflow Use function "negate" if you want overflow to return an error`, ArgNames: []string{"x"}, } negateDoc = FunctionDoc{ Summary: "Negate the argument element-wise", Description: `This function returns an error on overflow. For a variant that doesn't fail on overflow, use the function "negate_unchecked".`, ArgNames: []string{"x"}, } powUncheckedDoc = FunctionDoc{ Summary: "Raise argument to a power element-wise", Description: `Integers to negative integer powers return an error. However, integer overflow wraps around. 
If either base or exponent is null the result will be null.`, ArgNames: []string{"base", "exponent"}, } powDoc = FunctionDoc{ Summary: "Raise argument to a power element-wise", Description: `An error is returned when an integer is raised to a negative power or an integer overflow occurs.`, ArgNames: []string{"base", "exponent"}, } sqrtUncheckedDoc = FunctionDoc{ Summary: "Takes the square root of arguments element-wise", Description: `A negative argument returns an NaN. For a variant that returns an error, use function "sqrt"`, ArgNames: []string{"x"}, } sqrtDoc = FunctionDoc{ Summary: "Takes the square root of arguments element-wise", Description: `A negative argument returns an error. For a variant that instead returns NaN, use function "sqrt_unchecked"`, ArgNames: []string{"x"}, } signDoc = FunctionDoc{ Summary: "Get the signedness of the arguments element-wise", Description: `Output is -1 if <0, 1 if >0 and 0 for 0. NaN values return NaN. Integral values return signedness as Int8, and floating-point values return it with the same type as the input values.`, ArgNames: []string{"x"}, } bitWiseNotDoc = FunctionDoc{ Summary: "Bit-wise negate the arguments element-wise", Description: "Null values return null", ArgNames: []string{"x"}, } bitWiseAndDoc = FunctionDoc{ Summary: "Bit-wise AND the arguments element-wise", Description: "Null values return null", ArgNames: []string{"x", "y"}, } bitWiseOrDoc = FunctionDoc{ Summary: "Bit-wise OR the arguments element-wise", Description: "Null values return null", ArgNames: []string{"x", "y"}, } bitWiseXorDoc = FunctionDoc{ Summary: "Bit-wise XOR the arguments element-wise", Description: "Null values return null", ArgNames: []string{"x", "y"}, } shiftLeftUncheckedDoc = FunctionDoc{ Summary: "Left shift `x` by `y`", Description: `The shift operates as if on the two's complement representation of the number. In other words, this is equivalent to multiplying "x" by 2 to the power of "y", even if overflow occurs. 
"x" is returned if "y" (the amount to shift by) is (1) negative or (2) greater than or equal to the precision of "x". Use function "shift_left" if you want an invalid shift amount to return an error.`, ArgNames: []string{"x", "y"}, } shiftLeftDoc = FunctionDoc{ Summary: "Left shift `x` by `y`", Description: `The shift operates as if on the two's complement representation of the number. In other words, this is equivalent to multiplying "x" by 2 to the power of "y", even if overflow occurs. An error is raised if "y" (the amount to shift by) is (1) negative or (2) greater than or equal to the precision of "x". See "shift_left_unchecked" for a variant that doesn't fail for an invalid shift amount.`, ArgNames: []string{"x", "y"}, } shiftRightUncheckedDoc = FunctionDoc{ Summary: "Right shift `x` by `y`", Description: `This is equivalent to dividing "x" by 2 to the power "y". "x" is returned if "y" (the amount to shift by) is: (1) negative or (2) greater than or equal to the precision of "x". Use function "shift_right" if you want an invalid shift amount to return an error.`, ArgNames: []string{"x", "y"}, } shiftRightDoc = FunctionDoc{ Summary: "Right shift `x` by `y`", Description: `This is equivalent to dividing "x" by 2 to the power "y". An error is raised if "y" (the amount to shift by) is (1) negative or (2) greater than or equal to the precision of "x". 
See "shift_right_unchecked" for a variant that doesn't fail for an invalid shift amount.`, ArgNames: []string{"x", "y"}, } sinUncheckedDoc = FunctionDoc{ Summary: "Compute the sine", Description: `NaN is returned for invalid input values; to raise an error instead, see "sin"`, ArgNames: []string{"x"}, } sinDoc = FunctionDoc{ Summary: "Compute the sine", Description: `Invalid input values raise an error; to return NaN instead, see "sin_unchecked".`, ArgNames: []string{"x"}, } cosUncheckedDoc = FunctionDoc{ Summary: "Compute the cosine", Description: `NaN is returned for invalid input values; to raise an error instead, see "cos".`, ArgNames: []string{"x"}, } cosDoc = FunctionDoc{ Summary: "Compute the cosine", Description: `Infinite values raise an error; to return NaN instead, see "cos_unchecked".`, ArgNames: []string{"x"}, } tanUncheckedDoc = FunctionDoc{ Summary: "Compute the tangent", Description: `NaN is returned for invalid input values; to raise an error instead see "tan".`, ArgNames: []string{"x"}, } tanDoc = FunctionDoc{ Summary: "Compute the tangent", Description: `Infinite values raise an error; to return NaN instead, see "tan_unchecked".`, ArgNames: []string{"x"}, } asinUncheckedDoc = FunctionDoc{ Summary: "Compute the inverse sine", Description: `NaN is returned for invalid input values; to raise an error instead, see "asin"`, ArgNames: []string{"x"}, } asinDoc = FunctionDoc{ Summary: "Compute the inverse sine", Description: `Invalid input values raise an error; to return NaN instead see asin_unchecked.`, ArgNames: []string{"x"}, } acosUncheckedDoc = FunctionDoc{ Summary: "Compute the inverse cosine", Description: `NaN is returned for invalid input values; to raise an error instead, see "acos".`, ArgNames: []string{"x"}, } acosDoc = FunctionDoc{ Summary: "Compute the inverse cosine", Description: `Invalid input values raise an error; to return NaN instead, see "acos_unchecked".`, ArgNames: []string{"x"}, } atanDoc = FunctionDoc{ Summary: "Compute the 
inverse tangent of x", Description: `The return value is in the range [-pi/2, pi/2]; for a full return range [-pi, pi], see "atan2"`, ArgNames: []string{"x"}, } atan2Doc = FunctionDoc{ Summary: "Compute the inverse tangent of y/x", Description: "The return value is in the range [-pi, pi].", ArgNames: []string{"y", "x"}, } lnUncheckedDoc = FunctionDoc{ Summary: "Compute natural logarithm", Description: `Non-positive values return -Inf or NaN. Null values return null. Use function "ln" if you want non-positive values to raise an error.`, ArgNames: []string{"x"}, } lnDoc = FunctionDoc{ Summary: "Compute natural logarithm", Description: `Non-positive values raise an error. Null values return null. Use function "ln_unchecked" if you want non-positive values to return -Inf or NaN`, ArgNames: []string{"x"}, } log10UncheckedDoc = FunctionDoc{ Summary: "Compute base 10 logarithm", Description: `Non-positive values return -Inf or NaN. Null values return null. Use function "log10" if you want non-positive values to raise an error.`, ArgNames: []string{"x"}, } log10Doc = FunctionDoc{ Summary: "Compute base 10 logarithm", Description: `Non-positive values raise an error. Null values return null. Use function "log10_unchecked" if you want non-positive values to return -Inf or NaN.`, ArgNames: []string{"x"}, } log2UncheckedDoc = FunctionDoc{ Summary: "Compute base 2 logarithm", Description: `Non-positive values return -Inf or NaN. Null values return null. Use function "log2" if you want non-positive values to raise an error.`, ArgNames: []string{"x"}, } log2Doc = FunctionDoc{ Summary: "Compute base 2 logarithm", Description: `Non-positive values raise an error. Null values return null. Use function "log2_unchecked" if you want non-positive values to return -Inf or NaN`, ArgNames: []string{"x"}, } log1pUncheckedDoc = FunctionDoc{ Summary: "Compute natural log of (1+x)", Description: `Values <= -1 return -Inf or NaN. Null values return null. 
This function may be more precise than log(1 + x) for x close to zero. Use function "log1p" if you want invalid values to raise an error.`, ArgNames: []string{"x"}, } log1pDoc = FunctionDoc{ Summary: "Compute natural log of (1+x)", Description: `Values <= -1 return -Inf or NaN. Null values return null. This function may be more precise than (1 + x) for x close to zero. Use function "log1p_unchecked" if you want invalid values to return -Inf or NaN.`, ArgNames: []string{"x"}, } logbUncheckedDoc = FunctionDoc{ Summary: "Compute base `b` logarithm", Description: `Values <= 0 return -Inf or NaN. Null values return null. Use function "logb" if you want non-positive values to raise an error.`, ArgNames: []string{"x", "b"}, } logbDoc = FunctionDoc{ Summary: "Compute base `b` logarithm", Description: `Values <= 0 returns an error. Null values return null. Use function "logb_unchecked" if you want non-positive values to return -Inf or NaN.`, ArgNames: []string{"x", "b"}, } floorDoc = FunctionDoc{ Summary: "Round down to the nearest integer", Description: "Compute the largest integer value not greater than `x`", ArgNames: []string{"x"}, } ceilDoc = FunctionDoc{ Summary: "Round up to the nearest integer", Description: "Compute the smallest integer value not less than `x`", ArgNames: []string{"x"}, } truncDoc = FunctionDoc{ Summary: "Compute the integral part", Description: "Compute the nearest integer not greater than `x`", ArgNames: []string{"x"}, } roundDoc = FunctionDoc{ Summary: "Round to a given precision", Description: `Options are used to control the number of digits and rounding mode. Default behavior is to round to the nearest integer and use half-to-even rule to break ties.`, ArgNames: []string{"x"}, OptionsType: "RoundOptions", } roundToMultipleDoc = FunctionDoc{ Summary: "Round to a given multiple", Description: `Options are used to control the rounding multiple and rounding mode. 
Default behavior is to round to the nearest integer and use half-to-even rule to break ties.`,
		ArgNames:    []string{"x"},
		OptionsType: "RoundToMultipleOptions",
	}
)

// RegisterScalarArithmetic registers all of the scalar arithmetic functions
// (add/subtract/multiply/divide, absolute value, negate, sign, power,
// trigonometric and logarithmic functions, bitwise operations, shifts,
// and rounding) with the provided registry, including the temporal
// (timestamp/duration/time/date) and decimal kernels where applicable.
//
// Registration failures indicate a programming error in the kernel tables
// and therefore panic.
func RegisterScalarArithmetic(reg FunctionRegistry) {
	ops := []struct {
		funcName   string
		op         kernels.ArithmeticOp
		decPromote decimalPromotion
		doc        FunctionDoc
	}{
		{"add_unchecked", kernels.OpAdd, decPromoteAdd, addUncheckedDoc},
		{"add", kernels.OpAddChecked, decPromoteAdd, addDoc},
	}

	for _, o := range ops {
		fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote}
		kns := append(kernels.GetArithmeticBinaryKernels(o.op), kernels.GetDecimalBinaryKernels(o.op)...)
		kns = append(kns, kernels.GetArithmeticFunctionTimeDuration(o.op)...)
		for _, k := range kns {
			if err := fn.AddKernel(k); err != nil {
				panic(err)
			}
		}

		for _, unit := range arrow.TimeUnitValues {
			// timestamp + duration => timestamp (in either argument order)
			inType := exec.NewMatchedInput(exec.TimestampTypeUnit(unit))
			inDuration := exec.NewExactInput(&arrow.DurationType{Unit: unit})
			ex := kernels.ArithmeticExecSameType(arrow.TIMESTAMP, o.op)
			err := fn.AddNewKernel([]exec.InputType{inType, inDuration}, kernels.OutputFirstType, ex, nil)
			if err != nil {
				panic(err)
			}
			err = fn.AddNewKernel([]exec.InputType{inDuration, inType}, kernels.OutputLastType, ex, nil)
			if err != nil {
				panic(err)
			}

			// duration + duration = duration
			matchDur := exec.NewMatchedInput(exec.DurationTypeUnit(unit))
			ex = kernels.ArithmeticExecSameType(arrow.DURATION, o.op)
			err = fn.AddNewKernel([]exec.InputType{matchDur, matchDur},
				exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil)
			if err != nil {
				panic(err)
			}
		}

		reg.AddFunction(fn, false)
	}

	ops = []struct {
		funcName   string
		op         kernels.ArithmeticOp
		decPromote decimalPromotion
		doc        FunctionDoc
	}{
		{"sub_unchecked", kernels.OpSub, decPromoteAdd, subUncheckedDoc},
		{"sub", kernels.OpSubChecked, decPromoteAdd, subDoc},
		{"subtract_unchecked", kernels.OpSub, decPromoteAdd, subUncheckedDoc},
		{"subtract", kernels.OpSubChecked, decPromoteAdd, subDoc},
	}

	for _, o := range ops {
		fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote}
		kns := append(kernels.GetArithmeticBinaryKernels(o.op), kernels.GetDecimalBinaryKernels(o.op)...)
		kns = append(kns, kernels.GetArithmeticFunctionTimeDuration(o.op)...)
		for _, k := range kns {
			if err := fn.AddKernel(k); err != nil {
				panic(err)
			}
		}

		for _, unit := range arrow.TimeUnitValues {
			// timestamp - timestamp => duration
			inType := exec.NewMatchedInput(exec.TimestampTypeUnit(unit))
			ex := kernels.ArithmeticExecSameType(arrow.TIMESTAMP, o.op)
			err := fn.AddNewKernel([]exec.InputType{inType, inType}, kernels.OutputResolveTemporal, ex, nil)
			if err != nil {
				panic(err)
			}

			// timestamp - duration => timestamp
			inDuration := exec.NewExactInput(&arrow.DurationType{Unit: unit})
			ex = kernels.ArithmeticExecSameType(arrow.TIMESTAMP, o.op)
			err = fn.AddNewKernel([]exec.InputType{inType, inDuration}, kernels.OutputFirstType, ex, nil)
			if err != nil {
				panic(err)
			}

			// duration - duration = duration
			matchDur := exec.NewMatchedInput(exec.DurationTypeUnit(unit))
			ex = kernels.ArithmeticExecSameType(arrow.DURATION, o.op)
			err = fn.AddNewKernel([]exec.InputType{matchDur, matchDur},
				exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil)
			if err != nil {
				panic(err)
			}
		}

		// time32 - time32 = duration
		for _, unit := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond} {
			inType := exec.NewMatchedInput(exec.Time32TypeUnit(unit))
			internalEx := kernels.ArithmeticExecSameType(arrow.TIME32, o.op)
			ex := func(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
				if err := internalEx(ctx, batch, out); err != nil {
					return err
				}
				// the allocated space is for duration (an int64) but we
				// wrote the time32 - time32 as if the output was time32
				// so a quick copy in reverse expands the int32s to int64.
				// Iterating backwards ensures we never overwrite a value
				// that has not been widened yet.
				rawData := arrow.GetData[int32](out.Buffers[1].Buf)
				outData := arrow.GetData[int64](out.Buffers[1].Buf)

				for i := out.Len - 1; i >= 0; i-- {
					outData[i] = int64(rawData[i])
				}
				return nil
			}

			err := fn.AddNewKernel([]exec.InputType{inType, inType},
				exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil)
			if err != nil {
				panic(err)
			}
		}

		// time64 - time64 = duration
		for _, unit := range []arrow.TimeUnit{arrow.Microsecond, arrow.Nanosecond} {
			inType := exec.NewMatchedInput(exec.Time64TypeUnit(unit))
			ex := kernels.ArithmeticExecSameType(arrow.TIME64, o.op)
			err := fn.AddNewKernel([]exec.InputType{inType, inType},
				exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil)
			if err != nil {
				panic(err)
			}
		}

		// date32 - date32 = duration (seconds)
		inDate32 := exec.NewExactInput(arrow.FixedWidthTypes.Date32)
		ex := kernels.SubtractDate32(o.op)
		err := fn.AddNewKernel([]exec.InputType{inDate32, inDate32},
			exec.NewOutputType(arrow.FixedWidthTypes.Duration_s), ex, nil)
		if err != nil {
			panic(err)
		}

		// date64 - date64 = duration (milliseconds)
		inDate64 := exec.NewExactInput(arrow.FixedWidthTypes.Date64)
		ex = kernels.ArithmeticExecSameType(arrow.DATE64, o.op)
		err = fn.AddNewKernel([]exec.InputType{inDate64, inDate64},
			exec.NewOutputType(arrow.FixedWidthTypes.Duration_ms), ex, nil)
		if err != nil {
			panic(err)
		}

		reg.AddFunction(fn, false)
	}

	oplist := []struct {
		funcName    string
		op          kernels.ArithmeticOp
		decPromote  decimalPromotion
		doc         FunctionDoc
		commutative bool
	}{
		{"multiply_unchecked", kernels.OpMul, decPromoteMultiply, mulUncheckedDoc, true},
		{"multiply", kernels.OpMulChecked, decPromoteMultiply, mulDoc, true},
		{"divide_unchecked", kernels.OpDiv, decPromoteDivide, divUncheckedDoc, false},
		{"divide", kernels.OpDivChecked, decPromoteDivide, divDoc, false},
	}

	for _, o := range oplist {
		fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote}
		for _, k := range append(kernels.GetArithmeticBinaryKernels(o.op), kernels.GetDecimalBinaryKernels(o.op)...) {
			if err := fn.AddKernel(k); err != nil {
				panic(err)
			}
		}

		// duration * int64 = duration / duration / int64 = duration;
		// the reversed argument order is only valid for commutative ops.
		for _, unit := range arrow.TimeUnitValues {
			durInput := exec.NewExactInput(&arrow.DurationType{Unit: unit})
			i64Input := exec.NewExactInput(arrow.PrimitiveTypes.Int64)
			durOutput := exec.NewOutputType(&arrow.DurationType{Unit: unit})
			ex := kernels.ArithmeticExecSameType(arrow.DURATION, o.op)
			err := fn.AddNewKernel([]exec.InputType{durInput, i64Input}, durOutput, ex, nil)
			if err != nil {
				panic(err)
			}
			if o.commutative {
				err = fn.AddNewKernel([]exec.InputType{i64Input, durInput}, durOutput, ex, nil)
				if err != nil {
					panic(err)
				}
			}
		}

		reg.AddFunction(fn, false)
	}

	ops = []struct {
		funcName   string
		op         kernels.ArithmeticOp
		decPromote decimalPromotion
		doc        FunctionDoc
	}{
		{"abs_unchecked", kernels.OpAbsoluteValue, decPromoteNone, absoluteValueUncheckedDoc},
		{"abs", kernels.OpAbsoluteValueChecked, decPromoteNone, absoluteValueDoc},
		{"negate_unchecked", kernels.OpNegate, decPromoteNone, negateUncheckedDoc},
	}

	for _, o := range ops {
		fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Unary(), o.doc), decPromoteNone}
		kns := append(kernels.GetArithmeticUnaryKernels(o.op), kernels.GetDecimalUnaryKernels(o.op)...)
		for _, k := range kns {
			if err := fn.AddKernel(k); err != nil {
				panic(err)
			}
		}
		reg.AddFunction(fn, false)
	}

	// checked negate is only registered for signed and decimal types
	fn := &arithmeticFunction{*NewScalarFunction("negate", Unary(), negateDoc), decPromoteNone}
	kns := append(kernels.GetArithmeticUnarySignedKernels(kernels.OpNegateChecked), kernels.GetDecimalUnaryKernels(kernels.OpNegateChecked)...)
	for _, k := range kns {
		if err := fn.AddKernel(k); err != nil {
			panic(err)
		}
	}
	reg.AddFunction(fn, false)

	ops = []struct {
		funcName   string
		op         kernels.ArithmeticOp
		decPromote decimalPromotion
		doc        FunctionDoc
	}{
		{"sqrt_unchecked", kernels.OpSqrt, decPromoteNone, sqrtUncheckedDoc},
		{"sqrt", kernels.OpSqrtChecked, decPromoteNone, sqrtDoc},
		{"sin_unchecked", kernels.OpSin, decPromoteNone, sinUncheckedDoc},
		{"sin", kernels.OpSinChecked, decPromoteNone, sinDoc},
		{"cos_unchecked", kernels.OpCos, decPromoteNone, cosUncheckedDoc},
		{"cos", kernels.OpCosChecked, decPromoteNone, cosDoc},
		{"tan_unchecked", kernels.OpTan, decPromoteNone, tanUncheckedDoc},
		{"tan", kernels.OpTanChecked, decPromoteNone, tanDoc},
		{"asin_unchecked", kernels.OpAsin, decPromoteNone, asinUncheckedDoc},
		{"asin", kernels.OpAsinChecked, decPromoteNone, asinDoc},
		{"acos_unchecked", kernels.OpAcos, decPromoteNone, acosUncheckedDoc},
		{"acos", kernels.OpAcosChecked, decPromoteNone, acosDoc},
		{"atan", kernels.OpAtan, decPromoteNone, atanDoc},
		{"ln_unchecked", kernels.OpLn, decPromoteNone, lnUncheckedDoc},
		{"ln", kernels.OpLnChecked, decPromoteNone, lnDoc},
		{"log10_unchecked", kernels.OpLog10, decPromoteNone, log10UncheckedDoc},
		{"log10", kernels.OpLog10Checked, decPromoteNone, log10Doc},
		{"log2_unchecked", kernels.OpLog2, decPromoteNone, log2UncheckedDoc},
		{"log2", kernels.OpLog2Checked, decPromoteNone, log2Doc},
		{"log1p_unchecked", kernels.OpLog1p, decPromoteNone, log1pUncheckedDoc},
		{"log1p", kernels.OpLog1pChecked, decPromoteNone, log1pDoc},
	}

	for _, o := range ops {
		fn := &arithmeticFloatingPointFunc{arithmeticFunction{*NewScalarFunction(o.funcName, Unary(), o.doc), decPromoteNone}}
		kns := kernels.GetArithmeticUnaryFloatingPointKernels(o.op)
		for _, k := range kns {
			if err := fn.AddKernel(k); err != nil {
				panic(err)
			}
		}
		reg.AddFunction(fn, false)
	}

	ops = []struct {
		funcName   string
		op         kernels.ArithmeticOp
		decPromote decimalPromotion
		doc        FunctionDoc
	}{
		{"atan2", kernels.OpAtan2, decPromoteNone, atan2Doc},
		{"logb_unchecked", kernels.OpLogb, decPromoteNone, logbUncheckedDoc},
		{"logb", kernels.OpLogbChecked, decPromoteNone, logbDoc},
	}

	for _, o := range ops {
		// BUGFIX: these functions were previously registered with addDoc,
		// which clobbered their documentation with that of "add" and left
		// the doc field of the ops table above unused. Use o.doc so each
		// function carries its own FunctionDoc.
		fn := &arithmeticFloatingPointFunc{arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), decPromoteNone}}
		kns := kernels.GetArithmeticFloatingPointKernels(o.op)
		for _, k := range kns {
			if err := fn.AddKernel(k); err != nil {
				panic(err)
			}
		}
		reg.AddFunction(fn, false)
	}

	fn = &arithmeticFunction{*NewScalarFunction("sign", Unary(), signDoc), decPromoteNone}
	kns = kernels.GetArithmeticUnaryFixedIntOutKernels(arrow.PrimitiveTypes.Int8, kernels.OpSign)
	for _, k := range kns {
		if err := fn.AddKernel(k); err != nil {
			panic(err)
		}
	}
	reg.AddFunction(fn, false)

	ops = []struct {
		funcName   string
		op         kernels.ArithmeticOp
		decPromote decimalPromotion
		doc        FunctionDoc
	}{
		{"power_unchecked", kernels.OpPower, decPromoteNone, powUncheckedDoc},
		{"power", kernels.OpPowerChecked, decPromoteNone, powDoc},
	}

	for _, o := range ops {
		fn := &arithmeticDecimalToFloatingPointFunc{arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote}}
		kns := kernels.GetArithmeticBinaryKernels(o.op)
		for _, k := range kns {
			if err := fn.AddKernel(k); err != nil {
				panic(err)
			}
		}
		reg.AddFunction(fn, false)
	}

	bitWiseOps := []struct {
		funcName string
		op       kernels.BitwiseOp
		doc      FunctionDoc
	}{
		{"bit_wise_and", kernels.OpBitAnd, bitWiseAndDoc},
		{"bit_wise_or", kernels.OpBitOr, bitWiseOrDoc},
		{"bit_wise_xor", kernels.OpBitXor, bitWiseXorDoc},
	}

	for _, o := range bitWiseOps {
		fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), decPromoteNone}
		kns := kernels.GetBitwiseBinaryKernels(o.op)
		for _, k := range kns {
			if err := fn.AddKernel(k); err != nil {
				panic(err)
			}
		}
		reg.AddFunction(fn, false)
	}

	fn = &arithmeticFunction{*NewScalarFunction("bit_wise_not", Unary(), bitWiseNotDoc), decPromoteNone}
	for _, k := range kernels.GetBitwiseUnaryKernels() {
		if err := fn.AddKernel(k); err != nil {
			panic(err)
		}
	}
	reg.AddFunction(fn, false)

	shiftOps := []struct {
		funcName string
		dir      kernels.ShiftDir
		checked  bool
		doc      FunctionDoc
	}{
		{"shift_left", kernels.ShiftLeft, true, shiftLeftDoc},
		{"shift_left_unchecked", kernels.ShiftLeft, false, shiftLeftUncheckedDoc},
		{"shift_right", kernels.ShiftRight, true, shiftRightDoc},
		{"shift_right_unchecked", kernels.ShiftRight, false, shiftRightUncheckedDoc},
	}

	for _, o := range shiftOps {
		fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), decPromoteNone}
		kns := kernels.GetShiftKernels(o.dir, o.checked)
		for _, k := range kns {
			if err := fn.AddKernel(k); err != nil {
				panic(err)
			}
		}
		reg.AddFunction(fn, false)
	}

	// BUGFIX (floor/ceil/trunc below): the error returns from AddNewKernel
	// for the decimal kernels were previously discarded; they are now
	// checked for consistency with every other registration in this
	// function.
	floorFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("floor", Unary(), floorDoc), decPromoteNone}}
	kns = kernels.GetSimpleRoundKernels(kernels.RoundDown)
	for _, k := range kns {
		if err := floorFn.AddKernel(k); err != nil {
			panic(err)
		}
	}
	if err := floorFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL128)},
		kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal128.Num](kernels.RoundDown), nil); err != nil {
		panic(err)
	}
	if err := floorFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL256)},
		kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal256.Num](kernels.RoundDown), nil); err != nil {
		panic(err)
	}
	reg.AddFunction(floorFn, false)

	ceilFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("ceil", Unary(), ceilDoc), decPromoteNone}}
	kns = kernels.GetSimpleRoundKernels(kernels.RoundUp)
	for _, k := range kns {
		if err := ceilFn.AddKernel(k); err != nil {
			panic(err)
		}
	}
	if err := ceilFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL128)},
		kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal128.Num](kernels.RoundUp), nil); err != nil {
		panic(err)
	}
	if err := ceilFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL256)},
		kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal256.Num](kernels.RoundUp), nil); err != nil {
		panic(err)
	}
	reg.AddFunction(ceilFn, false)

	truncFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("trunc", Unary(), truncDoc), decPromoteNone}}
	kns = kernels.GetSimpleRoundKernels(kernels.TowardsZero)
	for _, k := range kns {
		if err := truncFn.AddKernel(k); err != nil {
			panic(err)
		}
	}
	if err := truncFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL128)},
		kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal128.Num](kernels.TowardsZero), nil); err != nil {
		panic(err)
	}
	if err := truncFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL256)},
		kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal256.Num](kernels.TowardsZero), nil); err != nil {
		panic(err)
	}
	reg.AddFunction(truncFn, false)

	roundFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("round", Unary(), roundDoc), decPromoteNone}}
	kns = kernels.GetRoundUnaryKernels(kernels.InitRoundState, kernels.UnaryRoundExec)
	for _, k := range kns {
		if err := roundFn.AddKernel(k); err != nil {
			panic(err)
		}
	}
	roundFn.defaultOpts = DefaultRoundOptions
	reg.AddFunction(roundFn, false)

	roundToMultipleFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("round_to_multiple", Unary(), roundToMultipleDoc), decPromoteNone}}
	kns = kernels.GetRoundUnaryKernels(kernels.InitRoundToMultipleState, kernels.UnaryRoundToMultipleExec)
	for _, k := range kns {
		if err := roundToMultipleFn.AddKernel(k); err != nil {
			panic(err)
		}
	}
	roundToMultipleFn.defaultOpts = DefaultRoundToMultipleOptions
	reg.AddFunction(roundToMultipleFn, false)
}

// impl dispatches a binary arithmetic call to the checked or unchecked
// variant of fn based on opts.NoCheckOverflow.
func impl(ctx context.Context, fn string, opts ArithmeticOptions, left, right Datum) (Datum, error) {
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}

	return CallFunction(ctx, fn, nil, left, right)
}

// Add performs an addition between the passed in arguments (scalar or array)
// and returns the result. If one argument is a scalar and the other is an
// array, the scalar value is added to each value of the array.
//
// ArithmeticOptions specifies whether or not to check for overflows,
// performance is faster if not explicitly checking for overflows but
// will error on an overflow if NoCheckOverflow is false (default).
func Add(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) {
	return impl(ctx, "add", opts, left, right)
}

// Subtract performs a subtraction between the passed in arguments (scalar or array)
// and returns the result. If one argument is a scalar and the other is an
// array, the scalar value is subtracted from each value of the array.
//
// ArithmeticOptions specifies whether or not to check for overflows,
// performance is faster if not explicitly checking for overflows but
// will error on an overflow if NoCheckOverflow is false (default).
func Subtract(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) {
	return impl(ctx, "sub", opts, left, right)
}

// Multiply performs a multiplication between the passed in arguments (scalar or array)
// and returns the result. If one argument is a scalar and the other is an
// array, the scalar value is multiplied against each value of the array.
//
// ArithmeticOptions specifies whether or not to check for overflows,
// performance is faster if not explicitly checking for overflows but
// will error on an overflow if NoCheckOverflow is false (default).
func Multiply(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) {
	return impl(ctx, "multiply", opts, left, right)
}

// Divide performs a division between the passed in arguments (scalar or array)
// and returns the result. If one argument is a scalar and the other is an
// array, the scalar value is used with each value of the array.
//
// ArithmeticOptions specifies whether or not to check for overflows,
// performance is faster if not explicitly checking for overflows but
// will error on an overflow if NoCheckOverflow is false (default).
//
// Will error on divide by zero regardless of whether or not checking for
// overflows.
func Divide(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) {
	return impl(ctx, "divide", opts, left, right)
}

// AbsoluteValue returns the AbsoluteValue for each element in the input
// argument. It accepts either a scalar or an array.
//
// ArithmeticOptions specifies whether or not to check for overflows,
// performance is faster if not explicitly checking for overflows but
// will error on an overflow if NoCheckOverflow is false (default).
func AbsoluteValue(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error) {
	fn := "abs"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
	return CallFunction(ctx, fn, nil, input)
}

// Negate returns a result containing the negation of each element in the
// input argument. It accepts either a scalar or an array.
//
// ArithmeticOptions specifies whether or not to check for overflows,
// or to throw an error on unsigned types.
func Negate(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error) {
	fn := "negate"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
	return CallFunction(ctx, fn, nil, input)
}

// Sign returns -1, 0, or 1 depending on the sign of each element in the
// input. For x in the input:
//
//	if x > 0: 1
//	if x < 0: -1
//	if x == 0: 0
func Sign(ctx context.Context, input Datum) (Datum, error) {
	return CallFunction(ctx, "sign", nil, input)
}

// Power returns base**exp for each element in the input arrays. Should work
// for both Arrays and Scalars
func Power(ctx context.Context, opts ArithmeticOptions, base, exp Datum) (Datum, error) {
	fn := "power"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
	return CallFunction(ctx, fn, nil, base, exp)
}

// ShiftLeft only accepts integral types and shifts each element of the
// first argument to the left by the value of the corresponding element
// in the second argument.
//
// The value to shift by should be >= 0 and < precision of the type.
func ShiftLeft(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error) {
	fn := "shift_left"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
	return CallFunction(ctx, fn, nil, lhs, rhs)
}

// ShiftRight only accepts integral types and shifts each element of the
// first argument to the right by the value of the corresponding element
// in the second argument.
//
// The value to shift by should be >= 0 and < precision of the type.
func ShiftRight(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error) {
	fn := "shift_right"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
	return CallFunction(ctx, fn, nil, lhs, rhs)
}

// Sin computes the sine of each element of arg (scalar or array) by calling
// the "sin" function, or "sin_unchecked" when opts.NoCheckOverflow is set.
func Sin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) {
	fn := "sin"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
	return CallFunction(ctx, fn, nil, arg)
}

// Cos computes the cosine of each element of arg (scalar or array) by calling
// the "cos" function, or "cos_unchecked" when opts.NoCheckOverflow is set.
func Cos(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) {
	fn := "cos"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
	return CallFunction(ctx, fn, nil, arg)
}

// Tan computes the tangent of each element of arg (scalar or array) by calling
// the "tan" function, or "tan_unchecked" when opts.NoCheckOverflow is set.
func Tan(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) {
	fn := "tan"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
	return CallFunction(ctx, fn, nil, arg)
}

// Asin computes the inverse sine of each element of arg (scalar or array) by
// calling the "asin" function, or "asin_unchecked" when opts.NoCheckOverflow
// is set.
func Asin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) {
	fn := "asin"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
	return CallFunction(ctx, fn, nil, arg)
}

// Acos computes the inverse cosine of each element of arg (scalar or array)
// by calling the "acos" function, or "acos_unchecked" when
// opts.NoCheckOverflow is set.
func Acos(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) {
	fn := "acos"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
	return CallFunction(ctx, fn, nil, arg)
}

// Atan computes the inverse tangent of each element of arg (scalar or array)
// by calling the "atan" function. There is no unchecked variant.
func Atan(ctx context.Context, arg Datum) (Datum, error) {
	return CallFunction(ctx, "atan", nil, arg)
}

// Atan2 calls the binary "atan2" function with x and y, in that order,
// as its two arguments.
func Atan2(ctx context.Context, x, y Datum) (Datum, error) {
	return CallFunction(ctx, "atan2", nil, x, y)
}

// Ln computes the natural logarithm of each element of arg (scalar or array)
// by calling the "ln" function, or "ln_unchecked" when opts.NoCheckOverflow
// is set.
func Ln(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) {
	fn := "ln"
	if opts.NoCheckOverflow {
		fn += "_unchecked"
	}
return CallFunction(ctx, fn, nil, arg) } func Log10(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { fn := "log10" if opts.NoCheckOverflow { fn += "_unchecked" } return CallFunction(ctx, fn, nil, arg) } func Log2(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { fn := "log2" if opts.NoCheckOverflow { fn += "_unchecked" } return CallFunction(ctx, fn, nil, arg) } func Log1p(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { fn := "log1p" if opts.NoCheckOverflow { fn += "_unchecked" } return CallFunction(ctx, fn, nil, arg) } func Logb(ctx context.Context, opts ArithmeticOptions, x, base Datum) (Datum, error) { fn := "logb" if opts.NoCheckOverflow { fn += "_unchecked" } return CallFunction(ctx, fn, nil, x, base) } func Round(ctx context.Context, opts RoundOptions, arg Datum) (Datum, error) { return CallFunction(ctx, "round", &opts, arg) } func RoundToMultiple(ctx context.Context, opts RoundToMultipleOptions, arg Datum) (Datum, error) { return CallFunction(ctx, "round_to_multiple", &opts, arg) } arrow-go-18.2.0/arrow/compute/arithmetic_test.go000066400000000000000000004336751476434502500217200ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build go1.18

package compute_test

import (
	"context"
	"fmt"
	"math"
	"strings"
	"testing"
	"unsafe"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/compute"
	"github.com/apache/arrow-go/v18/arrow/compute/exec"
	"github.com/apache/arrow-go/v18/arrow/compute/internal/kernels"
	"github.com/apache/arrow-go/v18/arrow/decimal128"
	"github.com/apache/arrow-go/v18/arrow/decimal256"
	"github.com/apache/arrow-go/v18/arrow/internal/testing/gen"
	"github.com/apache/arrow-go/v18/arrow/memory"
	"github.com/apache/arrow-go/v18/arrow/scalar"
	"github.com/klauspost/cpuid/v2"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/stretchr/testify/suite"
	"golang.org/x/exp/constraints"
)

var (
	// CpuCacheSizes holds the L1/L2/L3 data cache sizes used when sizing
	// benchmark data; the defaults below are overridden in init from cpuid
	// when the real sizes are available.
	CpuCacheSizes = [...]int{
		// defaults
		32 * 1024,   // level 1: 32K
		256 * 1024,  // level 2: 256K
		3072 * 1024, // level 3: 3M
	}
)

// init replaces each default cache size with the value reported by cpuid,
// when cpuid reports one (-1 means unknown).
func init() {
	if cpuid.CPU.Cache.L1D != -1 {
		CpuCacheSizes[0] = cpuid.CPU.Cache.L1D
	}
	if cpuid.CPU.Cache.L2 != -1 {
		CpuCacheSizes[1] = cpuid.CPU.Cache.L2
	}
	if cpuid.CPU.Cache.L3 != -1 {
		CpuCacheSizes[2] = cpuid.CPU.Cache.L3
	}
}

// assertNullToNull verifies that the registered function named fn maps
// all-null inputs to all-null outputs, supplying null arguments for the
// function's full arity, both as arrays and as scalars.
func assertNullToNull(t *testing.T, ctx context.Context, fn string, mem memory.Allocator) {
	f, ok := compute.GetFunctionRegistry().GetFunction(fn)
	require.True(t, ok)
	nulls := array.MakeArrayOfNull(mem, arrow.Null, 7)
	defer nulls.Release()
	n := f.Arity().NArgs

	t.Run("null to null array", func(t *testing.T) {
		args := make([]compute.Datum, n)
		for i := 0; i < n; i++ {
			args[i] = &compute.ArrayDatum{nulls.Data()}
		}
		result, err := compute.CallFunction(ctx, fn, nil, args...)
		assert.NoError(t, err)
		defer result.Release()
		out := result.(*compute.ArrayDatum).MakeArray()
		defer out.Release()
		assertArraysEqual(t, nulls, out)
	})

	t.Run("null to null scalar", func(t *testing.T) {
		args := make([]compute.Datum, n)
		for i := 0; i < n; i++ {
			args[i] = compute.NewDatum(scalar.ScalarNull)
		}
		result, err := compute.CallFunction(ctx, fn, nil, args...)
		assert.NoError(t, err)
		assertScalarEquals(t, scalar.ScalarNull, result.(*compute.ScalarDatum).Value)
	})
}

// fnOpts constrains the option types accepted by unaryArithmeticFunc.
type fnOpts interface {
	compute.ArithmeticOptions | compute.RoundOptions | compute.RoundToMultipleOptions
}

// unaryArithmeticFunc is the shape of a unary compute wrapper taking an
// options struct of type O.
type unaryArithmeticFunc[O fnOpts] func(context.Context, O, compute.Datum) (compute.Datum, error)

// type unaryFunc = func(compute.Datum) (compute.Datum, error)

// binaryArithmeticFunc is the shape of a binary compute wrapper taking
// ArithmeticOptions (e.g. compute.Add, compute.Subtract).
type binaryArithmeticFunc = func(context.Context, compute.ArithmeticOptions, compute.Datum, compute.Datum) (compute.Datum, error)

// binaryFunc is a binary operation with the options/context already bound.
type binaryFunc = func(left, right compute.Datum) (compute.Datum, error)

// assertScalarEquals asserts approximate scalar equality, honoring any
// provided equality options (e.g. NaN handling).
func assertScalarEquals(t *testing.T, expected, actual scalar.Scalar, opt ...scalar.EqualOption) {
	assert.Truef(t, scalar.ApproxEquals(expected, actual, opt...), "expected: %s\ngot: %s", expected, actual)
}

// assertBinop runs fn over (Array, Array) inputs and compares against
// expected, then re-checks every row as a (Scalar, Scalar) operation.
func assertBinop(t *testing.T, fn binaryFunc, left, right, expected arrow.Array, opt []array.EqualOption, scalarOpt []scalar.EqualOption) {
	actual, err := fn(&compute.ArrayDatum{Value: left.Data()}, &compute.ArrayDatum{Value: right.Data()})
	require.NoError(t, err)
	defer actual.Release()
	assertDatumsEqual(t, &compute.ArrayDatum{Value: expected.Data()}, actual, opt, scalarOpt)

	// also check (Scalar, Scalar) operations
	for i := 0; i < expected.Len(); i++ {
		s, err := scalar.GetScalar(expected, i)
		require.NoError(t, err)
		lhs, _ := scalar.GetScalar(left, i)
		rhs, _ := scalar.GetScalar(right, i)

		actual, err := fn(&compute.ScalarDatum{Value: lhs}, &compute.ScalarDatum{Value: rhs})
		assert.NoError(t, err)
		assertScalarEquals(t, s, actual.(*compute.ScalarDatum).Value, scalarOpt...)
	}
}

// assertBinopErr runs fn on the given arrays and asserts it fails with
// arrow.ErrInvalid and an error message containing expectedMsg.
func assertBinopErr(t *testing.T, fn binaryFunc, left, right arrow.Array, expectedMsg string) {
	_, err := fn(&compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{Value: right.Data()})
	assert.ErrorIs(t, err, arrow.ErrInvalid)
	assert.ErrorContains(t, err, expectedMsg)
}

// BinaryFuncTestSuite provides a checked allocator and a compute context
// for binary-function tests; TearDownTest asserts no memory leaked.
type BinaryFuncTestSuite struct {
	suite.Suite

	mem *memory.CheckedAllocator
	ctx context.Context
}

func (b *BinaryFuncTestSuite) SetupTest() {
	b.mem = memory.NewCheckedAllocator(memory.DefaultAllocator)
	b.ctx = compute.WithAllocator(context.TODO(), b.mem)
}

func (b *BinaryFuncTestSuite) TearDownTest() {
	// verify everything allocated during the test was released
	b.mem.AssertSize(b.T(), 0)
}

// getArr builds an array of type dt from a JSON string; caller releases.
func (b *BinaryFuncTestSuite) getArr(dt arrow.DataType, str string) arrow.Array {
	arr, _, err := array.FromJSON(b.mem, dt, strings.NewReader(str), array.WithUseNumber())
	b.Require().NoError(err)
	return arr
}

// Float16BinaryFuncTestSuite verifies that arithmetic on float16 inputs
// reports arrow.ErrNotImplemented rather than producing results.
type Float16BinaryFuncTestSuite struct {
	BinaryFuncTestSuite
}

// assertBinopErr runs fn on float16 arrays parsed from lhs/rhs JSON and
// asserts it fails with arrow.ErrNotImplemented.
func (b *Float16BinaryFuncTestSuite) assertBinopErr(fn binaryFunc, lhs, rhs string) {
	left, _, _ := array.FromJSON(b.mem, arrow.FixedWidthTypes.Float16, strings.NewReader(lhs), array.WithUseNumber())
	defer left.Release()
	right, _, _ := array.FromJSON(b.mem, arrow.FixedWidthTypes.Float16, strings.NewReader(rhs), array.WithUseNumber())
	defer right.Release()

	_, err := fn(&compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()})
	b.ErrorIs(err, arrow.ErrNotImplemented)
}

func (b *Float16BinaryFuncTestSuite) TestAdd() {
	for _, overflow := range []bool{false, true} {
		b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() {
			opts := compute.ArithmeticOptions{NoCheckOverflow: overflow}
			b.assertBinopErr(func(left, right compute.Datum) (compute.Datum, error) {
				return compute.Add(b.ctx, opts, left, right)
			}, `[1.5]`, `[1.5]`)
		})
	}
}

func (b *Float16BinaryFuncTestSuite) TestSub() {
	for _, overflow := range []bool{false, true} {
		b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() {
			opts := compute.ArithmeticOptions{NoCheckOverflow: overflow}
			b.assertBinopErr(func(left, right compute.Datum) (compute.Datum, error) {
				return compute.Subtract(b.ctx, opts, left, right)
			}, `[1.5]`, `[1.5]`)
		})
	}
}

// BinaryArithmeticSuite is a generic suite exercising a binary arithmetic
// function for one numeric type T; min/max are the type's bounds used to
// provoke overflow, and the equal-opts control NaN comparison behavior.
type BinaryArithmeticSuite[T arrow.NumericType] struct {
	BinaryFuncTestSuite

	opts            compute.ArithmeticOptions
	min, max        T
	equalOpts       []array.EqualOption
	scalarEqualOpts []scalar.EqualOption
}

func (BinaryArithmeticSuite[T]) DataType() arrow.DataType {
	return arrow.GetDataType[T]()
}

// setNansEqual toggles whether NaN == NaN for both array and scalar
// comparisons in this suite.
func (b *BinaryArithmeticSuite[T]) setNansEqual(val bool) {
	b.equalOpts = []array.EqualOption{array.WithNaNsEqual(val)}
	b.scalarEqualOpts = []scalar.EqualOption{scalar.WithNaNsEqual(val)}
}

func (b *BinaryArithmeticSuite[T]) SetupTest() {
	b.BinaryFuncTestSuite.SetupTest()
	// default to the checked (overflow-detecting) variants
	b.opts.NoCheckOverflow = false
}

func (b *BinaryArithmeticSuite[T]) makeNullScalar() scalar.Scalar {
	return scalar.MakeNullScalar(b.DataType())
}

func (b *BinaryArithmeticSuite[T]) makeScalar(val T) scalar.Scalar {
	return scalar.MakeScalar(val)
}

// assertBinopScalars applies fn to two scalar values and asserts the
// scalar result equals expected.
func (b *BinaryArithmeticSuite[T]) assertBinopScalars(fn binaryArithmeticFunc, lhs, rhs T, expected T) {
	left, right := b.makeScalar(lhs), b.makeScalar(rhs)
	exp := b.makeScalar(expected)

	actual, err := fn(b.ctx, b.opts, &compute.ScalarDatum{Value: left}, &compute.ScalarDatum{Value: right})
	b.NoError(err)
	sc := actual.(*compute.ScalarDatum).Value
	assertScalarEquals(b.T(), exp, sc)
}

// assertBinopScalarValArr is assertBinopScalarArr with the left-hand
// scalar built from a raw value of T.
func (b *BinaryArithmeticSuite[T]) assertBinopScalarValArr(fn binaryArithmeticFunc, lhs T, rhs, expected string) {
	left := b.makeScalar(lhs)
	b.assertBinopScalarArr(fn, left, rhs, expected)
}

// assertBinopScalarArr applies fn to (scalar, array) with rhs/expected
// given as JSON, and asserts the result matches expected.
func (b *BinaryArithmeticSuite[T]) assertBinopScalarArr(fn binaryArithmeticFunc, lhs scalar.Scalar, rhs, expected string) {
	right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs))
	defer right.Release()
	exp, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(expected))
	defer exp.Release()

	actual, err := fn(b.ctx, b.opts, &compute.ScalarDatum{Value: lhs}, &compute.ArrayDatum{Value: right.Data()})
	b.NoError(err)
	defer actual.Release()
	assertDatumsEqual(b.T(), &compute.ArrayDatum{Value: exp.Data()}, actual, b.equalOpts, b.scalarEqualOpts)
}

// assertBinopArrScalarExpArr applies fn to (array, scalar) with lhs given
// as JSON and asserts the result equals the provided expected array.
func (b *BinaryArithmeticSuite[T]) assertBinopArrScalarExpArr(fn binaryArithmeticFunc, lhs string, rhs scalar.Scalar, exp arrow.Array) {
	left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs))
	defer left.Release()

	actual, err := fn(b.ctx, b.opts, &compute.ArrayDatum{left.Data()}, compute.NewDatum(rhs))
	b.Require().NoError(err)
	defer actual.Release()
	assertDatumsEqual(b.T(), &compute.ArrayDatum{exp.Data()}, actual, b.equalOpts, b.scalarEqualOpts)
}

// assertBinopArrScalarVal is assertBinopArrScalar with the right-hand
// scalar built from a raw value of T.
func (b *BinaryArithmeticSuite[T]) assertBinopArrScalarVal(fn binaryArithmeticFunc, lhs string, rhs T, expected string) {
	right := b.makeScalar(rhs)
	b.assertBinopArrScalar(fn, lhs, right, expected)
}

// assertBinopArrScalar applies fn to (array, scalar) with lhs/expected
// given as JSON, and asserts the result matches expected.
func (b *BinaryArithmeticSuite[T]) assertBinopArrScalar(fn binaryArithmeticFunc, lhs string, rhs scalar.Scalar, expected string) {
	left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs))
	defer left.Release()
	exp, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(expected))
	defer exp.Release()

	actual, err := fn(b.ctx, b.opts, &compute.ArrayDatum{Value: left.Data()}, &compute.ScalarDatum{Value: rhs})
	b.NoError(err)
	defer actual.Release()
	assertDatumsEqual(b.T(), &compute.ArrayDatum{Value: exp.Data()}, actual, b.equalOpts, b.scalarEqualOpts)
}

// assertBinopArrs delegates to the package-level assertBinop helper with
// this suite's context, options, and equality settings bound.
func (b *BinaryArithmeticSuite[T]) assertBinopArrs(fn binaryArithmeticFunc, lhs, rhs, exp arrow.Array) {
	assertBinop(b.T(), func(left, right compute.Datum) (compute.Datum, error) {
		return fn(b.ctx, b.opts, left, right)
	}, lhs, rhs, exp, b.equalOpts, b.scalarEqualOpts)
}

// assertBinopExpArr is assertBinopArrs with lhs/rhs parsed from JSON and
// exp provided as an already-built array.
func (b *BinaryArithmeticSuite[T]) assertBinopExpArr(fn binaryArithmeticFunc, lhs, rhs string, exp arrow.Array) {
	left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs), array.WithUseNumber())
	defer left.Release()
	right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs), array.WithUseNumber())
	defer right.Release()

	b.assertBinopArrs(fn, left, right, exp)
}

func (b
*BinaryArithmeticSuite[T]) assertBinop(fn binaryArithmeticFunc, lhs, rhs, expected string) { left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs), array.WithUseNumber()) defer left.Release() right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs), array.WithUseNumber()) defer right.Release() exp, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(expected), array.WithUseNumber()) defer exp.Release() b.assertBinopArrs(fn, left, right, exp) } func (b *BinaryArithmeticSuite[T]) setOverflowCheck(value bool) { b.opts.NoCheckOverflow = !value } func (b *BinaryArithmeticSuite[T]) assertBinopErr(fn binaryArithmeticFunc, lhs, rhs, expectedMsg string) { left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs), array.WithUseNumber()) defer left.Release() right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs), array.WithUseNumber()) defer right.Release() assertBinopErr(b.T(), func(left, right compute.Datum) (compute.Datum, error) { return fn(b.ctx, b.opts, left, right) }, left, right, expectedMsg) } func (b *BinaryArithmeticSuite[T]) TestAdd() { b.Run(b.DataType().String(), func() { for _, overflow := range []bool{false, true} { b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { b.setOverflowCheck(overflow) b.assertBinop(compute.Add, `[]`, `[]`, `[]`) b.assertBinop(compute.Add, `[3, 2, 6]`, `[1, 0, 2]`, `[4, 2, 8]`) // nulls on one side b.assertBinop(compute.Add, `[null, 1, null]`, `[3, 4, 5]`, `[null, 5, null]`) b.assertBinop(compute.Add, `[3, 4, 5]`, `[null, 1, null]`, `[null, 5, null]`) // nulls on both sides b.assertBinop(compute.Add, `[null, 1, 2]`, `[3, 4, null]`, `[null, 5, null]`) // all nulls b.assertBinop(compute.Add, `[null]`, `[null]`, `[null]`) // scalar on the left b.assertBinopScalarValArr(compute.Add, 3, `[1, 2]`, `[4, 5]`) b.assertBinopScalarValArr(compute.Add, 3, `[null, 2]`, `[null, 5]`) b.assertBinopScalarArr(compute.Add, b.makeNullScalar(), `[1, 2]`, `[null, 
null]`) b.assertBinopScalarArr(compute.Add, b.makeNullScalar(), `[null, 2]`, `[null, null]`) // scalar on the right b.assertBinopArrScalarVal(compute.Add, `[1, 2]`, 3, `[4, 5]`) b.assertBinopArrScalarVal(compute.Add, `[null, 2]`, 3, `[null, 5]`) b.assertBinopArrScalar(compute.Add, `[1, 2]`, b.makeNullScalar(), `[null, null]`) b.assertBinopArrScalar(compute.Add, `[null, 2]`, b.makeNullScalar(), `[null, null]`) if !arrow.IsFloating(b.DataType().ID()) && overflow { val := fmt.Sprintf("[%v]", b.max) b.assertBinopErr(compute.Add, val, val, "overflow") } }) } }) }

// TestSub mirrors TestAdd for compute.Subtract: array/array, scalar/array,
// array/scalar, null propagation, and min-max underflow for integral types.
func (b *BinaryArithmeticSuite[T]) TestSub() { b.Run(b.DataType().String(), func() { for _, overflow := range []bool{false, true} { b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { b.setOverflowCheck(overflow) b.assertBinop(compute.Subtract, `[]`, `[]`, `[]`) b.assertBinop(compute.Subtract, `[3, 2, 6]`, `[1, 0, 2]`, `[2, 2, 4]`) // nulls on one side b.assertBinop(compute.Subtract, `[null, 4, null]`, `[2, 1, 0]`, `[null, 3, null]`) b.assertBinop(compute.Subtract, `[3, 4, 5]`, `[null, 1, null]`, `[null, 3, null]`) // nulls on both sides b.assertBinop(compute.Subtract, `[null, 4, 3]`, `[2, 1, null]`, `[null, 3, null]`) // all nulls b.assertBinop(compute.Subtract, `[null]`, `[null]`, `[null]`) // scalar on the left b.assertBinopScalarValArr(compute.Subtract, 3, `[1, 2]`, `[2, 1]`) b.assertBinopScalarValArr(compute.Subtract, 3, `[null, 2]`, `[null, 1]`) b.assertBinopScalarArr(compute.Subtract, b.makeNullScalar(), `[1, 2]`, `[null, null]`) b.assertBinopScalarArr(compute.Subtract, b.makeNullScalar(), `[null, 2]`, `[null, null]`) // scalar on the right b.assertBinopArrScalarVal(compute.Subtract, `[4, 5]`, 3, `[1, 2]`) b.assertBinopArrScalarVal(compute.Subtract, `[null, 5]`, 3, `[null, 2]`) b.assertBinopArrScalar(compute.Subtract, `[1, 2]`, b.makeNullScalar(), `[null, null]`) b.assertBinopArrScalar(compute.Subtract, `[null, 2]`, b.makeNullScalar(), `[null, null]`) if !arrow.IsFloating(b.DataType().ID()) && 
overflow { b.assertBinopErr(compute.Subtract, fmt.Sprintf("[%v]", b.min), fmt.Sprintf("[%v]", b.max), "overflow") } }) } }) }

// TestMultiply exercises compute.Multiply: array/array, scalar/array, and
// array/scalar operands plus null propagation. No overflow-error case here.
func (b *BinaryArithmeticSuite[T]) TestMultiply() { b.Run(b.DataType().String(), func() { for _, overflow := range []bool{false, true} { b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { b.setOverflowCheck(overflow) b.assertBinop(compute.Multiply, `[]`, `[]`, `[]`) b.assertBinop(compute.Multiply, `[3, 2, 6]`, `[1, 0, 2]`, `[3, 0, 12]`) // nulls on one side b.assertBinop(compute.Multiply, `[null, 2, null]`, `[4, 5, 6]`, `[null, 10, null]`) b.assertBinop(compute.Multiply, `[4, 5, 6]`, `[null, 2, null]`, `[null, 10, null]`) // nulls on both sides b.assertBinop(compute.Multiply, `[null, 2, 3]`, `[4, 5, null]`, `[null, 10, null]`) // all nulls b.assertBinop(compute.Multiply, `[null]`, `[null]`, `[null]`) // scalar on left b.assertBinopScalarValArr(compute.Multiply, 3, `[4, 5]`, `[12, 15]`) b.assertBinopScalarValArr(compute.Multiply, 3, `[null, 5]`, `[null, 15]`) b.assertBinopScalarArr(compute.Multiply, b.makeNullScalar(), `[1, 2]`, `[null, null]`) b.assertBinopScalarArr(compute.Multiply, b.makeNullScalar(), `[null, 2]`, `[null, null]`) // scalar on right b.assertBinopArrScalarVal(compute.Multiply, `[4, 5]`, 3, `[12, 15]`) b.assertBinopArrScalarVal(compute.Multiply, `[null, 5]`, 3, `[null, 15]`) b.assertBinopArrScalar(compute.Multiply, `[1, 2]`, b.makeNullScalar(), `[null, null]`) b.assertBinopArrScalar(compute.Multiply, `[null, 2]`, b.makeNullScalar(), `[null, null]`) }) } }) }

// TestDiv exercises compute.Divide, branching on element type: integral division
// truncates, while the floating branch also covers Inf/NaN inputs.
func (b *BinaryArithmeticSuite[T]) TestDiv() { b.Run(b.DataType().String(), func() { for _, overflow := range []bool{false, true} { b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { b.setOverflowCheck(overflow) // empty arrays b.assertBinop(compute.Divide, `[]`, `[]`, `[]`) // ordinary arrays b.assertBinop(compute.Divide, `[3, 2, 6]`, `[1, 1, 2]`, `[3, 2, 3]`) // with nulls b.assertBinop(compute.Divide, `[null, 10, 30, null, 20]`, `[1, 5, 2, 5, 
10]`, `[null, 2, 15, null, 2]`) if !arrow.IsFloating(b.DataType().ID()) { // scalar divided by array b.assertBinopScalarValArr(compute.Divide, 33, `[null, 1, 3, null, 2]`, `[null, 33, 11, null, 16]`) // array divided by scalar b.assertBinopArrScalarVal(compute.Divide, `[null, 10, 30, null, 2]`, 3, `[null, 3, 10, null, 0]`) // scalar divided by scalar b.assertBinopScalars(compute.Divide, 16, 7, 2) } else { b.assertBinop(compute.Divide, `[3.4, 0.64, 1.28]`, `[1, 2, 4]`, `[3.4, 0.32, 0.32]`) b.assertBinop(compute.Divide, `[null, 1, 3.3, null, 2]`, `[1, 4, 2, 5, 0.1]`, `[null, 0.25, 1.65, null, 20]`) b.assertBinopScalarValArr(compute.Divide, 10, `[null, 1, 2.5, null, 2, 5]`, `[null, 10, 4, null, 5, 2]`) b.assertBinopArrScalarVal(compute.Divide, `[null, 1, 2.5, null, 2, 5]`, 10, `[null, 0.1, 0.25, null, 0.2, 0.5]`) b.assertBinop(compute.Divide, `[3.4, "Inf", "-Inf"]`, `[1, 2, 3]`, `[3.4, "Inf", "-Inf"]`) b.setNansEqual(true) b.assertBinop(compute.Divide, `[3.4, "NaN", 2.0]`, `[1, 2, 2.0]`, `[3.4, "NaN", 1.0]`) b.assertBinopScalars(compute.Divide, 21, 3, 7) } }) } }) }

// TestDivideByZero: integral types must always error on x/0; floating types
// error only when overflow checking is on, otherwise yield IEEE Inf/NaN/-Inf.
func (b *BinaryArithmeticSuite[T]) TestDivideByZero() { if !arrow.IsFloating(b.DataType().ID()) { for _, checkOverflow := range []bool{false, true} { b.setOverflowCheck(checkOverflow) b.assertBinopErr(compute.Divide, `[3, 2, 6]`, `[1, 1, 0]`, "divide by zero") } } else { b.setOverflowCheck(true) b.assertBinopErr(compute.Divide, `[3, 2, 6]`, `[1, 1, 0]`, "divide by zero") b.assertBinopErr(compute.Divide, `[3, 2, 0]`, `[1, 1, 0]`, "divide by zero") b.assertBinopErr(compute.Divide, `[3, 2, -6]`, `[1, 1, 0]`, "divide by zero") b.setOverflowCheck(false) b.setNansEqual(true) b.assertBinop(compute.Divide, `[3, 2, 6]`, `[1, 1, 0]`, `[3, 2, "Inf"]`) b.assertBinop(compute.Divide, `[3, 2, 0]`, `[1, 1, 0]`, `[3, 2, "NaN"]`) b.assertBinop(compute.Divide, `[3, 2, -6]`, `[1, 1, 0]`, `[3, 2, "-Inf"]`) } }

// TestPower exercises compute.Power, branching integral vs floating; integral
// covers 0^0 == 1 and overflow, floating covers fractional/Inf/NaN exponents.
func (b *BinaryArithmeticSuite[T]) TestPower() { b.setNansEqual(true) b.Run(b.DataType().String(), func() { for _, 
checkOverflow := range []bool{false, true} { b.Run(fmt.Sprintf("checkOverflow=%t", checkOverflow), func() { b.setOverflowCheck(checkOverflow) b.assertBinop(compute.Power, `[]`, `[]`, `[]`) if !arrow.IsFloating(b.DataType().ID()) { b.assertBinop(compute.Power, `[3, 2, 6, 2]`, `[1, 1, 2, 0]`, `[3, 2, 36, 1]`) b.assertBinop(compute.Power, `[null, 2, 3, null, 20]`, `[1, 6, 2, 5, 1]`, `[null, 64, 9, null, 20]`) b.assertBinopScalarValArr(compute.Power, 3, `[null, 3, 4, null, 2]`, `[null, 27, 81, null, 9]`) b.assertBinopArrScalarVal(compute.Power, `[null, 10, 3, null, 2]`, 2, `[null, 100, 9, null, 4]`) b.assertBinopScalars(compute.Power, 4, 3, 64) b.assertBinop(compute.Power, `[0, 1, 0]`, `[0, 0, 42]`, `[1, 1, 0]`) if checkOverflow { b.assertBinopErr(compute.Power, fmt.Sprintf("[%v]", b.max), `[10]`, "overflow") } else { b.assertBinopScalars(compute.Power, b.max, 10, 1) } } else { b.assertBinop(compute.Power, `[3.4, 16, 0.64, 1.2, 0]`, `[1, 0.5, 2, 4, 0]`, `[3.4, 4, 0.4096, 2.0736, 1]`) b.assertBinop(compute.Power, `[null, 1, 3.3, null, 2]`, `[1, 4, 2, 5, 0.1]`, `[null, 1, 10.89, null, 1.07177346]`) b.assertBinopScalarValArr(compute.Power, 10, `[null, 1, 2.5, null, 2, 5]`, `[null, 10, 316.227766017, null, 100, 100000]`) b.assertBinopArrScalarVal(compute.Power, `[null, 1, 2.5, null, 2, 5]`, 10, `[null, 1, 9536.74316406, null, 1024, 9765625]`) b.assertBinop(compute.Power, `[3.4, "Inf", "-Inf", 1.1, 10000]`, `[1, 2, 3, "Inf", 100000]`, `[3.4, "Inf", "-Inf", "Inf", "Inf"]`) b.assertBinop(compute.Power, `[3.4, "NaN", 2.0]`, `[1, 2, 2.0]`, `[3.4, "NaN", 4.0]`) b.assertBinop(compute.Power, `[0.0, 0.0]`, `[-1.0, -3.0]`, `["Inf", "Inf"]`) } }) } }) }

// BinaryFloatingArithmeticSuite extends the generic suite with float-only tests;
// smallest holds the smallest positive nonzero value of T (for Logb edge cases).
type BinaryFloatingArithmeticSuite[T constraints.Float] struct { BinaryArithmeticSuite[T] smallest T }

// TestTrigAtan2 checks compute.Atan2 over null/NaN inputs and the signed-zero /
// infinity quadrant combinations against values built from math.Pi.
func (bs *BinaryFloatingArithmeticSuite[T]) TestTrigAtan2() { bs.setNansEqual(true) atan2 := func(ctx context.Context, _ compute.ArithmeticOptions, x, y compute.Datum) (compute.Datum, error) { return compute.Atan2(ctx, x, y) } 
bs.assertBinop(atan2, `[]`, `[]`, `[]`) bs.assertBinop(atan2, `[0, 0, null, "NaN"]`, `[null, "NaN", 0, 0]`, `[null, "NaN", null, "NaN"]`) bs.assertBinop(atan2, `[0, 0, -0.0, 0, -0.0, 0, 1, 0, -1, "Inf", "-Inf", 0, 0]`, `[0, 0, 0, -0.0, -0.0, 1, 0, -1, 0, 0, 0, "Inf", "-Inf"]`, fmt.Sprintf("[0, 0, -0.0, %f, %f, 0, %f, %f, %f, %f, %f, 0, %f]", math.Pi, -math.Pi, math.Pi/2, math.Pi, -math.Pi/2, math.Pi/2, -math.Pi/2, math.Pi)) }

// TestLog checks compute.Logb (log of x in base b): ordinary values, extreme
// magnitudes via the change-of-base identity, domain errors (zero/negative)
// under overflow checking, and NaN/Inf results when checking is disabled.
func (bs *BinaryFloatingArithmeticSuite[T]) TestLog() { bs.setNansEqual(true) for _, overflow := range []bool{false, true} { bs.setOverflowCheck(overflow) bs.assertBinop(compute.Logb, `[1, 10, null, "NaN", "Inf"]`, `[100, 10, null, 2, 10]`, `[0, 1, null, "NaN", "Inf"]`) bs.assertBinopScalars(compute.Logb, bs.smallest, 10, T(math.Log(float64(bs.smallest))/math.Log(10))) bs.assertBinopScalars(compute.Logb, bs.max, 10, T(math.Log(float64(bs.max))/math.Log(10))) } bs.setOverflowCheck(true) bs.assertBinop(compute.Logb, `[1, 10, null]`, `[10, 10, null]`, `[0, 1, null]`) bs.assertBinop(compute.Logb, `[1, 2, null]`, `[2, 2, null]`, `[0, 1, null]`) bs.assertBinopArrScalarVal(compute.Logb, `[10, 100, 1000, null]`, 10, `[1, 2, 3, null]`) bs.assertBinopArrScalarVal(compute.Logb, `[1, 2, 4, 8]`, 0.25, `[-0.0, -0.5, -1.0, -1.5]`) bs.setOverflowCheck(false) bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -1, 0, "Inf"]`, 10, `["NaN", "NaN", "-Inf", "Inf"]`) bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -1, 0, "Inf"]`, 2, `["NaN", "NaN", "-Inf", "Inf"]`) bs.assertBinop(compute.Logb, `["-Inf", -1, 0, "Inf"]`, `[2, 10, 0, 0]`, `["NaN", "NaN", "NaN", "NaN"]`) bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -1, 0, "Inf"]`, 0, `["NaN", "NaN", "NaN", "NaN"]`) bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -2, -1, "Inf"]`, 2, `["NaN", "NaN", "NaN", "Inf"]`) bs.setOverflowCheck(true) bs.assertBinopErr(compute.Logb, `[0]`, `[2]`, "logarithm of zero") bs.assertBinopErr(compute.Logb, `[2]`, `[0]`, "logarithm of zero") bs.assertBinopErr(compute.Logb, 
`[-1]`, `[2]`, "logarithm of negative number") bs.assertBinopErr(compute.Logb, `["-Inf"]`, `[2]`, "logarithm of negative number") }

// BinaryIntegralArithmeticSuite extends the generic suite with integer-only
// kernels (bit shifts) plus promotion-to-float64 sanity checks.
type BinaryIntegralArithmeticSuite[T arrow.IntType | arrow.UintType] struct { BinaryArithmeticSuite[T] }

// TestShiftLeft exercises compute.ShiftLeft for array/array and scalar mixes
// with null propagation, under both overflow-check settings.
func (b *BinaryIntegralArithmeticSuite[T]) TestShiftLeft() { b.Run(b.DataType().String(), func() { for _, overflow := range []bool{false, true} { b.Run(fmt.Sprintf("check_overflow=%t", overflow), func() { b.setOverflowCheck(overflow) b.assertBinop(compute.ShiftLeft, `[]`, `[]`, `[]`) b.assertBinop(compute.ShiftLeft, `[0, 1, 2, 3]`, `[2, 3, 4, 5]`, `[0, 8, 32, 96]`) b.assertBinop(compute.ShiftLeft, `[0, null, 2, 3]`, `[2, 3, 4, 5]`, `[0, null, 32, 96]`) b.assertBinop(compute.ShiftLeft, `[0, 1, 2, 3]`, `[2, 3, null, 5]`, `[0, 8, null, 96]`) b.assertBinop(compute.ShiftLeft, `[0, null, 2, 3]`, `[2, 3, null, 5]`, `[0, null, null, 96]`) b.assertBinop(compute.ShiftLeft, `[null]`, `[null]`, `[null]`) b.assertBinopScalarValArr(compute.ShiftLeft, 2, `[null, 5]`, `[null, 64]`) b.assertBinopScalarArr(compute.ShiftLeft, b.makeNullScalar(), `[null, 5]`, `[null, null]`) b.assertBinopArrScalarVal(compute.ShiftLeft, `[null, 5]`, 3, `[null, 40]`) b.assertBinopArrScalar(compute.ShiftLeft, `[null, 5]`, b.makeNullScalar(), `[null, null]`) }) } }) }

// TestShiftRight mirrors TestShiftLeft for compute.ShiftRight.
func (b *BinaryIntegralArithmeticSuite[T]) TestShiftRight() { b.Run(b.DataType().String(), func() { for _, overflow := range []bool{false, true} { b.Run(fmt.Sprintf("check_overflow=%t", overflow), func() { b.setOverflowCheck(overflow) b.assertBinop(compute.ShiftRight, `[]`, `[]`, `[]`) b.assertBinop(compute.ShiftRight, `[0, 1, 4, 8]`, `[1, 1, 1, 4]`, `[0, 0, 2, 0]`) b.assertBinop(compute.ShiftRight, `[0, null, 4, 8]`, `[1, 1, 1, 4]`, `[0, null, 2, 0]`) b.assertBinop(compute.ShiftRight, `[0, 1, 4, 8]`, `[1, 1, null, 4]`, `[0, 0, null, 0]`) b.assertBinop(compute.ShiftRight, `[0, null, 4, 8]`, `[1, 1, null, 4]`, `[0, null, null, 0]`) b.assertBinop(compute.ShiftRight, `[null]`, `[null]`, `[null]`) 
b.assertBinopScalarValArr(compute.ShiftRight, 64, `[null, 2, 6]`, `[null, 16, 1]`) b.assertBinopScalarArr(compute.ShiftRight, b.makeNullScalar(), `[null, 2, 6]`, `[null, null, null]`) b.assertBinopArrScalarVal(compute.ShiftRight, `[null, 3, 96]`, 3, `[null, 0, 12]`) b.assertBinopArrScalar(compute.ShiftRight, `[null, 3, 96]`, b.makeNullScalar(), `[null, null, null]`) }) } }) }

// TestShiftLeftOverflowError checks boundary shift amounts: bitWidth is reduced
// by one for signed types (sign bit), shifts into/past the sign bit are allowed
// (wrapping) while negative or >= precision amounts must error when checking.
func (b *BinaryIntegralArithmeticSuite[T]) TestShiftLeftOverflowError() { b.Run(b.DataType().String(), func() { bitWidth := b.DataType().(arrow.FixedWidthDataType).BitWidth() if !arrow.IsUnsignedInteger(b.DataType().ID()) { bitWidth-- } b.setOverflowCheck(true) b.assertBinop(compute.ShiftLeft, `[1]`, fmt.Sprintf("[%d]", bitWidth-1), fmt.Sprintf("[%d]", T(1)<<(bitWidth-1))) b.assertBinop(compute.ShiftLeft, `[2]`, fmt.Sprintf("[%d]", bitWidth-2), fmt.Sprintf("[%d]", T(1)<<(bitWidth-1))) if arrow.IsUnsignedInteger(b.DataType().ID()) { b.assertBinop(compute.ShiftLeft, `[2]`, fmt.Sprintf("[%d]", bitWidth-1), `[0]`) b.assertBinop(compute.ShiftLeft, `[4]`, fmt.Sprintf("[%d]", bitWidth-1), `[0]`) b.assertBinopErr(compute.ShiftLeft, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") } else { // shift a bit into the sign bit b.assertBinop(compute.ShiftLeft, `[2]`, fmt.Sprintf("[%d]", bitWidth-1), fmt.Sprintf("[%d]", b.min)) // shift a bit past the sign bit b.assertBinop(compute.ShiftLeft, `[4]`, fmt.Sprintf("[%d]", bitWidth-1), `[0]`) b.assertBinop(compute.ShiftLeft, fmt.Sprintf("[%d]", b.min), `[1]`, `[0]`) b.assertBinopErr(compute.ShiftLeft, `[1, 2]`, `[1, -1]`, "shift amount must be >= 0 and less than precision of type") b.assertBinopErr(compute.ShiftLeft, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") b.setOverflowCheck(false) b.assertBinop(compute.ShiftLeft, `[1, 1]`, fmt.Sprintf("[-1, %d]", bitWidth), `[1, 1]`) } }) }

// TestShiftRightOverflowError: same boundary analysis for ShiftRight, including
// arithmetic (sign-extending) shift of negative values for signed types.
func (b *BinaryIntegralArithmeticSuite[T]) TestShiftRightOverflowError() { 
b.Run(b.DataType().String(), func() { bitWidth := b.DataType().(arrow.FixedWidthDataType).BitWidth() if !arrow.IsUnsignedInteger(b.DataType().ID()) { bitWidth-- } b.setOverflowCheck(true) b.assertBinop(compute.ShiftRight, fmt.Sprintf("[%d]", b.max), fmt.Sprintf("[%d]", bitWidth-1), `[1]`) if arrow.IsUnsignedInteger(b.DataType().ID()) { b.assertBinopErr(compute.ShiftRight, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") } else { b.assertBinop(compute.ShiftRight, `[-1, -1]`, `[1, 5]`, `[-1, -1]`) b.assertBinop(compute.ShiftRight, fmt.Sprintf("[%d]", b.min), `[1]`, fmt.Sprintf("[%d]", b.min/2)) b.assertBinopErr(compute.ShiftRight, `[1, 2]`, `[1, -1]`, "shift amount must be >= 0 and less than precision of type") b.assertBinopErr(compute.ShiftRight, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") b.setOverflowCheck(false) b.assertBinop(compute.ShiftRight, `[1, 1]`, fmt.Sprintf("[-1, %d]", bitWidth), `[1, 1]`) } }) }

func (b *BinaryIntegralArithmeticSuite[T]) TestTrig() { // integer arguments promoted to float64, sanity check here ty := b.DataType() b.setNansEqual(true) atan2 := func(ctx context.Context, _ compute.ArithmeticOptions, x, y compute.Datum) (compute.Datum, error) { return compute.Atan2(ctx, x, y) } lhs, rhs := b.getArr(ty, `[0, 1]`), b.getArr(ty, `[1, 0]`) defer lhs.Release() defer rhs.Release() exp := b.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf(`[0, %f]`, math.Pi/2)) defer exp.Release() b.assertBinopArrs(atan2, lhs, rhs, exp) }

func (b *BinaryIntegralArithmeticSuite[T]) TestLog() { // integer arguments promoted to double, sanity check here exp1 := b.getArr(arrow.PrimitiveTypes.Float64, `[0, 1, null]`) exp2 := b.getArr(arrow.PrimitiveTypes.Float64, `[1, 2, null]`) defer exp1.Release() defer exp2.Release() b.assertBinopExpArr(compute.Logb, `[1, 10, null]`, `[10, 10, null]`, exp1) b.assertBinopExpArr(compute.Logb, `[1, 2, null]`, `[2, 2, null]`, exp1) 
b.assertBinopArrScalarExpArr(compute.Logb, `[10, 100, null]`, scalar.MakeScalar(T(10)), exp2) }

// TestBinaryArithmetic is the entry point: it instantiates the generic suites
// for every integral width (with their min/max bounds), both float widths
// (smallest positive nonzero value for Logb), and the float16/decimal/temporal suites.
func TestBinaryArithmetic(t *testing.T) { suite.Run(t, &BinaryIntegralArithmeticSuite[int8]{BinaryArithmeticSuite[int8]{min: math.MinInt8, max: math.MaxInt8}}) suite.Run(t, &BinaryIntegralArithmeticSuite[uint8]{BinaryArithmeticSuite[uint8]{min: 0, max: math.MaxUint8}}) suite.Run(t, &BinaryIntegralArithmeticSuite[int16]{BinaryArithmeticSuite[int16]{min: math.MinInt16, max: math.MaxInt16}}) suite.Run(t, &BinaryIntegralArithmeticSuite[uint16]{BinaryArithmeticSuite[uint16]{min: 0, max: math.MaxUint16}}) suite.Run(t, &BinaryIntegralArithmeticSuite[int32]{BinaryArithmeticSuite[int32]{min: math.MinInt32, max: math.MaxInt32}}) suite.Run(t, &BinaryIntegralArithmeticSuite[uint32]{BinaryArithmeticSuite[uint32]{min: 0, max: math.MaxUint32}}) suite.Run(t, &BinaryIntegralArithmeticSuite[int64]{BinaryArithmeticSuite[int64]{min: math.MinInt64, max: math.MaxInt64}}) suite.Run(t, &BinaryIntegralArithmeticSuite[uint64]{BinaryArithmeticSuite[uint64]{min: 0, max: math.MaxUint64}}) suite.Run(t, &BinaryFloatingArithmeticSuite[float32]{BinaryArithmeticSuite[float32]{min: -math.MaxFloat32, max: math.MaxFloat32}, math.SmallestNonzeroFloat32}) suite.Run(t, &BinaryFloatingArithmeticSuite[float64]{BinaryArithmeticSuite[float64]{min: -math.MaxFloat64, max: math.MaxFloat64}, math.SmallestNonzeroFloat64}) suite.Run(t, new(Float16BinaryFuncTestSuite)) suite.Run(t, new(DecimalBinaryArithmeticSuite)) suite.Run(t, new(ScalarBinaryTemporalArithmeticSuite)) }

// TestBinaryArithmeticDispatchBest verifies kernel dispatch type promotion for
// the basic arithmetic functions and their _unchecked variants. Note: `name +=
// suffix` accumulates across the inner loop, yielding "add" then "add_unchecked".
func TestBinaryArithmeticDispatchBest(t *testing.T) { for _, name := range []string{"add", "sub", "multiply", "divide", "power"} { for _, suffix := range []string{"", "_unchecked"} { name += suffix t.Run(name, func(t *testing.T) { tests := []struct { left, right arrow.DataType expected arrow.DataType }{ {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32}, {arrow.PrimitiveTypes.Int32, arrow.Null, 
arrow.PrimitiveTypes.Int32}, {arrow.Null, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32}, {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int32}, {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int32}, {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32}, {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int64}, {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int32}, {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int32}, {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int64}, {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint64, arrow.PrimitiveTypes.Int64}, {arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Uint8}, {arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Uint16}, {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float32}, {arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Float32}, {arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float64}, {&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.PrimitiveTypes.Float64}, arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}, {&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.PrimitiveTypes.Float64}, arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Float64}, } for _, tt := range tests { CheckDispatchBest(t, name, []arrow.DataType{tt.left, tt.right}, []arrow.DataType{tt.expected, tt.expected}) } }) } } }

// DecimalArithmeticSuite provides shared helpers for decimal128/decimal256
// arithmetic tests on top of the common BinaryFuncTestSuite fixture.
type DecimalArithmeticSuite struct { BinaryFuncTestSuite }

// positiveScales returns representative decimal types with positive scale,
// covering both 128- and 256-bit widths at small and maximum precision.
func (*DecimalArithmeticSuite) positiveScales() []arrow.DataType { return []arrow.DataType{ &arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}, &arrow.Decimal128Type{Precision: 38, Scale: 2}, 
&arrow.Decimal256Type{Precision: 76, Scale: 2}, } }

// negativeScales returns decimal types with a negative scale (values are
// multiples of 100), for testing scale handling below zero.
func (*DecimalArithmeticSuite) negativeScales() []arrow.DataType { return []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}, } }

func (ds *DecimalArithmeticSuite) checkDecimalToFloat(fn string, args []compute.Datum) { // validate that fn(*decimals) is the same as // fn([cast(x, float64) x for x in decimals]) newArgs := make([]compute.Datum, len(args)) for i, arg := range args { if arrow.IsDecimal(arg.(compute.ArrayLikeDatum).Type().ID()) { casted, err := compute.CastDatum(ds.ctx, arg, compute.NewCastOptions(arrow.PrimitiveTypes.Float64, true)) ds.Require().NoError(err) defer casted.Release() newArgs[i] = casted } else { newArgs[i] = arg } } expected, err := compute.CallFunction(ds.ctx, fn, nil, newArgs...) ds.Require().NoError(err) defer expected.Release() actual, err := compute.CallFunction(ds.ctx, fn, nil, args...) ds.Require().NoError(err) defer actual.Release() assertDatumsEqual(ds.T(), expected, actual, []array.EqualOption{array.WithNaNsEqual(true)}, []scalar.EqualOption{scalar.WithNaNsEqual(true)}) }

// checkFail asserts that calling fn with args and opts fails with
// arrow.ErrInvalid and that the error message contains substr.
func (ds *DecimalArithmeticSuite) checkFail(fn string, args []compute.Datum, substr string, opts compute.FunctionOptions) { _, err := compute.CallFunction(ds.ctx, fn, opts, args...) 
ds.ErrorIs(err, arrow.ErrInvalid) ds.ErrorContains(err, substr) }

// decimalArrayFromJSON builds an array of the given decimal type from JSON,
// failing the test immediately on parse errors. Caller releases the array.
func (ds *DecimalArithmeticSuite) decimalArrayFromJSON(ty arrow.DataType, str string) arrow.Array { arr, _, err := array.FromJSON(ds.mem, ty, strings.NewReader(str)) ds.Require().NoError(err) return arr }

// DecimalBinaryArithmeticSuite holds the binary (two-operand) decimal tests.
type DecimalBinaryArithmeticSuite struct { DecimalArithmeticSuite }

// TestDispatchBest verifies decimal type-promotion rules per function family:
// decimal+float -> float; add/sub unify scales; multiply keeps operand scales;
// divide computes result precision/scale; power/atan2/logb promote to float64.
func (ds *DecimalBinaryArithmeticSuite) TestDispatchBest() { // decimal, floating point ds.Run("dec/floatingpoint", func() { for _, fn := range []string{"add", "sub", "multiply", "divide"} { for _, suffix := range []string{"", "_unchecked"} { fn += suffix ds.Run(fn, func() { CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 1, Scale: 0}, arrow.PrimitiveTypes.Float32}, []arrow.DataType{ arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float32}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal256Type{Precision: 1, Scale: 0}, arrow.PrimitiveTypes.Float64}, []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ arrow.PrimitiveTypes.Float32, &arrow.Decimal256Type{Precision: 1, Scale: 0}}, []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float32}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ arrow.PrimitiveTypes.Float64, &arrow.Decimal128Type{Precision: 1, Scale: 0}}, []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) }) } } }) // decimal, decimal => decimal // decimal, integer => decimal ds.Run("dec/dec_int", func() { for _, fn := range []string{"add", "sub"} { for _, suffix := range []string{"", "_unchecked"} { fn += suffix ds.Run(fn, func() { CheckDispatchBest(ds.T(), fn, []arrow.DataType{ arrow.PrimitiveTypes.Int64, &arrow.Decimal128Type{Precision: 1, Scale: 0}}, []arrow.DataType{&arrow.Decimal128Type{Precision: 19, Scale: 0}, &arrow.Decimal128Type{Precision: 1, Scale: 0}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 1, Scale: 0}, 
arrow.PrimitiveTypes.Int64}, []arrow.DataType{&arrow.Decimal128Type{Precision: 1, Scale: 0}, &arrow.Decimal128Type{Precision: 19, Scale: 0}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 0}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, []arrow.DataType{&arrow.Decimal128Type{Precision: 3, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 0}}, []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 3, Scale: 1}}) }) } } })
// multiply: operands keep their own precision/scale (no rescaling needed).
{ fn := "multiply" for _, suffix := range []string{"", "_unchecked"} { fn += suffix ds.Run(fn, func() { CheckDispatchBest(ds.T(), fn, []arrow.DataType{ arrow.PrimitiveTypes.Int64, &arrow.Decimal128Type{Precision: 1}}, []arrow.DataType{&arrow.Decimal128Type{Precision: 19}, &arrow.Decimal128Type{Precision: 1}}) 
CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 1}, arrow.PrimitiveTypes.Int64}, []arrow.DataType{&arrow.Decimal128Type{Precision: 1}, &arrow.Decimal128Type{Precision: 19}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 0}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 0}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}) CheckDispatchBest(ds.T(), fn, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 0}}, []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 0}}) }) } }
// divide: result precision/scale derived from both operands; right operand rescaled.
{ fn := "divide" for _, suffix := range []string{"", "_unchecked"} { fn += suffix ds.Run(fn, func() { CheckDispatchBest(ds.T(), fn, []arrow.DataType{ arrow.PrimitiveTypes.Int64, &arrow.Decimal128Type{Precision: 1, Scale: 0}}, 
// power/atan2/logb on decimals always dispatch to the float64 kernels.
func() { CheckDispatchBest(ds.T(), name, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) CheckDispatchBest(ds.T(), name, []arrow.DataType{ &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) CheckDispatchBest(ds.T(), name, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 1}, arrow.PrimitiveTypes.Int64}, []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) CheckDispatchBest(ds.T(), name, []arrow.DataType{ arrow.PrimitiveTypes.Int32, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) CheckDispatchBest(ds.T(), name, []arrow.DataType{ &arrow.Decimal128Type{Precision: 2, Scale: 1}, arrow.PrimitiveTypes.Float64}, []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) CheckDispatchBest(ds.T(), name, []arrow.DataType{ arrow.PrimitiveTypes.Float32, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) }) } }

// TestAddSubtractDec128 checks add/sub on decimal128 operands with different
// scales (3 vs 9); results are rescaled to the wider scale with precision 37.
func (ds *DecimalBinaryArithmeticSuite) TestAddSubtractDec128() { left, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 30, Scale: 3}, strings.NewReader(`["1.000", "-123456789012345678901234567.890", "98765432109876543210.987", "-999999999999999999999999999.999"]`)) defer left.Release() right, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 20, Scale: 9}, strings.NewReader(`["-1.000000000", "12345678901.234567890", "98765.432101234", "-99999999999.999999999"]`)) defer right.Release() added, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 37, Scale: 9}, strings.NewReader(`["0.000000000", "-123456789012345666555555666.655432110", "98765432109876641976.419101234", 
"-1000000000000000099999999999.998999999"]`)) defer added.Release() subtracted, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 37, Scale: 9}, strings.NewReader(`["2.000000000", "-123456789012345691246913469.124567890", "98765432109876444445.554898766", "-999999999999999899999999999.999000001"]`)) defer subtracted.Release() leftDatum, rightDatum := &compute.ArrayDatum{Value: left.Data()}, &compute.ArrayDatum{Value: right.Data()} checkScalarBinary(ds.T(), "add", leftDatum, rightDatum, &compute.ArrayDatum{Value: added.Data()}, nil) checkScalarBinary(ds.T(), "sub", leftDatum, rightDatum, &compute.ArrayDatum{Value: subtracted.Data()}, nil) } func (ds *DecimalBinaryArithmeticSuite) TestAddSubtractDec256() { left, _, _ := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 30, Scale: 20}, strings.NewReader(`[ "-1.00000000000000000001", "1234567890.12345678900000000000", "-9876543210.09876543210987654321", "9999999999.99999999999999999999" ]`)) defer left.Release() right, _, _ := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 30, Scale: 10}, strings.NewReader(`[ "1.0000000000", "-1234567890.1234567890", "6789.5432101234", "99999999999999999999.9999999999" ]`)) defer right.Release() added, _, _ := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 41, Scale: 20}, strings.NewReader(`[ "-0.00000000000000000001", "0.00000000000000000000", "-9876536420.55555530870987654321", "100000000009999999999.99999999989999999999" ]`)) defer added.Release() subtracted, _, _ := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 41, Scale: 20}, strings.NewReader(`[ "-2.00000000000000000001", "2469135780.24691357800000000000", "-9876549999.64197555550987654321", "-99999999989999999999.99999999990000000001" ]`)) defer subtracted.Release() leftDatum, rightDatum := &compute.ArrayDatum{Value: left.Data()}, &compute.ArrayDatum{Value: right.Data()} checkScalarBinary(ds.T(), "add", leftDatum, rightDatum, &compute.ArrayDatum{Value: added.Data()}, nil) 
checkScalarBinary(ds.T(), "sub", leftDatum, rightDatum, &compute.ArrayDatum{Value: subtracted.Data()}, nil) } func (ds *DecimalBinaryArithmeticSuite) TestAddSubScalars() { ds.Run("scalar_array", func() { left := scalar.NewDecimal128Scalar(decimal128.New(0, 123456), &arrow.Decimal128Type{Precision: 6, Scale: 1}) right, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 10, Scale: 3}, strings.NewReader(`["1.234", "1234.000", "-9876.543", "666.888"]`)) defer right.Release() added, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 11, Scale: 3}, strings.NewReader(`["12346.834", "13579.600", "2469.057", "13012.488"]`)) defer added.Release() leftSubRight, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 11, Scale: 3}, strings.NewReader(`["12344.366", "11111.600", "22222.143", "11678.712"]`)) defer leftSubRight.Release() rightSubLeft, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 11, Scale: 3}, strings.NewReader(`["-12344.366", "-11111.600", "-22222.143", "-11678.712"]`)) defer rightSubLeft.Release() rightDatum := &compute.ArrayDatum{right.Data()} addedDatum := &compute.ArrayDatum{added.Data()} checkScalarBinary(ds.T(), "add", compute.NewDatum(left), rightDatum, addedDatum, nil) checkScalarBinary(ds.T(), "add", rightDatum, compute.NewDatum(left), addedDatum, nil) checkScalarBinary(ds.T(), "sub", compute.NewDatum(left), rightDatum, &compute.ArrayDatum{leftSubRight.Data()}, nil) checkScalarBinary(ds.T(), "sub", rightDatum, compute.NewDatum(left), &compute.ArrayDatum{rightSubLeft.Data()}, nil) }) ds.Run("scalar_scalar", func() { left := scalar.NewDecimal256Scalar(decimal256.FromU64(666), &arrow.Decimal256Type{Precision: 3}) right := scalar.NewDecimal256Scalar(decimal256.FromU64(888), &arrow.Decimal256Type{Precision: 3}) added := scalar.NewDecimal256Scalar(decimal256.FromU64(1554), &arrow.Decimal256Type{Precision: 4}) subtracted := scalar.NewDecimal256Scalar(decimal256.FromI64(-222), &arrow.Decimal256Type{Precision: 
4}) checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) checkScalarBinary(ds.T(), "sub", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(subtracted), nil) }) ds.Run("dec128_dec256", func() { left := scalar.NewDecimal128Scalar(decimal128.FromU64(666), &arrow.Decimal128Type{Precision: 3}) right := scalar.NewDecimal256Scalar(decimal256.FromU64(888), &arrow.Decimal256Type{Precision: 3}) added := scalar.NewDecimal256Scalar(decimal256.FromU64(1554), &arrow.Decimal256Type{Precision: 4}) checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) checkScalarBinary(ds.T(), "add", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(added), nil) }) ds.Run("decimal_float", func() { left := scalar.NewDecimal128Scalar(decimal128.FromU64(666), &arrow.Decimal128Type{Precision: 3}) right := scalar.MakeScalar(float64(888)) added := scalar.MakeScalar(float64(1554)) checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) checkScalarBinary(ds.T(), "add", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(added), nil) }) ds.Run("decimal_integer", func() { left := scalar.NewDecimal128Scalar(decimal128.FromU64(666), &arrow.Decimal128Type{Precision: 3}) right := scalar.MakeScalar(int64(888)) added := scalar.NewDecimal128Scalar(decimal128.FromU64(1554), &arrow.Decimal128Type{Precision: 20}) subtracted := scalar.NewDecimal128Scalar(decimal128.FromI64(-222), &arrow.Decimal128Type{Precision: 20}) checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) checkScalarBinary(ds.T(), "sub", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(subtracted), nil) }) } func (ds *DecimalBinaryArithmeticSuite) TestMultiply() { ds.Run("array x array, decimal128", func() { left, _, err := array.FromJSON(ds.mem, 
&arrow.Decimal128Type{Precision: 20, Scale: 10}, strings.NewReader(`["1234567890.1234567890", "-0.0000000001", "-9999999999.9999999999"]`)) ds.Require().NoError(err) defer left.Release() right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 13, Scale: 3}, strings.NewReader(`["1234567890.123", "0.001", "-9999999999.999"]`)) ds.Require().NoError(err) defer right.Release() expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 34, Scale: 13}, strings.NewReader(`["1524157875323319737.98709039504701", "-0.0000000000001", "99999999999989999999.0000000000001"]`)) ds.Require().NoError(err) defer expected.Release() checkScalarBinary(ds.T(), "multiply_unchecked", &compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) }) ds.Run("array x array decimal256", func() { left, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 30, Scale: 3}, strings.NewReader(`["123456789012345678901234567.890", "0.000"]`)) ds.Require().NoError(err) defer left.Release() right, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 20, Scale: 9}, strings.NewReader(`["-12345678901.234567890", "99999999999.999999999"]`)) ds.Require().NoError(err) defer right.Release() expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 51, Scale: 12}, strings.NewReader(`["-1524157875323883675034293577501905199.875019052100", "0.000000000000"]`)) ds.Require().NoError(err) defer expected.Release() checkScalarBinary(ds.T(), "multiply_unchecked", &compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) }) ds.Run("scalar x array", func() { left, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3, Scale: 2}, "3.14") ds.Require().NoError(err) right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 1, Scale: 0}, strings.NewReader(`["1", "2", "3", "4", "5"]`)) ds.Require().NoError(err) defer 
right.Release() expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 5, Scale: 2}, strings.NewReader(`["3.14", "6.28", "9.42", "12.56", "15.70"]`)) ds.Require().NoError(err) defer expected.Release() leftDatum, rightDatum := &compute.ScalarDatum{left}, &compute.ArrayDatum{right.Data()} expDatum := &compute.ArrayDatum{expected.Data()} checkScalarBinary(ds.T(), "multiply_unchecked", leftDatum, rightDatum, expDatum, nil) checkScalarBinary(ds.T(), "multiply_unchecked", rightDatum, leftDatum, expDatum, nil) }) ds.Run("scalar x scalar", func() { left, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 1}, "1") ds.Require().NoError(err) right, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 1}, "1") ds.Require().NoError(err) expected, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "1") ds.Require().NoError(err) checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) }) ds.Run("decimal128 x decimal256", func() { left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3, Scale: 2}, "6.66") right, _ := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 3, Scale: 1}, "88.8") expected, _ := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 7, Scale: 3}, "591.408") checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(expected), nil) }) ds.Run("decimal x float", func() { left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "666") right := scalar.MakeScalar(float64(888)) expected := scalar.MakeScalar(float64(591408)) checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(right), compute.NewDatum(left), 
compute.NewDatum(expected), nil) }) ds.Run("decimal x integer", func() { left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "666") right := scalar.MakeScalar(int64(888)) expected, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 23}, "591408") checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) }) } func (ds *DecimalBinaryArithmeticSuite) TestDivide() { ds.Run("array / array, decimal128", func() { left, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 13, Scale: 3}, strings.NewReader(`["1234567890.123", "0.001"]`)) ds.Require().NoError(err) defer left.Release() right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 3, Scale: 0}, strings.NewReader(`["-987", "999"]`)) ds.Require().NoError(err) defer right.Release() expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 17, Scale: 7}, strings.NewReader(`["-1250828.6627386", "0.0000010"]`)) ds.Require().NoError(err) defer expected.Release() checkScalarBinary(ds.T(), "divide_unchecked", &compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) }) ds.Run("array / array decimal256", func() { left, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 20, Scale: 10}, strings.NewReader(`["1234567890.1234567890", "9999999999.9999999999"]`)) ds.Require().NoError(err) defer left.Release() right, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 13, Scale: 3}, strings.NewReader(`["1234567890.123", "0.001"]`)) ds.Require().NoError(err) defer right.Release() expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 34, Scale: 21}, strings.NewReader(`["1.000000000000369999093", "9999999999999.999999900000000000000"]`)) ds.Require().NoError(err) defer expected.Release() checkScalarBinary(ds.T(), "divide_unchecked", &compute.ArrayDatum{left.Data()}, 
&compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) }) ds.Run("scalar / array", func() { left, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 1, Scale: 0}, "1") ds.Require().NoError(err) right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 1, Scale: 0}, strings.NewReader(`["1", "2", "3", "4"]`)) ds.Require().NoError(err) defer right.Release() leftDivRight, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 5, Scale: 4}, strings.NewReader(`["1.0000", "0.5000", "0.3333", "0.2500"]`)) ds.Require().NoError(err) defer leftDivRight.Release() rightDivLeft, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 5, Scale: 4}, strings.NewReader(`["1.0000", "2.0000", "3.0000", "4.0000"]`)) ds.Require().NoError(err) defer rightDivLeft.Release() leftDatum, rightDatum := &compute.ScalarDatum{left}, &compute.ArrayDatum{right.Data()} checkScalarBinary(ds.T(), "divide_unchecked", leftDatum, rightDatum, &compute.ArrayDatum{leftDivRight.Data()}, nil) checkScalarBinary(ds.T(), "divide_unchecked", rightDatum, leftDatum, &compute.ArrayDatum{rightDivLeft.Data()}, nil) }) ds.Run("scalar / scalar", func() { left, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 6, Scale: 5}, "2.71828") ds.Require().NoError(err) right, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 6, Scale: 5}, "3.14159") ds.Require().NoError(err) expected, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 13, Scale: 7}, "0.8652561") ds.Require().NoError(err) checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) }) ds.Run("decimal128 / decimal256", func() { left, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 6, Scale: 5}, "2.71828") ds.Require().NoError(err) right, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 6, Scale: 5}, "3.14159") ds.Require().NoError(err) leftDivRight, err := 
scalar.ParseScalar(&arrow.Decimal256Type{Precision: 13, Scale: 7}, "0.8652561") ds.Require().NoError(err) rightDivLeft, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 13, Scale: 7}, "1.1557271") ds.Require().NoError(err) checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(leftDivRight), nil) checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(rightDivLeft), nil) }) ds.Run("decimal / float", func() { left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "100") right := scalar.MakeScalar(float64(50)) leftDivRight := scalar.MakeScalar(float64(2)) rightDivLeft := scalar.MakeScalar(float64(0.5)) checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(leftDivRight), nil) checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(rightDivLeft), nil) }) ds.Run("decimal / integer", func() { left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "100") right := scalar.MakeScalar(int64(50)) leftDivRight, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 23, Scale: 20}, "2.0000000000000000000") rightDivLeft, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 23, Scale: 4}, "0.5000") checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(leftDivRight), nil) checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(rightDivLeft), nil) }) } func (ds *DecimalBinaryArithmeticSuite) TestAtan2() { // decimal arguments get promoted to float64, sanity check here fn := "atan2" for _, ty := range ds.positiveScales() { empty := ds.getArr(ty, `[]`) defer empty.Release() ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) larr := ds.getArr(ty, 
`["1.00", "10.00", "1.00", "2.00", null]`) defer larr.Release() ldatum := &compute.ArrayDatum{larr.Data()} test := ds.getArr(ty, `["10.00", "10.00", "2.00", "2.00", null]`) defer test.Release() ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, &compute.ArrayDatum{test.Data()}}) test = ds.getArr(&arrow.Decimal128Type{Precision: 4, Scale: 2}, `["10.00", "10.00", "2.00", "2.00", null]`) defer test.Release() ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, &compute.ArrayDatum{test.Data()}}) ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, compute.NewDatum(scalar.MakeScalar(int64(10)))}) ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, compute.NewDatum(scalar.MakeScalar(float64(10)))}) larr = ds.getArr(arrow.PrimitiveTypes.Float64, `[1, 10, 1, 2, null]`) defer larr.Release() sc, _ := scalar.MakeScalarParam("10.00", ty) ds.checkDecimalToFloat(fn, []compute.Datum{ &compute.ArrayDatum{larr.Data()}, compute.NewDatum(sc)}) larr = ds.getArr(arrow.PrimitiveTypes.Int64, `[1, 10, 1, 2, null]`) defer larr.Release() ds.checkDecimalToFloat(fn, []compute.Datum{ &compute.ArrayDatum{larr.Data()}, compute.NewDatum(sc)}) } for _, ty := range ds.negativeScales() { empty := ds.getArr(ty, `[]`) defer empty.Release() ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) larr := ds.getArr(ty, `["12E2", "42E2", null]`) defer larr.Release() ds.checkDecimalToFloat(fn, []compute.Datum{ &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{larr.Data()}}) rarr := ds.getArr(&arrow.Decimal128Type{Precision: 2, Scale: -2}, `["12E2", "42E2", null]`) defer rarr.Release() ds.checkDecimalToFloat(fn, []compute.Datum{ &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{rarr.Data()}}) ds.checkDecimalToFloat(fn, []compute.Datum{ &compute.ArrayDatum{larr.Data()}, compute.NewDatum(scalar.MakeScalar(int64(10)))}) } } func (ds *DecimalBinaryArithmeticSuite) TestLogb() { // decimal arguments get promoted to float64, sanity check here for _, fn 
:= range []string{"logb", "logb_unchecked"} { ds.Run(fn, func() { for _, ty := range ds.positiveScales() { empty := ds.getArr(ty, `[]`) defer empty.Release() ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) larr := ds.getArr(ty, `["1.00", "10.00", "1.00", "2.00", null]`) defer larr.Release() ldatum := &compute.ArrayDatum{larr.Data()} test := ds.getArr(ty, `["10.00", "10.00", "2.00", "2.00", null]`) defer test.Release() ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, &compute.ArrayDatum{test.Data()}}) test = ds.getArr(&arrow.Decimal128Type{Precision: 4, Scale: 2}, `["10.00", "10.00", "2.00", "2.00", null]`) defer test.Release() ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, &compute.ArrayDatum{test.Data()}}) ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, compute.NewDatum(scalar.MakeScalar(int64(10)))}) ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, compute.NewDatum(scalar.MakeScalar(float64(10)))}) larr = ds.getArr(arrow.PrimitiveTypes.Float64, `[1, 10, 1, 2, null]`) defer larr.Release() sc, _ := scalar.MakeScalarParam("10.00", ty) ds.checkDecimalToFloat(fn, []compute.Datum{ &compute.ArrayDatum{larr.Data()}, compute.NewDatum(sc)}) larr = ds.getArr(arrow.PrimitiveTypes.Int64, `[1, 10, 1, 2, null]`) defer larr.Release() ds.checkDecimalToFloat(fn, []compute.Datum{ &compute.ArrayDatum{larr.Data()}, compute.NewDatum(sc)}) } for _, ty := range ds.negativeScales() { empty := ds.getArr(ty, `[]`) defer empty.Release() ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) larr := ds.getArr(ty, `["12E2", "42E2", null]`) defer larr.Release() ds.checkDecimalToFloat(fn, []compute.Datum{ &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{larr.Data()}}) rarr := ds.getArr(&arrow.Decimal128Type{Precision: 2, Scale: -2}, `["12E2", "42E2", null]`) defer rarr.Release() ds.checkDecimalToFloat(fn, []compute.Datum{ &compute.ArrayDatum{larr.Data()}, 
&compute.ArrayDatum{rarr.Data()}}) ds.checkDecimalToFloat(fn, []compute.Datum{ &compute.ArrayDatum{larr.Data()}, compute.NewDatum(scalar.MakeScalar(int64(10)))}) } }) } } type DecimalUnaryArithmeticSuite struct { DecimalArithmeticSuite } func (ds *DecimalUnaryArithmeticSuite) TestAbsoluteValue() { max128 := decimal128.GetMaxValue(38) max256 := decimal256.GetMaxValue(76) ds.Run("decimal", func() { for _, fn := range []string{"abs_unchecked", "abs"} { ds.Run(fn, func() { for _, ty := range ds.positiveScales() { ds.Run(ty.String(), func() { empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) defer empty.Release() in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["1.00", "-42.15", null]`)) defer in.Release() exp, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["1.00", "42.15", null]`)) defer exp.Release() checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) }) } checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38}))}, compute.NewDatum(scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38})), nil) checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 76}))}, compute.NewDatum(scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 76})), nil) for _, ty := range ds.negativeScales() { ds.Run(ty.String(), func() { empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) defer empty.Release() in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["12E2", "-42E2", null]`)) defer in.Release() exp, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["12E2", "42E2", null]`)) defer exp.Release() checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, 
&compute.ArrayDatum{empty.Data()}, nil) checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) }) } }) } }) } func (ds *DecimalUnaryArithmeticSuite) TestNegate() { max128 := decimal128.GetMaxValue(38) max256 := decimal256.GetMaxValue(76) for _, fn := range []string{"negate_unchecked", "negate"} { ds.Run(fn, func() { for _, ty := range ds.positiveScales() { empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) defer empty.Release() in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["0.00", "1.00", "-42.15", null]`)) defer in.Release() exp, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["0.00", "-1.00", "42.15", null]`)) defer exp.Release() checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) } checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38}))}, compute.NewDatum(scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38})), nil) checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 76}))}, compute.NewDatum(scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 76})), nil) checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38}))}, compute.NewDatum(scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38})), nil) checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 76}))}, compute.NewDatum(scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 76})), nil) for _, ty := range ds.negativeScales() { ds.Run(ty.String(), func() { empty, _, _ 
:= array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) defer empty.Release() in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["0", "12E2", "-42E2", null]`)) defer in.Release() exp, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["0", "-12E2", "42E2", null]`)) defer exp.Release() checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) }) } }) } } func (ds *DecimalUnaryArithmeticSuite) TestSquareRoot() { for _, fn := range []string{"sqrt_unchecked", "sqrt"} { ds.Run(fn, func() { for _, ty := range ds.positiveScales() { ds.Run(ty.String(), func() { empty := ds.decimalArrayFromJSON(ty, `[]`) defer empty.Release() arr := ds.decimalArrayFromJSON(ty, `["4.00", "16.00", "36.00", null]`) defer arr.Release() ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: empty.Data()}}) ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: arr.Data()}}) neg := ds.decimalArrayFromJSON(ty, `["-2.00"]`) defer neg.Release() ds.checkFail("sqrt", []compute.Datum{&compute.ArrayDatum{Value: neg.Data()}}, "square root of negative number", nil) }) } for _, ty := range ds.negativeScales() { ds.Run(ty.String(), func() { empty := ds.decimalArrayFromJSON(ty, `[]`) defer empty.Release() arr := ds.decimalArrayFromJSON(ty, `["400", "1600", "3600", null]`) defer arr.Release() ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: empty.Data()}}) ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: arr.Data()}}) neg := ds.decimalArrayFromJSON(ty, `["-400"]`) defer neg.Release() ds.checkFail("sqrt", []compute.Datum{&compute.ArrayDatum{Value: neg.Data()}}, "square root of negative number", nil) }) } }) } } func (ds *DecimalUnaryArithmeticSuite) TestSign() { max128 := decimal128.GetMaxValue(38) max256 := decimal256.GetMaxValue(76) for _, ty := range 
ds.positiveScales() { empty := ds.decimalArrayFromJSON(ty, `[]`) defer empty.Release() emptyOut := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[]`) defer emptyOut.Release() in := ds.decimalArrayFromJSON(ty, `["1.00", "0.00", "-42.15", null]`) defer in.Release() exp := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[1, 0, -1, null]`) defer exp.Release() checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{emptyOut.Data()}, nil) checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) } checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38}))}, compute.NewDatum(scalar.MakeScalar(int64(1))), nil) checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38}))}, compute.NewDatum(scalar.MakeScalar(int64(-1))), nil) checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 38}))}, compute.NewDatum(scalar.MakeScalar(int64(1))), nil) checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 38}))}, compute.NewDatum(scalar.MakeScalar(int64(-1))), nil) for _, ty := range ds.negativeScales() { empty := ds.decimalArrayFromJSON(ty, `[]`) defer empty.Release() emptyOut := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[]`) defer emptyOut.Release() in := ds.decimalArrayFromJSON(ty, `["12e2", "0.00", "-42E2", null]`) defer in.Release() exp := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[1, 0, -1, null]`) defer exp.Release() checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{emptyOut.Data()}, nil) checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{in.Data()}}, 
		// NOTE(review): continuation of the decimal "sign" test — its func line is above this chunk.
		&compute.ArrayDatum{exp.Data()}, nil)
	}
}

// TestTrigAcosAsin exercises acos/asin (checked and unchecked variants) on decimal
// input. Positive-scale decimals in [-1, 1] are in the domain; negative-scale
// decimals (magnitude >= 100) are outside it, so the checked variants must fail
// with "domain error" while the unchecked variants still produce a float result.
// checkDecimalToFloat / checkFail are suite helpers defined elsewhere in this file.
func (ds *DecimalUnaryArithmeticSuite) TestTrigAcosAsin() {
	for _, fn := range []string{"acos", "acos_unchecked", "asin", "asin_unchecked"} {
		ds.Run(fn, func() {
			for _, ty := range ds.positiveScales() {
				ds.Run(ty.String(), func() {
					empty := ds.decimalArrayFromJSON(ty, `[]`)
					defer empty.Release()
					vals := ds.decimalArrayFromJSON(ty, `["0.00", "-1.00", "1.00", null]`)
					defer vals.Release()
					ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}})
					ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}})
				})
			}
		})
	}
	for _, fn := range []string{"acos", "asin"} {
		ds.Run(fn, func() {
			for _, ty := range ds.negativeScales() {
				ds.Run(ty.String(), func() {
					arr := ds.decimalArrayFromJSON(ty, `["12E2", "-42E2", null]`)
					defer arr.Release()
					// unchecked variant is permitted to produce out-of-domain results;
					// checked variant must report the domain error
					ds.checkDecimalToFloat(fn+"_unchecked", []compute.Datum{&compute.ArrayDatum{arr.Data()}})
					ds.checkFail(fn, []compute.Datum{&compute.ArrayDatum{arr.Data()}}, "domain error", nil)
				})
			}
		})
	}
}

// TestAtan exercises atan over decimals with both positive and negative scales.
// atan is defined for all reals, so both scale groups go through
// checkDecimalToFloat with no failure cases.
func (ds *DecimalUnaryArithmeticSuite) TestAtan() {
	fn := "atan"
	for _, ty := range ds.positiveScales() {
		ds.Run(ty.String(), func() {
			empty := ds.decimalArrayFromJSON(ty, `[]`)
			defer empty.Release()
			vals := ds.decimalArrayFromJSON(ty, `["0.00", "-1.00", "1.00", null]`)
			defer vals.Release()
			ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}})
			ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}})
		})
	}
	for _, ty := range ds.negativeScales() {
		ds.Run(ty.String(), func() {
			empty := ds.decimalArrayFromJSON(ty, `[]`)
			defer empty.Release()
			vals := ds.decimalArrayFromJSON(ty, `["12E2", "-42E2", null]`)
			defer vals.Release()
			ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}})
			ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}})
		})
	}
}

// TestTrig exercises cos/sin/tan over decimals of both scale signs. Note that
// fn accumulates the suffix: the empty suffix comes first (base name is tested
// unchanged), then "_unchecked" is appended for the second pass.
func (ds *DecimalUnaryArithmeticSuite) TestTrig() {
	for _, fn := range []string{"cos", "sin", "tan"} {
		for _, suffix := range []string{"", "_unchecked"} {
			fn += suffix
			ds.Run(fn, func() {
				for _, ty := range ds.positiveScales() {
					ds.Run(ty.String(), func() {
						empty := ds.decimalArrayFromJSON(ty, `[]`)
						defer empty.Release()
						vals := ds.decimalArrayFromJSON(ty, `["0.00", "-1.00", "1.00", null]`)
						defer vals.Release()
						ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}})
						ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}})
					})
				}
				for _, ty := range ds.negativeScales() {
					ds.Run(ty.String(), func() {
						empty := ds.decimalArrayFromJSON(ty, `[]`)
						defer empty.Release()
						vals := ds.decimalArrayFromJSON(ty, `["12E2", "-42E2", null]`)
						defer vals.Release()
						ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}})
						ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}})
					})
				}
			})
		}
	}
}

// TestRound exercises "round" with NDigits=2 across every rounding mode for
// decimal128/decimal256 (precision 4, scale 3). Input values sit at and around
// the half-way point so each mode's tie-breaking behavior is covered.
func (ds *DecimalUnaryArithmeticSuite) TestRound() {
	options := compute.RoundOptions{NDigits: 2, Mode: compute.RoundDown}
	cases := []struct {
		mode compute.RoundMode
		exp  string
	}{
		{compute.RoundDown, `["1.010", "1.010", "1.010", "1.010", "-1.010", "-1.020", "-1.020", "-1.020", null]`},
		{compute.RoundUp, `["1.010", "1.020", "1.020", "1.020", "-1.010", "-1.010", "-1.010", "-1.010", null]`},
		{compute.RoundTowardsZero, `["1.010", "1.010", "1.010", "1.010", "-1.010", "-1.010", "-1.010", "-1.010", null]`},
		{compute.RoundTowardsInfinity, `["1.010", "1.020", "1.020", "1.020", "-1.010", "-1.020", "-1.020", "-1.020", null]`},
		{compute.RoundHalfDown, `["1.010", "1.010", "1.010", "1.020", "-1.010", "-1.010", "-1.020", "-1.020", null]`},
		{compute.RoundHalfUp, `["1.010", "1.010", "1.020", "1.020", "-1.010", "-1.010", "-1.010", "-1.020", null]`},
		{compute.RoundHalfTowardsZero, `["1.010", "1.010", "1.010", "1.020", "-1.010", "-1.010", "-1.010", "-1.020", null]`},
		{compute.RoundHalfTowardsInfinity, `["1.010", "1.010", "1.020", "1.020", "-1.010", "-1.010", "-1.020", "-1.020", null]`},
		{compute.RoundHalfToEven, `["1.010", "1.010", "1.020", "1.020", "-1.010", "-1.010", "-1.020", "-1.020",
null]`},
		{compute.RoundHalfToOdd, `["1.010", "1.010", "1.010", "1.020", "-1.010", "-1.010", "-1.010", "-1.020", null]`},
	}
	fn := "round"
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 3}, &arrow.Decimal256Type{Precision: 4, Scale: 3}} {
		ds.Run(ty.String(), func() {
			values := ds.getArr(ty, `["1.010", "1.012", "1.015", "1.019", "-1.010", "-1.012", "-1.015", "-1.019", null]`)
			defer values.Release()
			// same input array checked against each mode's expected output
			for _, tt := range cases {
				ds.Run(tt.mode.String(), func() {
					options.Mode = tt.mode
					exp := ds.getArr(ty, tt.exp)
					defer exp.Release()
					checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{values.Data()}}, &compute.ArrayDatum{exp.Data()}, options)
				})
			}
		})
	}
}

// TestRoundTowardsInfinity exercises "round" in RoundTowardsInfinity mode for
// decimal128/decimal256 at varying NDigits, including negative NDigits and the
// precision-overflow failure cases. Negative-scale decimals are already coarser
// than any non-negative NDigits, so they round to themselves until NDigits goes
// below the scale.
func (ds *DecimalUnaryArithmeticSuite) TestRoundTowardsInfinity() {
	fn := "round"
	options := compute.RoundOptions{NDigits: 0, Mode: compute.RoundTowardsInfinity}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} {
		ds.Run(ty.String(), func() {
			empty := ds.getArr(ty, `[]`)
			defer empty.Release()
			vals := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`)
			defer vals.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options)
			input := []compute.Datum{&compute.ArrayDatum{vals.Data()}}
			options.NDigits = 0
			exp0 := ds.getArr(ty, `["1.00", "2.00", "2.00", "-42.00", "-43.00", "-43.00", null]`)
			defer exp0.Release()
			checkScalar(ds.T(), fn, input, &compute.ArrayDatum{exp0.Data()}, options)
			exp1 := ds.getArr(ty, `["1.00", "2.00", "1.10", "-42.00", "-43.00", "-42.20", null]`)
			defer exp1.Release()
			options.NDigits = 1
			checkScalar(ds.T(), fn, input, &compute.ArrayDatum{exp1.Data()}, options)
			// at or beyond the type's scale, rounding is the identity
			options.NDigits = 2
			checkScalar(ds.T(), fn, input, &compute.ArrayDatum{vals.Data()}, options)
			options.NDigits = 4
			checkScalar(ds.T(), fn, input, &compute.ArrayDatum{vals.Data()}, options)
			options.NDigits = 100
			checkScalar(ds.T(), fn, input, &compute.ArrayDatum{vals.Data()}, options)
			options.NDigits = -1
			neg := ds.getArr(ty, `["10.00", "10.00", "10.00", "-50.00", "-50.00", "-50.00", null]`)
			defer neg.Release()
			checkScalar(ds.T(), fn, input, &compute.ArrayDatum{neg.Data()}, options)
			options.NDigits = -2
			ds.checkFail(fn, input, "rounding to -2 digits will not fit in precision", options)
			options.NDigits = -1
			noprec := ds.getArr(ty, `["99.99"]`)
			defer noprec.Release()
			ds.checkFail(fn, []compute.Datum{&compute.ArrayDatum{noprec.Data()}}, "rounded value 100.00 does not fit in precision", options)
		})
	}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} {
		ds.Run(ty.String(), func() {
			values := ds.getArr(ty, `["10E2", "12E2", "18E2", "-10E2", "-12E2", "-18E2", null]`)
			defer values.Release()
			input := &compute.ArrayDatum{values.Data()}
			options.NDigits = 0
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = 2
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = 100
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = -1
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = -2
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = -3
			res := ds.getArr(ty, `["10E2", "20E2", "20E2", "-10E2", "-20E2", "-20E2", null]`)
			defer res.Release()
			checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{res.Data()}, options)
			options.NDigits = -4
			ds.checkFail(fn, []compute.Datum{input}, "rounding to -4 digits will not fit in precision", options)
		})
	}
}

// TestRoundHalfToEven exercises "round" in RoundHalfToEven (banker's rounding)
// mode at varying NDigits, including the precision-overflow failure cases.
func (ds *DecimalUnaryArithmeticSuite) TestRoundHalfToEven() {
	fn := "round"
	options := compute.RoundOptions{NDigits: 0, Mode: compute.RoundHalfToEven}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} {
		ds.Run(ty.String(), func() {
			empty := ds.getArr(ty, `[]`)
			defer
			empty.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options)
			// includes exact .50/.55 values to pin down the half-to-even tie-break
			values := ds.getArr(ty, `["1.00", "5.99", "1.01", "-42.00", "-42.99", "-42.15", "1.50", "2.50", "-5.50", "-2.55", null]`)
			defer values.Release()
			input := &compute.ArrayDatum{values.Data()}
			exp0 := ds.getArr(ty, `["1.00", "6.00", "1.00", "-42.00", "-43.00", "-42.00", "2.00", "2.00", "-6.00", "-3.00", null]`)
			defer exp0.Release()
			exp1 := ds.getArr(ty, `["1.00", "6.00", "1.00", "-42.00", "-43.00", "-42.20", "1.50", "2.50", "-5.50", "-2.60", null]`)
			defer exp1.Release()
			expNeg1 := ds.getArr(ty, `["0.00", "10.00", "0.00", "-40.00", "-40.00", "-40.00", "0.00", "0.00", "-10.00", "0.00", null]`)
			defer expNeg1.Release()
			options.NDigits = 0
			checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp0.Data()}, options)
			options.NDigits = 1
			checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp1.Data()}, options)
			// at or beyond the type's scale, rounding is the identity
			options.NDigits = 2
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = 4
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = 100
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = -1
			checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{expNeg1.Data()}, options)
			options.NDigits = -2
			ds.checkFail(fn, []compute.Datum{input}, "rounding to -2 digits will not fit in precision", options)
			options.NDigits = -1
			noprec := ds.getArr(ty, `["99.99"]`)
			defer noprec.Release()
			ds.checkFail(fn, []compute.Datum{&compute.ArrayDatum{noprec.Data()}}, "rounded value 100.00 does not fit in precision", options)
		})
	}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} {
		ds.Run(ty.String(), func() {
			values := ds.getArr(ty, `["5E2", "10E2", "12E2", "15E2", "18E2", "-10E2", "-12E2", "-15E2", "-18E2", null]`)
			defer values.Release()
			input := &compute.ArrayDatum{values.Data()}
			options.NDigits = 0
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = 2
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = 100
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = -1
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = -2
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			options.NDigits = -3
			res := ds.getArr(ty, `["0", "10E2", "10E2", "20E2", "20E2", "-10E2", "-10E2", "-20E2", "-20E2", null]`)
			defer res.Release()
			checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{res.Data()}, options)
			options.NDigits = -4
			ds.checkFail(fn, []compute.Datum{input}, "rounding to -4 digits will not fit in precision", options)
		})
	}
}

// TestRoundCeil exercises "ceil" on decimals: positive-scale values round up to
// the next integer, a value at the top of the precision range must fail, and
// negative-scale values (already integral multiples of 100) are unchanged.
func (ds *DecimalUnaryArithmeticSuite) TestRoundCeil() {
	fn := "ceil"
	for _, ty := range ds.positiveScales() {
		ds.Run(ty.String(), func() {
			empty := ds.getArr(ty, `[]`)
			defer empty.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil)
			in := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`)
			defer in.Release()
			out := ds.getArr(ty, `["1.00", "2.00", "2.00", "-42.00", "-42.00", "-42.00", null]`)
			defer out.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{out.Data()}, nil)
		})
	}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} {
		ds.Run(ty.String(), func() {
			// ceil(99.99) = 100.00 needs precision 5 — must fail
			sc, _ := scalar.MakeScalarParam("99.99", ty)
			ds.checkFail(fn, []compute.Datum{compute.NewDatum(sc)}, "rounded value 100.00 does not fit in precision of decimal", nil)
			sc, _ = scalar.MakeScalarParam("-99.99", ty)
			out, _ := scalar.MakeScalarParam("-99.00", ty)
			checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil)
		})
	}
	for _, ty := range
	ds.negativeScales() {
		ds.Run(ty.String(), func() {
			empty := ds.getArr(ty, `[]`)
			defer empty.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil)
			// negative-scale decimals are already integral — ceil is the identity
			ex := ds.getArr(ty, `["12E2", "-42E2", null]`)
			defer ex.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{ex.Data()}}, &compute.ArrayDatum{ex.Data()}, nil)
		})
	}
}

// TestRoundFloor exercises "floor" on decimals: positive-scale values round down
// to the previous integer, floor(-99.99) = -100.00 overflows precision 4 and
// must fail, and negative-scale values are unchanged.
func (ds *DecimalUnaryArithmeticSuite) TestRoundFloor() {
	fn := "floor"
	for _, ty := range ds.positiveScales() {
		ds.Run(ty.String(), func() {
			empty := ds.getArr(ty, `[]`)
			defer empty.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil)
			in := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`)
			defer in.Release()
			out := ds.getArr(ty, `["1.00", "1.00", "1.00", "-42.00", "-43.00", "-43.00", null]`)
			defer out.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{out.Data()}, nil)
		})
	}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} {
		ds.Run(ty.String(), func() {
			sc, _ := scalar.MakeScalarParam("-99.99", ty)
			ds.checkFail(fn, []compute.Datum{compute.NewDatum(sc)}, "rounded value -100.00 does not fit in precision of decimal", nil)
			sc, _ = scalar.MakeScalarParam("99.99", ty)
			out, _ := scalar.MakeScalarParam("99.00", ty)
			checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil)
		})
	}
	for _, ty := range ds.negativeScales() {
		ds.Run(ty.String(), func() {
			empty := ds.getArr(ty, `[]`)
			defer empty.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil)
			ex := ds.getArr(ty, `["12E2", "-42E2", null]`)
			defer ex.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{ex.Data()}}, &compute.ArrayDatum{ex.Data()}, nil)
		})
	}
}

// TestRoundTrunc exercises "trunc" on decimals: magnitudes always shrink toward
// zero, so unlike ceil/floor no precision-overflow failure is possible here.
func (ds *DecimalUnaryArithmeticSuite) TestRoundTrunc() {
	fn := "trunc"
	for _, ty := range ds.positiveScales() {
		ds.Run(ty.String(), func() {
			empty := ds.getArr(ty, `[]`)
			defer empty.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil)
			in := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`)
			defer in.Release()
			out := ds.getArr(ty, `["1.00", "1.00", "1.00", "-42.00", "-42.00", "-42.00", null]`)
			defer out.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{out.Data()}, nil)
		})
	}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} {
		ds.Run(ty.String(), func() {
			sc, _ := scalar.MakeScalarParam("99.99", ty)
			out, _ := scalar.MakeScalarParam("99.00", ty)
			checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil)
			sc, _ = scalar.MakeScalarParam("-99.99", ty)
			out, _ = scalar.MakeScalarParam("-99.00", ty)
			checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil)
		})
	}
	for _, ty := range ds.negativeScales() {
		ds.Run(ty.String(), func() {
			empty := ds.getArr(ty, `[]`)
			defer empty.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil)
			ex := ds.getArr(ty, `["12E2", "-42E2", null]`)
			defer ex.Release()
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{ex.Data()}}, &compute.ArrayDatum{ex.Data()}, nil)
		})
	}
}

// TestRoundToMultiple exercises "round_to_multiple" with a multiple of 2.00
// across every rounding mode. The multiple scalar is constructed with the
// matching decimal width for the type under test.
func (ds *DecimalUnaryArithmeticSuite) TestRoundToMultiple() {
	fn := "round_to_multiple"
	var options compute.RoundToMultipleOptions
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} {
		ds.Run(ty.String(), func() {
			// 200 unscaled at scale 2 == multiple of 2.00
			if ty.ID() == arrow.DECIMAL128 {
				options.Multiple, _ = scalar.MakeScalarParam(decimal128.FromI64(200), ty)
			} else {
				// NOTE(review): continuation of TestRoundToMultiple — the if/else
				// selecting the multiple's decimal width opens in the preceding lines.
				options.Multiple, _ = scalar.MakeScalarParam(decimal256.FromI64(200), ty)
			}
			values := ds.getArr(ty, `["-3.50", "-3.00", "-2.50", "-2.00", "-1.50", "-1.00", "-0.50", "0.00", "0.50", "1.00", "1.50", "2.00", "2.50", "3.00", "3.50", null]`)
			defer values.Release()
			input := []compute.Datum{&compute.ArrayDatum{values.Data()}}
			tests := []struct {
				mode compute.RoundMode
				exp  string
			}{
				{compute.RoundDown, `["-4.00", "-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", null]`},
				{compute.RoundUp, `["-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "-0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", "4.00", null]`},
				{compute.RoundTowardsZero, `["-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "-0.00", "0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", null]`},
				{compute.RoundTowardsInfinity, `["-4.00", "-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", "4.00", null]`},
				{compute.RoundHalfDown, `["-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", null]`},
				{compute.RoundHalfUp, `["-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", null]`},
				{compute.RoundHalfTowardsZero, `["-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", null]`},
				{compute.RoundHalfTowardsInfinity, `["-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", null]`},
				{compute.RoundHalfToEven, `["-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "4.00", "4.00", null]`},
				{compute.RoundHalfToOdd, `["-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "2.00", "4.00", null]`},
			}
			for _, tt := range tests {
				ds.Run(tt.mode.String(), func() {
					options.Mode = tt.mode
					result := ds.getArr(ty, tt.exp)
					defer result.Release()
					checkScalar(ds.T(), fn, input, &compute.ArrayDatum{result.Data()}, options)
				})
			}
		})
	}
}

// TestRoundToMultipleTowardsInfinity exercises "round_to_multiple" in
// RoundTowardsInfinity mode, including the invalid-multiple failure cases
// (zero, negative, null, or missing multiple) and precision overflow.
func (ds *DecimalUnaryArithmeticSuite) TestRoundToMultipleTowardsInfinity() {
	fn := "round_to_multiple"
	options := compute.RoundToMultipleOptions{Mode: compute.RoundTowardsInfinity}
	// setMultiple builds the multiple scalar with the decimal width matching ty
	setMultiple := func(ty arrow.DataType, val int64) {
		if ty.ID() == arrow.DECIMAL128 {
			options.Multiple = scalar.NewDecimal128Scalar(decimal128.FromI64(val), ty)
		} else {
			options.Multiple = scalar.NewDecimal256Scalar(decimal256.FromI64(val), ty)
		}
	}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} {
		ds.Run(ty.String(), func() {
			empty := ds.getArr(ty, `[]`)
			defer empty.Release()
			values := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`)
			defer values.Release()
			input := &compute.ArrayDatum{values.Data()}
			setMultiple(ty, 25)
			checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options)
			exp25 := ds.getArr(ty, `["1.00", "2.00", "1.25", "-42.00", "-43.00", "-42.25", null]`)
			defer exp25.Release()
			checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp25.Data()}, options)
			// multiple of 0.01 (the type's smallest step) is the identity
			setMultiple(ty, 1)
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
			// a multiple of a *different* decimal type (2 at scale 0) is also accepted
			setMultiple(&arrow.Decimal128Type{Precision: 2, Scale: 0}, 2)
			exp20 := ds.getArr(ty, `["2.00", "2.00", "2.00", "-42.00", "-44.00", "-44.00", null]`)
			defer exp20.Release()
			checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp20.Data()}, options)
			setMultiple(ty, 0)
			ds.checkFail(fn, []compute.Datum{input}, "rounding multiple must be positive", options)
			options.Multiple = scalar.NewDecimal128Scalar(decimal128.Num{}, &arrow.Decimal128Type{Precision: 4, Scale: 2})
			ds.checkFail(fn, []compute.Datum{input}, "rounding multiple must be positive", options)
			tester := ds.getArr(ty, `["99.99"]`)
			defer tester.Release()
			testDatum := &compute.ArrayDatum{tester.Data()}
			setMultiple(ty, -10)
			ds.checkFail(fn, []compute.Datum{testDatum}, "rounding multiple must be positive", options)
			setMultiple(ty, 100)
			ds.checkFail(fn, []compute.Datum{testDatum}, "rounded value 100.00 does not fit in precision", options)
			// a float64 multiple is accepted; the failure here is still precision overflow
			options.Multiple = scalar.NewFloat64Scalar(1)
			ds.checkFail(fn, []compute.Datum{testDatum}, "rounded value 100.00 does not fit in precision", options)
			options.Multiple = scalar.MakeNullScalar(&arrow.Decimal128Type{Precision: 3})
			ds.checkFail(fn, []compute.Datum{testDatum}, "rounding multiple must be non-null and valid", options)
			options.Multiple = nil
			ds.checkFail(fn, []compute.Datum{testDatum}, "rounding multiple must be non-null and valid", options)
		})
	}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} {
		ds.Run(ty.String(), func() {
			values := ds.getArr(ty, `["10E2", "12E2", "18E2", "-10E2", "-12E2", "-18E2", null]`)
			defer values.Release()
			input := &compute.ArrayDatum{values.Data()}
			setMultiple(ty, 4)
			exp := ds.getArr(ty, `["12E2", "12E2", "20E2", "-12E2", "-12E2", "-20E2", null]`)
			defer exp.Release()
			checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp.Data()}, options)
			setMultiple(ty, 1)
			checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
		})
	}
}

// TestRoundToMultipleHalfToOdd exercises "round_to_multiple" in RoundHalfToOdd
// mode with multiples chosen so no exact halfway point exists for some inputs.
func (ds *DecimalUnaryArithmeticSuite) TestRoundToMultipleHalfToOdd() {
	fn := "round_to_multiple"
	options := compute.RoundToMultipleOptions{Mode: compute.RoundHalfToOdd}
	setMultiple := func(ty arrow.DataType, val int64) {
		if ty.ID() == arrow.DECIMAL128 {
			options.Multiple = scalar.NewDecimal128Scalar(decimal128.FromI64(val), ty)
		} else {
			options.Multiple = scalar.NewDecimal256Scalar(decimal256.FromI64(val), ty)
		}
	}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4,
		Scale: 2}} {
		empty := ds.getArr(ty, `[]`)
		defer empty.Release()
		values := ds.getArr(ty, `["-0.38", "-0.37", "-0.25", "-0.13", "-0.12", "0.00", "0.12", "0.13", "0.25", "0.37", "0.38", null]`)
		defer values.Release()
		input := &compute.ArrayDatum{values.Data()}
		// there is no exact halfway point, check what happens
		setMultiple(ty, 25)
		checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options)
		exp25 := ds.getArr(ty, `["-0.50", "-0.25", "-0.25", "-0.25", "-0.00", "0.00", "0.00", "0.25", "0.25", "0.25", "0.50", null]`)
		defer exp25.Release()
		checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp25.Data()}, options)
		// multiple of 0.01 is the identity
		setMultiple(ty, 1)
		checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
		setMultiple(ty, 24)
		checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options)
		exp24 := ds.getArr(ty, `["-0.48", "-0.48", "-0.24", "-0.24", "-0.24", "0.00", "0.24", "0.24", "0.24", "0.48", "0.48", null]`)
		defer exp24.Release()
		checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp24.Data()}, options)
		// multiple from a different decimal type (0.1 at scale 1) is accepted
		setMultiple(&arrow.Decimal128Type{Precision: 3, Scale: 1}, 1)
		exp1 := ds.getArr(ty, `["-0.40", "-0.40", "-0.30", "-0.10", "-0.10", "0.00", "0.10", "0.10", "0.30", "0.40", "0.40", null]`)
		defer exp1.Release()
		checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp1.Data()}, options)
	}
	for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} {
		values := ds.getArr(ty, `["10E2", "12E2", "18E2", "-10E2", "-12E2", "-18E2", null]`)
		defer values.Release()
		exp4 := ds.getArr(ty, `["12E2", "12E2", "20E2", "-12E2", "-12E2", "-20E2", null]`)
		defer exp4.Release()
		exp5 := ds.getArr(ty, `["10E2", "10E2", "20E2", "-10E2", "-10E2", "-20E2", null]`)
		defer exp5.Release()
		input := &compute.ArrayDatum{values.Data()}
		setMultiple(ty, 4)
		checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp4.Data()}, options)
		setMultiple(ty, 5)
		checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp5.Data()}, options)
		setMultiple(ty, 1)
		checkScalar(ds.T(), fn, []compute.Datum{input}, input, options)
	}
}

// ScalarBinaryTemporalArithmeticSuite covers binary arithmetic (add/subtract)
// over temporal types; BinaryFuncTestSuite supplies mem/ctx (defined elsewhere
// in this file).
type ScalarBinaryTemporalArithmeticSuite struct {
	BinaryFuncTestSuite
}

// JSON fixtures for the temporal add/sub round-trip test. The pairs
// (xJSON, xJSON2) are element-wise operands of matching temporal type:
// date32 in days, date64 in milliseconds, time32/time64 in the unit named
// by the suffix (s/ms/us/ns).
var (
	date32JSON  = `[0, 11016, -25932, 23148, 18262, 18261, 18260, 14609, 14610, 14612, 14613, 13149, 13148, 14241, 14242, 15340, null]`
	date32JSON2 = `[365, 10650, -25901, 23118, 18263, 18259, 18260, 14609, 14610, 14612, 14613, 13149, 13148, 14240, 13937, 15400, null]`
	date64JSON  = `[0, 951782400000, -2240524800000, 1999987200000, 1577836800000, 1577750400000, 1577664000000, 1262217600000, 1262304000000, 1262476800000, 1262563200000, 1136073600000, 1135987200000, 1230422400000, 1230508800000, 1325376000000, null]`
	date64JSON2 = `[31536000000, 920160000000, -2237846400000, 1997395200000, 1577923200000, 1577577600000, 1577664000000, 1262217600000, 1262304000000, 1262476800000, 1262563200000, 1136073600000, 1135987200000, 1230336000000, 1204156800000, 1330560000000, null]`
	timeJSONs   = `[59, 84203, 3560, 12800, 3905, 7810, 11715, 15620, 19525, 23430, 27335, 31240, 35145, 0, 0, 3723, null]`
	timeJSONs2  = `[59, 84203, 12642, 7182, 68705, 7390, 915, 16820, 19525, 5430, 84959, 31207, 35145, 0, 0, 3723, null]`
	timeJSONms  = `[59123, 84203999, 3560001, 12800000, 3905001, 7810002, 11715003, 15620004, 19525005, 23430006, 27335000, 31240000, 35145000, 0, 0, 3723000, null]`
	timeJSONms2 = `[59103, 84203999, 12642001, 7182000, 68705005, 7390000, 915003, 16820004, 19525005, 5430006, 84959000, 31207000, 35145000, 0, 0, 3723000, null]`
	timeJSONus  = `[59123456, 84203999999, 3560001001, 12800000000, 3905001000, 7810002000, 11715003000, 15620004132, 19525005321, 23430006163, 27335000000, 31240000000, 35145000000, 0, 0, 3723000000, null]`
	timeJSONus2 = `[59103476, 84203999999, 12642001001, 7182000000, 68705005000, 7390000000, 915003000, 16820004432,
19525005021, 5430006163, 84959000000, 31207000000, 35145000000, 0, 0, 3723000000, null]`
	timeJSONns  = `[59123456789, 84203999999999, 3560001001001, 12800000000000, 3905001000000, 7810002000000, 11715003000000, 15620004132000, 19525005321000, 23430006163000, 27335000000000, 31240000000000, 35145000000000, 0, 0, 3723000000000, null]`
	timeJSONns2 = `[59103476799, 84203999999909, 12642001001001, 7182000000000, 68705005000000, 7390000000000, 915003000000, 16820004432000, 19525005021000, 5430006163000, 84959000000000, 31207000000000, 35145000000000, 0, 0, 3723000000000, null]`
)

// TestTemporalAddSub verifies the round-trip (a - b) + b == a for temporal
// types: subtracting two values of the same date/time type yields a duration
// of the expected unit, and adding that duration back reproduces the original
// (casting through timestamp for the date types — see the inline comment).
func (s *ScalarBinaryTemporalArithmeticSuite) TestTemporalAddSub() {
	tests := []struct {
		val1 string
		val2 string
		dt   arrow.DataType
		exp  arrow.DataType
	}{
		{date32JSON, date32JSON2, arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Duration_s},
		{date64JSON, date64JSON2, arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Duration_ms},
		{timeJSONs, timeJSONs2, arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Duration_s},
		{timeJSONms, timeJSONms2, arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Duration_ms},
		{timeJSONus, timeJSONus2, arrow.FixedWidthTypes.Time64us, arrow.FixedWidthTypes.Duration_us},
		{timeJSONns, timeJSONns2, arrow.FixedWidthTypes.Time64ns, arrow.FixedWidthTypes.Duration_ns},
	}
	for _, tt := range tests {
		s.Run(tt.dt.String(), func() {
			for _, checked := range []bool{true, false} {
				s.Run(fmt.Sprintf("checked=%t", checked), func() {
					opts := compute.ArithmeticOptions{NoCheckOverflow: !checked}
					arr1, _, _ := array.FromJSON(s.mem, tt.dt, strings.NewReader(tt.val1))
					defer arr1.Release()
					arr2, _, _ := array.FromJSON(s.mem, tt.dt, strings.NewReader(tt.val2))
					defer arr2.Release()
					datum1 := &compute.ArrayDatum{Value: arr1.Data()}
					datum2 := &compute.ArrayDatum{Value: arr2.Data()}
					result, err := compute.Subtract(s.ctx, opts, datum1, datum2)
					s.Require().NoError(err)
					defer result.Release()
					res := result.(*compute.ArrayDatum)
					s.Truef(arrow.TypeEqual(tt.exp, res.Type()), "expected: %s\n got: %s", tt.exp, res.Type())
					out, err := compute.Add(s.ctx, opts, datum2, result)
					s.Require().NoError(err)
					defer out.Release()
					// date32 - date32 / date64 - date64 produce durations
					// and date + duration == timestamp so we need to cast
					// the timestamp back to a date in that case. Otherwise
					// we get back time32/time64 in those cases and can
					// compare them accurately.
					if arrow.TypeEqual(arr1.DataType(), out.(*compute.ArrayDatum).Type()) {
						assertDatumsEqual(s.T(), datum1, out, nil, nil)
					} else {
						casted, err := compute.CastDatum(s.ctx, out, compute.SafeCastOptions(arr1.DataType()))
						s.Require().NoError(err)
						defer casted.Release()
						assertDatumsEqual(s.T(), datum1, casted, nil, nil)
					}
				})
			}
		})
	}
}

// TestUnaryDispatchBest checks type resolution (DispatchBest) for the unary
// arithmetic kernels: which input types each function accepts, whether
// dictionary inputs decay to their value type, and which functions promote
// integer input to float64. Note fn accumulates the suffix — the empty suffix
// is iterated first, so the base and "_unchecked" names are both covered.
func TestUnaryDispatchBest(t *testing.T) {
	for _, fn := range []string{"abs"} {
		for _, suffix := range []string{"", "_unchecked"} {
			fn += suffix
			t.Run(fn, func(t *testing.T) {
				for _, ty := range numericTypes {
					t.Run(ty.String(), func(t *testing.T) {
						CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty})
						CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, []arrow.DataType{ty})
					})
				}
			})
		}
	}
	for _, fn := range []string{"negate_unchecked", "sign"} {
		t.Run(fn, func(t *testing.T) {
			for _, ty := range numericTypes {
				t.Run(ty.String(), func(t *testing.T) {
					CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty})
					CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, []arrow.DataType{ty})
				})
			}
		})
	}
	// checked negate only accepts signed integer and floating types
	for _, fn := range []string{"negate"} {
		t.Run(fn, func(t *testing.T) {
			for _, ty := range append(signedIntTypes, floatingTypes...)
			{
				t.Run(ty.String(), func(t *testing.T) {
					CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty})
					CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, []arrow.DataType{ty})
				})
			}
		})
	}
	// float types (with _unchecked variants)
	for _, fn := range []string{"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"} {
		for _, suffix := range []string{"", "_unchecked"} {
			fn += suffix
			t.Run(fn, func(t *testing.T) {
				for _, ty := range floatingTypes {
					t.Run(ty.String(), func(t *testing.T) {
						CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty})
						CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, []arrow.DataType{ty})
					})
				}
			})
		}
	}
	// float types (without _unchecked variants)
	for _, fn := range []string{"atan", "sign", "floor", "ceil", "trunc", "round"} {
		t.Run(fn, func(t *testing.T) {
			for _, ty := range floatingTypes {
				t.Run(ty.String(), func(t *testing.T) {
					CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty})
					CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, []arrow.DataType{ty})
				})
			}
		})
	}
	// integer -> float64 (with _unchecked variant)
	for _, fn := range []string{"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"} {
		for _, suffix := range []string{"", "_unchecked"} {
			fn += suffix
			t.Run(fn, func(t *testing.T) {
				for _, ty := range integerTypes {
					t.Run(ty.String(), func(t *testing.T) {
						CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{arrow.PrimitiveTypes.Float64})
						CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, []arrow.DataType{arrow.PrimitiveTypes.Float64})
					})
				}
			})
		}
	}
	// integer -> float64 (without _unchecked variants)
	for _, fn := range []string{"atan", "floor", "ceil", "trunc", "round"} {
		t.Run(fn, func(t *testing.T) {
			for _, ty := range integerTypes {
				t.Run(ty.String(), func(t *testing.T) {
					CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{arrow.PrimitiveTypes.Float64})
					CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, []arrow.DataType{arrow.PrimitiveTypes.Float64})
				})
			}
		})
	}
}

// TestUnaryArithmeticNull checks that every unary arithmetic kernel maps an
// all-null input to an all-null output (assertNullToNull is a helper defined
// elsewhere in this file).
func TestUnaryArithmeticNull(t *testing.T) {
	for _, fn := range []string{"abs", "negate", "acos", "asin", "cos", "ln", "log10", "log1p", "log2", "sin", "tan"} {
		for _, suffix := range []string{"", "_unchecked"} {
			fn += suffix
			assertNullToNull(t, context.TODO(), fn, memory.DefaultAllocator)
		}
	}
	for _, fn := range []string{"sign", "atan", "bit_wise_not", "floor", "ceil", "trunc", "round"} {
		assertNullToNull(t, context.TODO(), fn, memory.DefaultAllocator)
	}
}

// UnaryArithmeticSuite is the generic base suite for unary arithmetic kernels,
// parameterized on the element type T and the options type O (fnOpts is
// declared elsewhere in this file). Allocations go through a checked allocator
// that TearDownTest uses to detect leaks.
type UnaryArithmeticSuite[T arrow.NumericType, O fnOpts] struct {
	suite.Suite
	mem  *memory.CheckedAllocator
	ctx  context.Context
	opts O
}

// SetupTest creates a fresh checked allocator and resets opts to its zero value.
func (us *UnaryArithmeticSuite[T, O]) SetupTest() {
	us.mem = memory.NewCheckedAllocator(memory.DefaultAllocator)
	us.ctx = compute.WithAllocator(context.TODO(), us.mem)
	var def O
	us.opts = def
}

// TearDownTest asserts every allocation made during the test was released.
func (us *UnaryArithmeticSuite[T, O]) TearDownTest() {
	us.mem.AssertSize(us.T(), 0)
}

// datatype returns the arrow DataType corresponding to the element type T.
func (*UnaryArithmeticSuite[T, O]) datatype() arrow.DataType {
	return arrow.GetDataType[T]()
}

// makeNullScalar returns a null scalar of the suite's element type.
func (us *UnaryArithmeticSuite[T, O]) makeNullScalar() scalar.Scalar {
	return scalar.MakeNullScalar(us.datatype())
}

// makeScalar wraps v in a scalar of the suite's element type.
func (us *UnaryArithmeticSuite[T, O]) makeScalar(v T) scalar.Scalar {
	return scalar.MakeScalar(v)
}

// makeArray builds an array of the suite's element type from the given values.
func (us *UnaryArithmeticSuite[T, O]) makeArray(v ...T) arrow.Array {
	return exec.ArrayFromSlice(us.mem, v)
}

// getArr parses a JSON array string into an arrow array of type dt;
// the caller owns the returned array and must Release it.
func (us *UnaryArithmeticSuite[T, O]) getArr(dt arrow.DataType, str string) arrow.Array {
	arr, _, err := array.FromJSON(us.mem, dt, strings.NewReader(str), array.WithUseNumber())
	us.Require().NoError(err)
	return arr
}

// assertUnaryOpValError asserts that fn applied to the scalar arg fails with
// arrow.ErrInvalid and an error message containing msg.
func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpValError(fn unaryArithmeticFunc[O], arg T, msg string) {
	in := us.makeScalar(arg)
	_,
		// NOTE(review): continuation of assertUnaryOpValError — its signature and
		// the "_," of this assignment are on the preceding lines.
		err := fn(us.ctx, us.opts, compute.NewDatum(in))
	us.ErrorIs(err, arrow.ErrInvalid)
	us.ErrorContains(err, msg)
}

// assertUnaryOpNotImplemented asserts fn fails with arrow.ErrNotImplemented
// and an error message containing msg.
func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpNotImplemented(fn unaryArithmeticFunc[O], arg T, msg string) {
	in := us.makeScalar(arg)
	_, err := fn(us.ctx, us.opts, compute.NewDatum(in))
	us.ErrorIs(err, arrow.ErrNotImplemented)
	us.ErrorContains(err, msg)
}

// assertUnaryOpVals asserts fn(arg) == expected for scalar inputs,
// treating NaNs as equal.
func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpVals(fn unaryArithmeticFunc[O], arg, expected T) {
	in := us.makeScalar(arg)
	exp := us.makeScalar(expected)
	actual, err := fn(us.ctx, us.opts, compute.NewDatum(in))
	us.Require().NoError(err)
	assertScalarEquals(us.T(), exp, actual.(*compute.ScalarDatum).Value, scalar.WithNaNsEqual(true))
}

// assertUnaryOpScalars is assertUnaryOpVals for pre-built scalars, allowing
// input and output scalar types to differ.
func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpScalars(fn unaryArithmeticFunc[O], arg, exp scalar.Scalar) {
	actual, err := fn(us.ctx, us.opts, compute.NewDatum(arg))
	us.Require().NoError(err)
	assertScalarEquals(us.T(), exp, actual.(*compute.ScalarDatum).Value, scalar.WithNaNsEqual(true))
}

// assertUnaryOpArrs asserts fn(arg) equals exp element-wise (NaNs equal), then
// re-checks every element via the scalar code path as well.
func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpArrs(fn unaryArithmeticFunc[O], arg, exp arrow.Array) {
	datum := &compute.ArrayDatum{arg.Data()}
	actual, err := fn(us.ctx, us.opts, datum)
	us.Require().NoError(err)
	defer actual.Release()
	assertDatumsEqual(us.T(), &compute.ArrayDatum{exp.Data()}, actual, []array.EqualOption{array.WithNaNsEqual(true)}, []scalar.EqualOption{scalar.WithNaNsEqual(true)})
	// also check scalar ops
	for i := 0; i < arg.Len(); i++ {
		expScalar, err := scalar.GetScalar(exp, i)
		us.NoError(err)
		argScalar, err := scalar.GetScalar(arg, i)
		us.NoError(err)
		actual, err := fn(us.ctx, us.opts, compute.NewDatum(argScalar))
		us.Require().NoError(err)
		assertDatumsEqual(us.T(), compute.NewDatum(expScalar), compute.NewDatum(actual), []array.EqualOption{array.WithNaNsEqual(true)}, []scalar.EqualOption{scalar.WithNaNsEqual(true)})
	}
}

// assertUnaryOpExpArr is assertUnaryOpArrs with the input given as JSON.
func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpExpArr(fn unaryArithmeticFunc[O], arg string, exp arrow.Array) {
	in, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(arg), array.WithUseNumber())
	us.Require().NoError(err)
	defer in.Release()
	us.assertUnaryOpArrs(fn, in, exp)
}

// assertUnaryOp is assertUnaryOpArrs with both input and expectation as JSON.
func (us *UnaryArithmeticSuite[T, O]) assertUnaryOp(fn unaryArithmeticFunc[O], arg, exp string) {
	in, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(arg), array.WithUseNumber())
	us.Require().NoError(err)
	defer in.Release()
	expected, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(exp), array.WithUseNumber())
	us.Require().NoError(err)
	defer expected.Release()
	us.assertUnaryOpArrs(fn, in, expected)
}

// assertUnaryOpErr asserts fn over the JSON input fails with arrow.ErrInvalid
// and an error message containing msg.
func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpErr(fn unaryArithmeticFunc[O], arg string, msg string) {
	in, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(arg), array.WithUseNumber())
	us.Require().NoError(err)
	defer in.Release()
	_, err = fn(us.ctx, us.opts, &compute.ArrayDatum{in.Data()})
	us.ErrorIs(err, arrow.ErrInvalid)
	us.ErrorContains(err, msg)
}

// UnaryArithmeticIntegral specializes the base suite for integer element types
// with compute.ArithmeticOptions as the options type.
type UnaryArithmeticIntegral[T arrow.IntType | arrow.UintType] struct {
	UnaryArithmeticSuite[T, compute.ArithmeticOptions]
}

// setOverflowCheck toggles overflow checking (v=true means checked).
func (us *UnaryArithmeticIntegral[T]) setOverflowCheck(v bool) {
	us.opts.NoCheckOverflow = !v
}

// TestTrig sanity-checks the trig functions on integer input (which is
// promoted to float64). NOTE(review): this function continues past the end of
// this chunk — only its head is visible here.
func (us *UnaryArithmeticIntegral[T]) TestTrig() {
	// integer arguments promoted to float64, sanity check here
	atan := func(ctx context.Context, _ compute.ArithmeticOptions, arg compute.Datum) (compute.Datum, error) {
		return compute.Atan(ctx, arg)
	}
	input := us.makeArray(0, 1)
	defer input.Release()
	for _, overflow := range []bool{false, true} {
		us.setOverflowCheck(overflow)
		sinOut := us.getArr(arrow.PrimitiveTypes.Float64, `[0, 0.8414709848078965]`)
		defer sinOut.Release()
		cosOut := us.getArr(arrow.PrimitiveTypes.Float64, `[1, 0.5403023058681398]`)
		defer cosOut.Release()
		tanOut := us.getArr(arrow.PrimitiveTypes.Float64, `[0, 1.5574077246549023]`)
		defer tanOut.Release()
		asinOut := us.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf("[0, %f]", math.Pi/2))
		defer asinOut.Release()
		acosOut :=
us.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf("[%f, 0]", math.Pi/2)) defer acosOut.Release() atanOut := us.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf("[0, %f]", math.Pi/4)) defer atanOut.Release() us.assertUnaryOpArrs(compute.Sin, input, sinOut) us.assertUnaryOpArrs(compute.Cos, input, cosOut) us.assertUnaryOpArrs(compute.Tan, input, tanOut) us.assertUnaryOpArrs(compute.Asin, input, asinOut) us.assertUnaryOpArrs(compute.Acos, input, acosOut) us.assertUnaryOpArrs(atan, input, atanOut) } } func (us *UnaryArithmeticIntegral[T]) TestLog() { // integer arguments promoted to double, sanity check here ty := us.datatype() for _, overflow := range []bool{false, true} { us.setOverflowCheck(overflow) exp1 := us.getArr(arrow.PrimitiveTypes.Float64, `[0, null]`) defer exp1.Release() exp2 := us.getArr(arrow.PrimitiveTypes.Float64, `[0, 1, null]`) defer exp2.Release() ln := us.getArr(ty, `[1, null]`) defer ln.Release() log10 := us.getArr(ty, `[1, 10, null]`) defer log10.Release() log2 := us.getArr(ty, `[1, 2, null]`) defer log2.Release() log1p := us.getArr(ty, `[0, null]`) defer log1p.Release() us.assertUnaryOpArrs(compute.Ln, ln, exp1) us.assertUnaryOpArrs(compute.Log10, log10, exp2) us.assertUnaryOpArrs(compute.Log2, log2, exp2) us.assertUnaryOpArrs(compute.Log1p, log1p, exp1) } } type UnaryArithmeticSigned[T arrow.IntType] struct { UnaryArithmeticIntegral[T] } func (us *UnaryArithmeticSigned[T]) TestAbsoluteValue() { var ( dt = us.datatype() min = kernels.MinOf[T]() max = kernels.MaxOf[T]() ) fn := func(in, exp string) { us.assertUnaryOp(compute.AbsoluteValue, in, exp) } us.Run(dt.String(), func() { for _, checkOverflow := range []bool{true, false} { us.setOverflowCheck(checkOverflow) us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { // empty array fn(`[]`, `[]`) // scalar/arrays with nulls fn(`[null]`, `[null]`) fn(`[1, null, -10]`, `[1, null, 10]`) us.assertUnaryOpScalars(compute.AbsoluteValue, us.makeNullScalar(), us.makeNullScalar()) // 
scalar/arrays with zeros fn(`[0, -0]`, `[0, 0]`) us.assertUnaryOpVals(compute.AbsoluteValue, -0, 0) us.assertUnaryOpVals(compute.AbsoluteValue, 0, 0) // ordinary scalars/arrays (positive inputs) fn(`[1, 10, 127]`, `[1, 10, 127]`) us.assertUnaryOpVals(compute.AbsoluteValue, 1, 1) // ordinary scalars/arrays (negative inputs) fn(`[-1, -10, -127]`, `[1, 10, 127]`) us.assertUnaryOpVals(compute.AbsoluteValue, -1, 1) // min/max us.assertUnaryOpVals(compute.AbsoluteValue, max, max) if checkOverflow { us.assertUnaryOpValError(compute.AbsoluteValue, min, "overflow") } else { us.assertUnaryOpVals(compute.AbsoluteValue, min, min) } }) } }) } func (us *UnaryArithmeticSigned[T]) TestNegate() { var ( dt = us.datatype() min = kernels.MinOf[T]() max = kernels.MaxOf[T]() ) fn := func(in, exp string) { us.assertUnaryOp(compute.Negate, in, exp) } us.Run(dt.String(), func() { for _, checkOverflow := range []bool{true, false} { us.setOverflowCheck(checkOverflow) us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { fn(`[]`, `[]`) // scalar/arrays with nulls fn(`[null]`, `[null]`) fn(`[1, null, -10]`, `[-1, null, 10]`) // ordinary scalars/arrays (positive inputs) fn(`[1, 10, 127]`, `[-1, -10, -127]`) us.assertUnaryOpVals(compute.Negate, 1, -1) // ordinary scalars/arrays (negative inputs) fn(`[-1, -10, -127]`, `[1, 10, 127]`) us.assertUnaryOpVals(compute.Negate, -1, 1) // min/max us.assertUnaryOpVals(compute.Negate, min+1, max) us.assertUnaryOpVals(compute.Negate, max, min+1) }) } }) } type UnaryArithmeticUnsigned[T arrow.UintType] struct { UnaryArithmeticIntegral[T] } func (us *UnaryArithmeticUnsigned[T]) TestAbsoluteValue() { var ( min, max T = 0, kernels.MaxOf[T]() ) fn := func(in, exp string) { us.assertUnaryOp(compute.AbsoluteValue, in, exp) } us.Run(us.datatype().String(), func() { for _, checkOverflow := range []bool{true, false} { us.setOverflowCheck(checkOverflow) us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { fn(`[]`, `[]`) fn(`[null]`, `[null]`) 
us.assertUnaryOpScalars(compute.AbsoluteValue, us.makeNullScalar(), us.makeNullScalar()) fn(`[0, 1, 10, 127]`, `[0, 1, 10, 127]`) us.assertUnaryOpVals(compute.AbsoluteValue, min, min) us.assertUnaryOpVals(compute.AbsoluteValue, max, max) }) } }) } func (us *UnaryArithmeticUnsigned[T]) TestNegate() { var ( dt = us.datatype() ) fn := func(in, exp string) { us.assertUnaryOp(compute.Negate, in, exp) } us.Run(dt.String(), func() { us.setOverflowCheck(true) us.assertUnaryOpNotImplemented(compute.Negate, 1, "no kernel matching input types") us.setOverflowCheck(false) fn(`[]`, `[]`) fn(`[null]`, `[null]`) us.assertUnaryOpVals(compute.Negate, 1, ^T(1)+1) }) } type UnaryArithmeticFloating[T constraints.Float] struct { UnaryArithmeticSuite[T, compute.ArithmeticOptions] min, max T smallest T } func (us *UnaryArithmeticFloating[T]) setOverflowCheck(v bool) { us.opts.NoCheckOverflow = !v } func (us *UnaryArithmeticFloating[T]) TestAbsoluteValue() { fn := func(in, exp string) { us.assertUnaryOp(compute.AbsoluteValue, in, exp) } us.Run(us.datatype().String(), func() { for _, checkOverflow := range []bool{true, false} { us.setOverflowCheck(checkOverflow) us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { fn(`[]`, `[]`) fn(`[null]`, `[null]`) fn(`[1.3, null, -10.80]`, `[1.3, null, 10.80]`) us.assertUnaryOpScalars(compute.AbsoluteValue, us.makeNullScalar(), us.makeNullScalar()) fn(`[0.0, -0.0]`, `[0.0, 0.0]`) us.assertUnaryOpVals(compute.AbsoluteValue, T(math.Copysign(0, -1)), 0) us.assertUnaryOpVals(compute.AbsoluteValue, 0, 0) fn(`[1.3, 10.80, 12748.001]`, `[1.3, 10.80, 12748.001]`) us.assertUnaryOpVals(compute.AbsoluteValue, 1.3, 1.3) fn(`[-1.3, -10.80, -12748.001]`, `[1.3, 10.80, 12748.001]`) us.assertUnaryOpVals(compute.AbsoluteValue, -1.3, 1.3) fn(`["Inf", "-Inf"]`, `["Inf", "Inf"]`) us.assertUnaryOpVals(compute.AbsoluteValue, us.min, us.max) us.assertUnaryOpVals(compute.AbsoluteValue, us.max, us.max) }) } }) } func (us *UnaryArithmeticFloating[T]) TestNegate() { 
var ( dt = us.datatype() ) fn := func(in, exp string) { us.assertUnaryOp(compute.Negate, in, exp) } us.Run(dt.String(), func() { for _, checkOverflow := range []bool{true, false} { us.setOverflowCheck(checkOverflow) us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { fn(`[]`, `[]`) // scalar/arrays with nulls fn(`[null]`, `[null]`) fn(`[1.5, null, -10.25]`, `[-1.5, null, 10.25]`) // ordinary scalars/arrays (positive inputs) fn(`[0.5, 10.123, 127.321]`, `[-0.5, -10.123, -127.321]`) us.assertUnaryOpVals(compute.Negate, 1.25, -1.25) // ordinary scalars/arrays (negative inputs) fn(`[-0.5, -10.123, -127.321]`, `[0.5, 10.123, 127.321]`) us.assertUnaryOpVals(compute.Negate, -1.25, 1.25) // min/max us.assertUnaryOpVals(compute.Negate, us.min, us.max) us.assertUnaryOpVals(compute.Negate, us.max, us.min) }) } }) } func (us *UnaryArithmeticFloating[T]) TestTrigSin() { us.setOverflowCheck(false) us.assertUnaryOp(compute.Sin, `["Inf", "-Inf"]`, `["NaN", "NaN"]`) for _, overflow := range []bool{false, true} { us.setOverflowCheck(overflow) us.assertUnaryOp(compute.Sin, `[]`, `[]`) us.assertUnaryOp(compute.Sin, `[null, "NaN"]`, `[null, "NaN"]`) arr := us.makeArray(0, math.Pi/2, math.Pi) exp := us.makeArray(0, 1, 0) defer arr.Release() defer exp.Release() us.assertUnaryOpArrs(compute.Sin, arr, exp) } us.setOverflowCheck(true) us.assertUnaryOpErr(compute.Sin, `["Inf", "-Inf"]`, "domain error") } func (us *UnaryArithmeticFloating[T]) TestTrigCos() { us.setOverflowCheck(false) us.assertUnaryOp(compute.Cos, `["Inf", "-Inf"]`, `["NaN", "NaN"]`) for _, overflow := range []bool{false, true} { us.setOverflowCheck(overflow) us.assertUnaryOp(compute.Cos, `[]`, `[]`) us.assertUnaryOp(compute.Cos, `[null, "NaN"]`, `[null, "NaN"]`) arr := us.makeArray(0, math.Pi/2, math.Pi) exp := us.makeArray(1, 0, -1) defer arr.Release() defer exp.Release() us.assertUnaryOpArrs(compute.Cos, arr, exp) } us.setOverflowCheck(true) us.assertUnaryOpErr(compute.Cos, `["Inf", "-Inf"]`, "domain error") } 
func (us *UnaryArithmeticFloating[T]) TestTrigTan() { us.setOverflowCheck(false) us.assertUnaryOp(compute.Tan, `["Inf", "-Inf"]`, `["NaN", "NaN"]`) for _, overflow := range []bool{false, true} { us.setOverflowCheck(overflow) us.assertUnaryOp(compute.Tan, `[]`, `[]`) us.assertUnaryOp(compute.Tan, `[null, "NaN"]`, `[null, "NaN"]`) // pi/2 isn't representable exactly -> there are no poles // (i.e. tan(pi/2) is merely a large value and not +Inf) arr := us.makeArray(0, math.Pi) exp := us.makeArray(0, 0) defer arr.Release() defer exp.Release() us.assertUnaryOpArrs(compute.Tan, arr, exp) } us.setOverflowCheck(true) us.assertUnaryOpErr(compute.Tan, `["Inf", "-Inf"]`, "domain error") } func (us *UnaryArithmeticFloating[T]) TestTrigAsin() { us.setOverflowCheck(false) us.assertUnaryOp(compute.Asin, `["Inf", "-Inf", -2, 2]`, `["NaN", "NaN", "NaN", "NaN"]`) for _, overflow := range []bool{false, true} { us.setOverflowCheck(overflow) us.assertUnaryOp(compute.Asin, `[]`, `[]`) us.assertUnaryOp(compute.Asin, `[null, "NaN"]`, `[null, "NaN"]`) arr := us.makeArray(0, 1, -1) exp := us.makeArray(0, math.Pi/2, -math.Pi/2) defer arr.Release() defer exp.Release() us.assertUnaryOpArrs(compute.Asin, arr, exp) } us.setOverflowCheck(true) us.assertUnaryOpErr(compute.Asin, `["Inf", "-Inf", -2, 2]`, "domain error") } func (us *UnaryArithmeticFloating[T]) TestTrigAcos() { us.setOverflowCheck(false) us.assertUnaryOp(compute.Acos, `["Inf", "-Inf", -2, 2]`, `["NaN", "NaN", "NaN", "NaN"]`) for _, overflow := range []bool{false, true} { us.setOverflowCheck(overflow) us.assertUnaryOp(compute.Acos, `[]`, `[]`) us.assertUnaryOp(compute.Acos, `[null, "NaN"]`, `[null, "NaN"]`) arr := us.makeArray(0, 1, -1) exp := us.makeArray(math.Pi/2, 0, math.Pi) defer arr.Release() defer exp.Release() us.assertUnaryOpArrs(compute.Acos, arr, exp) } us.setOverflowCheck(true) us.assertUnaryOpErr(compute.Acos, `["Inf", "-Inf", -2, 2]`, "domain error") } func (us *UnaryArithmeticFloating[T]) TestTrigAtan() { 
us.setOverflowCheck(false) atan := func(ctx context.Context, _ compute.ArithmeticOptions, arg compute.Datum) (compute.Datum, error) { return compute.Atan(ctx, arg) } us.assertUnaryOp(atan, `[]`, `[]`) us.assertUnaryOp(atan, `[null, "NaN"]`, `[null, "NaN"]`) arr := us.makeArray(0, 1, -1, T(math.Inf(1)), T(math.Inf(-1))) exp := us.makeArray(0, math.Pi/4, -math.Pi/4, math.Pi/2, -math.Pi/2) defer arr.Release() defer exp.Release() us.assertUnaryOpArrs(atan, arr, exp) } func (us *UnaryArithmeticFloating[T]) TestLog() { for _, overflow := range []bool{false, true} { us.setOverflowCheck(overflow) us.Run(fmt.Sprintf("checked=%t", overflow), func() { us.assertUnaryOp(compute.Ln, `[1, 2.718281828459045, null, "NaN", "Inf"]`, `[0, 1, null, "NaN", "Inf"]`) us.assertUnaryOpVals(compute.Ln, us.smallest, T(math.Log(float64(us.smallest)))) us.assertUnaryOpVals(compute.Ln, us.max, T(math.Log(float64(us.max)))) us.assertUnaryOp(compute.Log10, `[1, 10, null, "NaN", "Inf"]`, `[0, 1, null, "NaN", "Inf"]`) us.assertUnaryOpVals(compute.Log10, us.smallest, T(math.Log10(float64(us.smallest)))) us.assertUnaryOpVals(compute.Log10, us.max, T(math.Log10(float64(us.max)))) us.assertUnaryOp(compute.Log2, `[1, 2, null, "NaN", "Inf"]`, `[0, 1, null, "NaN", "Inf"]`) us.assertUnaryOpVals(compute.Log2, us.smallest, T(math.Log2(float64(us.smallest)))) us.assertUnaryOpVals(compute.Log2, us.max, T(math.Log2(float64(us.max)))) us.assertUnaryOp(compute.Log1p, `[0, 1.718281828459045, null, "NaN", "Inf"]`, `[0, 1, null, "NaN", "Inf"]`) us.assertUnaryOpVals(compute.Log1p, us.smallest, T(math.Log1p(float64(us.smallest)))) us.assertUnaryOpVals(compute.Log1p, us.max, T(math.Log1p(float64(us.max)))) }) } us.setOverflowCheck(false) us.assertUnaryOp(compute.Ln, `["-Inf", -1, 0, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) us.assertUnaryOp(compute.Log10, `["-Inf", -1, 0, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) us.assertUnaryOp(compute.Log2, `["-Inf", -1, 0, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) 
us.assertUnaryOp(compute.Log1p, `["-Inf", -2, -1, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) us.setOverflowCheck(true) us.assertUnaryOpErr(compute.Ln, `[0]`, "logarithm of zero") us.assertUnaryOpErr(compute.Ln, `[-1]`, "logarithm of negative number") us.assertUnaryOpErr(compute.Ln, `["-Inf"]`, "logarithm of negative number") us.assertUnaryOpValError(compute.Ln, us.min, "logarithm of negative number") us.assertUnaryOpErr(compute.Log10, `[0]`, "logarithm of zero") us.assertUnaryOpErr(compute.Log10, `[-1]`, "logarithm of negative number") us.assertUnaryOpErr(compute.Log10, `["-Inf"]`, "logarithm of negative number") us.assertUnaryOpValError(compute.Log10, us.min, "logarithm of negative number") us.assertUnaryOpErr(compute.Log2, `[0]`, "logarithm of zero") us.assertUnaryOpErr(compute.Log2, `[-1]`, "logarithm of negative number") us.assertUnaryOpErr(compute.Log2, `["-Inf"]`, "logarithm of negative number") us.assertUnaryOpValError(compute.Log2, us.min, "logarithm of negative number") us.assertUnaryOpErr(compute.Log1p, `[-1]`, "logarithm of zero") us.assertUnaryOpErr(compute.Log1p, `[-2]`, "logarithm of negative number") us.assertUnaryOpErr(compute.Log1p, `["-Inf"]`, "logarithm of negative number") us.assertUnaryOpValError(compute.Log1p, us.min, "logarithm of negative number") } func TestUnaryArithmetic(t *testing.T) { suite.Run(t, new(UnaryArithmeticSigned[int8])) suite.Run(t, new(UnaryArithmeticSigned[int16])) suite.Run(t, new(UnaryArithmeticSigned[int32])) suite.Run(t, new(UnaryArithmeticSigned[int64])) suite.Run(t, new(UnaryArithmeticUnsigned[uint8])) suite.Run(t, new(UnaryArithmeticUnsigned[uint16])) suite.Run(t, new(UnaryArithmeticUnsigned[uint32])) suite.Run(t, new(UnaryArithmeticUnsigned[uint64])) suite.Run(t, &UnaryArithmeticFloating[float32]{min: -math.MaxFloat32, max: math.MaxFloat32, smallest: math.SmallestNonzeroFloat32}) suite.Run(t, &UnaryArithmeticFloating[float64]{min: -math.MaxFloat64, max: math.MaxFloat64, smallest: math.SmallestNonzeroFloat64}) 
suite.Run(t, new(DecimalUnaryArithmeticSuite)) } type BitwiseArithmeticSuite[T arrow.IntType | arrow.UintType] struct { BinaryFuncTestSuite } func (bs *BitwiseArithmeticSuite[T]) datatype() arrow.DataType { return arrow.GetDataType[T]() } // to make it easier to test different widths, tests give bytes which // get repeated to make an array of the actual type func (bs *BitwiseArithmeticSuite[T]) expandByteArray(values []byte) arrow.Array { vals := make([]T, len(values)+1) sz := kernels.SizeOf[T]() for i, v := range values { memory.Set(unsafe.Slice((*byte)(unsafe.Pointer(&vals[i])), sz), v) } valid := make([]bool, len(vals)) for i := range values { valid[i] = true } return exec.ArrayFromSliceWithValid(bs.mem, vals, valid) } func (bs *BitwiseArithmeticSuite[T]) assertBinaryOp(fn string, arg0, arg1, expected []byte) { in0, in1 := bs.expandByteArray(arg0), bs.expandByteArray(arg1) out := bs.expandByteArray(expected) defer func() { in0.Release() in1.Release() out.Release() }() actual, err := compute.CallFunction(bs.ctx, fn, nil, &compute.ArrayDatum{in0.Data()}, &compute.ArrayDatum{in1.Data()}) bs.Require().NoError(err) defer actual.Release() assertDatumsEqual(bs.T(), &compute.ArrayDatum{out.Data()}, actual, nil, nil) for i := 0; i < out.Len(); i++ { a0, err := scalar.GetScalar(in0, i) bs.Require().NoError(err) a1, err := scalar.GetScalar(in1, i) bs.Require().NoError(err) exp, err := scalar.GetScalar(out, i) bs.Require().NoError(err) actual, err := compute.CallFunction(bs.ctx, fn, nil, compute.NewDatum(a0), compute.NewDatum(a1)) bs.Require().NoError(err) assertScalarEquals(bs.T(), exp, actual.(*compute.ScalarDatum).Value) } } func (bs *BitwiseArithmeticSuite[T]) TestBitWiseAnd() { bs.Run(bs.datatype().String(), func() { bs.assertBinaryOp("bit_wise_and", []byte{0x00, 0xFF, 0x00, 0xFF}, []byte{0x00, 0x00, 0xFF, 0xFF}, []byte{0x00, 0x00, 0x00, 0xFF}) }) } func (bs *BitwiseArithmeticSuite[T]) TestBitWiseOr() { bs.Run(bs.datatype().String(), func() { 
bs.assertBinaryOp("bit_wise_or", []byte{0x00, 0xFF, 0x00, 0xFF}, []byte{0x00, 0x00, 0xFF, 0xFF}, []byte{0x00, 0xFF, 0xFF, 0xFF}) }) } func (bs *BitwiseArithmeticSuite[T]) TestBitWiseXor() { bs.Run(bs.datatype().String(), func() { bs.assertBinaryOp("bit_wise_xor", []byte{0x00, 0xFF, 0x00, 0xFF}, []byte{0x00, 0x00, 0xFF, 0xFF}, []byte{0x00, 0xFF, 0xFF, 0x00}) }) } func TestBitwiseArithmetic(t *testing.T) { suite.Run(t, new(BitwiseArithmeticSuite[int8])) suite.Run(t, new(BitwiseArithmeticSuite[uint8])) suite.Run(t, new(BitwiseArithmeticSuite[int16])) suite.Run(t, new(BitwiseArithmeticSuite[uint16])) suite.Run(t, new(BitwiseArithmeticSuite[int32])) suite.Run(t, new(BitwiseArithmeticSuite[uint32])) suite.Run(t, new(BitwiseArithmeticSuite[int64])) suite.Run(t, new(BitwiseArithmeticSuite[uint64])) } var roundModes = []compute.RoundMode{ compute.RoundDown, compute.RoundUp, compute.RoundTowardsZero, compute.RoundTowardsInfinity, compute.RoundHalfDown, compute.RoundHalfUp, compute.RoundHalfTowardsZero, compute.RoundHalfTowardsInfinity, compute.RoundHalfToEven, compute.RoundHalfToOdd, } type UnaryRoundSuite[T arrow.NumericType] struct { UnaryArithmeticSuite[T, compute.RoundOptions] } func (us *UnaryRoundSuite[T]) setRoundMode(mode compute.RoundMode) { us.opts.Mode = mode } func (us *UnaryRoundSuite[T]) setRoundNDigits(v int64) { us.opts.NDigits = v } type UnaryRoundToMultipleSuite[T arrow.NumericType] struct { UnaryArithmeticSuite[T, compute.RoundToMultipleOptions] } func (us *UnaryRoundToMultipleSuite[T]) setRoundMode(mode compute.RoundMode) { us.opts.Mode = mode } func (us *UnaryRoundToMultipleSuite[T]) setRoundMultiple(val float64) { us.opts.Multiple = scalar.NewFloat64Scalar(val) } type UnaryRoundIntegral[T arrow.IntType | arrow.UintType] struct { UnaryRoundSuite[T] } type UnaryRoundToMultipleIntegral[T arrow.IntType | arrow.UintType] struct { UnaryRoundToMultipleSuite[T] } type UnaryRoundSigned[T arrow.IntType] struct { UnaryRoundIntegral[T] } func (us 
*UnaryRoundSigned[T]) TestRound() { values := `[0, 1, -13, -50, 115]` us.setRoundNDigits(0) arr := us.getArr(arrow.PrimitiveTypes.Float64, values) defer arr.Release() for _, mode := range roundModes { us.setRoundMode(mode) us.assertUnaryOpExpArr(compute.Round, values, arr) } // test different round N-digits for nearest rounding mode ndigExpected := []struct { n int64 exp string }{ {-2, `[0, 0, -0.0, -100, 100]`}, {-1, `[0.0, 0.0, -10, -50, 120]`}, {0, values}, {1, values}, {2, values}, } us.setRoundMode(compute.RoundHalfTowardsInfinity) for _, tt := range ndigExpected { us.Run(fmt.Sprintf("ndigits=%d", tt.n), func() { us.setRoundNDigits(tt.n) arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) defer arr.Release() us.assertUnaryOpExpArr(compute.Round, values, arr) }) } } type UnaryRoundToMultipleSigned[T arrow.IntType] struct { UnaryRoundToMultipleIntegral[T] } func (us *UnaryRoundToMultipleSigned[T]) TestRoundToMultiple() { values := `[0, 1, -13, -50, 115]` us.setRoundMultiple(1) for _, mode := range roundModes { us.setRoundMode(mode) arr := us.getArr(arrow.PrimitiveTypes.Float64, values) defer arr.Release() us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) } tests := []struct { mult float64 exp string }{ {2, `[0.0, 2, -14, -50, 116]`}, {0.05, `[0.0, 1, -13, -50, 115]`}, {0.1, values}, {10, `[0.0, 0.0, -10, -50, 120]`}, {100, `[0.0, 0.0, -0.0, -100, 100]`}, } us.setRoundMode(compute.RoundHalfTowardsInfinity) for _, tt := range tests { us.setRoundMultiple(tt.mult) arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) defer arr.Release() us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) } } type UnaryRoundUnsigned[T arrow.UintType] struct { UnaryRoundIntegral[T] } func (us *UnaryRoundUnsigned[T]) TestRound() { values := `[0, 1, 13, 50, 115]` us.setRoundNDigits(0) arr := us.getArr(arrow.PrimitiveTypes.Float64, values) defer arr.Release() for _, mode := range roundModes { us.setRoundMode(mode) us.assertUnaryOpExpArr(compute.Round, values, arr) 
} // test different round N-digits for nearest rounding mode ndigExpected := []struct { n int64 exp string }{ {-2, `[0, 0, 0, 100, 100]`}, {-1, `[0.0, 0.0, 10, 50, 120]`}, {0, values}, {1, values}, {2, values}, } us.setRoundMode(compute.RoundHalfTowardsInfinity) for _, tt := range ndigExpected { us.Run(fmt.Sprintf("ndigits=%d", tt.n), func() { us.setRoundNDigits(tt.n) arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) defer arr.Release() us.assertUnaryOpExpArr(compute.Round, values, arr) }) } } type UnaryRoundToMultipleUnsigned[T arrow.UintType] struct { UnaryRoundToMultipleIntegral[T] } func (us *UnaryRoundToMultipleUnsigned[T]) TestRoundToMultiple() { values := `[0, 1, 13, 50, 115]` us.setRoundMultiple(1) for _, mode := range roundModes { us.setRoundMode(mode) arr := us.getArr(arrow.PrimitiveTypes.Float64, values) defer arr.Release() us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) } tests := []struct { mult float64 exp string }{ {0.05, `[0, 1, 13, 50, 115]`}, {0.1, values}, {2, `[0, 2, 14, 50, 116]`}, {10, `[0, 0, 10, 50, 120]`}, {100, `[0, 0, 0, 100, 100]`}, } us.setRoundMode(compute.RoundHalfTowardsInfinity) for _, tt := range tests { us.setRoundMultiple(tt.mult) arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) defer arr.Release() us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) } } type UnaryRoundFloating[T constraints.Float] struct { UnaryRoundSuite[T] } func (us *UnaryRoundFloating[T]) TestRound() { values := `[3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7]` rmodeExpected := []struct { mode compute.RoundMode exp string }{ {compute.RoundDown, `[3, 3, 3, 4, -4, -4, -4]`}, {compute.RoundUp, `[4, 4, 4, 5, -3, -3, -3]`}, {compute.RoundTowardsZero, `[3, 3, 3, 4, -3, -3, -3]`}, {compute.RoundTowardsInfinity, `[4, 4, 4, 5, -4, -4, -4]`}, {compute.RoundHalfDown, `[3, 3, 4, 4, -3, -4, -4]`}, {compute.RoundHalfUp, `[3, 4, 4, 5, -3, -3, -4]`}, {compute.RoundHalfTowardsZero, `[3, 3, 4, 4, -3, -3, -4]`}, {compute.RoundHalfToEven, `[3, 4, 4, 4, 
-3, -4, -4]`}, {compute.RoundHalfToOdd, `[3, 3, 4, 5, -3, -3, -4]`}, } us.setRoundNDigits(0) for _, tt := range rmodeExpected { us.Run(tt.mode.String(), func() { us.setRoundMode(tt.mode) us.assertUnaryOp(compute.Round, `[]`, `[]`) us.assertUnaryOp(compute.Round, `[null, 0, "Inf", "-Inf", "NaN"]`, `[null, 0, "Inf", "-Inf", "NaN"]`) us.assertUnaryOp(compute.Round, values, tt.exp) }) } // test different round n-digits for nearest rounding mode values = `[320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045]` ndigitsExp := []struct { n int64 exp string }{ {-2, `[300, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0]`}, {-1, `[320, 0.0, 0.0, 0.0, -0.0, -40, -0.0]`}, {0, `[320, 4, 3, 5, -3, -35, -3]`}, {1, `[320, 3.5, 3.1, 4.5, -3.2, -35.1, -3]`}, {2, `[320, 3.5, 3.08, 4.5, -3.21, -35.12, -3.05]`}, } us.setRoundMode(compute.RoundHalfTowardsInfinity) for _, tt := range ndigitsExp { us.Run(fmt.Sprintf("ndigits=%d", tt.n), func() { us.setRoundNDigits(tt.n) us.assertUnaryOp(compute.Round, values, tt.exp) }) } } type UnaryRoundToMultipleFloating[T constraints.Float] struct { UnaryRoundToMultipleSuite[T] } func (us *UnaryRoundToMultipleFloating[T]) TestRoundToMultiple() { values := `[3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7]` rmodeExpected := []struct { mode compute.RoundMode exp string }{ {compute.RoundDown, `[3, 3, 3, 4, -4, -4, -4]`}, {compute.RoundUp, `[4, 4, 4, 5, -3, -3, -3]`}, {compute.RoundTowardsZero, `[3, 3, 3, 4, -3, -3, -3]`}, {compute.RoundTowardsInfinity, `[4, 4, 4, 5, -4, -4, -4]`}, {compute.RoundHalfDown, `[3, 3, 4, 4, -3, -4, -4]`}, {compute.RoundHalfUp, `[3, 4, 4, 5, -3, -3, -4]`}, {compute.RoundHalfTowardsZero, `[3, 3, 4, 4, -3, -3, -4]`}, {compute.RoundHalfToEven, `[3, 4, 4, 4, -3, -4, -4]`}, {compute.RoundHalfToOdd, `[3, 3, 4, 5, -3, -3, -4]`}, } us.setRoundMultiple(1) for _, tt := range rmodeExpected { us.Run(tt.mode.String(), func() { us.setRoundMode(tt.mode) us.assertUnaryOp(compute.RoundToMultiple, `[]`, `[]`) us.assertUnaryOp(compute.RoundToMultiple, `[null, 0, "Inf", "-Inf", 
"NaN"]`, `[null, 0, "Inf", "-Inf", "NaN"]`) us.assertUnaryOp(compute.RoundToMultiple, values, tt.exp) }) } // test different round n-digits for nearest rounding mode values = `[320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045]` multAndExp := []struct { mult float64 exp string }{ {0.05, `[320, 3.5, 3.1, 4.5, -3.2, -35.1, -3.05]`}, {0.1, `[320, 3.5, 3.1, 4.5, -3.2, -35.1, -3]`}, {2, `[320, 4, 4, 4, -4, -36, -4]`}, {10, `[320, 0.0, 0.0, 0.0, -0.0, -40, -0.0]`}, {100, `[300, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0]`}, } us.setRoundMode(compute.RoundHalfTowardsInfinity) for _, tt := range multAndExp { us.Run(fmt.Sprintf("multiple=%f", tt.mult), func() { us.setRoundMultiple(tt.mult) us.assertUnaryOp(compute.RoundToMultiple, values, tt.exp) }) } } func TestRounding(t *testing.T) { suite.Run(t, new(UnaryRoundSigned[int8])) suite.Run(t, new(UnaryRoundSigned[int16])) suite.Run(t, new(UnaryRoundSigned[int32])) suite.Run(t, new(UnaryRoundSigned[int64])) suite.Run(t, new(UnaryRoundUnsigned[uint8])) suite.Run(t, new(UnaryRoundUnsigned[uint16])) suite.Run(t, new(UnaryRoundUnsigned[uint32])) suite.Run(t, new(UnaryRoundUnsigned[uint64])) suite.Run(t, new(UnaryRoundFloating[float32])) suite.Run(t, new(UnaryRoundFloating[float64])) suite.Run(t, new(UnaryRoundToMultipleSigned[int8])) suite.Run(t, new(UnaryRoundToMultipleSigned[int16])) suite.Run(t, new(UnaryRoundToMultipleSigned[int32])) suite.Run(t, new(UnaryRoundToMultipleSigned[int64])) suite.Run(t, new(UnaryRoundToMultipleUnsigned[uint8])) suite.Run(t, new(UnaryRoundToMultipleUnsigned[uint16])) suite.Run(t, new(UnaryRoundToMultipleUnsigned[uint32])) suite.Run(t, new(UnaryRoundToMultipleUnsigned[uint64])) suite.Run(t, new(UnaryRoundToMultipleFloating[float32])) suite.Run(t, new(UnaryRoundToMultipleFloating[float64])) } const seed = 0x94378165 type binaryOp = func(ctx context.Context, left, right compute.Datum) (compute.Datum, error) func Add(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { var opts 
compute.ArithmeticOptions return compute.Add(ctx, opts, left, right) } func Subtract(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { var opts compute.ArithmeticOptions return compute.Subtract(ctx, opts, left, right) } func AddUnchecked(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { opts := compute.ArithmeticOptions{NoCheckOverflow: true} return compute.Add(ctx, opts, left, right) } func SubtractUnchecked(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { opts := compute.ArithmeticOptions{NoCheckOverflow: true} return compute.Subtract(ctx, opts, left, right) } func arrayScalarKernel(b *testing.B, sz int, nullProp float64, op binaryOp, dt arrow.DataType) { b.Run("array scalar", func(b *testing.B) { var ( mem = memory.NewCheckedAllocator(memory.DefaultAllocator) arraySize = int64(sz / dt.(arrow.FixedWidthDataType).Bytes()) min int64 = 6 max = min + 15 sc, _ = scalar.MakeScalarParam(6, dt) rhs compute.Datum = &compute.ScalarDatum{Value: sc} rng = gen.NewRandomArrayGenerator(seed, mem) ) lhs := rng.Numeric(dt.ID(), arraySize, min, max, nullProp) b.Cleanup(func() { lhs.Release() }) var ( res compute.Datum err error ctx = context.Background() left = &compute.ArrayDatum{Value: lhs.Data()} ) b.SetBytes(arraySize) b.ResetTimer() for n := 0; n < b.N; n++ { res, err = op(ctx, left, rhs) b.StopTimer() if err != nil { b.Fatal(err) } res.Release() b.StartTimer() } }) } func arrayArrayKernel(b *testing.B, sz int, nullProp float64, op binaryOp, dt arrow.DataType) { b.Run("array array", func(b *testing.B) { var ( mem = memory.NewCheckedAllocator(memory.DefaultAllocator) arraySize = int64(sz / dt.(arrow.FixedWidthDataType).Bytes()) rmin int64 = 1 rmax = rmin + 6 // 7 lmin = rmax + 1 // 8 lmax = lmin + 6 // 14 rng = gen.NewRandomArrayGenerator(seed, mem) ) lhs := rng.Numeric(dt.ID(), arraySize, lmin, lmax, nullProp) rhs := rng.Numeric(dt.ID(), arraySize, rmin, rmax, nullProp) b.Cleanup(func() { lhs.Release() 
rhs.Release() }) var ( res compute.Datum err error ctx = context.Background() left = &compute.ArrayDatum{Value: lhs.Data()} right = &compute.ArrayDatum{Value: rhs.Data()} ) b.SetBytes(arraySize) b.ResetTimer() for n := 0; n < b.N; n++ { res, err = op(ctx, left, right) b.StopTimer() if err != nil { b.Fatal(err) } res.Release() b.StartTimer() } }) } func BenchmarkScalarArithmetic(b *testing.B) { args := []struct { sz int nullProb float64 }{ {CpuCacheSizes[2], 0}, {CpuCacheSizes[2], 0.5}, {CpuCacheSizes[2], 1}, } testfns := []struct { name string op binaryOp }{ {"Add", Add}, {"AddUnchecked", AddUnchecked}, {"Subtract", Subtract}, {"SubtractUnchecked", SubtractUnchecked}, } for _, dt := range numericTypes { b.Run(dt.String(), func(b *testing.B) { for _, benchArgs := range args { b.Run(fmt.Sprintf("sz=%d/nullprob=%.2f", benchArgs.sz, benchArgs.nullProb), func(b *testing.B) { for _, tfn := range testfns { b.Run(tfn.name, func(b *testing.B) { arrayArrayKernel(b, benchArgs.sz, benchArgs.nullProb, tfn.op, dt) arrayScalarKernel(b, benchArgs.sz, benchArgs.nullProb, tfn.op, dt) }) } }) } }) } } arrow-go-18.2.0/arrow/compute/cast.go000066400000000000000000000472111476434502500174450ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
//go:build go1.18

package compute

import (
	"context"
	"fmt"
	"sync"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/bitutil"
	"github.com/apache/arrow-go/v18/arrow/compute/exec"
	"github.com/apache/arrow-go/v18/arrow/compute/internal/kernels"
)

var (
	// castTable maps a target type ID to the castFunction producing that
	// type; it is built lazily by initCastTable, guarded by castInit.
	castTable map[arrow.Type]*castFunction
	castInit  sync.Once

	castDoc = FunctionDoc{
		Summary:         "cast values to another data type",
		Description:     "Behavior when values wouldn't fit in the target type\ncan be controlled through CastOptions.",
		ArgNames:        []string{"input"},
		OptionsType:     "CastOptions",
		OptionsRequired: true,
	}
	// castMetaFunc is the "cast" entry point. It requires a ToType in the
	// options, short-circuits (returning the input) when the input already
	// has the target type, and otherwise dispatches to the per-target-type
	// cast function.
	castMetaFunc = NewMetaFunction("cast", Unary(), castDoc,
		func(ctx context.Context, fo FunctionOptions, d ...Datum) (Datum, error) {
			castOpts := fo.(*CastOptions)
			if castOpts == nil || castOpts.ToType == nil {
				return nil, fmt.Errorf("%w: cast requires that options be passed with a ToType", arrow.ErrInvalid)
			}

			if arrow.TypeEqual(d[0].(ArrayLikeDatum).Type(), castOpts.ToType) {
				return NewDatum(d[0]), nil
			}

			fn, err := getCastFunction(castOpts.ToType)
			if err != nil {
				return nil, fmt.Errorf("%w from %s", err, d[0].(ArrayLikeDatum).Type())
			}
			return fn.Execute(ctx, fo, d...)
		})
)

// RegisterScalarCast registers the "cast" meta function with reg.
func RegisterScalarCast(reg FunctionRegistry) {
	reg.AddFunction(castMetaFunc, false)
}

// castFunction is a ScalarFunction specialized for casts to a single target
// type; inIDs records the source type IDs that have registered kernels.
type castFunction struct {
	ScalarFunction

	inIDs []arrow.Type
	out   arrow.Type
}

// newCastFunction creates an empty castFunction targeting outType.
func newCastFunction(name string, outType arrow.Type) *castFunction {
	return &castFunction{
		ScalarFunction: *NewScalarFunction(name, Unary(), EmptyFuncDoc),
		out:            outType,
		inIDs:          make([]arrow.Type, 0, 1),
	}
}

// AddTypeCast registers kernel as the cast path from source type in,
// forcing the kernel's Init to decode CastOptions into a CastState.
func (cf *castFunction) AddTypeCast(in arrow.Type, kernel exec.ScalarKernel) error {
	kernel.Init = exec.OptionsInit[kernels.CastState]
	if err := cf.AddKernel(kernel); err != nil {
		return err
	}
	cf.inIDs = append(cf.inIDs, in)
	return nil
}

// AddNewTypeCast assembles a ScalarKernel from its parts and registers it
// via AddTypeCast.
func (cf *castFunction) AddNewTypeCast(inID arrow.Type, inTypes []exec.InputType, out exec.OutputType,
	ex exec.ArrayKernelExec, nullHandle exec.NullHandling, memAlloc exec.MemAlloc) error {
	kn := exec.NewScalarKernel(inTypes, out, ex, nil)
	kn.NullHandling = nullHandle
	kn.MemAlloc = memAlloc
	return cf.AddTypeCast(inID, kn)
}

// DispatchExact selects the kernel whose signature matches vals. When both
// an exact-type and a same-type-ID kernel match, the exact match wins;
// otherwise the first matching candidate is returned.
func (cf *castFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error) {
	if err := cf.checkArity(len(vals)); err != nil {
		return nil, err
	}

	candidates := make([]*exec.ScalarKernel, 0, 1)
	for i := range cf.kernels {
		if cf.kernels[i].Signature.MatchesInputs(vals) {
			candidates = append(candidates, &cf.kernels[i])
		}
	}

	if len(candidates) == 0 {
		return nil, fmt.Errorf("%w: unsupported cast from %s to %s using function %s",
			arrow.ErrNotImplemented, vals[0], cf.out, cf.name)
	}

	if len(candidates) == 1 {
		// one match!
		return candidates[0], nil
	}

	// in this situation we may have both an EXACT type and
	// a SAME_TYPE_ID match. So we will see if there is an exact
	// match among the candidates and if not, we just return the
	// first one
	for _, k := range candidates {
		arg0 := k.Signature.InputTypes[0]
		if arg0.Kind == exec.InputExact {
			// found one!
			return k, nil
		}
	}

	// just return some kernel that matches since we didn't find an exact
	return candidates[0], nil
}
// unpackDictionary implements casting from a dictionary array: the
// dictionary is materialized via Take on the indices, then (if needed) the
// materialized values are cast to the requested target type.
func unpackDictionary(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
	var (
		dictArr  = batch.Values[0].Array.MakeArray().(*array.Dictionary)
		opts     = ctx.State.(kernels.CastState)
		dictType = dictArr.DataType().(*arrow.DictionaryType)
		toType   = opts.ToType
	)
	defer dictArr.Release()

	if !arrow.TypeEqual(toType, dictType) && !CanCast(dictType, toType) {
		return fmt.Errorf("%w: cast type %s incompatible with dictionary type %s",
			arrow.ErrInvalid, toType, dictType)
	}

	// materialize the dictionary-encoded values
	unpacked, err := TakeArray(ctx.Ctx, dictArr.Dictionary(), dictArr.Indices())
	if err != nil {
		return err
	}
	defer unpacked.Release()

	if !arrow.TypeEqual(dictType, toType) {
		unpacked, err = CastArray(ctx.Ctx, unpacked, &opts)
		if err != nil {
			return err
		}
		defer unpacked.Release()
	}

	out.TakeOwnership(unpacked.Data())
	return nil
}

// CastFromExtension casts an extension array by delegating to a cast of its
// underlying storage array.
func CastFromExtension(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
	opts := ctx.State.(kernels.CastState)

	arr := batch.Values[0].Array.MakeArray().(array.ExtensionArray)
	defer arr.Release()

	castOpts := CastOptions(opts)
	result, err := CastArray(ctx.Ctx, arr.Storage(), &castOpts)
	if err != nil {
		return err
	}
	defer result.Release()

	out.TakeOwnership(result.Data())
	return nil
}

// CastList casts between list types, converting the offsets from
// SrcOffsetT to DestOffsetT where necessary and casting the child values to
// the output's child type. Downcasting offsets (int64 -> int32) fails if the
// final offset does not fit in the destination offset type.
func CastList[SrcOffsetT, DestOffsetT int32 | int64](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
	var (
		opts       = ctx.State.(kernels.CastState)
		childType  = out.Type.(arrow.NestedType).Fields()[0].Type
		input      = &batch.Values[0].Array
		offsets    = exec.GetSpanOffsets[SrcOffsetT](input, 1)
		isDowncast = kernels.SizeOf[SrcOffsetT]() > kernels.SizeOf[DestOffsetT]()
	)

	out.Buffers[0] = input.Buffers[0]
	out.Buffers[1] = input.Buffers[1]

	// a non-zero offset requires re-basing the validity bitmap
	if input.Offset != 0 && len(input.Buffers[0].Buf) > 0 {
		out.Buffers[0].WrapBuffer(ctx.AllocateBitmap(input.Len))
		bitutil.CopyBitmap(input.Buffers[0].Buf, int(input.Offset), int(input.Len),
			out.Buffers[0].Buf, 0)
	}

	// Handle list offsets
	// Several cases possible:
	//	- The source offset is non-zero, in which case we slice the
	//	  underlying values and shift the list offsets (regardless of
	//	  their respective types)
	//	- the source offset is zero but the source and destination types
	//	  have different list offset types, in which case we cast the offsets
	//	- otherwise we simply keep the original offsets
	if isDowncast {
		if offsets[input.Len] > SrcOffsetT(kernels.MaxOf[DestOffsetT]()) {
			return fmt.Errorf("%w: array of type %s too large to convert to %s",
				arrow.ErrInvalid, input.Type, out.Type)
		}
	}

	values := input.Children[0].MakeArray()
	defer values.Release()

	if input.Offset != 0 {
		out.Buffers[1].WrapBuffer(
			ctx.Allocate(out.Type.(arrow.OffsetsDataType).
				OffsetTypeTraits().BytesRequired(int(input.Len) + 1)))

		// shift offsets so the first list starts at zero
		shiftedOffsets := exec.GetSpanOffsets[DestOffsetT](out, 1)
		for i := 0; i < int(input.Len)+1; i++ {
			shiftedOffsets[i] = DestOffsetT(offsets[i] - offsets[0])
		}

		values = array.NewSlice(values, int64(offsets[0]), int64(offsets[input.Len]))
		defer values.Release()
	} else if kernels.SizeOf[SrcOffsetT]() != kernels.SizeOf[DestOffsetT]() {
		out.Buffers[1].WrapBuffer(ctx.Allocate(out.Type.(arrow.OffsetsDataType).
			OffsetTypeTraits().BytesRequired(int(input.Len) + 1)))
		kernels.DoStaticCast(exec.GetSpanOffsets[SrcOffsetT](input, 1),
			exec.GetSpanOffsets[DestOffsetT](out, 1))
	}

	// handle values
	opts.ToType = childType
	castedValues, err := CastArray(ctx.Ctx, values, &opts)
	if err != nil {
		return err
	}
	defer castedValues.Release()

	out.Children = make([]exec.ArraySpan, 1)
	out.Children[0].SetMembers(castedValues.Data())
	// retain any buffers the cast allocated so they outlive castedValues
	for i, b := range out.Children[0].Buffers {
		if b.Owner != nil && b.Owner != values.Data().Buffers()[i] {
			b.Owner.Retain()
			b.SelfAlloc = true
		}
	}
	return nil
}
OffsetTypeTraits().BytesRequired(int(input.Len) + 1))) kernels.DoStaticCast(exec.GetSpanOffsets[SrcOffsetT](input, 1), exec.GetSpanOffsets[DestOffsetT](out, 1)) } // handle values opts.ToType = childType castedValues, err := CastArray(ctx.Ctx, values, &opts) if err != nil { return err } defer castedValues.Release() out.Children = make([]exec.ArraySpan, 1) out.Children[0].SetMembers(castedValues.Data()) for i, b := range out.Children[0].Buffers { if b.Owner != nil && b.Owner != values.Data().Buffers()[i] { b.Owner.Retain() b.SelfAlloc = true } } return nil } func CastStruct(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { var ( opts = ctx.State.(kernels.CastState) inType = batch.Values[0].Array.Type.(*arrow.StructType) outType = out.Type.(*arrow.StructType) inFieldCount = inType.NumFields() outFieldCount = outType.NumFields() ) fieldsToSelect := make([]int, outFieldCount) for i := range fieldsToSelect { fieldsToSelect[i] = -1 } outFieldIndex := 0 for inFieldIndex := 0; inFieldIndex < inFieldCount && outFieldIndex < outFieldCount; inFieldIndex++ { inField := inType.Field(inFieldIndex) outField := outType.Field(outFieldIndex) if inField.Name == outField.Name { if inField.Nullable && !outField.Nullable { return fmt.Errorf("%w: cannot cast nullable field to non-nullable field: %s %s", arrow.ErrType, inType, outType) } fieldsToSelect[outFieldIndex] = inFieldIndex outFieldIndex++ } } if outFieldIndex < outFieldCount { return fmt.Errorf("%w: struct fields don't match or are in the wrong order: Input: %s Output: %s", arrow.ErrType, inType, outType) } input := &batch.Values[0].Array if len(input.Buffers[0].Buf) > 0 { out.Buffers[0].WrapBuffer(ctx.AllocateBitmap(input.Len)) bitutil.CopyBitmap(input.Buffers[0].Buf, int(input.Offset), int(input.Len), out.Buffers[0].Buf, 0) } out.Children = make([]exec.ArraySpan, outFieldCount) for outFieldIndex, idx := range fieldsToSelect { values := input.Children[idx].MakeArray() defer values.Release() values = 
array.NewSlice(values, input.Offset, input.Len) defer values.Release() opts.ToType = outType.Field(outFieldIndex).Type castedValues, err := CastArray(ctx.Ctx, values, &opts) if err != nil { return err } defer castedValues.Release() out.Children[outFieldIndex].TakeOwnership(castedValues.Data()) } return nil } func addListCast[SrcOffsetT, DestOffsetT int32 | int64](fn *castFunction, inType arrow.Type) error { kernel := exec.NewScalarKernel([]exec.InputType{exec.NewIDInput(inType)}, kernels.OutputTargetType, CastList[SrcOffsetT, DestOffsetT], nil) kernel.NullHandling = exec.NullComputedNoPrealloc kernel.MemAlloc = exec.MemNoPrealloc return fn.AddTypeCast(inType, kernel) } func addStructToStructCast(fn *castFunction) error { kernel := exec.NewScalarKernel([]exec.InputType{exec.NewIDInput(arrow.STRUCT)}, kernels.OutputTargetType, CastStruct, nil) kernel.NullHandling = exec.NullComputedNoPrealloc return fn.AddTypeCast(arrow.STRUCT, kernel) } func addCastFuncs(fn []*castFunction) { for _, f := range fn { f.AddNewTypeCast(arrow.EXTENSION, []exec.InputType{exec.NewIDInput(arrow.EXTENSION)}, f.kernels[0].Signature.OutType, CastFromExtension, exec.NullComputedNoPrealloc, exec.MemNoPrealloc) castTable[f.out] = f } } func initCastTable() { castTable = make(map[arrow.Type]*castFunction) addCastFuncs(getBooleanCasts()) addCastFuncs(getNumericCasts()) addCastFuncs(getBinaryLikeCasts()) addCastFuncs(getTemporalCasts()) addCastFuncs(getNestedCasts()) nullToExt := newCastFunction("cast_extension", arrow.EXTENSION) nullToExt.AddNewTypeCast(arrow.NULL, []exec.InputType{exec.NewExactInput(arrow.Null)}, kernels.OutputTargetType, kernels.CastFromNull, exec.NullComputedNoPrealloc, exec.MemNoPrealloc) castTable[arrow.EXTENSION] = nullToExt } func getCastFunction(to arrow.DataType) (*castFunction, error) { castInit.Do(initCastTable) fn, ok := castTable[to.ID()] if ok { return fn, nil } return nil, fmt.Errorf("%w: unsupported cast to %s", arrow.ErrNotImplemented, to) } func getNestedCasts() 
// getNestedCasts builds the cast functions targeting LIST, LARGE_LIST,
// FIXED_SIZE_LIST and STRUCT.
func getNestedCasts() []*castFunction {
	out := make([]*castFunction, 0)

	// addKernels registers each kernel under its first input's type ID
	addKernels := func(fn *castFunction, kernels []exec.ScalarKernel) {
		for _, k := range kernels {
			if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil {
				panic(err)
			}
		}
	}

	castLists := newCastFunction("cast_list", arrow.LIST)
	addKernels(castLists, kernels.GetCommonCastKernels(arrow.LIST, kernels.OutputTargetType))
	if err := addListCast[int32, int32](castLists, arrow.LIST); err != nil {
		panic(err)
	}
	if err := addListCast[int64, int32](castLists, arrow.LARGE_LIST); err != nil {
		panic(err)
	}
	out = append(out, castLists)

	castLargeLists := newCastFunction("cast_large_list", arrow.LARGE_LIST)
	addKernels(castLargeLists, kernels.GetCommonCastKernels(arrow.LARGE_LIST, kernels.OutputTargetType))
	if err := addListCast[int32, int64](castLargeLists, arrow.LIST); err != nil {
		panic(err)
	}
	if err := addListCast[int64, int64](castLargeLists, arrow.LARGE_LIST); err != nil {
		panic(err)
	}
	out = append(out, castLargeLists)

	castFsl := newCastFunction("cast_fixed_size_list", arrow.FIXED_SIZE_LIST)
	addKernels(castFsl, kernels.GetCommonCastKernels(arrow.FIXED_SIZE_LIST, kernels.OutputTargetType))
	out = append(out, castFsl)

	castStruct := newCastFunction("cast_struct", arrow.STRUCT)
	addKernels(castStruct, kernels.GetCommonCastKernels(arrow.STRUCT, kernels.OutputTargetType))
	if err := addStructToStructCast(castStruct); err != nil {
		panic(err)
	}
	out = append(out, castStruct)

	return out
}

// getBooleanCasts builds the single cast function targeting BOOL.
func getBooleanCasts() []*castFunction {
	fn := newCastFunction("cast_boolean", arrow.BOOL)
	kns := kernels.GetBooleanCastKernels()

	for _, k := range kns {
		if err := fn.AddTypeCast(k.Signature.InputTypes[0].Type.ID(), k); err != nil {
			panic(err)
		}
	}

	return []*castFunction{fn}
}

// getTemporalCasts builds cast functions for timestamp, date, time,
// duration and month/day/nano-interval targets; each one also accepts
// dictionary-encoded input via unpackDictionary.
func getTemporalCasts() []*castFunction {
	output := make([]*castFunction, 0)
	addFn := func(name string, id arrow.Type, kernels []exec.ScalarKernel) {
		fn := newCastFunction(name, id)
		for _, k := range kernels {
			if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil {
				panic(err)
			}
		}

		fn.AddNewTypeCast(arrow.DICTIONARY, []exec.InputType{exec.NewIDInput(arrow.DICTIONARY)},
			kernels[0].Signature.OutType, unpackDictionary, exec.NullComputedNoPrealloc, exec.MemNoPrealloc)
		output = append(output, fn)
	}

	addFn("cast_timestamp", arrow.TIMESTAMP, kernels.GetTimestampCastKernels())
	addFn("cast_date32", arrow.DATE32, kernels.GetDate32CastKernels())
	addFn("cast_date64", arrow.DATE64, kernels.GetDate64CastKernels())
	addFn("cast_time32", arrow.TIME32, kernels.GetTime32CastKernels())
	addFn("cast_time64", arrow.TIME64, kernels.GetTime64CastKernels())
	addFn("cast_duration", arrow.DURATION, kernels.GetDurationCastKernels())
	addFn("cast_month_day_nano_interval", arrow.INTERVAL_MONTH_DAY_NANO, kernels.GetIntervalCastKernels())
	return output
}
= append(out, castInt32) castInt64 := getFn("cast_int64", arrow.INT64, kernels.GetCastToInteger[int64](arrow.PrimitiveTypes.Int64)) castInt64.AddTypeCast(arrow.DATE64, kernels.GetZeroCastKernel(arrow.DATE64, exec.NewIDInput(arrow.DATE64), exec.NewOutputType(arrow.PrimitiveTypes.Int64))) castInt64.AddTypeCast(arrow.TIME64, kernels.GetZeroCastKernel(arrow.TIME64, exec.NewIDInput(arrow.TIME64), exec.NewOutputType(arrow.PrimitiveTypes.Int64))) castInt64.AddTypeCast(arrow.DURATION, kernels.GetZeroCastKernel(arrow.DURATION, exec.NewIDInput(arrow.DURATION), exec.NewOutputType(arrow.PrimitiveTypes.Int64))) castInt64.AddTypeCast(arrow.TIMESTAMP, kernels.GetZeroCastKernel(arrow.TIMESTAMP, exec.NewIDInput(arrow.TIMESTAMP), exec.NewOutputType(arrow.PrimitiveTypes.Int64))) out = append(out, castInt64) out = append(out, getFn("cast_uint8", arrow.UINT8, kernels.GetCastToInteger[uint8](arrow.PrimitiveTypes.Uint8))) out = append(out, getFn("cast_uint16", arrow.UINT16, kernels.GetCastToInteger[uint16](arrow.PrimitiveTypes.Uint16))) out = append(out, getFn("cast_uint32", arrow.UINT32, kernels.GetCastToInteger[uint32](arrow.PrimitiveTypes.Uint32))) out = append(out, getFn("cast_uint64", arrow.UINT64, kernels.GetCastToInteger[uint64](arrow.PrimitiveTypes.Uint64))) out = append(out, getFn("cast_half_float", arrow.FLOAT16, kernels.GetCommonCastKernels(arrow.FLOAT16, exec.NewOutputType(arrow.FixedWidthTypes.Float16)))) out = append(out, getFn("cast_float", arrow.FLOAT32, kernels.GetCastToFloating[float32](arrow.PrimitiveTypes.Float32))) out = append(out, getFn("cast_double", arrow.FLOAT64, kernels.GetCastToFloating[float64](arrow.PrimitiveTypes.Float64))) // cast to decimal128 out = append(out, getFn("cast_decimal", arrow.DECIMAL128, kernels.GetCastToDecimal128())) // cast to decimal256 out = append(out, getFn("cast_decimal256", arrow.DECIMAL256, kernels.GetCastToDecimal256())) return out } func getBinaryLikeCasts() []*castFunction { out := make([]*castFunction, 0) addFn := func(name 
// getBinaryLikeCasts builds cast functions targeting binary, large binary,
// string, large string and fixed-size binary; each also accepts
// dictionary-encoded input via unpackDictionary.
func getBinaryLikeCasts() []*castFunction {
	out := make([]*castFunction, 0)

	addFn := func(name string, ty arrow.Type, kns []exec.ScalarKernel) {
		fn := newCastFunction(name, ty)
		for _, k := range kns {
			if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil {
				panic(err)
			}
		}

		fn.AddNewTypeCast(arrow.DICTIONARY, []exec.InputType{exec.NewIDInput(arrow.DICTIONARY)},
			kns[0].Signature.OutType, unpackDictionary, exec.NullComputedNoPrealloc, exec.MemNoPrealloc)
		out = append(out, fn)
	}

	addFn("cast_binary", arrow.BINARY, kernels.GetToBinaryKernels(arrow.BinaryTypes.Binary))
	addFn("cast_large_binary", arrow.LARGE_BINARY, kernels.GetToBinaryKernels(arrow.BinaryTypes.LargeBinary))
	addFn("cast_string", arrow.STRING, kernels.GetToBinaryKernels(arrow.BinaryTypes.String))
	addFn("cast_large_string", arrow.LARGE_STRING, kernels.GetToBinaryKernels(arrow.BinaryTypes.LargeString))
	addFn("cast_fixed_sized_binary", arrow.FIXED_SIZE_BINARY, kernels.GetFsbCastKernels())
	return out
}

// CastDatum is a convenience function for casting a Datum to another type.
// It is equivalent to calling CallFunction(ctx, "cast", opts, Datum) and
// should work for Scalar, Array or ChunkedArray Datums.
func CastDatum(ctx context.Context, val Datum, opts *CastOptions) (Datum, error) {
	return CallFunction(ctx, "cast", opts, val)
}

// CastArray is a convenience function for casting an Array to another type.
// It is equivalent to constructing a Datum for the array and using
// CallFunction(ctx, "cast", ...).
func CastArray(ctx context.Context, val arrow.Array, opts *CastOptions) (arrow.Array, error) {
	d := NewDatum(val)
	defer d.Release()

	out, err := CastDatum(ctx, d, opts)
	if err != nil {
		return nil, err
	}

	defer out.Release()
	return out.(*ArrayDatum).MakeArray(), nil
}

// CastToType is a convenience function equivalent to calling
// CastArray(ctx, val, compute.SafeCastOptions(toType))
func CastToType(ctx context.Context, val arrow.Array, toType arrow.DataType) (arrow.Array, error) {
	return CastArray(ctx, val, SafeCastOptions(toType))
}

// CanCast returns true if there is an implementation for casting an array
// or scalar value from the specified DataType to the other data type.
func CanCast(from, to arrow.DataType) bool {
	fn, err := getCastFunction(to)
	if err != nil {
		return false
	}

	for _, id := range fn.inIDs {
		if from.ID() == id {
			return true
		}
	}
	return false
}
//go:build go1.18

package compute_test

import (
	"context"
	"fmt"
	"math"
	"strconv"
	"strings"
	"testing"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/bitutil"
	"github.com/apache/arrow-go/v18/arrow/compute"
	"github.com/apache/arrow-go/v18/arrow/decimal128"
	"github.com/apache/arrow-go/v18/arrow/decimal256"
	"github.com/apache/arrow-go/v18/arrow/internal/testing/gen"
	"github.com/apache/arrow-go/v18/arrow/memory"
	"github.com/apache/arrow-go/v18/arrow/scalar"
	"github.com/apache/arrow-go/v18/internal/types"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/stretchr/testify/suite"
)

// getScalars extracts the scalar at position idx from each input datum;
// array datums are indexed, scalar datums are passed through as-is.
func getScalars(inputs []compute.Datum, idx int) []scalar.Scalar {
	out := make([]scalar.Scalar, len(inputs))
	for i, in := range inputs {
		if in.Kind() == compute.KindArray {
			arr := in.(*compute.ArrayDatum).MakeArray()
			defer arr.Release()

			out[i], _ = scalar.GetScalar(arr, idx)
		} else {
			out[i] = in.(*compute.ScalarDatum).Value
		}
	}
	return out
}

// getDatums wraps each input value in a compute.Datum.
func getDatums[T any](inputs []T) []compute.Datum {
	out := make([]compute.Datum, len(inputs))
	for i, in := range inputs {
		out[i] = compute.NewDatum(in)
	}
	return out
}

// assertArraysEqual asserts approximate array equality with a descriptive
// failure message.
func assertArraysEqual(t *testing.T, expected, actual arrow.Array, opts ...array.EqualOption) bool {
	return assert.Truef(t, array.ApproxEqual(expected, actual, opts...), "expected: %s\ngot: %s", expected, actual)
}

// assertDatumsEqual asserts that two datums of the same kind hold
// (approximately) equal values, dispatching on the datum kind.
func assertDatumsEqual(t *testing.T, expected, actual compute.Datum, opts []array.EqualOption, scalarOpts []scalar.EqualOption) {
	require.Equal(t, expected.Kind(), actual.Kind())

	switch expected.Kind() {
	case compute.KindScalar:
		want := expected.(*compute.ScalarDatum).Value
		got := actual.(*compute.ScalarDatum).Value
		assert.Truef(t, scalar.ApproxEquals(want, got, scalarOpts...), "expected: %s\ngot: %s", want, got)
	case compute.KindArray:
		want := expected.(*compute.ArrayDatum).MakeArray()
		got := actual.(*compute.ArrayDatum).MakeArray()
		assertArraysEqual(t, want, got, opts...)
		want.Release()
		got.Release()
	case compute.KindChunked:
		want := expected.(*compute.ChunkedDatum).Value
		got := actual.(*compute.ChunkedDatum).Value
		assert.Truef(t, array.ChunkedEqual(want, got), "expected: %s\ngot: %s", want, got)
	default:
		assert.Truef(t, actual.Equals(expected), "expected: %s\ngot: %s", expected, actual)
	}
}

// checkScalarNonRecursive calls funcName once on inputs and compares the
// result to expected.
func checkScalarNonRecursive(t *testing.T, funcName string, inputs []compute.Datum, expected compute.Datum, opts compute.FunctionOptions) {
	out, err := compute.CallFunction(context.Background(), funcName, opts, inputs...)
	assert.NoError(t, err)
	defer out.Release()
	assertDatumsEqual(t, expected, out, nil, nil)
}

// checkScalarWithScalars calls funcName on scalar inputs and fails with a
// rendered call expression when the result differs from expected.
func checkScalarWithScalars(t *testing.T, funcName string, inputs []scalar.Scalar, expected scalar.Scalar, opts compute.FunctionOptions) {
	datums := getDatums(inputs)
	defer func() {
		for _, s := range inputs {
			if r, ok := s.(scalar.Releasable); ok {
				r.Release()
			}
		}
		for _, d := range datums {
			d.Release()
		}
	}()
	out, err := compute.CallFunction(context.Background(), funcName, opts, datums...)
	assert.NoError(t, err)
	defer out.Release()
	if !scalar.Equals(out.(*compute.ScalarDatum).Value, expected) {
		// build a readable "fn(a,b) = got != want" failure message
		var b strings.Builder
		b.WriteString(funcName + "(")
		for i, in := range inputs {
			if i != 0 {
				b.WriteByte(',')
			}
			b.WriteString(in.String())
		}
		b.WriteByte(')')
		b.WriteString(" = " + out.(*compute.ScalarDatum).Value.String())
		b.WriteString(" != " + expected.String())

		if !arrow.TypeEqual(out.(*compute.ScalarDatum).Type(), expected.DataType()) {
			fmt.Fprintf(&b, " (types differed: %s vs %s)",
				out.(*compute.ScalarDatum).Type(), expected.DataType())
		}
		t.Fatal(b.String())
	}
}

// checkScalar verifies funcName on the full inputs, then re-verifies it
// element-by-element on the corresponding scalars.
func checkScalar(t *testing.T, funcName string, inputs []compute.Datum, expected compute.Datum, opts compute.FunctionOptions) {
	checkScalarNonRecursive(t, funcName, inputs, expected, opts)

	if expected.Kind() == compute.KindScalar {
		return
	}

	exp := expected.(*compute.ArrayDatum).MakeArray()
	defer exp.Release()

	// check for at least 1 array, and make sure the others are of equal len
	hasArray := false
	for _, in := range inputs {
		if in.Kind() == compute.KindArray {
			assert.EqualValues(t, exp.Len(), in.(*compute.ArrayDatum).Len())
			hasArray = true
		}
	}
	require.True(t, hasArray)

	// check all the input scalars
	for i := 0; i < exp.Len(); i++ {
		e, _ := scalar.GetScalar(exp, i)
		checkScalarWithScalars(t, funcName, getScalars(inputs, i), e, opts)
		if r, ok := e.(scalar.Releasable); ok {
			r.Release()
		}
	}
}

// assertBufferSame asserts that two arrays share the identical buffer object
// at the given buffer index (i.e. the cast was zero-copy for that buffer).
func assertBufferSame(t *testing.T, left, right arrow.Array, idx int) {
	assert.Same(t, left.Data().Buffers()[idx], right.Data().Buffers()[idx])
}

// checkScalarUnary is checkScalar specialized to a single input.
func checkScalarUnary(t *testing.T, funcName string, input compute.Datum, exp compute.Datum, opt compute.FunctionOptions) {
	checkScalar(t, funcName, []compute.Datum{input}, exp, opt)
}

// checkCast casts input to exp's type and asserts the result equals exp.
func checkCast(t *testing.T, input arrow.Array, exp arrow.Array, opts compute.CastOptions) {
	opts.ToType = exp.DataType()
	in, out := compute.NewDatum(input), compute.NewDatum(exp)
	defer in.Release()
	defer out.Release()
	checkScalarUnary(t, "cast", in, out, &opts)
}
// checkCastFails asserts that casting the whole array fails with ErrInvalid,
// and that at least one element also fails when cast as a scalar.
func checkCastFails(t *testing.T, input arrow.Array, opt compute.CastOptions) {
	_, err := compute.CastArray(context.Background(), input, &opt)
	assert.ErrorIs(t, err, arrow.ErrInvalid)

	// for scalars, check that at least one of the input fails
	// since many of the tests contain a mix of passing and failing values.
	// in some cases we will want to check more precisely
	nfail := 0
	for i := 0; i < input.Len(); i++ {
		sc, _ := scalar.GetScalar(input, i)
		if r, ok := sc.(scalar.Releasable); ok {
			defer r.Release()
		}
		d := compute.NewDatum(sc)
		defer d.Release()
		out, err := compute.CastDatum(context.Background(), d, &opt)
		if err != nil {
			nfail++
		} else {
			out.Release()
		}
	}
	assert.Greater(t, nfail, 0)
}

// checkCastZeroCopy asserts that casting input to toType reuses the exact
// same buffers (no copies were made).
func checkCastZeroCopy(t *testing.T, input arrow.Array, toType arrow.DataType, opts *compute.CastOptions) {
	opts.ToType = toType
	out, err := compute.CastArray(context.Background(), input, opts)
	assert.NoError(t, err)
	defer out.Release()

	assert.Len(t, out.Data().Buffers(), len(input.Data().Buffers()))
	for i := range out.Data().Buffers() {
		assertBufferSame(t, out, input, i)
	}
}

// Groupings of data types shared by the cast tests below.
var (
	signedIntTypes = []arrow.DataType{
		arrow.PrimitiveTypes.Int8,
		arrow.PrimitiveTypes.Int16,
		arrow.PrimitiveTypes.Int32,
		arrow.PrimitiveTypes.Int64,
	}
	unsignedIntTypes = []arrow.DataType{
		arrow.PrimitiveTypes.Uint8,
		arrow.PrimitiveTypes.Uint16,
		arrow.PrimitiveTypes.Uint32,
		arrow.PrimitiveTypes.Uint64,
	}
	integerTypes = append(signedIntTypes, unsignedIntTypes...)
	floatingTypes = []arrow.DataType{
		arrow.PrimitiveTypes.Float32,
		arrow.PrimitiveTypes.Float64,
	}
	numericTypes = append(integerTypes, floatingTypes...)
	baseBinaryTypes = []arrow.DataType{
		arrow.BinaryTypes.Binary,
		arrow.BinaryTypes.LargeBinary,
		arrow.BinaryTypes.String,
		arrow.BinaryTypes.LargeString,
	}
	dictIndexTypes = integerTypes
)

// CastSuite is the testify suite for the "cast" compute function; every test
// runs against a checked allocator that asserts zero leaked bytes.
type CastSuite struct {
	suite.Suite

	mem *memory.CheckedAllocator
}

// allocateEmptyBitmap returns a zeroed validity bitmap sized for len values.
func (c *CastSuite) allocateEmptyBitmap(len int) *memory.Buffer {
	buf := memory.NewResizableBuffer(c.mem)
	buf.Resize(int(bitutil.BytesForBits(int64(len))))
	return buf
}

// maskArrayWithNullsAt returns a copy of input with the elements at toMask
// additionally marked null (existing nulls are preserved).
func (c *CastSuite) maskArrayWithNullsAt(input arrow.Array, toMask []int) arrow.Array {
	masked := input.Data().(*array.Data).Copy()
	defer masked.Release()
	if masked.Buffers()[0] != nil {
		masked.Buffers()[0].Release()
	}
	masked.Buffers()[0] = c.allocateEmptyBitmap(input.Len())
	masked.SetNullN(array.UnknownNullCount)

	if original := input.NullBitmapBytes(); len(original) > 0 {
		bitutil.CopyBitmap(original, input.Data().Offset(), input.Len(), masked.Buffers()[0].Bytes(), 0)
	} else {
		bitutil.SetBitsTo(masked.Buffers()[0].Bytes(), 0, int64(input.Len()), true)
	}

	for _, i := range toMask {
		bitutil.SetBitTo(masked.Buffers()[0].Bytes(), i, false)
	}

	return array.MakeFromData(masked)
}

// invalidUtf8Arr builds a binary-like array whose last element is not valid
// UTF-8.
func (c *CastSuite) invalidUtf8Arr(dt arrow.DataType) arrow.Array {
	bldr := array.NewBinaryBuilder(c.mem, dt.(arrow.BinaryDataType))
	defer bldr.Release()
	bldr.AppendValues([][]byte{
		[]byte("Hi"),
		[]byte("olá mundo"),
		[]byte("你好世界"),
		[]byte(""),
		[]byte("\xa0\xa1"), // invalid utf8!
	}, nil)
	return bldr.NewArray()
}

// binaryBuilderAppend abstracts over binary and fixed-size-binary builders.
type binaryBuilderAppend interface {
	array.Builder
	AppendValues([][]byte, []bool)
}

// fixedSizeInvalidUtf8 builds a width-3 binary-like array whose last element
// is not valid UTF-8.
func (c *CastSuite) fixedSizeInvalidUtf8(dt arrow.DataType) arrow.Array {
	var bldr binaryBuilderAppend
	if dt.ID() == arrow.FIXED_SIZE_BINARY {
		c.Require().Equal(3, dt.(*arrow.FixedSizeBinaryType).ByteWidth)
		bldr = array.NewFixedSizeBinaryBuilder(c.mem, dt.(*arrow.FixedSizeBinaryType))
	} else {
		bldr = array.NewBinaryBuilder(c.mem, dt.(arrow.BinaryDataType))
	}

	defer bldr.Release()
	bldr.AppendValues([][]byte{
		[]byte("Hi!"),
		[]byte("lá"),
		[]byte("ä½ "),
		[]byte("   "),
		[]byte("\xa0\xa1\xa2"), // invalid utf8!
	}, nil)
	return bldr.NewArray()
}

func (c *CastSuite) SetupTest() {
	c.mem = memory.NewCheckedAllocator(memory.DefaultAllocator)
}

func (c *CastSuite) TearDownTest() {
	// fails the test if any allocation made during the test leaked
	c.mem.AssertSize(c.T(), 0)
}

// TestCanCast spot-checks the compute.CanCast matrix for primitive, binary,
// temporal, dictionary and extension types.
func (c *CastSuite) TestCanCast() {
	expectCanCast := func(from arrow.DataType, toSet []arrow.DataType, expected bool) {
		for _, to := range toSet {
			c.Equalf(expected, compute.CanCast(from, to), "CanCast from: %s, to: %s, expected: %t",
				from, to, expected)
		}
	}

	canCast := func(from arrow.DataType, toSet []arrow.DataType) {
		expectCanCast(from, toSet, true)
	}

	cannotCast := func(from arrow.DataType, toSet []arrow.DataType) {
		expectCanCast(from, toSet, false)
	}

	canCast(arrow.Null, []arrow.DataType{arrow.FixedWidthTypes.Boolean})
	canCast(arrow.Null, numericTypes)
	canCast(arrow.Null, baseBinaryTypes)
	canCast(arrow.Null, []arrow.DataType{
		arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Date64,
		arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Timestamp_s,
	})
	cannotCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.Null}, []arrow.DataType{arrow.Null})

	canCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.FixedWidthTypes.Boolean})
	canCast(arrow.FixedWidthTypes.Boolean, numericTypes)
	canCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString})
	cannotCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.FixedWidthTypes.Boolean}, []arrow.DataType{arrow.FixedWidthTypes.Boolean})

	cannotCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.Null})
	cannotCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary})
	cannotCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{
		arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Date64,
		arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Timestamp_s})

	for _, from := range numericTypes {
		canCast(from, []arrow.DataType{arrow.FixedWidthTypes.Boolean})
		canCast(from, numericTypes)
		canCast(from, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString})
		canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: from}, []arrow.DataType{from})

		cannotCast(from, []arrow.DataType{arrow.Null})
	}

	for _, from := range baseBinaryTypes {
		canCast(from, []arrow.DataType{arrow.FixedWidthTypes.Boolean})
		canCast(from, numericTypes)
		canCast(from, baseBinaryTypes)
		canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int64, ValueType: from}, []arrow.DataType{from})

		// any cast which is valid for the dictionary is valid for the dictionary array
		canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: from}, baseBinaryTypes)
		canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: from}, baseBinaryTypes)

		cannotCast(from, []arrow.DataType{arrow.Null})
	}

	canCast(arrow.BinaryTypes.String, []arrow.DataType{arrow.FixedWidthTypes.Timestamp_ms})
	canCast(arrow.BinaryTypes.LargeString, []arrow.DataType{arrow.FixedWidthTypes.Timestamp_ns})
	// no formatting supported
	cannotCast(arrow.FixedWidthTypes.Timestamp_us, []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary})

	canCast(&arrow.FixedSizeBinaryType{ByteWidth: 3}, []arrow.DataType{
		arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary,
		arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString,
		&arrow.FixedSizeBinaryType{ByteWidth: 3}})

	arrow.RegisterExtensionType(types.NewSmallintType())
	defer arrow.UnregisterExtensionType("smallint")
	canCast(types.NewSmallintType(), []arrow.DataType{arrow.PrimitiveTypes.Int16})
	canCast(types.NewSmallintType(), numericTypes) // any cast which is valid for storage is supported
	canCast(arrow.Null, []arrow.DataType{types.NewSmallintType()})

	canCast(arrow.FixedWidthTypes.Date32, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString})
	canCast(arrow.FixedWidthTypes.Date64, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString})
	canCast(arrow.FixedWidthTypes.Timestamp_ns, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString})
	canCast(arrow.FixedWidthTypes.Timestamp_us, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString})
	canCast(arrow.FixedWidthTypes.Time32ms, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString})
	canCast(arrow.FixedWidthTypes.Time64ns, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString})
}

// checkCastFails parses input as JSON of type dt and asserts the cast fails.
func (c *CastSuite) checkCastFails(dt arrow.DataType, input string, opts *compute.CastOptions) {
	inArr, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(input), array.WithUseNumber())
	defer inArr.Release()

	checkCastFails(c.T(), inArr, *opts)
}

// checkCastOpts casts JSON input of dtIn to dtOut with the given options and
// compares against the expected JSON output.
func (c *CastSuite) checkCastOpts(dtIn, dtOut arrow.DataType, inJSON, outJSON string, opts compute.CastOptions) {
	inArr, _, _ := array.FromJSON(c.mem, dtIn, strings.NewReader(inJSON), array.WithUseNumber())
	outArr, _, _ := array.FromJSON(c.mem, dtOut, strings.NewReader(outJSON), array.WithUseNumber())
	defer inArr.Release()
	defer outArr.Release()

	checkCast(c.T(), inArr, outArr, opts)
}

// checkCast is checkCastOpts with default (safe) cast options.
func (c *CastSuite) checkCast(dtIn, dtOut arrow.DataType, inJSON, outJSON string) {
	c.checkCastOpts(dtIn, dtOut, inJSON, outJSON, *compute.DefaultCastOptions(true))
}
arrow.DataType, json string, opts compute.CastOptions) { outArr, _, _ := array.FromJSON(c.mem, dtOut, strings.NewReader(json), array.WithUseNumber()) defer outArr.Release() checkCast(c.T(), in, outArr, opts) } func (c *CastSuite) checkCastExp(dtIn arrow.DataType, inJSON string, exp arrow.Array) { inArr, _, _ := array.FromJSON(c.mem, dtIn, strings.NewReader(inJSON), array.WithUseNumber()) defer inArr.Release() checkCast(c.T(), inArr, exp, *compute.DefaultCastOptions(true)) } func (c *CastSuite) TestNumericToBool() { for _, dt := range numericTypes { c.checkCast(dt, arrow.FixedWidthTypes.Boolean, `[0, null, 127, 1, 0]`, `[false, null, true, true, false]`) } // check negative numbers for _, dt := range []arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Float64} { c.checkCast(dt, arrow.FixedWidthTypes.Boolean, `[0, null, 127, -1, 0]`, `[false, null, true, true, false]`) } } func (c *CastSuite) StringToBool() { for _, dt := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { c.checkCast(dt, arrow.FixedWidthTypes.Boolean, `["False", null, "true", "True", "false"]`, `[false, null, true, true, false]`) c.checkCast(dt, arrow.FixedWidthTypes.Boolean, `["0", null, "1", "1", "0"]`, `[false, null, true, true, false]`) opts := compute.NewCastOptions(arrow.FixedWidthTypes.Boolean, true) c.checkCastFails(dt, `["false "]`, opts) c.checkCastFails(dt, `["T"]`, opts) } } func (c *CastSuite) TestToIntUpcast() { c.checkCast(arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int32, `[0, null, 127, -1, 0]`, `[0, null, 127, -1, 0]`) c.checkCast(arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int16, `[0, 100, 200, 255, 0]`, `[0, 100, 200, 255, 0]`) } func (c *CastSuite) TestToIntDowncastSafe() { // int16 to uint8 no overflow/underflow c.checkCast(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint8, `[0, null, 200, 1, 2]`, `[0, null, 200, 1, 2]`) // int16 to uint8, overflow c.checkCastFails(arrow.PrimitiveTypes.Int16, `[0, null, 256, 0, 0]`, 
compute.NewCastOptions(arrow.PrimitiveTypes.Uint8, true))
	// and underflow
	c.checkCastFails(arrow.PrimitiveTypes.Int16, `[0, null, -1, 0, 0]`,
		compute.NewCastOptions(arrow.PrimitiveTypes.Uint8, true))

	// int32 to int16, no overflow/underflow
	c.checkCast(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16,
		`[0, null, 2000, 1, 2]`, `[0, null, 2000, 1, 2]`)

	// int32 to int16, overflow
	c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0, null, 2000, 70000, 2]`,
		compute.NewCastOptions(arrow.PrimitiveTypes.Int16, true))
	// and underflow
	c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0, null, 2000, -70000, 2]`,
		compute.NewCastOptions(arrow.PrimitiveTypes.Int16, true))
	c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0, null, 2000, -70000, 2]`,
		compute.NewCastOptions(arrow.PrimitiveTypes.Uint8, true))
}

// TestIntegerSignedToUnsigned verifies signed -> unsigned casts fail in safe
// mode when any value is negative or out of range, and wrap (two's
// complement) once AllowIntOverflow is set.
func (c *CastSuite) TestIntegerSignedToUnsigned() {
	i32s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32,
		strings.NewReader(`[-2147483648, null, -1, 65535, 2147483647]`))
	defer i32s.Release()

	// same width
	checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint32, true))
	// wider
	checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint64, true))
	// narrower
	checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint16, true))

	var options compute.CastOptions
	options.AllowIntOverflow = true

	// with overflow allowed, negatives reinterpret as large unsigned values
	u32s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint32,
		strings.NewReader(`[2147483648, null, 4294967295, 65535, 2147483647]`))
	defer u32s.Release()
	checkCast(c.T(), i32s, u32s, options)

	u64s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint64,
		strings.NewReader(`[18446744071562067968, null, 18446744073709551615, 65535, 2147483647]`),
		array.WithUseNumber()) // have to use WithUseNumber so it doesn't lose precision converting to float64
	defer u64s.Release()
	checkCast(c.T(), i32s, u64s, options)

	// fail because of overflow, instead of underflow
	i32s, _, _ = array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32,
		strings.NewReader(`[0, null, 0, 65536, 2147483647]`))
	defer i32s.Release()
	checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint16, true))

	u16s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint16,
		strings.NewReader(`[0, null, 0, 0, 65535]`))
	defer u16s.Release()
	checkCast(c.T(), i32s, u16s, options)
}

// TestIntegerUnsignedToSigned verifies unsigned -> signed casts fail in safe
// mode on values that do not fit (including for a sliced input) and wrap once
// AllowIntOverflow is set.
func (c *CastSuite) TestIntegerUnsignedToSigned() {
	u32s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint32,
		strings.NewReader(`[4294967295, null, 0, 32768]`))
	defer u32s.Release()
	// same width
	checkCastFails(c.T(), u32s, *compute.SafeCastOptions(arrow.PrimitiveTypes.Int32))
	// narrower
	checkCastFails(c.T(), u32s, *compute.SafeCastOptions(arrow.PrimitiveTypes.Int16))
	// a slice that drops the first element still overflows int16 (32768)
	sl := array.NewSlice(u32s, 1, int64(u32s.Len()))
	defer sl.Release()
	checkCastFails(c.T(), sl, *compute.SafeCastOptions(arrow.PrimitiveTypes.Int16))

	var opts compute.CastOptions
	opts.AllowIntOverflow = true
	c.checkCastArr(u32s, arrow.PrimitiveTypes.Int32, `[-1, null, 0, 32768]`, opts)
	c.checkCastArr(u32s, arrow.PrimitiveTypes.Int64, `[4294967295, null, 0, 32768]`, opts)
	c.checkCastArr(u32s, arrow.PrimitiveTypes.Int16, `[-1, null, 0, -32768]`, opts)
}

// TestToIntDowncastUnsafe verifies narrowing integer casts wrap (rather than
// error) when AllowIntOverflow is enabled.
func (c *CastSuite) TestToIntDowncastUnsafe() {
	opts := compute.CastOptions{AllowIntOverflow: true}
	c.checkCastOpts(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint8,
		`[0, null, 200, 1, 2]`, `[0, null, 200, 1, 2]`, opts)
	c.checkCastOpts(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint8,
		`[0, null, 256, 1, 2, -1]`, `[0, null, 0, 1, 2, 255]`, opts)
	c.checkCastOpts(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16,
		`[0, null, 2000, 1, 2, -1]`, `[0, null, 2000, 1, 2, -1]`, opts)
	c.checkCastOpts(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16,
		`[0, null, 2000, 70000, -70000]`, `[0, null, 2000, 4464, -4464]`, opts)
}

// TestFloatingToInt verifies float -> int casts: exact values convert,
// fractional values fail in safe mode and truncate toward zero when
// AllowFloatTruncate is set.
func (c *CastSuite) TestFloatingToInt() {
	for _, from := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} {
		for _, to := range
[]arrow.DataType{arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64} {
			// float to int no truncation
			c.checkCast(from, to, `[1.0, null, 0.0, -1.0, 5.0]`, `[1, null, 0, -1, 5]`)

			// float to int truncate error
			opts := compute.SafeCastOptions(to)
			c.checkCastFails(from, `[1.5, 0.0, null, 0.5, -1.5, 5.5]`, opts)

			// float to int truncate allowed
			opts.AllowFloatTruncate = true
			c.checkCastOpts(from, to, `[1.5, 0.0, null, 0.5, -1.5, 5.5]`, `[1, 0, null, 0, -1, 5]`, *opts)
		}
	}
}

// TestIntToFloating verifies int -> float casts fail in safe mode when the
// value cannot be represented exactly (beyond the mantissa), succeed for
// exactly-representable values, and that null slots mask lossy values.
func (c *CastSuite) TestIntToFloating() {
	for _, from := range []arrow.DataType{arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int32} {
		// 2**24 is the last integer exactly representable as float32;
		// 2**24 + 1 is not.
		two24 := `[16777216, 16777217]`
		c.checkCastFails(from, two24, compute.SafeCastOptions(arrow.PrimitiveTypes.Float32))
		one24 := `[16777216]`
		c.checkCast(from, arrow.PrimitiveTypes.Float32, one24, one24)
	}

	i64s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int64,
		strings.NewReader(`[-9223372036854775808, -9223372036854775807, 0, 9223372036854775806, 9223372036854775807]`),
		array.WithUseNumber())
	defer i64s.Release()

	checkCastFails(c.T(), i64s, *compute.SafeCastOptions(arrow.PrimitiveTypes.Float64))
	// masking the lossy entries with nulls makes the cast succeed
	masked := c.maskArrayWithNullsAt(i64s, []int{0, 1, 3, 4})
	defer masked.Release()
	c.checkCastArr(masked, arrow.PrimitiveTypes.Float64, `[null, null, 0, null, null]`, *compute.DefaultCastOptions(true))

	c.checkCastFails(arrow.PrimitiveTypes.Uint64, `[9007199254740992, 9007199254740993]`,
		compute.SafeCastOptions(arrow.PrimitiveTypes.Float64))
}

// TestDecimal128ToInt exercises decimal128 -> int64 casts across the four
// combinations of AllowIntOverflow/AllowDecimalTruncate, plus negative-scale
// decimals.
func (c *CastSuite) TestDecimal128ToInt() {
	opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Int64)

	c.Run("no overflow no truncate", func() {
		for _, allowIntOverflow := range []bool{false, true} {
			c.Run(fmt.Sprintf("int_overflow=%t", allowIntOverflow), func() {
				for _, allowDecTruncate := range []bool{false, true} {
					c.Run(fmt.Sprintf("dec_truncate=%t", allowDecTruncate), func() {
						opts.AllowIntOverflow = allowIntOverflow
						opts.AllowDecimalTruncate = allowDecTruncate

						// integral decimals convert regardless of the flags
						noOverflowNoTrunc, _, _ := array.FromJSON(c.mem,
							&arrow.Decimal128Type{Precision: 38, Scale: 10},
							strings.NewReader(`["02.0000000000", "-11.0000000000", "22.0000000000", "-121.000000000", null]`))

						c.checkCastArr(noOverflowNoTrunc, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts)
						noOverflowNoTrunc.Release()
					})
				}
			})
		}
	})

	c.Run("truncate no overflow", func() {
		for _, allowIntOverflow := range []bool{false, true} {
			c.Run("allow overflow"+strconv.FormatBool(allowIntOverflow), func() {
				opts.AllowIntOverflow = allowIntOverflow
				// fractional digits require AllowDecimalTruncate
				truncNoOverflow, _, _ := array.FromJSON(c.mem,
					&arrow.Decimal128Type{Precision: 38, Scale: 10},
					strings.NewReader(`["02.1000000000", "-11.0000004500", "22.0000004500", "-121.1210000000", null]`))

				opts.AllowDecimalTruncate = true
				c.checkCastArr(truncNoOverflow, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts)

				opts.AllowDecimalTruncate = false
				checkCastFails(c.T(), truncNoOverflow, *opts)
				truncNoOverflow.Release()
			})
		}
	})

	c.Run("overflow no truncate", func() {
		for _, allowDecTruncate := range []bool{false, true} {
			c.Run("allow truncate "+strconv.FormatBool(allowDecTruncate), func() {
				opts.AllowDecimalTruncate = allowDecTruncate

				// values exceed int64 range; require AllowIntOverflow
				overflowNoTrunc, _, _ := array.FromJSON(c.mem,
					&arrow.Decimal128Type{Precision: 38, Scale: 10},
					strings.NewReader(`[ "12345678901234567890000.0000000000", "99999999999999999999999.0000000000", null]`),
					array.WithUseNumber())
				defer overflowNoTrunc.Release()

				opts.AllowIntOverflow = true
				c.checkCastArr(overflowNoTrunc, arrow.PrimitiveTypes.Int64,
					// 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64
					`[4807115922877858896, 200376420520689663, null]`, *opts)

				opts.AllowIntOverflow = false
				checkCastFails(c.T(), overflowNoTrunc, *opts)
			})
		}
	})

	c.Run("overflow and truncate", func() {
		for _, allowIntOverFlow := range []bool{false, true} {
			c.Run("allow overflow = "+strconv.FormatBool(allowIntOverFlow), func() {
				for _, allowDecTruncate := range []bool{false, true} {
					c.Run("allow truncate = "+strconv.FormatBool(allowDecTruncate), func() {
						opts.AllowIntOverflow = allowIntOverFlow
						opts.AllowDecimalTruncate = allowDecTruncate

						// both flags must be set for these to convert
						overflowAndTruncate, _, _ := array.FromJSON(c.mem,
							&arrow.Decimal128Type{Precision: 38, Scale: 10},
							strings.NewReader(`[ "12345678901234567890000.0045345000", "99999999999999999999999.0000344300", null]`),
							array.WithUseNumber())
						defer overflowAndTruncate.Release()
						if opts.AllowIntOverflow && opts.AllowDecimalTruncate {
							c.checkCastArr(overflowAndTruncate, arrow.PrimitiveTypes.Int64,
								// 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64
								`[4807115922877858896, 200376420520689663, null]`, *opts)
						} else {
							checkCastFails(c.T(), overflowAndTruncate, *opts)
						}
					})
				}
			})
		}
	})

	c.Run("negative scale", func() {
		bldr := array.NewDecimal128Builder(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: -4})
		defer bldr.Release()
		var err error
		for _, d := range []decimal128.Num{decimal128.FromU64(1234567890000), decimal128.FromI64(-120000)} {
			// rescale from scale 0 to the type's scale of -4
			d, err = d.Rescale(0, -4)
			c.Require().NoError(err)
			bldr.Append(d)
		}
		negScale := bldr.NewArray()
		defer negScale.Release()

		opts.AllowIntOverflow = true
		opts.AllowDecimalTruncate = true
		c.checkCastArr(negScale, arrow.PrimitiveTypes.Int64, `[1234567890000, -120000]`, *opts)
	})
}

// TestDecimal256ToInt mirrors TestDecimal128ToInt for decimal256 inputs.
func (c *CastSuite) TestDecimal256ToInt() {
	opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Int64)

	c.Run("no overflow no truncate", func() {
		for _, allowIntOverflow := range []bool{false, true} {
			c.Run(fmt.Sprintf("int_overflow=%t", allowIntOverflow), func() {
				for _, allowDecTruncate := range []bool{false, true} {
					c.Run(fmt.Sprintf("dec_truncate=%t", allowDecTruncate), func() {
						opts.AllowIntOverflow = allowIntOverflow
						opts.AllowDecimalTruncate = allowDecTruncate
						noOverflowNoTrunc, _, _ := array.FromJSON(c.mem,
							&arrow.Decimal256Type{Precision: 40, Scale: 10},
							strings.NewReader(`["02.0000000000", "-11.0000000000", "22.0000000000", "-121.000000000", null]`))
						c.checkCastArr(noOverflowNoTrunc, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts)
noOverflowNoTrunc.Release()
					})
				}
			})
		}
	})

	c.Run("truncate no overflow", func() {
		for _, allowIntOverflow := range []bool{false, true} {
			c.Run("allow overflow"+strconv.FormatBool(allowIntOverflow), func() {
				opts.AllowIntOverflow = allowIntOverflow
				// fractional digits require AllowDecimalTruncate
				truncNoOverflow, _, _ := array.FromJSON(c.mem,
					&arrow.Decimal256Type{Precision: 40, Scale: 10},
					strings.NewReader(`["02.1000000000", "-11.0000004500", "22.0000004500", "-121.1210000000", null]`))

				opts.AllowDecimalTruncate = true
				c.checkCastArr(truncNoOverflow, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts)

				opts.AllowDecimalTruncate = false
				checkCastFails(c.T(), truncNoOverflow, *opts)
				truncNoOverflow.Release()
			})
		}
	})

	c.Run("overflow no truncate", func() {
		for _, allowDecTruncate := range []bool{false, true} {
			c.Run("allow truncate "+strconv.FormatBool(allowDecTruncate), func() {
				opts.AllowDecimalTruncate = allowDecTruncate
				// values exceed int64 range; require AllowIntOverflow
				overflowNoTrunc, _, _ := array.FromJSON(c.mem,
					&arrow.Decimal256Type{Precision: 40, Scale: 10},
					strings.NewReader(`[ "1234567890123456789000000.0000000000", "9999999999999999999999999.0000000000", null]`),
					array.WithUseNumber())
				defer overflowNoTrunc.Release()

				opts.AllowIntOverflow = true
				c.checkCastArr(overflowNoTrunc, arrow.PrimitiveTypes.Int64,
					// 1234567890123456789000000 % 2**64, 9999999999999999999999999 % 2**64
					`[1096246371337547584, 1590897978359414783, null]`, *opts)

				opts.AllowIntOverflow = false
				checkCastFails(c.T(), overflowNoTrunc, *opts)
			})
		}
	})

	c.Run("overflow and truncate", func() {
		for _, allowIntOverFlow := range []bool{false, true} {
			c.Run("allow overflow = "+strconv.FormatBool(allowIntOverFlow), func() {
				for _, allowDecTruncate := range []bool{false, true} {
					c.Run("allow truncate = "+strconv.FormatBool(allowDecTruncate), func() {
						opts.AllowIntOverflow = allowIntOverFlow
						opts.AllowDecimalTruncate = allowDecTruncate
						// both flags must be set for these to convert
						overflowAndTruncate, _, _ := array.FromJSON(c.mem,
							&arrow.Decimal256Type{Precision: 40, Scale: 10},
							strings.NewReader(`[ "1234567890123456789000000.0045345000", "9999999999999999999999999.0000344300", null]`),
							array.WithUseNumber())
						defer overflowAndTruncate.Release()
						if opts.AllowIntOverflow && opts.AllowDecimalTruncate {
							c.checkCastArr(overflowAndTruncate, arrow.PrimitiveTypes.Int64,
								// 1234567890123456789000000 % 2**64, 9999999999999999999999999 % 2**64
								`[1096246371337547584, 1590897978359414783, null]`, *opts)
						} else {
							checkCastFails(c.T(), overflowAndTruncate, *opts)
						}
					})
				}
			})
		}
	})

	c.Run("negative scale", func() {
		bldr := array.NewDecimal256Builder(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: -4})
		defer bldr.Release()
		var err error
		for _, d := range []decimal256.Num{decimal256.FromU64(1234567890000), decimal256.FromI64(-120000)} {
			// rescale from scale 0 to the type's scale of -4
			d, err = d.Rescale(0, -4)
			c.Require().NoError(err)
			bldr.Append(d)
		}
		negScale := bldr.NewArray()
		defer negScale.Release()

		opts.AllowIntOverflow = true
		opts.AllowDecimalTruncate = true
		c.checkCastArr(negScale, arrow.PrimitiveTypes.Int64, `[1234567890000, -120000]`, *opts)
	})
}

// TestIntegerToDecimal verifies integer -> decimal casts for every integer
// type, including extreme int64/uint64 values, and that an output precision
// too small for the input fails.
func (c *CastSuite) TestIntegerToDecimal() {
	for _, decType := range []arrow.DataType{&arrow.Decimal128Type{Precision: 22, Scale: 2}, &arrow.Decimal256Type{Precision: 22, Scale: 2}} {
		c.Run(decType.String(), func() {
			for _, intType := range integerTypes {
				c.Run(intType.String(), func() {
					c.checkCast(intType, decType, `[0, 7, null, 100, 99]`, `["0.00", "7.00", null, "100.00", "99.00"]`)
				})
			}
		})
	}

	c.Run("extreme value", func() {
		// int64 min/max fit in precision 19
		for _, dt := range []arrow.DataType{&arrow.Decimal128Type{Precision: 19, Scale: 0}, &arrow.Decimal256Type{Precision: 19, Scale: 0}} {
			c.Run(dt.String(), func() {
				c.checkCast(arrow.PrimitiveTypes.Int64, dt,
					`[-9223372036854775808, 9223372036854775807]`,
					`["-9223372036854775808", "9223372036854775807"]`)
			})
		}
		// uint64 max needs precision 20
		for _, dt := range []arrow.DataType{&arrow.Decimal128Type{Precision: 20, Scale: 0}, &arrow.Decimal256Type{Precision: 20, Scale: 0}} {
			c.Run(dt.String(), func() {
				c.checkCast(arrow.PrimitiveTypes.Uint64, dt,
					`[0, 18446744073709551615]`, `["0", "18446744073709551615"]`)
			})
		}
	})

	c.Run("insufficient output precision", func() {
		var opts compute.CastOptions
		opts.ToType = &arrow.Decimal128Type{Precision: 5, Scale: 3}
		c.checkCastFails(arrow.PrimitiveTypes.Int8, `[0]`, &opts)

		opts.ToType = &arrow.Decimal256Type{Precision: 76, Scale: 67}
		c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0]`, &opts)
	})
}

// TestDecimal128ToDecimal128 verifies decimal128 rescale/reprecision casts:
// lossless round trips, truncating rescales gated by AllowDecimalTruncate,
// and precision-loss failures.
func (c *CastSuite) TestDecimal128ToDecimal128() {
	var opts compute.CastOptions

	for _, allowDecTruncate := range []bool{false, true} {
		c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() {
			opts.AllowDecimalTruncate = allowDecTruncate

			// integral values round-trip between precisions in both directions
			noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10},
				strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`))
			expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 10},
				strings.NewReader(`["02.", "30.", "22.", "-121.", null]`))
			defer noTruncate.Release()
			defer expected.Release()

			checkCast(c.T(), noTruncate, expected, opts)
			checkCast(c.T(), expected, noTruncate, opts)
		})
	}

	c.Run("same scale diff precision", func() {
		for _, allowDecTruncate := range []bool{false, true} {
			c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() {
				opts.AllowDecimalTruncate = allowDecTruncate

				d52, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 5, Scale: 2},
					strings.NewReader(`["12.34", "0.56"]`))
				d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2},
					strings.NewReader(`["12.34", "0.56"]`))
				defer d52.Release()
				defer d42.Release()

				checkCast(c.T(), d52, d42, opts)
				checkCast(c.T(), d42, d52, opts)
			})
		}
	})

	c.Run("rescale leads to trunc", func() {
		dP38S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10},
			strings.NewReader(`["-02.1234567890", "30.1234567890", null]`))
		dP28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0},
			strings.NewReader(`["-02.", "30.", null]`))
dP38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10},
			strings.NewReader(`["-02.0000000000", "30.0000000000", null]`))
		defer func() {
			dP38S10.Release()
			dP28S0.Release()
			dP38S10RoundTripped.Release()
		}()

		// dropping fractional digits requires AllowDecimalTruncate ...
		opts.AllowDecimalTruncate = true
		checkCast(c.T(), dP38S10, dP28S0, opts)
		checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts)

		// ... while widening the scale back never truncates
		opts.AllowDecimalTruncate = false
		opts.ToType = dP28S0.DataType()
		checkCastFails(c.T(), dP38S10, opts)
		checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts)
	})

	c.Run("precision loss without rescale = trunc", func() {
		d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2},
			strings.NewReader(`["12.34"]`))
		defer d42.Release()
		for _, dt := range []arrow.DataType{
			&arrow.Decimal128Type{Precision: 3, Scale: 2},
			&arrow.Decimal128Type{Precision: 4, Scale: 3},
			&arrow.Decimal128Type{Precision: 2, Scale: 1}} {
			opts.AllowDecimalTruncate = true
			opts.ToType = dt
			out, err := compute.CastArray(context.Background(), d42, &opts)
			out.Release()
			c.NoError(err)

			opts.AllowDecimalTruncate = false
			opts.ToType = dt
			checkCastFails(c.T(), d42, opts)
		}
	})
}

// TestDecimal256ToDecimal256 mirrors TestDecimal128ToDecimal128 for
// decimal256 -> decimal256 casts.
func (c *CastSuite) TestDecimal256ToDecimal256() {
	var opts compute.CastOptions

	for _, allowDecTruncate := range []bool{false, true} {
		c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() {
			opts.AllowDecimalTruncate = allowDecTruncate

			// integral values round-trip between precisions in both directions
			noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10},
				strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`))
			expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 28, Scale: 10},
				strings.NewReader(`["02.", "30.", "22.", "-121.", null]`))
			defer noTruncate.Release()
			defer expected.Release()

			checkCast(c.T(), noTruncate, expected, opts)
			checkCast(c.T(), expected, noTruncate, opts)
		})
	}

	c.Run("same scale diff precision", func() {
		for _, allowDecTruncate := range []bool{false, true} {
			c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() {
				opts.AllowDecimalTruncate = allowDecTruncate

				d52, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 5, Scale: 2},
					strings.NewReader(`["12.34", "0.56"]`))
				d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2},
					strings.NewReader(`["12.34", "0.56"]`))
				defer d52.Release()
				defer d42.Release()

				checkCast(c.T(), d52, d42, opts)
				checkCast(c.T(), d42, d52, opts)
			})
		}
	})

	c.Run("rescale leads to trunc", func() {
		dP38S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10},
			strings.NewReader(`["-02.1234567890", "30.1234567890", null]`))
		dP28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 28, Scale: 0},
			strings.NewReader(`["-02.", "30.", null]`))
		dP38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10},
			strings.NewReader(`["-02.0000000000", "30.0000000000", null]`))
		defer func() {
			dP38S10.Release()
			dP28S0.Release()
			dP38S10RoundTripped.Release()
		}()

		// dropping fractional digits requires AllowDecimalTruncate ...
		opts.AllowDecimalTruncate = true
		checkCast(c.T(), dP38S10, dP28S0, opts)
		checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts)

		// ... while widening the scale back never truncates
		opts.AllowDecimalTruncate = false
		opts.ToType = dP28S0.DataType()
		checkCastFails(c.T(), dP38S10, opts)
		checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts)
	})

	c.Run("precision loss without rescale = trunc", func() {
		d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2},
			strings.NewReader(`["12.34"]`))
		defer d42.Release()
		for _, dt := range []arrow.DataType{
			&arrow.Decimal256Type{Precision: 3, Scale: 2},
			&arrow.Decimal256Type{Precision: 4, Scale: 3},
			&arrow.Decimal256Type{Precision: 2, Scale: 1}} {
			opts.AllowDecimalTruncate = true
			opts.ToType = dt
			out, err := compute.CastArray(context.Background(), d42, &opts)
			out.Release()
			c.NoError(err)

			opts.AllowDecimalTruncate = false
			opts.ToType = dt
			checkCastFails(c.T(), d42, opts)
		}
	})
}

// TestDecimal128ToDecimal256 verifies widening decimal128 -> decimal256 casts
// across precisions and scales, including truncating rescales.
func (c *CastSuite) TestDecimal128ToDecimal256() {
	var opts compute.CastOptions

	for _, allowDecTruncate := range []bool{false, true} {
		c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() {
			opts.AllowDecimalTruncate = allowDecTruncate

			noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10},
				strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`))
			expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 28, Scale: 10},
				strings.NewReader(`["02.", "30.", "22.", "-121.", null]`))
			defer noTruncate.Release()
			defer expected.Release()

			checkCast(c.T(), noTruncate, expected, opts)
		})
	}

	c.Run("same scale diff precision", func() {
		for _, allowDecTruncate := range []bool{false, true} {
			c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() {
				opts.AllowDecimalTruncate = allowDecTruncate

				d52, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 5, Scale: 2},
					strings.NewReader(`["12.34", "0.56"]`))
				d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2},
					strings.NewReader(`["12.34", "0.56"]`))
				d402, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 2},
					strings.NewReader(`["12.34", "0.56"]`))
				defer d52.Release()
				defer d42.Release()
				defer d402.Release()

				checkCast(c.T(), d52, d42, opts)
				checkCast(c.T(), d52, d402, opts)
			})
		}
	})

	c.Run("rescale leads to trunc", func() {
		d128P38S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10},
			strings.NewReader(`["-02.1234567890", "30.1234567890", null]`))
		d128P28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0},
			strings.NewReader(`["-02.", "30.", null]`))
		d256P28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 28, Scale: 0},
			strings.NewReader(`["-02.", "30.", null]`))
		d256P38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10},
			strings.NewReader(`["-02.0000000000", "30.0000000000", null]`))
		defer func() {
d128P38S10.Release()
			d128P28S0.Release()
			d256P28S0.Release()
			d256P38S10RoundTripped.Release()
		}()

		// dropping fractional digits requires AllowDecimalTruncate ...
		opts.AllowDecimalTruncate = true
		checkCast(c.T(), d128P38S10, d256P28S0, opts)
		checkCast(c.T(), d128P28S0, d256P38S10RoundTripped, opts)

		// ... while widening the scale back never truncates
		opts.AllowDecimalTruncate = false
		opts.ToType = d256P28S0.DataType()
		checkCastFails(c.T(), d128P38S10, opts)
		checkCast(c.T(), d128P28S0, d256P38S10RoundTripped, opts)
	})

	c.Run("precision loss without rescale = trunc", func() {
		d128P4S2, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2},
			strings.NewReader(`["12.34"]`))
		defer d128P4S2.Release()
		for _, dt := range []arrow.DataType{
			&arrow.Decimal256Type{Precision: 3, Scale: 2},
			&arrow.Decimal256Type{Precision: 4, Scale: 3},
			&arrow.Decimal256Type{Precision: 2, Scale: 1}} {
			opts.AllowDecimalTruncate = true
			opts.ToType = dt
			out, err := compute.CastArray(context.Background(), d128P4S2, &opts)
			out.Release()
			c.NoError(err)

			opts.AllowDecimalTruncate = false
			opts.ToType = dt
			checkCastFails(c.T(), d128P4S2, opts)
		}
	})
}

// TestDecimal256ToDecimal128 verifies narrowing decimal256 -> decimal128
// casts: lossless conversions, truncating rescales gated by
// AllowDecimalTruncate, and precision-loss failures.
func (c *CastSuite) TestDecimal256ToDecimal128() {
	var opts compute.CastOptions

	for _, allowDecTruncate := range []bool{false, true} {
		c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() {
			opts.AllowDecimalTruncate = allowDecTruncate

			noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 42, Scale: 10},
				strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`))
			expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0},
				strings.NewReader(`["02.", "30.", "22.", "-121.", null]`))
			defer noTruncate.Release()
			defer expected.Release()

			checkCast(c.T(), noTruncate, expected, opts)
			checkCast(c.T(), expected, noTruncate, opts)
		})
	}

	c.Run("same scale diff precision", func() {
		for _, allowDecTruncate := range []bool{false, true} {
			c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() {
				opts.AllowDecimalTruncate = allowDecTruncate

				dP42S2, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 42, Scale: 2},
					strings.NewReader(`["12.34", "0.56"]`))
				d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2},
					strings.NewReader(`["12.34", "0.56"]`))
				defer dP42S2.Release()
				defer d42.Release()

				checkCast(c.T(), dP42S2, d42, opts)
				checkCast(c.T(), d42, dP42S2, opts)
			})
		}
	})

	c.Run("rescale leads to trunc", func() {
		d256P52S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 52, Scale: 10},
			strings.NewReader(`["-02.1234567890", "30.1234567890", null]`))
		d256P42S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 42, Scale: 0},
			strings.NewReader(`["-02.", "30.", null]`))
		d128P28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0},
			strings.NewReader(`["-02.", "30.", null]`))
		// NOTE(review): despite its d128 name this array is built with a
		// Decimal256Type — confirm the naming is intentional.
		d128P38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10},
			strings.NewReader(`["-02.0000000000", "30.0000000000", null]`))
		defer func() {
			d256P52S10.Release()
			d256P42S0.Release()
			d128P28S0.Release()
			d128P38S10RoundTripped.Release()
		}()

		// dropping fractional digits requires AllowDecimalTruncate ...
		opts.AllowDecimalTruncate = true
		checkCast(c.T(), d256P52S10, d128P28S0, opts)
		checkCast(c.T(), d256P42S0, d128P38S10RoundTripped, opts)

		// ... while widening the scale back never truncates
		opts.AllowDecimalTruncate = false
		opts.ToType = d128P28S0.DataType()
		checkCastFails(c.T(), d256P52S10, opts)
		checkCast(c.T(), d256P42S0, d128P38S10RoundTripped, opts)
	})

	c.Run("precision loss without rescale = trunc", func() {
		d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2},
			strings.NewReader(`["12.34"]`))
		defer d42.Release()
		for _, dt := range []arrow.DataType{
			&arrow.Decimal128Type{Precision: 3, Scale: 2},
			&arrow.Decimal128Type{Precision: 4, Scale: 3},
			&arrow.Decimal128Type{Precision: 2, Scale: 1}} {
			opts.AllowDecimalTruncate = true
			opts.ToType = dt
			out, err := compute.CastArray(context.Background(), d42, &opts)
			out.Release()
			c.NoError(err)

			opts.AllowDecimalTruncate = false
			opts.ToType = dt
			checkCastFails(c.T(), d42, opts)
		}
	})
}

// TestFloatingToDecimal verifies float -> decimal casts: rounding to the
// target scale, overflow gated by AllowDecimalTruncate, and large values that
// are exactly representable as floats.
func (c *CastSuite) TestFloatingToDecimal() {
	for _, fltType := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} {
		c.Run("from "+fltType.String(), func() {
			for _, decType := range []arrow.DataType{&arrow.Decimal128Type{Precision: 5, Scale: 2}, &arrow.Decimal256Type{Precision: 5, Scale: 2}} {
				c.Run("to "+decType.String(), func() {
					c.checkCast(fltType, decType,
						`[0.0, null, 123.45, 123.456, 999.994]`, `["0.00", null, "123.45", "123.46", "999.99"]`)

					c.Run("overflow", func() {
						opts := compute.CastOptions{ToType: decType}
						// 999.996 rounds to 1000.00 which exceeds precision 5
						c.checkCastFails(fltType, `[999.996]`, &opts)

						opts.AllowDecimalTruncate = true
						c.checkCastOpts(fltType, decType, `[0.0, null, 999.996, 123.45, 999.994]`,
							`["0.00", null, "0.00", "123.45", "999.99"]`, opts)
					})
				})
			}
		})
	}

	dec128 := func(prec, scale int32) arrow.DataType {
		return &arrow.Decimal128Type{Precision: prec, Scale: scale}
	}
	dec256 := func(prec, scale int32) arrow.DataType {
		return &arrow.Decimal256Type{Precision: prec, Scale: scale}
	}

	type decFunc func(int32, int32) arrow.DataType

	for _, decType := range []decFunc{dec128, dec256} {
		// 2**64 + 2**41 (exactly representable as a float)
		c.checkCast(arrow.PrimitiveTypes.Float32, decType(20, 0),
			`[1.8446746e+19, -1.8446746e+19]`, `[18446746272732807168, -18446746272732807168]`)
		c.checkCast(arrow.PrimitiveTypes.Float64, decType(20, 0),
			`[1.8446744073709556e+19, -1.8446744073709556e+19]`,
			`[18446744073709555712, -18446744073709555712]`)
		c.checkCast(arrow.PrimitiveTypes.Float32, decType(20, 4),
			`[1.8446746e+15, -1.8446746e+15]`, `[1844674627273280.7168, -1844674627273280.7168]`)
		c.checkCast(arrow.PrimitiveTypes.Float64, decType(20, 4),
			`[1.8446744073709556e+15, -1.8446744073709556e+15]`,
			`[1844674407370955.5712, -1844674407370955.5712]`)
	}
}

// TestDecimalToFloating verifies decimal -> float casts of exact values.
func (c *CastSuite) TestDecimalToFloating() {
	for _, flt := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} {
		c.Run(flt.String(), func() {
			for _, dec := range
[]arrow.DataType{&arrow.Decimal128Type{Precision: 5, Scale: 2}, &arrow.Decimal256Type{Precision: 5, Scale: 2}} {
				c.Run(dec.String(), func() {
					c.checkCast(dec, flt, `["0.00", null, "123.45", "999.99"]`, `[0.0, null, 123.45, 999.99]`)
				})
			}
		})
	}
}

// TestDateToString verifies date32/date64 -> (large) string formatting.
func (c *CastSuite) TestDateToString() {
	for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} {
		c.checkCast(arrow.FixedWidthTypes.Date32, stype, `[0, null]`, `["1970-01-01", null]`)
		c.checkCast(arrow.FixedWidthTypes.Date64, stype, `[86400000, null]`, `["1970-01-02", null]`)
	}
}

// TestTimeToString verifies time32/time64 -> (large) string formatting with
// unit-appropriate fractional digits.
func (c *CastSuite) TestTimeToString() {
	for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} {
		c.checkCast(arrow.FixedWidthTypes.Time32s, stype, `[1, 62]`, `["00:00:01", "00:01:02"]`)
		c.checkCast(arrow.FixedWidthTypes.Time64ns, stype, `[0, 1]`, `["00:00:00.000000000", "00:00:00.000000001"]`)
	}
}

// TestTimestampToString verifies zone-less timestamp -> string formatting for
// each time unit (no zone suffix is emitted).
func (c *CastSuite) TestTimestampToString() {
	for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} {
		c.checkCast(&arrow.TimestampType{Unit: arrow.Second}, stype,
			`[-30610224000, -5364662400]`, `["1000-01-01 00:00:00", "1800-01-01 00:00:00"]`)
		c.checkCast(&arrow.TimestampType{Unit: arrow.Millisecond}, stype,
			`[-30610224000000, -5364662400000]`, `["1000-01-01 00:00:00.000", "1800-01-01 00:00:00.000"]`)
		c.checkCast(&arrow.TimestampType{Unit: arrow.Microsecond}, stype,
			`[-30610224000000000, -5364662400000000]`, `["1000-01-01 00:00:00.000000", "1800-01-01 00:00:00.000000"]`)
		c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond}, stype,
			`[-596933876543210988, 349837323456789012]`, `["1951-02-01 01:02:03.456789012", "1981-02-01 01:02:03.456789012"]`)
	}
}

// TestTimestampWithZoneToString verifies zoned timestamp -> string formatting:
// UTC emits a "Z" suffix while named zones emit a numeric offset.
func (c *CastSuite) TestTimestampWithZoneToString() {
	for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} {
		c.checkCast(arrow.FixedWidthTypes.Timestamp_s, stype,
			`[-30610224000, -5364662400]`, `["1000-01-01 00:00:00Z", "1800-01-01 00:00:00Z"]`)

		c.checkCast(&arrow.TimestampType{Unit: arrow.Second, TimeZone: "America/Phoenix"}, stype,
			`[-34226955, 1456767743]`, `["1968-11-30 13:30:45-0700", "2016-02-29 10:42:23-0700"]`)
		c.checkCast(&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "America/Phoenix"}, stype,
			`[-34226955877, 1456767743456]`, `["1968-11-30 13:30:44.123-0700", "2016-02-29 10:42:23.456-0700"]`)
		c.checkCast(&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "America/Phoenix"}, stype,
			`[-34226955877000, 1456767743456789]`, `["1968-11-30 13:30:44.123000-0700", "2016-02-29 10:42:23.456789-0700"]`)
		c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "America/Phoenix"}, stype,
			`[-34226955876543211, 1456767743456789246]`, `["1968-11-30 13:30:44.123456789-0700", "2016-02-29 10:42:23.456789246-0700"]`)
	}
}

// assertBinaryZeroCopy asserts that a binary<->string cast reused the input's
// buffers: validity and data buffers must be identical, and offsets must be
// identical when the offset widths match, or numerically equal (after casting
// 32-bit offsets up to 64-bit) when they differ.
func (c *CastSuite) assertBinaryZeroCopy(lhs, rhs arrow.Array) {
	// null bitmap and data buffers are always zero-copied
	assertBufferSame(c.T(), lhs, rhs, 0)
	assertBufferSame(c.T(), lhs, rhs, 2)

	lOffsetByteWidth := lhs.DataType().Layout().Buffers[1].ByteWidth
	rOffsetByteWidth := rhs.DataType().Layout().Buffers[1].ByteWidth
	if lOffsetByteWidth == rOffsetByteWidth {
		assertBufferSame(c.T(), lhs, rhs, 1)
		return
	}

	// differing widths (binary vs large binary): normalize both offset
	// buffers to int64 arrays and compare values
	offsets := make([]arrow.Array, 0, 2)
	for _, arr := range []arrow.Array{lhs, rhs} {
		length := arr.Len()
		buffer := arr.Data().Buffers()[1]
		byteWidth := arr.DataType().Layout().Buffers[1].ByteWidth
		switch byteWidth {
		case 4:
			data := array.NewData(arrow.PrimitiveTypes.Int32, length, []*memory.Buffer{nil, buffer}, nil, 0, 0)
			defer data.Release()
			i32 := array.NewInt32Data(data)
			i64, err := compute.CastArray(context.Background(), i32, compute.SafeCastOptions(arrow.PrimitiveTypes.Int64))
			c.Require().NoError(err)
			i32.Release()
			defer i64.Release()
			offsets = append(offsets, i64)
		default:
			data := array.NewData(arrow.PrimitiveTypes.Int64, length, []*memory.Buffer{nil, buffer}, nil, 0, 0)
			defer data.Release()
			i64 := array.NewInt64Data(data)
			defer i64.Release()
			offsets =
append(offsets, i64) } } c.Truef(array.Equal(offsets[0], offsets[1]), "lhs: %s\nrhs: %s", offsets[0], offsets[1]) } func (c *CastSuite) TestBinaryToString() { for _, btype := range []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary} { c.Run(btype.String(), func() { for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { c.Run(stype.String(), func() { // empty -> empty always works c.checkCast(btype, stype, `[]`, `[]`) invalidUtf8 := c.invalidUtf8Arr(btype) defer invalidUtf8.Release() invalidutf8Str := c.invalidUtf8Arr(stype) defer invalidutf8Str.Release() // invalid utf8 masked by a null bit is not an error masked := c.maskArrayWithNullsAt(invalidUtf8, []int{4}) expMasked := c.maskArrayWithNullsAt(invalidutf8Str, []int{4}) defer masked.Release() defer expMasked.Release() checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(stype)) opts := compute.SafeCastOptions(stype) checkCastFails(c.T(), invalidUtf8, *opts) // override utf8 check opts.AllowInvalidUtf8 = true strs, err := compute.CastArray(context.Background(), invalidUtf8, opts) c.NoError(err) defer strs.Release() c.assertBinaryZeroCopy(invalidUtf8, strs) }) } }) } c.Run("fixed size binary", func() { fromType := &arrow.FixedSizeBinaryType{ByteWidth: 3} invalidUtf8Arr := c.fixedSizeInvalidUtf8(fromType) defer invalidUtf8Arr.Release() for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { c.Run(stype.String(), func() { c.checkCast(fromType, stype, `[]`, `[]`) // invalid utf-8 masked by a null bit is not an error strInvalidUtf8 := c.fixedSizeInvalidUtf8(stype) defer strInvalidUtf8.Release() masked := c.maskArrayWithNullsAt(invalidUtf8Arr, []int{4}) expMasked := c.maskArrayWithNullsAt(strInvalidUtf8, []int{4}) defer masked.Release() defer expMasked.Release() checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(stype)) opts := compute.SafeCastOptions(stype) checkCastFails(c.T(), invalidUtf8Arr, 
*opts) // override utf8 check opts.AllowInvalidUtf8 = true strs, err := compute.CastArray(context.Background(), invalidUtf8Arr, opts) c.NoError(err) defer strs.Release() // null buffer is not always the same if input is sliced assertBufferSame(c.T(), invalidUtf8Arr, strs, 0) c.Same(invalidUtf8Arr.Data().Buffers()[1], strs.Data().Buffers()[2]) }) } }) } func (c *CastSuite) TestBinaryOrStringToBinary() { for _, fromType := range baseBinaryTypes { c.Run(fromType.String(), func() { for _, toType := range []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary} { c.Run(toType.String(), func() { // empty -> empty always works c.checkCast(fromType, toType, `[]`, `[]`) invalidUtf8 := c.invalidUtf8Arr(fromType) defer invalidUtf8.Release() // invalid utf-8 is not an error for binary out, err := compute.CastToType(context.Background(), invalidUtf8, toType) c.NoError(err) defer out.Release() c.assertBinaryZeroCopy(invalidUtf8, out) // invalid utf-8 masked by a null is also not an erro invalidutf8Bin := c.invalidUtf8Arr(toType) defer invalidutf8Bin.Release() // invalid utf8 masked by a null bit is not an error masked := c.maskArrayWithNullsAt(invalidUtf8, []int{4}) expMasked := c.maskArrayWithNullsAt(invalidutf8Bin, []int{4}) defer masked.Release() defer expMasked.Release() checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(toType)) }) } }) } c.Run("fixed size binary", func() { fromType := &arrow.FixedSizeBinaryType{ByteWidth: 3} invalidUtf8Arr := c.fixedSizeInvalidUtf8(fromType) defer invalidUtf8Arr.Release() checkCast(c.T(), invalidUtf8Arr, invalidUtf8Arr, *compute.DefaultCastOptions(true)) checkCastFails(c.T(), invalidUtf8Arr, *compute.SafeCastOptions(&arrow.FixedSizeBinaryType{ByteWidth: 5})) for _, toType := range []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary} { c.Run(toType.String(), func() { c.checkCast(fromType, toType, `[]`, `[]`) out, err := compute.CastToType(context.Background(), invalidUtf8Arr, toType) 
c.NoError(err) defer out.Release() assertBufferSame(c.T(), invalidUtf8Arr, out, 0) c.Same(invalidUtf8Arr.Data().Buffers()[1], out.Data().Buffers()[2]) }) } }) } func (c *CastSuite) TestStringToString() { for _, fromType := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { c.Run("from "+fromType.String(), func() { for _, toType := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { c.Run("to "+toType.String(), func() { c.checkCast(fromType, toType, `[]`, `[]`) invalidUtf8 := c.invalidUtf8Arr(fromType) defer invalidUtf8.Release() invalidutf8Str := c.invalidUtf8Arr(toType) defer invalidutf8Str.Release() // invalid utf8 masked by a null bit is not an error masked := c.maskArrayWithNullsAt(invalidUtf8, []int{4}) expMasked := c.maskArrayWithNullsAt(invalidutf8Str, []int{4}) defer masked.Release() defer expMasked.Release() checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(toType)) opts := compute.SafeCastOptions(toType) // override utf8 check opts.AllowInvalidUtf8 = true // utf-8 is not checked by cast when the origin (utf-8) guarantees utf-8 strs, err := compute.CastArray(context.Background(), invalidUtf8, opts) c.NoError(err) defer strs.Release() c.assertBinaryZeroCopy(invalidUtf8, strs) }) } }) } } func (c *CastSuite) TestStringToInt() { for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { for _, dt := range signedIntTypes { c.checkCast(stype, dt, `["0", null, "127", "-1", "0", "0x0", "0x7F"]`, `[0, null, 127, -1, 0, 0, 127]`) } c.checkCast(stype, arrow.PrimitiveTypes.Int32, `["2147483647", null, "-2147483648", "0", "0X0", "0x7FFFFFFF", "-0X1", "-0x10000000"]`, `[2147483647, null, -2147483648, 0, 0, 2147483647, -1, -268435456]`) c.checkCast(stype, arrow.PrimitiveTypes.Int64, `["9223372036854775807", null, "-9223372036854775808", "0", "0x0", "0x7FFFFFFFFFFFFFFf", "-0x0FFFFFFFFFFFFFFF"]`, `[9223372036854775807, null, -9223372036854775808, 0, 0, 
9223372036854775807, -1152921504606846975]`) for _, dt := range unsignedIntTypes { c.checkCast(stype, dt, `["0", null, "127", "255", "0", "0x0", "0xff", "0X7f"]`, `[0, null, 127, 255, 0, 0, 255, 127]`) } c.checkCast(stype, arrow.PrimitiveTypes.Uint32, `["2147483647", null, "4294967295", "0", "0x0", "0x7FFFFFFf", "0xFFFFFFFF"]`, `[2147483647, null, 4294967295, 0, 0, 2147483647, 4294967295]`) c.checkCast(stype, arrow.PrimitiveTypes.Uint64, `["9223372036854775807", null, "18446744073709551615", "0", "0x0", "0x7FFFFFFFFFFFFFFf", "0xfFFFFFFFFFFFFFFf"]`, `[9223372036854775807, null, 18446744073709551615, 0, 0, 9223372036854775807, 18446744073709551615]`) for _, notInt8 := range []string{"z", "12 z", "128", "-129", "0.5", "0x", "0xfff", "-0xf0"} { c.checkCastFails(stype, `["`+notInt8+`"]`, compute.SafeCastOptions(arrow.PrimitiveTypes.Int8)) } for _, notUint8 := range []string{"256", "-1", "0.5", "0x", "0x3wa", "0x123"} { c.checkCastFails(stype, `["`+notUint8+`"]`, compute.SafeCastOptions(arrow.PrimitiveTypes.Uint8)) } } } func (c *CastSuite) TestStringToFloating() { for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { for _, dt := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} { c.checkCast(stype, dt, `["0.1", null, "127.3", "1e3", "200.4", "0.5"]`, `[0.1, null, 127.3, 1000, 200.4, 0.5]`) for _, notFloat := range []string{"z"} { c.checkCastFails(stype, `["`+notFloat+`"]`, compute.SafeCastOptions(dt)) } } } } func (c *CastSuite) TestUnsupportedInputType() { // casting to a supported target type, but with an unsupported // input for that target type. 
arr, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3]`)) defer arr.Release() toType := arrow.ListOf(arrow.BinaryTypes.String) _, err := compute.CastToType(context.Background(), arr, toType) c.ErrorIs(err, arrow.ErrNotImplemented) c.ErrorContains(err, "function 'cast_list' has no kernel matching input types (int32)") // test calling through the generic kernel API datum := compute.NewDatum(arr) defer datum.Release() _, err = compute.CallFunction(context.Background(), "cast", compute.SafeCastOptions(toType), datum) c.ErrorIs(err, arrow.ErrNotImplemented) c.ErrorContains(err, "function 'cast_list' has no kernel matching input types (int32)") } func (c *CastSuite) TestUnsupportedTargetType() { arr, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3]`)) defer arr.Release() toType := arrow.DenseUnionOf([]arrow.Field{{Name: "a", Type: arrow.PrimitiveTypes.Int32}}, []arrow.UnionTypeCode{0}) _, err := compute.CastToType(context.Background(), arr, toType) c.ErrorIs(err, arrow.ErrNotImplemented) c.ErrorContains(err, "unsupported cast to dense_union from int32") // test calling through the generic kernel API datum := compute.NewDatum(arr) defer datum.Release() _, err = compute.CallFunction(context.Background(), "cast", compute.SafeCastOptions(toType), datum) c.ErrorIs(err, arrow.ErrNotImplemented) c.ErrorContains(err, "unsupported cast to dense_union from int32") } func (c *CastSuite) checkCastSelfZeroCopy(dt arrow.DataType, json string) { arr, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(json)) defer arr.Release() checkCastZeroCopy(c.T(), arr, dt, compute.NewCastOptions(dt, true)) } func (c *CastSuite) checkCastZeroCopy(from arrow.DataType, json string, to arrow.DataType) { arr, _, _ := array.FromJSON(c.mem, from, strings.NewReader(json)) defer arr.Release() checkCastZeroCopy(c.T(), arr, to, compute.NewCastOptions(to, true)) } func (c *CastSuite) TestTimestampToTimestampSimpleTimezone() { 
c.checkCast(&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "Etc/UTC"}, arrow.FixedWidthTypes.Timestamp_us, `["2023-01-01T19:25:00.123456+00:00", null, "2023-01-01T19:25:00.123456+00:00"]`, `["2023-01-01T19:25:00.123456+00:00", null, "2023-01-01T19:25:00.123456+00:00"]`) } func (c *CastSuite) TestTimestampToTimestamp() { tests := []struct { coarse, fine arrow.DataType }{ {arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Timestamp_ms}, {arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Timestamp_us}, {arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Timestamp_ns}, } var opts compute.CastOptions for _, tt := range tests { c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { c.checkCast(tt.coarse, tt.fine, `[0, null, 200, 1, 2]`, `[0, null, 200000, 1000, 2000]`) opts.AllowTimeTruncate = false opts.ToType = tt.coarse c.checkCastFails(tt.fine, `[0, null, 200456, 1123, 2456]`, &opts) // with truncation allowed, divide/truncate opts.AllowTimeTruncate = true c.checkCastOpts(tt.fine, tt.coarse, `[0, null, 200456, 1123, 2456]`, `[0, null, 200, 1, 2]`, opts) }) } tests = []struct { coarse, fine arrow.DataType }{ {arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Timestamp_ns}, } for _, tt := range tests { c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { c.checkCast(tt.coarse, tt.fine, `[0, null, 200, 1, 2]`, `[0, null, 200000000000, 1000000000, 2000000000]`) opts.AllowTimeTruncate = false opts.ToType = tt.coarse c.checkCastFails(tt.fine, `[0, null, 200456000000, 1123000000, 2456000000]`, &opts) // with truncation allowed, divide/truncate opts.AllowTimeTruncate = true c.checkCastOpts(tt.fine, tt.coarse, `[0, null, 200456000000, 1123000000, 2456000000]`, `[0, null, 200, 1, 2]`, opts) }) } } func (c *CastSuite) TestTimestampZeroCopy() { for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Timestamp_s /*, arrow.PrimitiveTypes.Int64*/} { c.checkCastZeroCopy(arrow.FixedWidthTypes.Timestamp_s, `[0, 
null, 2000, 1000, 0]`, dt) } c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Timestamp_s) } func (c *CastSuite) TestTimestampToTimestampMultiplyOverflow() { opts := compute.CastOptions{ToType: arrow.FixedWidthTypes.Timestamp_ns} // 1000-01-01, 1800-01-01, 2000-01-01, 2300-01-01, 3000-01-01 c.checkCastFails(arrow.FixedWidthTypes.Timestamp_s, `[-30610224000, -5364662400, 946684800, 10413792000, 32503680000]`, &opts) } var ( timestampJSON = `["1970-01-01T00:00:59.123456789","2000-02-29T23:23:23.999999999", "1899-01-01T00:59:20.001001001","2033-05-18T03:33:20.000000000", "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002", "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132", "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163", "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null]` timestampSecondsJSON = `["1970-01-01T00:00:59","2000-02-29T23:23:23", "1899-01-01T00:59:20","2033-05-18T03:33:20", "2020-01-01T01:05:05", "2019-12-31T02:10:10", "2019-12-30T03:15:15", "2009-12-31T04:20:20", "2010-01-01T05:25:25", "2010-01-03T06:30:30", "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null]` timestampExtremeJSON = `["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"]` ) func (c *CastSuite) TestTimestampToDate() { stamps, _, _ := array.FromJSON(c.mem, arrow.FixedWidthTypes.Timestamp_ns, strings.NewReader(timestampJSON)) defer stamps.Release() date32, _, _ := array.FromJSON(c.mem, arrow.FixedWidthTypes.Date32, strings.NewReader(`[ 0, 11016, -25932, 23148, 18262, 18261, 18260, 14609, 14610, 14612, 14613, 13149, 13148, 14241, 14242, 15340, null ]`)) defer date32.Release() date64, _, _ := array.FromJSON(c.mem, arrow.FixedWidthTypes.Date64, strings.NewReader(`[ 0, 951782400000, -2240524800000, 1999987200000, 1577836800000, 1577750400000, 1577664000000, 1262217600000, 
1262304000000, 1262476800000, 1262563200000, 1136073600000, 1135987200000, 1230422400000, 1230508800000, 1325376000000, null]`), array.WithUseNumber()) defer date64.Release() checkCast(c.T(), stamps, date32, *compute.DefaultCastOptions(true)) checkCast(c.T(), stamps, date64, *compute.DefaultCastOptions(true)) c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Date32, timestampExtremeJSON, `[-106753, 106753]`) c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Date64, timestampExtremeJSON, `[-9223459200000, 9223459200000]`) for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Microsecond, arrow.Millisecond, arrow.Nanosecond} { dt := &arrow.TimestampType{Unit: u} c.checkCastExp(dt, timestampSecondsJSON, date32) c.checkCastExp(dt, timestampSecondsJSON, date64) } } func (c *CastSuite) TestZonedTimestampToDate() { c.Run("Pacific/Marquesas", func() { dt := &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Pacific/Marquesas"} c.checkCast(dt, arrow.FixedWidthTypes.Date32, timestampJSON, `[-1, 11016, -25933, 23147, 18261, 18260, 18259, 14608, 14609, 14611, 14612, 13148, 13148, 14240, 14241, 15339, null]`) c.checkCast(dt, arrow.FixedWidthTypes.Date64, timestampJSON, `[-86400000, 951782400000, -2240611200000, 1999900800000, 1577750400000, 1577664000000, 1577577600000, 1262131200000, 1262217600000, 1262390400000, 1262476800000, 1135987200000, 1135987200000, 1230336000000, 1230422400000, 1325289600000, null]`) }) for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { dt := &arrow.TimestampType{Unit: u, TimeZone: "Australia/Broken_Hill"} c.checkCast(dt, arrow.FixedWidthTypes.Date32, timestampSecondsJSON, `[ 0, 11017, -25932, 23148, 18262, 18261, 18260, 14609, 14610, 14612, 14613, 13149, 13148, 14241, 14242, 15340, null]`) c.checkCast(dt, arrow.FixedWidthTypes.Date64, timestampSecondsJSON, `[ 0, 951868800000, -2240524800000, 1999987200000, 1577836800000, 1577750400000, 1577664000000, 
1262217600000, 1262304000000, 1262476800000, 1262563200000, 1136073600000, 1135987200000, 1230422400000, 1230508800000, 1325376000000, null]`) } // invalid timezones for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { dt := &arrow.TimestampType{Unit: u, TimeZone: "Mars/Mariner_Valley"} c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Date32, false)) c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Date64, false)) } } func (c *CastSuite) TestTimestampToTime() { c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64ns, timestampJSON, `[ 59123456789, 84203999999999, 3560001001001, 12800000000000, 3905001000000, 7810002000000, 11715003000000, 15620004132000, 19525005321000, 23430006163000, 27335000000000, 31240000000000, 35145000000000, 0, 0, 3723000000000, null]`) c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time64us, true)) c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64us, timestampExtremeJSON, `[59123456, 84203999999]`) timesSec := `[59, 84203, 3560, 12800, 3905, 7810, 11715, 15620, 19525, 23430, 27335, 31240, 35145, 0, 0, 3723, null]` timesMs := `[59000, 84203000, 3560000, 12800000, 3905000, 7810000, 11715000, 15620000, 19525000, 23430000, 27335000, 31240000, 35145000, 0, 0, 3723000, null]` timesUs := `[59000000, 84203000000, 3560000000, 12800000000, 3905000000, 7810000000, 11715000000, 15620000000, 19525000000, 23430000000, 27335000000, 31240000000, 35145000000, 0, 0, 3723000000, null]` timesNs := `[59000000000, 84203000000000, 3560000000000, 12800000000000, 3905000000000, 7810000000000, 11715000000000, 15620000000000, 19525000000000, 23430000000000, 27335000000000, 31240000000000, 35145000000000, 0, 0, 3723000000000, null]` c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time32s, 
timestampSecondsJSON, timesSec) c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time32ms, timestampSecondsJSON, timesMs) c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time32s, timestampSecondsJSON, timesSec) c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time32ms, timestampSecondsJSON, timesMs) c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64us, timestampSecondsJSON, timesUs) c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64ns, timestampSecondsJSON, timesNs) c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32ms, timestampSecondsJSON, timesMs) c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32s, timestampSecondsJSON, timesSec) c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64us, timestampSecondsJSON, timesUs) c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64ns, timestampSecondsJSON, timesNs) c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32ms, timestampSecondsJSON, timesMs) c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32s, timestampSecondsJSON, timesSec) trunc := compute.CastOptions{AllowTimeTruncate: true} timestampsUS := `["1970-01-01T00:00:59.123456","2000-02-29T23:23:23.999999", "1899-01-01T00:59:20.001001","2033-05-18T03:33:20.000000", "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002", "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132", "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163", "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null]` timestampsMS := `["1970-01-01T00:00:59.123","2000-02-29T23:23:23.999", "1899-01-01T00:59:20.001","2033-05-18T03:33:20.000", "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002", "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004", "2010-01-01T05:25:25.005", 
"2010-01-03T06:30:30.006", "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null]` c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time64us, true)) c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time32ms, true)) c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time32s, true)) c.checkCastFails(arrow.FixedWidthTypes.Timestamp_us, timestampsUS, compute.NewCastOptions(arrow.FixedWidthTypes.Time32ms, true)) c.checkCastFails(arrow.FixedWidthTypes.Timestamp_us, timestampsUS, compute.NewCastOptions(arrow.FixedWidthTypes.Time32s, true)) c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ms, timestampsMS, compute.NewCastOptions(arrow.FixedWidthTypes.Time32s, true)) timesNsUs := `[59123456, 84203999999, 3560001001, 12800000000, 3905001000, 7810002000, 11715003000, 15620004132, 19525005321, 23430006163, 27335000000, 31240000000, 35145000000, 0, 0, 3723000000, null]` timesNsMs := `[59123, 84203999, 3560001, 12800000, 3905001, 7810002, 11715003, 15620004, 19525005, 23430006, 27335000, 31240000, 35145000, 0, 0, 3723000, null]` timesUsNs := `[59123456000, 84203999999000, 3560001001000, 12800000000000, 3905001000000, 7810002000000, 11715003000000, 15620004132000, 19525005321000, 23430006163000, 27335000000000, 31240000000000, 35145000000000, 0, 0, 3723000000000, null]` timesMsNs := `[59123000000, 84203999000000, 3560001000000, 12800000000000, 3905001000000, 7810002000000, 11715003000000, 15620004000000, 19525005000000, 23430006000000, 27335000000000, 31240000000000, 35145000000000, 0, 0, 3723000000000, null]` timesMsUs := `[59123000, 84203999000, 3560001000, 12800000000, 3905001000, 7810002000, 11715003000, 15620004000, 19525005000, 23430006000, 27335000000, 31240000000, 35145000000, 0, 0, 3723000000, null]` 
c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64us, timestampJSON, timesNsUs, trunc) c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32ms, timestampJSON, timesNsMs, trunc) c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32s, timestampJSON, timesSec, trunc) c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32ms, timestampsUS, timesNsMs, trunc) c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32s, timestampsUS, timesSec, trunc) c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time32s, timestampsMS, timesSec, trunc) // upscaling tests c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64ns, timestampsUS, timesUsNs) c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time64ns, timestampsMS, timesMsNs) c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time64us, timestampsMS, timesMsUs) c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time64ns, timestampSecondsJSON, timesNs) c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time64us, timestampSecondsJSON, timesUs) c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time32ms, timestampSecondsJSON, timesMs) // invalid timezones for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { dt := &arrow.TimestampType{Unit: u, TimeZone: "Mars/Mariner_Valley"} switch u { case arrow.Second, arrow.Millisecond: c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(&arrow.Time32Type{Unit: u}, false)) default: c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(&arrow.Time64Type{Unit: u}, false)) } } } func (c *CastSuite) TestZonedTimestampToTime() { c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Pacific/Marquesas"}, arrow.FixedWidthTypes.Time64ns, 
timestampJSON, `[52259123456789, 50003999999999, 56480001001001, 65000000000000, 56105001000000, 60010002000000, 63915003000000, 67820004132000, 71725005321000, 75630006163000, 79535000000000, 83440000000000, 945000000000, 52200000000000, 52200000000000, 55923000000000, null]`) timesSec := `[ 34259, 35603, 35960, 47000, 41705, 45610, 49515, 53420, 57325, 61230, 65135, 69040, 72945, 37800, 37800, 41523, null ]` timesMs := `[ 34259000, 35603000, 35960000, 47000000, 41705000, 45610000, 49515000, 53420000, 57325000, 61230000, 65135000, 69040000, 72945000, 37800000, 37800000, 41523000, null ]` timesUs := `[ 34259000000, 35603000000, 35960000000, 47000000000, 41705000000, 45610000000, 49515000000, 53420000000, 57325000000, 61230000000, 65135000000, 69040000000, 72945000000, 37800000000, 37800000000, 41523000000, null ]` timesNs := `[ 34259000000000, 35603000000000, 35960000000000, 47000000000000, 41705000000000, 45610000000000, 49515000000000, 53420000000000, 57325000000000, 61230000000000, 65135000000000, 69040000000000, 72945000000000, 37800000000000, 37800000000000, 41523000000000, null ]` c.checkCast(&arrow.TimestampType{Unit: arrow.Second, TimeZone: "Australia/Broken_Hill"}, arrow.FixedWidthTypes.Time32s, timestampSecondsJSON, timesSec) c.checkCast(&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "Australia/Broken_Hill"}, arrow.FixedWidthTypes.Time32ms, timestampSecondsJSON, timesMs) c.checkCast(&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "Australia/Broken_Hill"}, arrow.FixedWidthTypes.Time64us, timestampSecondsJSON, timesUs) c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Australia/Broken_Hill"}, arrow.FixedWidthTypes.Time64ns, timestampSecondsJSON, timesNs) } func (c *CastSuite) TestTimeToTime() { var opts compute.CastOptions tests := []struct { coarse, fine arrow.DataType }{ {arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Time32ms}, {arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Time64us}, 
{arrow.FixedWidthTypes.Time64us, arrow.FixedWidthTypes.Time64ns}, } for _, tt := range tests { c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { coarse := `[0, null, 200, 1, 2]` promoted := `[0, null, 200000, 1000, 2000]` willBeTruncated := `[0, null, 200456, 1123, 2456]` c.checkCast(tt.coarse, tt.fine, coarse, promoted) opts.AllowTimeTruncate = false opts.ToType = tt.coarse c.checkCastFails(tt.fine, willBeTruncated, &opts) opts.AllowTimeTruncate = true c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) }) } tests = []struct { coarse, fine arrow.DataType }{ {arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Time64us}, {arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Time64ns}, } for _, tt := range tests { c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { coarse := `[0, null, 200, 1, 2]` promoted := `[0, null, 200000000, 1000000, 2000000]` willBeTruncated := `[0, null, 200456000, 1123000, 2456000]` c.checkCast(tt.coarse, tt.fine, coarse, promoted) opts.AllowTimeTruncate = false opts.ToType = tt.coarse c.checkCastFails(tt.fine, willBeTruncated, &opts) opts.AllowTimeTruncate = true c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) }) } tests = []struct { coarse, fine arrow.DataType }{ {arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Time64ns}, } for _, tt := range tests { c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { coarse := `[0, null, 200, 1, 2]` promoted := `[0, null, 200000000000, 1000000000, 2000000000]` willBeTruncated := `[0, null, 200456000000, 1123000000, 2456000000]` c.checkCast(tt.coarse, tt.fine, coarse, promoted) opts.AllowTimeTruncate = false opts.ToType = tt.coarse c.checkCastFails(tt.fine, willBeTruncated, &opts) opts.AllowTimeTruncate = true c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) }) } } func (c *CastSuite) TestTimeZeroCopy() { for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Time32s /*, 
arrow.PrimitiveTypes.Int32*/} { c.checkCastZeroCopy(arrow.FixedWidthTypes.Time32s, `[0, null, 2000, 1000, 0]`, dt) } c.checkCastZeroCopy(arrow.PrimitiveTypes.Int32, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Time32s) for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Time64us /*, arrow.PrimitiveTypes.Int64*/} { c.checkCastZeroCopy(arrow.FixedWidthTypes.Time64us, `[0, null, 2000, 1000, 0]`, dt) } c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Time64us) } func (c *CastSuite) TestDateToDate() { day32 := `[0, null, 100, 1, 10]` day64 := `[0, null, 8640000000, 86400000, 864000000]` // multiply promotion c.checkCast(arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Date64, day32, day64) // no truncation c.checkCast(arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Date32, day64, day32) day64WillBeTruncated := `[0, null, 8640000123, 86400456, 864000789]` opts := compute.CastOptions{ToType: arrow.FixedWidthTypes.Date32} c.checkCastFails(arrow.FixedWidthTypes.Date64, day64WillBeTruncated, &opts) opts.AllowTimeTruncate = true c.checkCastOpts(arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Date32, day64WillBeTruncated, day32, opts) } func (c *CastSuite) TestDateZeroCopy() { for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Date32 /*, arrow.PrimitiveTypes.Int32*/} { c.checkCastZeroCopy(arrow.FixedWidthTypes.Date32, `[0, null, 2000, 1000, 0]`, dt) } c.checkCastZeroCopy(arrow.PrimitiveTypes.Int32, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Date32) for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Date64 /*, arrow.PrimitiveTypes.Int64*/} { c.checkCastZeroCopy(arrow.FixedWidthTypes.Date64, `[0, null, 172800000, 86400000, 0]`, dt) } c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 172800000, 86400000, 0]`, arrow.FixedWidthTypes.Date64) } func (c *CastSuite) TestDurationToDuration() { var opts compute.CastOptions tests := []struct { coarse, fine arrow.DataType }{ 
{arrow.FixedWidthTypes.Duration_s, arrow.FixedWidthTypes.Duration_ms}, {arrow.FixedWidthTypes.Duration_ms, arrow.FixedWidthTypes.Duration_us}, {arrow.FixedWidthTypes.Duration_us, arrow.FixedWidthTypes.Duration_ns}, } for _, tt := range tests { c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { coarse := `[0, null, 200, 1, 2]` promoted := `[0, null, 200000, 1000, 2000]` willBeTruncated := `[0, null, 200456, 1123, 2456]` c.checkCast(tt.coarse, tt.fine, coarse, promoted) opts.AllowTimeTruncate = false opts.ToType = tt.coarse c.checkCastFails(tt.fine, willBeTruncated, &opts) opts.AllowTimeTruncate = true c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) }) } tests = []struct { coarse, fine arrow.DataType }{ {arrow.FixedWidthTypes.Duration_s, arrow.FixedWidthTypes.Duration_us}, {arrow.FixedWidthTypes.Duration_ms, arrow.FixedWidthTypes.Duration_ns}, } for _, tt := range tests { c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { coarse := `[0, null, 200, 1, 2]` promoted := `[0, null, 200000000, 1000000, 2000000]` willBeTruncated := `[0, null, 200456000, 1123000, 2456000]` c.checkCast(tt.coarse, tt.fine, coarse, promoted) opts.AllowTimeTruncate = false opts.ToType = tt.coarse c.checkCastFails(tt.fine, willBeTruncated, &opts) opts.AllowTimeTruncate = true c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) }) } tests = []struct { coarse, fine arrow.DataType }{ {arrow.FixedWidthTypes.Duration_s, arrow.FixedWidthTypes.Duration_ns}, } for _, tt := range tests { c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { coarse := `[0, null, 200, 1, 2]` promoted := `[0, null, 200000000000, 1000000000, 2000000000]` willBeTruncated := `[0, null, 200456000000, 1123000000, 2456000000]` c.checkCast(tt.coarse, tt.fine, coarse, promoted) opts.AllowTimeTruncate = false opts.ToType = tt.coarse c.checkCastFails(tt.fine, willBeTruncated, &opts) opts.AllowTimeTruncate = true c.checkCastOpts(tt.fine, 
tt.coarse, willBeTruncated, coarse, opts) }) } } func (c *CastSuite) TestDurationZeroCopy() { for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Duration_s /*, arrow.PrimitiveTypes.Int64*/} { c.checkCastZeroCopy(arrow.FixedWidthTypes.Duration_s, `[0, null, 2000, 1000, 0]`, dt) } c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Duration_s) } func (c *CastSuite) TestDurationToDurationMultiplyOverflow() { opts := compute.CastOptions{ToType: arrow.FixedWidthTypes.Duration_ns} c.checkCastFails(arrow.FixedWidthTypes.Duration_s, `[10000000000, 1, 2, 3, 10000000000]`, &opts) } func (c *CastSuite) TestStringToTimestamp() { for _, dt := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { c.checkCast(dt, &arrow.TimestampType{Unit: arrow.Second}, `["1970-01-01", null, "2000-02-29"]`, `[0, null, 951782400]`) c.checkCast(dt, &arrow.TimestampType{Unit: arrow.Microsecond}, `["1970-01-01", null, "2000-02-29"]`, `[0, null, 951782400000000]`) for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { for _, notTS := range []string{"", "xxx"} { opts := compute.NewCastOptions(&arrow.TimestampType{Unit: u}, true) c.checkCastFails(dt, `["`+notTS+`"]`, opts) } } zoned, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(`["2020-02-29T00:00:00Z", "2020-03-02T10:11:12+0102"]`)) defer zoned.Release() mixed, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(`["2020-03-02T10:11:12+0102", "2020-02-29T00:00:00"]`)) defer mixed.Release() c.checkCastArr(zoned, &arrow.TimestampType{Unit: arrow.Second, TimeZone: "UTC"}, `[1582934400, 1583140152]`, *compute.DefaultCastOptions(true)) // timestamp with zone offset should not parse as naive checkCastFails(c.T(), zoned, *compute.NewCastOptions(&arrow.TimestampType{Unit: arrow.Second}, true)) // mixed zoned/unzoned should not parse as naive checkCastFails(c.T(), mixed, *compute.NewCastOptions(&arrow.TimestampType{Unit: 
arrow.Second}, true)) // timestamp with zone offset can parse as any time zone (since they're unambiguous) c.checkCastArr(zoned, arrow.FixedWidthTypes.Timestamp_s, `[1582934400, 1583140152]`, *compute.DefaultCastOptions(true)) c.checkCastArr(zoned, &arrow.TimestampType{Unit: arrow.Second, TimeZone: "America/Phoenix"}, `[1582934400, 1583140152]`, *compute.DefaultCastOptions(true)) } } func (c *CastSuite) TestIntToString() { for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { c.Run(stype.String(), func() { c.checkCast(arrow.PrimitiveTypes.Int8, stype, `[0, 1, 127, -128, null]`, `["0", "1", "127", "-128", null]`) c.checkCast(arrow.PrimitiveTypes.Uint8, stype, `[0, 1, 255, null]`, `["0", "1", "255", null]`) c.checkCast(arrow.PrimitiveTypes.Int16, stype, `[0, 1, 32767, -32768, null]`, `["0", "1", "32767", "-32768", null]`) c.checkCast(arrow.PrimitiveTypes.Uint16, stype, `[0, 1, 65535, null]`, `["0", "1", "65535", null]`) c.checkCast(arrow.PrimitiveTypes.Int32, stype, `[0, 1, 2147483647, -2147483648, null]`, `["0", "1", "2147483647", "-2147483648", null]`) c.checkCast(arrow.PrimitiveTypes.Uint32, stype, `[0, 1, 4294967295, null]`, `["0", "1", "4294967295", null]`) c.checkCast(arrow.PrimitiveTypes.Int64, stype, `[0, 1, 9223372036854775807, -9223372036854775808, null]`, `["0", "1", "9223372036854775807", "-9223372036854775808", null]`) c.checkCast(arrow.PrimitiveTypes.Uint64, stype, `[0, 1, 18446744073709551615, null]`, `["0", "1", "18446744073709551615", null]`) }) } } func (c *CastSuite) TestFloatingToString() { for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { c.Run(stype.String(), func() { bldr := array.NewFloat32Builder(c.mem) defer bldr.Release() bldr.AppendValues([]float32{ 0, float32(math.Copysign(0, -1)), 1.5, float32(math.Inf(-1)), float32(math.Inf(0)), float32(math.NaN())}, nil) bldr.AppendNull() arr := bldr.NewArray() defer arr.Release() bldr64 := 
array.NewFloat64Builder(c.mem) defer bldr64.Release() bldr64.AppendValues([]float64{ 0, math.Copysign(0, -1), 1.5, math.Inf(-1), math.Inf(0), math.NaN()}, nil) bldr64.AppendNull() arr64 := bldr64.NewArray() defer arr64.Release() c.checkCastArr(arr, stype, `["0", "-0", "1.5", "-Inf", "+Inf", "NaN", null]`, *compute.DefaultCastOptions(true)) c.checkCastArr(arr64, stype, `["0", "-0", "1.5", "-Inf", "+Inf", "NaN", null]`, *compute.DefaultCastOptions(true)) }) } } func (c *CastSuite) TestBooleanToString() { for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { c.Run(stype.String(), func() { c.checkCast(arrow.FixedWidthTypes.Boolean, stype, `[true, true, false, null]`, `["true", "true", "false", null]`) }) } } func (c *CastSuite) TestIdentityCasts() { c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Boolean, `[false, true, null, false]`) c.checkCastSelfZeroCopy(arrow.Null, `[null, null, null]`) for _, typ := range numericTypes { c.checkCastSelfZeroCopy(typ, `[1, 2, null, 4]`) } // ["foo", "bar"] base64 encoded for binary c.checkCastSelfZeroCopy(arrow.BinaryTypes.Binary, `["Zm9v", "YmFy"]`) c.checkCastSelfZeroCopy(arrow.BinaryTypes.String, `["foo", "bar"]`) c.checkCastSelfZeroCopy(&arrow.FixedSizeBinaryType{ByteWidth: 3}, `["Zm9v", "YmFy"]`) c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Time32ms, `[1, 2, 3, 4]`) c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Time64us, `[1, 2, 3, 4]`) c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Date32, `[1, 2, 3, 4]`) c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Date64, `[86400000, 0]`) c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Timestamp_s, `[1, 2, 3, 4]`) c.checkCastSelfZeroCopy(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.PrimitiveTypes.Int8}, `[1, 2, 3, 1, null, 3]`) } func (c *CastSuite) TestListToPrimitive() { arr, _, _ := array.FromJSON(c.mem, arrow.ListOf(arrow.PrimitiveTypes.Int8), strings.NewReader(`[[1, 2], [3, 4]]`)) defer arr.Release() _, err := 
compute.CastToType(context.Background(), arr, arrow.PrimitiveTypes.Uint8) c.ErrorIs(err, arrow.ErrNotImplemented) } type makeList func(arrow.DataType) arrow.DataType var listFactories = []makeList{ func(dt arrow.DataType) arrow.DataType { return arrow.ListOf(dt) }, func(dt arrow.DataType) arrow.DataType { return arrow.LargeListOf(dt) }, } func (c *CastSuite) checkListToList(valTypes []arrow.DataType, jsonData string) { for _, makeSrc := range listFactories { for _, makeDest := range listFactories { for _, srcValueType := range valTypes { for _, dstValueType := range valTypes { srcType := makeSrc(srcValueType) dstType := makeDest(dstValueType) c.Run(fmt.Sprintf("from %s to %s", srcType, dstType), func() { c.checkCast(srcType, dstType, jsonData, jsonData) }) } } } } } func (c *CastSuite) TestListToList() { c.checkListToList([]arrow.DataType{arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Int64}, `[[0], [1], null, [2, 3, 4], [5, 6], null, [], [7], [8, 9]]`) } func (c *CastSuite) TestListToListNoNulls() { c.checkListToList([]arrow.DataType{arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Int64}, `[[0], [1], [2, 3, 4], [5, 6], [], [7], [8, 9]]`) } func (c *CastSuite) TestListToListOptionsPassthru() { for _, makeSrc := range listFactories { for _, makeDest := range listFactories { opts := compute.SafeCastOptions(makeDest(arrow.PrimitiveTypes.Int16)) c.checkCastFails(makeSrc(arrow.PrimitiveTypes.Int32), `[[87654321]]`, opts) opts.AllowIntOverflow = true c.checkCastOpts(makeSrc(arrow.PrimitiveTypes.Int32), makeDest(arrow.PrimitiveTypes.Int16), `[[87654321]]`, `[[32689]]`, *opts) } } } func (c *CastSuite) checkStructToStruct(types []arrow.DataType) { for _, srcType := range types { c.Run(srcType.String(), func() { for _, destType := range types { c.Run(destType.String(), func() { fieldNames := []string{"a", "b"} a1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[1, 2, 3, 4, null]`)) b1, _, _ := 
array.FromJSON(c.mem, srcType, strings.NewReader(`[null, 7, 8, 9, 0]`)) a2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[1, 2, 3, 4, null]`)) b2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[null, 7, 8, 9, 0]`)) src, _ := array.NewStructArray([]arrow.Array{a1, b1}, fieldNames) dest, _ := array.NewStructArray([]arrow.Array{a2, b2}, fieldNames) defer func() { a1.Release() b1.Release() a2.Release() b2.Release() src.Release() dest.Release() }() checkCast(c.T(), src, dest, *compute.DefaultCastOptions(true)) c.Run("with nulls", func() { nullBitmap := memory.NewBufferBytes([]byte{10}) srcNullData := src.Data().(*array.Data).Copy() srcNullData.Buffers()[0] = nullBitmap srcNullData.SetNullN(3) defer srcNullData.Release() destNullData := dest.Data().(*array.Data).Copy() destNullData.Buffers()[0] = nullBitmap destNullData.SetNullN(3) defer destNullData.Release() srcNulls := array.NewStructData(srcNullData) destNulls := array.NewStructData(destNullData) defer srcNulls.Release() defer destNulls.Release() checkCast(c.T(), srcNulls, destNulls, *compute.DefaultCastOptions(true)) }) }) } }) } } func (c *CastSuite) checkStructToStructSubset(types []arrow.DataType) { for _, srcType := range types { c.Run(srcType.String(), func() { for _, destType := range types { c.Run(destType.String(), func() { fieldNames := []string{"a", "b", "c", "d", "e"} a1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[1, 2, 5]`)) defer a1.Release() b1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[3, 4, 7]`)) defer b1.Release() c1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[9, 11, 44]`)) defer c1.Release() d1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[6, 51, 49]`)) defer d1.Release() e1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[19, 17, 74]`)) defer e1.Release() a2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[1, 2, 5]`)) defer a2.Release() b2, _, _ := array.FromJSON(c.mem, destType, 
strings.NewReader(`[3, 4, 7]`)) defer b2.Release() c2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[9, 11, 44]`)) defer c2.Release() d2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[6, 51, 49]`)) defer d2.Release() e2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[19, 17, 74]`)) defer e2.Release() src, _ := array.NewStructArray([]arrow.Array{a1, b1, c1, d1, e1}, fieldNames) defer src.Release() dest1, _ := array.NewStructArray([]arrow.Array{a2}, []string{"a"}) defer dest1.Release() opts := *compute.DefaultCastOptions(true) checkCast(c.T(), src, dest1, opts) dest2, _ := array.NewStructArray([]arrow.Array{b2, c2}, []string{"b", "c"}) defer dest2.Release() checkCast(c.T(), src, dest2, opts) dest3, _ := array.NewStructArray([]arrow.Array{c2, d2, e2}, []string{"c", "d", "e"}) defer dest3.Release() checkCast(c.T(), src, dest3, opts) dest4, _ := array.NewStructArray([]arrow.Array{a2, b2, c2, e2}, []string{"a", "b", "c", "e"}) defer dest4.Release() checkCast(c.T(), src, dest4, opts) dest5, _ := array.NewStructArray([]arrow.Array{a2, b2, c2, d2, e2}, []string{"a", "b", "c", "d", "e"}) defer dest5.Release() checkCast(c.T(), src, dest5, opts) // field does not exist dest6 := arrow.StructOf( arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, arrow.Field{Name: "d", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, arrow.Field{Name: "f", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, ) options6 := compute.SafeCastOptions(dest6) _, err := compute.CastArray(context.TODO(), src, options6) c.ErrorIs(err, arrow.ErrType) c.ErrorContains(err, "struct fields don't match or are in the wrong order") // fields in wrong order dest7 := arrow.StructOf( arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, ) options7 := compute.SafeCastOptions(dest7) _, err = 
compute.CastArray(context.TODO(), src, options7) c.ErrorIs(err, arrow.ErrType) c.ErrorContains(err, "struct fields don't match or are in the wrong order") }) } }) } } func (c *CastSuite) checkStructToStructSubsetWithNulls(types []arrow.DataType) { for _, srcType := range types { c.Run(srcType.String(), func() { for _, destType := range types { c.Run(destType.String(), func() { fieldNames := []string{"a", "b", "c", "d", "e"} a1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[1, 2, 5]`)) defer a1.Release() b1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[3, null, 7]`)) defer b1.Release() c1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[9, 11, 44]`)) defer c1.Release() d1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[6, 51, null]`)) defer d1.Release() e1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[null, 17, 74]`)) defer e1.Release() a2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[1, 2, 5]`)) defer a2.Release() b2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[3, null, 7]`)) defer b2.Release() c2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[9, 11, 44]`)) defer c2.Release() d2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[6, 51, null]`)) defer d2.Release() e2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[null, 17, 74]`)) defer e2.Release() // 0, 1, 0 nullBitmap := memory.NewBufferBytes([]byte{2}) srcNull, _ := array.NewStructArrayWithNulls([]arrow.Array{a1, b1, c1, d1, e1}, fieldNames, nullBitmap, 2, 0) defer srcNull.Release() dest1Null, _ := array.NewStructArrayWithNulls([]arrow.Array{a2}, []string{"a"}, nullBitmap, -1, 0) defer dest1Null.Release() opts := compute.DefaultCastOptions(true) checkCast(c.T(), srcNull, dest1Null, *opts) dest2Null, _ := array.NewStructArrayWithNulls([]arrow.Array{b2, c2}, []string{"b", "c"}, nullBitmap, -1, 0) defer dest2Null.Release() checkCast(c.T(), srcNull, dest2Null, *opts) dest3Null, _ := 
array.NewStructArrayWithNulls([]arrow.Array{a2, d2, e2}, []string{"a", "d", "e"}, nullBitmap, -1, 0) defer dest3Null.Release() checkCast(c.T(), srcNull, dest3Null, *opts) dest4Null, _ := array.NewStructArrayWithNulls([]arrow.Array{a2, b2, c2, e2}, []string{"a", "b", "c", "e"}, nullBitmap, -1, 0) defer dest4Null.Release() checkCast(c.T(), srcNull, dest4Null, *opts) dest5Null, _ := array.NewStructArrayWithNulls([]arrow.Array{a2, b2, c2, d2, e2}, []string{"a", "b", "c", "d", "e"}, nullBitmap, -1, 0) defer dest5Null.Release() checkCast(c.T(), srcNull, dest5Null, *opts) // field does not exist dest6Null := arrow.StructOf( arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, arrow.Field{Name: "d", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, arrow.Field{Name: "f", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, ) options6Null := compute.SafeCastOptions(dest6Null) _, err := compute.CastArray(context.TODO(), srcNull, options6Null) c.ErrorIs(err, arrow.ErrType) c.ErrorContains(err, "struct fields don't match or are in the wrong order") // fields in wrong order dest7Null := arrow.StructOf( arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, ) options7Null := compute.SafeCastOptions(dest7Null) _, err = compute.CastArray(context.TODO(), srcNull, options7Null) c.ErrorIs(err, arrow.ErrType) c.ErrorContains(err, "struct fields don't match or are in the wrong order") }) } }) } } func (c *CastSuite) TestStructToSameSizedAndNamedStruct() { c.checkStructToStruct(numericTypes) } func (c *CastSuite) TestStructToStructSubset() { c.checkStructToStructSubset(numericTypes) } func (c *CastSuite) TestStructToStructSubsetWithNulls() { c.checkStructToStructSubsetWithNulls(numericTypes) } func (c *CastSuite) TestStructToSameSizedButDifferentNamedStruct() { fieldNames := []string{"a", "b"} a, _, _ := 
array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1, 2]`)) defer a.Release() b, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[3, 4]`)) defer b.Release() src, _ := array.NewStructArray([]arrow.Array{a, b}, fieldNames) defer src.Release() dest := arrow.StructOf( arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, arrow.Field{Name: "d", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, ) opts := compute.SafeCastOptions(dest) _, err := compute.CastArray(context.TODO(), src, opts) c.ErrorIs(err, arrow.ErrType) c.ErrorContains(err, "struct fields don't match or are in the wrong order") } func (c *CastSuite) TestStructToBiggerStruct() { fieldNames := []string{"a", "b"} a, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1, 2]`)) defer a.Release() b, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[3, 4]`)) defer b.Release() src, _ := array.NewStructArray([]arrow.Array{a, b}, fieldNames) defer src.Release() dest := arrow.StructOf( arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, ) opts := compute.SafeCastOptions(dest) _, err := compute.CastArray(context.TODO(), src, opts) c.ErrorIs(err, arrow.ErrType) c.ErrorContains(err, "struct fields don't match or are in the wrong order") } func (c *CastSuite) TestStructToDifferentNullabilityStruct() { c.Run("non-nullable to nullable", func() { fieldsSrcNonNullable := []arrow.Field{ {Name: "a", Type: arrow.PrimitiveTypes.Int8}, {Name: "b", Type: arrow.PrimitiveTypes.Int8}, {Name: "c", Type: arrow.PrimitiveTypes.Int8}, } srcNonNull, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsSrcNonNullable...), strings.NewReader(`[ {"a": 11, "b": 32, "c": 95}, {"a": 23, "b": 46, "c": 11}, {"a": 56, "b": 37, "c": 44} ]`)) c.Require().NoError(err) defer 
srcNonNull.Release() fieldsDest1Nullable := []arrow.Field{ {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, {Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, } destNullable, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsDest1Nullable...), strings.NewReader(`[ {"a": 11, "b": 32, "c": 95}, {"a": 23, "b": 46, "c": 11}, {"a": 56, "b": 37, "c": 44} ]`)) c.Require().NoError(err) defer destNullable.Release() checkCast(c.T(), srcNonNull, destNullable, *compute.DefaultCastOptions(true)) fieldsDest2Nullable := []arrow.Field{ {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, } data := array.NewData(arrow.StructOf(fieldsDest2Nullable...), destNullable.Len(), destNullable.Data().Buffers(), []arrow.ArrayData{destNullable.Data().Children()[0], destNullable.Data().Children()[2]}, destNullable.NullN(), 0) defer data.Release() dest2Nullable := array.NewStructData(data) defer dest2Nullable.Release() checkCast(c.T(), srcNonNull, dest2Nullable, *compute.DefaultCastOptions(true)) fieldsDest3Nullable := []arrow.Field{ {Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, } data = array.NewData(arrow.StructOf(fieldsDest3Nullable...), destNullable.Len(), destNullable.Data().Buffers(), []arrow.ArrayData{destNullable.Data().Children()[1]}, destNullable.NullN(), 0) defer data.Release() dest3Nullable := array.NewStructData(data) defer dest3Nullable.Release() checkCast(c.T(), srcNonNull, dest3Nullable, *compute.DefaultCastOptions(true)) }) c.Run("nullable to non-nullable", func() { fieldsSrcNullable := []arrow.Field{ {Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, {Name: "b", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, {Name: "c", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, } srcNullable, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsSrcNullable...), strings.NewReader(`[ {"a": 1, 
"b": 3, "c": 9}, {"a": null, "b": 4, "c": 11}, {"a": 5, "b": null, "c": 44} ]`)) c.Require().NoError(err) defer srcNullable.Release() fieldsDest1NonNullable := []arrow.Field{ {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, {Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, } dest1NonNullable := arrow.StructOf(fieldsDest1NonNullable...) options1NoNullable := compute.SafeCastOptions(dest1NonNullable) _, err = compute.CastArray(context.TODO(), srcNullable, options1NoNullable) c.ErrorIs(err, arrow.ErrType) c.ErrorContains(err, "cannot cast nullable field to non-nullable field") fieldsDest2NonNullable := []arrow.Field{ {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, } dest2NonNullable := arrow.StructOf(fieldsDest2NonNullable...) options2NoNullable := compute.SafeCastOptions(dest2NonNullable) _, err = compute.CastArray(context.TODO(), srcNullable, options2NoNullable) c.ErrorIs(err, arrow.ErrType) c.ErrorContains(err, "cannot cast nullable field to non-nullable field") fieldsDest3NonNullable := []arrow.Field{ {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, } dest3NonNullable := arrow.StructOf(fieldsDest3NonNullable...) 
options3NoNullable := compute.SafeCastOptions(dest3NonNullable) _, err = compute.CastArray(context.TODO(), srcNullable, options3NoNullable) c.ErrorIs(err, arrow.ErrType) c.ErrorContains(err, "cannot cast nullable field to non-nullable field") }) } func (c *CastSuite) smallIntArrayFromJSON(data string) arrow.Array { arr, _, _ := array.FromJSON(c.mem, types.NewSmallintType(), strings.NewReader(data)) return arr } func (c *CastSuite) TestExtensionTypeToIntDowncast() { smallint := types.NewSmallintType() arrow.RegisterExtensionType(smallint) defer arrow.UnregisterExtensionType("smallint") c.Run("smallint(int16) to int16", func() { arr := c.smallIntArrayFromJSON(`[0, 100, 200, 1, 2]`) defer arr.Release() checkCastZeroCopy(c.T(), arr, arrow.PrimitiveTypes.Int16, compute.DefaultCastOptions(true)) c.checkCast(smallint, arrow.PrimitiveTypes.Uint8, `[0, 100, 200, 1, 2]`, `[0, 100, 200, 1, 2]`) }) c.Run("smallint(int16) to uint8 with overflow", func() { opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Uint8) c.checkCastFails(smallint, `[0, null, 256, 1, 3]`, opts) opts.AllowIntOverflow = true c.checkCastOpts(smallint, arrow.PrimitiveTypes.Uint8, `[0, null, 256, 1, 3]`, `[0, null, 0, 1, 3]`, *opts) }) c.Run("smallint(int16) to uint8 with underflow", func() { opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Uint8) c.checkCastFails(smallint, `[0, null, -1, 1, 3]`, opts) opts.AllowIntOverflow = true c.checkCastOpts(smallint, arrow.PrimitiveTypes.Uint8, `[0, null, -1, 1, 3]`, `[0, null, 255, 1, 3]`, *opts) }) } func (c *CastSuite) TestNoOutBitmapIfIsAllValid() { a, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1]`)) defer a.Release() opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Int32) result, err := compute.CastArray(context.Background(), a, opts) c.NoError(err) c.NotNil(a.Data().Buffers()[0]) c.Nil(result.Data().Buffers()[0]) } func (c *CastSuite) TestFromDictionary() { ctx := compute.WithAllocator(context.Background(), c.mem) 
dictionaries := []arrow.Array{} for _, ty := range numericTypes { a, _, _ := array.FromJSON(c.mem, ty, strings.NewReader(`[23, 12, 45, 12, null]`)) defer a.Release() dictionaries = append(dictionaries, a) } for _, ty := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { a, _, _ := array.FromJSON(c.mem, ty, strings.NewReader(`["foo", "bar", "baz", "foo", null]`)) defer a.Release() dictionaries = append(dictionaries, a) } for _, d := range dictionaries { for _, ty := range dictIndexTypes { indices, _, _ := array.FromJSON(c.mem, ty, strings.NewReader(`[4, 0, 1, 2, 0, 4, null, 2]`)) expected, err := compute.Take(ctx, compute.TakeOptions{}, &compute.ArrayDatum{d.Data()}, &compute.ArrayDatum{indices.Data()}) c.Require().NoError(err) exp := expected.(*compute.ArrayDatum).MakeArray() dictArr := array.NewDictionaryArray(&arrow.DictionaryType{IndexType: ty, ValueType: d.DataType()}, indices, d) checkCast(c.T(), dictArr, exp, *compute.SafeCastOptions(d.DataType())) indices.Release() expected.Release() exp.Release() dictArr.Release() return } } } func TestCasts(t *testing.T) { suite.Run(t, new(CastSuite)) } const rngseed = 0x94378165 func benchmarkNumericCast(b *testing.B, fromType, toType arrow.DataType, opts compute.CastOptions, size, min, max int64, nullprob float64) { rng := gen.NewRandomArrayGenerator(rngseed, memory.DefaultAllocator) arr := rng.Numeric(fromType.ID(), size, min, max, nullprob) var ( err error out compute.Datum ctx = context.Background() input = compute.NewDatum(arr.Data()) ) b.Cleanup(func() { arr.Release() input.Release() }) opts.ToType = toType b.ResetTimer() b.SetBytes(size * int64(fromType.(arrow.FixedWidthDataType).Bytes())) for i := 0; i < b.N; i++ { out, err = compute.CastDatum(ctx, input, &opts) if err != nil { b.Fatal(err) } out.Release() } } func benchmarkFloatingToIntegerCast(b *testing.B, fromType, toType arrow.DataType, opts compute.CastOptions, size, min, max int64, nullprob float64) { rng := 
gen.NewRandomArrayGenerator(rngseed, memory.DefaultAllocator) arr := rng.Numeric(toType.ID(), size, min, max, nullprob) asFloat, err := compute.CastToType(context.Background(), arr, fromType) if err != nil { b.Fatal(err) } arr.Release() var ( out compute.Datum ctx = context.Background() input = compute.NewDatum(asFloat.Data()) ) b.Cleanup(func() { asFloat.Release() input.Release() }) opts.ToType = toType b.ResetTimer() b.SetBytes(size * int64(fromType.(arrow.FixedWidthDataType).Bytes())) for i := 0; i < b.N; i++ { out, err = compute.CastDatum(ctx, input, &opts) if err != nil { b.Fatal(err) } out.Release() } } func BenchmarkCasting(b *testing.B) { type benchfn func(b *testing.B, fromType, toType arrow.DataType, opts compute.CastOptions, size, min, max int64, nullprob float64) tests := []struct { from, to arrow.DataType min, max int64 safe bool fn benchfn }{ {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int32, math.MinInt32, math.MaxInt32, true, benchmarkNumericCast}, {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int32, math.MinInt32, math.MaxInt32, false, benchmarkNumericCast}, {arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int32, 0, math.MaxInt32, true, benchmarkNumericCast}, {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Float64, 0, 1000, true, benchmarkNumericCast}, {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Float64, 0, 1000, false, benchmarkNumericCast}, {arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Int32, -1000, 1000, true, benchmarkFloatingToIntegerCast}, {arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Int32, -1000, 1000, false, benchmarkFloatingToIntegerCast}, } for _, tt := range tests { for _, sz := range []int64{int64(CpuCacheSizes[1]) /* L2 Cache Size */} { for _, nullProb := range []float64{0, 0.1, 0.5, 0.9, 1} { arraySize := sz / int64(tt.from.(arrow.FixedWidthDataType).Bytes()) opts := compute.DefaultCastOptions(tt.safe) b.Run(fmt.Sprintf("sz=%d/nullprob=%.2f/from=%s/to=%s/safe=%t", arraySize, nullProb, tt.from, tt.to, 
tt.safe), func(b *testing.B) { tt.fn(b, tt.from, tt.to, *opts, arraySize, tt.min, tt.max, nullProb) }) } } } } arrow-go-18.2.0/arrow/compute/datum.go000066400000000000000000000224631476434502500176270ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package compute import ( "fmt" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/scalar" ) //go:generate go run golang.org/x/tools/cmd/stringer -type=DatumKind -linecomment // DatumKind is an enum used for denoting which kind of type a datum is encapsulating type DatumKind int const ( KindNone DatumKind = iota // none KindScalar // scalar KindArray // array KindChunked // chunked_array KindRecord // record_batch KindTable // table ) const UnknownLength int64 = -1 // DatumIsValue returns true if the datum passed is a Scalar, Array // or ChunkedArray type (e.g. 
it contains a specific value not a // group of values) func DatumIsValue(d Datum) bool { switch d.Kind() { case KindScalar, KindArray, KindChunked: return true } return false } // Datum is a variant interface for wrapping the various Arrow data structures // for now the various Datum types just hold a Value which is the type they // are wrapping, but it might make sense in the future for those types // to actually be aliases or embed their types instead. Not sure yet. type Datum interface { fmt.Stringer Kind() DatumKind Len() int64 Equals(Datum) bool Release() data() any } // ArrayLikeDatum is an interface for treating a Datum similarly to an Array, // so that it is easy to differentiate between Record/Table/Collection and Scalar, // Array/ChunkedArray for ease of use. Chunks will return an empty slice for Scalar, // a slice with 1 element for Array, and the slice of chunks for a chunked array. type ArrayLikeDatum interface { Datum NullN() int64 Type() arrow.DataType Chunks() []arrow.Array } // TableLikeDatum is an interface type for specifying either a RecordBatch or a // Table as both contain a schema as opposed to just a single data type. type TableLikeDatum interface { Datum Schema() *arrow.Schema } // EmptyDatum is the null case, a Datum with nothing in it. 
type EmptyDatum struct{} func (EmptyDatum) String() string { return "nullptr" } func (EmptyDatum) Kind() DatumKind { return KindNone } func (EmptyDatum) Len() int64 { return UnknownLength } func (EmptyDatum) Release() {} func (EmptyDatum) Equals(other Datum) bool { _, ok := other.(EmptyDatum) return ok } func (EmptyDatum) data() any { return nil } // ScalarDatum contains a scalar value type ScalarDatum struct { Value scalar.Scalar } func (ScalarDatum) Kind() DatumKind { return KindScalar } func (ScalarDatum) Len() int64 { return 1 } func (ScalarDatum) Chunks() []arrow.Array { return nil } func (d *ScalarDatum) Type() arrow.DataType { return d.Value.DataType() } func (d *ScalarDatum) String() string { return d.Value.String() } func (d *ScalarDatum) ToScalar() (scalar.Scalar, error) { return d.Value, nil } func (d *ScalarDatum) data() any { return d.Value } func (d *ScalarDatum) NullN() int64 { if d.Value.IsValid() { return 0 } return 1 } type releasable interface { Release() } func (d *ScalarDatum) Release() { if v, ok := d.Value.(releasable); ok { v.Release() } } func (d *ScalarDatum) Equals(other Datum) bool { if rhs, ok := other.(*ScalarDatum); ok { return scalar.Equals(d.Value, rhs.Value) } return false } // ArrayDatum references an array.Data object which can be used to create // array instances from if needed. 
type ArrayDatum struct { Value arrow.ArrayData } func (ArrayDatum) Kind() DatumKind { return KindArray } func (d *ArrayDatum) Type() arrow.DataType { return d.Value.DataType() } func (d *ArrayDatum) Len() int64 { return int64(d.Value.Len()) } func (d *ArrayDatum) NullN() int64 { return int64(d.Value.NullN()) } func (d *ArrayDatum) String() string { return fmt.Sprintf("Array:{%s}", d.Value.DataType()) } func (d *ArrayDatum) MakeArray() arrow.Array { return array.MakeFromData(d.Value) } func (d *ArrayDatum) Chunks() []arrow.Array { return []arrow.Array{d.MakeArray()} } func (d *ArrayDatum) ToScalar() (scalar.Scalar, error) { return scalar.NewListScalarData(d.Value), nil } func (d *ArrayDatum) Release() { d.Value.Release() d.Value = nil } func (d *ArrayDatum) data() any { return d.Value } func (d *ArrayDatum) Equals(other Datum) bool { rhs, ok := other.(*ArrayDatum) if !ok { return false } left := d.MakeArray() defer left.Release() right := rhs.MakeArray() defer right.Release() return array.Equal(left, right) } // ChunkedDatum contains a chunked array for use with expressions and compute. type ChunkedDatum struct { Value *arrow.Chunked } func (ChunkedDatum) Kind() DatumKind { return KindChunked } func (d *ChunkedDatum) Type() arrow.DataType { return d.Value.DataType() } func (d *ChunkedDatum) Len() int64 { return int64(d.Value.Len()) } func (d *ChunkedDatum) NullN() int64 { return int64(d.Value.NullN()) } func (d *ChunkedDatum) String() string { return fmt.Sprintf("Array:{%s}", d.Value.DataType()) } func (d *ChunkedDatum) Chunks() []arrow.Array { return d.Value.Chunks() } func (d *ChunkedDatum) data() any { return d.Value } func (d *ChunkedDatum) Release() { d.Value.Release() d.Value = nil } func (d *ChunkedDatum) Equals(other Datum) bool { if rhs, ok := other.(*ChunkedDatum); ok { return array.ChunkedEqual(d.Value, rhs.Value) } return false } // RecordDatum contains an array.Record for passing a full record to an expression // or to compute. 
type RecordDatum struct { Value arrow.Record } func (RecordDatum) Kind() DatumKind { return KindRecord } func (RecordDatum) String() string { return "RecordBatch" } func (r *RecordDatum) Len() int64 { return r.Value.NumRows() } func (r *RecordDatum) Schema() *arrow.Schema { return r.Value.Schema() } func (r *RecordDatum) data() any { return r.Value } func (r *RecordDatum) Release() { r.Value.Release() r.Value = nil } func (r *RecordDatum) Equals(other Datum) bool { if rhs, ok := other.(*RecordDatum); ok { return array.RecordEqual(r.Value, rhs.Value) } return false } // TableDatum contains a table so that multiple record batches can be worked with // together as a single table for being passed to compute and expression handling. type TableDatum struct { Value arrow.Table } func (TableDatum) Kind() DatumKind { return KindTable } func (TableDatum) String() string { return "Table" } func (d *TableDatum) Len() int64 { return d.Value.NumRows() } func (d *TableDatum) Schema() *arrow.Schema { return d.Value.Schema() } func (d *TableDatum) data() any { return d.Value } func (d *TableDatum) Release() { d.Value.Release() d.Value = nil } func (d *TableDatum) Equals(other Datum) bool { if rhs, ok := other.(*TableDatum); ok { return array.TableEqual(d.Value, rhs.Value) } return false } // NewDatum will construct the appropriate Datum type based on what is passed in // as the argument. // // An arrow.Array gets an ArrayDatum // An array.Chunked gets a ChunkedDatum // An array.Record gets a RecordDatum // an array.Table gets a TableDatum // a scalar.Scalar gets a ScalarDatum // // Anything else is passed to scalar.MakeScalar and receives a scalar // datum of that appropriate type. 
func NewDatum(value interface{}) Datum { switch v := value.(type) { case Datum: return NewDatum(v.data()) case arrow.Array: v.Data().Retain() return &ArrayDatum{v.Data()} case scalar.Releasable: v.Retain() return NewDatumWithoutOwning(v) case scalar.Scalar: return &ScalarDatum{v} default: return &ScalarDatum{scalar.MakeScalar(value)} } } // NewDatumWithoutOwning is like NewDatum only it does not call Retain on // the passed in value (if applicable). This means that if the resulting // Datum should not have Release called on it and the original value needs // to outlive the Datum. // // Only use this if you know what you're doing. For the most part this is // just a convenience function.+- func NewDatumWithoutOwning(value interface{}) Datum { switch v := value.(type) { case arrow.Array: return &ArrayDatum{v.Data()} case arrow.ArrayData: return &ArrayDatum{v} case *arrow.Chunked: return &ChunkedDatum{v} case arrow.Record: return &RecordDatum{v} case arrow.Table: return &TableDatum{v} case scalar.Scalar: return &ScalarDatum{v} default: return &ScalarDatum{scalar.MakeScalar(value)} } } var ( _ ArrayLikeDatum = (*ScalarDatum)(nil) _ ArrayLikeDatum = (*ArrayDatum)(nil) _ ArrayLikeDatum = (*ChunkedDatum)(nil) _ TableLikeDatum = (*RecordDatum)(nil) _ TableLikeDatum = (*TableDatum)(nil) ) arrow-go-18.2.0/arrow/compute/datumkind_string.go000066400000000000000000000014331476434502500220550ustar00rootroot00000000000000// Code generated by "stringer -type=DatumKind -linecomment"; DO NOT EDIT. //go:build go1.18 package compute import "strconv" func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. 
var x [1]struct{} _ = x[KindNone-0] _ = x[KindScalar-1] _ = x[KindArray-2] _ = x[KindChunked-3] _ = x[KindRecord-4] _ = x[KindTable-5] } const _DatumKind_name = "nonescalararraychunked_arrayrecord_batchtable" var _DatumKind_index = [...]uint8{0, 4, 10, 15, 28, 40, 45} func (i DatumKind) String() string { if i < 0 || i >= DatumKind(len(_DatumKind_index)-1) { return "DatumKind(" + strconv.FormatInt(int64(i), 10) + ")" } return _DatumKind_name[_DatumKind_index[i]:_DatumKind_index[i+1]] } arrow-go-18.2.0/arrow/compute/doc.go000066400000000000000000000031731476434502500172570ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package compute is a native-go implementation of an Acero-like // arrow compute engine. It requires go1.18+ // // While consumers of Arrow that are able to use CGO could utilize the // C Data API (using the cdata package) and could link against the // acero library directly, there are consumers who cannot use CGO. This // is an attempt to provide for those users, and in general create a // native-go arrow compute engine. // // The overwhelming majority of things in this package require go1.18 as // it utilizes generics. 
The files in this package and its sub-packages // are all excluded from being built by go versions lower than 1.18 so // that the larger Arrow module itself is still compatible with go1.17. // // Everything in this package should be considered Experimental for now. package compute //go:generate stringer -type=FuncKind -linecomment arrow-go-18.2.0/arrow/compute/example_test.go000066400000000000000000000052361476434502500212060ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package compute_test import ( "context" "fmt" "log" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/compute" "github.com/apache/arrow-go/v18/arrow/compute/exec" "github.com/apache/arrow-go/v18/arrow/memory" ) // This example demonstrates how to register a custom scalar function. 
func Example_customFunction() { pool := memory.NewGoAllocator() ctx := context.Background() execCtx := compute.DefaultExecCtx() ctx = compute.SetExecCtx(ctx, execCtx) add42 := compute.NewScalarFunction("add_42", compute.Arity{ NArgs: 1, }, compute.FunctionDoc{ Summary: "Returns the input values plus 42", ArgNames: []string{"input"}, }) if err := add42.AddNewKernel( []exec.InputType{ // We accept a single argument (array) of Int8 type. { Kind: exec.InputExact, Type: arrow.PrimitiveTypes.Int8, }, }, // We'll return a single Int8 array. exec.NewOutputType(arrow.PrimitiveTypes.Int8), func(ctx *exec.KernelCtx, span *exec.ExecSpan, result *exec.ExecResult) error { // The second buffer contains the values. Both for the input and the output arrays. for i, x := range span.Values[0].Array.Buffers[1].Buf { result.Buffers[1].Buf[i] = x + 42 } return nil }, nil, ); err != nil { log.Fatal(err) } execCtx.Registry.AddFunction(add42, true) inputArrayBuilder := array.NewInt8Builder(pool) for i := 0; i < 16; i++ { inputArrayBuilder.Append(int8(i)) } inputArray := inputArrayBuilder.NewArray() outputArrayDatum, err := compute.CallFunction( compute.SetExecCtx(context.Background(), execCtx), "add_42", nil, &compute.ArrayDatum{Value: inputArray.Data()}, ) if err != nil { log.Fatal(err) } fmt.Println(array.NewInt8Data(outputArrayDatum.(*compute.ArrayDatum).Value).Int8Values()) // Output: // [42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57] } arrow-go-18.2.0/arrow/compute/exec.go000066400000000000000000000125201476434502500174320ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package compute import ( "context" "fmt" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/compute/exec" "github.com/apache/arrow-go/v18/arrow/internal/debug" ) func haveChunkedArray(values []Datum) bool { for _, v := range values { if v.Kind() == KindChunked { return true } } return false } // ExecSpanFromBatch constructs and returns a new ExecSpan from the values // inside of the ExecBatch which could be scalar or arrays. // // This is mostly used for tests but is also a convenience method for other // cases. func ExecSpanFromBatch(batch *ExecBatch) *exec.ExecSpan { out := &exec.ExecSpan{Len: batch.Len, Values: make([]exec.ExecValue, len(batch.Values))} for i, v := range batch.Values { outVal := &out.Values[i] if v.Kind() == KindScalar { outVal.Scalar = v.(*ScalarDatum).Value } else { outVal.Array.SetMembers(v.(*ArrayDatum).Value) outVal.Scalar = nil } } return out } // this is the primary driver of execution func execInternal(ctx context.Context, fn Function, opts FunctionOptions, passedLen int64, args ...Datum) (result Datum, err error) { if opts == nil { if err = checkOptions(fn, opts); err != nil { return } opts = fn.DefaultOptions() } // we only allow Array, ChunkedArray, and Scalars for now. // RecordBatch and Table datums are disallowed. 
if err = checkAllIsValue(args); err != nil { return } inTypes := make([]arrow.DataType, len(args)) for i, a := range args { inTypes[i] = a.(ArrayLikeDatum).Type() } var ( k exec.Kernel executor KernelExecutor ) switch fn.Kind() { case FuncScalar: executor = scalarExecPool.Get().(*scalarExecutor) defer func() { executor.Clear() scalarExecPool.Put(executor.(*scalarExecutor)) }() case FuncVector: executor = vectorExecPool.Get().(*vectorExecutor) defer func() { executor.Clear() vectorExecPool.Put(executor.(*vectorExecutor)) }() default: return nil, fmt.Errorf("%w: direct execution of %s", arrow.ErrNotImplemented, fn.Kind()) } if k, err = fn.DispatchBest(inTypes...); err != nil { return } var newArgs []Datum // cast arguments if necessary for i, arg := range args { if !arrow.TypeEqual(inTypes[i], arg.(ArrayLikeDatum).Type()) { if newArgs == nil { newArgs = make([]Datum, len(args)) copy(newArgs, args) } newArgs[i], err = CastDatum(ctx, arg, SafeCastOptions(inTypes[i])) if err != nil { return nil, err } defer newArgs[i].Release() } } if newArgs != nil { args = newArgs } kctx := &exec.KernelCtx{Ctx: ctx, Kernel: k} init := k.GetInitFn() kinitArgs := exec.KernelInitArgs{Kernel: k, Inputs: inTypes, Options: opts} if init != nil { kctx.State, err = init(kctx, kinitArgs) if err != nil { return } } if err = executor.Init(kctx, kinitArgs); err != nil { return } input := ExecBatch{Values: args, Len: 0} if input.NumValues() == 0 { if passedLen != -1 { input.Len = passedLen } } else { inferred, allSame := inferBatchLength(input.Values) input.Len = inferred switch fn.Kind() { case FuncScalar: if passedLen != -1 && passedLen != inferred { return nil, fmt.Errorf("%w: passed batch length for execution did not match actual length for scalar fn execution", arrow.ErrInvalid) } case FuncVector: vkernel := k.(*exec.VectorKernel) if !(allSame || !vkernel.CanExecuteChunkWise) { return nil, fmt.Errorf("%w: vector kernel arguments must all be the same length", arrow.ErrInvalid) } } } ectx := 
GetExecCtx(ctx) ctx, cancel := context.WithCancel(context.Background()) defer cancel() ch := make(chan Datum, ectx.ExecChannelSize) go func() { defer close(ch) if err = executor.Execute(ctx, &input, ch); err != nil { cancel() } }() result = executor.WrapResults(ctx, ch, haveChunkedArray(input.Values)) if err == nil { debug.Assert(executor.CheckResultType(result) == nil, "invalid result type") } if ctx.Err() == context.Canceled && result != nil { result.Release() } return } // CallFunction is a one-shot invoker for all types of functions. // // It will perform kernel-dispatch, argument checking, iteration of // ChunkedArray inputs and wrapping of outputs. // // To affect the execution options, you must call SetExecCtx and pass // the resulting context in here. func CallFunction(ctx context.Context, funcName string, opts FunctionOptions, args ...Datum) (Datum, error) { ectx := GetExecCtx(ctx) fn, ok := ectx.Registry.GetFunction(funcName) if !ok { return nil, fmt.Errorf("%w: function '%s' not found", arrow.ErrKey, funcName) } return fn.Execute(ctx, opts, args...) } arrow-go-18.2.0/arrow/compute/exec/000077500000000000000000000000001476434502500171035ustar00rootroot00000000000000arrow-go-18.2.0/arrow/compute/exec/hash_util.go000066400000000000000000000017111476434502500214120ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package exec // ADAPTED FROM HASH UTILITIES FOR BOOST func HashCombine(seed, value uint64) uint64 { seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2) return seed } arrow-go-18.2.0/arrow/compute/exec/kernel.go000066400000000000000000000511661476434502500207230ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package exec import ( "context" "fmt" "hash/maphash" "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "golang.org/x/exp/slices" ) var hashSeed = maphash.MakeSeed() type ctxAllocKey struct{} // WithAllocator returns a new context with the provided allocator // embedded into the context. 
func WithAllocator(ctx context.Context, mem memory.Allocator) context.Context { return context.WithValue(ctx, ctxAllocKey{}, mem) } // GetAllocator retrieves the allocator from the context, or returns // memory.DefaultAllocator if there was no allocator in the provided // context. func GetAllocator(ctx context.Context) memory.Allocator { mem, ok := ctx.Value(ctxAllocKey{}).(memory.Allocator) if !ok { return memory.DefaultAllocator } return mem } // Kernel defines the minimum interface required for the basic execution // kernel. It will grow as the implementation requires. type Kernel interface { GetInitFn() KernelInitFn GetSig() *KernelSignature } // NonAggKernel builds on the base Kernel interface for // non aggregate execution kernels. Specifically this will // represent Scalar and Vector kernels. type NonAggKernel interface { Kernel Exec(*KernelCtx, *ExecSpan, *ExecResult) error GetNullHandling() NullHandling GetMemAlloc() MemAlloc CanFillSlices() bool } // KernelCtx is a small struct holding the context for a kernel execution // consisting of a pointer to the kernel, initialized state (if needed) // and the context for this execution. type KernelCtx struct { Ctx context.Context Kernel Kernel State KernelState } func (k *KernelCtx) Allocate(bufsize int) *memory.Buffer { buf := memory.NewResizableBuffer(GetAllocator(k.Ctx)) buf.Resize(bufsize) return buf } func (k *KernelCtx) AllocateBitmap(nbits int64) *memory.Buffer { nbytes := bitutil.BytesForBits(nbits) return k.Allocate(int(nbytes)) } // TypeMatcher define an interface for matching Input or Output types // for execution kernels. There are multiple implementations of this // interface provided by this package. 
type TypeMatcher interface { fmt.Stringer Matches(typ arrow.DataType) bool Equals(other TypeMatcher) bool } type sameTypeIDMatcher struct { accepted arrow.Type } func (s sameTypeIDMatcher) Matches(typ arrow.DataType) bool { return s.accepted == typ.ID() } func (s sameTypeIDMatcher) Equals(other TypeMatcher) bool { if s == other { return true } o, ok := other.(*sameTypeIDMatcher) if !ok { return false } return s.accepted == o.accepted } func (s sameTypeIDMatcher) String() string { return "Type::" + s.accepted.String() } // SameTypeID returns a type matcher which will match // any DataType that uses the same arrow.Type ID as the one // passed in here. func SameTypeID(id arrow.Type) TypeMatcher { return &sameTypeIDMatcher{id} } type timeUnitMatcher struct { id arrow.Type unit arrow.TimeUnit } func (s timeUnitMatcher) Matches(typ arrow.DataType) bool { if typ.ID() != s.id { return false } return s.unit == typ.(arrow.TemporalWithUnit).TimeUnit() } func (s timeUnitMatcher) String() string { return strings.ToLower(s.id.String()) + "(" + s.unit.String() + ")" } func (s *timeUnitMatcher) Equals(other TypeMatcher) bool { if s == other { return true } o, ok := other.(*timeUnitMatcher) if !ok { return false } return o.id == s.id && o.unit == s.unit } // TimestampTypeUnit returns a TypeMatcher that will match only // a Timestamp datatype with the specified TimeUnit. func TimestampTypeUnit(unit arrow.TimeUnit) TypeMatcher { return &timeUnitMatcher{arrow.TIMESTAMP, unit} } // Time32TypeUnit returns a TypeMatcher that will match only // a Time32 datatype with the specified TimeUnit. func Time32TypeUnit(unit arrow.TimeUnit) TypeMatcher { return &timeUnitMatcher{arrow.TIME32, unit} } // Time64TypeUnit returns a TypeMatcher that will match only // a Time64 datatype with the specified TimeUnit. 
func Time64TypeUnit(unit arrow.TimeUnit) TypeMatcher { return &timeUnitMatcher{arrow.TIME64, unit} } // DurationTypeUnit returns a TypeMatcher that will match only // a Duration datatype with the specified TimeUnit. func DurationTypeUnit(unit arrow.TimeUnit) TypeMatcher { return &timeUnitMatcher{arrow.DURATION, unit} } type integerMatcher struct{} func (integerMatcher) String() string { return "integer" } func (integerMatcher) Matches(typ arrow.DataType) bool { return arrow.IsInteger(typ.ID()) } func (integerMatcher) Equals(other TypeMatcher) bool { _, ok := other.(integerMatcher) return ok } type binaryLikeMatcher struct{} func (binaryLikeMatcher) String() string { return "binary-like" } func (binaryLikeMatcher) Matches(typ arrow.DataType) bool { return arrow.IsBinaryLike(typ.ID()) } func (binaryLikeMatcher) Equals(other TypeMatcher) bool { _, ok := other.(binaryLikeMatcher) return ok } type largeBinaryLikeMatcher struct{} func (largeBinaryLikeMatcher) String() string { return "large-binary-like" } func (largeBinaryLikeMatcher) Matches(typ arrow.DataType) bool { return arrow.IsLargeBinaryLike(typ.ID()) } func (largeBinaryLikeMatcher) Equals(other TypeMatcher) bool { _, ok := other.(largeBinaryLikeMatcher) return ok } type fsbLikeMatcher struct{} func (fsbLikeMatcher) String() string { return "fixed-size-binary-like" } func (fsbLikeMatcher) Matches(typ arrow.DataType) bool { return arrow.IsFixedSizeBinary(typ.ID()) } func (fsbLikeMatcher) Equals(other TypeMatcher) bool { _, ok := other.(fsbLikeMatcher) return ok } // Integer returns a TypeMatcher which will match any integral type like int8 or uint16 func Integer() TypeMatcher { return integerMatcher{} } // BinaryLike returns a TypeMatcher that will match Binary or String func BinaryLike() TypeMatcher { return binaryLikeMatcher{} } // LargeBinaryLike returns a TypeMatcher which will match LargeBinary or LargeString func LargeBinaryLike() TypeMatcher { return largeBinaryLikeMatcher{} } // FixedSizeBinaryLike returns 
a TypeMatcher that will match FixedSizeBinary // or Decimal128/256 func FixedSizeBinaryLike() TypeMatcher { return fsbLikeMatcher{} } type primitiveMatcher struct{} func (primitiveMatcher) String() string { return "primitive" } func (primitiveMatcher) Matches(typ arrow.DataType) bool { return arrow.IsPrimitive(typ.ID()) } func (primitiveMatcher) Equals(other TypeMatcher) bool { _, ok := other.(primitiveMatcher) return ok } // Primitive returns a TypeMatcher that will match any type that arrow.IsPrimitive // returns true for. func Primitive() TypeMatcher { return primitiveMatcher{} } type reeMatcher struct { runEndsMatcher TypeMatcher encodedMatcher TypeMatcher } func (r reeMatcher) Matches(typ arrow.DataType) bool { if typ.ID() != arrow.RUN_END_ENCODED { return false } dt := typ.(*arrow.RunEndEncodedType) return r.runEndsMatcher.Matches(dt.RunEnds()) && r.encodedMatcher.Matches(dt.Encoded()) } func (r reeMatcher) Equals(other TypeMatcher) bool { o, ok := other.(reeMatcher) if !ok { return false } return r.runEndsMatcher.Equals(o.runEndsMatcher) && r.encodedMatcher.Equals(o.encodedMatcher) } func (r reeMatcher) String() string { return "run_end_encoded(run_ends=" + r.runEndsMatcher.String() + ", values=" + r.encodedMatcher.String() + ")" } // RunEndEncoded returns a matcher which matches a RunEndEncoded // type whose encoded type is matched by the passed in matcher. func RunEndEncoded(runEndsMatcher, encodedMatcher TypeMatcher) TypeMatcher { return reeMatcher{ runEndsMatcher: runEndsMatcher, encodedMatcher: encodedMatcher} } // InputKind is an enum representing the type of Input matching // that will be done. Either accepting any type, an exact specific type // or using a TypeMatcher. type InputKind int8 const ( InputAny InputKind = iota InputExact InputUseMatcher ) // InputType is used for type checking arguments passed to a kernel // and stored within a KernelSignature. 
The type-checking rule can // be supplied either with an exact DataType instance or a custom // TypeMatcher. type InputType struct { Kind InputKind Type arrow.DataType Matcher TypeMatcher } func NewExactInput(dt arrow.DataType) InputType { return InputType{Kind: InputExact, Type: dt} } func NewMatchedInput(match TypeMatcher) InputType { return InputType{Kind: InputUseMatcher, Matcher: match} } func NewIDInput(id arrow.Type) InputType { return NewMatchedInput(SameTypeID(id)) } func (it InputType) MatchID() arrow.Type { switch it.Kind { case InputExact: return it.Type.ID() case InputUseMatcher: if idMatch, ok := it.Matcher.(*sameTypeIDMatcher); ok { return idMatch.accepted } } debug.Assert(false, "MatchID called on non-id matching InputType") return -1 } func (it InputType) String() string { switch it.Kind { case InputAny: return "any" case InputUseMatcher: return it.Matcher.String() case InputExact: return it.Type.String() } return "" } func (it *InputType) Equals(other *InputType) bool { if it == other { return true } if it.Kind != other.Kind { return false } switch it.Kind { case InputAny: return true case InputExact: return arrow.TypeEqual(it.Type, other.Type) case InputUseMatcher: return it.Matcher.Equals(other.Matcher) default: return false } } func (it InputType) Hash() uint64 { var h maphash.Hash h.SetSeed(hashSeed) result := HashCombine(h.Sum64(), uint64(it.Kind)) switch it.Kind { case InputExact: result = HashCombine(result, arrow.HashType(hashSeed, it.Type)) } return result } func (it InputType) Matches(dt arrow.DataType) bool { switch it.Kind { case InputExact: return arrow.TypeEqual(it.Type, dt) case InputUseMatcher: return it.Matcher.Matches(dt) case InputAny: return true default: debug.Assert(false, "invalid InputKind") return true } } // ResolveKind defines the way that a particular OutputType resolves // its type. Either it has a fixed type to resolve to or it contains // a Resolver which will compute the resolved type based on // the input types. 
type ResolveKind int8 const ( ResolveFixed ResolveKind = iota ResolveComputed ) // TypeResolver is simply a function that takes a KernelCtx and a list of input types // and returns the resolved type or an error. type TypeResolver = func(*KernelCtx, []arrow.DataType) (arrow.DataType, error) type OutputType struct { Kind ResolveKind Type arrow.DataType Resolver TypeResolver } func NewOutputType(dt arrow.DataType) OutputType { return OutputType{Kind: ResolveFixed, Type: dt} } func NewComputedOutputType(resolver TypeResolver) OutputType { return OutputType{Kind: ResolveComputed, Resolver: resolver} } func (o OutputType) String() string { if o.Kind == ResolveFixed { return o.Type.String() } return "computed" } func (o OutputType) Resolve(ctx *KernelCtx, types []arrow.DataType) (arrow.DataType, error) { switch o.Kind { case ResolveFixed: return o.Type, nil } return o.Resolver(ctx, types) } // NullHandling is an enum representing how a particular Kernel // wants the executor to handle nulls. type NullHandling int8 const ( // Compute the output validity bitmap by intersection the validity // bitmaps of the arguments using bitwise-and operations. This means // that values in the output are valid/non-null only if the corresponding // values in all input arguments were valid/non-null. Kernels generally // do not have to touch the bitmap afterwards, but a kernel's exec function // is permitted to alter the bitmap after the null intersection is computed // if necessary. NullIntersection NullHandling = iota // Kernel expects a pre-allocated buffer to write the result bitmap // into. NullComputedPrealloc // Kernel will allocate and set the validity bitmap of the output NullComputedNoPrealloc // kernel output is never null and a validity bitmap doesn't need to // be allocated NullNoOutput ) // MemAlloc is the preference for preallocating memory of fixed-width // type outputs during kernel execution. 
type MemAlloc int8 const ( // For data types that support pre-allocation (fixed-width), the // kernel expects to be provided a pre-allocated buffer to write into. // Non-fixed-width types must always allocate their own buffers. // The allocation is made for the same length as the execution batch, // so vector kernels yielding differently sized outputs should not // use this. // // It is valid for the data to not be preallocated but the validity // bitmap is (or is computed using intersection). // // For variable-size output types like Binary or String, or for nested // types, this option has no effect. MemPrealloc MemAlloc = iota // The kernel is responsible for allocating its own data buffer // for fixed-width output types. MemNoPrealloc ) type KernelState any // KernelInitArgs are the arguments required to initialize an Kernel's // state using the input types and any options. type KernelInitArgs struct { Kernel Kernel Inputs []arrow.DataType // Options are opaque and specific to the Kernel being initialized, // may be nil if the kernel doesn't require options. Options any } // KernelInitFn is any function that receives a KernelCtx and initialization // arguments and returns the initialized state or an error. type KernelInitFn = func(*KernelCtx, KernelInitArgs) (KernelState, error) // KernelSignature holds the input and output types for a kernel. // // Variable argument functions with a minimum of N arguments should pass // up to N input types to be used to validate for invocation. The first // N-1 types will be matched against the first N-1 arguments and the last // type will be matched against the remaining arguments. 
type KernelSignature struct { InputTypes []InputType OutType OutputType IsVarArgs bool // store the hashcode after it is computed so we don't // need to recompute it hashCode uint64 } func (k KernelSignature) String() string { var b strings.Builder if k.IsVarArgs { b.WriteString("varargs[") } else { b.WriteByte('(') } for i, t := range k.InputTypes { if i != 0 { b.WriteString(", ") } b.WriteString(t.String()) } if k.IsVarArgs { b.WriteString("*]") } else { b.WriteByte(')') } b.WriteString(" -> ") b.WriteString(k.OutType.String()) return b.String() } func (k KernelSignature) Equals(other KernelSignature) bool { if k.IsVarArgs != other.IsVarArgs { return false } return slices.EqualFunc(k.InputTypes, other.InputTypes, func(e1, e2 InputType) bool { return e1.Equals(&e2) }) } func (k *KernelSignature) Hash() uint64 { if k.hashCode != 0 { return k.hashCode } var h maphash.Hash h.SetSeed(hashSeed) result := h.Sum64() for _, typ := range k.InputTypes { result = HashCombine(result, typ.Hash()) } k.hashCode = result return result } func (k KernelSignature) MatchesInputs(types []arrow.DataType) bool { switch k.IsVarArgs { case true: // check that it has enough to match at least the non-vararg types if len(types) < (len(k.InputTypes) - 1) { return false } for i, t := range types { if !k.InputTypes[Min(i, len(k.InputTypes)-1)].Matches(t) { return false } } case false: if len(types) != len(k.InputTypes) { return false } for i, t := range types { if !k.InputTypes[i].Matches(t) { return false } } } return true } // ArrayKernelExec is an alias definition for a kernel's execution function. // // This is used for both stateless and stateful kernels. If a kernel // depends on some execution state, it can be accessed from the KernelCtx // object, which also contains the context.Context object which can be // used for shortcircuiting by checking context.Done / context.Err. // This allows kernels to control handling timeouts or cancellation of // computation. 
type ArrayKernelExec = func(*KernelCtx, *ExecSpan, *ExecResult) error type kernel struct { Init KernelInitFn Signature *KernelSignature Data KernelState Parallelizable bool } func (k kernel) GetInitFn() KernelInitFn { return k.Init } func (k kernel) GetSig() *KernelSignature { return k.Signature } // A ScalarKernel is the kernel implementation for a Scalar Function. // In addition to the members found in the base Kernel, it contains // the null handling and memory pre-allocation preferences. type ScalarKernel struct { kernel ExecFn ArrayKernelExec CanWriteIntoSlices bool NullHandling NullHandling MemAlloc MemAlloc } // NewScalarKernel constructs a new kernel for scalar execution, constructing // a KernelSignature with the provided input types and output type, and using // the passed in execution implementation and initialization function. func NewScalarKernel(in []InputType, out OutputType, exec ArrayKernelExec, init KernelInitFn) ScalarKernel { return NewScalarKernelWithSig(&KernelSignature{ InputTypes: in, OutType: out, }, exec, init) } // NewScalarKernelWithSig is a convenience when you already have a signature // to use for constructing a kernel. It's equivalent to passing the components // of the signature (input and output types) to NewScalarKernel. 
func NewScalarKernelWithSig(sig *KernelSignature, exec ArrayKernelExec, init KernelInitFn) ScalarKernel { return ScalarKernel{ kernel: kernel{Signature: sig, Init: init, Parallelizable: true}, ExecFn: exec, CanWriteIntoSlices: true, NullHandling: NullIntersection, MemAlloc: MemPrealloc, } } func (s *ScalarKernel) Exec(ctx *KernelCtx, sp *ExecSpan, out *ExecResult) error { return s.ExecFn(ctx, sp, out) } func (s ScalarKernel) GetNullHandling() NullHandling { return s.NullHandling } func (s ScalarKernel) GetMemAlloc() MemAlloc { return s.MemAlloc } func (s ScalarKernel) CanFillSlices() bool { return s.CanWriteIntoSlices } // ChunkedExec is the signature for executing a stateful vector kernel // against a ChunkedArray input. It is optional type ChunkedExec func(*KernelCtx, []*arrow.Chunked, *ExecResult) ([]*ExecResult, error) // FinalizeFunc is an optional finalizer function for any postprocessing // that may need to be done on data before returning it type FinalizeFunc func(*KernelCtx, []*ArraySpan) ([]*ArraySpan, error) // VectorKernel is a structure for implementations of vector functions. // It can optionally contain a finalizer function, the null handling // and memory pre-allocation preferences (different defaults from // scalar kernels when using NewVectorKernel), and other execution related // options. type VectorKernel struct { kernel ExecFn ArrayKernelExec ExecChunked ChunkedExec Finalize FinalizeFunc NullHandling NullHandling MemAlloc MemAlloc CanWriteIntoSlices bool CanExecuteChunkWise bool OutputChunked bool } // NewVectorKernel constructs a new kernel for execution of vector functions, // which take into account more than just the individual scalar values // of its input. Output of a vector kernel may be a different length // than its inputs. 
func NewVectorKernel(inTypes []InputType, outType OutputType, exec ArrayKernelExec, init KernelInitFn) VectorKernel { return NewVectorKernelWithSig(&KernelSignature{ InputTypes: inTypes, OutType: outType}, exec, init) } // NewVectorKernelWithSig is a convenience function for creating a kernel // when you already have a signature constructed. func NewVectorKernelWithSig(sig *KernelSignature, exec ArrayKernelExec, init KernelInitFn) VectorKernel { return VectorKernel{ kernel: kernel{Signature: sig, Init: init, Parallelizable: true}, ExecFn: exec, CanWriteIntoSlices: true, CanExecuteChunkWise: true, OutputChunked: true, NullHandling: NullComputedNoPrealloc, MemAlloc: MemNoPrealloc, } } func (s *VectorKernel) Exec(ctx *KernelCtx, sp *ExecSpan, out *ExecResult) error { return s.ExecFn(ctx, sp, out) } func (s VectorKernel) GetNullHandling() NullHandling { return s.NullHandling } func (s VectorKernel) GetMemAlloc() MemAlloc { return s.MemAlloc } func (s VectorKernel) CanFillSlices() bool { return s.CanWriteIntoSlices } arrow-go-18.2.0/arrow/compute/exec/kernel_test.go000066400000000000000000000522441476434502500217600ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build go1.18 package exec_test import ( "fmt" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/compute" "github.com/apache/arrow-go/v18/arrow/compute/exec" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/stretchr/testify/assert" ) func TestTypeMatcherSameTypeID(t *testing.T) { matcher := exec.SameTypeID(arrow.DECIMAL128) assert.True(t, matcher.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) assert.False(t, matcher.Matches(arrow.PrimitiveTypes.Int8)) assert.Equal(t, "Type::DECIMAL128", matcher.String()) assert.True(t, matcher.Equals(matcher)) assert.True(t, matcher.Equals(exec.SameTypeID(arrow.DECIMAL))) assert.False(t, matcher.Equals(exec.SameTypeID(arrow.TIMESTAMP))) assert.False(t, matcher.Equals(exec.Time32TypeUnit(arrow.Microsecond))) } func TestTypeMatcherTimestampTypeUnit(t *testing.T) { matcher := exec.TimestampTypeUnit(arrow.Millisecond) matcher2 := exec.Time32TypeUnit(arrow.Millisecond) matcher3 := exec.Time64TypeUnit(arrow.Microsecond) matcher4 := exec.DurationTypeUnit(arrow.Microsecond) assert.True(t, matcher.Matches(arrow.FixedWidthTypes.Timestamp_ms)) assert.True(t, matcher.Matches(&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "utc"})) assert.False(t, matcher.Matches(arrow.FixedWidthTypes.Timestamp_s)) assert.False(t, matcher.Matches(arrow.FixedWidthTypes.Time32ms)) assert.True(t, matcher2.Matches(arrow.FixedWidthTypes.Time32ms)) assert.True(t, matcher3.Matches(arrow.FixedWidthTypes.Time64us)) assert.False(t, matcher3.Matches(arrow.FixedWidthTypes.Time64ns)) assert.True(t, matcher4.Matches(arrow.FixedWidthTypes.Duration_us)) assert.False(t, matcher4.Matches(arrow.FixedWidthTypes.Duration_ms)) // check String() representation assert.Equal(t, "timestamp(s)", exec.TimestampTypeUnit(arrow.Second).String()) assert.Equal(t, "timestamp(ms)", exec.TimestampTypeUnit(arrow.Millisecond).String()) 
assert.Equal(t, "timestamp(us)", exec.TimestampTypeUnit(arrow.Microsecond).String()) assert.Equal(t, "timestamp(ns)", exec.TimestampTypeUnit(arrow.Nanosecond).String()) // equals implementation assert.True(t, matcher.Equals(matcher)) assert.True(t, matcher.Equals(exec.TimestampTypeUnit(arrow.Millisecond))) assert.False(t, matcher.Equals(exec.TimestampTypeUnit(arrow.Microsecond))) assert.False(t, matcher.Equals(exec.Time32TypeUnit(arrow.Millisecond))) assert.False(t, matcher3.Equals(matcher2)) assert.False(t, matcher4.Equals(matcher3)) assert.True(t, matcher4.Equals(exec.DurationTypeUnit(arrow.Microsecond))) assert.False(t, matcher.Equals(exec.SameTypeID(arrow.TIMESTAMP))) } func TestIntegerMatcher(t *testing.T) { match := exec.Integer() assert.Equal(t, "integer", match.String()) assert.True(t, match.Matches(arrow.PrimitiveTypes.Int8)) assert.True(t, match.Matches(arrow.PrimitiveTypes.Uint64)) assert.True(t, match.Equals(exec.Integer())) assert.False(t, match.Equals(exec.BinaryLike())) } func TestBinaryLikeMatcher(t *testing.T) { match := exec.BinaryLike() assert.Equal(t, "binary-like", match.String()) assert.True(t, match.Matches(arrow.BinaryTypes.String)) assert.True(t, match.Matches(arrow.BinaryTypes.Binary)) assert.False(t, match.Matches(arrow.BinaryTypes.LargeString)) assert.False(t, match.Matches(arrow.BinaryTypes.LargeBinary)) assert.False(t, match.Equals(exec.LargeBinaryLike())) assert.True(t, match.Equals(exec.BinaryLike())) } func TestLargeBinaryLikeMatcher(t *testing.T) { match := exec.LargeBinaryLike() assert.Equal(t, "large-binary-like", match.String()) assert.False(t, match.Matches(arrow.BinaryTypes.String)) assert.False(t, match.Matches(arrow.BinaryTypes.Binary)) assert.True(t, match.Matches(arrow.BinaryTypes.LargeString)) assert.True(t, match.Matches(arrow.BinaryTypes.LargeBinary)) assert.True(t, match.Equals(exec.LargeBinaryLike())) assert.False(t, match.Equals(exec.BinaryLike())) } func TestFixedSizeBinaryMatcher(t *testing.T) { match := 
exec.FixedSizeBinaryLike() assert.Equal(t, "fixed-size-binary-like", match.String()) assert.False(t, match.Matches(arrow.BinaryTypes.String)) assert.True(t, match.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 5})) assert.True(t, match.Matches(&arrow.Decimal256Type{Precision: 12, Scale: 10})) assert.True(t, match.Matches(&arrow.FixedSizeBinaryType{})) assert.False(t, match.Equals(exec.LargeBinaryLike())) assert.True(t, match.Equals(exec.FixedSizeBinaryLike())) } func TestPrimitiveMatcher(t *testing.T) { match := exec.Primitive() assert.Equal(t, "primitive", match.String()) assert.True(t, match.Equals(exec.Primitive())) types := []arrow.DataType{ arrow.FixedWidthTypes.Boolean, arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint64, arrow.PrimitiveTypes.Int64, arrow.FixedWidthTypes.Float16, arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64, arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Time64ns, arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Duration_ms, arrow.FixedWidthTypes.MonthInterval, arrow.FixedWidthTypes.DayTimeInterval, arrow.FixedWidthTypes.MonthDayNanoInterval, } for _, typ := range types { assert.True(t, match.Matches(typ)) } assert.False(t, match.Matches(arrow.Null)) } func TestREEMatcher(t *testing.T) { tests := []struct { runEnds exec.TypeMatcher enc exec.TypeMatcher matchRunEnds arrow.DataType nomatchRunEnds arrow.DataType matchEnc arrow.DataType nomatchEnc arrow.DataType }{ {exec.Integer(), exec.Integer(), arrow.PrimitiveTypes.Int16, arrow.FixedWidthTypes.Float16, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String}, {exec.SameTypeID(arrow.INT32), exec.BinaryLike(), arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32}, {exec.SameTypeID(arrow.INT64), 
exec.SameTypeID(arrow.STRUCT), arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int32, arrow.StructOf(arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int16}), arrow.PrimitiveTypes.Int8}, } for _, tt := range tests { t.Run(tt.enc.String(), func(t *testing.T) { matcher := exec.RunEndEncoded(tt.runEnds, tt.enc) assert.False(t, matcher.Matches(tt.matchEnc)) assert.True(t, matcher.Matches(arrow.RunEndEncodedOf(tt.matchRunEnds, tt.matchEnc))) assert.False(t, matcher.Matches(arrow.RunEndEncodedOf(tt.matchRunEnds, tt.nomatchEnc))) assert.False(t, matcher.Matches(arrow.RunEndEncodedOf(tt.nomatchRunEnds, tt.matchEnc))) assert.False(t, matcher.Matches(arrow.RunEndEncodedOf(tt.nomatchRunEnds, tt.nomatchEnc))) assert.Equal(t, "run_end_encoded(run_ends="+tt.runEnds.String()+", values="+tt.enc.String()+")", matcher.String()) assert.True(t, matcher.Equals(exec.RunEndEncoded(tt.runEnds, tt.enc))) assert.False(t, matcher.Equals(exec.Primitive())) assert.False(t, matcher.Equals(exec.RunEndEncoded(exec.SameTypeID(tt.nomatchRunEnds.ID()), exec.SameTypeID(tt.nomatchEnc.ID())))) assert.False(t, matcher.Equals(exec.RunEndEncoded(exec.SameTypeID(tt.matchRunEnds.ID()), exec.SameTypeID(tt.nomatchEnc.ID())))) assert.False(t, matcher.Equals(exec.RunEndEncoded(exec.SameTypeID(tt.nomatchRunEnds.ID()), exec.SameTypeID(tt.matchEnc.ID())))) }) } } func TestInputTypeAnyType(t *testing.T) { var ty exec.InputType assert.Equal(t, exec.InputAny, ty.Kind) } func TestInputType(t *testing.T) { ty1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) assert.Equal(t, exec.InputExact, ty1.Kind) assert.True(t, arrow.TypeEqual(arrow.PrimitiveTypes.Int8, ty1.Type)) assert.Equal(t, "int8", ty1.String()) ty2 := exec.NewIDInput(arrow.DECIMAL) assert.Equal(t, exec.InputUseMatcher, ty2.Kind) assert.Equal(t, "Type::DECIMAL128", ty2.String()) assert.True(t, ty2.Matcher.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) assert.False(t, ty2.Matcher.Matches(arrow.PrimitiveTypes.Int16)) ty3 := 
exec.NewMatchedInput(exec.TimestampTypeUnit(arrow.Microsecond)) assert.Equal(t, "timestamp(us)", ty3.String()) var ty4 exec.InputType assert.Equal(t, "any", ty4.String()) // InputAny matches anything assert.True(t, ty4.Matches((arrow.DataType)(nil))) } func TestInputTypeEquals(t *testing.T) { t1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) t2 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) t3 := exec.NewExactInput(arrow.PrimitiveTypes.Int32) t5 := exec.NewIDInput(arrow.DECIMAL) t6 := exec.NewIDInput(arrow.DECIMAL) assert.True(t, t1.Equals(&t2)) assert.False(t, t1.Equals(&t3)) assert.False(t, t1.Equals(&t5)) assert.True(t, t5.Equals(&t5)) assert.True(t, t5.Equals(&t6)) var ty exec.InputType assert.True(t, ty.Equals(&exec.InputType{Kind: exec.InputAny})) // for now, an ID matcher for arrow.INT32 and a ExactInput for // arrow.PrimitiveTypes.Int32 are treated as being different. // this could be made equivalent later if desireable // check that field metadata is excluded from equality checks t7 := exec.NewExactInput(arrow.ListOfField( arrow.Field{Name: "item", Type: arrow.BinaryTypes.String, Nullable: true, Metadata: arrow.NewMetadata([]string{"foo"}, []string{"bar"})})) t8 := exec.NewExactInput(arrow.ListOf(arrow.BinaryTypes.String)) assert.True(t, t7.Equals(&t8)) } func TestInputTypeHash(t *testing.T) { var ( t0 exec.InputType t1 = exec.NewExactInput(arrow.PrimitiveTypes.Int8) t2 = exec.NewIDInput(arrow.DECIMAL) ) // these checks try to determine first of all whether hash // always returns the same value, and whether the elements // of the type are all incorporated into the hash assert.Equal(t, t0.Hash(), t0.Hash()) assert.Equal(t, t1.Hash(), t1.Hash()) assert.Equal(t, t2.Hash(), t2.Hash()) assert.NotEqual(t, t0.Hash(), t1.Hash()) assert.NotEqual(t, t0.Hash(), t2.Hash()) assert.NotEqual(t, t1.Hash(), t2.Hash()) } func TestInputTypeMatches(t *testing.T) { in1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) assert.True(t, in1.Matches(arrow.PrimitiveTypes.Int8)) 
assert.False(t, in1.Matches(arrow.PrimitiveTypes.Int16)) in2 := exec.NewIDInput(arrow.DECIMAL) assert.True(t, in2.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) ty2 := &arrow.Decimal128Type{Precision: 12, Scale: 2} ty3 := arrow.PrimitiveTypes.Float64 mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) arr2 := array.MakeArrayOfNull(mem, ty2, 1) arr3 := array.MakeArrayOfNull(mem, ty3, 1) defer arr2.Release() defer arr3.Release() scalar2, err := scalar.GetScalar(arr2, 0) assert.NoError(t, err) datumArr := compute.NewDatum(arr2) defer datumArr.Release() datumScalar := compute.NewDatum(scalar2) defer datumScalar.Release() assert.False(t, in2.Matches(ty3)) assert.False(t, in2.Matches(arr3.DataType())) } func TestOutputType(t *testing.T) { ty1 := exec.NewOutputType(arrow.PrimitiveTypes.Int8) assert.Equal(t, exec.ResolveFixed, ty1.Kind) assert.True(t, arrow.TypeEqual(arrow.PrimitiveTypes.Int8, ty1.Type)) dummyResolver := func(_ *exec.KernelCtx, args []arrow.DataType) (arrow.DataType, error) { return arrow.PrimitiveTypes.Int32, nil } ty2 := exec.NewComputedOutputType(dummyResolver) assert.Equal(t, exec.ResolveComputed, ty2.Kind) outType2, err := ty2.Resolve(nil, nil) assert.NoError(t, err) assert.Same(t, arrow.PrimitiveTypes.Int32, outType2) ty3 := ty1 assert.Equal(t, exec.ResolveFixed, ty3.Kind) assert.True(t, arrow.TypeEqual(ty1.Type, ty3.Type)) ty4 := ty2 assert.Equal(t, exec.ResolveComputed, ty4.Kind) outType4, err := ty4.Resolve(nil, nil) assert.NoError(t, err) assert.Same(t, arrow.PrimitiveTypes.Int32, outType4) assert.Equal(t, "int8", ty3.String()) assert.Equal(t, "computed", ty4.String()) } func TestOutputTypeResolve(t *testing.T) { ty1 := exec.NewOutputType(arrow.PrimitiveTypes.Int32) result, err := ty1.Resolve(nil, nil) assert.NoError(t, err) assert.Same(t, arrow.PrimitiveTypes.Int32, result) result, err = ty1.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8}) assert.NoError(t, err) assert.Same(t, 
arrow.PrimitiveTypes.Int32, result) result, err = ty1.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int8}) assert.NoError(t, err) assert.Same(t, arrow.PrimitiveTypes.Int32, result) resolver := func(_ *exec.KernelCtx, args []arrow.DataType) (arrow.DataType, error) { return args[0], nil } ty2 := exec.NewComputedOutputType(resolver) result, err = ty2.Resolve(nil, []arrow.DataType{arrow.BinaryTypes.String}) assert.NoError(t, err) assert.Same(t, arrow.BinaryTypes.String, result) // type resolver that returns an error ty3 := exec.NewComputedOutputType(func(_ *exec.KernelCtx, dt []arrow.DataType) (arrow.DataType, error) { // checking the value types versus the function arity should be validated // elsewhere. this is just for illustration purposes if len(dt) == 0 { return nil, fmt.Errorf("%w: need at least one argument", arrow.ErrInvalid) } return dt[0], nil }) _, err = ty3.Resolve(nil, []arrow.DataType{}) assert.ErrorIs(t, err, arrow.ErrInvalid) // resolver returns a fixed value ty4 := exec.NewComputedOutputType(func(*exec.KernelCtx, []arrow.DataType) (arrow.DataType, error) { return arrow.PrimitiveTypes.Int32, nil }) result, err = ty4.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8}) assert.NoError(t, err) assert.Same(t, arrow.PrimitiveTypes.Int32, result) result, err = ty4.Resolve(nil, []arrow.DataType{}) assert.NoError(t, err) assert.Same(t, arrow.PrimitiveTypes.Int32, result) } func TestKernelSignatureEquals(t *testing.T) { sig1 := exec.KernelSignature{ InputTypes: []exec.InputType{}, OutType: exec.NewOutputType(arrow.BinaryTypes.String)} sig1Copy := exec.KernelSignature{ InputTypes: []exec.InputType{}, OutType: exec.NewOutputType(arrow.BinaryTypes.String)} sig2 := exec.KernelSignature{ InputTypes: []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, OutType: exec.NewOutputType(arrow.BinaryTypes.String), } // output type doesn't matter (for now) sig3 := exec.KernelSignature{ InputTypes: []exec.InputType{ 
exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, OutType: exec.NewOutputType(arrow.PrimitiveTypes.Int32), } sig4 := exec.KernelSignature{ InputTypes: []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8), exec.NewExactInput(arrow.PrimitiveTypes.Int16), }, OutType: exec.NewOutputType(arrow.BinaryTypes.String), } sig4Copy := exec.KernelSignature{ InputTypes: []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8), exec.NewExactInput(arrow.PrimitiveTypes.Int16), }, OutType: exec.NewOutputType(arrow.BinaryTypes.String), } sig5 := exec.KernelSignature{ InputTypes: []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8), exec.NewExactInput(arrow.PrimitiveTypes.Int16), exec.NewExactInput(arrow.PrimitiveTypes.Int32), }, OutType: exec.NewOutputType(arrow.BinaryTypes.String), } assert.True(t, sig1.Equals(sig1)) assert.True(t, sig2.Equals(sig3)) assert.False(t, sig3.Equals(sig4)) // different sig objects but same sig assert.True(t, sig1.Equals(sig1Copy)) assert.True(t, sig4.Equals(sig4Copy)) // match first 2 args, but not third assert.False(t, sig4.Equals(sig5)) } func TestKernelSignatureVarArgsEqual(t *testing.T) { sig1 := exec.KernelSignature{ InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, OutType: exec.NewOutputType(arrow.BinaryTypes.String), IsVarArgs: true, } sig2 := exec.KernelSignature{ InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, OutType: exec.NewOutputType(arrow.BinaryTypes.String), IsVarArgs: true, } sig3 := exec.KernelSignature{ InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, OutType: exec.NewOutputType(arrow.BinaryTypes.String), } assert.True(t, sig1.Equals(sig2)) assert.False(t, sig2.Equals(sig3)) } func TestKernelSignatureHash(t *testing.T) { sig1 := exec.KernelSignature{ InputTypes: []exec.InputType{}, OutType: exec.NewOutputType(arrow.BinaryTypes.String), } sig2 := exec.KernelSignature{ InputTypes: 
[]exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, OutType: exec.NewOutputType(arrow.BinaryTypes.String), } sig3 := exec.KernelSignature{ InputTypes: []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8), exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, OutType: exec.NewOutputType(arrow.BinaryTypes.String), } assert.Equal(t, sig1.Hash(), sig1.Hash()) assert.Equal(t, sig2.Hash(), sig2.Hash()) assert.NotEqual(t, sig1.Hash(), sig2.Hash()) assert.NotEqual(t, sig2.Hash(), sig3.Hash()) } func TestKernelSignatureMatchesInputs(t *testing.T) { // () -> boolean sig1 := exec.KernelSignature{ OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean)} assert.True(t, sig1.MatchesInputs([]arrow.DataType{})) assert.False(t, sig1.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8})) // (int8, decimal) -> boolean sig2 := exec.KernelSignature{ InputTypes: []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8), exec.NewIDInput(arrow.DECIMAL)}, OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean), } assert.False(t, sig2.MatchesInputs([]arrow.DataType{})) assert.False(t, sig2.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8})) assert.True(t, sig2.MatchesInputs([]arrow.DataType{ arrow.PrimitiveTypes.Int8, &arrow.Decimal128Type{Precision: 12, Scale: 2}})) // (int8, int32) -> boolean sig3 := exec.KernelSignature{ InputTypes: []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8), exec.NewExactInput(arrow.PrimitiveTypes.Int32), }, OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean), } assert.False(t, sig3.MatchesInputs(nil)) assert.True(t, sig3.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int32})) assert.False(t, sig3.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int16})) } func TestKernelSignatureVarArgsMatchesInputs(t *testing.T) { { sig := exec.KernelSignature{ InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, OutType: 
exec.NewOutputType(arrow.BinaryTypes.String), IsVarArgs: true, } args := []arrow.DataType{arrow.PrimitiveTypes.Int8} assert.True(t, sig.MatchesInputs(args)) args = append(args, arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int8) assert.True(t, sig.MatchesInputs(args)) args = append(args, arrow.PrimitiveTypes.Int32) assert.False(t, sig.MatchesInputs(args)) } { sig := exec.KernelSignature{ InputTypes: []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8), exec.NewExactInput(arrow.BinaryTypes.String), }, OutType: exec.NewOutputType(arrow.BinaryTypes.String), IsVarArgs: true, } args := []arrow.DataType{arrow.PrimitiveTypes.Int8} assert.True(t, sig.MatchesInputs(args)) args = append(args, arrow.BinaryTypes.String, arrow.BinaryTypes.String) assert.True(t, sig.MatchesInputs(args)) args = append(args, arrow.PrimitiveTypes.Int32) assert.False(t, sig.MatchesInputs(args)) } } func TestKernelSignatureToString(t *testing.T) { inTypes := []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8), exec.NewIDInput(arrow.DECIMAL), exec.NewExactInput(arrow.BinaryTypes.String), } sig := exec.KernelSignature{ InputTypes: inTypes, OutType: exec.NewOutputType(arrow.BinaryTypes.String), } assert.Equal(t, "(int8, Type::DECIMAL128, utf8) -> utf8", sig.String()) outType := exec.NewComputedOutputType(func(*exec.KernelCtx, []arrow.DataType) (arrow.DataType, error) { return nil, arrow.ErrInvalid }) sig2 := exec.KernelSignature{ InputTypes: []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8), exec.NewIDInput(arrow.DECIMAL)}, OutType: outType, } assert.Equal(t, "(int8, Type::DECIMAL128) -> computed", sig2.String()) } func TestKernelSignatureVarArgsToString(t *testing.T) { sig1 := exec.KernelSignature{ InputTypes: []exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, OutType: exec.NewOutputType(arrow.BinaryTypes.String), IsVarArgs: true, } assert.Equal(t, "varargs[int8*] -> utf8", sig1.String()) sig2 := exec.KernelSignature{ InputTypes: []exec.InputType{ 
exec.NewExactInput(arrow.BinaryTypes.String), exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, OutType: exec.NewOutputType(arrow.BinaryTypes.String), IsVarArgs: true, } assert.Equal(t, "varargs[utf8, int8*] -> utf8", sig2.String()) } arrow-go-18.2.0/arrow/compute/exec/span.go000066400000000000000000000445461476434502500204100ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package exec import ( "sync/atomic" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/arrow/scalar" ) // BufferSpan is a lightweight Buffer holder for ArraySpans that does not // take ownership of the underlying memory.Buffer at all or could be // used to reference raw byte slices instead. type BufferSpan struct { // Buf should be the byte slice representing this buffer, if this is // nil then this bufferspan should be considered empty. Buf []byte // Owner should point to an underlying parent memory.Buffer if this // memory is owned by a different, existing, buffer. 
Retain is not // called on this buffer, so it must not be released as long as // this BufferSpan refers to it. Owner *memory.Buffer // SelfAlloc tracks whether or not this bufferspan is the only owner // of the Owning memory.Buffer. This happens when preallocating // memory or if a kernel allocates it's own buffer for a result. // In these cases, we have to know so we can properly maintain the // refcount if this is later turned into an ArrayData object. SelfAlloc bool } // SetBuffer sets the given buffer into this BufferSpan and marks // SelfAlloc as false. This should be called when setting a buffer // that is externally owned/created. func (b *BufferSpan) SetBuffer(buf *memory.Buffer) { b.Buf = buf.Bytes() b.Owner = buf b.SelfAlloc = false } // WrapBuffer wraps this bufferspan around a buffer and marks // SelfAlloc as true. This should be called when setting a buffer // that was allocated as part of an execution rather than just // re-using an existing buffer from an input array. func (b *BufferSpan) WrapBuffer(buf *memory.Buffer) { b.Buf = buf.Bytes() b.Owner = buf b.SelfAlloc = true } // ArraySpan is a light-weight, non-owning version of arrow.ArrayData // for more efficient handling with computation and engines. We use // explicit go Arrays to define the buffers and some scratch space // for easily populating and shifting around pointers to memory without // having to worry about and deal with retain/release during calculations. 
type ArraySpan struct { Type arrow.DataType Len int64 Nulls int64 Offset int64 Buffers [3]BufferSpan // Scratch is a holding spot for things such as // offsets or union type codes when converting from scalars Scratch [2]uint64 Children []ArraySpan } // if an error is encountered, call Release on a preallocated span // to ensure it releases any self-allocated buffers, it will // not call release on buffers it doesn't own (SelfAlloc != true) func (a *ArraySpan) Release() { for _, c := range a.Children { c.Release() } for _, b := range a.Buffers { if b.SelfAlloc { b.Owner.Release() } } } func (a *ArraySpan) MayHaveNulls() bool { return atomic.LoadInt64(&a.Nulls) != 0 && a.Buffers[0].Buf != nil } // UpdateNullCount will count the bits in the null bitmap and update the // number of nulls if the current null count is unknown, otherwise it just // returns the value of a.Nulls func (a *ArraySpan) UpdateNullCount() int64 { curNulls := atomic.LoadInt64(&a.Nulls) if curNulls != array.UnknownNullCount { return curNulls } newNulls := a.Len - int64(bitutil.CountSetBits(a.Buffers[0].Buf, int(a.Offset), int(a.Len))) atomic.StoreInt64(&a.Nulls, newNulls) return newNulls } // Dictionary returns a pointer to the array span for the dictionary which // we will always place as the first (and only) child if it exists. func (a *ArraySpan) Dictionary() *ArraySpan { return &a.Children[0] } // NumBuffers returns the number of expected buffers for this type func (a *ArraySpan) NumBuffers() int { return getNumBuffers(a.Type) } // MakeData generates an arrow.ArrayData object for this ArraySpan, // properly updating the buffer ref count if necessary. func (a *ArraySpan) MakeData() arrow.ArrayData { var bufs [3]*memory.Buffer for i := range bufs { b := a.GetBuffer(i) bufs[i] = b if b != nil && a.Buffers[i].SelfAlloc { // if this buffer is just a pointer to another existing buffer // then we never bumped the refcount for that buffer. 
// As a result, we won't call release here so that the call // to array.NewData properly updates the ref counts of the buffers. // If instead this buffer was allocated during calculation // (such as during prealloc or by a kernel itself) // then we need to release after we create the ArrayData so that it // maintains the correct refcount of 1, giving the resulting // ArrayData object ownership of this buffer. defer b.Release() } } var ( nulls = int(atomic.LoadInt64(&a.Nulls)) length = int(a.Len) off = int(a.Offset) dt = a.Type children []arrow.ArrayData ) if a.Type.ID() == arrow.NULL { nulls = length } else if len(a.Buffers[0].Buf) == 0 { nulls = 0 } // we use a.Type for the NewData call at the end, so we can // handle extension types by using dt to point to the storage type // and let the proper extension type get set into the ArrayData // object we return. if dt.ID() == arrow.EXTENSION { dt = dt.(arrow.ExtensionType).StorageType() } if dt.ID() == arrow.DICTIONARY { result := array.NewData(a.Type, length, bufs[:a.NumBuffers()], nil, nulls, off) dict := a.Dictionary().MakeData() defer dict.Release() result.SetDictionary(dict) return result } else if dt.ID() == arrow.DENSE_UNION || dt.ID() == arrow.SPARSE_UNION { bufs[0] = nil nulls = 0 } if len(a.Children) > 0 { children = make([]arrow.ArrayData, len(a.Children)) for i, c := range a.Children { d := c.MakeData() defer d.Release() children[i] = d } } return array.NewData(a.Type, length, bufs[:a.NumBuffers()], children, nulls, off) } // MakeArray is a convenience function for calling array.MakeFromData(a.MakeData()) func (a *ArraySpan) MakeArray() arrow.Array { d := a.MakeData() defer d.Release() return array.MakeFromData(d) } // SetSlice updates the offset and length of this ArraySpan to refer to // a specific slice of the underlying buffers. 
func (a *ArraySpan) SetSlice(off, length int64) { if off == a.Offset && length == a.Len { // don't modify the nulls if the slice is the entire span return } if a.Type.ID() != arrow.NULL { if a.Nulls != 0 { if a.Nulls == a.Len { a.Nulls = length } else { a.Nulls = array.UnknownNullCount } } } else { a.Nulls = length } a.Offset, a.Len = off, length } // GetBuffer returns the buffer for the requested index. If this buffer // is owned by another array/arrayspan the Owning buffer is returned, // otherwise if this slice has no owning buffer, we call NewBufferBytes // to wrap it as a memory.Buffer. Can also return nil if there is no // buffer in this index. func (a *ArraySpan) GetBuffer(idx int) *memory.Buffer { buf := a.Buffers[idx] switch { case buf.Owner != nil: return buf.Owner case buf.Buf != nil: return memory.NewBufferBytes(buf.Buf) } return nil } // convenience function to resize the children slice if necessary, // or just shrink the slice without re-allocating if there's enough // capacity already. func (a *ArraySpan) resizeChildren(i int) { if cap(a.Children) >= i { a.Children = a.Children[:i] } else { a.Children = make([]ArraySpan, i) } } // FillFromScalar populates this ArraySpan as if it were a 1 length array // with the single value equal to the passed in Scalar. 
func (a *ArraySpan) FillFromScalar(val scalar.Scalar) { var ( trueBit byte = 0x01 falseBit byte = 0x00 ) a.Type = val.DataType() a.Len = 1 typeID := a.Type.ID() if val.IsValid() { a.Nulls = 0 } else { a.Nulls = 1 } if !arrow.IsUnion(typeID) && typeID != arrow.NULL { if val.IsValid() { a.Buffers[0].Buf = []byte{trueBit} } else { a.Buffers[0].Buf = []byte{falseBit} } a.Buffers[0].Owner = nil a.Buffers[0].SelfAlloc = false } switch { case typeID == arrow.BOOL: if val.(*scalar.Boolean).Value { a.Buffers[1].Buf = []byte{trueBit} } else { a.Buffers[1].Buf = []byte{falseBit} } a.Buffers[1].Owner = nil a.Buffers[1].SelfAlloc = false case arrow.IsPrimitive(typeID) || arrow.IsDecimal(typeID): sc := val.(scalar.PrimitiveScalar) a.Buffers[1].Buf = sc.Data() a.Buffers[1].Owner = nil a.Buffers[1].SelfAlloc = false case typeID == arrow.DICTIONARY: sc := val.(scalar.PrimitiveScalar) a.Buffers[1].Buf = sc.Data() a.Buffers[1].Owner = nil a.Buffers[1].SelfAlloc = false a.resizeChildren(1) a.Children[0].SetMembers(val.(*scalar.Dictionary).Value.Dict.Data()) case arrow.IsBaseBinary(typeID): sc := val.(scalar.BinaryScalar) a.Buffers[1].Buf = arrow.Uint64Traits.CastToBytes(a.Scratch[:]) a.Buffers[1].Owner = nil a.Buffers[1].SelfAlloc = false var dataBuffer []byte if sc.IsValid() { dataBuffer = sc.Data() a.Buffers[2].Owner = sc.Buffer() a.Buffers[2].SelfAlloc = false } if arrow.IsBinaryLike(typeID) { setOffsetsForScalar(a, unsafe.Slice((*int32)(unsafe.Pointer(&a.Scratch[0])), 2), int64(len(dataBuffer)), 1) } else { // large_binary_like setOffsetsForScalar(a, unsafe.Slice((*int64)(unsafe.Pointer(&a.Scratch[0])), 2), int64(len(dataBuffer)), 1) } a.Buffers[2].Buf = dataBuffer case typeID == arrow.FIXED_SIZE_BINARY: sc := val.(scalar.BinaryScalar) if !sc.IsValid() { a.Buffers[1].Buf = make([]byte, sc.DataType().(*arrow.FixedSizeBinaryType).ByteWidth) a.Buffers[1].Owner = nil a.Buffers[1].SelfAlloc = false break } a.Buffers[1].Buf = sc.Data() a.Buffers[1].Owner = sc.Buffer() 
a.Buffers[1].SelfAlloc = false case arrow.IsListLike(typeID): sc := val.(scalar.ListScalar) valueLen := 0 a.resizeChildren(1) if sc.GetList() != nil { a.Children[0].SetMembers(sc.GetList().Data()) valueLen = sc.GetList().Len() } else { // even when the value is null, we must populate // child data to yield a valid array. ugh FillZeroLength(sc.DataType().(arrow.NestedType).Fields()[0].Type, &a.Children[0]) } switch typeID { case arrow.LIST, arrow.MAP: setOffsetsForScalar(a, unsafe.Slice((*int32)(unsafe.Pointer(&a.Scratch[0])), 2), int64(valueLen), 1) case arrow.LARGE_LIST: setOffsetsForScalar(a, unsafe.Slice((*int64)(unsafe.Pointer(&a.Scratch[0])), 2), int64(valueLen), 1) default: // fixed size list has no second buffer a.Buffers[1].Buf, a.Buffers[1].Owner = nil, nil a.Buffers[1].SelfAlloc = false } case typeID == arrow.STRUCT: sc := val.(*scalar.Struct) a.Buffers[1].Buf = nil a.Buffers[1].Owner = nil a.Buffers[1].SelfAlloc = false a.resizeChildren(len(sc.Value)) for i, v := range sc.Value { a.Children[i].FillFromScalar(v) } case arrow.IsUnion(typeID): // first buffer is kept null since unions have no validity vector a.Buffers[0].Buf, a.Buffers[0].Owner = nil, nil a.Buffers[0].SelfAlloc = false a.Buffers[1].Buf = arrow.Uint64Traits.CastToBytes(a.Scratch[:])[:1] a.Buffers[1].Owner = nil a.Buffers[1].SelfAlloc = false codes := unsafe.Slice((*arrow.UnionTypeCode)(unsafe.Pointer(&a.Buffers[1].Buf[0])), 1) a.resizeChildren(len(a.Type.(arrow.UnionType).Fields())) switch sc := val.(type) { case *scalar.DenseUnion: codes[0] = sc.TypeCode // has offset, start 4 bytes in so it's aligned to the 32-bit boundaries off := unsafe.Slice((*int32)(unsafe.Add(unsafe.Pointer(&a.Scratch[0]), arrow.Int32SizeBytes)), 2) setOffsetsForScalar(a, off, 1, 2) // we can't "see" the other arrays in the union, but we put the "active" // union array in the right place and fill zero-length arrays for // the others. 
childIDS := a.Type.(arrow.UnionType).ChildIDs() for i, f := range a.Type.(arrow.UnionType).Fields() { if i == childIDS[sc.TypeCode] { a.Children[i].FillFromScalar(sc.Value) } else { FillZeroLength(f.Type, &a.Children[i]) } } case *scalar.SparseUnion: codes[0] = sc.TypeCode // sparse union scalars have a full complement of child values // even though only one of them is relevant, so we just fill them // in here for i, v := range sc.Value { a.Children[i].FillFromScalar(v) } } case typeID == arrow.EXTENSION: // pass through storage sc := val.(*scalar.Extension) a.FillFromScalar(sc.Value) // restore the extension type a.Type = val.DataType() case typeID == arrow.NULL: for i := range a.Buffers { a.Buffers[i].Buf = nil a.Buffers[i].Owner = nil a.Buffers[i].SelfAlloc = false } } } func (a *ArraySpan) SetDictionary(span *ArraySpan) { a.resizeChildren(1) a.Children[0].Release() a.Children[0] = *span } // TakeOwnership is like SetMembers only this takes ownership of // the buffers by calling Retain on them so that the passed in // ArrayData can be released without negatively affecting this // ArraySpan func (a *ArraySpan) TakeOwnership(data arrow.ArrayData) { a.Type = data.DataType() a.Len = int64(data.Len()) if a.Type.ID() == arrow.NULL { a.Nulls = a.Len } else { a.Nulls = int64(data.NullN()) } a.Offset = int64(data.Offset()) for i, b := range data.Buffers() { if b != nil { a.Buffers[i].WrapBuffer(b) b.Retain() } else { a.Buffers[i].Buf = nil a.Buffers[i].Owner = nil a.Buffers[i].SelfAlloc = false } } typeID := a.Type.ID() if a.Buffers[0].Buf == nil { switch typeID { case arrow.NULL, arrow.SPARSE_UNION, arrow.DENSE_UNION: default: // should already be zero, but we make sure a.Nulls = 0 } } for i := len(data.Buffers()); i < 3; i++ { a.Buffers[i].Buf = nil a.Buffers[i].Owner = nil a.Buffers[i].SelfAlloc = false } if typeID == arrow.DICTIONARY { a.resizeChildren(1) dict := data.Dictionary() if dict != (*array.Data)(nil) { a.Children[0].TakeOwnership(dict) } } else { 
a.resizeChildren(len(data.Children())) for i, c := range data.Children() { a.Children[i].TakeOwnership(c) } } } // SetMembers populates this ArraySpan from the given ArrayData object. // As this is a non-owning reference, the ArrayData object must not // be fully released while this ArraySpan is in use, otherwise any buffers // referenced will be released too func (a *ArraySpan) SetMembers(data arrow.ArrayData) { a.Type = data.DataType() a.Len = int64(data.Len()) if a.Type.ID() == arrow.NULL { a.Nulls = a.Len } else { a.Nulls = int64(data.NullN()) } a.Offset = int64(data.Offset()) for i, b := range data.Buffers() { if b != nil { a.Buffers[i].SetBuffer(b) } else { a.Buffers[i].Buf = nil a.Buffers[i].Owner = nil a.Buffers[i].SelfAlloc = false } } typeID := a.Type.ID() if a.Buffers[0].Buf == nil { switch typeID { case arrow.NULL, arrow.SPARSE_UNION, arrow.DENSE_UNION: default: // should already be zero, but we make sure a.Nulls = 0 } } for i := len(data.Buffers()); i < 3; i++ { a.Buffers[i].Buf = nil a.Buffers[i].Owner = nil a.Buffers[i].SelfAlloc = false } if typeID == arrow.DICTIONARY { a.resizeChildren(1) dict := data.Dictionary() if dict != (*array.Data)(nil) { a.Children[0].SetMembers(dict) } } else { if cap(a.Children) >= len(data.Children()) { a.Children = a.Children[:len(data.Children())] } else { a.Children = make([]ArraySpan, len(data.Children())) } for i, c := range data.Children() { a.Children[i].SetMembers(c) } } } // ExecValue represents a single input to an execution which could // be either an Array (ArraySpan) or a Scalar value type ExecValue struct { Array ArraySpan Scalar scalar.Scalar } func (e *ExecValue) IsArray() bool { return e.Scalar == nil } func (e *ExecValue) IsScalar() bool { return !e.IsArray() } func (e *ExecValue) Type() arrow.DataType { if e.IsArray() { return e.Array.Type } return e.Scalar.DataType() } // ExecResult is the result of a kernel execution and should be populated // by the execution functions and/or a kernel. 
For now we're just going to // alias an ArraySpan. type ExecResult = ArraySpan // ExecSpan represents a slice of inputs and is used to provide slices // of input values to iterate over. // // Len is the length of the span (all elements in Values should either // be scalar or an array with a length + offset of at least Len). type ExecSpan struct { Len int64 Values []ExecValue } func getNumBuffers(dt arrow.DataType) int { switch dt.ID() { case arrow.RUN_END_ENCODED: return 0 case arrow.NULL, arrow.STRUCT, arrow.FIXED_SIZE_LIST: return 1 case arrow.BINARY, arrow.LARGE_BINARY, arrow.STRING, arrow.LARGE_STRING, arrow.DENSE_UNION: return 3 case arrow.EXTENSION: return getNumBuffers(dt.(arrow.ExtensionType).StorageType()) default: return 2 } } // FillZeroLength fills an ArraySpan with the appropriate information for // a Zero Length Array of the provided type. func FillZeroLength(dt arrow.DataType, span *ArraySpan) { span.Scratch[0], span.Scratch[1] = 0, 0 span.Type = dt span.Len = 0 numBufs := getNumBuffers(dt) for i := 0; i < numBufs; i++ { span.Buffers[i].Buf = arrow.Uint64Traits.CastToBytes(span.Scratch[:])[:0] span.Buffers[i].Owner = nil } for i := numBufs; i < 3; i++ { span.Buffers[i].Buf, span.Buffers[i].Owner = nil, nil } if dt.ID() == arrow.DICTIONARY { span.resizeChildren(1) FillZeroLength(dt.(*arrow.DictionaryType).ValueType, &span.Children[0]) return } nt, ok := dt.(arrow.NestedType) if !ok { if len(span.Children) > 0 { span.Children = span.Children[:0] } return } span.resizeChildren(nt.NumFields()) for i, f := range nt.Fields() { FillZeroLength(f.Type, &span.Children[i]) } } // PromoteExecSpanScalars promotes the values of the passed in ExecSpan // from scalars to Arrays of length 1 for each value. 
func PromoteExecSpanScalars(span ExecSpan) { for i := range span.Values { if span.Values[i].Scalar != nil { span.Values[i].Array.FillFromScalar(span.Values[i].Scalar) span.Values[i].Scalar = nil } } } arrow-go-18.2.0/arrow/compute/exec/span_offsets.go000066400000000000000000000024011476434502500221210ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.20 || tinygo package exec import ( "unsafe" ) // convenience function for populating the offsets buffer from a scalar // value's size. func setOffsetsForScalar[T int32 | int64](span *ArraySpan, buf []T, valueSize int64, bufidx int) { buf[0] = 0 buf[1] = T(valueSize) span.Buffers[bufidx].Buf = unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(buf))), 2*int(unsafe.Sizeof(T(0)))) span.Buffers[bufidx].Owner = nil span.Buffers[bufidx].SelfAlloc = false } arrow-go-18.2.0/arrow/compute/exec/span_test.go000066400000000000000000000577531476434502500214530ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. 
The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package exec_test import ( "reflect" "strings" "testing" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/compute/exec" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/endian" "github.com/apache/arrow-go/v18/arrow/extensions" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/apache/arrow-go/v18/internal/types" "github.com/stretchr/testify/assert" ) func TestBufferSpan_SetBuffer(t *testing.T) { type fields struct { Buf []byte Owner *memory.Buffer SelfAlloc bool } type args struct { buf *memory.Buffer } foo := []byte{0xde, 0xad, 0xbe, 0xef} own := memory.NewBufferBytes(foo) tests := []struct { name string fields fields args args }{ {"simple", fields{SelfAlloc: true}, args{own}}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { b := &exec.BufferSpan{ Buf: tt.fields.Buf, Owner: tt.fields.Owner, SelfAlloc: tt.fields.SelfAlloc, } b.SetBuffer(tt.args.buf) assert.Same(t, &foo[0], &b.Buf[0]) assert.Same(t, own, b.Owner) assert.False(t, b.SelfAlloc) }) } } func TestBufferSpan_WrapBuffer(t *testing.T) { type fields struct { Buf []byte Owner *memory.Buffer SelfAlloc bool } type args struct { buf *memory.Buffer } foo := []byte{0xde, 0xad, 0xbe, 0xef} own := memory.NewBufferBytes(foo) tests := []struct { 
name string fields fields args args }{ {"simple", fields{SelfAlloc: false}, args{own}}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { b := &exec.BufferSpan{ Buf: tt.fields.Buf, Owner: tt.fields.Owner, SelfAlloc: tt.fields.SelfAlloc, } b.WrapBuffer(tt.args.buf) assert.Same(t, &foo[0], &b.Buf[0]) assert.Same(t, own, b.Owner) assert.True(t, b.SelfAlloc) }) } } func TestArraySpan_UpdateNullCount(t *testing.T) { type fields struct { Type arrow.DataType Len int64 Nulls int64 Offset int64 Buffers [3]exec.BufferSpan Scratch [2]uint64 Children []exec.ArraySpan } tests := []struct { name string fields fields want int64 }{ {"known", fields{Nulls: 25}, 25}, {"unknown", fields{ Nulls: array.UnknownNullCount, Len: 8, // 0b01101101 Buffers: [3]exec.BufferSpan{{Buf: []byte{109}}, {}, {}}}, 3}, {"unknown with offset", fields{ Nulls: array.UnknownNullCount, Len: 4, Offset: 2, // 0b01101101 Buffers: [3]exec.BufferSpan{{Buf: []byte{109}}, {}, {}}}, 1}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { a := &exec.ArraySpan{ Type: tt.fields.Type, Len: tt.fields.Len, Nulls: tt.fields.Nulls, Offset: tt.fields.Offset, Buffers: tt.fields.Buffers, Scratch: tt.fields.Scratch, Children: tt.fields.Children, } if got := a.UpdateNullCount(); got != tt.want { t.Errorf("ArraySpan.UpdateNullCount() = %v, want %v", got, tt.want) } }) } } func TestArraySpan_Dictionary(t *testing.T) { type fields struct { Type arrow.DataType Len int64 Nulls int64 Offset int64 Buffers [3]exec.BufferSpan Scratch [2]uint64 Children []exec.ArraySpan } children := []exec.ArraySpan{{}} tests := []struct { name string fields fields want *exec.ArraySpan }{ {"basic", fields{Children: children}, &children[0]}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { a := &exec.ArraySpan{ Type: tt.fields.Type, Len: tt.fields.Len, Nulls: tt.fields.Nulls, Offset: tt.fields.Offset, Buffers: tt.fields.Buffers, Scratch: tt.fields.Scratch, Children: tt.fields.Children, } if got := 
a.Dictionary(); !reflect.DeepEqual(got, tt.want) { t.Errorf("ArraySpan.Dictionary() = %v, want %v", got, tt.want) } }) } } func TestArraySpan_NumBuffers(t *testing.T) { type fields struct { Type arrow.DataType Len int64 Nulls int64 Offset int64 Buffers [3]exec.BufferSpan Scratch [2]uint64 Children []exec.ArraySpan } tests := []struct { name string fields fields want int }{ {"null", fields{Type: arrow.Null}, 1}, {"struct", fields{Type: arrow.StructOf()}, 1}, {"fixed size list", fields{Type: arrow.FixedSizeListOf(4, arrow.PrimitiveTypes.Int32)}, 1}, {"binary", fields{Type: arrow.BinaryTypes.Binary}, 3}, {"large binary", fields{Type: arrow.BinaryTypes.LargeBinary}, 3}, {"string", fields{Type: arrow.BinaryTypes.String}, 3}, {"large string", fields{Type: arrow.BinaryTypes.LargeString}, 3}, {"extension", fields{Type: extensions.NewUUIDType()}, 2}, {"int32", fields{Type: arrow.PrimitiveTypes.Int32}, 2}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { a := &exec.ArraySpan{ Type: tt.fields.Type, Len: tt.fields.Len, Nulls: tt.fields.Nulls, Offset: tt.fields.Offset, Buffers: tt.fields.Buffers, Scratch: tt.fields.Scratch, Children: tt.fields.Children, } if got := a.NumBuffers(); got != tt.want { t.Errorf("ArraySpan.NumBuffers() = %v, want %v", got, tt.want) } }) } } func TestArraySpan_MakeData(t *testing.T) { type fields struct { Type arrow.DataType Len int64 Nulls int64 Offset int64 Buffers [3]exec.BufferSpan Scratch [2]uint64 Children []exec.ArraySpan } var ( buf1 *memory.Buffer ) arrow.RegisterExtensionType(types.NewDictExtensionType()) defer arrow.UnregisterExtensionType("dict-extension") tests := []struct { name string fields func(mem memory.Allocator) fields want func(mem memory.Allocator) arrow.ArrayData }{ {"null type", func(mem memory.Allocator) fields { return fields{ Type: arrow.Null, Len: 5, Nulls: array.UnknownNullCount, } }, func(mem memory.Allocator) arrow.ArrayData { return array.NewData(arrow.Null, 5, []*memory.Buffer{nil}, nil, 5, 0) }}, 
{"zero len", func(mem memory.Allocator) fields { return fields{Type: arrow.PrimitiveTypes.Int32} }, func(mem memory.Allocator) arrow.ArrayData { return array.NewData(arrow.PrimitiveTypes.Int32, 0, []*memory.Buffer{nil, nil}, nil, 0, 0) }}, {"non-owning offset", func(mem memory.Allocator) fields { ret := fields{ Type: arrow.PrimitiveTypes.Int8, Len: 4, Nulls: 1, Offset: 1, } buf1 = memory.NewResizableBuffer(mem) buf1.Resize(1) buf1.Bytes()[0] = 109 ret.Buffers[0].SetBuffer(buf1) ret.Buffers[1].SetBuffer(memory.NewBufferBytes([]byte{5, 5, 5, 5, 5})) return ret }, func(mem memory.Allocator) arrow.ArrayData { // created in the above func, we release after constructing // the NewData so the refcount is as expected defer buf1.Release() return array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{buf1, memory.NewBufferBytes([]byte{5, 5, 5, 5, 5})}, nil, 1, 1) }}, {"self-alloc", func(mem memory.Allocator) fields { ret := fields{ Type: arrow.PrimitiveTypes.Int8, Len: 4, } buf := memory.NewResizableBuffer(mem) buf.Resize(1) ret.Buffers[0].WrapBuffer(buf) buf2 := memory.NewResizableBuffer(mem) buf2.Resize(4) ret.Buffers[1].WrapBuffer(buf2) return ret }, func(mem memory.Allocator) arrow.ArrayData { buf := memory.NewResizableBuffer(mem) buf.Resize(1) defer buf.Release() buf2 := memory.NewResizableBuffer(mem) buf2.Resize(4) defer buf2.Release() return array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{buf, buf2}, nil, 0, 0) }}, {"with children", func(mem memory.Allocator) fields { ret := fields{ Type: arrow.ListOf(arrow.PrimitiveTypes.Int8), Len: 1, Children: []exec.ArraySpan{{ Type: arrow.PrimitiveTypes.Int8, Len: 4, }}, } var offsets [8]byte endian.Native.PutUint32(offsets[4:], 4) ret.Buffers[1].SetBuffer(memory.NewBufferBytes(offsets[:])) buf := memory.NewResizableBuffer(mem) buf.Resize(4) buf.Bytes()[0] = 1 buf.Bytes()[1] = 2 buf.Bytes()[2] = 3 buf.Bytes()[3] = 4 ret.Children[0].Buffers[1].WrapBuffer(buf) return ret }, func(mem memory.Allocator) 
arrow.ArrayData { buf := memory.NewResizableBuffer(mem) buf.Resize(4) buf.Bytes()[0] = 1 buf.Bytes()[1] = 2 buf.Bytes()[2] = 3 buf.Bytes()[3] = 4 defer buf.Release() child := array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{nil, buf}, nil, 0, 0) defer child.Release() var offsets [8]byte endian.Native.PutUint32(offsets[4:], 4) return array.NewData(arrow.ListOf(arrow.PrimitiveTypes.Int8), 1, []*memory.Buffer{nil, memory.NewBufferBytes(offsets[:])}, []arrow.ArrayData{child}, 0, 0) }}, {"dict-extension-type", func(mem memory.Allocator) fields { // dict-extension-type is dict(Index: int8, Value: string) // so there should be an int8 in the arrayspan and // a child of a string arrayspan in the first index of // Children ret := fields{ Type: types.NewDictExtensionType(), Len: 1, Children: []exec.ArraySpan{{ Type: arrow.BinaryTypes.String, Len: 2, }}, } indices := memory.NewResizableBuffer(mem) indices.Resize(1) indices.Bytes()[0] = 1 ret.Buffers[1].WrapBuffer(indices) offsets := memory.NewResizableBuffer(mem) offsets.Resize(3 * arrow.Int32SizeBytes) copy(offsets.Bytes(), arrow.Int32Traits.CastToBytes([]int32{0, 5, 10})) values := memory.NewResizableBuffer(mem) values.Resize(len("HelloWorld")) copy(values.Bytes(), []byte("HelloWorld")) nulls := memory.NewResizableBuffer(mem) nulls.Resize(1) nulls.Bytes()[0] = 3 ret.Children[0].Buffers[0].WrapBuffer(nulls) ret.Children[0].Buffers[1].WrapBuffer(offsets) ret.Children[0].Buffers[2].WrapBuffer(values) return ret }, func(mem memory.Allocator) arrow.ArrayData { dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World"]`)) defer dict.Release() index, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1]`)) defer index.Release() out := array.NewData(types.NewDictExtensionType(), 1, []*memory.Buffer{nil, index.Data().Buffers()[1]}, nil, 0, 0) out.SetDictionary(dict.Data()) return out }}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { mem := 
memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) t.Run("MakeData", func(t *testing.T) { f := tt.fields(mem) a := &exec.ArraySpan{ Type: f.Type, Len: f.Len, Nulls: f.Nulls, Offset: f.Offset, Buffers: f.Buffers, Scratch: f.Scratch, Children: f.Children, } got := a.MakeData() want := tt.want(mem) if !reflect.DeepEqual(got, want) { t.Errorf("ArraySpan.MakeData() = %v, want %v", got, want) } want.Release() got.Release() }) t.Run("MakeArray", func(t *testing.T) { f := tt.fields(mem) a := &exec.ArraySpan{ Type: f.Type, Len: f.Len, Nulls: f.Nulls, Offset: f.Offset, Buffers: f.Buffers, Scratch: f.Scratch, Children: f.Children, } arr := a.MakeArray() want := tt.want(mem) defer want.Release() exp := array.MakeFromData(want) assert.Truef(t, array.Equal(arr, exp), "expected: %s\ngot: %s", exp, arr) exp.Release() arr.Release() }) }) } } func TestArraySpan_SetSlice(t *testing.T) { type fields struct { Type arrow.DataType Len int64 Nulls int64 Offset int64 Buffers [3]exec.BufferSpan Scratch [2]uint64 Children []exec.ArraySpan } type args struct { off int64 length int64 } tests := []struct { name string fields fields args args wantNulls int64 }{ {"null type", fields{Type: arrow.Null}, args{5, 10}, 10}, {"not-null type", fields{Type: arrow.PrimitiveTypes.Int8}, args{5, 10}, 0}, {"not-null type with nulls", fields{Type: arrow.PrimitiveTypes.Int8, Nulls: -1}, args{5, 10}, array.UnknownNullCount}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { a := &exec.ArraySpan{ Type: tt.fields.Type, Len: tt.fields.Len, Nulls: tt.fields.Nulls, Offset: tt.fields.Offset, Buffers: tt.fields.Buffers, Scratch: tt.fields.Scratch, Children: tt.fields.Children, } a.SetSlice(tt.args.off, tt.args.length) assert.Equal(t, tt.args.off, a.Offset) assert.Equal(t, tt.args.length, a.Len) assert.Equal(t, tt.wantNulls, a.Nulls) }) } } func TestArraySpan_FillFromScalar(t *testing.T) { var ( expDecimalBuf [arrow.Decimal128SizeBytes]byte expScratch [2]uint64 ) 
endian.Native.PutUint64(expDecimalBuf[:], 1234) endian.Native.PutUint32(arrow.Uint64Traits.CastToBytes(expScratch[:])[4:], 10) dict, _, _ := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World"]`)) defer dict.Release() tests := []struct { name string args scalar.Scalar exp exec.ArraySpan }{ {"null-type", scalar.MakeNullScalar(arrow.Null), exec.ArraySpan{Type: arrow.Null, Len: 1, Nulls: 1}}, {"bool valid", scalar.MakeScalar(true), exec.ArraySpan{ Type: arrow.FixedWidthTypes.Boolean, Len: 1, Nulls: 0, Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{0x01}}, {}}, }}, {"bool valid false", scalar.MakeScalar(false), exec.ArraySpan{ Type: arrow.FixedWidthTypes.Boolean, Len: 1, Nulls: 0, Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{0x00}}, {}}, }}, {"primitive null", scalar.MakeNullScalar(arrow.PrimitiveTypes.Int32), exec.ArraySpan{ Type: arrow.PrimitiveTypes.Int32, Len: 1, Nulls: 1, Buffers: [3]exec.BufferSpan{{Buf: []byte{0x00}}, {Buf: []byte{0, 0, 0, 0}}, {}}, }}, {"decimal valid", scalar.NewDecimal128Scalar(decimal128.FromU64(1234), &arrow.Decimal128Type{Precision: 12, Scale: 2}), exec.ArraySpan{ Type: &arrow.Decimal128Type{Precision: 12, Scale: 2}, Len: 1, Nulls: 0, Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: expDecimalBuf[:]}, {}}, }}, {"dictionary scalar", scalar.NewDictScalar(scalar.NewInt8Scalar(1), dict), exec.ArraySpan{ Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}, Len: 1, Nulls: 0, Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{1}}, {}, }, Children: []exec.ArraySpan{{ Type: arrow.BinaryTypes.String, Len: 2, Buffers: [3]exec.BufferSpan{ {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, }, }}, }, }, {"binary scalar", 
scalar.NewBinaryScalar(dict.Data().Buffers()[2], arrow.BinaryTypes.String), exec.ArraySpan{ Type: arrow.BinaryTypes.String, Len: 1, Nulls: 0, Scratch: expScratch, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x01}}, {Buf: arrow.Uint64Traits.CastToBytes(expScratch[:1])}, {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}}, }, }, {"large binary", scalar.NewLargeStringScalarFromBuffer(dict.Data().Buffers()[2]), exec.ArraySpan{ Type: arrow.BinaryTypes.LargeString, Len: 1, Nulls: 0, Scratch: [2]uint64{0, 10}, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x01}}, {Buf: arrow.Uint64Traits.CastToBytes([]uint64{0, 10})}, {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}}, }}, {"fixed size binary", scalar.NewFixedSizeBinaryScalar(dict.Data().Buffers()[2], &arrow.FixedSizeBinaryType{ByteWidth: 10}), exec.ArraySpan{ Type: &arrow.FixedSizeBinaryType{ByteWidth: 10}, Len: 1, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x01}}, {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, {}, }, }}, {"map scalar null value", scalar.MakeNullScalar(arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String)), exec.ArraySpan{ Type: arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), Len: 1, Nulls: 1, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0}}, {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, {}, }, Children: []exec.ArraySpan{{ Type: arrow.StructOf(arrow.Field{Name: "key", Type: arrow.PrimitiveTypes.Int8}, arrow.Field{Name: "value", Type: arrow.BinaryTypes.String, Nullable: true}), Len: 0, Nulls: 0, Buffers: [3]exec.BufferSpan{ {Buf: []byte{}}, {}, {}, }, Children: []exec.ArraySpan{ { Type: arrow.PrimitiveTypes.Int8, Buffers: [3]exec.BufferSpan{ {Buf: []byte{}}, {Buf: []byte{}}, {}, }, }, { Type: arrow.BinaryTypes.String, Buffers: [3]exec.BufferSpan{ {Buf: []byte{}}, {Buf: []byte{}}, {Buf: []byte{}}, }, }, }, }}, }}, {"list scalar", scalar.NewListScalarData(dict.Data()), exec.ArraySpan{ Type: 
arrow.ListOf(arrow.BinaryTypes.String), Len: 1, Scratch: [2]uint64{ *(*uint64)(unsafe.Pointer(&[]int32{0, 2}[0])), 0, }, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x1}}, {Buf: arrow.Int32Traits.CastToBytes([]int32{0, 2})}, }, Children: []exec.ArraySpan{{ Type: arrow.BinaryTypes.String, Len: 2, Buffers: [3]exec.BufferSpan{ {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, }, }}, }, }, {"large list scalar", scalar.NewLargeListScalarData(dict.Data()), exec.ArraySpan{ Type: arrow.LargeListOf(arrow.BinaryTypes.String), Len: 1, Scratch: [2]uint64{0, 2}, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x1}}, {Buf: arrow.Int64Traits.CastToBytes([]int64{0, 2})}, }, Children: []exec.ArraySpan{{ Type: arrow.BinaryTypes.String, Len: 2, Buffers: [3]exec.BufferSpan{ {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, }, }}, }, }, {"fixed size list", scalar.NewFixedSizeListScalar(dict), exec.ArraySpan{ Type: arrow.FixedSizeListOf(2, arrow.BinaryTypes.String), Len: 1, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x1}}, {}, {}, }, Children: []exec.ArraySpan{{ Type: arrow.BinaryTypes.String, Len: 2, Buffers: [3]exec.BufferSpan{ {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, }, }}, }, }, {"struct scalar", func() scalar.Scalar { s, _ := scalar.NewStructScalarWithNames([]scalar.Scalar{ scalar.MakeScalar(int32(5)), scalar.MakeScalar(uint8(10)), }, []string{"int32", "uint8"}) return s }(), exec.ArraySpan{ Type: arrow.StructOf( arrow.Field{Name: "int32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, 
arrow.Field{Name: "uint8", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}), Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x1}}, {}, {}, }, Len: 1, Children: []exec.ArraySpan{ { Type: arrow.PrimitiveTypes.Int32, Len: 1, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x1}}, {Buf: arrow.Int32Traits.CastToBytes([]int32{5})}, {}, }, }, { Type: arrow.PrimitiveTypes.Uint8, Len: 1, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x1}}, {Buf: []byte{10}}, {}, }, }, }, }, }, {"dense union scalar", func() scalar.Scalar { dt := arrow.UnionOf(arrow.DenseMode, []arrow.Field{ {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, }, []arrow.UnionTypeCode{3, 42, 43}) return scalar.NewDenseUnionScalar(scalar.MakeScalar(uint64(25)), 42, dt.(*arrow.DenseUnionType)) }(), exec.ArraySpan{ Type: arrow.UnionOf(arrow.DenseMode, []arrow.Field{ {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, }, []arrow.UnionTypeCode{3, 42, 43}), Len: 1, Scratch: [2]uint64{42, 1}, Buffers: [3]exec.BufferSpan{{}, {Buf: []byte{42}}, {Buf: arrow.Int32Traits.CastToBytes([]int32{0, 1})}, }, Children: []exec.ArraySpan{ { Type: arrow.BinaryTypes.String, Buffers: [3]exec.BufferSpan{ {Buf: []byte{}}, {Buf: []byte{}}, {Buf: []byte{}}, }, }, { Type: arrow.PrimitiveTypes.Uint64, Len: 1, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x1}}, {Buf: arrow.Uint64Traits.CastToBytes([]uint64{25})}, {}, }, }, { Type: arrow.PrimitiveTypes.Uint64, Buffers: [3]exec.BufferSpan{ {Buf: []byte{}}, {Buf: []byte{}}, {}, }, }, }, }, }, {"sparse union", func() scalar.Scalar { dt := arrow.UnionOf(arrow.SparseMode, []arrow.Field{ {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "number", Type: arrow.PrimitiveTypes.Uint64, 
Nullable: true}, {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, }, []arrow.UnionTypeCode{3, 42, 43}) return scalar.NewSparseUnionScalarFromValue(scalar.MakeScalar(uint64(25)), 1, dt.(*arrow.SparseUnionType)) }(), exec.ArraySpan{ Type: arrow.UnionOf(arrow.SparseMode, []arrow.Field{ {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, }, []arrow.UnionTypeCode{3, 42, 43}), Len: 1, Scratch: [2]uint64{42, 0}, Buffers: [3]exec.BufferSpan{{}, {Buf: []byte{42}}, {}, }, Children: []exec.ArraySpan{ { Type: arrow.BinaryTypes.String, Len: 1, Nulls: 1, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x0}}, {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, {}, }, }, { Type: arrow.PrimitiveTypes.Uint64, Len: 1, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x1}}, {Buf: arrow.Uint64Traits.CastToBytes([]uint64{25})}, {}, }, }, { Type: arrow.PrimitiveTypes.Uint64, Len: 1, Nulls: 1, Buffers: [3]exec.BufferSpan{ {Buf: []byte{0x0}}, {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, {}, }, }, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { a := &exec.ArraySpan{ Nulls: array.UnknownNullCount, Buffers: [3]exec.BufferSpan{{SelfAlloc: true, Owner: &memory.Buffer{}}, {SelfAlloc: true, Owner: &memory.Buffer{}}, {}}, } a.FillFromScalar(tt.args) assert.Equal(t, tt.exp, *a) }) } } arrow-go-18.2.0/arrow/compute/exec/utils.go000066400000000000000000000162211476434502500205740ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package exec import ( "fmt" "math" "sync/atomic" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/memory" "golang.org/x/exp/constraints" "golang.org/x/exp/slices" ) // GetSpanValues returns a properly typed slice by reinterpreting // the buffer at index i using unsafe.Slice. This will take into account // the offset of the given ArraySpan. func GetSpanValues[T arrow.FixedWidthType](span *ArraySpan, i int) []T { if len(span.Buffers[i].Buf) == 0 { return nil } ret := unsafe.Slice((*T)(unsafe.Pointer(&span.Buffers[i].Buf[0])), span.Offset+span.Len) return ret[span.Offset:] } // GetSpanOffsets is like GetSpanValues, except it is only for int32 // or int64 and adds the additional 1 expected value for an offset // buffer (ie. len(output) == span.Len+1) func GetSpanOffsets[T int32 | int64](span *ArraySpan, i int) []T { ret := unsafe.Slice((*T)(unsafe.Pointer(&span.Buffers[i].Buf[0])), span.Offset+span.Len+1) return ret[span.Offset:] } func Min[T constraints.Ordered](a, b T) T { if a < b { return a } return b } func Max[T constraints.Ordered](a, b T) T { if a > b { return a } return b } // OptionsInit should be used in the case where a KernelState is simply // represented with a specific type by value (instead of pointer). 
// This will initialize the KernelState as a value-copied instance of // the passed in function options argument to ensure separation // and allow the kernel to manipulate the options if necessary without // any negative consequences since it will have its own copy of the options. func OptionsInit[T any](_ *KernelCtx, args KernelInitArgs) (KernelState, error) { if opts, ok := args.Options.(*T); ok { return *opts, nil } return nil, fmt.Errorf("%w: attempted to initialize kernel state from invalid function options", arrow.ErrInvalid) } type arrayBuilder[T arrow.NumericType | bool] interface { array.Builder Append(T) AppendValues([]T, []bool) } func ArrayFromSlice[T arrow.NumericType | bool](mem memory.Allocator, data []T) arrow.Array { bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T]) defer bldr.Release() bldr.AppendValues(data, nil) return bldr.NewArray() } func ArrayFromSliceWithValid[T arrow.NumericType | bool](mem memory.Allocator, data []T, valid []bool) arrow.Array { bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T]) defer bldr.Release() bldr.AppendValues(data, valid) return bldr.NewArray() } func RechunkArraysConsistently(groups [][]arrow.Array) [][]arrow.Array { if len(groups) <= 1 { return groups } var totalLen int for _, a := range groups[0] { totalLen += a.Len() } if totalLen == 0 { return groups } rechunked := make([][]arrow.Array, len(groups)) offsets := make([]int64, len(groups)) // scan all array vectors at once, rechunking along the way var start int64 for start < int64(totalLen) { // first compute max possible length for next chunk var chunkLength int64 = math.MaxInt64 for i, g := range groups { offset := offsets[i] // skip any done arrays including 0-length for offset == int64(g[0].Len()) { g = g[1:] offset = 0 } arr := g[0] chunkLength = Min(chunkLength, int64(arr.Len())-offset) offsets[i] = offset groups[i] = g } // now slice all the arrays along this chunk size for i, g := range groups { offset := 
offsets[i] arr := g[0] if offset == 0 && int64(arr.Len()) == chunkLength { // slice spans entire array arr.Retain() rechunked[i] = append(rechunked[i], arr) } else { rechunked[i] = append(rechunked[i], array.NewSlice(arr, int64(offset), int64(offset+chunkLength))) } offsets[i] += chunkLength } start += int64(chunkLength) } return rechunked } type ChunkResolver struct { offsets []int64 cached int64 } func NewChunkResolver(chunks []arrow.Array) *ChunkResolver { offsets := make([]int64, len(chunks)+1) var offset int64 for i, c := range chunks { curOffset := offset offset += int64(c.Len()) offsets[i] = curOffset } offsets[len(chunks)] = offset return &ChunkResolver{offsets: offsets} } func (c *ChunkResolver) Resolve(idx int64) (chunk, index int64) { // some algorithms consecutively access indexes that are a // relatively small distance from each other, falling into // the same chunk. // This is trivial when merging (assuming each side of the // merge uses its own resolver), but also in the inner // recursive invocations of partitioning. 
if len(c.offsets) <= 1 { return 0, idx } cached := atomic.LoadInt64(&c.cached) cacheHit := idx >= c.offsets[cached] && idx < c.offsets[cached+1] if cacheHit { return cached, idx - c.offsets[cached] } chkIdx, found := slices.BinarySearch(c.offsets, idx) if !found { chkIdx-- } chunk, index = int64(chkIdx), idx-c.offsets[chkIdx] atomic.StoreInt64(&c.cached, chunk) return } type arrayTypes interface { arrow.FixedWidthType | arrow.TemporalType | bool | string | []byte } type ArrayIter[T arrayTypes] interface { Next() T } type BoolIter struct { Rdr *bitutil.BitmapReader } func NewBoolIter(arr *ArraySpan) ArrayIter[bool] { return &BoolIter{ Rdr: bitutil.NewBitmapReader(arr.Buffers[1].Buf, int(arr.Offset), int(arr.Len))} } func (b *BoolIter) Next() (out bool) { out = b.Rdr.Set() b.Rdr.Next() return } type PrimitiveIter[T arrow.FixedWidthType] struct { Values []T } func NewPrimitiveIter[T arrow.FixedWidthType](arr *ArraySpan) ArrayIter[T] { return &PrimitiveIter[T]{Values: GetSpanValues[T](arr, 1)} } func (p *PrimitiveIter[T]) Next() (v T) { v = p.Values[0] p.Values = p.Values[1:] return } type VarBinaryIter[OffsetT int32 | int64] struct { Offsets []OffsetT Data []byte Pos int64 } func NewVarBinaryIter[OffsetT int32 | int64](arr *ArraySpan) ArrayIter[[]byte] { return &VarBinaryIter[OffsetT]{ Offsets: GetSpanOffsets[OffsetT](arr, 1), Data: arr.Buffers[2].Buf, } } func (v *VarBinaryIter[OffsetT]) Next() []byte { cur := v.Pos v.Pos++ return v.Data[v.Offsets[cur]:v.Offsets[v.Pos]] } type FSBIter struct { Data []byte Width int Pos int64 } func NewFSBIter(arr *ArraySpan) ArrayIter[[]byte] { return &FSBIter{ Data: arr.Buffers[1].Buf, Width: arr.Type.(arrow.FixedWidthDataType).Bytes(), } } func (f *FSBIter) Next() []byte { start := f.Width * int(f.Pos) f.Pos++ return f.Data[start : start+f.Width] } arrow-go-18.2.0/arrow/compute/exec/utils_test.go000066400000000000000000000066151476434502500216410ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under 
one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package exec_test import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/compute/exec" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestRechunkConsistentArraysTrivial(t *testing.T) { var groups [][]arrow.Array rechunked := exec.RechunkArraysConsistently(groups) assert.Zero(t, rechunked) mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) a1 := exec.ArrayFromSlice(mem, []int16{}) defer a1.Release() a2 := exec.ArrayFromSlice(mem, []int16{}) defer a2.Release() b1 := exec.ArrayFromSlice(mem, []int32{}) defer b1.Release() groups = [][]arrow.Array{{a1, a2}, {}, {b1}} rechunked = exec.RechunkArraysConsistently(groups) assert.Len(t, rechunked, 3) for _, arrvec := range rechunked { for _, arr := range arrvec { assert.Zero(t, arr.Len()) } } } func assertEqual[T arrow.NumericType](t *testing.T, mem memory.Allocator, arr arrow.Array, data []T) { exp := exec.ArrayFromSlice(mem, data) defer exp.Release() assert.Truef(t, array.Equal(exp, arr), "expected: %s\ngot: %s", exp, arr) } func TestRechunkArraysConsistentlyPlain(t *testing.T) { mem := 
memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) a1 := exec.ArrayFromSlice(mem, []int16{1, 2, 3}) defer a1.Release() a2 := exec.ArrayFromSlice(mem, []int16{4, 5}) defer a2.Release() a3 := exec.ArrayFromSlice(mem, []int16{6, 7, 8, 9}) defer a3.Release() b1 := exec.ArrayFromSlice(mem, []int32{41, 42}) defer b1.Release() b2 := exec.ArrayFromSlice(mem, []int32{43, 44, 45}) defer b2.Release() b3 := exec.ArrayFromSlice(mem, []int32{46, 47}) defer b3.Release() b4 := exec.ArrayFromSlice(mem, []int32{48, 49}) defer b4.Release() groups := [][]arrow.Array{{a1, a2, a3}, {b1, b2, b3, b4}} rechunked := exec.RechunkArraysConsistently(groups) assert.Len(t, rechunked, 2) ra := rechunked[0] rb := rechunked[1] assert.Len(t, ra, 5) assertEqual(t, mem, ra[0], []int16{1, 2}) ra[0].Release() assertEqual(t, mem, ra[1], []int16{3}) ra[1].Release() assertEqual(t, mem, ra[2], []int16{4, 5}) ra[2].Release() assertEqual(t, mem, ra[3], []int16{6, 7}) ra[3].Release() assertEqual(t, mem, ra[4], []int16{8, 9}) ra[4].Release() assert.Len(t, rb, 5) assertEqual(t, mem, rb[0], []int32{41, 42}) rb[0].Release() assertEqual(t, mem, rb[1], []int32{43}) rb[1].Release() assertEqual(t, mem, rb[2], []int32{44, 45}) rb[2].Release() assertEqual(t, mem, rb[3], []int32{46, 47}) rb[3].Release() assertEqual(t, mem, rb[4], []int32{48, 49}) rb[4].Release() } arrow-go-18.2.0/arrow/compute/exec_internals_test.go000066400000000000000000000427451476434502500225640ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package compute import ( "bytes" "context" "fmt" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/compute/exec" "github.com/apache/arrow-go/v18/arrow/internal/testing/gen" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/stretchr/testify/suite" ) type ComputeInternalsTestSuite struct { suite.Suite mem *memory.CheckedAllocator execCtx ExecCtx ctx *exec.KernelCtx rng gen.RandomArrayGenerator } func (c *ComputeInternalsTestSuite) SetupTest() { c.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) c.rng = gen.NewRandomArrayGenerator(0, c.mem) c.resetCtx() } func (c *ComputeInternalsTestSuite) TearDownTest() { c.mem.AssertSize(c.T(), 0) } func (c *ComputeInternalsTestSuite) assertArrayEqual(expected, got arrow.Array) { c.Truef(array.Equal(expected, got), "expected: %s\ngot: %s", expected, got) } func (c *ComputeInternalsTestSuite) assertDatumEqual(expected arrow.Array, got Datum) { arr := got.(*ArrayDatum).MakeArray() defer arr.Release() c.Truef(array.Equal(expected, arr), "expected: %s\ngot: %s", expected, arr) } func (c *ComputeInternalsTestSuite) resetCtx() { c.execCtx = ExecCtx{Registry: GetFunctionRegistry(), ChunkSize: DefaultMaxChunkSize, PreallocContiguous: true} c.ctx = &exec.KernelCtx{Ctx: SetExecCtx(context.Background(), c.execCtx)} } func (c *ComputeInternalsTestSuite) getBoolArr(sz int64, trueprob, nullprob float64) arrow.Array { return 
c.rng.Boolean(sz, trueprob, nullprob) } func (c *ComputeInternalsTestSuite) getUint8Arr(sz int64, nullprob float64) arrow.Array { return c.rng.Uint8(sz, 0, 100, nullprob) } func (c *ComputeInternalsTestSuite) getInt32Arr(sz int64, nullprob float64) arrow.Array { return c.rng.Int32(sz, 0, 1000, nullprob) } func (c *ComputeInternalsTestSuite) getFloat64Arr(sz int64, nullprob float64) arrow.Array { return c.rng.Float64(sz, 0, 1000, nullprob) } func (c *ComputeInternalsTestSuite) getInt32Chunked(szs []int64) *arrow.Chunked { chunks := make([]arrow.Array, 0) for i, s := range szs { chunks = append(chunks, c.getInt32Arr(s, 0.1)) defer chunks[i].Release() } return arrow.NewChunked(arrow.PrimitiveTypes.Int32, chunks) } func (c *ComputeInternalsTestSuite) assertValidityZeroExtraBits(data []byte, length, offset int) { bitExtent := ((offset + length + 7) / 8) * 8 for i := offset + length; i < bitExtent; i++ { c.False(bitutil.BitIsSet(data, i)) } } type PropagateNullsSuite struct { ComputeInternalsTestSuite } func (p *PropagateNullsSuite) TestUnknownNullCountWithNullsZeroCopies() { const length int = 16 bitmap := [8]byte{254, 0, 0, 0, 0, 0, 0, 0} nulls := memory.NewBufferBytes(bitmap[:]) output := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nil, nil}, nil, 0, 0) input := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nulls, nil}, nil, array.UnknownNullCount, 0) var outSpan exec.ArraySpan outSpan.SetMembers(output) batch := ExecBatch{Values: []Datum{NewDatum(input)}, Len: int64(length)} p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(&batch), &outSpan)) p.Same(nulls, outSpan.Buffers[0].Owner) p.EqualValues(array.UnknownNullCount, outSpan.Nulls) p.Equal(9, int(outSpan.Len)-bitutil.CountSetBits(outSpan.Buffers[0].Buf, int(outSpan.Offset), int(outSpan.Len))) } func (p *PropagateNullsSuite) TestUnknownNullCountWithoutNulls() { const length int = 16 bitmap := [8]byte{255, 255, 0, 0, 0, 0, 0, 0} nulls := 
memory.NewBufferBytes(bitmap[:]) output := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nil, nil}, nil, 0, 0) input := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nulls, nil}, nil, array.UnknownNullCount, 0) var outSpan exec.ArraySpan outSpan.SetMembers(output) batch := ExecBatch{Values: []Datum{NewDatum(input)}, Len: int64(length)} p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(&batch), &outSpan)) p.EqualValues(-1, outSpan.Nulls) p.Same(nulls, outSpan.Buffers[0].Owner) } func (p *PropagateNullsSuite) TestSetAllNulls() { const length int = 16 checkSetAll := func(vals []Datum, prealloc bool) { // fresh bitmap with all 1s bitmapData := [2]byte{255, 255} preallocatedMem := memory.NewBufferBytes(bitmapData[:]) output := &exec.ArraySpan{ Type: arrow.FixedWidthTypes.Boolean, Len: int64(length), Nulls: array.UnknownNullCount, } if prealloc { output.Buffers[0].SetBuffer(preallocatedMem) } batch := &ExecBatch{Values: vals, Len: int64(length)} p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) if prealloc { // ensure that the buffer object is the same when we pass preallocated // memory to it p.Same(preallocatedMem, output.Buffers[0].Owner) } else { defer output.Buffers[0].Owner.Release() } p.NotNil(output.Buffers[0].Buf) expected := [2]byte{0, 0} p.True(bytes.Equal(expected[:], output.Buffers[0].Buf)) } var vals []Datum const trueProb float64 = 0.5 p.Run("Null Scalar", func() { i32Val := scalar.MakeScalar(int32(3)) vals = []Datum{NewDatum(i32Val), NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean))} checkSetAll(vals, true) checkSetAll(vals, false) arr := p.getBoolArr(int64(length), trueProb, 0) defer arr.Release() vals[0] = NewDatum(arr) defer vals[0].Release() checkSetAll(vals, true) checkSetAll(vals, false) }) p.Run("one all null", func() { arrAllNulls := p.getBoolArr(int64(length), trueProb, 1) defer arrAllNulls.Release() arrHalf := p.getBoolArr(int64(length), trueProb, 0.5) defer 
arrHalf.Release() vals = []Datum{NewDatum(arrHalf), NewDatum(arrAllNulls)} defer vals[0].Release() defer vals[1].Release() checkSetAll(vals, true) checkSetAll(vals, false) }) p.Run("one value is NullType", func() { nullarr := array.NewNull(length) arr := p.getBoolArr(int64(length), trueProb, 0) defer arr.Release() vals = []Datum{NewDatum(arr), NewDatum(nullarr)} defer vals[0].Release() checkSetAll(vals, true) checkSetAll(vals, false) }) p.Run("Other scenarios", func() { // an all-null bitmap is zero-copied over, even though // there is a null-scalar earlier in the batch outSpan := &exec.ArraySpan{ Type: arrow.FixedWidthTypes.Boolean, Len: int64(length), } arrAllNulls := p.getBoolArr(int64(length), trueProb, 1) defer arrAllNulls.Release() batch := &ExecBatch{ Values: []Datum{ NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean)), NewDatum(arrAllNulls), }, Len: int64(length), } defer batch.Values[1].Release() p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), outSpan)) p.Same(arrAllNulls.Data().Buffers()[0], outSpan.Buffers[0].Owner) outSpan.Buffers[0].Owner.Release() }) } func (p *PropagateNullsSuite) TestSingleValueWithNulls() { const length int64 = 100 arr := p.getBoolArr(length, 0.5, 0.5) defer arr.Release() checkSliced := func(offset int64, prealloc bool, outOffset int64) { // unaligned bitmap, zero copy not possible sliced := array.NewSlice(arr, offset, int64(arr.Len())) defer sliced.Release() vals := []Datum{NewDatum(sliced)} defer vals[0].Release() output := &exec.ArraySpan{ Type: arrow.FixedWidthTypes.Boolean, Len: vals[0].Len(), Offset: outOffset, } batch := &ExecBatch{Values: vals, Len: vals[0].Len()} var preallocatedBitmap *memory.Buffer if prealloc { preallocatedBitmap = memory.NewResizableBuffer(p.mem) preallocatedBitmap.Resize(int(bitutil.BytesForBits(int64(sliced.Len()) + outOffset))) defer preallocatedBitmap.Release() output.Buffers[0].SetBuffer(preallocatedBitmap) output.Buffers[0].SelfAlloc = true } else { p.EqualValues(0, 
output.Offset) } p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) if !prealloc { parentBuf := arr.Data().Buffers()[0] if offset == 0 { // validity bitmap same, no slice p.Same(parentBuf, output.Buffers[0].Owner) } else if offset%8 == 0 { // validity bitmap sliced p.NotSame(parentBuf, output.Buffers[0].Owner) p.Same(parentBuf, output.Buffers[0].Owner.Parent()) defer output.Buffers[0].Owner.Release() } else { // new memory for offset not 0 mod 8 p.NotSame(parentBuf, output.Buffers[0].Owner) p.Nil(output.Buffers[0].Owner.Parent()) defer output.Buffers[0].Owner.Release() } } else { // preallocated, so check that the validity bitmap is unbothered p.Same(preallocatedBitmap, output.Buffers[0].Owner) } p.EqualValues(sliced.NullN(), output.UpdateNullCount()) p.True(bitutil.BitmapEquals( sliced.NullBitmapBytes(), output.Buffers[0].Buf, int64(sliced.Data().Offset()), output.Offset, output.Len)) p.assertValidityZeroExtraBits(output.Buffers[0].Buf, int(output.Len), int(output.Offset)) } tests := []struct { offset, outoffset int64 prealloc bool }{ {8, 0, false}, {7, 0, false}, {8, 0, true}, {7, 0, true}, {8, 4, true}, {7, 4, true}, } for _, tt := range tests { name := fmt.Sprintf("off=%d,prealloc=%t,outoff=%d", tt.offset, tt.prealloc, tt.outoffset) p.Run(name, func() { checkSliced(tt.offset, tt.prealloc, tt.outoffset) }) } } func (p *PropagateNullsSuite) TestIntersectsNulls() { const length = 16 var ( // 0b01111111 0b11001111 bitmap1 = [8]byte{127, 207, 0, 0, 0, 0, 0, 0} // 0b11111110 0b01111111 bitmap2 = [8]byte{254, 127, 0, 0, 0, 0, 0, 0} // 0b11101111 0b11111110 bitmap3 = [8]byte{239, 254, 0, 0, 0, 0, 0, 0} ) arr1 := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{memory.NewBufferBytes(bitmap1[:]), nil}, nil, array.UnknownNullCount, 0) arr2 := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{memory.NewBufferBytes(bitmap2[:]), nil}, nil, array.UnknownNullCount, 0) arr3 := array.NewData(arrow.FixedWidthTypes.Boolean, 
length, []*memory.Buffer{memory.NewBufferBytes(bitmap3[:]), nil}, nil, array.UnknownNullCount, 0) checkCase := func(vals []Datum, exNullCount int, exBitmap []byte, prealloc bool, outoffset int) { batch := &ExecBatch{Values: vals, Len: length} output := &exec.ArraySpan{Type: arrow.FixedWidthTypes.Boolean, Len: length} var nulls *memory.Buffer if prealloc { // make the buffer one byte bigger so we can have non-zero offsets nulls = memory.NewResizableBuffer(p.mem) nulls.Resize(3) defer nulls.Release() output.Buffers[0].SetBuffer(nulls) output.Buffers[0].SelfAlloc = true } else { // non-zero output offset not permitted unless output memory is preallocated p.Equal(0, outoffset) } output.Offset = int64(outoffset) p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) // preallocated memory used if prealloc { p.Same(nulls, output.Buffers[0].Owner) } else { defer output.Buffers[0].Owner.Release() } p.EqualValues(array.UnknownNullCount, output.Nulls) p.EqualValues(exNullCount, output.UpdateNullCount()) p.True(bitutil.BitmapEquals(exBitmap, output.Buffers[0].Buf, 0, output.Offset, length)) p.assertValidityZeroExtraBits(output.Buffers[0].Buf, int(output.Len), int(output.Offset)) } p.Run("0b01101110 0b01001110", func() { // 0b01101110 0b01001110 expected := [2]byte{110, 78} checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], false, 0) checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], true, 0) checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], true, 4) }) p.Run("0b01111110 0b01001111", func() { expected := [2]byte{126, 79} checkCase([]Datum{NewDatum(arr1), NewDatum(arr2)}, 5, expected[:], false, 0) checkCase([]Datum{NewDatum(arr1), NewDatum(arr2)}, 5, expected[:], true, 4) }) } func TestComputeInternals(t *testing.T) { suite.Run(t, new(PropagateNullsSuite)) } type ExecSpanItrSuite struct { ComputeInternalsTestSuite iter spanIterator } func (e *ExecSpanItrSuite) 
setupIterator(batch *ExecBatch, maxChunk int64) { var err error _, e.iter, err = iterateExecSpans(batch, maxChunk, true) e.NoError(err) } func (e *ExecSpanItrSuite) checkIteration(input *ExecBatch, chunksize int, exBatchSizes []int) { e.setupIterator(input, int64(chunksize)) var ( batch exec.ExecSpan curPos int64 pos int64 next bool ) for _, sz := range exBatchSizes { batch, pos, next = e.iter() e.True(next) e.EqualValues(sz, batch.Len) for j, val := range input.Values { switch val := val.(type) { case *ScalarDatum: e.Truef(scalar.Equals(batch.Values[j].Scalar, val.Value), "expected: %s\ngot: %s", val.Value, batch.Values[j].Scalar) case *ArrayDatum: arr := val.MakeArray() sl := array.NewSlice(arr, curPos, curPos+batch.Len) got := batch.Values[j].Array.MakeArray() e.Truef(array.Equal(sl, got), "expected: %s\ngot: %s", sl, got) got.Release() arr.Release() sl.Release() case *ChunkedDatum: carr := val.Value if batch.Len == 0 { e.Zero(carr.Len()) } else { chkd := array.NewChunkedSlice(carr, curPos, curPos+batch.Len) defer chkd.Release() e.Len(chkd.Chunks(), 1) got := batch.Values[j].Array.MakeArray() defer got.Release() e.Truef(array.Equal(got, chkd.Chunk(0)), "expected: %s\ngot: %s", chkd.Chunk(0), got) } } } curPos += int64(sz) e.EqualValues(curPos, pos) } batch, pos, next = e.iter() e.Zero(batch) e.False(next) e.EqualValues(input.Len, pos) } func (e *ExecSpanItrSuite) TestBasics() { const length = 100 arr1 := e.getInt32Arr(length, 0.1) defer arr1.Release() arr2 := e.getFloat64Arr(length, 0.1) defer arr2.Release() input := &ExecBatch{ Len: length, Values: []Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(int32(3))}, } defer func() { for _, v := range input.Values { v.Release() } }() e.Run("simple", func() { e.setupIterator(input, DefaultMaxChunkSize) batch, pos, next := e.iter() e.True(next) e.Len(batch.Values, 3) e.EqualValues(length, batch.Len) e.EqualValues(length, pos) in1 := input.Values[0].(*ArrayDatum).MakeArray() defer in1.Release() in2 := 
input.Values[1].(*ArrayDatum).MakeArray() defer in2.Release() out1 := batch.Values[0].Array.MakeArray() defer out1.Release() out2 := batch.Values[1].Array.MakeArray() defer out2.Release() e.Truef(array.Equal(in1, out1), "expected: %s\ngot: %s", in1, out1) e.Truef(array.Equal(in2, out2), "expected: %s\ngot: %s", in2, out2) e.True(scalar.Equals(input.Values[2].(*ScalarDatum).Value, batch.Values[2].Scalar), input.Values[2].(*ScalarDatum).Value, batch.Values[2].Scalar) _, pos, next = e.iter() e.EqualValues(length, pos) e.False(next) }) e.Run("iterations", func() { e.checkIteration(input, 16, []int{16, 16, 16, 16, 16, 16, 4}) }) } func (e *ExecSpanItrSuite) TestInputValidation() { arr1 := e.getInt32Arr(10, 0.1) defer arr1.Release() arr2 := e.getInt32Arr(9, 0.1) defer arr2.Release() // length mismatch batch := &ExecBatch{ Values: []Datum{&ArrayDatum{arr1.Data()}, &ArrayDatum{arr2.Data()}}, Len: 10, } _, _, err := iterateExecSpans(batch, DefaultMaxChunkSize, true) e.ErrorIs(err, arrow.ErrInvalid) // swap order of input batch.Values = []Datum{&ArrayDatum{arr2.Data()}, &ArrayDatum{arr1.Data()}} _, _, err = iterateExecSpans(batch, DefaultMaxChunkSize, true) e.ErrorIs(err, arrow.ErrInvalid) batch.Values = []Datum{&ArrayDatum{arr1.Data()}} _, _, err = iterateExecSpans(batch, DefaultMaxChunkSize, true) e.NoError(err) } func (e *ExecSpanItrSuite) TestChunkedArrays() { arr1 := e.getInt32Chunked([]int64{0, 20, 10}) defer arr1.Release() arr2 := e.getInt32Chunked([]int64{15, 15}) defer arr2.Release() arr3 := e.getInt32Arr(30, 0.1) defer arr3.Release() batch := &ExecBatch{ Values: []Datum{ &ChunkedDatum{arr1}, &ChunkedDatum{arr2}, &ArrayDatum{arr3.Data()}, NewDatum(int32(5)), NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean))}, Len: 30, } e.checkIteration(batch, 10, []int{10, 5, 5, 10}) e.checkIteration(batch, 20, []int{15, 5, 10}) e.checkIteration(batch, 30, []int{15, 5, 10}) } func (e *ExecSpanItrSuite) TestZeroLengthInput() { carr := 
arrow.NewChunked(arrow.PrimitiveTypes.Int32, []arrow.Array{}) checkArgs := func(batch *ExecBatch) { _, itr, err := iterateExecSpans(batch, DefaultMaxChunkSize, true) e.NoError(err) itrSpan, _, next := itr() e.False(next) e.Zero(itrSpan) } input := &ExecBatch{Len: 0} // zero-length chunkedarray with zero chunks input.Values = []Datum{&ChunkedDatum{carr}} checkArgs(input) // zero-length array arr := e.getInt32Arr(0, 0.1) defer arr.Release() input.Values = []Datum{&ArrayDatum{arr.Data()}} checkArgs(input) // chunkedarray with single empty chunk carr = e.getInt32Chunked([]int64{0}) input.Values = []Datum{&ChunkedDatum{carr}} checkArgs(input) } func TestExecSpanIterator(t *testing.T) { suite.Run(t, new(ExecSpanItrSuite)) } arrow-go-18.2.0/arrow/compute/exec_test.go000066400000000000000000000313671476434502500205030ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build go1.18 package compute import ( "strings" "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/compute/exec" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/stretchr/testify/suite" ) func ExecCopyArray(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { debug.Assert(len(batch.Values) == 1, "wrong number of values") valueSize := int64(batch.Values[0].Type().(arrow.FixedWidthDataType).BitWidth() / 8) arg0 := batch.Values[0].Array dst := out.Buffers[1].Buf[out.Offset*valueSize:] src := arg0.Buffers[1].Buf[arg0.Offset*valueSize:] copy(dst, src[:batch.Len*valueSize]) return nil } func ExecComputedBitmap(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { // propagate nulls not used. check that out bitmap isn't the same already // as the input bitmap arg0 := batch.Values[0].Array if bitutil.CountSetBits(arg0.Buffers[1].Buf, int(arg0.Offset), int(batch.Len)) > 0 { // check that the bitmap hasn't already been copied debug.Assert(!bitutil.BitmapEquals(arg0.Buffers[0].Buf, out.Buffers[0].Buf, arg0.Offset, out.Offset, batch.Len), "bitmap should not have already been copied") } bitutil.CopyBitmap(arg0.Buffers[0].Buf, int(arg0.Offset), int(batch.Len), out.Buffers[0].Buf, int(out.Offset)) return ExecCopyArray(ctx, batch, out) } func ExecNoPreallocatedData(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { // validity preallocated, not data debug.Assert(out.Offset == 0, "invalid offset for non-prealloc") valueSize := int64(batch.Values[0].Type().(arrow.FixedWidthDataType).BitWidth() / 8) out.Buffers[1].SetBuffer(ctx.Allocate(int(out.Len * valueSize))) out.Buffers[1].SelfAlloc = true return ExecCopyArray(ctx, batch, out) } func ExecNoPreallocatedAnything(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) 
error { // neither validity nor data preallocated debug.Assert(out.Offset == 0, "invalid offset for non-prealloc") out.Buffers[0].SetBuffer(ctx.AllocateBitmap(out.Len)) out.Buffers[0].SelfAlloc = true arg0 := batch.Values[0].Array bitutil.CopyBitmap(arg0.Buffers[0].Buf, int(arg0.Offset), int(batch.Len), out.Buffers[0].Buf, 0) // reuse kernel that allocates data return ExecNoPreallocatedData(ctx, batch, out) } type ExampleOptions struct { Value scalar.Scalar } func (e *ExampleOptions) TypeName() string { return "example" } type ExampleState struct { Value scalar.Scalar } func InitStateful(_ *exec.KernelCtx, args exec.KernelInitArgs) (exec.KernelState, error) { value := args.Options.(*ExampleOptions).Value return &ExampleState{Value: value}, nil } func ExecStateful(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { state := ctx.State.(*ExampleState) multiplier := state.Value.(*scalar.Int32).Value arg0 := batch.Values[0].Array arg0Data := exec.GetSpanValues[int32](&arg0, 1) dst := exec.GetSpanValues[int32](out, 1) for i, v := range arg0Data { dst[i] = v * multiplier } return nil } func ExecAddInt32(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { left := exec.GetSpanValues[int32](&batch.Values[0].Array, 1) right := exec.GetSpanValues[int32](&batch.Values[1].Array, 1) outValues := exec.GetSpanValues[int32](out, 1) for i := 0; i < int(batch.Len); i++ { outValues[i] = left[i] + right[i] } return nil } type CallScalarFuncSuite struct { ComputeInternalsTestSuite } func (c *CallScalarFuncSuite) addCopyFuncs() { registry = GetFunctionRegistry() fn := NewScalarFunction("test_copy", Unary(), EmptyFuncDoc) types := []arrow.DataType{arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float64} for _, t := range types { c.NoError(fn.AddNewKernel([]exec.InputType{exec.NewExactInput(t)}, exec.NewOutputType(t), ExecCopyArray, nil)) } c.True(registry.AddFunction(fn, false)) // a version which doesn't want the 
executor to call propagatenulls fn2 := NewScalarFunction("test_copy_computed_bitmap", Unary(), EmptyFuncDoc) kernel := exec.NewScalarKernel([]exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Uint8)}, exec.NewOutputType(arrow.PrimitiveTypes.Uint8), ExecComputedBitmap, nil) kernel.NullHandling = exec.NullComputedPrealloc c.NoError(fn2.AddKernel(kernel)) c.True(registry.AddFunction(fn2, false)) } func (c *CallScalarFuncSuite) addNoPreallocFuncs() { registry = GetFunctionRegistry() // a function that allocates its own output memory. we have cases // for both non-preallocated data and non-preallocated bitmap f1 := NewScalarFunction("test_nopre_data", Unary(), EmptyFuncDoc) f2 := NewScalarFunction("test_nopre_validity_or_data", Unary(), EmptyFuncDoc) kernel := exec.NewScalarKernel( []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Uint8)}, exec.NewOutputType(arrow.PrimitiveTypes.Uint8), ExecNoPreallocatedData, nil) kernel.MemAlloc = exec.MemNoPrealloc c.NoError(f1.AddKernel(kernel)) kernel.ExecFn = ExecNoPreallocatedAnything kernel.NullHandling = exec.NullComputedNoPrealloc c.NoError(f2.AddKernel(kernel)) c.True(registry.AddFunction(f1, false)) c.True(registry.AddFunction(f2, false)) } func (c *CallScalarFuncSuite) addStatefulFunc() { registry := GetFunctionRegistry() // this functions behavior depends on a static parameter that // is made available to the execution through its options object fn := NewScalarFunction("test_stateful", Unary(), EmptyFuncDoc) c.NoError(fn.AddNewKernel([]exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, exec.NewOutputType(arrow.PrimitiveTypes.Int32), ExecStateful, InitStateful)) c.True(registry.AddFunction(fn, false)) } func (c *CallScalarFuncSuite) addScalarFunc() { registry := GetFunctionRegistry() fn := NewScalarFunction("test_scalar_add_int32", Binary(), EmptyFuncDoc) c.NoError(fn.AddNewKernel([]exec.InputType{ exec.NewExactInput(arrow.PrimitiveTypes.Int32), exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, 
exec.NewOutputType(arrow.PrimitiveTypes.Int32), ExecAddInt32, nil)) c.True(registry.AddFunction(fn, false)) } func (c *CallScalarFuncSuite) SetupSuite() { c.addCopyFuncs() c.addNoPreallocFuncs() c.addStatefulFunc() c.addScalarFunc() } func (c *CallScalarFuncSuite) TestArgumentValidation() { // copy accepts only a single array arg arr := c.getInt32Arr(10, 0.1) defer arr.Release() d1 := &ArrayDatum{Value: arr.Data()} c.Run("too many args", func() { args := []Datum{d1, d1} _, err := CallFunction(c.ctx.Ctx, "test_copy", nil, args...) c.ErrorIs(err, arrow.ErrInvalid) }) c.Run("too few args", func() { _, err := CallFunction(c.ctx.Ctx, "test_copy", nil) c.ErrorIs(err, arrow.ErrInvalid) }) d1Scalar := NewDatum(int32(5)) result, err := CallFunction(c.ctx.Ctx, "test_copy", nil, d1) c.NoError(err) result.Release() result, err = CallFunction(c.ctx.Ctx, "test_copy", nil, d1Scalar) c.NoError(err) result.Release() } func (c *CallScalarFuncSuite) TestPreallocationCases() { nullProb := float64(0.2) arr := c.getUint8Arr(100, nullProb) defer arr.Release() funcNames := []string{"test_copy", "test_copy_computed_bitmap"} for _, funcName := range funcNames { c.Run(funcName, func() { c.resetCtx() c.Run("single output default", func() { result, err := CallFunction(c.ctx.Ctx, funcName, nil, &ArrayDatum{arr.Data()}) c.NoError(err) defer result.Release() c.Equal(KindArray, result.Kind()) c.assertDatumEqual(arr, result) }) c.Run("exec chunks", func() { // set the exec_chunksize to be smaller so now we have // several invocations of the kernel, // but still only one output array c.execCtx.ChunkSize = 80 result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) c.NoError(err) defer result.Release() c.Equal(KindArray, result.Kind()) c.assertDatumEqual(arr, result) }) c.Run("not multiple 8 chunk", func() { // chunksize is not a multiple of 8 c.execCtx.ChunkSize = 11 result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, 
&ArrayDatum{arr.Data()}) c.NoError(err) defer result.Release() c.Equal(KindArray, result.Kind()) c.assertDatumEqual(arr, result) }) c.Run("chunked", func() { // input is chunked, output is one big chunk chk1, chk2 := array.NewSlice(arr, 0, 10), array.NewSlice(arr, 10, int64(arr.Len())) defer chk1.Release() defer chk2.Release() carr := arrow.NewChunked(arr.DataType(), []arrow.Array{chk1, chk2}) defer carr.Release() result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ChunkedDatum{carr}) c.NoError(err) defer result.Release() c.Equal(KindChunked, result.Kind()) actual := result.(*ChunkedDatum).Value c.Len(actual.Chunks(), 1) c.Truef(array.ChunkedEqual(actual, carr), "expected: %s\ngot: %s", carr, actual) }) c.Run("independent", func() { // preallocate independently for each batch c.execCtx.PreallocContiguous = false c.execCtx.ChunkSize = 40 result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) c.NoError(err) defer result.Release() c.Equal(KindChunked, result.Kind()) carr := result.(*ChunkedDatum).Value c.Len(carr.Chunks(), 3) sl := array.NewSlice(arr, 0, 40) defer sl.Release() c.assertArrayEqual(sl, carr.Chunk(0)) sl = array.NewSlice(arr, 40, 80) defer sl.Release() c.assertArrayEqual(sl, carr.Chunk(1)) sl = array.NewSlice(arr, 80, int64(arr.Len())) defer sl.Release() c.assertArrayEqual(sl, carr.Chunk(2)) }) }) } } func (c *CallScalarFuncSuite) TestBasicNonStandardCases() { // test some more cases // // * validity bitmap computed by kernel rather than propagate nulls // * data not pre-allocated // * validity bitmap not pre-allocated nullProb := float64(0.2) arr := c.getUint8Arr(1000, nullProb) defer arr.Release() args := []Datum{&ArrayDatum{arr.Data()}} for _, funcName := range []string{"test_nopre_data", "test_nopre_validity_or_data"} { c.Run("funcName", func() { c.resetCtx() c.Run("single output default", func() { result, err := CallFunction(c.ctx.Ctx, funcName, nil, args...) 
c.NoError(err) defer result.Release() c.Equal(KindArray, result.Kind()) c.assertDatumEqual(arr, result) }) c.Run("split into 3 chunks", func() { c.execCtx.ChunkSize = 400 result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, args...) c.NoError(err) defer result.Release() c.Equal(KindChunked, result.Kind()) carr := result.(*ChunkedDatum).Value c.Len(carr.Chunks(), 3) sl := array.NewSlice(arr, 0, 400) defer sl.Release() c.assertArrayEqual(sl, carr.Chunk(0)) sl = array.NewSlice(arr, 400, 800) defer sl.Release() c.assertArrayEqual(sl, carr.Chunk(1)) sl = array.NewSlice(arr, 800, int64(arr.Len())) defer sl.Release() c.assertArrayEqual(sl, carr.Chunk(2)) }) }) } } func (c *CallScalarFuncSuite) TestStatefulKernel() { input, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3, null, 5]`)) defer input.Release() multiplier := scalar.MakeScalar(int32(2)) expected, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[2, 4, 6, null, 10]`)) defer expected.Release() options := &ExampleOptions{multiplier} result, err := CallFunction(c.ctx.Ctx, "test_stateful", options, &ArrayDatum{input.Data()}) c.NoError(err) defer result.Release() c.assertDatumEqual(expected, result) } func (c *CallScalarFuncSuite) TestScalarFunction() { args := []Datum{NewDatum(int32(5)), NewDatum(int32(7))} result, err := CallFunction(c.ctx.Ctx, "test_scalar_add_int32", nil, args...) c.NoError(err) defer result.Release() c.Equal(KindScalar, result.Kind()) expected := scalar.MakeScalar(int32(12)) c.True(scalar.Equals(expected, result.(*ScalarDatum).Value)) } func TestCallScalarFunctions(t *testing.T) { suite.Run(t, new(CallScalarFuncSuite)) } arrow-go-18.2.0/arrow/compute/executor.go000066400000000000000000000757621476434502500203650ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package compute import ( "context" "fmt" "math" "runtime" "sync" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/bitutil" "github.com/apache/arrow-go/v18/arrow/compute/exec" "github.com/apache/arrow-go/v18/arrow/internal" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/arrow/scalar" ) // ExecCtx holds simple contextual information for execution // such as the default ChunkSize for batch iteration, whether or not // to ensure contiguous preallocations for kernels that want preallocation, // and a reference to the desired function registry to use. // // An ExecCtx should be placed into a context.Context by using // SetExecCtx and GetExecCtx to pass it along for execution. type ExecCtx struct { // ChunkSize is the size used when iterating batches for execution // ChunkSize elements will be operated on as a time unless an argument // is a chunkedarray with a chunk that is smaller ChunkSize int64 // PreallocContiguous determines whether preallocating memory for // execution of compute attempts to preallocate a full contiguous // buffer for all of the chunks beforehand. 
PreallocContiguous bool // Registry allows specifying the Function Registry to utilize // when searching for kernel implementations. Registry FunctionRegistry // ExecChannelSize is the size of the channel used for passing // exec results to the WrapResults function. ExecChannelSize int // NumParallel determines the number of parallel goroutines // allowed for parallel executions. NumParallel int } type ctxExecKey struct{} const DefaultMaxChunkSize = math.MaxInt64 var ( // global default ExecCtx object, initialized with the // default max chunk size, contiguous preallocations, and // the default function registry. defaultExecCtx ExecCtx // WithAllocator returns a new context with the provided allocator // embedded into the context. WithAllocator = exec.WithAllocator // GetAllocator retrieves the allocator from the context, or returns // memory.DefaultAllocator if there was no allocator in the provided // context. GetAllocator = exec.GetAllocator ) // DefaultExecCtx returns the default exec context which will be used // if there is no ExecCtx set into the context for execution. // // This can be called to get a copy of the default values which can // then be modified to set into a context. 
// // The default exec context uses the following values: // - ChunkSize = DefaultMaxChunkSize (MaxInt64) // - PreallocContiguous = true // - Registry = GetFunctionRegistry() // - ExecChannelSize = 10 // - NumParallel = runtime.NumCPU() func DefaultExecCtx() ExecCtx { return defaultExecCtx } func init() { defaultExecCtx.ChunkSize = DefaultMaxChunkSize defaultExecCtx.PreallocContiguous = true defaultExecCtx.Registry = GetFunctionRegistry() defaultExecCtx.ExecChannelSize = 10 // default level of parallelism // set to 1 to disable parallelization defaultExecCtx.NumParallel = runtime.NumCPU() } // SetExecCtx returns a new child context containing the passed in ExecCtx func SetExecCtx(ctx context.Context, e ExecCtx) context.Context { return context.WithValue(ctx, ctxExecKey{}, e) } // GetExecCtx returns an embedded ExecCtx from the provided context. // If it does not contain an ExecCtx, then the default one is returned. func GetExecCtx(ctx context.Context) ExecCtx { e, ok := ctx.Value(ctxExecKey{}).(ExecCtx) if ok { return e } return defaultExecCtx } // ExecBatch is a unit of work for kernel execution. It contains a collection // of Array and Scalar values. // // ExecBatch is semantically similar to a RecordBatch but for a SQL-style // execution context. It represents a collection or records, but constant // "columns" are represented by Scalar values rather than having to be // converted into arrays with repeated values. type ExecBatch struct { Values []Datum // Guarantee is a predicate Expression guaranteed to evaluate to true for // all rows in this batch. // Guarantee Expression // Len is the semantic length of this ExecBatch. When the values are // all scalars, the length should be set to 1 for non-aggregate kernels. // Otherwise the length is taken from the array values. 
Aggregate kernels // can have an ExecBatch formed by projecting just the partition columns // from a batch in which case it would have scalar rows with length > 1 // // If the array values are of length 0, then the length is 0 regardless of // whether any values are Scalar. Len int64 } func (e ExecBatch) NumValues() int { return len(e.Values) } // simple struct for defining how to preallocate a particular buffer. type bufferPrealloc struct { bitWidth int addLen int } func allocateDataBuffer(ctx *exec.KernelCtx, length, bitWidth int) *memory.Buffer { switch bitWidth { case 1: return ctx.AllocateBitmap(int64(length)) default: bufsiz := int(bitutil.BytesForBits(int64(length * bitWidth))) return ctx.Allocate(bufsiz) } } func addComputeDataPrealloc(dt arrow.DataType, widths []bufferPrealloc) []bufferPrealloc { if typ, ok := dt.(arrow.FixedWidthDataType); ok { return append(widths, bufferPrealloc{bitWidth: typ.BitWidth()}) } switch dt.ID() { case arrow.BINARY, arrow.STRING, arrow.LIST, arrow.MAP: return append(widths, bufferPrealloc{bitWidth: 32, addLen: 1}) case arrow.LARGE_BINARY, arrow.LARGE_STRING, arrow.LARGE_LIST: return append(widths, bufferPrealloc{bitWidth: 64, addLen: 1}) case arrow.STRING_VIEW, arrow.BINARY_VIEW: return append(widths, bufferPrealloc{bitWidth: arrow.ViewHeaderSizeBytes * 8}) } return widths } // enum to define a generalized assumption of the nulls in the inputs type nullGeneralization int8 const ( nullGenPerhapsNull nullGeneralization = iota nullGenAllValid nullGenAllNull ) func getNullGen(val *exec.ExecValue) nullGeneralization { dtID := val.Type().ID() switch { case dtID == arrow.NULL: return nullGenAllNull case !internal.DefaultHasValidityBitmap(dtID): return nullGenAllValid case val.IsScalar(): if val.Scalar.IsValid() { return nullGenAllValid } return nullGenAllNull default: arr := val.Array // do not count if they haven't been counted already if arr.Nulls == 0 || arr.Buffers[0].Buf == nil { return nullGenAllValid } if arr.Nulls == arr.Len 
{ return nullGenAllNull } } return nullGenPerhapsNull } func getNullGenDatum(datum Datum) nullGeneralization { var val exec.ExecValue switch datum.Kind() { case KindArray: val.Array.SetMembers(datum.(*ArrayDatum).Value) case KindScalar: val.Scalar = datum.(*ScalarDatum).Value case KindChunked: return nullGenPerhapsNull default: debug.Assert(false, "should be array, scalar, or chunked!") return nullGenPerhapsNull } return getNullGen(&val) } // populate the validity bitmaps with the intersection of the nullity // of the arguments. If a preallocated bitmap is not provided, then one // will be allocated if needed (in some cases a bitmap can be zero-copied // from the arguments). If any Scalar value is null, then the entire // validity bitmap will be set to null. func propagateNulls(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ArraySpan) (err error) { if out.Type.ID() == arrow.NULL { // null output type is a no-op (rare but it happens) return } // this function is ONLY able to write into output with non-zero offset // when the bitmap is preallocated. 
if out.Offset != 0 && out.Buffers[0].Buf == nil { return fmt.Errorf("%w: can only propagate nulls into pre-allocated memory when output offset is non-zero", arrow.ErrInvalid) } var ( arrsWithNulls = make([]*exec.ArraySpan, 0, len(batch.Values)) isAllNull bool prealloc bool = out.Buffers[0].Buf != nil ) for i := range batch.Values { v := &batch.Values[i] nullGen := getNullGen(v) if nullGen == nullGenAllNull { isAllNull = true } if nullGen != nullGenAllValid && v.IsArray() { arrsWithNulls = append(arrsWithNulls, &v.Array) } } outBitmap := out.Buffers[0].Buf if isAllNull { // an all-null value gives us a short circuit opportunity // output should all be null out.Nulls = out.Len if prealloc { bitutil.SetBitsTo(outBitmap, out.Offset, out.Len, false) return } // walk all the values with nulls instead of breaking on the first // in case we find a bitmap that can be reused in the non-preallocated case for _, arr := range arrsWithNulls { if arr.Nulls == arr.Len && arr.Buffers[0].Owner != nil { buf := arr.GetBuffer(0) buf.Retain() out.Buffers[0].Buf = buf.Bytes() out.Buffers[0].Owner = buf return } } buf := ctx.AllocateBitmap(int64(out.Len)) out.Buffers[0].Owner = buf out.Buffers[0].Buf = buf.Bytes() out.Buffers[0].SelfAlloc = true bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, false) return } out.Nulls = array.UnknownNullCount switch len(arrsWithNulls) { case 0: out.Nulls = 0 if prealloc { bitutil.SetBitsTo(outBitmap, out.Offset, out.Len, true) } case 1: arr := arrsWithNulls[0] out.Nulls = arr.Nulls if prealloc { bitutil.CopyBitmap(arr.Buffers[0].Buf, int(arr.Offset), int(arr.Len), outBitmap, int(out.Offset)) return } switch { case arr.Offset == 0: out.Buffers[0] = arr.Buffers[0] out.Buffers[0].Owner.Retain() case arr.Offset%8 == 0: buf := memory.SliceBuffer(arr.GetBuffer(0), int(arr.Offset)/8, int(bitutil.BytesForBits(arr.Len))) out.Buffers[0].Buf = buf.Bytes() out.Buffers[0].Owner = buf default: buf := ctx.AllocateBitmap(int64(out.Len)) out.Buffers[0].Owner = 
buf out.Buffers[0].Buf = buf.Bytes() out.Buffers[0].SelfAlloc = true bitutil.CopyBitmap(arr.Buffers[0].Buf, int(arr.Offset), int(arr.Len), out.Buffers[0].Buf, 0) } return default: if !prealloc { buf := ctx.AllocateBitmap(int64(out.Len)) out.Buffers[0].Owner = buf out.Buffers[0].Buf = buf.Bytes() out.Buffers[0].SelfAlloc = true outBitmap = out.Buffers[0].Buf } acc := func(left, right *exec.ArraySpan) { debug.Assert(left.Buffers[0].Buf != nil, "invalid intersection for null propagation") debug.Assert(right.Buffers[0].Buf != nil, "invalid intersection for null propagation") bitutil.BitmapAnd(left.Buffers[0].Buf, right.Buffers[0].Buf, left.Offset, right.Offset, outBitmap, out.Offset, out.Len) } acc(arrsWithNulls[0], arrsWithNulls[1]) for _, arr := range arrsWithNulls[2:] { acc(out, arr) } } return } func inferBatchLength(values []Datum) (length int64, allSame bool) { length, allSame = -1, true areAllScalar := true for _, arg := range values { switch arg := arg.(type) { case *ArrayDatum: argLength := arg.Len() if length < 0 { length = argLength } else { if length != argLength { allSame = false return } } areAllScalar = false case *ChunkedDatum: argLength := arg.Len() if length < 0 { length = argLength } else { if length != argLength { allSame = false return } } areAllScalar = false } } if areAllScalar && len(values) > 0 { length = 1 } else if length < 0 { length = 0 } allSame = true return } // KernelExecutor is the interface for all executors to initialize and // call kernel execution functions on batches. type KernelExecutor interface { // Init must be called *after* the kernel's init method and any // KernelState must be set into the KernelCtx *before* calling // this Init method. This is to facilitate the case where // Init may be expensive and does not need to be called // again for each execution of the kernel. For example, // the same lookup table can be re-used for all scanned batches // in a dataset filter. 
Init(*exec.KernelCtx, exec.KernelInitArgs) error // Execute the kernel for the provided batch and pass the resulting // Datum values to the provided channel. Execute(context.Context, *ExecBatch, chan<- Datum) error // WrapResults exists for the case where an executor wants to post process // the batches of result datums. Such as creating a ChunkedArray from // multiple output batches or so on. Results from individual batch // executions should be read from the out channel, and WrapResults should // return the final Datum result. WrapResults(ctx context.Context, out <-chan Datum, chunkedArgs bool) Datum // CheckResultType checks the actual result type against the resolved // output type. If the types don't match an error is returned CheckResultType(out Datum) error // Clear resets the state in the executor so that it can be reused. Clear() } // the base implementation for executing non-aggregate kernels. type nonAggExecImpl struct { ctx *exec.KernelCtx ectx ExecCtx kernel exec.NonAggKernel outType arrow.DataType numOutBuf int dataPrealloc []bufferPrealloc preallocValidity bool } func (e *nonAggExecImpl) Clear() { e.ctx, e.kernel, e.outType = nil, nil, nil if e.dataPrealloc != nil { e.dataPrealloc = e.dataPrealloc[:0] } } func (e *nonAggExecImpl) Init(ctx *exec.KernelCtx, args exec.KernelInitArgs) (err error) { e.ctx, e.kernel = ctx, args.Kernel.(exec.NonAggKernel) e.outType, err = e.kernel.GetSig().OutType.Resolve(ctx, args.Inputs) e.ectx = GetExecCtx(ctx.Ctx) return } func (e *nonAggExecImpl) prepareOutput(length int) *exec.ExecResult { var nullCount int = array.UnknownNullCount if e.kernel.GetNullHandling() == exec.NullNoOutput { nullCount = 0 } output := &exec.ArraySpan{ Type: e.outType, Len: int64(length), Nulls: int64(nullCount), } if e.preallocValidity { buf := e.ctx.AllocateBitmap(int64(length)) output.Buffers[0].Owner = buf output.Buffers[0].Buf = buf.Bytes() output.Buffers[0].SelfAlloc = true } for i, pre := range e.dataPrealloc { if pre.bitWidth >= 0 { buf 
:= allocateDataBuffer(e.ctx, length+pre.addLen, pre.bitWidth) output.Buffers[i+1].Owner = buf output.Buffers[i+1].Buf = buf.Bytes() output.Buffers[i+1].SelfAlloc = true } } return output } func (e *nonAggExecImpl) CheckResultType(out Datum) error { typ := out.(ArrayLikeDatum).Type() if typ != nil && !arrow.TypeEqual(e.outType, typ) { return fmt.Errorf("%w: kernel type result mismatch: declared as %s, actual is %s", arrow.ErrType, e.outType, typ) } return nil } type spanIterator func() (exec.ExecSpan, int64, bool) func NewScalarExecutor() KernelExecutor { return &scalarExecutor{} } type scalarExecutor struct { nonAggExecImpl elideValidityBitmap bool preallocAllBufs bool preallocContiguous bool allScalars bool iter spanIterator iterLen int64 } func (s *scalarExecutor) Execute(ctx context.Context, batch *ExecBatch, data chan<- Datum) (err error) { s.allScalars, s.iter, err = iterateExecSpans(batch, s.ectx.ChunkSize, true) if err != nil { return } s.iterLen = batch.Len if batch.Len == 0 { result := array.MakeArrayOfNull(exec.GetAllocator(s.ctx.Ctx), s.outType, 0) defer result.Release() out := &exec.ArraySpan{} out.SetMembers(result.Data()) return s.emitResult(out, data) } if err = s.setupPrealloc(batch.Len, batch.Values); err != nil { return } return s.executeSpans(data) } func (s *scalarExecutor) WrapResults(ctx context.Context, out <-chan Datum, hasChunked bool) Datum { var ( output Datum acc []arrow.Array ) toChunked := func() { acc = output.(ArrayLikeDatum).Chunks() output.Release() output = nil } // get first output select { case <-ctx.Done(): return nil case output = <-out: // if the inputs contained at least one chunked array // then we want to return chunked output if hasChunked { toChunked() } } for { select { case <-ctx.Done(): // context is done, either cancelled or a timeout. // either way, we end early and return what we've got so far. 
return output case o, ok := <-out: if !ok { // channel closed, wrap it up if output != nil { return output } for _, c := range acc { defer c.Release() } chkd := arrow.NewChunked(s.outType, acc) defer chkd.Release() return NewDatum(chkd) } // if we get multiple batches of output, then we need // to return it as a chunked array. if acc == nil { toChunked() } defer o.Release() if o.Len() == 0 { // skip any empty batches continue } acc = append(acc, o.(*ArrayDatum).MakeArray()) } } } func (s *scalarExecutor) executeSpans(data chan<- Datum) (err error) { var ( input exec.ExecSpan output exec.ExecResult next bool ) if s.preallocContiguous { // make one big output alloc output := s.prepareOutput(int(s.iterLen)) output.Offset = 0 var resultOffset int64 var nextOffset int64 for err == nil { if input, nextOffset, next = s.iter(); !next { break } output.SetSlice(resultOffset, input.Len) err = s.executeSingleSpan(&input, output) resultOffset = nextOffset } if err != nil { output.Release() return } if output.Offset != 0 { output.SetSlice(0, s.iterLen) } return s.emitResult(output, data) } // fully preallocating, but not contiguously // we (maybe) preallocate only for the output of processing // the current chunk for err == nil { if input, _, next = s.iter(); !next { break } output = *s.prepareOutput(int(input.Len)) if err = s.executeSingleSpan(&input, &output); err != nil { output.Release() return } err = s.emitResult(&output, data) } return } func (s *scalarExecutor) executeSingleSpan(input *exec.ExecSpan, out *exec.ExecResult) error { switch { case out.Type.ID() == arrow.NULL: out.Nulls = out.Len case s.kernel.GetNullHandling() == exec.NullIntersection: if !s.elideValidityBitmap { propagateNulls(s.ctx, input, out) } case s.kernel.GetNullHandling() == exec.NullNoOutput: out.Nulls = 0 } return s.kernel.Exec(s.ctx, input, out) } func (s *scalarExecutor) setupPrealloc(totalLen int64, args []Datum) error { s.numOutBuf = len(s.outType.Layout().Buffers) outTypeID := s.outType.ID() 
// default to no validity pre-allocation for the following cases: // - Output Array is NullArray // - kernel.NullHandling is ComputeNoPrealloc or OutputNotNull s.preallocValidity = false if outTypeID != arrow.NULL { switch s.kernel.GetNullHandling() { case exec.NullComputedPrealloc: s.preallocValidity = true case exec.NullIntersection: s.elideValidityBitmap = true for _, a := range args { nullGen := getNullGenDatum(a) == nullGenAllValid s.elideValidityBitmap = s.elideValidityBitmap && nullGen } s.preallocValidity = !s.elideValidityBitmap case exec.NullNoOutput: s.elideValidityBitmap = true } } if s.kernel.GetMemAlloc() == exec.MemPrealloc { s.dataPrealloc = addComputeDataPrealloc(s.outType, s.dataPrealloc) } // validity bitmap either preallocated or elided, and all data buffers allocated // this is basically only true for primitive types that are not dict-encoded s.preallocAllBufs = ((s.preallocValidity || s.elideValidityBitmap) && len(s.dataPrealloc) == (s.numOutBuf-1) && !arrow.IsNested(outTypeID) && outTypeID != arrow.DICTIONARY) // contiguous prealloc only possible on non-nested types if all // buffers are preallocated. 
otherwise we have to go chunk by chunk // // some kernels are also unable to write into sliced outputs, so // we respect the kernel's attributes s.preallocContiguous = (s.ectx.PreallocContiguous && s.kernel.CanFillSlices() && s.preallocAllBufs) return nil } func (s *scalarExecutor) emitResult(resultData *exec.ArraySpan, data chan<- Datum) error { var output Datum if len(resultData.Buffers[0].Buf) != 0 { resultData.UpdateNullCount() } if s.allScalars { // we boxed scalar inputs as ArraySpan so now we have to unbox the output arr := resultData.MakeArray() defer arr.Release() sc, err := scalar.GetScalar(arr, 0) if err != nil { return err } if r, ok := sc.(scalar.Releasable); ok { defer r.Release() } output = NewDatum(sc) } else { d := resultData.MakeData() defer d.Release() output = NewDatum(d) } data <- output return nil } func checkAllIsValue(vals []Datum) error { for _, v := range vals { if !DatumIsValue(v) { return fmt.Errorf("%w: tried executing function with non-value type: %s", arrow.ErrInvalid, v) } } return nil } func checkIfAllScalar(batch *ExecBatch) bool { for _, v := range batch.Values { if v.Kind() != KindScalar { return false } } return batch.NumValues() > 0 } // iterateExecSpans sets up and returns a function which can iterate a batch // according to the chunk sizes. If the inputs contain chunked arrays, then // we will find the min(chunk sizes, maxChunkSize) to ensure we return // contiguous spans to execute on. // // the iteration function returns the next span to execute on, the current // position in the full batch, and a boolean indicating whether or not // a span was actually returned (there is data to process). 
func iterateExecSpans(batch *ExecBatch, maxChunkSize int64, promoteIfAllScalar bool) (haveAllScalars bool, itr spanIterator, err error) { if batch.NumValues() > 0 { inferred, allArgsSame := inferBatchLength(batch.Values) if inferred != batch.Len { return false, nil, fmt.Errorf("%w: value lengths differed from execbatch length", arrow.ErrInvalid) } if !allArgsSame { return false, nil, fmt.Errorf("%w: array args must all be the same length", arrow.ErrInvalid) } } var ( args []Datum = batch.Values haveChunked bool chunkIdxes = make([]int, len(args)) valuePositions = make([]int64, len(args)) valueOffsets = make([]int64, len(args)) pos, length int64 = 0, batch.Len ) haveAllScalars = checkIfAllScalar(batch) maxChunkSize = exec.Min(length, maxChunkSize) span := exec.ExecSpan{Values: make([]exec.ExecValue, len(args)), Len: 0} for i, a := range args { switch arg := a.(type) { case *ScalarDatum: span.Values[i].Scalar = arg.Value case *ArrayDatum: span.Values[i].Array.SetMembers(arg.Value) valueOffsets[i] = int64(arg.Value.Offset()) case *ChunkedDatum: // populate from first chunk carr := arg.Value if len(carr.Chunks()) > 0 { arr := carr.Chunk(0).Data() span.Values[i].Array.SetMembers(arr) valueOffsets[i] = int64(arr.Offset()) } else { // fill as zero len exec.FillZeroLength(carr.DataType(), &span.Values[i].Array) } haveChunked = true } } if haveAllScalars && promoteIfAllScalar { exec.PromoteExecSpanScalars(span) } nextChunkSpan := func(iterSz int64, span exec.ExecSpan) int64 { for i := 0; i < len(args) && iterSz > 0; i++ { // if the argument is not chunked, it's either a scalar or an array // in which case it doesn't influence the size of the span chunkedArg, ok := args[i].(*ChunkedDatum) if !ok { continue } arg := chunkedArg.Value if len(arg.Chunks()) == 0 { iterSz = 0 continue } var curChunk arrow.Array for { curChunk = arg.Chunk(chunkIdxes[i]) if valuePositions[i] == int64(curChunk.Len()) { // chunk is zero-length, or was exhausted in the previous // iteration, move to 
next chunk chunkIdxes[i]++ curChunk = arg.Chunk(chunkIdxes[i]) span.Values[i].Array.SetMembers(curChunk.Data()) valuePositions[i] = 0 valueOffsets[i] = int64(curChunk.Data().Offset()) continue } break } iterSz = exec.Min(int64(curChunk.Len())-valuePositions[i], iterSz) } return iterSz } return haveAllScalars, func() (exec.ExecSpan, int64, bool) { if pos == length { return exec.ExecSpan{}, pos, false } iterationSize := exec.Min(length-pos, maxChunkSize) if haveChunked { iterationSize = nextChunkSpan(iterationSize, span) } span.Len = iterationSize for i, a := range args { if a.Kind() != KindScalar { span.Values[i].Array.SetSlice(valuePositions[i]+valueOffsets[i], iterationSize) valuePositions[i] += iterationSize } } pos += iterationSize debug.Assert(pos <= length, "bad state for iteration exec span") return span, pos, true }, nil } var ( // have a pool of scalar executors to avoid excessive object creation scalarExecPool = sync.Pool{ New: func() any { return &scalarExecutor{} }, } vectorExecPool = sync.Pool{ New: func() any { return &vectorExecutor{} }, } ) func checkCanExecuteChunked(k *exec.VectorKernel) error { if k.ExecChunked == nil { return fmt.Errorf("%w: vector kernel cannot execute chunkwise and no chunked exec function defined", arrow.ErrInvalid) } if k.NullHandling == exec.NullIntersection { return fmt.Errorf("%w: null pre-propagation is unsupported for chunkedarray execution in vector kernels", arrow.ErrInvalid) } return nil } type vectorExecutor struct { nonAggExecImpl iter spanIterator results []*exec.ArraySpan iterLen int64 allScalars bool } func (v *vectorExecutor) Execute(ctx context.Context, batch *ExecBatch, data chan<- Datum) (err error) { final := v.kernel.(*exec.VectorKernel).Finalize if final != nil { if v.results == nil { v.results = make([]*exec.ArraySpan, 0, 1) } else { v.results = v.results[:0] } } // some vector kernels have a separate code path for handling chunked // arrays (VectorKernel.ExecChunked) so we check for any chunked // 
arrays. If we do and an ExecChunked function is defined // then we call that. hasChunked := haveChunkedArray(batch.Values) v.numOutBuf = len(v.outType.Layout().Buffers) v.preallocValidity = v.kernel.GetNullHandling() != exec.NullComputedNoPrealloc && v.kernel.GetNullHandling() != exec.NullNoOutput if v.kernel.GetMemAlloc() == exec.MemPrealloc { v.dataPrealloc = addComputeDataPrealloc(v.outType, v.dataPrealloc) } if v.kernel.(*exec.VectorKernel).CanExecuteChunkWise { v.allScalars, v.iter, err = iterateExecSpans(batch, v.ectx.ChunkSize, true) v.iterLen = batch.Len var ( input exec.ExecSpan next bool ) if v.iterLen == 0 { input.Values = make([]exec.ExecValue, batch.NumValues()) for i, v := range batch.Values { exec.FillZeroLength(v.(ArrayLikeDatum).Type(), &input.Values[i].Array) } err = v.exec(&input, data) } for err == nil { if input, _, next = v.iter(); !next { break } err = v.exec(&input, data) } if err != nil { return } } else { // kernel cannot execute chunkwise. if we have any chunked arrays, // then execchunked must be defined or we raise an error if hasChunked { if err = v.execChunked(batch, data); err != nil { return } } else { // no chunked arrays. 
we pack the args into an execspan // and call regular exec code path span := ExecSpanFromBatch(batch) if checkIfAllScalar(batch) { exec.PromoteExecSpanScalars(*span) } if err = v.exec(span, data); err != nil { return } } } if final != nil { // intermediate results require post-processing after execution is // completed (possibly involving some accumulated state) output, err := final(v.ctx, v.results) if err != nil { return err } for _, r := range output { d := r.MakeData() defer d.Release() data <- NewDatum(d) } } return nil } func (v *vectorExecutor) WrapResults(ctx context.Context, out <-chan Datum, hasChunked bool) Datum { // if kernel doesn't output chunked, just grab the one output and return it if !v.kernel.(*exec.VectorKernel).OutputChunked { var output Datum select { case <-ctx.Done(): return nil case output = <-out: } // we got an output datum, but let's wait for the channel to // close so we don't have any race conditions select { case <-ctx.Done(): output.Release() return nil case <-out: return output } } // if execution yielded multiple chunks then the result is a chunked array var ( output Datum acc []arrow.Array ) toChunked := func() { out := output.(ArrayLikeDatum).Chunks() acc = make([]arrow.Array, 0, len(out)) for _, o := range out { if o.Len() > 0 { acc = append(acc, o) } } if output.Kind() != KindChunked { output.Release() } output = nil } // get first output select { case <-ctx.Done(): return nil case output = <-out: if output == nil || ctx.Err() != nil { return nil } // if the inputs contained at least one chunked array // then we want to return chunked output if hasChunked { toChunked() } } for { select { case <-ctx.Done(): // context is done, either cancelled or a timeout. // either way, we end early and return what we've got so far. 
return output case o, ok := <-out: if !ok { // channel closed, wrap it up if output != nil { return output } for _, c := range acc { defer c.Release() } chkd := arrow.NewChunked(v.outType, acc) defer chkd.Release() return NewDatum(chkd) } // if we get multiple batches of output, then we need // to return it as a chunked array. if acc == nil { toChunked() } defer o.Release() if o.Len() == 0 { // skip any empty batches continue } acc = append(acc, o.(*ArrayDatum).MakeArray()) } } } func (v *vectorExecutor) exec(span *exec.ExecSpan, data chan<- Datum) (err error) { out := v.prepareOutput(int(span.Len)) if v.kernel.GetNullHandling() == exec.NullIntersection { if err = propagateNulls(v.ctx, span, out); err != nil { return } } if err = v.kernel.Exec(v.ctx, span, out); err != nil { return } return v.emitResult(out, data) } func (v *vectorExecutor) emitResult(result *exec.ArraySpan, data chan<- Datum) (err error) { if v.kernel.(*exec.VectorKernel).Finalize == nil { d := result.MakeData() defer d.Release() data <- NewDatum(d) } else { v.results = append(v.results, result) } return nil } func (v *vectorExecutor) execChunked(batch *ExecBatch, out chan<- Datum) error { if err := checkCanExecuteChunked(v.kernel.(*exec.VectorKernel)); err != nil { return err } output := v.prepareOutput(int(batch.Len)) input := make([]*arrow.Chunked, len(batch.Values)) for i, v := range batch.Values { switch val := v.(type) { case *ArrayDatum: chks := val.Chunks() input[i] = arrow.NewChunked(val.Type(), chks) chks[0].Release() defer input[i].Release() case *ChunkedDatum: input[i] = val.Value default: return fmt.Errorf("%w: handling with exec chunked", arrow.ErrNotImplemented) } } result, err := v.kernel.(*exec.VectorKernel).ExecChunked(v.ctx, input, output) if err != nil { return err } if len(result) == 0 { empty := output.MakeArray() defer empty.Release() out <- &ChunkedDatum{Value: arrow.NewChunked(output.Type, []arrow.Array{empty})} return nil } for _, r := range result { if err := 
v.emitResult(r, out); err != nil { return err } } return nil } arrow-go-18.2.0/arrow/compute/expression.go000066400000000000000000000553441476434502500207200ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package compute import ( "bytes" "encoding/hex" "errors" "fmt" "hash/maphash" "reflect" "strconv" "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/compute/exec" "github.com/apache/arrow-go/v18/arrow/compute/internal/kernels" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/ipc" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/arrow/scalar" ) var hashSeed = maphash.MakeSeed() // Expression is an interface for mapping one datum to another. An expression // is one of: // // A literal Datum // A reference to a single (potentially nested) field of an input Datum // A call to a compute function, with arguments specified by other Expressions // // Deprecated: use substrait-go expressions instead. type Expression interface { fmt.Stringer // IsBound returns true if this expression has been bound to a particular // Datum and/or Schema. 
IsBound() bool // IsScalarExpr returns true if this expression is composed only of scalar // literals, field references and calls to scalar functions. IsScalarExpr() bool // IsNullLiteral returns true if this expression is a literal and entirely // null. IsNullLiteral() bool // IsSatisfiable returns true if this expression could evaluate to true IsSatisfiable() bool // FieldRef returns a pointer to the underlying field reference, or nil if // this expression is not a field reference. FieldRef() *FieldRef // Type returns the datatype this expression will evaluate to. Type() arrow.DataType Hash() uint64 Equals(Expression) bool // Release releases the underlying bound C++ memory that is allocated when // a Bind is performed. Any bound expression should get released to ensure // no memory leaks. Release() } func printDatum(datum Datum) string { switch datum := datum.(type) { case *ScalarDatum: if !datum.Value.IsValid() { return "null" } switch datum.Type().ID() { case arrow.STRING, arrow.LARGE_STRING: return strconv.Quote(datum.Value.(scalar.BinaryScalar).String()) case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.LARGE_BINARY: return `"` + strings.ToUpper(hex.EncodeToString(datum.Value.(scalar.BinaryScalar).Data())) + `"` } return datum.Value.String() default: return datum.String() } } // Literal is an expression denoting a literal Datum which could be any value // as a scalar, an array, or so on. // // Deprecated: use substrait-go expressions Literal instead. 
type Literal struct { Literal Datum } func (Literal) FieldRef() *FieldRef { return nil } func (l *Literal) String() string { return printDatum(l.Literal) } func (l *Literal) Type() arrow.DataType { return l.Literal.(ArrayLikeDatum).Type() } func (l *Literal) IsBound() bool { return l.Type() != nil } func (l *Literal) IsScalarExpr() bool { return l.Literal.Kind() == KindScalar } func (l *Literal) Equals(other Expression) bool { if rhs, ok := other.(*Literal); ok { return l.Literal.Equals(rhs.Literal) } return false } func (l *Literal) IsNullLiteral() bool { if ad, ok := l.Literal.(ArrayLikeDatum); ok { return ad.NullN() == ad.Len() } return true } func (l *Literal) IsSatisfiable() bool { if l.IsNullLiteral() { return false } if sc, ok := l.Literal.(*ScalarDatum); ok && sc.Type().ID() == arrow.BOOL { return sc.Value.(*scalar.Boolean).Value } return true } func (l *Literal) Hash() uint64 { if l.IsScalarExpr() { return scalar.Hash(hashSeed, l.Literal.(*ScalarDatum).Value) } return 0 } func (l *Literal) Release() { l.Literal.Release() } // Parameter represents a field reference and needs to be bound in order to determine // its type and shape. // // Deprecated: use substrait-go field references instead. 
type Parameter struct { ref *FieldRef // post bind props dt arrow.DataType index int } func (Parameter) IsNullLiteral() bool { return false } func (p *Parameter) Type() arrow.DataType { return p.dt } func (p *Parameter) IsBound() bool { return p.Type() != nil } func (p *Parameter) IsScalarExpr() bool { return p.ref != nil } func (p *Parameter) IsSatisfiable() bool { return p.Type() == nil || p.Type().ID() != arrow.NULL } func (p *Parameter) FieldRef() *FieldRef { return p.ref } func (p *Parameter) Hash() uint64 { return p.ref.Hash(hashSeed) } func (p *Parameter) String() string { switch { case p.ref.IsName(): return p.ref.Name() case p.ref.IsFieldPath(): return p.ref.FieldPath().String() default: return p.ref.String() } } func (p *Parameter) Equals(other Expression) bool { if rhs, ok := other.(*Parameter); ok { return p.ref.Equals(*rhs.ref) } return false } func (p *Parameter) Release() {} type comparisonType int8 const ( compNA comparisonType = 0 compEQ comparisonType = 1 compLT comparisonType = 2 compGT comparisonType = 4 compNE comparisonType = compLT | compGT compLE comparisonType = compLT | compEQ compGE comparisonType = compGT | compEQ ) //lint:ignore U1000 ignore that this is unused for now func (c comparisonType) name() string { switch c { case compEQ: return "equal" case compLT: return "less" case compGT: return "greater" case compNE: return "not_equal" case compLE: return "less_equal" case compGE: return "greater_equal" } return "na" } func (c comparisonType) getOp() string { switch c { case compEQ: return "==" case compLT: return "<" case compGT: return ">" case compNE: return "!=" case compLE: return "<=" case compGE: return ">=" } debug.Assert(false, "invalid getop") return "" } var compmap = map[string]comparisonType{ "equal": compEQ, "less": compLT, "greater": compGT, "not_equal": compNE, "less_equal": compLE, "greater_equal": compGE, } func optionsToString(fn FunctionOptions) string { if s, ok := fn.(fmt.Stringer); ok { return s.String() } var b 
strings.Builder v := reflect.Indirect(reflect.ValueOf(fn)) b.WriteByte('{') for i := 0; i < v.Type().NumField(); i++ { fld := v.Type().Field(i) tag := fld.Tag.Get("compute") if tag == "-" { continue } fldVal := v.Field(i) fmt.Fprintf(&b, "%s=%v, ", tag, fldVal.Interface()) } ret := b.String() return ret[:len(ret)-2] + "}" } // Call is a function call with specific arguments which are themselves other // expressions. A call can also have options that are specific to the function // in question. It must be bound to determine the shape and type. // // Deprecated: use substrait-go expression functions instead. type Call struct { funcName string args []Expression dt arrow.DataType options FunctionOptions cachedHash uint64 } func (c *Call) IsNullLiteral() bool { return false } func (c *Call) FieldRef() *FieldRef { return nil } func (c *Call) Type() arrow.DataType { return c.dt } func (c *Call) IsSatisfiable() bool { return c.Type() == nil || c.Type().ID() != arrow.NULL } func (c *Call) String() string { binary := func(op string) string { return "(" + c.args[0].String() + " " + op + " " + c.args[1].String() + ")" } if cmp, ok := compmap[c.funcName]; ok { return binary(cmp.getOp()) } const kleene = "_kleene" if strings.HasSuffix(c.funcName, kleene) { return binary(strings.TrimSuffix(c.funcName, kleene)) } if c.funcName == "make_struct" && c.options != nil { opts := c.options.(*MakeStructOptions) out := "{" for i, a := range c.args { out += opts.FieldNames[i] + "=" + a.String() + ", " } return out[:len(out)-2] + "}" } var b strings.Builder b.WriteString(c.funcName + "(") for _, a := range c.args { b.WriteString(a.String() + ", ") } if c.options != nil { b.WriteString(optionsToString(c.options)) b.WriteString(" ") } ret := b.String() return ret[:len(ret)-2] + ")" } func (c *Call) Hash() uint64 { if c.cachedHash != 0 { return c.cachedHash } var h maphash.Hash h.SetSeed(hashSeed) h.WriteString(c.funcName) c.cachedHash = h.Sum64() for _, arg := range c.args { c.cachedHash = 
exec.HashCombine(c.cachedHash, arg.Hash()) } return c.cachedHash } func (c *Call) IsScalarExpr() bool { for _, arg := range c.args { if !arg.IsScalarExpr() { return false } } return false // return isFuncScalar(c.funcName) } func (c *Call) IsBound() bool { return c.Type() != nil } func (c *Call) Equals(other Expression) bool { rhs, ok := other.(*Call) if !ok { return false } if c.funcName != rhs.funcName || len(c.args) != len(rhs.args) { return false } for i := range c.args { if !c.args[i].Equals(rhs.args[i]) { return false } } if opt, ok := c.options.(FunctionOptionsEqual); ok { return opt.Equals(rhs.options) } return reflect.DeepEqual(c.options, rhs.options) } func (c *Call) Release() { for _, a := range c.args { a.Release() } if r, ok := c.options.(releasable); ok { r.Release() } } // FunctionOptions can be any type which has a TypeName function. The fields // of the type will be used (via reflection) to determine the information to // propagate when serializing to pass to the C++ for execution. 
type FunctionOptions interface {
	TypeName() string
}

// FunctionOptionsEqual is implemented by options types that define their own
// equality, used by Call.Equals instead of reflect.DeepEqual.
type FunctionOptionsEqual interface {
	Equals(FunctionOptions) bool
}

// FunctionOptionsCloneable is implemented by options types that can produce
// an independent copy of themselves.
type FunctionOptionsCloneable interface {
	Clone() FunctionOptions
}

// MakeStructOptions configures the "make_struct" function: per-field names,
// nullability, and metadata for the resulting struct type.
type MakeStructOptions struct {
	FieldNames       []string          `compute:"field_names"`
	FieldNullability []bool            `compute:"field_nullability"`
	FieldMetadata    []*arrow.Metadata `compute:"field_metadata"`
}

func (MakeStructOptions) TypeName() string { return "MakeStructOptions" }

// NullOptions configures null-checking functions; NanIsNull requests that
// floating point NaN be treated as null.
type NullOptions struct {
	NanIsNull bool `compute:"nan_is_null"`
}

func (NullOptions) TypeName() string { return "NullOptions" }

// StrptimeOptions configures timestamp parsing: a strptime-style Format and
// the time Unit of the resulting timestamps.
type StrptimeOptions struct {
	Format string         `compute:"format"`
	Unit   arrow.TimeUnit `compute:"unit"`
}

func (StrptimeOptions) TypeName() string { return "StrptimeOptions" }

type NullSelectionBehavior = kernels.NullSelectionBehavior

const (
	SelectionEmitNulls = kernels.EmitNulls
	SelectionDropNulls = kernels.DropNulls
)

// ArithmeticOptions configures arithmetic kernels.
// NOTE(review): the Go field reads "NoCheckOverflow" while the compute tag is
// "check_overflow" — the serialized meaning appears inverted relative to the
// field name; confirm against the kernel before relying on either sense.
type ArithmeticOptions struct {
	NoCheckOverflow bool `compute:"check_overflow"`
}

func (ArithmeticOptions) TypeName() string { return "ArithmeticOptions" }

// Aliases to the kernel-level options types so callers only import compute.
type (
	CastOptions   = kernels.CastOptions
	FilterOptions = kernels.FilterOptions
	TakeOptions   = kernels.TakeOptions
)

func DefaultFilterOptions() *FilterOptions { return &FilterOptions{} }

// DefaultTakeOptions enables bounds checking by default.
func DefaultTakeOptions() *TakeOptions { return &TakeOptions{BoundsCheck: true} }

// DefaultCastOptions returns safe (strict) cast options when safe is true,
// otherwise options permitting every lossy/overflowing conversion.
func DefaultCastOptions(safe bool) *CastOptions {
	if safe {
		return &CastOptions{}
	}
	return &CastOptions{
		AllowIntOverflow:     true,
		AllowTimeTruncate:    true,
		AllowTimeOverflow:    true,
		AllowDecimalTruncate: true,
		AllowFloatTruncate:   true,
		AllowInvalidUtf8:     true,
	}
}

// UnsafeCastOptions is shorthand for NewCastOptions(dt, false).
func UnsafeCastOptions(dt arrow.DataType) *CastOptions {
	return NewCastOptions(dt, false)
}

// SafeCastOptions is shorthand for NewCastOptions(dt, true).
func SafeCastOptions(dt arrow.DataType) *CastOptions {
	return NewCastOptions(dt, true)
}

// NewCastOptions builds cast options targeting dt (arrow.Null when dt is
// nil) with the given safety level.
func NewCastOptions(dt arrow.DataType, safe bool) *CastOptions {
	opts := DefaultCastOptions(safe)
	if dt != nil {
		opts.ToType = dt
	} else {
		opts.ToType = arrow.Null
	}
	return opts
}

// Cast wraps ex in a call to "cast" targeting dt (arrow.Null when dt is
// nil), using default-safe cast options.
func Cast(ex Expression, dt arrow.DataType) Expression {
	opts := &CastOptions{}
	if dt == nil {
		opts.ToType = arrow.Null
	} else {
		opts.ToType = dt
	}
	return NewCall("cast", []Expression{ex}, opts)
}

// SetLookupOptions configures membership functions (e.g. is_in) with the set
// of values to look up and whether nulls in the input are skipped.
type SetLookupOptions struct {
	ValueSet  Datum `compute:"value_set"`
	SkipNulls bool  `compute:"skip_nulls"`
}

func (SetLookupOptions) TypeName() string { return "SetLookupOptions" }

// Release releases the reference held on the value set datum.
func (s *SetLookupOptions) Release() { s.ValueSet.Release() }

func (s *SetLookupOptions) Equals(other FunctionOptions) bool {
	rhs, ok := other.(*SetLookupOptions)
	if !ok {
		return false
	}
	return s.SkipNulls == rhs.SkipNulls && s.ValueSet.Equals(rhs.ValueSet)
}

// FromStructScalar populates the options from a serialized struct scalar:
// "skip_nulls" (optional boolean) and "value_set" (required list).
func (s *SetLookupOptions) FromStructScalar(sc *scalar.Struct) error {
	if v, err := sc.Field("skip_nulls"); err == nil {
		s.SkipNulls = v.(*scalar.Boolean).Value
	}
	value, err := sc.Field("value_set")
	if err != nil {
		return err
	}
	if v, ok := value.(scalar.ListScalar); ok {
		s.ValueSet = NewDatum(v.GetList())
		return nil
	}
	return errors.New("set lookup options valueset should be a list")
}

// funcOptionsMap maps each options TypeName to its reflect.Type so that
// DeserializeExpr can reconstruct options values by name.
var (
	funcOptionsMap map[string]reflect.Type
	funcOptsTypes  = []FunctionOptions{
		SetLookupOptions{}, ArithmeticOptions{}, CastOptions{},
		FilterOptions{}, NullOptions{}, StrptimeOptions{},
		MakeStructOptions{},
	}
)

func init() {
	funcOptionsMap = make(map[string]reflect.Type)
	for _, ft := range funcOptsTypes {
		funcOptionsMap[ft.TypeName()] = reflect.TypeOf(ft)
	}
}

// NewLiteral constructs a new literal expression from any value. It is passed
// to NewDatum which will construct the appropriate Datum and/or scalar
// value for the type provided.
func NewLiteral(arg interface{}) Expression { return &Literal{Literal: NewDatum(arg)} } func NullLiteral(dt arrow.DataType) Expression { return &Literal{Literal: NewDatum(scalar.MakeNullScalar(dt))} } // NewRef constructs a parameter expression which refers to a specific field func NewRef(ref FieldRef) Expression { return &Parameter{ref: &ref, index: -1} } // NewFieldRef is shorthand for NewRef(FieldRefName(field)) func NewFieldRef(field string) Expression { return NewRef(FieldRefName(field)) } // NewCall constructs an expression that represents a specific function call with // the given arguments and options. func NewCall(name string, args []Expression, opts FunctionOptions) Expression { return &Call{funcName: name, args: args, options: opts} } // Project is shorthand for `make_struct` to produce a record batch output // from a group of expressions. func Project(values []Expression, names []string) Expression { nulls := make([]bool, len(names)) for i := range nulls { nulls[i] = true } meta := make([]*arrow.Metadata, len(names)) return NewCall("make_struct", values, &MakeStructOptions{FieldNames: names, FieldNullability: nulls, FieldMetadata: meta}) } // Equal is a convenience function for the equal function func Equal(lhs, rhs Expression) Expression { return NewCall("equal", []Expression{lhs, rhs}, nil) } // NotEqual creates a call to not_equal func NotEqual(lhs, rhs Expression) Expression { return NewCall("not_equal", []Expression{lhs, rhs}, nil) } // Less is shorthand for NewCall("less",....) func Less(lhs, rhs Expression) Expression { return NewCall("less", []Expression{lhs, rhs}, nil) } // LessEqual is shorthand for NewCall("less_equal",....) func LessEqual(lhs, rhs Expression) Expression { return NewCall("less_equal", []Expression{lhs, rhs}, nil) } // Greater is shorthand for NewCall("greater",....) 
func Greater(lhs, rhs Expression) Expression {
	return NewCall("greater", []Expression{lhs, rhs}, nil)
}

// GreaterEqual is shorthand for NewCall("greater_equal",....)
func GreaterEqual(lhs, rhs Expression) Expression {
	return NewCall("greater_equal", []Expression{lhs, rhs}, nil)
}

// IsNull creates an expression that returns true if the passed in expression is
// null. Optionally treating NaN as null if desired.
func IsNull(lhs Expression, nanIsNull bool) Expression {
	// BUGFIX: this previously called the binary comparison "less" with a
	// single argument and NullOptions; the null-check function is "is_null".
	return NewCall("is_null", []Expression{lhs}, &NullOptions{nanIsNull})
}

// IsValid is the inverse of IsNull
func IsValid(lhs Expression) Expression {
	return NewCall("is_valid", []Expression{lhs}, nil)
}

// binop is the shape of the two-argument combiners used by foldLeft.
type binop func(lhs, rhs Expression) Expression

// foldLeft left-associatively combines args with op, returning nil for no
// args and the sole argument unchanged for one arg.
func foldLeft(op binop, args ...Expression) Expression {
	switch len(args) {
	case 0:
		return nil
	case 1:
		return args[0]
	}

	folded := args[0]
	for _, a := range args[1:] {
		folded = op(folded, a)
	}
	return folded
}

func and(lhs, rhs Expression) Expression {
	return NewCall("and_kleene", []Expression{lhs, rhs}, nil)
}

// And constructs a tree of calls to and_kleene for boolean And logic taking
// an arbitrary number of values.
func And(lhs, rhs Expression, ops ...Expression) Expression {
	folded := foldLeft(and, append([]Expression{lhs, rhs}, ops...)...)
	if folded != nil {
		return folded
	}
	return NewLiteral(true)
}

func or(lhs, rhs Expression) Expression {
	return NewCall("or_kleene", []Expression{lhs, rhs}, nil)
}

// Or constructs a tree of calls to or_kleene for boolean Or logic taking
// an arbitrary number of values.
func Or(lhs, rhs Expression, ops ...Expression) Expression {
	folded := foldLeft(or, append([]Expression{lhs, rhs}, ops...)...)
	if folded != nil {
		return folded
	}
	return NewLiteral(false)
}

// Not creates a call to "invert" for the value specified.
func Not(expr Expression) Expression { return NewCall("invert", []Expression{expr}, nil) } func SerializeOptions(opts FunctionOptions, mem memory.Allocator) (*memory.Buffer, error) { sc, err := scalar.ToScalar(opts, mem) if err != nil { return nil, err } if sc, ok := sc.(releasable); ok { defer sc.Release() } arr, err := scalar.MakeArrayFromScalar(sc, 1, mem) if err != nil { return nil, err } defer arr.Release() batch := array.NewRecord(arrow.NewSchema([]arrow.Field{{Type: arr.DataType(), Nullable: true}}, nil), []arrow.Array{arr}, 1) defer batch.Release() buf := &bufferWriteSeeker{mem: mem} wr, err := ipc.NewFileWriter(buf, ipc.WithSchema(batch.Schema()), ipc.WithAllocator(mem)) if err != nil { return nil, err } wr.Write(batch) wr.Close() return buf.buf, nil } // SerializeExpr serializes expressions by converting them to Metadata and // storing this in the schema of a Record. Embedded arrays and scalars are // stored in its columns. Finally the record is written as an IPC file func SerializeExpr(expr Expression, mem memory.Allocator) (*memory.Buffer, error) { var ( cols []arrow.Array metaKey []string metaValue []string visit func(Expression) error ) addScalar := func(s scalar.Scalar) (string, error) { ret := len(cols) arr, err := scalar.MakeArrayFromScalar(s, 1, mem) if err != nil { return "", err } cols = append(cols, arr) return strconv.Itoa(ret), nil } visit = func(e Expression) error { switch e := e.(type) { case *Literal: if !e.IsScalarExpr() { return errors.New("not implemented: serialization of non-scalar literals") } metaKey = append(metaKey, "literal") s, err := addScalar(e.Literal.(*ScalarDatum).Value) if err != nil { return err } metaValue = append(metaValue, s) case *Parameter: if e.ref.Name() == "" { return errors.New("not implemented: serialization of non-name field_ref") } metaKey = append(metaKey, "field_ref") metaValue = append(metaValue, e.ref.Name()) case *Call: metaKey = append(metaKey, "call") metaValue = append(metaValue, e.funcName) for _, 
arg := range e.args { visit(arg) } if e.options != nil { st, err := scalar.ToScalar(e.options, mem) if err != nil { return err } metaKey = append(metaKey, "options") s, err := addScalar(st) if err != nil { return err } metaValue = append(metaValue, s) for _, f := range st.(*scalar.Struct).Value { switch s := f.(type) { case releasable: defer s.Release() } } } metaKey = append(metaKey, "end") metaValue = append(metaValue, e.funcName) } return nil } if err := visit(expr); err != nil { return nil, err } fields := make([]arrow.Field, len(cols)) for i, c := range cols { fields[i].Type = c.DataType() defer c.Release() } metadata := arrow.NewMetadata(metaKey, metaValue) rec := array.NewRecord(arrow.NewSchema(fields, &metadata), cols, 1) defer rec.Release() buf := &bufferWriteSeeker{mem: mem} wr, err := ipc.NewFileWriter(buf, ipc.WithSchema(rec.Schema()), ipc.WithAllocator(mem)) if err != nil { return nil, err } wr.Write(rec) wr.Close() return buf.buf, nil } func DeserializeExpr(mem memory.Allocator, buf *memory.Buffer) (Expression, error) { rdr, err := ipc.NewFileReader(bytes.NewReader(buf.Bytes()), ipc.WithAllocator(mem)) if err != nil { return nil, err } defer rdr.Close() batch, err := rdr.Read() if err != nil { return nil, err } if !batch.Schema().HasMetadata() { return nil, errors.New("serialized Expression's batch repr had no metadata") } if batch.NumRows() != 1 { return nil, fmt.Errorf("serialized Expression's batch repr was not a single row - had %d", batch.NumRows()) } var ( getone func() (Expression, error) index int = 0 metadata = batch.Schema().Metadata() ) getscalar := func(i string) (scalar.Scalar, error) { colIndex, err := strconv.ParseInt(i, 10, 32) if err != nil { return nil, err } if colIndex >= batch.NumCols() { return nil, errors.New("column index out of bounds") } return scalar.GetScalar(batch.Column(int(colIndex)), 0) } getone = func() (Expression, error) { if index >= metadata.Len() { return nil, errors.New("unterminated serialized Expression") } 
key, val := metadata.Keys()[index], metadata.Values()[index] index++ switch key { case "literal": scalar, err := getscalar(val) if err != nil { return nil, err } if r, ok := scalar.(releasable); ok { defer r.Release() } return NewLiteral(scalar), err case "field_ref": return NewFieldRef(val), nil case "call": args := make([]Expression, 0) for metadata.Keys()[index] != "end" { if metadata.Keys()[index] == "options" { optsScalar, err := getscalar(metadata.Values()[index]) if err != nil { return nil, err } if r, ok := optsScalar.(releasable); ok { defer r.Release() } var opts FunctionOptions if optsScalar != nil { typname, err := optsScalar.(*scalar.Struct).Field("_type_name") if err != nil { return nil, err } if typname.DataType().ID() != arrow.BINARY { return nil, errors.New("options scalar typename must be binary") } optionsVal := reflect.New(funcOptionsMap[string(typname.(*scalar.Binary).Data())]).Interface() if err := scalar.FromScalar(optsScalar.(*scalar.Struct), optionsVal); err != nil { return nil, err } opts = optionsVal.(FunctionOptions) } index += 2 return NewCall(val, args, opts), nil } arg, err := getone() if err != nil { return nil, err } args = append(args, arg) } index++ return NewCall(val, args, nil), nil default: return nil, fmt.Errorf("unrecognized serialized Expression key %s", key) } } return getone() } arrow-go-18.2.0/arrow/compute/expression_test.go000066400000000000000000000265511476434502500217550ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. //go:build go1.18 package compute_test import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/compute" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/stretchr/testify/assert" ) func TestExpressionToString(t *testing.T) { ts, _ := scalar.MakeScalar("1990-10-23 10:23:33.123456").CastTo(arrow.FixedWidthTypes.Timestamp_ns) add := compute.NewCall("add", []compute.Expression{compute.NewFieldRef("beta"), compute.NewLiteral(3)}, &compute.ArithmeticOptions{}) tests := []struct { expr compute.Expression expected string }{ {compute.NewFieldRef("alpha"), "alpha"}, {compute.NewLiteral(3), "3"}, {compute.NewLiteral("a"), `"a"`}, {compute.NewLiteral("a\nb"), `"a\nb"`}, {compute.NewLiteral(&scalar.Boolean{}), "null"}, {compute.NewLiteral(&scalar.Int64{}), "null"}, {compute.NewLiteral(scalar.NewBinaryScalar(memory.NewBufferBytes([]byte("az")), arrow.BinaryTypes.Binary)), `"617A"`}, {compute.NewLiteral(ts), "1990-10-23 10:23:33.123456"}, {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewFieldRef("beta")}, nil), "add(3, beta)"}, {compute.And(compute.NewFieldRef("a"), compute.NewFieldRef("b")), "(a and b)"}, {compute.Or(compute.NewFieldRef("a"), compute.NewFieldRef("b")), "(a or b)"}, {compute.Not(compute.NewFieldRef("a")), "invert(a)"}, {compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), "cast(a, {to_type=int32, allow_int_overflow=false, allow_time_truncate=false, " + 
"allow_time_overflow=false, allow_decimal_truncate=false, " + "allow_float_truncate=false, allow_invalid_utf8=false})"}, {compute.Cast(compute.NewFieldRef("a"), nil), "cast(a, {to_type=null, allow_int_overflow=false, allow_time_truncate=false, " + "allow_time_overflow=false, allow_decimal_truncate=false, " + "allow_float_truncate=false, allow_invalid_utf8=false})"}, {compute.Equal(compute.NewFieldRef("a"), compute.NewLiteral(1)), "(a == 1)"}, {compute.Less(compute.NewFieldRef("a"), compute.NewLiteral(2)), "(a < 2)"}, {compute.Greater(compute.NewFieldRef("a"), compute.NewLiteral(3)), "(a > 3)"}, {compute.NotEqual(compute.NewFieldRef("a"), compute.NewLiteral("a")), `(a != "a")`}, {compute.LessEqual(compute.NewFieldRef("a"), compute.NewLiteral("b")), `(a <= "b")`}, {compute.GreaterEqual(compute.NewFieldRef("a"), compute.NewLiteral("c")), `(a >= "c")`}, {compute.Project( []compute.Expression{ compute.NewFieldRef("a"), compute.NewFieldRef("a"), compute.NewLiteral(3), add, }, []string{"a", "renamed_a", "three", "b"}), "{a=a, renamed_a=a, three=3, b=" + add.String() + "}"}, } for _, tt := range tests { t.Run(tt.expected, func(t *testing.T) { assert.Equal(t, tt.expected, tt.expr.String()) }) } } func TestExpressionEquality(t *testing.T) { tests := []struct { exp1 compute.Expression exp2 compute.Expression equal bool }{ {compute.NewLiteral(1), compute.NewLiteral(1), true}, {compute.NewLiteral(1), compute.NewLiteral(2), false}, {compute.NewFieldRef("a"), compute.NewFieldRef("a"), true}, {compute.NewFieldRef("a"), compute.NewFieldRef("b"), false}, {compute.NewFieldRef("a"), compute.NewLiteral(2), false}, {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil), compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil), true}, {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil), compute.NewCall("add", []compute.Expression{compute.NewLiteral(2), 
compute.NewLiteral("a")}, nil), false}, {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil), compute.NewCall("add", []compute.Expression{compute.NewFieldRef("a"), compute.NewLiteral(3)}, nil), false}, {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{true}), compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{true}), true}, {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{true}), compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{false}), false}, {compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), true}, {compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int64), false}, {compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), compute.NewCall("cast", []compute.Expression{compute.NewFieldRef("a")}, compute.NewCastOptions(arrow.PrimitiveTypes.Int32, false)), false}, } for _, tt := range tests { t.Run(tt.exp1.String(), func(t *testing.T) { assert.Equal(t, tt.equal, tt.exp1.Equals(tt.exp2)) }) } } func TestExpressionHashing(t *testing.T) { set := make(map[uint64]compute.Expression) e := compute.NewFieldRef("alpha") set[e.Hash()] = e e = compute.NewFieldRef("beta") _, ok := set[e.Hash()] assert.False(t, ok) set[e.Hash()] = e e = compute.NewFieldRef("beta") ex, ok := set[e.Hash()] assert.True(t, ok) assert.True(t, e.Equals(ex)) e = compute.NewLiteral(1) set[e.Hash()] = e _, ok = set[compute.NewLiteral(1).Hash()] assert.True(t, ok) _, ok = set[compute.NewLiteral(3).Hash()] assert.False(t, ok) set[compute.NewLiteral(3).Hash()] = compute.NewLiteral(3) e = 
compute.NullLiteral(arrow.PrimitiveTypes.Int32) set[e.Hash()] = e _, ok = set[compute.NullLiteral(arrow.PrimitiveTypes.Int32).Hash()] assert.True(t, ok) e = compute.NullLiteral(arrow.PrimitiveTypes.Float32) _, ok = set[e.Hash()] assert.False(t, ok) set[e.Hash()] = e e = compute.NewCall("add", []compute.Expression{}, nil) set[e.Hash()] = e _, ok = set[compute.NewCall("add", nil, nil).Hash()] assert.True(t, ok) e = compute.NewCall("widgetify", nil, nil) _, ok = set[e.Hash()] assert.False(t, ok) set[e.Hash()] = e assert.Len(t, set, 8) } func TestIsScalarExpression(t *testing.T) { assert.True(t, compute.NewLiteral(true).IsScalarExpr()) arr := array.MakeFromData(array.NewData(arrow.PrimitiveTypes.Int8, 0, []*memory.Buffer{nil, nil}, nil, 0, 0)) defer arr.Release() assert.False(t, compute.NewLiteral(arr).IsScalarExpr()) assert.True(t, compute.NewFieldRef("a").IsScalarExpr()) } func TestExpressionIsSatisfiable(t *testing.T) { assert.True(t, compute.NewLiteral(true).IsSatisfiable()) assert.False(t, compute.NewLiteral(false).IsSatisfiable()) null := scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean) assert.False(t, compute.NewLiteral(null).IsSatisfiable()) assert.True(t, compute.NewFieldRef("a").IsSatisfiable()) assert.True(t, compute.Equal(compute.NewFieldRef("a"), compute.NewLiteral(1)).IsSatisfiable()) // no constant folding here assert.True(t, compute.Equal(compute.NewLiteral(0), compute.NewLiteral(1)).IsSatisfiable()) // when a top level conjunction contains an Expression which is certain to // evaluate to null, it can only evaluate to null or false neverTrue := compute.And(compute.NewLiteral(null), compute.NewFieldRef("a")) // this may appear in satisfiable filters if coalesced (for example, wrapped in fill_na) assert.True(t, compute.NewCall("is_null", []compute.Expression{neverTrue}, nil).IsSatisfiable()) } func TestExpressionSerializationRoundTrip(t *testing.T) { bldr := array.NewInt32Builder(memory.DefaultAllocator) defer bldr.Release() 
bldr.AppendValues([]int32{1, 2, 3}, nil) lookupArr := bldr.NewArray() defer lookupArr.Release() intvalueset := compute.NewDatum(lookupArr) defer intvalueset.Release() bldr2 := array.NewFloat64Builder(memory.DefaultAllocator) defer bldr2.Release() bldr2.AppendValues([]float64{0.5, 1.0, 2.0}, nil) lookupArr = bldr2.NewArray() defer lookupArr.Release() fltvalueset := compute.NewDatum(lookupArr) defer fltvalueset.Release() tests := []struct { name string expr compute.Expression }{ {"null literal", compute.NewLiteral(scalar.MakeNullScalar(arrow.Null))}, {"null int32 literal", compute.NewLiteral(scalar.MakeNullScalar(arrow.PrimitiveTypes.Int32))}, {"null struct literal", compute.NewLiteral(scalar.MakeNullScalar(arrow.StructOf( arrow.Field{Name: "i", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, arrow.Field{Name: "s", Type: arrow.BinaryTypes.String, Nullable: true}, )))}, {"literal true", compute.NewLiteral(true)}, {"literal false", compute.NewLiteral(false)}, {"literal int", compute.NewLiteral(1)}, {"literal float", compute.NewLiteral(1.125)}, {"stringy strings", compute.NewLiteral("stringy strings")}, {"field ref", compute.NewFieldRef("field")}, {"greater", compute.Greater(compute.NewFieldRef("a"), compute.NewLiteral(0.25))}, {"or", compute.Or( compute.Equal(compute.NewFieldRef("a"), compute.NewLiteral(1)), compute.NotEqual(compute.NewFieldRef("b"), compute.NewLiteral("hello")), compute.Equal(compute.NewFieldRef("b"), compute.NewLiteral("foo bar")))}, {"not", compute.Not(compute.NewFieldRef("alpha"))}, {"is_in", compute.NewCall("is_in", []compute.Expression{compute.NewLiteral(1)}, &compute.SetLookupOptions{ValueSet: intvalueset})}, {"is_in cast", compute.NewCall("is_in", []compute.Expression{ compute.NewCall("cast", []compute.Expression{compute.NewFieldRef("version")}, compute.NewCastOptions(arrow.PrimitiveTypes.Float64, true))}, &compute.SetLookupOptions{ValueSet: fltvalueset})}, {"is valid", compute.IsValid(compute.NewFieldRef("validity"))}, {"lots and", 
compute.And( compute.And( compute.GreaterEqual(compute.NewFieldRef("x"), compute.NewLiteral(-1.5)), compute.Less(compute.NewFieldRef("x"), compute.NewLiteral(0.0))), compute.And(compute.GreaterEqual(compute.NewFieldRef("y"), compute.NewLiteral(0.0)), compute.Less(compute.NewFieldRef("y"), compute.NewLiteral(1.5))), compute.And(compute.Greater(compute.NewFieldRef("z"), compute.NewLiteral(1.5)), compute.LessEqual(compute.NewFieldRef("z"), compute.NewLiteral(3.0))))}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) serialized, err := compute.SerializeExpr(tt.expr, mem) assert.NoError(t, err) defer serialized.Release() roundTripped, err := compute.DeserializeExpr(mem, serialized) assert.NoError(t, err) defer roundTripped.Release() assert.Truef(t, tt.expr.Equals(roundTripped), "started with: %s, got: %s", tt.expr, roundTripped) }) } } arrow-go-18.2.0/arrow/compute/exprs/000077500000000000000000000000001476434502500173205ustar00rootroot00000000000000arrow-go-18.2.0/arrow/compute/exprs/builders.go000066400000000000000000000345111476434502500214640ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
//go:build go1.18 package exprs import ( "fmt" "strconv" "strings" "unicode" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/compute" "github.com/substrait-io/substrait-go/v3/expr" "github.com/substrait-io/substrait-go/v3/extensions" "github.com/substrait-io/substrait-go/v3/types" ) // NewDefaultExtensionSet constructs an empty extension set using the default // Arrow Extension registry and the default collection of substrait extensions // from the Substrait-go repo. func NewDefaultExtensionSet() ExtensionIDSet { return NewExtensionSetDefault( expr.NewEmptyExtensionRegistry(extensions.GetDefaultCollectionWithNoError())) } // NewScalarCall constructs a substrait ScalarFunction expression with the provided // options and arguments. // // The function name (fn) is looked up in the internal Arrow DefaultExtensionIDRegistry // to ensure it exists and to convert from the Arrow function name to the substrait // function name. It is then looked up using the DefaultCollection from the // substrait extensions module to find the declaration. If it cannot be found, // we try constructing the compound signature name by getting the types of the // arguments which were passed and appending them to the function name appropriately. // // An error is returned if the function cannot be resolved. func NewScalarCall(reg ExtensionIDSet, fn string, opts []*types.FunctionOption, args ...types.FuncArg) (*expr.ScalarFunction, error) { conv, ok := reg.GetArrowRegistry().GetArrowToSubstrait(fn) if !ok { return nil, arrow.ErrNotFound } id, convOpts, err := conv(fn) if err != nil { return nil, err } opts = append(opts, convOpts...) return expr.NewScalarFunc(reg.GetSubstraitRegistry(), id, opts, args...) } // NewFieldRefFromDotPath constructs a substrait reference segment from // a dot path and the base schema. // // dot_path = '.' 
name // // | '[' digit+ ']' // | dot_path+ // // # Examples // // Assume root schema of {alpha: i32, beta: struct>, delta: map} // // ".alpha" => StructFieldRef(0) // "[2]" => StructFieldRef(2) // ".beta[0]" => StructFieldRef(1, StructFieldRef(0)) // "[1].gamma[3]" => StructFieldRef(1, StructFieldRef(0, ListElementRef(3))) // ".delta.foobar" => StructFieldRef(2, MapKeyRef("foobar")) // // Note: when parsing a name, a '\' preceding any other character // will be dropped from the resulting name. Therefore if a name must // contain the characters '.', '\', '[', or ']' then they must be escaped // with a preceding '\'. func NewFieldRefFromDotPath(dotpath string, rootSchema *arrow.Schema) (expr.ReferenceSegment, error) { if len(dotpath) == 0 { return nil, fmt.Errorf("%w dotpath was empty", arrow.ErrInvalid) } parseName := func() string { var name string for { idx := strings.IndexAny(dotpath, `\[.`) if idx == -1 { name += dotpath dotpath = "" break } if dotpath[idx] != '\\' { // subscript for a new field ref name += dotpath[:idx] dotpath = dotpath[idx:] break } if len(dotpath) == idx+1 { // dotpath ends with a backslash; consume it all name += dotpath dotpath = "" break } // append all characters before backslash, then the character which follows it name += dotpath[:idx] + string(dotpath[idx+1]) dotpath = dotpath[idx+2:] } return name } var curType arrow.DataType = arrow.StructOf(rootSchema.Fields()...) 
children := make([]expr.ReferenceSegment, 0) for len(dotpath) > 0 { subscript := dotpath[0] dotpath = dotpath[1:] switch subscript { case '.': // next element is a name n := parseName() switch ct := curType.(type) { case *arrow.StructType: idx, found := ct.FieldIdx(n) if !found { return nil, fmt.Errorf("%w: dot path '%s' referenced invalid field", arrow.ErrInvalid, dotpath) } children = append(children, &expr.StructFieldRef{Field: int32(idx)}) curType = ct.Field(idx).Type case *arrow.MapType: curType = ct.KeyType() switch ct.KeyType().ID() { case arrow.BINARY, arrow.LARGE_BINARY: children = append(children, &expr.MapKeyRef{MapKey: expr.NewByteSliceLiteral([]byte(n), false)}) case arrow.STRING, arrow.LARGE_STRING: children = append(children, &expr.MapKeyRef{MapKey: expr.NewPrimitiveLiteral(n, false)}) default: return nil, fmt.Errorf("%w: MapKeyRef to non-binary/string map not supported", arrow.ErrNotImplemented) } default: return nil, fmt.Errorf("%w: dot path names must refer to struct fields or map keys", arrow.ErrInvalid) } case '[': subend := strings.IndexFunc(dotpath, func(r rune) bool { return !unicode.IsDigit(r) }) if subend == -1 || dotpath[subend] != ']' { return nil, fmt.Errorf("%w: dot path '%s' contained an unterminated index", arrow.ErrInvalid, dotpath) } idx, _ := strconv.Atoi(dotpath[:subend]) switch ct := curType.(type) { case *arrow.StructType: if idx > ct.NumFields() { return nil, fmt.Errorf("%w: field out of bounds in dotpath", arrow.ErrIndex) } curType = ct.Field(idx).Type children = append(children, &expr.StructFieldRef{Field: int32(idx)}) case *arrow.MapType: curType = ct.KeyType() var keyLiteral expr.Literal // TODO: implement user defined types and variations switch ct.KeyType().ID() { case arrow.INT8: keyLiteral = expr.NewPrimitiveLiteral(int8(idx), false) case arrow.INT16: keyLiteral = expr.NewPrimitiveLiteral(int16(idx), false) case arrow.INT32: keyLiteral = expr.NewPrimitiveLiteral(int32(idx), false) case arrow.INT64: keyLiteral = 
expr.NewPrimitiveLiteral(int64(idx), false) case arrow.FLOAT32: keyLiteral = expr.NewPrimitiveLiteral(float32(idx), false) case arrow.FLOAT64: keyLiteral = expr.NewPrimitiveLiteral(float64(idx), false) default: return nil, fmt.Errorf("%w: dotpath ref to map key type %s", arrow.ErrNotImplemented, ct.KeyType()) } children = append(children, &expr.MapKeyRef{MapKey: keyLiteral}) case *arrow.ListType: curType = ct.Elem() children = append(children, &expr.ListElementRef{Offset: int32(idx)}) case *arrow.LargeListType: curType = ct.Elem() children = append(children, &expr.ListElementRef{Offset: int32(idx)}) case *arrow.FixedSizeListType: curType = ct.Elem() children = append(children, &expr.ListElementRef{Offset: int32(idx)}) default: return nil, fmt.Errorf("%w: %s type not supported for dotpath ref", arrow.ErrInvalid, ct) } dotpath = dotpath[subend+1:] default: return nil, fmt.Errorf("%w: dot path must begin with '[' or '.' got '%s'", arrow.ErrInvalid, dotpath) } } out := children[0] if len(children) > 1 { cur := out for _, c := range children[1:] { switch r := cur.(type) { case *expr.StructFieldRef: r.Child = c case *expr.MapKeyRef: r.Child = c case *expr.ListElementRef: r.Child = c } cur = c } } return out, nil } // RefFromFieldPath constructs a substrait field reference segment // from a compute.FieldPath which should be a slice of integers // indicating nested field paths to travel. This will return a // series of StructFieldRef's whose child is the next element in // the field path. func RefFromFieldPath(field compute.FieldPath) expr.ReferenceSegment { if len(field) == 0 { return nil } seg := expr.NewStructFieldRef(int32(field[0])) parent := seg for _, ref := range field[1:] { next := expr.NewStructFieldRef(int32(ref)) parent.Child = next parent = next } return seg } // NewFieldRef constructs a properly typed substrait field reference segment, // from a given arrow field reference, schema and extension set (for resolving // substrait types). 
func NewFieldRef(ref compute.FieldRef, schema *arrow.Schema, ext ExtensionIDSet) (*expr.FieldReference, error) { path, err := ref.FindOne(schema) if err != nil { return nil, err } st, err := ToSubstraitType(arrow.StructOf(schema.Fields()...), false, ext) if err != nil { return nil, err } return expr.NewRootFieldRef(RefFromFieldPath(path), types.NewRecordTypeFromStruct(*st.(*types.StructType))) } // Builder wraps the substrait-go expression Builder and FuncArgBuilder // interfaces for a simple interface that can be passed around to build // substrait expressions from Arrow data. type Builder interface { expr.Builder expr.FuncArgBuilder } // ExprBuilder is the parent for building substrait expressions // via Arrow types and functions. // // The expectation is that it should be utilized like so: // // bldr := NewExprBuilder(extSet) // bldr.SetInputSchema(arrowschema) // call, err := bldr.CallScalar("equal", nil, // bldr.FieldRef("i32"), // bldr.Literal(expr.NewPrimitiveLiteral( // int32(0), false))) // ex, err := call.BuildExpr() // ... // result, err := exprs.ExecuteScalarExpression(ctx, arrowschema, // ex, input) type ExprBuilder struct { b expr.ExprBuilder extSet ExtensionIDSet inputSchema *arrow.Schema } // NewExprBuilder constructs a new Expression Builder that will use the // provided extension set and registry. func NewExprBuilder(extSet ExtensionIDSet) ExprBuilder { return ExprBuilder{ b: expr.ExprBuilder{Reg: extSet.GetSubstraitRegistry()}, extSet: extSet, } } // SetInputSchema sets the current Arrow schema that will be utilized // for performing field reference and field type resolutions. func (e *ExprBuilder) SetInputSchema(s *arrow.Schema) error { st, err := ToSubstraitType(arrow.StructOf(s.Fields()...), false, e.extSet) if err != nil { return err } e.inputSchema = s e.b.BaseSchema = types.NewRecordTypeFromStruct(*st.(*types.StructType)) return nil } // MustCallScalar is like CallScalar, but will panic on error rather than // return it. 
func (e *ExprBuilder) MustCallScalar(fn string, opts []*types.FunctionOption, args ...expr.FuncArgBuilder) Builder { b, err := e.CallScalar(fn, opts, args...) if err != nil { panic(err) } return b } // CallScalar constructs a builder for a scalar function call. The function // name is expected to be valid in the Arrow function registry which will // map it properly to a substrait expression by resolving the types of // the arguments. Examples are: "greater", "multiply", "equal", etc. // // Can return arrow.ErrNotFound if there is no function mapping found. // Or will forward any error encountered when converting from an Arrow // function to a substrait one. func (e *ExprBuilder) CallScalar(fn string, opts []*types.FunctionOption, args ...expr.FuncArgBuilder) (Builder, error) { conv, ok := e.extSet.GetArrowRegistry().GetArrowToSubstrait(fn) if !ok { return nil, arrow.ErrNotFound } id, convOpts, err := conv(fn) if err != nil { return nil, err } opts = append(opts, convOpts...) return e.b.ScalarFunc(id, opts...).Args(args...), nil } // FieldPath uses a field path to construct a Field Reference // expression. func (e *ExprBuilder) FieldPath(path compute.FieldPath) Builder { segments := make([]expr.ReferenceSegment, len(path)) for i, p := range path { segments[i] = expr.NewStructFieldRef(int32(p)) } return e.b.RootRef(expr.FlattenRefSegments(segments...)) } // FieldIndex is shorthand for creating a single field reference // to the struct field index provided. func (e *ExprBuilder) FieldIndex(i int) Builder { return e.b.RootRef(expr.NewStructFieldRef(int32(i))) } // FieldRef constructs a field reference expression to the field with // the given name from the input. It will be resolved to a field // index when calling BuildExpr. func (e *ExprBuilder) FieldRef(field string) Builder { return &refBuilder{eb: e, fieldRef: compute.FieldRefName(field)} } // FieldRefList accepts a list of either integers or strings to // construct a field reference expression from. 
This will panic // if any of elems are not a string or int. // // Field names will be resolved to their indexes when BuildExpr is called // by using the provided Arrow schema. func (e *ExprBuilder) FieldRefList(elems ...any) Builder { return &refBuilder{eb: e, fieldRef: compute.FieldRefList(elems...)} } // Literal wraps a substrait literal to be used as an argument to // building other expressions. func (e *ExprBuilder) Literal(l expr.Literal) Builder { return e.b.Literal(l) } // WrapLiteral is a convenience for accepting functions like NewLiteral // which can potentially return an error. If an error is encountered, // it will be surfaced when BuildExpr is called. func (e *ExprBuilder) WrapLiteral(l expr.Literal, err error) Builder { return e.b.Wrap(l, err) } // Must is a convenience wrapper for any method that returns a Builder // and error, panic'ing if it received an error or otherwise returning // the Builder. func (*ExprBuilder) Must(b Builder, err error) Builder { if err != nil { panic(err) } return b } // Cast returns a Cast expression with the FailBehavior of ThrowException, // erroring for invalid casts. 
func (e *ExprBuilder) Cast(from Builder, to arrow.DataType) (Builder, error) { t, err := ToSubstraitType(to, true, e.extSet) if err != nil { return nil, err } return e.b.Cast(from, t).FailBehavior(types.BehaviorThrowException), nil } type refBuilder struct { eb *ExprBuilder fieldRef compute.FieldRef } func (r *refBuilder) BuildFuncArg() (types.FuncArg, error) { return r.BuildExpr() } func (r *refBuilder) BuildExpr() (expr.Expression, error) { if r.eb.inputSchema == nil { return nil, fmt.Errorf("%w: no input schema specified for ref", arrow.ErrInvalid) } path, err := r.fieldRef.FindOne(r.eb.inputSchema) if err != nil { return nil, err } segments := make([]expr.ReferenceSegment, len(path)) for i, p := range path { segments[i] = expr.NewStructFieldRef(int32(p)) } return r.eb.b.RootRef(expr.FlattenRefSegments(segments...)).Build() } arrow-go-18.2.0/arrow/compute/exprs/builders_test.go000066400000000000000000000070761476434502500225310ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build go1.18 package exprs_test import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/compute/exprs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/substrait-io/substrait-go/v3/expr" ) func TestNewScalarFunc(t *testing.T) { reg := exprs.NewDefaultExtensionSet() fn, err := exprs.NewScalarCall(reg, "add", nil, expr.NewPrimitiveLiteral(int32(1), false), expr.NewPrimitiveLiteral(int32(10), false)) require.NoError(t, err) assert.Equal(t, "add(i32(1), i32(10), {overflow: [ERROR]}) => i32", fn.String()) assert.Equal(t, "add:i32_i32", fn.CompoundName()) } func TestFieldRefDotPath(t *testing.T) { f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f1_0 := arrow.Field{Name: "be.ta", Type: arrow.PrimitiveTypes.Int32} f1 := arrow.Field{Name: "beta", Type: arrow.StructOf(f1_0)} f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f2_1_0 := arrow.Field{Name: "[alpha]", Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32)} f2_1_1 := arrow.Field{Name: "beta", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32)} f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} f2 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_0, f2_1)} s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) tests := []struct { dotpath string shouldErr bool expected expr.ReferenceSegment }{ {".alpha", false, &expr.StructFieldRef{Field: 0}}, {"[2]", false, &expr.StructFieldRef{Field: 2}}, {".beta[0]", false, &expr.StructFieldRef{Field: 1, Child: &expr.StructFieldRef{Field: 0}}}, {"[2].gamma[1][5]", false, &expr.StructFieldRef{Field: 2, Child: &expr.StructFieldRef{Field: 1, Child: &expr.StructFieldRef{Field: 1, Child: &expr.ListElementRef{Offset: 5}}}}}, {"[2].gamma[0].foobar", false, &expr.StructFieldRef{Field: 2, Child: &expr.StructFieldRef{Field: 1, Child: &expr.StructFieldRef{Field: 0, Child: &expr.MapKeyRef{MapKey: expr.NewPrimitiveLiteral("foobar", 
false)}}}}}, {`[1].be\.ta`, false, &expr.StructFieldRef{Field: 1, Child: &expr.StructFieldRef{Field: 0}}}, {`[2].gamma.\[alpha\]`, false, &expr.StructFieldRef{Field: 2, Child: &expr.StructFieldRef{Field: 1, Child: &expr.StructFieldRef{Field: 0}}}}, {`[5]`, true, nil}, // bad struct index {``, true, nil}, // empty {`delta`, true, nil}, // not found {`[1234`, true, nil}, // bad syntax {`[1stuf]`, true, nil}, // bad syntax } for _, tt := range tests { t.Run(tt.dotpath, func(t *testing.T) { ref, err := exprs.NewFieldRefFromDotPath(tt.dotpath, s) if tt.shouldErr { assert.Error(t, err) } else { assert.NoError(t, err) assert.Truef(t, tt.expected.Equals(ref), "expected: %s\ngot: %s", tt.expected, ref) } }) } } arrow-go-18.2.0/arrow/compute/exprs/exec.go000066400000000000000000000502001476434502500205700ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build go1.18 package exprs import ( "context" "fmt" "unsafe" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/compute" "github.com/apache/arrow-go/v18/arrow/compute/exec" "github.com/apache/arrow-go/v18/arrow/decimal128" "github.com/apache/arrow-go/v18/arrow/endian" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/substrait-io/substrait-go/v3/expr" "github.com/substrait-io/substrait-go/v3/extensions" "github.com/substrait-io/substrait-go/v3/types" ) func makeExecBatch(ctx context.Context, schema *arrow.Schema, partial compute.Datum) (out compute.ExecBatch, err error) { // cleanup if we get an error defer func() { if err != nil { for _, v := range out.Values { if v != nil { v.Release() } } } }() if partial.Kind() == compute.KindRecord { partialBatch := partial.(*compute.RecordDatum).Value batchSchema := partialBatch.Schema() out.Values = make([]compute.Datum, schema.NumFields()) out.Len = partialBatch.NumRows() for i, field := range schema.Fields() { idxes := batchSchema.FieldIndices(field.Name) switch len(idxes) { case 0: out.Values[i] = compute.NewDatum(scalar.MakeNullScalar(field.Type)) case 1: col := partialBatch.Column(idxes[0]) if !arrow.TypeEqual(col.DataType(), field.Type) { // referenced field was present but didn't have expected type // we'll cast this case for now col, err = compute.CastArray(ctx, col, compute.SafeCastOptions(field.Type)) if err != nil { return compute.ExecBatch{}, err } defer col.Release() } out.Values[i] = compute.NewDatum(col) default: err = fmt.Errorf("%w: exec batch field '%s' ambiguous, more than one match", arrow.ErrInvalid, field.Name) return compute.ExecBatch{}, err } } return } part, ok := partial.(compute.ArrayLikeDatum) if !ok { return out, fmt.Errorf("%w: MakeExecBatch from %s", arrow.ErrNotImplemented, partial) } // wasteful but useful for 
testing if part.Type().ID() == arrow.STRUCT { switch part := part.(type) { case *compute.ArrayDatum: arr := part.MakeArray().(*array.Struct) defer arr.Release() batch := array.RecordFromStructArray(arr, nil) defer batch.Release() return makeExecBatch(ctx, schema, compute.NewDatumWithoutOwning(batch)) case *compute.ScalarDatum: out.Len = 1 out.Values = make([]compute.Datum, schema.NumFields()) s := part.Value.(*scalar.Struct) dt := s.Type.(*arrow.StructType) for i, field := range schema.Fields() { idx, found := dt.FieldIdx(field.Name) if !found { out.Values[i] = compute.NewDatum(scalar.MakeNullScalar(field.Type)) continue } val := s.Value[idx] if !arrow.TypeEqual(val.DataType(), field.Type) { // referenced field was present but didn't have the expected // type. for now we'll cast this val, err = val.CastTo(field.Type) if err != nil { return compute.ExecBatch{}, err } } out.Values[i] = compute.NewDatum(val) } return } } return out, fmt.Errorf("%w: MakeExecBatch from %s", arrow.ErrNotImplemented, partial) } // ToArrowSchema takes a substrait NamedStruct and an extension set (for // type resolution mapping) and creates the equivalent Arrow Schema. 
func ToArrowSchema(base types.NamedStruct, ext ExtensionIDSet) (*arrow.Schema, error) { fields := make([]arrow.Field, len(base.Names)) for i, typ := range base.Struct.Types { dt, nullable, err := FromSubstraitType(typ, ext) if err != nil { return nil, err } fields[i] = arrow.Field{ Name: base.Names[i], Type: dt, Nullable: nullable, } } return arrow.NewSchema(fields, nil), nil } type ( regCtxKey struct{} extCtxKey struct{} ) func WithExtensionRegistry(ctx context.Context, reg *ExtensionIDRegistry) context.Context { return context.WithValue(ctx, regCtxKey{}, reg) } func GetExtensionRegistry(ctx context.Context) *ExtensionIDRegistry { v, ok := ctx.Value(regCtxKey{}).(*ExtensionIDRegistry) if !ok { v = DefaultExtensionIDRegistry } return v } func WithExtensionIDSet(ctx context.Context, ext ExtensionIDSet) context.Context { return context.WithValue(ctx, extCtxKey{}, ext) } func GetExtensionIDSet(ctx context.Context) ExtensionIDSet { v, ok := ctx.Value(extCtxKey{}).(ExtensionIDSet) if !ok { return NewExtensionSet( expr.NewEmptyExtensionRegistry(extensions.GetDefaultCollectionWithNoError()), GetExtensionRegistry(ctx)) } return v } func literalToDatum(mem memory.Allocator, lit expr.Literal, ext ExtensionIDSet) (compute.Datum, error) { switch v := lit.(type) { case *expr.PrimitiveLiteral[bool]: return compute.NewDatum(scalar.NewBooleanScalar(v.Value)), nil case *expr.PrimitiveLiteral[int8]: return compute.NewDatum(scalar.NewInt8Scalar(v.Value)), nil case *expr.PrimitiveLiteral[int16]: return compute.NewDatum(scalar.NewInt16Scalar(v.Value)), nil case *expr.PrimitiveLiteral[int32]: return compute.NewDatum(scalar.NewInt32Scalar(v.Value)), nil case *expr.PrimitiveLiteral[int64]: return compute.NewDatum(scalar.NewInt64Scalar(v.Value)), nil case *expr.PrimitiveLiteral[float32]: return compute.NewDatum(scalar.NewFloat32Scalar(v.Value)), nil case *expr.PrimitiveLiteral[float64]: return compute.NewDatum(scalar.NewFloat64Scalar(v.Value)), nil case *expr.PrimitiveLiteral[string]: 
return compute.NewDatum(scalar.NewStringScalar(v.Value)), nil case *expr.PrimitiveLiteral[types.Timestamp]: return compute.NewDatum(scalar.NewTimestampScalar(arrow.Timestamp(v.Value), &arrow.TimestampType{Unit: arrow.Microsecond})), nil case *expr.PrimitiveLiteral[types.TimestampTz]: return compute.NewDatum(scalar.NewTimestampScalar(arrow.Timestamp(v.Value), &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: TimestampTzTimezone})), nil case *expr.PrimitiveLiteral[types.Date]: return compute.NewDatum(scalar.NewDate32Scalar(arrow.Date32(v.Value))), nil case *expr.PrimitiveLiteral[types.Time]: return compute.NewDatum(scalar.NewTime64Scalar(arrow.Time64(v.Value), &arrow.Time64Type{Unit: arrow.Microsecond})), nil case *expr.PrimitiveLiteral[types.FixedChar]: length := int(v.Type.(*types.FixedCharType).Length) return compute.NewDatum(scalar.NewExtensionScalar( scalar.NewFixedSizeBinaryScalar(memory.NewBufferBytes([]byte(v.Value)), &arrow.FixedSizeBinaryType{ByteWidth: length}), fixedChar(int32(length)))), nil case *expr.ByteSliceLiteral[[]byte]: return compute.NewDatum(scalar.NewBinaryScalar(memory.NewBufferBytes(v.Value), arrow.BinaryTypes.Binary)), nil case *expr.ByteSliceLiteral[types.UUID]: return compute.NewDatum(scalar.NewExtensionScalar(scalar.NewFixedSizeBinaryScalar( memory.NewBufferBytes(v.Value), uuid().(arrow.ExtensionType).StorageType()), uuid())), nil case *expr.ByteSliceLiteral[types.FixedBinary]: return compute.NewDatum(scalar.NewFixedSizeBinaryScalar(memory.NewBufferBytes(v.Value), &arrow.FixedSizeBinaryType{ByteWidth: int(v.Type.(*types.FixedBinaryType).Length)})), nil case *expr.NullLiteral: dt, _, err := FromSubstraitType(v.Type, ext) if err != nil { return nil, err } return compute.NewDatum(scalar.MakeNullScalar(dt)), nil case *expr.ListLiteral: var elemType arrow.DataType values := make([]scalar.Scalar, len(v.Value)) for i, val := range v.Value { d, err := literalToDatum(mem, val, ext) if err != nil { return nil, err } defer d.Release() 
values[i] = d.(*compute.ScalarDatum).Value if elemType != nil { if !arrow.TypeEqual(values[i].DataType(), elemType) { return nil, fmt.Errorf("%w: %s has a value whose type doesn't match the other list values", arrow.ErrInvalid, v) } } else { elemType = values[i].DataType() } } bldr := array.NewBuilder(memory.DefaultAllocator, elemType) defer bldr.Release() if err := scalar.AppendSlice(bldr, values); err != nil { return nil, err } arr := bldr.NewArray() defer arr.Release() return compute.NewDatum(scalar.NewListScalar(arr)), nil case *expr.MapLiteral: dt, _, err := FromSubstraitType(v.Type, ext) if err != nil { return nil, err } mapType, ok := dt.(*arrow.MapType) if !ok { return nil, fmt.Errorf("%w: map literal with non-map type", arrow.ErrInvalid) } keys, values := make([]scalar.Scalar, len(v.Value)), make([]scalar.Scalar, len(v.Value)) for i, kv := range v.Value { k, err := literalToDatum(mem, kv.Key, ext) if err != nil { return nil, err } defer k.Release() scalarKey := k.(*compute.ScalarDatum).Value v, err := literalToDatum(mem, kv.Value, ext) if err != nil { return nil, err } defer v.Release() scalarValue := v.(*compute.ScalarDatum).Value if !arrow.TypeEqual(mapType.KeyType(), scalarKey.DataType()) { return nil, fmt.Errorf("%w: key type mismatch for %s, got key with type %s", arrow.ErrInvalid, mapType, scalarKey.DataType()) } if !arrow.TypeEqual(mapType.ItemType(), scalarValue.DataType()) { return nil, fmt.Errorf("%w: value type mismatch for %s, got value with type %s", arrow.ErrInvalid, mapType, scalarValue.DataType()) } keys[i], values[i] = scalarKey, scalarValue } keyBldr, valBldr := array.NewBuilder(mem, mapType.KeyType()), array.NewBuilder(mem, mapType.ItemType()) defer keyBldr.Release() defer valBldr.Release() if err := scalar.AppendSlice(keyBldr, keys); err != nil { return nil, err } if err := scalar.AppendSlice(valBldr, values); err != nil { return nil, err } keyArr, valArr := keyBldr.NewArray(), valBldr.NewArray() defer keyArr.Release() defer 
valArr.Release() kvArr, err := array.NewStructArray([]arrow.Array{keyArr, valArr}, []string{"key", "value"}) if err != nil { return nil, err } defer kvArr.Release() return compute.NewDatumWithoutOwning(scalar.NewMapScalar(kvArr)), nil case *expr.StructLiteral: fields := make([]scalar.Scalar, len(v.Value)) names := make([]string, len(v.Value)) for i, l := range v.Value { lit, err := literalToDatum(mem, l, ext) if err != nil { return nil, err } fields[i] = lit.(*compute.ScalarDatum).Value } s, err := scalar.NewStructScalarWithNames(fields, names) return compute.NewDatum(s), err case *expr.ProtoLiteral: switch v := v.Value.(type) { case *types.Decimal: if len(v.Value) != arrow.Decimal128SizeBytes { return nil, fmt.Errorf("%w: decimal literal had %d bytes (expected %d)", arrow.ErrInvalid, len(v.Value), arrow.Decimal128SizeBytes) } var val decimal128.Num data := (*(*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&val)))[:] copy(data, v.Value) if endian.IsBigEndian { // reverse the bytes for i := len(data)/2 - 1; i >= 0; i-- { opp := len(data) - 1 - i data[i], data[opp] = data[opp], data[i] } } return compute.NewDatum(scalar.NewDecimal128Scalar(val, &arrow.Decimal128Type{Precision: v.Precision, Scale: v.Scale})), nil case *types.UserDefinedLiteral: // not yet implemented case *types.IntervalYearToMonth: bldr := array.NewInt32Builder(memory.DefaultAllocator) defer bldr.Release() typ := intervalYear() bldr.Append(v.Years) bldr.Append(v.Months) arr := bldr.NewArray() defer arr.Release() return &compute.ScalarDatum{Value: scalar.NewExtensionScalar( scalar.NewFixedSizeListScalar(arr), typ)}, nil case *types.IntervalDayToSecond: bldr := array.NewInt32Builder(memory.DefaultAllocator) defer bldr.Release() typ := intervalDay() bldr.Append(v.Days) bldr.Append(v.Seconds) arr := bldr.NewArray() defer arr.Release() return &compute.ScalarDatum{Value: scalar.NewExtensionScalar( scalar.NewFixedSizeListScalar(arr), typ)}, nil case *types.VarChar: return 
compute.NewDatum(scalar.NewExtensionScalar( scalar.NewStringScalar(v.Value), varChar(int32(v.Length)))), nil } } return nil, arrow.ErrNotImplemented } // ExecuteScalarExpression executes the given substrait expression using the provided datum as input. // It will first create an exec batch using the input schema and the datum. // The datum may have missing or incorrectly ordered columns while the input schema // should describe the expected input schema for the expression. Missing fields will // be replaced with null scalars and incorrectly ordered columns will be re-ordered // according to the schema. // // You can provide an allocator to use through the context via compute.WithAllocator. // // You can provide the ExtensionIDSet to use through the context via WithExtensionIDSet. func ExecuteScalarExpression(ctx context.Context, inputSchema *arrow.Schema, expression expr.Expression, partialInput compute.Datum) (compute.Datum, error) { if expression == nil { return nil, arrow.ErrInvalid } batch, err := makeExecBatch(ctx, inputSchema, partialInput) if err != nil { return nil, err } defer func() { for _, v := range batch.Values { v.Release() } }() return executeScalarBatch(ctx, batch, expression, GetExtensionIDSet(ctx)) } // ExecuteScalarSubstrait uses the provided Substrait extended expression to // determine the expected input schema (replacing missing fields in the partial // input datum with null scalars and re-ordering columns if necessary) and // ExtensionIDSet to use. You can provide the extension registry to use // through the context via WithExtensionRegistry, otherwise the default // Arrow registry will be used. You can provide a memory.Allocator to use // the same way via compute.WithAllocator. 
func ExecuteScalarSubstrait(ctx context.Context, expression *expr.Extended, partialInput compute.Datum) (compute.Datum, error) {
	if expression == nil {
		return nil, arrow.ErrInvalid
	}

	var toExecute expr.Expression

	// exactly one referred expression (and not a measure) is supported
	switch len(expression.ReferredExpr) {
	case 0:
		return nil, fmt.Errorf("%w: no referred expression to execute", arrow.ErrInvalid)
	case 1:
		if toExecute = expression.ReferredExpr[0].GetExpr(); toExecute == nil {
			return nil, fmt.Errorf("%w: measures not implemented", arrow.ErrNotImplemented)
		}
	default:
		return nil, fmt.Errorf("%w: only single referred expression implemented", arrow.ErrNotImplemented)
	}

	// build the extension set from the expression's own extension declarations
	// plus the registry carried in (or defaulted from) the context
	reg := GetExtensionRegistry(ctx)
	set := NewExtensionSet(expr.NewExtensionRegistry(expression.Extensions, extensions.GetDefaultCollectionWithNoError()), reg)
	sc, err := ToArrowSchema(expression.BaseSchema, set)
	if err != nil {
		return nil, err
	}

	return ExecuteScalarExpression(WithExtensionIDSet(ctx, set), sc, toExecute, partialInput)
}

// execFieldRef resolves a substrait FieldReference against the exec batch,
// returning the referenced column/scalar. Only root references with direct
// (ReferenceSegment) paths are supported; the resolved value's type must
// match the type substrait declared for the reference.
func execFieldRef(ctx context.Context, e *expr.FieldReference, input compute.ExecBatch, ext ExtensionIDSet) (compute.Datum, error) {
	if e.Root != expr.RootReference {
		return nil, fmt.Errorf("%w: only RootReference is implemented", arrow.ErrNotImplemented)
	}

	ref, ok := e.Reference.(expr.ReferenceSegment)
	if !ok {
		return nil, fmt.Errorf("%w: only direct references are implemented", arrow.ErrNotImplemented)
	}

	expectedType, _, err := FromSubstraitType(e.GetType(), ext)
	if err != nil {
		return nil, err
	}

	// the leading StructFieldRef selects the batch column; the remainder of
	// the path (if any) is resolved within that column by GetReferencedValue
	var param compute.Datum
	if sref, ok := ref.(*expr.StructFieldRef); ok {
		if sref.Field < 0 || sref.Field >= int32(len(input.Values)) {
			return nil, arrow.ErrInvalid
		}
		param = input.Values[sref.Field]
		ref = ref.GetChild()
	}

	out, err := GetReferencedValue(compute.GetAllocator(ctx), ref, param, ext)
	if err == compute.ErrEmpty {
		// an empty remaining path means the column itself is the result
		out = compute.NewDatum(param)
	} else if err != nil {
		return nil, err
	}
	if !arrow.TypeEqual(out.(compute.ArrayLikeDatum).Type(), expectedType) {
		return nil, fmt.Errorf("%w: referenced field %s was %s, but should have been %s",
			arrow.ErrInvalid, ref, out.(compute.ArrayLikeDatum).Type(), expectedType)
	}
	return out, nil
}

// executeScalarBatch recursively evaluates a scalar substrait expression
// (literal, field reference, cast, or scalar function call) against the
// provided exec batch, returning the result datum (caller must Release).
func executeScalarBatch(ctx context.Context, input compute.ExecBatch, exp expr.Expression, ext ExtensionIDSet) (compute.Datum, error) {
	if !exp.IsScalar() {
		return nil, fmt.Errorf("%w: ExecuteScalarExpression cannot execute non-scalar expressions", arrow.ErrInvalid)
	}

	switch e := exp.(type) {
	case expr.Literal:
		return literalToDatum(compute.GetAllocator(ctx), e, ext)
	case *expr.FieldReference:
		return execFieldRef(ctx, e, input, ext)
	case *expr.Cast:
		if e.Input == nil {
			return nil, fmt.Errorf("%w: cast without argument to cast", arrow.ErrInvalid)
		}

		// evaluate the operand first, then cast its result
		arg, err := executeScalarBatch(ctx, input, e.Input, ext)
		if err != nil {
			return nil, err
		}
		defer arg.Release()

		dt, _, err := FromSubstraitType(e.Type, ext)
		if err != nil {
			return nil, fmt.Errorf("%w: could not determine type for cast", err)
		}

		var opts *compute.CastOptions
		switch e.FailureBehavior {
		case types.BehaviorThrowException:
			// substrait "throw" maps to unchecked (unsafe) casting; failures
			// surface as errors from CastDatum
			opts = compute.UnsafeCastOptions(dt)
		case types.BehaviorUnspecified:
			return nil, fmt.Errorf("%w: cast behavior unspecified", arrow.ErrInvalid)
		case types.BehaviorReturnNil:
			return nil, fmt.Errorf("%w: cast behavior return nil", arrow.ErrNotImplemented)
		}
		return compute.CastDatum(ctx, arg, opts)
	case *expr.ScalarFunction:
		var (
			err       error
			allScalar = true
			args      = make([]compute.Datum, e.NArgs())
			argTypes  = make([]arrow.DataType, e.NArgs())
		)
		// evaluate every argument; enum options are passed as string scalars
		for i := 0; i < e.NArgs(); i++ {
			switch v := e.Arg(i).(type) {
			case types.Enum:
				args[i] = compute.NewDatum(scalar.NewStringScalar(string(v)))
			case expr.Expression:
				args[i], err = executeScalarBatch(ctx, input, v, ext)
				if err != nil {
					return nil, err
				}
				defer args[i].Release()

				if args[i].Kind() != compute.KindScalar {
					allScalar = false
				}
			default:
				return nil, arrow.ErrNotImplemented
			}
			argTypes[i] = args[i].(compute.ArrayLikeDatum).Type()
		}

		// map the substrait function reference to an Arrow function name
		// plus translated options via the extension set's converter
		_, conv, ok := ext.DecodeFunction(e.FuncRef())
		if !ok {
			return nil, arrow.ErrNotImplemented
		}

		fname, opts, err := conv(e)
		if err != nil {
			return nil, err
		}

		ectx := compute.GetExecCtx(ctx)
		fn, ok := ectx.Registry.GetFunction(fname)
		if !ok {
			return nil, arrow.ErrInvalid
		}

		if fn.Kind() != compute.FuncScalar {
			return nil, arrow.ErrInvalid
		}

		// DispatchBest may adjust argTypes to the kernel's accepted types
		k, err := fn.DispatchBest(argTypes...)
		if err != nil {
			return nil, err
		}

		var newArgs []compute.Datum
		// cast arguments if necessary
		for i, arg := range args {
			if !arrow.TypeEqual(argTypes[i], arg.(compute.ArrayLikeDatum).Type()) {
				if newArgs == nil {
					// copy-on-write: only clone the slice once a cast is needed
					newArgs = make([]compute.Datum, len(args))
					copy(newArgs, args)
				}
				newArgs[i], err = compute.CastDatum(ctx, arg, compute.SafeCastOptions(argTypes[i]))
				if err != nil {
					return nil, err
				}
				defer newArgs[i].Release()
			}
		}
		if newArgs != nil {
			args = newArgs
		}

		kctx := &exec.KernelCtx{Ctx: ctx, Kernel: k}
		init := k.GetInitFn()
		kinitArgs := exec.KernelInitArgs{Kernel: k, Inputs: argTypes, Options: opts}
		if init != nil {
			kctx.State, err = init(kctx, kinitArgs)
			if err != nil {
				return nil, err
			}
		}

		executor := compute.NewScalarExecutor()
		if err := executor.Init(kctx, kinitArgs); err != nil {
			return nil, err
		}

		batch := compute.ExecBatch{Values: args}
		if allScalar {
			batch.Len = 1
		} else {
			batch.Len = input.Len
		}

		// note: this deliberately derives from context.Background(), not the
		// caller's ctx — presumably to keep kernel execution cancellation
		// local to this call; TODO confirm caller cancellation is not expected
		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()

		ch := make(chan compute.Datum, ectx.ExecChannelSize)
		go func() {
			defer close(ch)
			// NOTE(review): err is shared with the outer scope; it is read
			// after WrapResults has drained ch (the goroutine closes ch when
			// done) — looks intentional, but verify for a data race
			if err = executor.Execute(ctx, &batch, ch); err != nil {
				cancel()
			}
		}()

		result := executor.WrapResults(ctx, ch, false)
		if err == nil {
			debug.Assert(executor.CheckResultType(result) == nil, "invalid result type")
		}

		if ctx.Err() == context.Canceled && result != nil {
			result.Release()
			result = nil
		}

		return result, err
	}

	return nil, arrow.ErrNotImplemented
}
arrow-go-18.2.0/arrow/compute/exprs/exec_internal_test.go000066400000000000000000000100531476434502500235250ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.
// See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build go1.18

package exprs

import (
	"context"
	"strings"
	"testing"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/compute"
	"github.com/apache/arrow-go/v18/arrow/memory"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

var (
	// schema covering one field of each commonly-exercised type, used as the
	// "expected" schema that makeExecBatch must coerce partial inputs into
	boringArrowSchema = arrow.NewSchema([]arrow.Field{
		{Name: "bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: true},
		{Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true},
		{Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
		{Name: "i32_req", Type: arrow.PrimitiveTypes.Int32},
		{Name: "u32", Type: arrow.PrimitiveTypes.Uint32, Nullable: true},
		{Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true},
		{Name: "f32_req", Type: arrow.PrimitiveTypes.Float32},
		{Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
		{Name: "date32", Type: arrow.FixedWidthTypes.Date32, Nullable: true},
		{Name: "str", Type: arrow.BinaryTypes.String, Nullable: true},
		{Name: "bin", Type: arrow.BinaryTypes.Binary, Nullable: true},
	}, nil)
)

// TestMakeExecBatch verifies that makeExecBatch fills fields missing from a
// partial record with null scalar placeholders and re-orders present columns
// to match the expected schema, without leaking any allocations.
func TestMakeExecBatch(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	const numRows = 3

	var (
		ctx         = compute.WithAllocator(context.Background(), mem)
		i32, _, _   = array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3]`))
		f32, _, _   = array.FromJSON(mem, arrow.PrimitiveTypes.Float32, strings.NewReader(`[1.5, 2.25, 3.125]`))
		empty, _, _ = array.RecordFromJSON(mem, boringArrowSchema, strings.NewReader(`[]`))
	)
	defer i32.Release()
	defer f32.Release()

	// helper: fetch a field definition from the expected schema by name
	getField := func(n string) arrow.Field {
		f, _ := boringArrowSchema.FieldsByName(n)
		return f[0]
	}

	tests := []struct {
		name  string
		batch arrow.Record
	}{
		{"empty", empty},
		{"subset", array.NewRecord(arrow.NewSchema([]arrow.Field{getField("i32"), getField("f32")}, nil),
			[]arrow.Array{i32, f32}, numRows)},
		{"flipped subset", array.NewRecord(arrow.NewSchema([]arrow.Field{getField("f32"), getField("i32")}, nil),
			[]arrow.Array{f32, i32}, numRows)},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			defer tt.batch.Release()
			batch, err := makeExecBatch(ctx, boringArrowSchema, compute.NewDatumWithoutOwning(tt.batch))
			require.NoError(t, err)
			require.Equal(t, tt.batch.NumRows(), batch.Len)

			defer func() {
				for _, v := range batch.Values {
					v.Release()
				}
			}()

			for i, field := range boringArrowSchema.Fields() {
				typ := batch.Values[i].(compute.ArrayLikeDatum).Type()
				assert.Truef(t, arrow.TypeEqual(typ, field.Type),
					"expected: %s\ngot: %s", field.Type, typ)

				idxes := tt.batch.Schema().FieldIndices(field.Name)
				if batch.Values[i].Kind() == compute.KindScalar {
					// a scalar slot must be the injected null placeholder, and
					// must only occur when the column was absent from the input
					assert.False(t, batch.Values[i].(*compute.ScalarDatum).Value.IsValid(),
						"null placeholder should be injected")
					assert.Len(t, idxes, 0, "should only happen when column isn't found")
				} else {
					col := tt.batch.Column(idxes[0])
					val := batch.Values[i].(*compute.ArrayDatum).MakeArray()
					defer val.Release()

					assert.Truef(t, array.Equal(col, val),
						"expected: %s\ngot: %s", col, val)
				}
			}
		})
	}
}
arrow-go-18.2.0/arrow/compute/exprs/exec_test.go000066400000000000000000000424331476434502500216400ustar00rootroot00000000000000// Licensed to the Apache Software Foundation
// (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build go1.18

package exprs_test

import (
	"context"
	"strings"
	"testing"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/compute"
	"github.com/apache/arrow-go/v18/arrow/compute/exprs"
	"github.com/apache/arrow-go/v18/arrow/extensions"
	"github.com/apache/arrow-go/v18/arrow/memory"
	"github.com/apache/arrow-go/v18/arrow/scalar"
	"github.com/google/uuid"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/substrait-io/substrait-go/v3/expr"
	"github.com/substrait-io/substrait-go/v3/types"
)

var (
	extSet           = exprs.NewDefaultExtensionSet()
	_, u32TypeRef, _ = extSet.EncodeTypeVariation(arrow.PrimitiveTypes.Uint32)

	// substrait-side schema mirrored by boringArrowSchema below; u32 is
	// expressed as int32 with an unsigned type-variation reference
	boringSchema = types.NamedStruct{
		Names: []string{
			"bool", "i8", "i32", "i32_req",
			"u32", "i64", "f32", "f32_req",
			"f64", "date32", "str", "bin"},
		Struct: types.StructType{
			Nullability: types.NullabilityRequired,
			Types: []types.Type{
				&types.BooleanType{},
				&types.Int8Type{},
				&types.Int32Type{},
				&types.Int32Type{Nullability: types.NullabilityRequired},
				&types.Int32Type{
					TypeVariationRef: u32TypeRef,
				},
				&types.Int64Type{},
				&types.Float32Type{},
				&types.Float32Type{Nullability: types.NullabilityRequired},
				&types.Float64Type{},
				&types.DateType{},
				&types.StringType{},
				&types.BinaryType{},
			},
		},
	}

	boringArrowSchema = arrow.NewSchema([]arrow.Field{
		{Name: "bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: true},
		{Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true},
		{Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
		{Name: "u32", Type: arrow.PrimitiveTypes.Uint32, Nullable: true},
		{Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true},
		{Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
		{Name: "date32", Type: arrow.FixedWidthTypes.Date32, Nullable: true},
		{Name: "str", Type: arrow.BinaryTypes.String, Nullable: true},
		{Name: "bin", Type: arrow.BinaryTypes.Binary, Nullable: true},
	}, nil)
)

// TestToArrowSchema checks the substrait NamedStruct -> arrow.Schema mapping,
// including nullability and the u32 type-variation round trip.
func TestToArrowSchema(t *testing.T) {
	expectedSchema := arrow.NewSchema([]arrow.Field{
		{Name: "bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: true},
		{Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true},
		{Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
		{Name: "i32_req", Type: arrow.PrimitiveTypes.Int32},
		{Name: "u32", Type: arrow.PrimitiveTypes.Uint32, Nullable: true},
		{Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true},
		{Name: "f32_req", Type: arrow.PrimitiveTypes.Float32},
		{Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
		{Name: "date32", Type: arrow.FixedWidthTypes.Date32, Nullable: true},
		{Name: "str", Type: arrow.BinaryTypes.String, Nullable: true},
		{Name: "bin", Type: arrow.BinaryTypes.Binary, Nullable: true},
	}, nil)

	sc, err := exprs.ToArrowSchema(boringSchema, extSet)
	assert.NoError(t, err)

	assert.Truef(t, expectedSchema.Equal(sc), "expected: %s\ngot: %s", expectedSchema, sc)
}

// assertEqual compares an expected Datum or arrow.Array against an actual
// Datum or arrow.Array, failing the test with a readable diff on mismatch.
func assertEqual(t *testing.T, expected, actual any) bool {
	switch e := expected.(type) {
	case compute.Datum:
		return assert.Truef(t, e.Equals(compute.NewDatumWithoutOwning(actual)),
			"expected: %s\ngot: %s", e, actual)
	case arrow.Array:
		switch a := actual.(type) {
		case compute.Datum:
			if a.Kind() == compute.KindArray {
				actual := a.(*compute.ArrayDatum).MakeArray()
				defer actual.Release()
				return assert.Truef(t, array.Equal(e, actual),
					"expected: %s\ngot: %s", e, actual)
			}
		case arrow.Array:
			return assert.Truef(t, array.Equal(e, a),
				"expected: %s\ngot: %s", e, actual)
		}
		t.Errorf("expected arrow Array, got %s", actual)
		return false
	}
	panic("unimplemented comparison")
}

// TestComparisons runs equal/less/greater through the full substrait
// extended-expression path for ints, string-vs-binary, and UUID extension
// scalars.
func TestComparisons(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	var (
		ctx  = compute.WithAllocator(context.Background(), mem)
		zero = scalar.MakeScalar(int32(0))
		one  = scalar.MakeScalar(int32(1))
		two  = scalar.MakeScalar(int32(2))

		str = scalar.MakeScalar("hello")
		bin = scalar.MakeScalar([]byte("hello"))

		exampleUUID    = uuid.MustParse("102cb62f-e6f8-4eb0-9973-d9b012ff0967")
		exampleUUID2   = uuid.MustParse("c1b0d8e0-0b0e-4b1e-9b0a-0e0b0d0c0a0b")
		uuidStorage, _ = scalar.MakeScalarParam(exampleUUID[:],
			&arrow.FixedSizeBinaryType{ByteWidth: 16})
		uuidScalar      = scalar.NewExtensionScalar(uuidStorage, extensions.NewUUIDType())
		uuidStorage2, _ = scalar.MakeScalarParam(exampleUUID2[:],
			&arrow.FixedSizeBinaryType{ByteWidth: 16})
		uuidScalar2 = scalar.NewExtensionScalar(uuidStorage2, extensions.NewUUIDType())
	)

	// map an arrow argument type to the substrait type used in the base schema
	getArgType := func(dt arrow.DataType) types.Type {
		switch dt.ID() {
		case arrow.INT32:
			return &types.Int32Type{}
		case arrow.STRING:
			return &types.StringType{}
		case arrow.BINARY:
			return &types.BinaryType{}
		case arrow.EXTENSION:
			return &types.UUIDType{}
		}
		panic("wtf")
	}

	// expect builds a two-argument scalar call `fn(arg1, arg2)`, executes it
	// via ExecuteScalarSubstrait on a struct scalar input, and asserts the
	// boolean result
	expect := func(t *testing.T, fn string, arg1, arg2 scalar.Scalar, res bool) {
		baseStruct := types.NamedStruct{
			Names: []string{"arg1", "arg2"},
			Struct: types.StructType{
				Types: []types.Type{getArgType(arg1.DataType()), getArgType(arg2.DataType())},
			},
		}

		ex, err := exprs.NewScalarCall(extSet, fn, nil,
			expr.MustExpr(expr.NewRootFieldRef(expr.NewStructFieldRef(0), types.NewRecordTypeFromStruct(baseStruct.Struct))),
			expr.MustExpr(expr.NewRootFieldRef(expr.NewStructFieldRef(1), types.NewRecordTypeFromStruct(baseStruct.Struct))))
		require.NoError(t, err)

		expression := &expr.Extended{
			Extensions: extSet.GetSubstraitRegistry().Set,
			ReferredExpr: []expr.ExpressionReference{
				expr.NewExpressionReference([]string{"out"}, ex),
			},
			BaseSchema: baseStruct,
		}

		input, _ := scalar.NewStructScalarWithNames([]scalar.Scalar{arg1, arg2}, []string{"arg1", "arg2"})
		out, err := exprs.ExecuteScalarSubstrait(ctx, expression, compute.NewDatum(input))
		require.NoError(t, err)
		require.Equal(t, compute.KindScalar, out.Kind())

		result := out.(*compute.ScalarDatum).Value
		assert.Equal(t, res, result.(*scalar.Boolean).Value)
	}

	expect(t, "equal", one, one, true)
	expect(t, "equal", one, two, false)
	expect(t, "less", one, two, true)
	expect(t, "less", one, zero, false)
	expect(t, "greater", one, zero, true)
	expect(t, "greater", one, two, false)

	// string and binary compare equal by value across the type boundary
	expect(t, "equal", str, bin, true)
	expect(t, "equal", bin, str, true)

	expect(t, "equal", uuidScalar, uuidScalar, true)
	expect(t, "equal", uuidScalar, uuidScalar2, false)
	expect(t, "less", uuidScalar, uuidScalar2, true)
	expect(t, "less", uuidScalar2, uuidScalar, false)
	expect(t, "greater", uuidScalar, uuidScalar2, false)
	expect(t, "greater", uuidScalar2, uuidScalar, true)
}

// TestExecuteFieldRef exercises field-reference resolution on flat structs,
// nested structs (by path and by name), null handling, and scalar inputs.
func TestExecuteFieldRef(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())

	fromJSON := func(ty arrow.DataType, json string) arrow.Array {
		arr, _, err := array.FromJSON(mem, ty, strings.NewReader(json))
		require.NoError(t, err)
		return arr
	}

	// scalarFromJSON builds a one-row array and extracts row 0 as a scalar
	scalarFromJSON := func(ty arrow.DataType, json string) scalar.Scalar {
		arr, _, err := array.FromJSON(mem, ty, strings.NewReader(json))
		require.NoError(t, err)
		defer arr.Release()
		s, err := scalar.GetScalar(arr, 0)
		require.NoError(t, err)
		return s
	}

	tests := []struct {
		testName string
		ref      compute.FieldRef
		input    compute.Datum
		expected compute.Datum
	}{
		{"basic ref", compute.FieldRefName("a"),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.StructOf(arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true}),
				`[
					{"a": 6.125},
					{"a": 0.0},
					{"a": -1}
				]`)),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))},
		{"ref one field", compute.FieldRefName("a"),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.StructOf(
					arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
					arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}),
				`[
					{"a": 6.125, "b": 7.5},
					{"a": 0.0, "b": 2.125},
					{"a": -1, "b": 4.0}
				]`)),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))},
		{"second field", compute.FieldRefName("b"),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.StructOf(
					arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
					arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}),
				`[
					{"a": 6.125, "b": 7.5},
					{"a": 0.0, "b": 2.125},
					{"a": -1, "b": 4.0}
				]`)),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.PrimitiveTypes.Float64, `[7.5, 2.125, 4.0]`))},
		{"nested field by path", compute.FieldRefPath(compute.FieldPath{0, 0}),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.StructOf(
					arrow.Field{Name: "a", Type: arrow.StructOf(
						arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}),
						Nullable: true}),
				`[
					{"a": {"b": 6.125}},
					{"a": {"b": 0.0}},
					{"a": {"b": -1}}
				]`)),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))},
		{"nested field by name", compute.FieldRefList("a", "b"),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.StructOf(
					arrow.Field{Name: "a", Type: arrow.StructOf(
						arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}),
						Nullable: true}),
				`[
					{"a": {"b": 6.125}},
					{"a": {"b": 0.0}},
					{"a": {"b": -1}}
				]`)),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))},
		{"nested field with nulls", compute.FieldRefList("a", "b"),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.StructOf(
					arrow.Field{Name: "a", Type: arrow.StructOf(
						arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}),
						Nullable: true}),
				`[
					{"a": {"b": 6.125}},
					{"a": null},
					{"a": {"b": null}}
				]`)),
			compute.NewDatumWithoutOwning(fromJSON(
				arrow.PrimitiveTypes.Float64, `[6.125, null, null]`))},
		{"nested scalar", compute.FieldRefList("a", "b"),
			compute.NewDatumWithoutOwning(
				scalarFromJSON(arrow.StructOf(
					arrow.Field{Name: "a", Type: arrow.StructOf(
						arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}),
						Nullable: true}), `[{"a": {"b": 64.0}}]`)),
			compute.NewDatum(scalar.NewFloat64Scalar(64.0))},
		{"nested scalar with null", compute.FieldRefList("a", "b"),
			compute.NewDatumWithoutOwning(
				scalarFromJSON(arrow.StructOf(
					arrow.Field{Name: "a", Type: arrow.StructOf(
						arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}),
						Nullable: true}), `[{"a": {"b": null}}]`)),
			compute.NewDatum(scalar.MakeNullScalar(arrow.PrimitiveTypes.Float64))},
		{"nested scalar null", compute.FieldRefList("a", "b"),
			compute.NewDatumWithoutOwning(
				scalarFromJSON(arrow.StructOf(
					arrow.Field{Name: "a", Type: arrow.StructOf(
						arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}),
						Nullable: true}), `[{"a": null}]`)),
			compute.NewDatum(scalar.MakeNullScalar(arrow.PrimitiveTypes.Float64))},
	}

	for _, tt := range tests {
		t.Run(tt.testName, func(t *testing.T) {
			scoped := memory.NewCheckedAllocatorScope(mem)
			defer scoped.CheckSize(t)

			ctx := exprs.WithExtensionIDSet(compute.WithAllocator(context.Background(), mem), extSet)
			dt := tt.input.(compute.ArrayLikeDatum).Type().(arrow.NestedType)
			schema := arrow.NewSchema(dt.Fields(), nil)

			ref, err := exprs.NewFieldRef(tt.ref, schema, extSet)
			require.NoError(t, err)
			assert.NotNil(t, ref)

			actual, err := exprs.ExecuteScalarExpression(ctx, schema, ref, tt.input)
			require.NoError(t, err)
			defer actual.Release()

			assert.Truef(t, tt.expected.Equals(actual), "expected: %s\ngot: %s", tt.expected, actual)
		})
	}
}

// TestExecuteScalarFuncCall evaluates built scalar function expressions
// (add, nested add/subtract, and refs into nested structs) over record input.
func TestExecuteScalarFuncCall(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())

	fromJSON := func(ty arrow.DataType, json string) arrow.Array {
		arr, _, err := array.FromJSON(mem, ty, strings.NewReader(json))
		require.NoError(t, err)
		return arr
	}

	basicSchema := arrow.NewSchema([]arrow.Field{
		{Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
		{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
	}, nil)

	nestedSchema := arrow.NewSchema([]arrow.Field{
		{Name: "a", Type: arrow.StructOf(basicSchema.Fields()...), Nullable: false},
	}, nil)

	bldr := exprs.NewExprBuilder(extSet)

	tests := []struct {
		name     string
		ex       exprs.Builder
		sc       *arrow.Schema
		input    compute.Datum
		expected compute.Datum
	}{
		{"add", bldr.MustCallScalar("add", nil,
			bldr.FieldRef("a"),
			bldr.Literal(expr.NewPrimitiveLiteral(float64(3.5), false))),
			basicSchema,
			compute.NewDatumWithoutOwning(fromJSON(arrow.StructOf(basicSchema.Fields()...),
				`[
					{"a": 6.125, "b": 3.375},
					{"a": 0.0, "b": 1},
					{"a": -1, "b": 4.75}
				]`)),
			compute.NewDatumWithoutOwning(fromJSON(arrow.PrimitiveTypes.Float64,
				`[9.625, 3.5, 2.5]`))},
		{"add sub", bldr.MustCallScalar("add", nil,
			bldr.FieldRef("a"),
			bldr.MustCallScalar("subtract", nil,
				bldr.WrapLiteral(expr.NewLiteral(float64(3.5), false)),
				bldr.FieldRef("b"))),
			basicSchema,
			compute.NewDatumWithoutOwning(fromJSON(arrow.StructOf(basicSchema.Fields()...),
				`[
					{"a": 6.125, "b": 3.375},
					{"a": 0.0, "b": 1},
					{"a": -1, "b": 4.75}
				]`)),
			compute.NewDatumWithoutOwning(fromJSON(arrow.PrimitiveTypes.Float64,
				`[6.25, 2.5, -2.25]`))},
		{"add nested", bldr.MustCallScalar("add", nil,
			bldr.FieldRefList("a", "a"),
			bldr.FieldRefList("a", "b")),
			nestedSchema,
			compute.NewDatumWithoutOwning(fromJSON(arrow.StructOf(nestedSchema.Fields()...),
				`[
					{"a": {"a": 6.125, "b": 3.375}},
					{"a": {"a": 0.0, "b": 1}},
					{"a": {"a": -1, "b": 4.75}}
				]`)),
			compute.NewDatumWithoutOwning(fromJSON(arrow.PrimitiveTypes.Float64,
				`[9.5, 1, 3.75]`))},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			scoped := memory.NewCheckedAllocatorScope(mem)
			defer scoped.CheckSize(t)

			bldr.SetInputSchema(tt.sc)
			ex, err := tt.ex.BuildExpr()
			require.NoError(t, err)

			ctx := exprs.WithExtensionIDSet(compute.WithAllocator(context.Background(), mem), extSet)
			dt := tt.input.(compute.ArrayLikeDatum).Type().(arrow.NestedType)
			schema := arrow.NewSchema(dt.Fields(), nil)

			actual, err := exprs.ExecuteScalarExpression(ctx, schema, ex, tt.input)
			require.NoError(t, err)
			defer actual.Release()

			assert.Truef(t, tt.expected.Equals(actual), "expected: %s\ngot: %s", tt.expected, actual)
		})
	}
}

// TestGenerateMask evaluates boolean filter expressions over a record whose
// "in" column holds the expected mask, and compares results against it.
func TestGenerateMask(t *testing.T) {
	sc, err := boringArrowSchema.AddField(0, arrow.Field{
		Name: "in", Type: arrow.FixedWidthTypes.Boolean, Nullable: true})
	require.NoError(t, err)

	bldr := exprs.NewExprBuilder(extSet)
	require.NoError(t, bldr.SetInputSchema(sc))

	tests := []struct {
		name   string
		json   string
		filter exprs.Builder
	}{
		{"simple", `[
			{"i32": 0, "f32": -0.1, "in": true},
			{"i32": 0, "f32": 0.3, "in": true},
			{"i32": 1, "f32": 0.2, "in": false},
			{"i32": 2, "f32": -0.1, "in": false},
			{"i32": 0, "f32": 0.1, "in": true},
			{"i32": 0, "f32": null, "in": true},
			{"i32": 0, "f32": 1.0, "in": true}
		]`, bldr.MustCallScalar("equal", nil,
			bldr.FieldRef("i32"),
			bldr.Literal(expr.NewPrimitiveLiteral(int32(0), false)))},
		{"complex", `[
			{"f64": 0.3, "f32": 0.1, "in": true},
			{"f64": -0.1, "f32": 0.3, "in": false},
			{"f64": 0.1, "f32": 0.2, "in": true},
			{"f64": 0.0, "f32": -0.1, "in": false},
			{"f64": 1.0, "f32": 0.1, "in": true},
			{"f64": -2.0, "f32": null, "in": null},
			{"f64": 3.0, "f32": 1.0, "in": true}
		]`, bldr.MustCallScalar("greater", nil,
			bldr.MustCallScalar("multiply", nil,
				bldr.Must(bldr.Cast(bldr.FieldRef("f32"), arrow.PrimitiveTypes.Float64)),
				bldr.FieldRef("f64")),
			bldr.Literal(expr.NewPrimitiveLiteral(float64(0), false)))},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
			defer mem.AssertSize(t, 0)

			ctx := exprs.WithExtensionIDSet(compute.WithAllocator(context.Background(), mem), extSet)

			rec, _, err := array.RecordFromJSON(mem, sc, strings.NewReader(tt.json))
			require.NoError(t, err)
			defer rec.Release()

			input := compute.NewDatumWithoutOwning(rec)
			expectedMask := rec.Column(0)

			mask, err := exprs.ExecuteScalarExpression(ctx, sc,
				expr.MustExpr(tt.filter.BuildExpr()), input)
			require.NoError(t, err)
			defer mask.Release()

			assertEqual(t, expectedMask, mask)
		})
	}
}
arrow-go-18.2.0/arrow/compute/exprs/extension_types.go000066400000000000000000000114301476434502500231060ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build go1.18

package exprs

import (
	"encoding/json"
	"fmt"
	"reflect"
	"strings"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
)

// simpleExtensionTypeFactory is a generic arrow.ExtensionType implementation
// parameterized by a comparable params struct P. A single factory value doubles
// as both the registered prototype (via Deserialize/CreateType) and the concrete
// type instances those methods return. The serialized form is the extension
// name immediately followed by the JSON encoding of P.
type simpleExtensionTypeFactory[P comparable] struct {
	arrow.ExtensionBase

	params     P
	name       string
	getStorage func(P) arrow.DataType // maps params to the underlying storage type
}

// String renders the type as "extension<name{...json...}>".
func (ef *simpleExtensionTypeFactory[P]) String() string { return "extension<" + ef.Serialize() + ">" }

// ExtensionName returns the registered extension type name.
func (ef *simpleExtensionTypeFactory[P]) ExtensionName() string { return ef.name }

// Serialize encodes the type as its name concatenated with the JSON-marshalled
// parameters (marshal errors are intentionally ignored; P is a plain struct).
func (ef *simpleExtensionTypeFactory[P]) Serialize() string {
	s, _ := json.Marshal(ef.params)
	return ef.name + string(s)
}

// Deserialize parses the serialized form produced by Serialize and validates
// that the provided storage type matches what the parameters require.
//
// NOTE(review): json.Unmarshal writes into &ef.params, i.e. it mutates the
// receiver (typically the registered prototype) before constructing the result;
// presumably deserialization is not expected to run concurrently — confirm.
func (ef *simpleExtensionTypeFactory[P]) Deserialize(storage arrow.DataType, data string) (arrow.ExtensionType, error) {
	if !strings.HasPrefix(data, ef.name) {
		return nil, fmt.Errorf("%w: invalid deserialization of extension type %s", arrow.ErrInvalid, ef.name)
	}

	data = strings.TrimPrefix(data, ef.name)
	if err := json.Unmarshal([]byte(data), &ef.params); err != nil {
		return nil, fmt.Errorf("%w: failed parsing parameters for extension type", err)
	}

	// the storage type handed to us must agree with what the parameters imply
	if !arrow.TypeEqual(storage, ef.getStorage(ef.params)) {
		return nil, fmt.Errorf("%w: invalid storage type for %s: %s (expected: %s)",
			arrow.ErrInvalid, ef.name, storage, ef.getStorage(ef.params))
	}

	return &simpleExtensionTypeFactory[P]{
		name:       ef.name,
		params:     ef.params,
		getStorage: ef.getStorage,
		ExtensionBase: arrow.ExtensionBase{
			Storage: storage,
		},
	}, nil
}

// ExtensionEquals reports whether other is the same extension type with equal
// parameters. The unchecked type assertion relies on names being unique per
// instantiation of P.
func (ef *simpleExtensionTypeFactory[P]) ExtensionEquals(other arrow.ExtensionType) bool {
	if ef.name != other.ExtensionName() {
		return false
	}

	rhs := other.(*simpleExtensionTypeFactory[P])
	return ef.params == rhs.params
}

// ArrayType returns the reflect.Type of the array implementation used for
// arrays of this extension type.
func (ef *simpleExtensionTypeFactory[P]) ArrayType() reflect.Type {
	return reflect.TypeOf(simpleExtensionArrayFactory[P]{})
}

// CreateType builds a concrete extension DataType instance for the given
// parameters, deriving the storage type via getStorage.
func (ef *simpleExtensionTypeFactory[P]) CreateType(params P) arrow.DataType {
	storage := ef.getStorage(params)

	return &simpleExtensionTypeFactory[P]{
		name:       ef.name,
		params:     params,
		getStorage: ef.getStorage,
		ExtensionBase: arrow.ExtensionBase{
			Storage: storage,
		},
	}
}

// simpleExtensionArrayFactory is the minimal extension-array wrapper used by
// ArrayType above; all behavior comes from the embedded base.
type simpleExtensionArrayFactory[P comparable] struct {
	array.ExtensionArrayBase
}

// uuidExtParams: the uuid extension type takes no parameters.
type uuidExtParams struct{}

// uuidType: 16-byte fixed-size binary storage.
var uuidType = simpleExtensionTypeFactory[uuidExtParams]{
	name: "arrow.uuid", getStorage: func(uuidExtParams) arrow.DataType {
		return &arrow.FixedSizeBinaryType{ByteWidth: 16}
	}}

// fixedCharExtensionParams carries the character length for fixed_char.
type fixedCharExtensionParams struct {
	Length int32 `json:"length"`
}

// fixedCharType: fixed-size binary storage of the requested length.
var fixedCharType = simpleExtensionTypeFactory[fixedCharExtensionParams]{
	name: "fixed_char",
	getStorage: func(p fixedCharExtensionParams) arrow.DataType {
		return &arrow.FixedSizeBinaryType{ByteWidth: int(p.Length)}
	},
}

// varCharExtensionParams carries the declared max length for varchar; note the
// storage is a plain (unbounded) string regardless of Length.
type varCharExtensionParams struct {
	Length int32 `json:"length"`
}

var varCharType = simpleExtensionTypeFactory[varCharExtensionParams]{
	name: "varchar",
	getStorage: func(varCharExtensionParams) arrow.DataType {
		return arrow.BinaryTypes.String
	},
}

// intervalYearExtensionParams: interval_year takes no parameters.
type intervalYearExtensionParams struct{}

// intervalYearType: stored as a fixed-size list of two int32 values.
var intervalYearType = simpleExtensionTypeFactory[intervalYearExtensionParams]{
	name: "interval_year",
	getStorage: func(intervalYearExtensionParams) arrow.DataType {
		return arrow.FixedSizeListOf(2, arrow.PrimitiveTypes.Int32)
	},
}

// intervalDayExtensionParams: interval_day takes no parameters.
type intervalDayExtensionParams struct{}

// intervalDayType: stored as a fixed-size list of two int32 values.
var intervalDayType = simpleExtensionTypeFactory[intervalDayExtensionParams]{
	name: "interval_day",
	getStorage: func(intervalDayExtensionParams) arrow.DataType {
		return arrow.FixedSizeListOf(2, arrow.PrimitiveTypes.Int32)
	},
}

// uuid returns the parameterless uuid extension type.
func uuid() arrow.DataType { return uuidType.CreateType(uuidExtParams{}) }

// fixedChar returns a fixed_char extension type of the given length.
func fixedChar(length int32) arrow.DataType {
	return fixedCharType.CreateType(fixedCharExtensionParams{Length: length})
}

// varChar returns a varchar extension type with the given declared length.
func varChar(length int32) arrow.DataType {
	return varCharType.CreateType(varCharExtensionParams{Length: length})
}

// intervalYear returns the parameterless interval_year extension type.
func intervalYear() arrow.DataType {
	return intervalYearType.CreateType(intervalYearExtensionParams{})
}

// intervalDay returns the parameterless interval_day extension type.
func intervalDay() arrow.DataType {
	return intervalDayType.CreateType(intervalDayExtensionParams{})
}
arrow-go-18.2.0/arrow/compute/exprs/field_refs.go000066400000000000000000000157331476434502500217620ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package exprs import ( "fmt" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/compute" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/arrow/scalar" "github.com/substrait-io/substrait-go/v3/expr" ) func getFields(typ arrow.DataType) []arrow.Field { if nested, ok := typ.(arrow.NestedType); ok { return nested.Fields() } return nil } // GetRefField evaluates the substrait field reference to retrieve the // referenced field or return an error. 
func GetRefField(ref expr.ReferenceSegment, fields []arrow.Field) (*arrow.Field, error) { if ref == nil { return nil, compute.ErrEmpty } var ( out *arrow.Field ) for ref != nil { if len(fields) == 0 { return nil, fmt.Errorf("%w: %s", compute.ErrNoChildren, out.Type) } switch f := ref.(type) { case *expr.StructFieldRef: if f.Field < 0 || f.Field >= int32(len(fields)) { return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, f) } out = &fields[f.Field] fields = getFields(out.Type) default: return nil, arrow.ErrNotImplemented } ref = ref.GetChild() } return out, nil } // GetRefSchema evaluates the provided substrait field reference against // the schema to retrieve the referenced (potentially nested) field. func GetRefSchema(ref expr.ReferenceSegment, schema *arrow.Schema) (*arrow.Field, error) { return GetRefField(ref, schema.Fields()) } // GetScalar returns the evaluated referenced scalar value from the provided // scalar which must be appropriate to the type of reference. // // A StructFieldRef can only reference against a Struct-type scalar, a // ListElementRef can only reference against a List or LargeList scalar, // and a MapKeyRef will only reference against a Map scalar. An error is // returned if following the reference children ends up with an invalid // nested reference object. 
func GetScalar(ref expr.ReferenceSegment, s scalar.Scalar, mem memory.Allocator, ext ExtensionIDSet) (scalar.Scalar, error) { if ref == nil { return nil, compute.ErrEmpty } var out scalar.Scalar for ref != nil { switch f := ref.(type) { case *expr.StructFieldRef: if s.DataType().ID() != arrow.STRUCT { return nil, fmt.Errorf("%w: attempting to reference field from non-struct scalar %s", arrow.ErrInvalid, s) } st := s.(*scalar.Struct) if f.Field < 0 || f.Field >= int32(len(st.Value)) { return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, ref) } out = st.Value[f.Field] case *expr.ListElementRef: switch v := s.(type) { case *scalar.List: sc, err := scalar.GetScalar(v.Value, int(f.Offset)) if err != nil { return nil, err } out = sc case *scalar.LargeList: sc, err := scalar.GetScalar(v.Value, int(f.Offset)) if err != nil { return nil, err } out = sc default: return nil, fmt.Errorf("%w: cannot get ListElementRef from non-list scalar %s", arrow.ErrInvalid, v) } case *expr.MapKeyRef: v, ok := s.(*scalar.Map) if !ok { return nil, arrow.ErrInvalid } dt, _, err := FromSubstraitType(f.MapKey.GetType(), ext) if err != nil { return nil, err } if !arrow.TypeEqual(dt, v.Type.(*arrow.MapType).KeyType()) { return nil, arrow.ErrInvalid } keyvalDatum, err := literalToDatum(mem, f.MapKey, ext) if err != nil { return nil, err } var ( keyval = keyvalDatum.(*compute.ScalarDatum) m = v.Value.(*array.Struct) keys = m.Field(0) valueScalar scalar.Scalar ) for i := 0; i < v.Value.Len(); i++ { kv, err := scalar.GetScalar(keys, i) if err != nil { return nil, err } if scalar.Equals(kv, keyval.Value) { valueScalar, err = scalar.GetScalar(m.Field(1), i) if err != nil { return nil, err } break } } if valueScalar == nil { return nil, arrow.ErrNotFound } out = valueScalar } s = out ref = ref.GetChild() } return out, nil } // GetReferencedValue retrieves the referenced (potentially nested) value from // the provided datum which may be a scalar, array, or record batch. 
func GetReferencedValue(mem memory.Allocator, ref expr.ReferenceSegment, value compute.Datum, ext ExtensionIDSet) (compute.Datum, error) { if ref == nil { return nil, compute.ErrEmpty } for ref != nil { // process the rest of the refs for the scalars // since arrays can go down to a scalar, but you // won't get an array from a scalar via ref if v, ok := value.(*compute.ScalarDatum); ok { out, err := GetScalar(ref, v.Value, mem, ext) if err != nil { return nil, err } return &compute.ScalarDatum{Value: out}, nil } switch r := ref.(type) { case *expr.MapKeyRef: return nil, arrow.ErrNotImplemented case *expr.StructFieldRef: switch v := value.(type) { case *compute.ArrayDatum: if v.Type().ID() != arrow.STRUCT { return nil, fmt.Errorf("%w: struct field ref for non struct type %s", arrow.ErrInvalid, v.Type()) } if r.Field < 0 || r.Field >= int32(len(v.Value.Children())) { return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, ref) } value = &compute.ArrayDatum{Value: v.Value.Children()[r.Field]} case *compute.RecordDatum: if r.Field < 0 || r.Field >= int32(v.Value.NumCols()) { return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, ref) } value = &compute.ArrayDatum{Value: v.Value.Column(int(r.Field)).Data()} default: return nil, arrow.ErrNotImplemented } case *expr.ListElementRef: switch v := value.(type) { case *compute.ArrayDatum: switch v.Type().ID() { case arrow.LIST, arrow.LARGE_LIST, arrow.FIXED_SIZE_LIST: arr := v.MakeArray() defer arr.Release() sc, err := scalar.GetScalar(arr, int(r.Offset)) if err != nil { return nil, err } if s, ok := sc.(scalar.Releasable); ok { defer s.Release() } value = &compute.ScalarDatum{Value: sc} default: return nil, fmt.Errorf("%w: cannot reference list element in non-list array type %s", arrow.ErrInvalid, v.Type()) } default: return nil, arrow.ErrNotImplemented } } ref = ref.GetChild() } return value, nil } 
arrow-go-18.2.0/arrow/compute/exprs/types.go000066400000000000000000000565361476434502500210320ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package exprs import ( "fmt" "hash/maphash" "strconv" "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/compute" "github.com/substrait-io/substrait-go/v3/expr" "github.com/substrait-io/substrait-go/v3/extensions" "github.com/substrait-io/substrait-go/v3/types" ) const ( // URI for official Arrow Substrait Extension Types ArrowExtTypesUri = "https://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml" SubstraitDefaultURIPrefix = extensions.SubstraitDefaultURIPrefix // URI for official Substrait Arithmetic funcs extensions SubstraitArithmeticFuncsURI = SubstraitDefaultURIPrefix + "functions_arithmetic.yaml" // URI for official Substrait Comparison funcs extensions SubstraitComparisonFuncsURI = SubstraitDefaultURIPrefix + "functions_comparison.yaml" SubstraitBooleanFuncsURI = SubstraitDefaultURIPrefix + "functions_boolean.yaml" TimestampTzTimezone = "UTC" ) var hashSeed maphash.Seed // the default extension registry that will contain the Arrow extension // type variations and types. 
var DefaultExtensionIDRegistry = NewExtensionIDRegistry()

// init populates DefaultExtensionIDRegistry with the Arrow extension type
// variations and the standard substrait <-> arrow function mappings for the
// arithmetic, comparison, and boolean extension sets.
func init() {
	hashSeed = maphash.MakeSeed()

	// Arrow types that have no direct substrait equivalent are registered
	// as type (variations) under the official Arrow extension-types URI.
	types := []struct {
		dt   arrow.DataType
		name string
	}{
		{arrow.PrimitiveTypes.Uint8, "u8"},
		{arrow.PrimitiveTypes.Uint16, "u16"},
		{arrow.PrimitiveTypes.Uint32, "u32"},
		{arrow.PrimitiveTypes.Uint64, "u64"},
		{arrow.FixedWidthTypes.Float16, "fp16"},
		{arrow.Null, "null"},
		{arrow.FixedWidthTypes.MonthInterval, "interval_month"},
		{arrow.FixedWidthTypes.DayTimeInterval, "interval_day_milli"},
		{arrow.FixedWidthTypes.MonthDayNanoInterval, "interval_month_day_nano"},
	}

	for _, t := range types {
		err := DefaultExtensionIDRegistry.RegisterType(extensions.ID{
			URI: ArrowExtTypesUri, Name: t.name}, t.dt)
		if err != nil {
			panic(err)
		}
	}

	// arithmetic functions honor the substrait "overflow" option, so they
	// decode through the overflow-aware converter
	for _, fn := range []string{"add", "subtract", "multiply", "divide", "power", "sqrt", "abs"} {
		err := DefaultExtensionIDRegistry.AddSubstraitScalarToArrow(
			extensions.ID{URI: SubstraitArithmeticFuncsURI, Name: fn},
			decodeOptionlessOverflowableArithmetic(fn))
		if err != nil {
			panic(err)
		}
	}

	for _, fn := range []string{"add", "subtract", "multiply", "divide"} {
		err := DefaultExtensionIDRegistry.AddArrowToSubstrait(fn,
			encodeOptionlessOverflowableArithmetic(extensions.ID{
				URI: SubstraitArithmeticFuncsURI, Name: fn}))
		if err != nil {
			panic(err)
		}
	}

	// comparison functions map by simple name translation (lt -> less, etc.)
	for _, fn := range []string{"equal", "not_equal", "lt", "lte", "gt", "gte", "is_null", "is_not_null", "is_nan"} {
		err := DefaultExtensionIDRegistry.AddSubstraitScalarToArrow(
			extensions.ID{URI: SubstraitComparisonFuncsURI, Name: fn},
			simpleMapSubstraitToArrowFunc)
		if err != nil {
			panic(err)
		}
	}

	for _, fn := range []string{"equal", "not_equal", "less", "less_equal", "greater", "greater_equal", "is_null", "is_not_null", "is_nan"} {
		err := DefaultExtensionIDRegistry.AddArrowToSubstrait(fn,
			simpleMapArrowToSubstraitFunc(SubstraitComparisonFuncsURI))
		if err != nil {
			panic(err)
		}
	}

	// boolean and/or map to the Kleene-logic arrow kernels
	for _, fn := range []string{"and", "or", "not"} {
		err := DefaultExtensionIDRegistry.AddSubstraitScalarToArrow(
			extensions.ID{URI: SubstraitBooleanFuncsURI, Name: fn},
			simpleMapSubstraitToArrowFunc)
		if err != nil {
			panic(err)
		}
	}

	for _, fn := range []string{"and_kleene", "or_kleene", "not"} {
		err := DefaultExtensionIDRegistry.AddArrowToSubstrait(fn,
			simpleMapArrowToSubstraitFunc(SubstraitBooleanFuncsURI))
		if err != nil {
			panic(err)
		}
	}
}

// overflowBehavior is the value space of the substrait "overflow" function
// option for arithmetic functions.
type overflowBehavior string

const (
	overflowSILENT   = "SILENT"
	overflowSATURATE = "SATURATE"
	overflowERROR    = "ERROR"
)

// enumParser validates that a string belongs to a known set of enum values.
type enumParser[typ ~string] struct {
	values map[typ]struct{}
}

// parse returns v as typ if it is a member of the enum, else arrow.ErrNotFound.
func (e *enumParser[typ]) parse(v string) (typ, error) {
	out := typ(v)
	if _, ok := e.values[out]; ok {
		return out, nil
	}
	return "", arrow.ErrNotFound
}

var overflowParser = enumParser[overflowBehavior]{
	values: map[overflowBehavior]struct{}{
		overflowSILENT:   {},
		overflowSATURATE: {},
		overflowERROR:    {},
	},
}

// parseOption extracts the named function option from sf and validates it
// against the set of implemented values, returning def when the option is
// absent. An unparseable value yields arrow.ErrInvalid; a recognized but
// unimplemented value yields arrow.ErrNotImplemented.
func parseOption[typ ~string](sf *expr.ScalarFunction, optionName string, parser *enumParser[typ], implemented []typ, def typ) (typ, error) {
	opts := sf.GetOption(optionName)
	if len(opts) == 0 {
		return def, nil
	}

	for _, o := range opts {
		p, err := parser.parse(o)
		if err != nil {
			return def, arrow.ErrInvalid
		}
		for _, i := range implemented {
			if i == p {
				return p, nil
			}
		}
	}
	return def, arrow.ErrNotImplemented
}

// substraitToArrow converts a substrait ScalarFunction into the name and
// options of the equivalent arrow compute function.
type substraitToArrow = func(*expr.ScalarFunction) (fname string, opts compute.FunctionOptions, err error)

// arrowToSubstrait converts an arrow compute function name into the substrait
// extension ID and function options that represent it.
type arrowToSubstrait = func(fname string) (extensions.ID, []*types.FunctionOption, error)

// name translations for functions whose substrait and arrow names differ
var substraitToArrowFuncMap = map[string]string{
	"lt":  "less",
	"gt":  "greater",
	"lte": "less_equal",
	"gte": "greater_equal",
	"or":  "or_kleene",
	"and": "and_kleene",
}

// inverse of substraitToArrowFuncMap
var arrowToSubstraitFuncMap = map[string]string{
	"less":          "lt",
	"greater":       "gt",
	"less_equal":    "lte",
	"greater_equal": "gte",
	"and_kleene":    "and",
	"or_kleene":     "or",
}

// simpleMapSubstraitToArrowFunc strips the compound-signature suffix
// ("name:sig") from the substrait function name and applies the name
// translation table; no options are produced.
func simpleMapSubstraitToArrowFunc(sf *expr.ScalarFunction) (fname string, opts compute.FunctionOptions, err error) {
	fname, _, _ = strings.Cut(sf.Name(), ":")
	f, ok := substraitToArrowFuncMap[fname]
	if ok {
		fname = f
	}
	return
}

// simpleMapArrowToSubstraitFunc returns a converter that translates an arrow
// function name via the table and binds it to the given extension URI.
func simpleMapArrowToSubstraitFunc(uri string) arrowToSubstrait {
	return func(fname string) (extensions.ID, []*types.FunctionOption, error) {
		f, ok := arrowToSubstraitFuncMap[fname]
		if ok {
			fname = f
		}
		return extensions.ID{URI: uri, Name: fname}, nil, nil
	}
}

// decodeOptionlessOverflowableArithmetic returns a converter for arithmetic
// function n which maps the substrait "overflow" option onto arrow's
// checked/unchecked kernel variants: SILENT -> n_unchecked, ERROR -> n.
func decodeOptionlessOverflowableArithmetic(n string) substraitToArrow {
	return func(sf *expr.ScalarFunction) (fname string, opts compute.FunctionOptions, err error) {
		overflow, err := parseOption(sf, "overflow", &overflowParser, []overflowBehavior{overflowSILENT, overflowERROR}, overflowSILENT)
		if err != nil {
			return n, nil, err
		}

		switch overflow {
		case overflowSILENT:
			return n + "_unchecked", nil, nil
		case overflowERROR:
			return n, nil, nil
		default:
			// SATURATE parses but has no arrow equivalent
			return n, nil, arrow.ErrNotImplemented
		}
	}
}

// encodeOptionlessOverflowableArithmetic is the inverse of the decoder above:
// an arrow "_unchecked" function encodes as overflow=SILENT, otherwise
// overflow=ERROR. A compound "name:sig" input keeps the full name as the
// extension ID while the suffix check uses the bare name.
func encodeOptionlessOverflowableArithmetic(id extensions.ID) arrowToSubstrait {
	return func(fname string) (extensions.ID, []*types.FunctionOption, error) {
		fn, _, ok := strings.Cut(fname, ":")
		if ok {
			id.Name = fname
			fname = fn
		}

		opts := make([]*types.FunctionOption, 0, 1)
		if strings.HasSuffix(fname, "_unchecked") {
			opts = append(opts, &types.FunctionOption{
				Name: "overflow", Preference: []string{"SILENT"}})
		} else {
			opts = append(opts, &types.FunctionOption{
				Name: "overflow", Preference: []string{"ERROR"}})
		}

		return id, opts, nil
	}
}

// NewExtensionSetDefault is a convenience function to create a new extension
// set using the Default arrow extension ID registry.
//
// See NewExtensionSet for more info.
func NewExtensionSetDefault(set expr.ExtensionRegistry) ExtensionIDSet {
	return &extensionSet{ExtensionRegistry: set, reg: DefaultExtensionIDRegistry}
}

// NewExtensionSet creates a new extension set given a substrait extension registry,
// and an Arrow <--> Substrait registry for mapping substrait extensions to
// their Arrow equivalents. This extension set can then be used to manage a
// particular set of extensions in use by an expression or plan, so when
// serializing you only need to serialize the extensions that have been
// inserted into the extension set.
func NewExtensionSet(set expr.ExtensionRegistry, reg *ExtensionIDRegistry) ExtensionIDSet {
	return &extensionSet{ExtensionRegistry: set, reg: reg}
}

// extensionSet implements ExtensionIDSet by pairing a substrait extension
// registry (embedded) with an arrow-side ID registry.
type extensionSet struct {
	expr.ExtensionRegistry
	reg *ExtensionIDRegistry
}

func (e *extensionSet) GetArrowRegistry() *ExtensionIDRegistry { return e.reg }

func (e *extensionSet) GetSubstraitRegistry() expr.ExtensionRegistry { return e.ExtensionRegistry }

// DecodeTypeArrow resolves a type (or type-variation) anchor to its extension
// ID and mapped arrow data type.
func (e *extensionSet) DecodeTypeArrow(anchor uint32) (extensions.ID, arrow.DataType, bool) {
	id, ok := e.Set.DecodeType(anchor)
	if !ok {
		// anchors may refer to type variations rather than types
		if id, ok = e.Set.DecodeTypeVariation(anchor); !ok {
			return id, nil, false
		}
	}

	dt, ok := e.reg.GetTypeByID(id)
	return id, dt, ok
}

// DecodeFunction resolves a function anchor to its extension ID and the
// substrait->arrow converter, retrying with the bare name if the compound
// "name:sig" form has no registration.
func (e *extensionSet) DecodeFunction(ref uint32) (extensions.ID, substraitToArrow, bool) {
	id, ok := e.Set.DecodeFunc(ref)
	if !ok {
		return id, nil, false
	}

	conv, ok := e.reg.GetSubstraitScalarToArrow(id)
	if !ok {
		id.Name, _, ok = strings.Cut(id.Name, ":")
		if ok {
			conv, ok = e.reg.GetSubstraitScalarToArrow(id)
		}
	}
	return id, conv, ok
}

// EncodeTypeVariation maps an arrow data type to its extension ID and a
// type-variation anchor in this set.
func (e *extensionSet) EncodeTypeVariation(dt arrow.DataType) (extensions.ID, uint32, bool) {
	id, ok := e.reg.GetIDByType(dt)
	if !ok {
		return extensions.ID{}, 0, false
	}

	return id, e.Set.GetTypeVariationAnchor(id), true
}

// EncodeType maps an arrow data type to its extension ID and a type anchor
// in this set.
func (e *extensionSet) EncodeType(dt arrow.DataType) (extensions.ID, uint32, bool) {
	id, ok := e.reg.GetIDByType(dt)
	if !ok {
		return extensions.ID{}, 0, false
	}

	return id, e.Set.GetTypeAnchor(id), true
}

// EncodeFunction returns the function anchor for the given extension ID.
func (e *extensionSet) EncodeFunction(id extensions.ID) uint32 {
	return e.Set.GetFuncAnchor(id)
}

// ExtensionIDRegistry manages a set of mappings between Arrow types
// and functions and their substrait equivalents.
type ExtensionIDRegistry struct { typeList []arrow.DataType ids []extensions.ID substraitToIdx map[extensions.ID]int arrowToIdx map[uint64]int substraitToArrowFn map[extensions.ID]substraitToArrow arrowToSubstrait map[string]arrowToSubstrait } // NewExtensionIDRegistry initializes a new registry for use. func NewExtensionIDRegistry() *ExtensionIDRegistry { return &ExtensionIDRegistry{ typeList: make([]arrow.DataType, 0), ids: make([]extensions.ID, 0), substraitToIdx: make(map[extensions.ID]int), arrowToIdx: make(map[uint64]int), substraitToArrowFn: make(map[extensions.ID]substraitToArrow), arrowToSubstrait: make(map[string]arrowToSubstrait), } } // RegisterType creates a mapping between the given extension ID and the // provided Arrow data type. If this extension ID or arrow type are already // registered, an arrow.ErrInvalid error will be returned. func (e *ExtensionIDRegistry) RegisterType(id extensions.ID, dt arrow.DataType) error { if _, ok := e.substraitToIdx[id]; ok { return fmt.Errorf("%w: type id already registered", arrow.ErrInvalid) } dthash := arrow.HashType(hashSeed, dt) if _, ok := e.arrowToIdx[dthash]; ok { return fmt.Errorf("%w: type already registered", arrow.ErrInvalid) } idx := len(e.ids) e.typeList = append(e.typeList, dt) e.ids = append(e.ids, id) e.substraitToIdx[id] = idx e.arrowToIdx[dthash] = idx return nil } // AddSubstraitScalarToArrow creates a mapping between a given extension ID // and a function which should return the corresponding Arrow compute function // name along with any relevant FunctionOptions based on the ScalarFunction // instance passed to it. // // Any relevant options should be parsed from the ScalarFunction's options // and used to ensure the correct arrow compute function is used and necessary // options are passed. 
func (e *ExtensionIDRegistry) AddSubstraitScalarToArrow(id extensions.ID, toArrow substraitToArrow) error { if _, ok := e.substraitToArrowFn[id]; ok { return fmt.Errorf("%w: extension id already registered as function", arrow.ErrInvalid) } e.substraitToArrowFn[id] = toArrow return nil } // AddArrowToSubstrait creates a mapping between the provided arrow compute function // and a function which should provide the correct substrait ExtensionID and function // options from that name. func (e *ExtensionIDRegistry) AddArrowToSubstrait(name string, fn arrowToSubstrait) error { if _, ok := e.arrowToSubstrait[name]; ok { return fmt.Errorf("%w: function name '%s' already registered for conversion to substrait", arrow.ErrInvalid, name) } e.arrowToSubstrait[name] = fn return nil } // GetTypeByID returns the mapped arrow data type from the provided substrait // extension id. If no mapping exists for this substrait extension id, // the second return value will be false. func (e *ExtensionIDRegistry) GetTypeByID(id extensions.ID) (arrow.DataType, bool) { idx, ok := e.substraitToIdx[id] if !ok { return nil, false } return e.typeList[idx], true } // GetIDByType is the inverse of GetTypeByID, returning the mapped substrait // extension ID corresponding to the provided arrow data type. The second // return is false if there is no mapping found. func (e *ExtensionIDRegistry) GetIDByType(typ arrow.DataType) (extensions.ID, bool) { dthash := arrow.HashType(hashSeed, typ) idx, ok := e.arrowToIdx[dthash] if !ok { return extensions.ID{}, false } return e.ids[idx], true } // GetSubstraitScalarToArrow returns the mapped conversion function for a // given substrait extension ID to convert a substrait ScalarFunction to // the corresponding Arrow compute function call. False is returned as // the second value if there is no mapping available. 
func (e *ExtensionIDRegistry) GetSubstraitScalarToArrow(id extensions.ID) (substraitToArrow, bool) { conv, ok := e.substraitToArrowFn[id] if !ok { return nil, ok } return conv, true } // GetArrowToSubstrait returns the mapped function to convert an arrow compute // function to the corresponding Substrait ScalarFunction extension ID and options. // False is returned as the second value if there is no mapping found. func (e *ExtensionIDRegistry) GetArrowToSubstrait(name string) (conv arrowToSubstrait, ok bool) { conv, ok = e.arrowToSubstrait[name] if !ok { fn, _, found := strings.Cut(name, ":") if found { conv, ok = e.arrowToSubstrait[fn] } } return } // ExtensionIDSet is an interface for managing the mapping between arrow // and substrait types and function extensions. type ExtensionIDSet interface { GetArrowRegistry() *ExtensionIDRegistry GetSubstraitRegistry() expr.ExtensionRegistry DecodeTypeArrow(anchor uint32) (extensions.ID, arrow.DataType, bool) DecodeFunction(ref uint32) (extensions.ID, substraitToArrow, bool) EncodeType(dt arrow.DataType) (extensions.ID, uint32, bool) EncodeTypeVariation(dt arrow.DataType) (extensions.ID, uint32, bool) } // IsNullable is a convenience method to return whether or not // a substrait type has Nullability set to NullabilityRequired or not. func IsNullable(t types.Type) bool { return t.GetNullability() != types.NullabilityRequired } // FieldsFromSubstrait produces a list of arrow fields from a list of // substrait types (such as the fields of a StructType) using nextName // to determine the names for the fields. 
func FieldsFromSubstrait(typeList []types.Type, nextName func() string, ext ExtensionIDSet) (out []arrow.Field, err error) {
	out = make([]arrow.Field, len(typeList))
	for i, t := range typeList {
		out[i].Name = nextName()
		out[i].Nullable = IsNullable(t)

		if st, ok := t.(*types.StructType); ok {
			// nested struct: recurse directly so the children also draw
			// their names from nextName, in order.
			fields, err := FieldsFromSubstrait(st.Types, nextName, ext)
			if err != nil {
				return nil, err
			}
			out[i].Type = arrow.StructOf(fields...)
		} else {
			out[i].Type, _, err = FromSubstraitType(t, ext)
			if err != nil {
				return nil, err
			}
		}
	}
	return
}

// ToSubstraitType converts an arrow data type to a Substrait Type. Since
// arrow types don't have a nullable flag (it is in the arrow.Field) but
// Substrait types do, the nullability must be passed in here.
//
// Types with no direct Substrait equivalent (unsigned ints, float16) are
// encoded as the closest signed/int type plus a type-variation anchor
// obtained from ext; ErrNotFound is returned if no variation is registered.
func ToSubstraitType(dt arrow.DataType, nullable bool, ext ExtensionIDSet) (types.Type, error) {
	var nullability types.Nullability
	if nullable {
		nullability = types.NullabilityNullable
	} else {
		nullability = types.NullabilityRequired
	}

	switch dt.ID() {
	case arrow.BOOL:
		return &types.BooleanType{Nullability: nullability}, nil
	case arrow.INT8:
		return &types.Int8Type{Nullability: nullability}, nil
	case arrow.INT16:
		return &types.Int16Type{Nullability: nullability}, nil
	case arrow.INT32:
		return &types.Int32Type{Nullability: nullability}, nil
	case arrow.INT64:
		return &types.Int64Type{Nullability: nullability}, nil
	case arrow.UINT8:
		// unsigned ints are represented as signed ints + a type variation
		_, anchor, ok := ext.EncodeTypeVariation(dt)
		if !ok {
			return nil, arrow.ErrNotFound
		}
		return &types.Int8Type{
			Nullability:      nullability,
			TypeVariationRef: anchor,
		}, nil
	case arrow.UINT16:
		_, anchor, ok := ext.EncodeTypeVariation(dt)
		if !ok {
			return nil, arrow.ErrNotFound
		}
		return &types.Int16Type{
			Nullability:      nullability,
			TypeVariationRef: anchor,
		}, nil
	case arrow.UINT32:
		_, anchor, ok := ext.EncodeTypeVariation(dt)
		if !ok {
			return nil, arrow.ErrNotFound
		}
		return &types.Int32Type{
			Nullability:      nullability,
			TypeVariationRef: anchor,
		}, nil
	case arrow.UINT64:
		_, anchor, ok := ext.EncodeTypeVariation(dt)
		if !ok {
			return nil, arrow.ErrNotFound
		}
		return &types.Int64Type{
			Nullability:      nullability,
			TypeVariationRef: anchor,
		}, nil
	case arrow.FLOAT16:
		// float16 has no substrait equivalent: encoded as an int16 variation
		_, anchor, ok := ext.EncodeTypeVariation(dt)
		if !ok {
			return nil, arrow.ErrNotFound
		}
		return &types.Int16Type{
			Nullability:      nullability,
			TypeVariationRef: anchor,
		}, nil
	case arrow.FLOAT32:
		return &types.Float32Type{Nullability: nullability}, nil
	case arrow.FLOAT64:
		return &types.Float64Type{Nullability: nullability}, nil
	case arrow.STRING, arrow.LARGE_STRING:
		return &types.StringType{Nullability: nullability}, nil
	case arrow.BINARY, arrow.LARGE_BINARY:
		return &types.BinaryType{Nullability: nullability}, nil
	case arrow.DATE32:
		return &types.DateType{Nullability: nullability}, nil
	case arrow.EXTENSION:
		dt := dt.(arrow.ExtensionType)
		switch dt.ExtensionName() {
		case "uuid":
			return &types.UUIDType{Nullability: nullability}, nil
		case "fixed_char":
			return &types.FixedCharType{
				Nullability: nullability,
				Length:      int32(dt.StorageType().(*arrow.FixedSizeBinaryType).ByteWidth),
			}, nil
		case "varchar":
			// NOTE(review): the varchar length is not preserved here (-1) —
			// confirm this round-trips acceptably with FromSubstraitType.
			return &types.VarCharType{Nullability: nullability, Length: -1}, nil
		case "interval_year":
			return &types.IntervalYearType{Nullability: nullability}, nil
		case "interval_day":
			return &types.IntervalDayType{Nullability: nullability}, nil
		default:
			// unrecognized extension: encode as a user-defined type anchor
			_, anchor, ok := ext.EncodeType(dt)
			if !ok {
				return nil, arrow.ErrNotFound
			}
			return &types.UserDefinedType{
				Nullability:   nullability,
				TypeReference: anchor,
			}, nil
		}
	case arrow.FIXED_SIZE_BINARY:
		return &types.FixedBinaryType{Nullability: nullability,
			Length: int32(dt.(*arrow.FixedSizeBinaryType).ByteWidth)}, nil
	case arrow.DECIMAL128, arrow.DECIMAL256:
		dt := dt.(arrow.DecimalType)
		return &types.DecimalType{Nullability: nullability,
			Precision: dt.GetPrecision(), Scale: dt.GetScale()}, nil
	case arrow.STRUCT:
		dt := dt.(*arrow.StructType)
		fields := make([]types.Type, dt.NumFields())
		var err error
		for i, f := range dt.Fields() {
			fields[i], err = ToSubstraitType(f.Type, f.Nullable, ext)
			if err != nil {
				return nil, err
			}
		}

		return &types.StructType{
			Nullability: nullability,
			Types:       fields,
		}, nil
	case arrow.LIST, arrow.FIXED_SIZE_LIST, arrow.LARGE_LIST:
		// all list flavors collapse to the single substrait list type;
		// fixed-size length is not preserved.
		dt := dt.(arrow.NestedType)
		elemType, err := ToSubstraitType(dt.Fields()[0].Type, dt.Fields()[0].Nullable, ext)
		if err != nil {
			return nil, err
		}
		return &types.ListType{
			Nullability: nullability,
			Type:        elemType,
		}, nil
	case arrow.MAP:
		dt := dt.(*arrow.MapType)
		// map keys are always non-nullable in substrait
		keyType, err := ToSubstraitType(dt.KeyType(), false, ext)
		if err != nil {
			return nil, err
		}
		valueType, err := ToSubstraitType(dt.ItemType(), dt.ItemField().Nullable, ext)
		if err != nil {
			return nil, err
		}

		return &types.MapType{
			Nullability: nullability,
			Key:         keyType,
			Value:       valueType,
		}, nil
	}

	return nil, arrow.ErrNotImplemented
}

// FromSubstraitType returns the appropriate Arrow data type for the given
// substrait type, using the extension set if necessary.
// Since Substrait types contain their nullability also, the nullability
// returned along with the data type.
func FromSubstraitType(t types.Type, ext ExtensionIDSet) (arrow.DataType, bool, error) {
	nullable := IsNullable(t)

	// a type-variation anchor (if present and registered) wins over the
	// base substrait type, recovering e.g. unsigned int / float16 types.
	if t.GetTypeVariationReference() > 0 {
		_, dt, ok := ext.DecodeTypeArrow(t.GetTypeVariationReference())
		if ok {
			return dt, nullable, nil
		}
	}

	switch t := t.(type) {
	case *types.BooleanType:
		return arrow.FixedWidthTypes.Boolean, nullable, nil
	case *types.Int8Type:
		return arrow.PrimitiveTypes.Int8, nullable, nil
	case *types.Int16Type:
		return arrow.PrimitiveTypes.Int16, nullable, nil
	case *types.Int32Type:
		return arrow.PrimitiveTypes.Int32, nullable, nil
	case *types.Int64Type:
		return arrow.PrimitiveTypes.Int64, nullable, nil
	case *types.Float32Type:
		return arrow.PrimitiveTypes.Float32, nullable, nil
	case *types.Float64Type:
		return arrow.PrimitiveTypes.Float64, nullable, nil
	case *types.StringType:
		return arrow.BinaryTypes.String, nullable, nil
	case *types.BinaryType:
		return arrow.BinaryTypes.Binary, nullable, nil
	case *types.TimestampType:
		return &arrow.TimestampType{Unit: arrow.Microsecond}, nullable, nil
	case *types.TimestampTzType:
		return &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: TimestampTzTimezone}, nullable, nil
	case *types.DateType:
		return arrow.FixedWidthTypes.Date32, nullable, nil
	case *types.TimeType:
		return &arrow.Time64Type{Unit: arrow.Microsecond}, nullable, nil
	case *types.IntervalYearType:
		return intervalYear(), nullable, nil
	case *types.IntervalDayType:
		return intervalDay(), nullable, nil
	case *types.UUIDType:
		return uuid(), nullable, nil
	case *types.FixedCharType:
		return fixedChar(t.Length), nullable, nil
	case *types.VarCharType:
		return varChar(t.Length), nullable, nil
	case *types.FixedBinaryType:
		return &arrow.FixedSizeBinaryType{ByteWidth: int(t.Length)}, nullable, nil
	case *types.DecimalType:
		// NOTE(review): always widened to decimal128 even for precisions a
		// decimal256 round-trip might have produced — confirm acceptable.
		return &arrow.Decimal128Type{
			Precision: t.Precision,
			Scale:     t.Scale,
		}, nullable, nil
	case *types.StructType:
		// substrait struct fields are unnamed: synthesize "1", "2", ...
		i := 0
		fields, err := FieldsFromSubstrait(t.Types, func() string {
			i++
			return strconv.Itoa(i)
		}, ext)
		if err != nil {
			return nil, false, err
		}
		return arrow.StructOf(fields...), nullable, nil
	case *types.ListType:
		elem, elemNullable, err := FromSubstraitType(t.Type, ext)
		if err != nil {
			return nil, false, err
		}
		return arrow.ListOfField(arrow.Field{Name: "item", Type: elem, Nullable: elemNullable}), nullable, nil
	case *types.MapType:
		key, keyNullable, err := FromSubstraitType(t.Key, ext)
		if err != nil {
			return nil, false, err
		}
		if keyNullable {
			return nil, false, fmt.Errorf("%w: encountered nullable key field when converting to arrow.Map", arrow.ErrInvalid)
		}
		value, valueNullable, err := FromSubstraitType(t.Value, ext)
		if err != nil {
			return nil, false, err
		}
		ret := arrow.MapOf(key, value)
		ret.SetItemNullable(valueNullable)
		return ret, nullable, nil
	case *types.UserDefinedType:
		anchor := t.TypeReference
		_, dt, ok := ext.DecodeTypeArrow(anchor)
		if !ok {
			return nil, false, arrow.ErrNotImplemented
		}
		return dt, nullable, nil
	}

	return nil, false, arrow.ErrNotImplemented
}
arrow-go-18.2.0/arrow/compute/fieldref.go000066400000000000000000000373231476434502500202760ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package compute import ( "errors" "fmt" "hash/maphash" "reflect" "strconv" "strings" "unicode" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" ) var ( ErrEmpty = errors.New("cannot traverse empty field path") ErrNoChildren = errors.New("trying to get child of type with no children") ErrIndexRange = errors.New("index out of range") ErrMultipleMatches = errors.New("multiple matches") ErrNoMatch = errors.New("no match") ErrInvalid = errors.New("field ref invalid") ) func getFields(typ arrow.DataType) []arrow.Field { if nested, ok := typ.(arrow.NestedType); ok { return nested.Fields() } return nil } type listvals interface { ListValues() arrow.Array } func getChildren(arr arrow.Array) (ret []arrow.Array) { switch arr := arr.(type) { case *array.Struct: ret = make([]arrow.Array, arr.NumField()) for i := 0; i < arr.NumField(); i++ { ret[i] = arr.Field(i) } case listvals: ret = []arrow.Array{arr.ListValues()} } return } // FieldPath represents a path to a nested field using indices of child fields. 
// For example, given the indices {5, 9, 3} the field could be retrieved with: // schema.Field(5).Type().(*arrow.StructType).Field(9).Type().(*arrow.StructType).Field(3) // // Attempting to retrieve a child field using a FieldPath which is not valid for a given // schema will get an error such as an out of range index, or an empty path. // // FieldPaths provide for drilling down to potentially nested children for convenience // of accepting a slice of fields, a schema or a datatype (which should contain child fields). // // A fieldpath can also be used to retrieve a child arrow.Array or column from a record batch. type FieldPath []int func (f FieldPath) String() string { if len(f) == 0 { return "FieldPath(empty)" } var b strings.Builder b.WriteString("FieldPath(") for _, i := range f { fmt.Fprint(&b, i) b.WriteByte(' ') } ret := b.String() return ret[:len(ret)-1] + ")" } // Get retrieves the corresponding nested child field by drilling through the schema's // fields as per the field path. func (f FieldPath) Get(s *arrow.Schema) (*arrow.Field, error) { return f.GetFieldFromSlice(s.Fields()) } // GetFieldFromSlice treats the slice as the top layer of fields, so the first value // in the field path will index into the slice, and then drill down from there. 
func (f FieldPath) GetFieldFromSlice(fields []arrow.Field) (*arrow.Field, error) { if len(f) == 0 { return nil, ErrEmpty } var ( depth = 0 out *arrow.Field ) for _, idx := range f { if len(fields) == 0 { return nil, fmt.Errorf("%w: %s", ErrNoChildren, out.Type) } if idx < 0 || idx >= len(fields) { return nil, fmt.Errorf("%w: indices=%s", ErrIndexRange, f[:depth+1]) } out = &fields[idx] fields = getFields(out.Type) depth++ } return out, nil } func (f FieldPath) getArray(arrs []arrow.Array) (arrow.Array, error) { if len(f) == 0 { return nil, ErrEmpty } var ( depth = 0 out arrow.Array ) for _, idx := range f { if len(arrs) == 0 { return nil, fmt.Errorf("%w: %s", ErrNoChildren, out.DataType()) } if idx < 0 || idx >= len(arrs) { return nil, fmt.Errorf("%w. indices=%s", ErrIndexRange, f[:depth+1]) } out = arrs[idx] arrs = getChildren(out) depth++ } return out, nil } // GetFieldFromType returns the nested field from a datatype by drilling into it's // child fields. func (f FieldPath) GetFieldFromType(typ arrow.DataType) (*arrow.Field, error) { return f.GetFieldFromSlice(getFields(typ)) } // GetField is equivalent to GetFieldFromType(field.Type) func (f FieldPath) GetField(field arrow.Field) (*arrow.Field, error) { return f.GetFieldFromType(field.Type) } // GetColumn will return the correct child array by traversing the fieldpath // going to the nested arrays of the columns in the record batch. 
func (f FieldPath) GetColumn(batch arrow.Record) (arrow.Array, error) {
	return f.getArray(batch.Columns())
}

// findAll: a FieldPath matches either itself (when valid for fields) or nothing.
func (f FieldPath) findAll(fields []arrow.Field) []FieldPath {
	_, err := f.GetFieldFromSlice(fields)
	if err == nil {
		return []FieldPath{f}
	}
	return nil
}

// a nameref represents a FieldRef by name of the field
type nameRef string

func (n nameRef) String() string {
	return "Name(" + string(n) + ")"
}

// findAll returns one single-index path for every field whose name matches;
// duplicate names in the slice therefore produce multiple matches.
func (ref nameRef) findAll(fields []arrow.Field) []FieldPath {
	out := []FieldPath{}
	for i, f := range fields {
		if f.Name == string(ref) {
			out = append(out, FieldPath{i})
		}
	}
	return out
}

func (ref nameRef) hash(h *maphash.Hash) { h.WriteString(string(ref)) }

// matches accumulates the intermediate state while resolving a refList:
// a prefix path for each candidate plus the field it currently points at.
type matches struct {
	prefixes []FieldPath
	refs     []*arrow.Field
}

// add records prefix+suffix as a candidate match; suffix must be valid for
// fields (it came from a findAll on them), so a failure here is a programmer
// error and panics.
func (m *matches) add(prefix, suffix FieldPath, fields []arrow.Field) {
	f, err := suffix.GetFieldFromSlice(fields)
	if err != nil {
		panic(err)
	}

	m.refs = append(m.refs, f)
	m.prefixes = append(m.prefixes, append(prefix, suffix...))
}

// refList represents a list of references to use to determine which nested
// field is being referenced. allowing combinations of field indices and names
type refList []FieldRef

func (r refList) String() string {
	var b strings.Builder
	b.WriteString("Nested(")
	for _, f := range r {
		fmt.Fprint(&b, f)
		b.WriteByte(' ')
	}
	ret := b.String()
	return ret[:len(ret)-1] + ")"
}

func (ref refList) hash(h *maphash.Hash) {
	for _, r := range ref {
		r.hash(h)
	}
}

// findAll resolves each sub-reference in turn: matches for ref[0] seed the
// candidate set, then every later ref narrows/extends each candidate by
// matching within that candidate's child fields.
func (ref refList) findAll(fields []arrow.Field) []FieldPath {
	if len(ref) == 0 {
		return nil
	}

	m := matches{}
	for _, list := range ref[0].FindAll(fields) {
		m.add(FieldPath{}, list, fields)
	}

	for _, r := range ref[1:] {
		next := matches{}
		for i, f := range m.refs {
			for _, match := range r.FindAllField(*f) {
				next.add(m.prefixes[i], match, getFields(f.Type))
			}
		}
		m = next
	}

	return m.prefixes
}

// refImpl is the common interface implemented by nameRef, FieldPath and
// refList — the three concrete forms a FieldRef can take.
type refImpl interface {
	fmt.Stringer
	findAll(fields []arrow.Field) []FieldPath
	hash(h *maphash.Hash)
}

// FieldRef is a descriptor of a (potentially nested) field within a schema.
// // Unlike FieldPath (which is exclusively indices of child fields), FieldRef // may reference a field by name. It can be constructed from either // a field index, field name, or field path. // // Nested fields can be referenced as well, given the schema: // // arrow.NewSchema([]arrow.Field{ // {Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})}, // {Name: "b", Type: arrow.PrimitiveTypes.Int32}, // }) // // the following all indicate the nested field named "n": // // FieldRefPath(FieldPath{0, 0}) // FieldRefList("a", 0) // FieldRefList("a", "n") // FieldRefList(0, "n") // NewFieldRefFromDotPath(".a[0]") // // FieldPaths matching a FieldRef are retrieved with the FindAll* functions // Multiple matches are possible because field names may be duplicated within // a schema. For example: // // aIsAmbiguous := arrow.NewSchema([]arrow.Field{ // {Name: "a", Type: arrow.PrimitiveTypes.Int32}, // {Name: "a", Type: arrow.PrimitiveTypes.Float32}, // }) // matches := FieldRefName("a").FindAll(aIsAmbiguous) // assert.Len(matches, 2) // assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0)) // assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1)) type FieldRef struct { impl refImpl } // FieldRefPath constructs a FieldRef from a given FieldPath func FieldRefPath(p FieldPath) FieldRef { return FieldRef{impl: p} } // FieldRefIndex is a convenience function to construct a FieldPath reference // of a single index func FieldRefIndex(i int) FieldRef { return FieldRef{impl: FieldPath{i}} } // FieldRefName constructs a FieldRef by name func FieldRefName(n string) FieldRef { return FieldRef{impl: nameRef(n)} } // FieldRefList takes an arbitrary number of arguments which can be either // strings or ints. This will panic if anything other than a string or int // is passed in. 
// NOTE(review): despite the doc comment above, unsupported element types do
// not panic here — they are silently left as zero-value FieldRefs (nil impl),
// which will fail later; confirm whether an explicit panic is intended.
func FieldRefList(elems ...interface{}) FieldRef {
	list := make(refList, len(elems))
	for i, e := range elems {
		switch e := e.(type) {
		case string:
			list[i] = FieldRefName(e)
		case int:
			list[i] = FieldRefIndex(e)
		}
	}
	return FieldRef{impl: list}
}

// NewFieldRefFromDotPath parses a dot path into a field ref.
//
// dot_path = '.' name
//
//	| '[' digit+ ']'
//	| dot_path+
//
// Examples
//
//	".alpha" => FieldRefName("alpha")
//	"[2]" => FieldRefIndex(2)
//	".beta[3]" => FieldRefList("beta", 3)
//	"[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7)
//	".hello world" => FieldRefName("hello world")
//	`.\[y\]\\tho\.\` => FieldRef(`[y]\tho.\`)
//
// Note: when parsing a name, a '\' preceding any other character will be
// dropped from the resulting name. therefore if a name must contain the characters
// '.', '\', '[' or ']' then they must be escaped with a preceding '\'.
func NewFieldRefFromDotPath(dotpath string) (out FieldRef, err error) {
	if len(dotpath) == 0 {
		return out, fmt.Errorf("%w dotpath was empty", ErrInvalid)
	}

	// parseName consumes characters from dotpath (captured by closure) up to
	// the next unescaped '[' or '.', handling '\'-escapes along the way.
	parseName := func() string {
		var name string
		for {
			idx := strings.IndexAny(dotpath, `\[.`)
			if idx == -1 {
				name += dotpath
				dotpath = ""
				break
			}

			if dotpath[idx] != '\\' {
				// subscript for a new field ref
				name += dotpath[:idx]
				dotpath = dotpath[idx:]
				break
			}

			if len(dotpath) == idx+1 {
				// dotpath ends with a backslash; consume it all
				name += dotpath
				dotpath = ""
				break
			}

			// append all characters before backslash, then the character which follows it
			name += dotpath[:idx] + string(dotpath[idx+1])
			dotpath = dotpath[idx+2:]
		}
		return name
	}

	children := make([]FieldRef, 0)
	for len(dotpath) > 0 {
		subscript := dotpath[0]
		dotpath = dotpath[1:]
		switch subscript {
		case '.':
			// next element is a name
			children = append(children, FieldRef{nameRef(parseName())})
		case '[':
			// next element is a bracketed numeric index
			subend := strings.IndexFunc(dotpath, func(r rune) bool { return !unicode.IsDigit(r) })
			if subend == -1 || dotpath[subend] != ']' {
				return out, fmt.Errorf("%w: dot path '%s' contained an unterminated index", ErrInvalid, dotpath)
			}
			idx, _ := strconv.Atoi(dotpath[:subend])
			children = append(children, FieldRef{FieldPath{idx}})
			dotpath = dotpath[subend+1:]
		default:
			return out, fmt.Errorf("%w: dot path must begin with '[' or '.' got '%s'", ErrInvalid, dotpath)
		}
	}

	// collapse a single-element list down to a plain name/path ref
	out.flatten(children)
	return
}

func (f FieldRef) hash(h *maphash.Hash) { f.impl.hash(h) }

// Hash produces a hash of this field reference and takes in a seed so that
// it can maintain consistency across multiple places / processes /etc.
func (f FieldRef) Hash(seed maphash.Seed) uint64 {
	h := maphash.Hash{}
	h.SetSeed(seed)
	f.hash(&h)
	return h.Sum64()
}

// IsName returns true if this fieldref is a name reference
func (f *FieldRef) IsName() bool {
	_, ok := f.impl.(nameRef)
	return ok
}

// IsFieldPath returns true if this FieldRef uses a fieldpath
func (f *FieldRef) IsFieldPath() bool {
	_, ok := f.impl.(FieldPath)
	return ok
}

// IsNested returns true if this FieldRef expects to represent
// a nested field.
func (f *FieldRef) IsNested() bool {
	switch impl := f.impl.(type) {
	case nameRef:
		return false
	case FieldPath:
		return len(impl) > 1
	default:
		// refList is always considered nested
		return true
	}
}

// Name returns the name of the field this references if it is
// a Name reference, otherwise the empty string
func (f *FieldRef) Name() string {
	n, _ := f.impl.(nameRef)
	return string(n)
}

// FieldPath returns the fieldpath that this FieldRef uses, otherwise
// an empty FieldPath if it's not a FieldPath reference
func (f *FieldRef) FieldPath() FieldPath {
	p, _ := f.impl.(FieldPath)
	return p
}

func (f *FieldRef) Equals(other FieldRef) bool {
	return reflect.DeepEqual(f.impl, other.impl)
}

// flatten replaces f's impl with the flattened form of children: nested
// refLists are expanded in place, and a resulting single-element list is
// unwrapped to that element's impl directly.
func (f *FieldRef) flatten(children []FieldRef) {
	out := make([]FieldRef, 0, len(children))

	var populate func(refImpl)
	populate = func(refs refImpl) {
		switch r := refs.(type) {
		case nameRef:
			out = append(out, FieldRef{r})
		case FieldPath:
			out = append(out, FieldRef{r})
		case refList:
			for _, c := range r {
				populate(c.impl)
			}
		}
	}

	populate(refList(children))
	if len(out) == 1 {
		f.impl = out[0].impl
	} else {
		f.impl = refList(out)
	}
}

// FindAll returns all the fieldpaths which this FieldRef matches in the given
// slice of fields.
func (f FieldRef) FindAll(fields []arrow.Field) []FieldPath {
	return f.impl.findAll(fields)
}

// FindAllField returns all the fieldpaths that this FieldRef matches against
// the type of the given field.
func (f FieldRef) FindAllField(field arrow.Field) []FieldPath {
	return f.impl.findAll(getFields(field.Type))
}

// FindOneOrNone is a convenience helper that will either return 1 fieldpath,
// or an empty fieldpath, and will return an error if there are multiple matches.
func (f FieldRef) FindOneOrNone(schema *arrow.Schema) (FieldPath, error) {
	matches := f.FindAll(schema.Fields())
	if len(matches) > 1 {
		return nil, fmt.Errorf("%w for %s in %s", ErrMultipleMatches, f, schema)
	}
	if len(matches) == 0 {
		return nil, nil
	}
	return matches[0], nil
}

// FindOneOrNoneRecord is like FindOneOrNone but for the schema of a record,
// returning an error only if there are multiple matches.
func (f FieldRef) FindOneOrNoneRecord(root arrow.Record) (FieldPath, error) {
	return f.FindOneOrNone(root.Schema())
}

// FindOne returns an error if the field isn't matched or if there are multiple matches
// otherwise it returns the path to the single valid match.
func (f FieldRef) FindOne(schema *arrow.Schema) (FieldPath, error) {
	matches := f.FindAll(schema.Fields())
	if len(matches) == 0 {
		return nil, fmt.Errorf("%w for %s in %s", ErrNoMatch, f, schema)
	}
	if len(matches) > 1 {
		return nil, fmt.Errorf("%w for %s in %s", ErrMultipleMatches, f, schema)
	}
	return matches[0], nil
}

// GetAllColumns gets all the matching column arrays from the given record that
// this FieldRef references.
func (f FieldRef) GetAllColumns(root arrow.Record) ([]arrow.Array, error) { out := make([]arrow.Array, 0) for _, m := range f.FindAll(root.Schema().Fields()) { n, err := m.GetColumn(root) if err != nil { return nil, err } out = append(out, n) } return out, nil } // GetOneField will return a pointer to a field or an error if it is not found // or if there are multiple matches. func (f FieldRef) GetOneField(schema *arrow.Schema) (*arrow.Field, error) { match, err := f.FindOne(schema) if err != nil { return nil, err } return match.GetFieldFromSlice(schema.Fields()) } // GetOneOrNone will return a field or a nil if the field is found or not, and // only errors if there are multiple matches. func (f FieldRef) GetOneOrNone(schema *arrow.Schema) (*arrow.Field, error) { match, err := f.FindOneOrNone(schema) if err != nil { return nil, err } if len(match) == 0 { return nil, nil } return match.GetFieldFromSlice(schema.Fields()) } // GetOneColumnOrNone returns either a nil or the referenced array if it can be // found, erroring only if there is an ambiguous multiple matches. func (f FieldRef) GetOneColumnOrNone(root arrow.Record) (arrow.Array, error) { match, err := f.FindOneOrNoneRecord(root) if err != nil { return nil, err } if len(match) == 0 { return nil, nil } return match.GetColumn(root) } func (f FieldRef) String() string { return "FieldRef." + f.impl.String() } arrow-go-18.2.0/arrow/compute/fieldref_hash.go000066400000000000000000000023161476434502500212730ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.20 || tinygo package compute import ( "hash/maphash" "math/bits" "unsafe" "github.com/apache/arrow-go/v18/arrow" ) func (f FieldPath) hash(h *maphash.Hash) { raw := unsafe.Pointer(unsafe.SliceData(f)) var byteLen int if bits.UintSize == 32 { byteLen = arrow.Int32Traits.BytesRequired(len(f)) } else { byteLen = arrow.Int64Traits.BytesRequired(len(f)) } h.Write(unsafe.Slice((*byte)(raw), byteLen)) } arrow-go-18.2.0/arrow/compute/fieldref_test.go000066400000000000000000000264121476434502500213320ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package compute_test import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/compute" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) func TestFieldPathBasics(t *testing.T) { f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} f2 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f3 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} s := arrow.NewSchema([]arrow.Field{f0, f1, f2, f3}, nil) for i := range s.Fields() { f, err := compute.FieldPath{i}.Get(s) assert.NoError(t, err) assert.Equal(t, s.Field(i), *f) } f, err := compute.FieldPath{}.Get(s) assert.Nil(t, f) assert.ErrorIs(t, err, compute.ErrEmpty) f, err = compute.FieldPath{s.NumFields() * 2}.Get(s) assert.Nil(t, f) assert.ErrorIs(t, err, compute.ErrIndexRange) } func TestFieldRefBasics(t *testing.T) { f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} f2 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f3 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} s := arrow.NewSchema([]arrow.Field{f0, f1, f2, f3}, nil) // lookup by index returns Indices{index} for i := range s.Fields() { assert.ElementsMatch(t, []compute.FieldPath{{i}}, compute.FieldRefIndex(i).FindAll(s.Fields())) } // out of range index results in failure to match assert.Empty(t, compute.FieldRefIndex(s.NumFields()*2).FindAll(s.Fields())) // lookup by name returns the indices of both matching fields assert.Equal(t, []compute.FieldPath{{0}, {2}}, compute.FieldRefName("alpha").FindAll(s.Fields())) assert.Equal(t, []compute.FieldPath{{1}, {3}}, compute.FieldRefName("beta").FindAll(s.Fields())) } func TestFieldRefDotPath(t *testing.T) { ref, err := compute.NewFieldRefFromDotPath(`.alpha`) assert.True(t, ref.IsName()) 
assert.Equal(t, "alpha", ref.Name()) assert.False(t, ref.IsFieldPath()) assert.False(t, ref.IsNested()) assert.NoError(t, err) assert.Equal(t, compute.FieldRefName("alpha"), ref) assert.True(t, ref.Equals(compute.FieldRefName("alpha"))) ref, err = compute.NewFieldRefFromDotPath(`..`) assert.Empty(t, ref.Name()) assert.False(t, ref.IsName()) assert.False(t, ref.IsFieldPath()) assert.Nil(t, ref.FieldPath()) assert.True(t, ref.IsNested()) assert.NoError(t, err) assert.Equal(t, compute.FieldRefList("", ""), ref) ref, err = compute.NewFieldRefFromDotPath(`[2]`) assert.False(t, ref.IsName()) assert.True(t, ref.IsFieldPath()) assert.Equal(t, compute.FieldPath{2}, ref.FieldPath()) assert.False(t, ref.IsNested()) assert.NoError(t, err) assert.Equal(t, compute.FieldRefIndex(2), ref) ref, err = compute.NewFieldRefFromDotPath(`.beta[3]`) assert.NoError(t, err) assert.Equal(t, compute.FieldRefList("beta", 3), ref) ref, err = compute.NewFieldRefFromDotPath(`[5].gamma.delta[7]`) assert.NoError(t, err) assert.Equal(t, compute.FieldRefList(5, "gamma", "delta", 7), ref) ref, err = compute.NewFieldRefFromDotPath(`.hello world`) assert.NoError(t, err) assert.Equal(t, compute.FieldRefName("hello world"), ref) ref, err = compute.NewFieldRefFromDotPath(`.\[y\]\\tho\.\`) assert.NoError(t, err) assert.Equal(t, compute.FieldRefName(`[y]\tho.\`), ref) _, err = compute.NewFieldRefFromDotPath(``) assert.ErrorIs(t, err, compute.ErrInvalid) _, err = compute.NewFieldRefFromDotPath(`alpha`) assert.ErrorIs(t, err, compute.ErrInvalid) _, err = compute.NewFieldRefFromDotPath(`[134234`) assert.ErrorIs(t, err, compute.ErrInvalid) _, err = compute.NewFieldRefFromDotPath(`[1stuf]`) assert.ErrorIs(t, err, compute.ErrInvalid) } func TestFieldPathNested(t *testing.T) { f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} f1 := arrow.Field{Name: "beta", Type: arrow.StructOf(f1_0)} f2_0 := arrow.Field{Name: "alpha", Type: 
arrow.PrimitiveTypes.Int32} f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)} s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) f, err := compute.FieldPath{0}.Get(s) assert.NoError(t, err) assert.Equal(t, f0, *f) f, err = compute.FieldPath{0, 0}.Get(s) assert.ErrorIs(t, err, compute.ErrNoChildren) assert.Nil(t, f) f, err = compute.FieldPath{1, 0}.Get(s) assert.NoError(t, err) assert.Equal(t, f1_0, *f) f, err = compute.FieldPath{2, 0}.Get(s) assert.NoError(t, err) assert.Equal(t, f2_0, *f) f, err = compute.FieldPath{2, 1, 0}.Get(s) assert.NoError(t, err) assert.Equal(t, f2_1_0, *f) f, err = compute.FieldPath{1, 0}.GetField(s.Field(2)) assert.NoError(t, err) assert.Equal(t, f2_1_0, *f) f, err = compute.FieldPath{2, 1, 1}.Get(s) assert.NoError(t, err) assert.Equal(t, f2_1_1, *f) } func TestFindFuncs(t *testing.T) { f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} f1 := arrow.Field{Name: "alpha", Type: arrow.StructOf(f1_0)} f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)} s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) assert.Equal(t, []compute.FieldPath{{1}}, compute.FieldRefName("gamma").FindAllField(f2)) fp, err := compute.FieldRefName("alpha").FindOneOrNone(s) assert.ErrorIs(t, err, compute.ErrMultipleMatches) assert.Len(t, fp, 0) fp, err = compute.FieldRefName("alpha").FindOne(s) assert.ErrorIs(t, err, compute.ErrMultipleMatches) assert.Len(t, fp, 0) 
fp, err = compute.FieldRefName("beta").FindOneOrNone(s) assert.NoError(t, err) assert.Equal(t, compute.FieldPath{2}, fp) fp, err = compute.FieldRefName("beta").FindOne(s) assert.NoError(t, err) assert.Equal(t, compute.FieldPath{2}, fp) fp, err = compute.FieldRefName("gamma").FindOneOrNone(s) assert.NoError(t, err) assert.Len(t, fp, 0) fp, err = compute.FieldRefName("gamma").FindOne(s) assert.ErrorIs(t, err, compute.ErrNoMatch) assert.Nil(t, fp) } func TestGetFieldFuncs(t *testing.T) { f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} f1 := arrow.Field{Name: "alpha", Type: arrow.StructOf(f1_0)} f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)} s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) ref, err := compute.NewFieldRefFromDotPath(`[2].alpha`) assert.NoError(t, err) f, err := ref.GetOneField(s) assert.NoError(t, err) assert.Equal(t, f2_0, *f) f, err = ref.GetOneOrNone(s) assert.NoError(t, err) assert.Equal(t, f2_0, *f) ref = compute.FieldRefList("beta", "gamma", 2) f, err = ref.GetOneField(s) assert.ErrorIs(t, err, compute.ErrNoMatch) assert.Nil(t, f) f, err = ref.GetOneOrNone(s) assert.NoError(t, err) assert.Nil(t, f) f, err = compute.FieldRefName("alpha").GetOneOrNone(s) assert.ErrorIs(t, err, compute.ErrMultipleMatches) assert.Nil(t, f) } func TestFieldRefRecord(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) alphaBldr := array.NewInt32Builder(mem) defer alphaBldr.Release() betaBldr := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) defer betaBldr.Release() gammaBldr := array.NewStructBuilder(mem, arrow.StructOf( 
arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32, Nullable: true})) defer gammaBldr.Release() alphaBldr.AppendValues([]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) betaBldr.AppendValues([]int32{0, 3, 7, 8, 8, 10, 13, 14, 17, 20, 22}, []bool{true, true, true, false, true, true, true, true, true, true}) for i := 0; i < 22; i++ { betaBldr.ValueBuilder().(*array.Int32Builder).Append(int32(i * 2)) } gammaBldr.AppendValues([]bool{true, true, true, true, true, true, true, true, true, true}) gammaBldr.FieldBuilder(0).(*array.Int32Builder).AppendValues([]int32{10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, nil) gammaBldr.FieldBuilder(1).(*array.Int32Builder).AppendValues([]int32{-10, -20, -30, -40, -50, -60, -70, -80, -90, -100}, nil) alpha := alphaBldr.NewInt32Array() defer alpha.Release() beta := betaBldr.NewListArray() defer beta.Release() gamma := gammaBldr.NewStructArray() defer gamma.Release() rec := array.NewRecord(arrow.NewSchema([]arrow.Field{ {Name: "alpha", Type: alpha.DataType(), Nullable: true}, {Name: "alpha", Type: beta.DataType(), Nullable: true}, {Name: "alpha", Type: gamma.DataType(), Nullable: true}, }, nil), []arrow.Array{alpha, beta, gamma}, 10) defer rec.Release() arr, err := compute.FieldPath{2, 0}.GetColumn(rec) assert.NoError(t, err) assert.Same(t, gamma.Field(0), arr) arr, err = compute.FieldPath{}.GetColumn(rec) assert.ErrorIs(t, err, compute.ErrEmpty) assert.Nil(t, arr) arr, err = compute.FieldPath{1, 0}.GetColumn(rec) assert.NoError(t, err) assert.Same(t, beta.ListValues(), arr) arr, err = compute.FieldPath{1, 0, 0}.GetColumn(rec) assert.ErrorIs(t, err, compute.ErrNoChildren) assert.Nil(t, arr) arr, err = compute.FieldPath{2, 2}.GetColumn(rec) assert.ErrorIs(t, err, compute.ErrIndexRange) assert.Nil(t, arr) arrs, err := compute.FieldRefName("alpha").GetAllColumns(rec) assert.NoError(t, err) assert.Equal(t, []arrow.Array{alpha, beta, gamma}, arrs) arrs, err = 
compute.FieldRefName("delta").GetAllColumns(rec) assert.NoError(t, err) assert.Len(t, arrs, 0) arr, err = compute.FieldRefName("delta").GetOneColumnOrNone(rec) assert.NoError(t, err) assert.Nil(t, arr) arr, err = compute.FieldRefName("alpha").GetOneColumnOrNone(rec) assert.ErrorIs(t, err, compute.ErrMultipleMatches) assert.Nil(t, arr) arr, err = compute.FieldRefList("alpha", "beta").GetOneColumnOrNone(rec) assert.NoError(t, err) assert.Same(t, gamma.Field(1), arr) } arrow-go-18.2.0/arrow/compute/funckind_string.go000066400000000000000000000013741476434502500217020ustar00rootroot00000000000000// Code generated by "stringer -type=FuncKind -linecomment"; DO NOT EDIT. //go:build go1.18 package compute import "strconv" func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} _ = x[FuncScalar-0] _ = x[FuncVector-1] _ = x[FuncScalarAgg-2] _ = x[FuncHashAgg-3] _ = x[FuncMeta-4] } const _FuncKind_name = "ScalarVectorScalarAggregateHashAggregateMeta" var _FuncKind_index = [...]uint8{0, 6, 12, 27, 40, 44} func (i FuncKind) String() string { if i < 0 || i >= FuncKind(len(_FuncKind_index)-1) { return "FuncKind(" + strconv.FormatInt(int64(i), 10) + ")" } return _FuncKind_name[_FuncKind_index[i]:_FuncKind_index[i+1]] } arrow-go-18.2.0/arrow/compute/functions.go000066400000000000000000000332251476434502500205230ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build go1.18 package compute import ( "context" "fmt" "strings" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/compute/exec" ) type Function interface { Name() string Kind() FuncKind Arity() Arity Doc() FunctionDoc NumKernels() int Execute(context.Context, FunctionOptions, ...Datum) (Datum, error) DispatchExact(...arrow.DataType) (exec.Kernel, error) DispatchBest(...arrow.DataType) (exec.Kernel, error) DefaultOptions() FunctionOptions Validate() error } // Arity defines the number of required arguments for a function. // // Naming conventions are taken from https://en.wikipedia.org/wiki/Arity type Arity struct { NArgs int IsVarArgs bool } // Convenience functions to generating Arities func Nullary() Arity { return Arity{0, false} } func Unary() Arity { return Arity{1, false} } func Binary() Arity { return Arity{2, false} } func Ternary() Arity { return Arity{3, false} } func VarArgs(minArgs int) Arity { return Arity{minArgs, true} } type FunctionDoc struct { // A one-line summary of the function, using a verb. // // For example, "Add two numeric arrays or scalars" Summary string // A detailed description of the function, meant to follow the summary. Description string // Symbolic names (identifiers) for the function arguments. // // Can be used to generate nicer function signatures. ArgNames []string // Name of the options struct type, if any OptionsType string // Whether or not options are required for function execution. 
// // If false, then either there are no options for this function, // or there is a usable default options value. OptionsRequired bool } // EmptyFuncDoc is a reusable empty function doc definition for convenience. var EmptyFuncDoc FunctionDoc // FuncKind is an enum representing the type of a function type FuncKind int8 const ( // A function that performs scalar data operations on whole arrays // of data. Can generally process Array or Scalar values. The size // of the output will be the same as the size (or broadcasted size, // in the case of mixing Array and Scalar inputs) of the input. FuncScalar FuncKind = iota // Scalar // A function with array input and output whose behavior depends on // the values of the entire arrays passed, rather than the value of // each scalar value. FuncVector // Vector // A function that computes a scalar summary statistic from array input. FuncScalarAgg // ScalarAggregate // A function that computes grouped summary statistics from array // input and an array of group identifiers. FuncHashAgg // HashAggregate // A function that dispatches to other functions and does not contain // its own kernels. FuncMeta // Meta ) func validateFunctionSummary(summary string) error { if strings.Contains(summary, "\n") { return fmt.Errorf("%w: summary contains a newline", arrow.ErrInvalid) } if summary[len(summary)-1] == '.' { return fmt.Errorf("%w: summary ends with a point", arrow.ErrInvalid) } return nil } func validateFunctionDescription(desc string) error { if len(desc) != 0 && desc[len(desc)-1] == '\n' { return fmt.Errorf("%w: description ends with a newline", arrow.ErrInvalid) } const maxLineSize = 78 for _, ln := range strings.Split(desc, "\n") { if len(ln) > maxLineSize { return fmt.Errorf("%w: description line length exceeds %d characters", arrow.ErrInvalid, maxLineSize) } } return nil } // baseFunction is the base class for compute functions. 
Function // implementations should embed this baseFunction and will contain // a collection of "kernels" which are implementations of the function // for specific argument types. Selecting a viable kernel for // executing the function is referred to as "dispatching". type baseFunction struct { name string kind FuncKind arity Arity doc FunctionDoc defaultOpts FunctionOptions } func (b *baseFunction) Name() string { return b.name } func (b *baseFunction) Kind() FuncKind { return b.kind } func (b *baseFunction) Arity() Arity { return b.arity } func (b *baseFunction) Doc() FunctionDoc { return b.doc } func (b *baseFunction) DefaultOptions() FunctionOptions { return b.defaultOpts } func (b *baseFunction) Validate() error { if b.doc.Summary == "" { return nil } argCount := len(b.doc.ArgNames) if argCount != b.arity.NArgs && !(b.arity.IsVarArgs && argCount == b.arity.NArgs+1) { return fmt.Errorf("in function '%s': number of argument names for function doc != function arity", b.name) } if err := validateFunctionSummary(b.doc.Summary); err != nil { return err } return validateFunctionDescription(b.doc.Description) } func checkOptions(fn Function, opts FunctionOptions) error { if opts == nil && fn.Doc().OptionsRequired { return fmt.Errorf("%w: function '%s' cannot be called without options", arrow.ErrInvalid, fn.Name()) } return nil } func (b *baseFunction) checkArity(nargs int) error { switch { case b.arity.IsVarArgs && nargs < b.arity.NArgs: return fmt.Errorf("%w: varargs function '%s' needs at least %d arguments, but only %d passed", arrow.ErrInvalid, b.name, b.arity.NArgs, nargs) case !b.arity.IsVarArgs && nargs != b.arity.NArgs: return fmt.Errorf("%w: function '%s' accepts %d arguments but %d passed", arrow.ErrInvalid, b.name, b.arity.NArgs, nargs) } return nil } // kernelType is a type constraint interface that is used for funcImpl // generic definitions. It will be extended as other kernel types // are defined. 
// // Currently only ScalarKernels are allowed to be used. type kernelType interface { exec.ScalarKernel | exec.VectorKernel // specifying the Kernel interface here allows us to utilize // the methods of the Kernel interface on the generic // constrained type exec.Kernel } // funcImpl is the basic implementation for any functions that use kernels // i.e. all except for Meta functions. type funcImpl[KT kernelType] struct { baseFunction kernels []KT } func (fi *funcImpl[KT]) DispatchExact(vals ...arrow.DataType) (*KT, error) { if err := fi.checkArity(len(vals)); err != nil { return nil, err } for i := range fi.kernels { if fi.kernels[i].GetSig().MatchesInputs(vals) { return &fi.kernels[i], nil } } return nil, fmt.Errorf("%w: function '%s' has no kernel matching input types %s", arrow.ErrNotImplemented, fi.name, arrow.TypesToString(vals)) } func (fi *funcImpl[KT]) NumKernels() int { return len(fi.kernels) } func (fi *funcImpl[KT]) Kernels() []*KT { res := make([]*KT, len(fi.kernels)) for i := range fi.kernels { res[i] = &fi.kernels[i] } return res } // A ScalarFunction is a function that executes element-wise operations // on arrays or scalars, and therefore whose results generally do not // depend on the order of the values in the arguments. Accepts and returns // arrays that are all of the same size. These functions roughly correspond // to the functions used in most SQL expressions. type ScalarFunction struct { funcImpl[exec.ScalarKernel] } // NewScalarFunction constructs a new ScalarFunction object with the passed in // name, arity and function doc. 
func NewScalarFunction(name string, arity Arity, doc FunctionDoc) *ScalarFunction { return &ScalarFunction{ funcImpl: funcImpl[exec.ScalarKernel]{ baseFunction: baseFunction{ name: name, arity: arity, doc: doc, kind: FuncScalar, }, }, } } func (s *ScalarFunction) SetDefaultOptions(opts FunctionOptions) { s.defaultOpts = opts } func (s *ScalarFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error) { return s.funcImpl.DispatchExact(vals...) } func (s *ScalarFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { return s.DispatchExact(vals...) } // AddNewKernel constructs a new kernel with the provided signature // and execution/init functions and then adds it to the function's list of // kernels. This assumes default null handling (intersection of validity bitmaps) func (s *ScalarFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error { if err := s.checkArity(len(inTypes)); err != nil { return err } if s.arity.IsVarArgs && len(inTypes) != 1 { return fmt.Errorf("%w: varargs signatures must have exactly one input type", arrow.ErrInvalid) } sig := &exec.KernelSignature{ InputTypes: inTypes, OutType: outType, IsVarArgs: s.arity.IsVarArgs, } s.kernels = append(s.kernels, exec.NewScalarKernelWithSig(sig, execFn, init)) return nil } // AddKernel adds the provided kernel to the list of kernels // this function has. A copy of the kernel is added to the slice of kernels, // which means that a given kernel object can be created, added and then // reused to add other kernels. 
func (s *ScalarFunction) AddKernel(k exec.ScalarKernel) error { if err := s.checkArity(len(k.Signature.InputTypes)); err != nil { return err } if s.arity.IsVarArgs && !k.Signature.IsVarArgs { return fmt.Errorf("%w: function accepts varargs but kernel signature does not", arrow.ErrInvalid) } s.kernels = append(s.kernels, k) return nil } // Execute uses the passed in context, function options and arguments to eagerly // execute the function using kernel dispatch, batch iteration and memory // allocation details as defined by the kernel. // // If opts is nil, then the DefaultOptions() will be used. func (s *ScalarFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { return execInternal(ctx, s, opts, -1, args...) } type VectorFunction struct { funcImpl[exec.VectorKernel] } func NewVectorFunction(name string, arity Arity, doc FunctionDoc) *VectorFunction { return &VectorFunction{ funcImpl: funcImpl[exec.VectorKernel]{ baseFunction: baseFunction{ name: name, arity: arity, doc: doc, kind: FuncVector, }, }, } } func (f *VectorFunction) SetDefaultOptions(opts FunctionOptions) { f.defaultOpts = opts } func (f *VectorFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error) { return f.funcImpl.DispatchExact(vals...) } func (f *VectorFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { return f.DispatchExact(vals...) 
} func (f *VectorFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error { if err := f.checkArity(len(inTypes)); err != nil { return err } if f.arity.IsVarArgs && len(inTypes) != 1 { return fmt.Errorf("%w: varags signatures must have exactly one input type", arrow.ErrInvalid) } sig := &exec.KernelSignature{ InputTypes: inTypes, OutType: outType, IsVarArgs: f.arity.IsVarArgs, } f.kernels = append(f.kernels, exec.NewVectorKernelWithSig(sig, execFn, init)) return nil } func (f *VectorFunction) AddKernel(kernel exec.VectorKernel) error { if err := f.checkArity(len(kernel.Signature.InputTypes)); err != nil { return err } if f.arity.IsVarArgs && !kernel.Signature.IsVarArgs { return fmt.Errorf("%w: function accepts varargs but kernel signature does not", arrow.ErrInvalid) } f.kernels = append(f.kernels, kernel) return nil } func (f *VectorFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { return execInternal(ctx, f, opts, -1, args...) } // MetaFunctionImpl is the signature needed for implementing a MetaFunction // which is a function that dispatches to another function instead. type MetaFunctionImpl func(context.Context, FunctionOptions, ...Datum) (Datum, error) // MetaFunction is a function which dispatches to other functions, the impl // must not be nil. // // For Array, ChunkedArray and Scalar datums, this may rely on the execution // of concrete function types, but this must handle other Datum kinds on its // own. type MetaFunction struct { baseFunction impl MetaFunctionImpl } // NewMetaFunction constructs a new MetaFunction which will call the provided // impl for dispatching with the expected arity. // // Will panic if impl is nil. 
func NewMetaFunction(name string, arity Arity, doc FunctionDoc, impl MetaFunctionImpl) *MetaFunction { if impl == nil { panic("arrow/compute: cannot construct MetaFunction with nil impl") } return &MetaFunction{ baseFunction: baseFunction{ name: name, arity: arity, doc: doc, }, impl: impl, } } func (MetaFunction) NumKernels() int { return 0 } func (m *MetaFunction) DispatchExact(...arrow.DataType) (exec.Kernel, error) { return nil, fmt.Errorf("%w: dispatch for metafunction", arrow.ErrNotImplemented) } func (m *MetaFunction) DispatchBest(...arrow.DataType) (exec.Kernel, error) { return nil, fmt.Errorf("%w: dispatch for metafunction", arrow.ErrNotImplemented) } func (m *MetaFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { if err := m.checkArity(len(args)); err != nil { return nil, err } if err := checkOptions(m, opts); err != nil { return nil, err } if opts == nil { opts = m.defaultOpts } return m.impl(ctx, opts, args...) } arrow-go-18.2.0/arrow/compute/functions_test.go000066400000000000000000000042041476434502500215550ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build go1.18 package compute_test import ( "testing" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/compute" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestArityBasics(t *testing.T) { nullary := compute.Nullary() assert.Equal(t, 0, nullary.NArgs) assert.False(t, nullary.IsVarArgs) unary := compute.Unary() assert.Equal(t, 1, unary.NArgs) assert.False(t, unary.IsVarArgs) binary := compute.Binary() assert.Equal(t, 2, binary.NArgs) assert.False(t, binary.IsVarArgs) ternary := compute.Ternary() assert.Equal(t, 3, ternary.NArgs) assert.False(t, ternary.IsVarArgs) varargs := compute.VarArgs(2) assert.Equal(t, 2, varargs.NArgs) assert.True(t, varargs.IsVarArgs) } func CheckDispatchBest(t *testing.T, funcName string, originalTypes, expected []arrow.DataType) { fn, exists := compute.GetFunctionRegistry().GetFunction(funcName) require.True(t, exists) vals := make([]arrow.DataType, len(originalTypes)) copy(vals, originalTypes) actualKernel, err := fn.DispatchBest(vals...) require.NoError(t, err) expKernel, err := fn.DispatchExact(expected...) require.NoError(t, err) assert.Same(t, expKernel, actualKernel) assert.Equal(t, len(expected), len(vals)) for i, v := range vals { assert.True(t, arrow.TypeEqual(v, expected[i]), v.String(), expected[i].String()) } } arrow-go-18.2.0/arrow/compute/internal/000077500000000000000000000000001476434502500177735ustar00rootroot00000000000000arrow-go-18.2.0/arrow/compute/internal/kernels/000077500000000000000000000000001476434502500214365ustar00rootroot00000000000000arrow-go-18.2.0/arrow/compute/internal/kernels/Makefile000066400000000000000000000111571476434502500231030ustar00rootroot00000000000000# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. 
The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # this converts rotate instructions from "ro[lr] " -> "ro[lr] , 1" for yasm compatibility PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/' C2GOASM=c2goasm CC=clang-11 CXX=clang++-11 C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=5000 \ -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -fno-jump-tables \ -fno-math-errno -funsafe-math-optimizations -fno-rounding-math -fno-trapping-math -I_lib -I../../../../internal/utils/_lib ASM_FLAGS_AVX2=-mavx2 -mfma ASM_FLAGS_SSE4=-msse4 ASM_FLAGS_BMI2=-mbmi2 ASM_FLAGS_POPCNT=-mpopcnt C_FLAGS_NEON=-O3 -fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions \ -fno-rtti -fno-builtin -fno-math-errno -funsafe-math-optimizations -fno-rounding-math -fno-trapping-math -fno-jump-tables -I_lib -I../../../../internal/utils/_lib GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') ALL_SOURCES := $(shell find . 
-path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') .PHONEY: assembly INTEL_SOURCES := \ cast_numeric_avx2_amd64.s cast_numeric_sse4_amd64.s constant_factor_avx2_amd64.s \ constant_factor_sse4_amd64.s base_arithmetic_avx2_amd64.s base_arithmetic_sse4_amd64.s \ scalar_comparison_avx2_amd64.s scalar_comparison_sse4_amd64.s # # ARROW-15336: DO NOT add the assembly target for Arm64 (ARM_SOURCES) until c2goasm added the Arm64 support. # min_max_neon_arm64.s was generated by asm2plan9s. # And manually formatted it as the Arm64 Plan9. # assembly: $(INTEL_SOURCES) _lib/cast_numeric_avx2_amd64.s: _lib/cast_numeric.cc $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/cast_numeric_sse4_amd64.s: _lib/cast_numeric.cc $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/cast_numeric_neon.s: _lib/cast_numeric.cc $(CXX) -std=c++17 -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/base_arithmetic_avx2_amd64.s: _lib/base_arithmetic.cc $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/base_arithmetic_sse4_amd64.s: _lib/base_arithmetic.cc $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/scalar_comparison_avx2_amd64.s: _lib/scalar_comparison.cc $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/scalar_comparison_sse4_amd64.s: _lib/scalar_comparison.cc $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/base_arithmetic_neon.s: _lib/base_arithmetic.cc $(CXX) -std=c++17 -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/constant_factor_avx2_amd64.s: _lib/constant_factor.c $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/constant_factor_sse4_amd64.s: _lib/constant_factor.c $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ _lib/constant_factor_neon.s: 
_lib/constant_factor.c $(CC) -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ cast_numeric_avx2_amd64.s: _lib/cast_numeric_avx2_amd64.s $(C2GOASM) -a -f $^ $@ cast_numeric_sse4_amd64.s: _lib/cast_numeric_sse4_amd64.s $(C2GOASM) -a -f $^ $@ constant_factor_avx2_amd64.s: _lib/constant_factor_avx2_amd64.s $(C2GOASM) -a -f $^ $@ constant_factor_sse4_amd64.s: _lib/constant_factor_sse4_amd64.s $(C2GOASM) -a -f $^ $@ base_arithmetic_avx2_amd64.s: _lib/base_arithmetic_avx2_amd64.s $(C2GOASM) -a -f $^ $@ base_arithmetic_sse4_amd64.s: _lib/base_arithmetic_sse4_amd64.s $(C2GOASM) -a -f $^ $@ scalar_comparison_avx2_amd64.s: _lib/scalar_comparison_avx2_amd64.s $(C2GOASM) -a -f $^ $@ scalar_comparison_sse4_amd64.s: _lib/scalar_comparison_sse4_amd64.s $(C2GOASM) -a -f $^ $@ clean: rm -f $(INTEL_SOURCES) rm -f $(addprefix _lib/,$(INTEL_SOURCES)) arrow-go-18.2.0/arrow/compute/internal/kernels/_lib/000077500000000000000000000000001476434502500223435ustar00rootroot00000000000000arrow-go-18.2.0/arrow/compute/internal/kernels/_lib/base_arithmetic.cc000066400000000000000000000426431476434502500260060ustar00rootroot00000000000000// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include #include #include #include #include "types.h" // Corresponds to equivalent ArithmeticOp enum in base_arithmetic.go // for passing across which operation to perform. This allows simpler // implementation at the cost of having to pass the extra int8 and // perform a switch. // // In cases of small arrays, this is completely negligible. In cases // of large arrays, the time saved by using SIMD here is significantly // worth the cost. enum class optype : int8_t { ADD, SUB, MUL, DIV, ABSOLUTE_VALUE, NEGATE, SQRT, POWER, SIN, COS, TAN, ASIN, ACOS, ATAN, ATAN2, LN, LOG10, LOG2, LOG1P, LOGB, SIGN, // this impl doesn't actually perform any overflow checks as we need // to only run overflow checks on non-null entries ADD_CHECKED, SUB_CHECKED, MUL_CHECKED, DIV_CHECKED, ABSOLUTE_VALUE_CHECKED, NEGATE_CHECKED, SQRT_CHECKED, POWER_CHECKED, SIN_CHECKED, COS_CHECKED, TAN_CHECKED, ASIN_CHECKED, ACOS_CHECKED, LN_CHECKED, LOG10_CHECKED, LOG2_CHECKED, LOG1P_CHECKED, LOGB_CHECKED, }; struct Add { template static constexpr T Call(Arg0 left, Arg1 right) { if constexpr (is_arithmetic_v) return left + right; } }; struct Sub { template static constexpr T Call(Arg0 left, Arg1 right) { if constexpr (is_arithmetic_v) return left - right; } }; struct AddChecked { template static constexpr T Call(Arg0 left, Arg1 right) { static_assert(is_same::value && is_same::value, ""); if constexpr(is_arithmetic_v) { return left + right; } } }; struct SubChecked { template static constexpr T Call(Arg0 left, Arg1 right) { static_assert(is_same::value && is_same::value, ""); if constexpr(is_arithmetic_v) { return left - right; } } }; template using maybe_make_unsigned = conditional_t && !is_same_v, make_unsigned_t, T>; template > constexpr Unsigned to_unsigned(T signed_) { return static_cast(signed_); } struct Multiply { static_assert(is_same_v, ""); static_assert(is_same_v, ""); static_assert(is_same_v, ""); static_assert(is_same_v, ""); static_assert(is_same_v, ""); static_assert(is_same_v, ""); 
static_assert(is_same_v, ""); static_assert(is_same_v, ""); template static constexpr T Call(Arg0 left, Arg1 right) { static_assert(is_same_v && is_same_v, ""); if constexpr(is_floating_point_v) { return left * right; } else if constexpr(is_unsigned_v && !is_same_v) { return left * right; } else if constexpr(is_signed_v && !is_same_v) { return to_unsigned(left) * to_unsigned(right); } else if constexpr(is_same_v || is_same_v) { // multiplication of 16 bit integer types implicitly promotes to // signed 32 bit integer. However, some inputs may overflow (which // triggers undefined behavior). Therefore we first cast to 32 bit // unsigned integers where overflow is well defined. return static_cast(left) * static_cast(right); } } }; struct MultiplyChecked { template static constexpr T Call(Arg0 left, Arg1 right) { static_assert(is_same_v && is_same_v, ""); if constexpr(is_arithmetic_v) { return left * right; } } }; struct AbsoluteValue { template static constexpr T Call(Arg input) { if constexpr(is_same_v) { *(((int*)&input)+0) &= 0x7fffffff; return input; } else if constexpr(is_same_v) { *(((int*)&input)+1) &= 0x7fffffff; return input; } else if constexpr(is_unsigned_v) { return input; } else { const auto mask = input >> (sizeof(Arg) * CHAR_BIT - 1); return (input + mask) ^ mask; } } }; struct AbsoluteValueChecked { template static constexpr T Call(Arg input) { if constexpr(is_same_v) { *(((int*)&input)+0) &= 0x7fffffff; return input; } else if constexpr(is_same_v) { *(((int*)&input)+1) &= 0x7fffffff; return input; } else if constexpr(is_unsigned_v) { return input; } else { const auto mask = input >> (sizeof(Arg) * CHAR_BIT - 1); return (input + mask) ^ mask; } } }; struct Negate { template static constexpr T Call(Arg input) { if constexpr(is_floating_point_v) { return -input; } else if constexpr(is_unsigned_v) { return ~input + 1; } else { return -input; } } }; struct NegateChecked { template static constexpr T Call(Arg input) { static_assert(is_same_v, ""); if 
constexpr(is_floating_point_v) { return -input; } else if constexpr(is_unsigned_v) { return 0; } else { return -input; } } }; struct Sign { template static constexpr T Call(Arg input) { if constexpr(is_floating_point_v) { return isnan(input) ? input : ((input == 0) ? 0 : (signbit(input) ? -1 : 1)); } else if constexpr(is_unsigned_v) { return input > 0 ? 1 : 0; } else if constexpr(is_signed_v) { return input > 0 ? 1 : (input ? -1 : 0); } } }; template struct arithmetic_op_arr_arr_impl { static inline void exec(const void* in_left, const void* in_right, void* out, const int len) { const T* left = reinterpret_cast(in_left); const T* right = reinterpret_cast(in_right); OutT* output = reinterpret_cast(out); for (int i = 0; i < len; ++i) { output[i] = Op::template Call(left[i], right[i]); } } }; template struct arithmetic_op_arr_scalar_impl { static inline void exec(const void* in_left, const void* scalar_right, void* out, const int len) { const T* left = reinterpret_cast(in_left); const T right = *reinterpret_cast(scalar_right); OutT* output = reinterpret_cast(out); for (int i = 0; i < len; ++i) { output[i] = Op::template Call(left[i], right); } } }; template struct arithmetic_op_scalar_arr_impl { static inline void exec(const void* scalar_left, const void* in_right, void* out, const int len) { const T left = *reinterpret_cast(scalar_left); const T* right = reinterpret_cast(in_right); OutT* output = reinterpret_cast(out); for (int i = 0; i < len; ++i) { output[i] = Op::template Call(left, right[i]); } } }; template struct arithmetic_unary_op_impl { static inline void exec(const void* arg, void* out, const int len) { const T* input = reinterpret_cast(arg); OutT* output = reinterpret_cast(out); for (int i = 0; i < len; ++i) { output[i] = Op::template Call(input[i]); } } }; template typename Impl> static inline void arithmetic_op(const int type, const void* in_left, const void* in_right, void* output, const int len) { const auto intype = static_cast(type); switch (intype) 
{ case arrtype::UINT8: return Impl::exec(in_left, in_right, output, len); case arrtype::INT8: return Impl::exec(in_left, in_right, output, len); case arrtype::UINT16: return Impl::exec(in_left, in_right, output, len); case arrtype::INT16: return Impl::exec(in_left, in_right, output, len); case arrtype::UINT32: return Impl::exec(in_left, in_right, output, len); case arrtype::INT32: return Impl::exec(in_left, in_right, output, len); case arrtype::UINT64: return Impl::exec(in_left, in_right, output, len); case arrtype::INT64: return Impl::exec(in_left, in_right, output, len); case arrtype::FLOAT32: return Impl::exec(in_left, in_right, output, len); case arrtype::FLOAT64: return Impl::exec(in_left, in_right, output, len); default: break; } } template typename Impl, typename Input> static inline void arithmetic_op(const int otype, const void* input, void* output, const int len) { const auto outtype = static_cast(otype); switch (outtype) { case arrtype::UINT8: return Impl::exec(input, output, len); case arrtype::INT8: return Impl::exec(input, output, len); case arrtype::UINT16: return Impl::exec(input, output, len); case arrtype::INT16: return Impl::exec(input, output, len); case arrtype::UINT32: return Impl::exec(input, output, len); case arrtype::INT32: return Impl::exec(input, output, len); case arrtype::UINT64: return Impl::exec(input, output, len); case arrtype::INT64: return Impl::exec(input, output, len); case arrtype::FLOAT32: return Impl::exec(input, output, len); case arrtype::FLOAT64: return Impl::exec(input, output, len); default: break; } } template typename Impl> static inline void arithmetic_op(const int type, const void* input, void* output, const int len) { const auto intype = static_cast(type); switch (intype) { case arrtype::UINT8: return Impl::exec(input, output, len); case arrtype::INT8: return Impl::exec(input, output, len); case arrtype::UINT16: return Impl::exec(input, output, len); case arrtype::INT16: return Impl::exec(input, output, len); case 
arrtype::UINT32: return Impl::exec(input, output, len); case arrtype::INT32: return Impl::exec(input, output, len); case arrtype::UINT64: return Impl::exec(input, output, len); case arrtype::INT64: return Impl::exec(input, output, len); case arrtype::FLOAT32: return Impl::exec(input, output, len); case arrtype::FLOAT64: return Impl::exec(input, output, len); default: break; } } template typename Impl> static inline void arithmetic_op(const int itype, const int otype, const void* input, void* output, const int len) { const auto intype = static_cast(itype); switch (intype) { case arrtype::UINT8: return arithmetic_op(otype, input, output, len); case arrtype::INT8: return arithmetic_op(otype, input, output, len); case arrtype::UINT16: return arithmetic_op(otype, input, output, len); case arrtype::INT16: return arithmetic_op(otype, input, output, len); case arrtype::UINT32: return arithmetic_op(otype, input, output, len); case arrtype::INT32: return arithmetic_op(otype, input, output, len); case arrtype::UINT64: return arithmetic_op(otype, input, output, len); case arrtype::INT64: return arithmetic_op(otype, input, output, len); case arrtype::FLOAT32: return arithmetic_op(otype, input, output, len); case arrtype::FLOAT64: return arithmetic_op(otype, input, output, len); default: break; } } template