pax_global_header00006660000000000000000000000064146167563750014536gustar00rootroot0000000000000052 comment=6ecdf605e0f7639adec599d25cf0e206d7b8f9f5 kokkos-4.3.01/000077500000000000000000000000001461675637500131245ustar00rootroot00000000000000kokkos-4.3.01/.clang-format000066400000000000000000000003101461675637500154710ustar00rootroot00000000000000#Official Tool: clang-format version 8.0.0 BasedOnStyle: google SortIncludes: false AlignConsecutiveAssignments: true AllowShortCaseLabelsOnASingleLine: true AllowShortIfStatementsOnASingleLine: true kokkos-4.3.01/.clang-format-ignore000066400000000000000000000001041461675637500167530ustar00rootroot00000000000000core/unit_test/config/results/* tpls/gtest/gtest/* core/src/desul/* kokkos-4.3.01/.clang-tidy000066400000000000000000000002271461675637500151610ustar00rootroot00000000000000Checks: '-*,kokkos-*,modernize-use-using,modernize-use-nullptr,cppcoreguidelines-pro-type-cstyle-cast' FormatStyle: file HeaderFilterRegex: '.*/*.hpp' kokkos-4.3.01/.codecov.yml000066400000000000000000000003121461675637500153430ustar00rootroot00000000000000coverage: precision: 1 round: down range: "70...100" ignore: - tpls/ - algorithms/unit_tests - core/perf_test/ - core/unit_test/ - containers/performance_tests - containers/unit_tests kokkos-4.3.01/.github/000077500000000000000000000000001461675637500144645ustar00rootroot00000000000000kokkos-4.3.01/.github/ISSUE_TEMPLATE/000077500000000000000000000000001461675637500166475ustar00rootroot00000000000000kokkos-4.3.01/.github/ISSUE_TEMPLATE/bug_report.md000066400000000000000000000014471461675637500213470ustar00rootroot00000000000000--- name: Bug report about: Create a report (for github issue tracker) to correct failures title: '' labels: '' assignees: '' --- **Describe the bug** Please provide a concise, clear description of the bug, as well as any available error logs. Feel free to contact the Kokkos Slack `# build` channel for further discussion of your issue. 
**Please include the following for a minimal reproducer** 1. Compilers (with versions) 2. Kokkos release or commit used (i.e., the sha1 number) 3. Platform, architecture and backend 4. CMake configure command 5. Output from CMake configure command 6. Minimum, complete code needed to reproduce the bug 7. Command line needed to reproduce the bug 8. `KokkosCore_config.h` header file (generated during the build) 9. Please provide any additional relevant error logs kokkos-4.3.01/.github/workflows/000077500000000000000000000000001461675637500165215ustar00rootroot00000000000000kokkos-4.3.01/.github/workflows/clang-format-check.yml000066400000000000000000000004631461675637500226740ustar00rootroot00000000000000name: clang-format check on: [push, pull_request] permissions: read-all jobs: formatting-check: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Run clang-format style check. uses: DoozyX/clang-format-lint-action@v0.16.2 with: clangFormatVersion: 8 kokkos-4.3.01/.github/workflows/codeql.yml000066400000000000000000000022271461675637500205160ustar00rootroot00000000000000name: "CodeQL" on: push: branches: [ "master", "develop", "release-*" ] pull_request: branches: [ "develop" ] permissions: read-all jobs: analyze: name: Analyze runs-on: ubuntu-latest timeout-minutes: 360 permissions: # required for all workflows security-events: write # only required for workflows in private repositories actions: read contents: read steps: - name: Checkout repository uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v3 with: languages: c-cpp - name: configure run: cmake -B build . 
-DKokkos_ENABLE_OPENMP=ON -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_EXAMPLES=ON -DKokkos_ENABLE_BENCHMARKS=ON -DCMAKE_BUILD_TYPE=Debug - name: build run: cmake --build build --parallel 2 - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 with: category: "/language:c-cpp" kokkos-4.3.01/.github/workflows/continuous-integration-workflow-32bit.yml000066400000000000000000000026031461675637500265650ustar00rootroot00000000000000name: github-Linux-32bit on: push: branches: - develop pull_request: paths-ignore: - '**/*.md' types: [ opened, reopened, synchronize ] permissions: read-all concurrency: group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{github.event_name == 'pull_request'}} jobs: CI-32bit: name: Linux-32bit runs-on: ubuntu-latest container: image: ghcr.io/kokkos/ci-containers/ubuntu:latest steps: - name: Checkout code uses: actions/checkout@v4 - name: install_multilib run: sudo apt-get update && sudo apt-get install -y gcc-multilib g++-multilib gfortran-multilib - name: Configure Kokkos run: | cmake -B builddir \ -DKokkos_ENABLE_OPENMP=ON \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DCMAKE_CXX_FLAGS="-Werror -m32 -DKOKKOS_IMPL_32BIT" \ -DCMAKE_CXX_COMPILER=g++ \ -DCMAKE_BUILD_TYPE=RelWithDebInfo - name: Build run: | cmake --build builddir --parallel 2 - name: Tests working-directory: builddir run: ctest --output-on-failure kokkos-4.3.01/.github/workflows/continuous-integration-workflow-hpx.yml000066400000000000000000000052371461675637500264470ustar00rootroot00000000000000name: github-Linux-hpx on: push: branches: - develop pull_request: paths-ignore: - '**/*.md' types: [ opened, reopened, synchronize ] concurrency: group: ${ {github.event_name }}-${{ github.workflow }}-${{ 
github.ref }} cancel-in-progress: ${{github.event_name == 'pull_request'}} permissions: read-all jobs: hpx: name: hpx runs-on: [ubuntu-latest] steps: - name: checkout code uses: actions/checkout@v4 with: path: kokkos - name: setup hpx dependencies run: | sudo apt update sudo apt install \ clang \ hwloc \ libasio-dev \ libboost-all-dev \ ninja-build - name: checkout hpx uses: actions/checkout@v4 with: repository: STELLAR-GROUP/hpx ref: v1.9.0 path: hpx - uses: actions/cache@v4 id: cache-hpx with: path: ./hpx/install key: kokkos-hwloc-${{ github.ref }}-${{ github.sha }} restore-keys: kokkos-hwloc-${{ github.ref }} - name: configure hpx if: steps.cache-hpx.outputs.cache-hit != 'true' run: | mkdir -p hpx/{build,install} cd hpx/build cmake \ -GNinja \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX=$PWD/../install \ -DCMAKE_CXX_COMPILER=clang++ \ -DHPX_WITH_UNITY_BUILD=ON \ -DHPX_WITH_MALLOC=system \ -DHPX_WITH_NETWORKING=OFF \ -DHPX_WITH_EXAMPLES=OFF \ -DHPX_WITH_TESTS=OFF \ .. - name: build and install hpx if: steps.cache-hpx.outputs.cache-hit != 'true' working-directory: hpx/build run: ninja -j2 install - name: configure kokkos run: | mkdir -p kokkos/{build,install} cd kokkos/build cmake \ -GNinja \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX=$PWD/../install \ -DCMAKE_CXX_COMPILER=clang++ \ -DCMAKE_CXX_FLAGS="-Werror" \ -DHPX_ROOT=$PWD/../../hpx/install \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_HPX=ON \ -DKokkos_ENABLE_SERIAL=ON \ -DKokkos_ENABLE_TESTS=ON \ .. 
- name: build_and_install_kokkos working-directory: kokkos/build run: ninja -j2 install - name: test_kokkos working-directory: kokkos/build run: ctest --timeout 2000 -j2 --output-on-failure kokkos-4.3.01/.github/workflows/continuous-integration-workflow.yml000066400000000000000000000132531461675637500256470ustar00rootroot00000000000000name: github-Linux on: push: branches: - develop pull_request: paths-ignore: - '**/*.md' types: [ opened, reopened, synchronize ] concurrency: group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{github.event_name == 'pull_request'}} permissions: read-all jobs: CI: continue-on-error: true strategy: matrix: distro: ['fedora:latest', 'fedora:rawhide', 'ubuntu:latest'] cxx: ['g++', 'clang++'] cxx_extra_flags: [''] cmake_build_type: ['Release', 'Debug'] backend: ['OPENMP'] clang-tidy: [''] include: - distro: 'ubuntu:intel' cxx: 'icpc' cxx_extra_flags: '-diag-disable=177,10441' cmake_build_type: 'Release' backend: 'OPENMP' - distro: 'ubuntu:intel' cxx: 'icpc' cxx_extra_flags: '-diag-disable=177,10441' cmake_build_type: 'Debug' backend: 'OPENMP' - distro: 'ubuntu:intel' cxx: 'icpx' cxx_extra_flags: '-fp-model=precise -Wno-pass-failed' cmake_build_type: 'Release' backend: 'OPENMP' - distro: 'ubuntu:intel' cxx: 'icpx' cxx_extra_flags: '-fp-model=precise -Wno-pass-failed' cmake_build_type: 'Debug' backend: 'OPENMP' - distro: 'ubuntu:latest' cxx: 'clang++' cxx_extra_flags: '-fsanitize=address' extra_linker_flags: '-fsanitize=address' cmake_build_type: 'RelWithDebInfo' backend: 'THREADS' clang-tidy: '-DCMAKE_CXX_CLANG_TIDY="clang-tidy;-warnings-as-errors=*"' - distro: 'ubuntu:latest' cxx: 'clang++' cxx_extra_flags: '-fsanitize=address' extra_linker_flags: '-fsanitize=address' cmake_build_type: 'RelWithDebInfo' backend: 'SERIAL' - distro: 'ubuntu:latest' cxx: 'g++' cmake_build_type: 'RelWithDebInfo' backend: 'THREADS' runs-on: ubuntu-latest container: image: ghcr.io/kokkos/ci-containers/${{ matrix.distro 
}} steps: - name: Checkout desul uses: actions/checkout@v4 with: repository: desul/desul ref: 477da9c8f40f8db369c28dd3f93a67e376d8511b path: desul - name: Install desul working-directory: desul run: | git submodule init git submodule update mkdir build cd build cmake -DDESUL_ENABLE_TESTS=OFF -DCMAKE_INSTALL_PREFIX=/usr/desul-install .. sudo cmake --build . --target install --parallel 2 - name: Checkout code uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: ~/.cache/ccache key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{ github.ref }}-${{ github.sha }} restore-keys: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{ github.ref }} - name: maybe_use_flang_new if: ${{ matrix.cxx == 'clang++' && startsWith(matrix.distro,'fedora:') }} run: echo "FC=flang-new" >> $GITHUB_ENV - name: maybe_use_external_gtest if: ${{ matrix.distro == 'ubuntu:latest' }} run: sudo apt-get update && sudo apt-get install -y libgtest-dev - name: maybe_install_clang_tidy if: ${{ matrix.clang-tidy != '' }} run: sudo apt-get update && sudo apt-get install -y clang-tidy - name: Configure Kokkos run: | cmake -B builddir \ -DCMAKE_INSTALL_PREFIX=/usr \ ${{ matrix.clang-tidy }} \ -Ddesul_ROOT=/usr/desul-install/ \ -DKokkos_ENABLE_DESUL_ATOMICS_EXTERNAL=ON \ -DKokkos_ENABLE_HWLOC=ON \ -DKokkos_ENABLE_${{ matrix.backend }}=ON \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_IMPL_MDSPAN=ON \ -DCMAKE_CXX_FLAGS="-Werror ${{ matrix.cxx_extra_flags }}" \ -DCMAKE_EXE_LINKER_FLAGS="${{ matrix.extra_linker_flags }}" \ -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} - name: Build run: | ccache -z cmake --build builddir --parallel 2 
ccache -s - name: Tests working-directory: builddir run: ctest --output-on-failure - name: Test linking against build dir if: ${{ matrix.cxx_extra_flags != '-fsanitize=address' }} working-directory: example/build_cmake_installed run: | cmake -B builddir_buildtree -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} -DKokkos_ROOT=../../builddir cmake --build builddir_buildtree cmake --build builddir_buildtree --target test - name: Test DESTDIR Install run: DESTDIR=${PWD}/install cmake --build builddir --target install && rm -rf ${PWD}/install/usr && rmdir ${PWD}/install - name: Install run: sudo cmake --build builddir --target install - name: Test install if: ${{ matrix.cxx_extra_flags != '-fsanitize=address' }} working-directory: example/build_cmake_installed run: | cmake -B builddir -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} cmake --build builddir cmake --build builddir --target test kokkos-4.3.01/.github/workflows/osx.yml000066400000000000000000000024661461675637500200650ustar00rootroot00000000000000name: github-OSX on: push: branches: - develop pull_request: paths-ignore: - '**/*.md' types: [ opened, reopened, synchronize ] concurrency: group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{github.event_name == 'pull_request'}} permissions: read-all jobs: osxci: name: osx-ci runs-on: [macos-latest] strategy: matrix: include: - backend: "SERIAL" cmake_build_type: "RelWithDebInfo" - backend: "THREADS" cmake_build_type: "RelWithDebInfo" - backend: "SERIAL" cmake_build_type: "Debug" - backend: "SERIAL" cmake_build_type: "Release" steps: - uses: actions/checkout@v4 - name: configure run: cmake -B build . 
-DKokkos_ENABLE_${{ matrix.backend }}=On -DCMAKE_CXX_FLAGS="-Werror" -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_COMPILER_WARNINGS=ON -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF -DKokkos_ENABLE_TESTS=On -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} - name: build run: cmake --build build --parallel 2 - name: test working-directory: build run: ctest --output-on-failure kokkos-4.3.01/.github/workflows/performance-benchmark.yml000066400000000000000000000042621461675637500235010ustar00rootroot00000000000000name: github-benchmarks on: push: branches: - develop pull_request: paths-ignore: - '**/*.md' types: [ opened, reopened, synchronize ] permissions: read-all jobs: CI: continue-on-error: true strategy: matrix: distro: ['ubuntu:latest'] cxx: ['g++', 'clang++'] backend: ['OPENMP'] runs-on: ubuntu-latest container: image: ghcr.io/kokkos/ci-containers/${{ matrix.distro }} env: BUILD_ID: ${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }} steps: - name: Checkout code uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: ~/.cache/ccache key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }}-${{ github.ref }}-${{ github.sha }} restore-keys: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }}-${{ github.ref }} - name: Configure Kokkos run: | cmake -B builddir \ -DKokkos_ENABLE_HWLOC=ON \ -DKokkos_ENABLE_${{ matrix.backend }}=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_BUILD_TYPE=Release - name: Build run: | ccache -z NUM_CPU=$(grep -c processor /proc/cpuinfo) cmake --build builddir --parallel ${NUM_CPU} ccache -s - name: Tests working-directory: builddir run: ctest --output-on-failure - name: Gather benchmark results run: | mkdir ${{ env.BUILD_ID }} find builddir/core/perf_test/ -name "*.json" -exec mv {} ${{ env.BUILD_ID }}/ \; - name: Push benchmark results if: ${{ github.ref == 'refs/heads/develop' }} uses: 
dmnemec/copy_file_to_another_repo_action@main env: API_TOKEN_GITHUB: ${{ secrets.DALG24_PUSH_BENCHMARK_RESULTS }} with: source_file: ${{ env.BUILD_ID }} destination_repo: 'kokkos/kokkos-benchmark-results' destination_branch: 'main' user_email: 'kokkos@users.noreply.github.com' user_name: 'Kokkos Developers' kokkos-4.3.01/.github/workflows/scorecard.yml000066400000000000000000000057231461675637500212200ustar00rootroot00000000000000# This workflow uses actions that are not certified by GitHub. They are provided # by a third-party and are governed by separate terms of service, privacy # policy, and support documentation. name: Scorecard supply-chain security on: # For Branch-Protection check. Only the default branch is supported. See # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection branch_protection_rule: # To guarantee Maintained check is occasionally updated. See # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained schedule: # Weekly on Saturdays. - cron: '30 1 * * 6' push: branches: [ master, develop ] # Declare default permissions as read only. permissions: read-all jobs: analysis: name: Scorecard analysis runs-on: ubuntu-latest permissions: # Needed to upload the results to code-scanning dashboard. security-events: write # Needed to publish results and get a badge (see publish_results below). id-token: write # Uncomment the permissions below if installing in a private repository. # contents: read # actions: read steps: - name: "Checkout code" uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2 with: persist-credentials: false - name: "Run analysis" uses: ossf/scorecard-action@80e868c13c90f172d68d1f4501dee99e2479f7af # v2.1.3 with: results_file: results.sarif results_format: sarif # (Optional) "write" PAT token. 
Uncomment the `repo_token` line below if: # - you want to enable the Branch-Protection check on a *public* repository, or # - you are installing Scorecard on a *private* repository # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. # repo_token: ${{ secrets.SCORECARD_TOKEN }} # Public repositories: # - Publish results to OpenSSF REST API for easy access by consumers # - Allows the repository to include the Scorecard badge. # - See https://github.com/ossf/scorecard-action#publishing-results. # For private repositories: # - `publish_results` will always be set to `false`, regardless # of the value entered here. publish_results: true # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2 with: name: SARIF file path: results.sarif retention-days: 5 # Upload the results to GitHub's code scanning dashboard. - name: "Upload SARIF results to code scanning" uses: github/codeql-action/upload-sarif@83f0fe6c4988d98a455712a27f0255212bba9bd4 # v2.3.6 with: sarif_file: results.sarif kokkos-4.3.01/.github/workflows/windows.yml000066400000000000000000000015011461675637500207330ustar00rootroot00000000000000name: github-windows on: push: pull_request: concurrency: group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{github.event_name == 'pull_request'}} permissions: read-all jobs: windows-cuda: # Cuda build on Windows name: Windows Cuda runs-on: windows-2022 steps: - uses: Jimver/cuda-toolkit@v0.2.14 id: cuda-toolkit with: cuda: '12.1.0' - uses: actions/checkout@v4 - name: configure shell: bash run: | mkdir build mkdir c:/project cd build cmake -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_VOLTA70=ON -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE=ON .. 
- name: build library shell: bash run: | cmake --build build --parallel 2 --config Release kokkos-4.3.01/.gitignore000066400000000000000000000004311461675637500151120ustar00rootroot00000000000000# Standard ignores *~ *.pyc \#*# .#* .*.swp .cproject .project testing/ .settings/ /.vs /out/build /CMakeSettings.json /out/mytest CMakeUserPresets.json # build directories in source tree /build* # IDE-specific files/folders ## VSCode /.vscode ## QtCreator /CMakeLists.txt.user* kokkos-4.3.01/.jenkins000066400000000000000000000635671461675637500146070ustar00rootroot00000000000000pipeline { agent none environment { CCACHE_DIR = '/tmp/ccache' CCACHE_MAXSIZE = '5G' CCACHE_CPP2 = 'true' } options { disableConcurrentBuilds(abortPrevious: true) timeout(time: 6, unit: 'HOURS') } triggers { issueCommentTrigger('.*test this please.*') } stages { stage('Clang-Format') { agent { dockerfile { filename 'Dockerfile.clang' dir 'scripts/docker' label 'nvidia-docker || docker' args '-v /tmp/ccache.kokkos:/tmp/ccache' } } steps { sh './scripts/docker/check_format_cpp.sh' } } stage('Build') { parallel { stage('OPENACC-NVHPC-CUDA-12.2') { agent { dockerfile { filename 'Dockerfile.nvhpc' dir 'scripts/docker' label 'nvidia-docker && volta && large_images' args '--env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } environment { NVHPC_CUDA_HOME = '/opt/nvidia/hpc_sdk/Linux_x86_64/23.7/cuda/12.2' } steps { sh '''rm -rf build && mkdir -p build && cd build && \ /opt/cmake/bin/cmake \ -DCMAKE_CXX_COMPILER=nvc++ \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_OPENACC=ON \ -DKokkos_ARCH_VOLTA70=ON \ .. 
&& \ make -j8 && ctest --verbose''' } } stage('CUDA-12.2-NVHPC') { agent { dockerfile { filename 'Dockerfile.nvhpc' dir 'scripts/docker' label 'nvidia-docker && large_images && volta' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } environment { OMP_NUM_THREADS = 8 // Nested OpenMP does not work for this configuration, // so disabling it OMP_MAX_ACTIVE_LEVELS = 1 OMP_PLACES = 'threads' OMP_PROC_BIND = 'spread' NVHPC_CUDA_HOME = '/opt/nvidia/hpc_sdk/Linux_x86_64/23.7/cuda/12.2' } steps { sh '''rm -rf build && mkdir -p build && cd build && \ /opt/cmake/bin/cmake \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_COMPILER=nvc++ \ -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS="--diag_suppress=implicit_return_from_non_void_function,no_device_stack" \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_CUDA=ON \ -DKokkos_ENABLE_CUDA_LAMBDA=ON \ -DKokkos_ENABLE_OPENMP=ON \ -DKokkos_ENABLE_IMPL_MDSPAN=ON \ -DKokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON \ .. 
&& \ make -j8 && ctest --verbose''' } } stage('SYCL-OneAPI') { agent { dockerfile { filename 'Dockerfile.sycl' dir 'scripts/docker' label 'nvidia-docker && ampere' args '-v /tmp/ccache.kokkos:/tmp/ccache' } } steps { sh 'ccache --zero-stats' sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=clang++ \ -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Wno-deprecated-declarations -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-unknown-cuda-version -Wno-sycl-target" \ -DKOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED=0 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ARCH_AMPERE80=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_SYCL=ON \ -DKokkos_ENABLE_UNSUPPORTED_ARCHS=ON \ -DCMAKE_CXX_STANDARD=17 \ .. && \ make -j8 && ctest --verbose''' } post { always { sh 'ccache --show-stats' } } } stage('HIP-ROCm-5.2') { agent { dockerfile { filename 'Dockerfile.hipcc' dir 'scripts/docker' additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.2-complete' label 'rocm-docker ' args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' } } environment { OMP_NUM_THREADS = 8 OMP_MAX_ACTIVE_LEVELS = 3 OMP_PLACES = 'threads' OMP_PROC_BIND = 'spread' } steps { sh 'ccache --zero-stats' sh 'echo "/opt/rocm/llvm/lib" > /etc/ld.so.conf.d/llvm.conf && ldconfig' sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_CXX_COMPILER=hipcc \ -DCMAKE_CXX_FLAGS="-Werror -Wno-unused-command-line-argument -DNDEBUG" \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ 
-DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_HIP=ON \ -DKokkos_ENABLE_OPENMP=ON \ -DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS=ON \ .. && \ make -j8 && ctest --verbose''' } post { always { sh 'ccache --show-stats' } } } stage('HIP-ROCm-5.6-C++20') { agent { dockerfile { filename 'Dockerfile.hipcc' dir 'scripts/docker' additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.6-complete' label 'rocm-docker' args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' } } steps { sh 'ccache --zero-stats' sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_COMPILER=hipcc \ -DCMAKE_CXX_FLAGS="-Werror -Wno-unused-command-line-argument" \ -DCMAKE_CXX_STANDARD=20 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_HIP=ON \ .. 
&& \ make -j8 && ctest --verbose''' } post { always { sh 'ccache --show-stats' } } } /* stage('OPENMPTARGET-ROCm-5.2') { agent { dockerfile { filename 'Dockerfile.hipcc' dir 'scripts/docker' additionalBuildArgs '--build-arg BASE=rocm/dev-ubuntu-20.04:5.2' label 'rocm-docker && vega && AMD_Radeon_Instinct_MI60' args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' } } environment { OMP_NUM_THREADS = 8 OMP_MAX_ACTIVE_LEVELS = 3 OMP_PLACES = 'threads' OMP_PROC_BIND = 'spread' LC_ALL = 'C' } steps { sh 'ccache --zero-stats' sh 'echo "/opt/rocm/llvm/lib" > /etc/ld.so.conf.d/llvm.conf && ldconfig' sh '''rm -rf build && \ cmake \ -Bbuild \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_CXX_COMPILER=amdclang++ \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_OPENMPTARGET=ON \ -DKokkos_ENABLE_OPENMP=ON \ -DKokkos_ARCH_AMD_GFX906=ON \ && \ cmake --build build --parallel ${BUILD_JOBS} && \ cd build && ctest --output-on-failure ''' } post { always { sh 'ccache --show-stats' } } } */ stage('OPENMPTARGET-Clang') { agent { dockerfile { filename 'Dockerfile.openmptarget' dir 'scripts/docker' label 'nvidia-docker && volta' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } steps { sh 'ccache --zero-stats' sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_COMPILER=clang++ \ -DCMAKE_CXX_FLAGS="-Wno-unknown-cuda-version -Werror -Wno-undefined-internal -Wno-pass-failed" \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_TUNING=ON \ 
-DKokkos_ENABLE_OPENMPTARGET=ON \ -DKokkos_ARCH_VOLTA70=ON \ -DCMAKE_CXX_STANDARD=17 \ .. && \ make -j8 && ctest --verbose''' } post { always { sh 'ccache --show-stats' } } } stage('CUDA-11.0.3-Clang-Tidy') { agent { dockerfile { filename 'Dockerfile.kokkosllvmproject' dir 'scripts/docker' label 'nvidia-docker && volta' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } steps { sh 'ccache --zero-stats' sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_CLANG_TIDY="clang-tidy;-warnings-as-errors=*" \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=clang++ \ -DCMAKE_CXX_FLAGS="-Werror -Wno-unknown-cuda-version" \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_CUDA=ON \ -DKokkos_ENABLE_CUDA_LAMBDA=ON \ -DKokkos_ENABLE_TUNING=ON \ -DKokkos_ARCH_VOLTA70=ON \ .. 
&& \ make -j8 && ctest --verbose''' } post { always { sh 'ccache --show-stats' } } } stage('CUDA-11.7-NVCC') { agent { dockerfile { filename 'Dockerfile.nvcc' dir 'scripts/docker' additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.7.1-devel-ubuntu20.04' label 'nvidia-docker && volta' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } steps { sh 'ccache --zero-stats' sh '''rm -rf build && mkdir -p build && cd build && \ ../gnu_generate_makefile.bash \ --with-options=compiler_warnings \ --cxxflags="-Werror" \ --cxxstandard=c++17 \ --with-cuda \ --with-cuda-options=enable_lambda \ --arch=Volta70 \ && \ make test -j8''' } post { always { sh 'ccache --show-stats' } } } stage('CUDA-11.0-NVCC-RDC') { agent { dockerfile { filename 'Dockerfile.nvcc' dir 'scripts/docker' additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.0.3-devel-ubuntu18.04 --build-arg ADDITIONAL_PACKAGES="g++-8 gfortran clang" --build-arg CMAKE_VERSION=3.17.3' label 'nvidia-docker' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } environment { OMP_NUM_THREADS = 8 // Nested OpenMP does not work for this configuration, // so disabling it OMP_MAX_ACTIVE_LEVELS = 1 OMP_PLACES = 'threads' OMP_PROC_BIND = 'spread' NVCC_WRAPPER_DEFAULT_COMPILER = 'g++-8' } steps { sh 'ccache --zero-stats' sh '''rm -rf install && mkdir -p install && \ rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER=g++-8 \ -DCMAKE_CXX_FLAGS=-Werror \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_OPENMP=OFF \ -DKokkos_ENABLE_CUDA=ON \ -DKokkos_ENABLE_CUDA_LAMBDA=OFF \ -DKokkos_ENABLE_CUDA_UVM=ON \ -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_IMPL_MDSPAN=ON \ -DCMAKE_INSTALL_PREFIX=${PWD}/../install \ .. 
&& \ make -j8 install && \ cd .. && \ rm -rf build-tests && mkdir -p build-tests && cd build-tests && \ export CMAKE_PREFIX_PATH=${PWD}/../install && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=$WORKSPACE/bin/nvcc_wrapper \ -DCMAKE_CXX_FLAGS=-Werror --Werror=all-warnings -Xcudafe --diag_suppress=3159 \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_INSTALL_TESTING=ON \ .. && \ make -j8 && ctest --verbose && \ cd ../example/build_cmake_installed && \ rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_CXX_COMPILER=g++-8 \ -DCMAKE_CXX_FLAGS=-Werror \ -DCMAKE_CXX_STANDARD=17 \ .. && \ make -j8 && ctest --verbose && \ cd ../.. && \ cmake -B build_cmake_installed_different_compiler/build -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_FLAGS=-Werror -DCMAKE_CXX_STANDARD=17 build_cmake_installed_different_compiler && \ cmake --build build_cmake_installed_different_compiler/build --target all && \ cmake --build build_cmake_installed_different_compiler/build --target test''' } post { always { sh 'ccache --show-stats' } } } stage('CUDA-11.6-NVCC-DEBUG') { agent { dockerfile { filename 'Dockerfile.nvcc' dir 'scripts/docker' additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.6.2-devel-ubuntu20.04' label 'nvidia-docker' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } steps { sh 'ccache --zero-stats' sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=$WORKSPACE/bin/nvcc_wrapper \ -DCMAKE_CXX_FLAGS=-Werror \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEBUG=ON \ -DKokkos_ENABLE_DEBUG_BOUNDS_CHECK=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_CUDA=ON \ -DKokkos_ENABLE_CUDA_LAMBDA=ON \ -DKokkos_ENABLE_LIBDL=OFF \ -DKokkos_ENABLE_IMPL_MDSPAN=ON \ 
-DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF \ .. && \ make -j8 && ctest --verbose && \ cd ../example/build_cmake_in_tree && \ rm -rf build && mkdir -p build && cd build && \ cmake -DCMAKE_CXX_STANDARD=17 .. && make -j8 && ctest --verbose''' } post { always { sh 'ccache --show-stats' } } } stage('GCC-8.4.0') { agent { dockerfile { filename 'Dockerfile.gcc' dir 'scripts/docker' label 'docker' } } environment { OMP_NUM_THREADS = 8 OMP_NESTED = 'true' OMP_MAX_ACTIVE_LEVELS = 3 OMP_PROC_BIND = 'true' } steps { sh '''rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=-Werror \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_OPENMP=ON \ -DKokkos_ENABLE_LIBDL=OFF \ -DKokkos_ENABLE_LIBQUADMATH=ON \ -DKokkos_ENABLE_SERIAL=ON \ .. && \ make -j8 && ctest --verbose && gcc -I$PWD/../core/src/ ../core/unit_test/tools/TestCInterface.c''' } } } } } } kokkos-4.3.01/.jenkins_nightly000066400000000000000000000102751461675637500163310ustar00rootroot00000000000000pipeline { agent none options { timeout(time: 6, unit: 'HOURS') } stages { stage('Build') { parallel { stage('spack-serial') { agent { docker { image 'ubuntu:22.04' label 'docker' } } steps { sh ''' DEBIAN_FRONTEND=noninteractive && \ apt-get update && apt-get upgrade -y && apt-get install -y \ build-essential \ wget \ git \ bc \ python3-dev \ && \ apt-get clean && rm -rf /var/lib/apt/lists/* rm -rf spack && \ git clone https://github.com/spack/spack.git && \ . 
./spack/share/spack/setup-env.sh && \ spack install kokkos@develop+tests && \ spack load cmake && \ spack test run kokkos && \ spack test results -l ''' } } stage('spack-cuda') { agent { docker { image 'nvidia/cuda:12.1.0-devel-ubuntu22.04' label 'nvidia-docker && ampere' } } steps { sh ''' DEBIAN_FRONTEND=noninteractive && \ apt-get update && apt-get upgrade -y && apt-get install -y \ build-essential \ wget \ git \ bc \ python3-dev \ gfortran \ && \ apt-get clean && rm -rf /var/lib/apt/lists/* rm -rf spack && \ git clone https://github.com/spack/spack.git && \ . ./spack/share/spack/setup-env.sh && \ spack install kokkos@develop+cuda+wrapper+tests cuda_arch=80 ^cuda@12.1.0 && \ spack load cmake && \ spack load kokkos-nvcc-wrapper && \ spack load cuda && \ spack load kokkos && \ spack test run kokkos && \ spack test results -l ''' } } stage('GCC-13') { agent { docker { image 'gcc:13.1' label 'docker' } } steps { sh ''' DEBIAN_FRONTEND=noninteractive && \ apt-get update && apt-get upgrade -y && apt-get install -y \ cmake \ && \ apt-get clean && rm -rf /var/lib/apt/lists/* mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_STANDARD=23 \ -DCMAKE_CXX_FLAGS=-Werror \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_BENCHMARKS=ON \ -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_SERIAL=ON \ .. && \ make -j8 && ctest --verbose ''' } } } } } } kokkos-4.3.01/BUILD.md000066400000000000000000000130461461675637500143110ustar00rootroot00000000000000![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) # Installing and Using Kokkos ## Kokkos Philosophy Kokkos provides a modern CMake style build system. As C++ continues to develop for C++20 and beyond, CMake is likely to provide the most robust support for C++. Applications heavily leveraging Kokkos are strongly encouraged to use a CMake build system. 
You can either use Kokkos as an installed package (encouraged) or use Kokkos in-tree in your project. Modern CMake is exceedingly simple at a high level (with the devil in the details). Once Kokkos is installed, in your `CMakeLists.txt` simply use: ````cmake find_package(Kokkos REQUIRED) ```` Then for every executable or library in your project: ````cmake target_link_libraries(myTarget Kokkos::kokkos) ```` That's it! There is no checking Kokkos preprocessor, compiler, or linker flags. Kokkos propagates all the necessary flags to your project. This means not only is linking to Kokkos easy, but Kokkos itself can actually configure compiler and linker flags for *your* project. When configuring your project just set: ````bash > cmake ${srcdir} \ -DKokkos_ROOT=${kokkos_install_prefix} \ -DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos} ```` Note: You may need the following if your project requires a minimum CMake version older than 3.12: ````cmake cmake_policy(SET CMP0074 NEW) ```` If building in-tree, there is no `find_package`. You can use `add_subdirectory(kokkos)` with the Kokkos source and again just link with `target_link_libraries(Kokkos::kokkos)`. The examples in `example/build_cmake_installed` and `example/build_cmake_in_tree` can help get you started. ## Configuring CMake A very basic installation of Kokkos is done with: ````bash > cmake ${srcdir} \ -DCMAKE_CXX_COMPILER=g++ \ -DCMAKE_INSTALL_PREFIX=${kokkos_install_folder} ```` which builds and installs a default Kokkos when you run `make install`. There are numerous device backends, options, and architecture-specific optimizations that can be configured, e.g. ````bash > cmake ${srcdir} \ -DCMAKE_CXX_COMPILER=g++ \ -DCMAKE_INSTALL_PREFIX=${kokkos_install_folder} \ -DKokkos_ENABLE_OPENMP=ON ```` which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below.
Kokkos requires as a minimum C++17, however C++20 and C++23 are supported depending on the compiler. The latest minimum compiler versions can be found in `cmake/kokkos_compiler_id.cmake`. ## Known Issues ### Cray * The Cray compiler wrappers do static linking by default. This seems to break the Kokkos build. You will likely need to set the environment variable `CRAYPE_LINK_TYPE=dynamic` in order to link correctly. Kokkos warns during configure if this is missing. * The Cray compiler identifies to CMake as Clang, but it sometimes has its own flags that differ from Clang. We try to include all exceptions, but flag errors may occur in which a Clang-specific flag is passed that the Cray compiler does not recognize. ### Fortran * In a mixed C++/Fortran code, CMake will use the C++ linker by default. If you override this behavior and use Fortran as the link language, the link may break because Kokkos adds linker flags expecting the linker to be C++. Prior to CMake 3.18, Kokkos has no way of detecting in downstream projects that the linker was changed to Fortran. From CMake 3.18, Kokkos can use generator expressions to avoid adding flags when the linker is not C++. Note: Kokkos will not add any linker flags in this Fortran case. The user will be entirely on their own to add the appropriate linker flags. ## Spack An alternative to manually building with CMake is to use the Spack package manager. Make sure you have downloaded [Spack](https://github.com/spack/spack). The easiest way to configure the Spack environment is: ````bash > source spack/share/spack/setup-env.sh ```` with other scripts available for other shells. You can display information about how to install packages with: ````bash > spack info kokkos ```` A basic installation would be done as: ````bash > spack install kokkos ```` Spack allows options and compilers to be tuned in the install command.
````bash > spack install kokkos@3.0 %gcc@7.3.0 +openmp ```` This example illustrates the three most common parameters to Spack: * Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options. * Version: immediately following `kokkos` the `@version` can specify a particular Kokkos version to build * Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%` option. For a complete list of Kokkos options, run: ````bash > spack info kokkos ```` More details can be found in the [Spack README](Spack.md) #### Spack Development Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable". Generally, Spack usage should never really require you to reference the computer-generated unique install folder. If you must know, you can locate Spack Kokkos installations with: ````bash > spack find -p kokkos ... ```` where `...` is the unique spec identifying the particular Kokkos configuration and version. A better way to use Spack for doing Kokkos development is the dev-build feature of Spack. For dev-build details, consult the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md). # Kokkos Keyword Listing Please refer to our [wiki](https://kokkos.github.io/kokkos-core-wiki/keywords.html#cmake-keywords).
kokkos-4.3.01/CHANGELOG.md000066400000000000000000005615321461675637500147510ustar00rootroot00000000000000# CHANGELOG ## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01) ### Backend and Architecture Enhancements: #### HIP: * MI300: add support for unified memory [\#6877](https://github.com/kokkos/kokkos/pull/6877) ### Bug Fixes * Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951) * `nvcc_wrapper`: bring back support for `--fmad` option [\#6931](https://github.com/kokkos/kokkos/pull/6931) * Fix CUDA reduction overflow for `RangePolicy` [\#6578](https://github.com/kokkos/kokkos/pull/6578) ## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00) ### Features: * Add `Experimental::sort_by_key(exec, keys, values)` algorithm [\#6801](https://github.com/kokkos/kokkos/pull/6801) ### Backend and Architecture Enhancements: #### CUDA: * Experimental multi-GPU support (from the same process) [\#6782](https://github.com/kokkos/kokkos/pull/6782) * Link against CUDA libraries even with KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE [\#6701](https://github.com/kokkos/kokkos/pull/6701) * Don't use the compiler launcher script if the CMake compile language is CUDA.
[\#6704](https://github.com/kokkos/kokkos/pull/6704) * nvcc(wrapper): adding "long" and "short" versions for all flags [\#6615](https://github.com/kokkos/kokkos/pull/6615) #### HIP: * Fix compilation when using amdclang (with ROCm >= 5.7) and RDC [\#6857](https://github.com/kokkos/kokkos/pull/6857) * Use rocthrust for sorting, when available [\#6793](https://github.com/kokkos/kokkos/pull/6793) #### SYCL: * We only support OneAPI SYCL implementation: add check during initialization * Error out on initialization if the backend is different from `ext_oneapi_*` [\#6784](https://github.com/kokkos/kokkos/pull/6784) * Filter GPU devices for `ext_oneapi_*` GPU devices [\#6758](https://github.com/kokkos/kokkos/pull/6758) * Performance Improvements * Avoid unnecessary zero-memset of the scratch flags in SYCL [\#6739](https://github.com/kokkos/kokkos/pull/6739) * Use host-pinned memory to copy reduction/scan result [\#6500](https://github.com/kokkos/kokkos/pull/6500) * Address deprecations after oneAPI 2023.2.0 [\#6577](https://github.com/kokkos/kokkos/pull/6577) * Make sure to call find_dependency for oneDPL if necessary [\#6870](https://github.com/kokkos/kokkos/pull/6870) #### OpenMPTarget: * Use LLVM extensions for dynamic shared memory [\#6380](https://github.com/kokkos/kokkos/pull/6380) * Guard scratch memory usage in ParallelReduce [\#6585](https://github.com/kokkos/kokkos/pull/6585) * Update linker flags for Intel GPUs [\#6735](https://github.com/kokkos/kokkos/pull/6735) * Improve handling of printf on Intel GPUs [\#6652](https://github.com/kokkos/kokkos/pull/6652) #### OpenACC: * Add atomics support [\#6446](https://github.com/kokkos/kokkos/pull/6446) * Make the OpenACC backend asynchronous [\#6772](https://github.com/kokkos/kokkos/pull/6772) #### Threads: * Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6601) #### OpenMP: * Improve performance of view initializations and filling with zeros
[\#6573](https://github.com/kokkos/kokkos/pull/6573) ### General Enhancements * Improve performance of random number generation when using a normal distribution on GPUs [\#6556](https://github.com/kokkos/kokkos/pull/6556) * Allocate temporary view with the user-provided execution space instance and do not initialize in `unique` algorithm [\#6598](https://github.com/kokkos/kokkos/pull/6598) * Add deduction guide for `Kokkos::Array` [\#6373](https://github.com/kokkos/kokkos/pull/6373) * Provide new public headers `<Kokkos_Clamp.hpp>` and `<Kokkos_MinMax.hpp>` [\#6687](https://github.com/kokkos/kokkos/pull/6687) * Fix/improvement to `remove_if` parallel algorithm: use the provided execution space instance for temporary allocations and drop unnecessary initialization + avoid evaluating the predicate twice during final pass [\#6747](https://github.com/kokkos/kokkos/pull/6747) * Add runtime function to query the number of devices and make device ID consistent with `KOKKOS_VISIBLE_DEVICES` [\#6713](https://github.com/kokkos/kokkos/pull/6713) * simd: support `vector_aligned_tag` [\#6243](https://github.com/kokkos/kokkos/pull/6243) * Avoid unnecessary allocation when default constructing Bitset [\#6524](https://github.com/kokkos/kokkos/pull/6524) * Fix constness for views in std algorithms [\#6813](https://github.com/kokkos/kokkos/pull/6813) * Improve error message on unsafe implicit conversion in MDRangePolicy [\#6855](https://github.com/kokkos/kokkos/pull/6855) * CTAD (deduction guides) for RangePolicy [\#6850](https://github.com/kokkos/kokkos/pull/6850) * CTAD (deduction guides) for MDRangePolicy [\#5516](https://github.com/kokkos/kokkos/pull/5516) ### Build System Changes * Require `Kokkos_ENABLE_ATOMICS_BYPASS` option to bypass atomic operation for Serial backend only builds [\#6692](https://github.com/kokkos/kokkos/pull/6692) * Add support for RISCV and the Milk-V's Pioneer [\#6773](https://github.com/kokkos/kokkos/pull/6773) * Add C++26 standard to CMake setup
[\#6733](https://github.com/kokkos/kokkos/pull/6733) * Fix Makefile when using gnu_generate_makefile.sh and make >= 4.3 [\#6606](https://github.com/kokkos/kokkos/pull/6606) * Cuda: Fix configuring with CMake >= 3.28.4 - temporary fallback to internal CudaToolkit.cmake [\#6898](https://github.com/kokkos/kokkos/pull/6898) ### Incompatibilities (i.e. breaking changes) * Remove the `DEPRECATED_CODE_3` option and all code that was guarded by it [\#6523](https://github.com/kokkos/kokkos/pull/6523) * Drop guards to accommodate external code defining `KOKKOS_ASSERT` [\#6665](https://github.com/kokkos/kokkos/pull/6665) * `Profiling::ProfilingSection(std::string)` constructor marked explicit and nodiscard [\#6690](https://github.com/kokkos/kokkos/pull/6690) * Add bound check preconditions for `RangePolicy` and `MDRangePolicy` [\#6617](https://github.com/kokkos/kokkos/pull/6617) [\#6726](https://github.com/kokkos/kokkos/pull/6726) * Add checks for unsafe implicit conversions in RangePolicy [\#6754](https://github.com/kokkos/kokkos/pull/6754) * Remove Kokkos::[b]half_t volatile overloads [\#6579](https://github.com/kokkos/kokkos/pull/6579) * Remove KOKKOS_IMPL_DO_NOT_USE_PRINTF [\#6593](https://github.com/kokkos/kokkos/pull/6593) * Check matching static extents in View constructor [\#5190](https://github.com/kokkos/kokkos/pull/5190) * Tools(profiling): fix typo Kokkos_Tools_Optim[i]zationGoal [\#6642](https://github.com/kokkos/kokkos/pull/6642) * Remove variadic range policy constructor (disallow passing multiple trailing chunk size arguments) [\#6845](https://github.com/kokkos/kokkos/pull/6845) * Improve message on view out of bounds access and always abort [\#6861](https://github.com/kokkos/kokkos/pull/6861) * Drop `KOKKOS_ENABLE_INTEL_MM_ALLOC` macro [\#6797](https://github.com/kokkos/kokkos/pull/6797) * Remove `Kokkos::Experimental::LogicalMemorySpace` (without going through deprecation) [\#6557](https://github.com/kokkos/kokkos/pull/6557) * Remove
`Experimental::HBWSpace` and support for linking against memkind [\#6791](https://github.com/kokkos/kokkos/pull/6791) * Drop librt TPL and associated `KOKKOS_ENABLE_LIBRT` macro [\#6798](https://github.com/kokkos/kokkos/pull/6798) * Drop support for old CPU architectures (`ARCH_BGQ`, `ARCH_POWER7`, `ARCH_WSM` and associated `ARCH_SSE4` macro) [\#6806](https://github.com/kokkos/kokkos/pull/6806) * Drop support for deprecated command-line arguments and environment variables [\#6744](https://github.com/kokkos/kokkos/pull/6744) ### Deprecations * Provide kokkos_swap as part of Core and deprecate Experimental::swap in Algorithms [\#6697](https://github.com/kokkos/kokkos/pull/6697) * Deprecate {Cuda,HIP}::detect_device_count() and Cuda::[detect_]device_arch() [\#6710](https://github.com/kokkos/kokkos/pull/6710) * Deprecate `ExecutionSpace::in_parallel()` [\#6582](https://github.com/kokkos/kokkos/pull/6582) ### Bug Fixes * Fix team-level MDRange reductions [\#6511](https://github.com/kokkos/kokkos/pull/6511) * Fix CUDA and SYCL small value type (16-bit) team reductions [\#5334](https://github.com/kokkos/kokkos/pull/5334) * Enable `{transform_}exclusive_scan` in place [\#6667](https://github.com/kokkos/kokkos/pull/6667) * `fill_random` overloads that do not take an execution space instance argument should fence [\#6658](https://github.com/kokkos/kokkos/pull/6658) * HIP,Cuda,OpenMPTarget: Fixup use provided execution space when copying host inaccessible reduction result [\#6777](https://github.com/kokkos/kokkos/pull/6777) * Fix typo in `cuda_func_set_attribute[s]_wrapper` preventing proper setting of desired occupancy [\#6786](https://github.com/kokkos/kokkos/pull/6786) * Avoid undefined behavior due to conversion between signed and unsigned integers in shift_{right, left}_team_impl [\#6821](https://github.com/kokkos/kokkos/pull/6821) * Fix a bug in Makefile.kokkos when using AMD GPU architectures as `AMD_GFXYYY` [\#6892](https://github.com/kokkos/kokkos/pull/6892) ##
[4.2.01](https://github.com/kokkos/kokkos/tree/4.2.01) (2023-12-07) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.00...4.2.01) ### Backend and Architecture Enhancements: #### CUDA: - Add warp sync for `parallel_reduce` to avoid race condition [\#6630](https://github.com/kokkos/kokkos/pull/6630), [\#6746](https://github.com/kokkos/kokkos/pull/6746) #### HIP: - Fix Graph "multiple definition of" linking error (missing `inline` specifier) [\#6624](https://github.com/kokkos/kokkos/pull/6624) - Add support for gfx940 (AMD Instinct MI300 GPU) [\#6671](https://github.com/kokkos/kokkos/pull/6671) ### Build System - CMake: Don't let Kokkos set `CMAKE_CXX_FLAGS` for Trilinos builds [\#6742](https://github.com/kokkos/kokkos/pull/6742) ### Bug Fixes - Remove deprecation warning for `AllocationMechanism` for GCC <11.0 [\#6653](https://github.com/kokkos/kokkos/pull/6653) - Fix bug in early tools finalize with non-default host execution instances [\#6635](https://github.com/kokkos/kokkos/pull/6635) - Fix various issues for MSVC CUDA builds [\#6659](https://github.com/kokkos/kokkos/pull/6659) - Fix "extra `;`" warning with `-pedantic` flag in `<Kokkos_SIMD_Scalar.hpp>` [\#6510](https://github.com/kokkos/kokkos/pull/6510) ## [4.2.00](https://github.com/kokkos/kokkos/tree/4.2.00) (2023-11-06) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.1.00...4.2.00) ### Features: - SIMD: significant improvements to SIMD support and alignment with C++26 SIMD - add `Kokkos::abs` overload for SIMD types [\#6069](https://github.com/kokkos/kokkos/pull/6069) - add generator constructors [\#6347](https://github.com/kokkos/kokkos/pull/6347) - convert binary operators to hidden friends [\#6320](https://github.com/kokkos/kokkos/pull/6320) - add shift operators [\#6109](https://github.com/kokkos/kokkos/pull/6109) - add `float` support [\#6177](https://github.com/kokkos/kokkos/pull/6177) - add remaining `gather_from` and `scatter_to` overloads [\#6220](https://github.com/kokkos/kokkos/pull/6220) - define
simd math function overloads in the Kokkos namespace [\#6465](https://github.com/kokkos/kokkos/pull/6465), [\#6487](https://github.com/kokkos/kokkos/pull/6487) - `Kokkos_ENABLE_NATIVE=ON` autodetects SIMD types supported [\#6188](https://github.com/kokkos/kokkos/pull/6188) - fix AVX2 SIMD support for ZEN2 AMD CPU [\#6238](https://github.com/kokkos/kokkos/pull/6238) - `Kokkos::printf` [\#6083](https://github.com/kokkos/kokkos/pull/6083) - `Kokkos::sort`: support custom comparator [\#6253](https://github.com/kokkos/kokkos/pull/6253) - `half_t` and `bhalf_t` numeric traits [\#5778](https://github.com/kokkos/kokkos/pull/5778) - `half_t` and `bhalf_t` mixed comparisons [\#6407](https://github.com/kokkos/kokkos/pull/6407) - `half_t` and `bhalf_t` mathematical functions [\#6124](https://github.com/kokkos/kokkos/pull/6124) - `TeamThreadRange` `parallel_scan` with return value [\#6090](https://github.com/kokkos/kokkos/pull/6090), [\#6301](https://github.com/kokkos/kokkos/pull/6301), [\#6302](https://github.com/kokkos/kokkos/pull/6302), [\#6303](https://github.com/kokkos/kokkos/pull/6303), [\#6307](https://github.com/kokkos/kokkos/pull/6307) - `ThreadVectorRange` `parallel_scan` with return value [\#6235](https://github.com/kokkos/kokkos/pull/6235), [\#6242](https://github.com/kokkos/kokkos/pull/6242), [\#6308](https://github.com/kokkos/kokkos/pull/6308), [\#6305](https://github.com/kokkos/kokkos/pull/6305), [\#6292](https://github.com/kokkos/kokkos/pull/6292) - Add team-level std algorithms [\#6200](https://github.com/kokkos/kokkos/pull/6200), [\#6205](https://github.com/kokkos/kokkos/pull/6205), [\#6207](https://github.com/kokkos/kokkos/pull/6207), [\#6208](https://github.com/kokkos/kokkos/pull/6208), [\#6209](https://github.com/kokkos/kokkos/pull/6209), [\#6210](https://github.com/kokkos/kokkos/pull/6210), [\#6211](https://github.com/kokkos/kokkos/pull/6211), [\#6212](https://github.com/kokkos/kokkos/pull/6212), [\#6213](https://github.com/kokkos/kokkos/pull/6213), 
[\#6256](https://github.com/kokkos/kokkos/pull/6256), [\#6258](https://github.com/kokkos/kokkos/pull/6258), [\#6350](https://github.com/kokkos/kokkos/pull/6350), [\#6351](https://github.com/kokkos/kokkos/pull/6351) - Serial: Allow for distinct execution space instances [\#6441](https://github.com/kokkos/kokkos/pull/6441) ### Backend and Architecture Enhancements: #### CUDA: - Fixed potential data race in Cuda `parallel_reduce` [\#6236](https://github.com/kokkos/kokkos/pull/6236) - Use `cudaMallocAsync` by default [\#6402](https://github.com/kokkos/kokkos/pull/6402) - Bugfix for using Kokkos from a thread of execution [\#6299](https://github.com/kokkos/kokkos/pull/6299) #### HIP: - New naming convention for AMD GPU: VEGA906, VEGA908, VEGA90A, NAVI1030 to AMD_GFX906, AMD_GFX908, AMD_GFX90A, AMD_GFX1030 [\#6266](https://github.com/kokkos/kokkos/pull/6266) - Add initial support for gfx942: [\#6358](https://github.com/kokkos/kokkos/pull/6358) - Improve reduction performance [\#6229](https://github.com/kokkos/kokkos/pull/6229) - Deprecate `HIP(hipStream_t,bool)` constructor [\#6401](https://github.com/kokkos/kokkos/pull/6401) - Add support for Graph [\#6370](https://github.com/kokkos/kokkos/pull/6370) - Improve reduction performance when using Teams [\#6284](https://github.com/kokkos/kokkos/pull/6284) - Fix concurrency calculation [\#6479](https://github.com/kokkos/kokkos/pull/6479) - Fix potential data race in HIP `parallel_reduce` [\#6429](https://github.com/kokkos/kokkos/pull/6429) #### SYCL: - Enforce external `sycl::queues` to be in-order [\#6246](https://github.com/kokkos/kokkos/pull/6246) - Improve reduction performance: [\#6272](https://github.com/kokkos/kokkos/pull/6272) [\#6271](https://github.com/kokkos/kokkos/pull/6271) [\#6270](https://github.com/kokkos/kokkos/pull/6270) [\#6264](https://github.com/kokkos/kokkos/pull/6264) - Allow using the SYCL execution space on AMD GPUs [\#6321](https://github.com/kokkos/kokkos/pull/6321) - Allow sorting via native oneDPL 
to support Views with stride=1 [\#6322](https://github.com/kokkos/kokkos/pull/6322) - Make in-order queues the default via macro [\#6189](https://github.com/kokkos/kokkos/pull/6189) #### OpenACC: - Support Clacc compiler [\#6250](https://github.com/kokkos/kokkos/pull/6250) ### General Enhancements - Add missing `is_*_view` traits and `is_*_view_v` helper variable templates for `DynRankView`, `DynamicView`, `OffsetView`, `ScatterView` containers [\#6195](https://github.com/kokkos/kokkos/pull/6195) - Make `nvcc_wrapper` and `compiler_launcher` scripts more portable by switching to a `#!/usr/bin/env` shebang [\#6357](https://github.com/kokkos/kokkos/pull/6357) - Add an improved `Kokkos::malloc` / `Kokkos::free` performance test [\#6377](https://github.com/kokkos/kokkos/pull/6377) - Ensure `Views` with `size==0` can be used with `deep_copy` [\#6273](https://github.com/kokkos/kokkos/pull/6273) - `Kokkos::abort` is moved to header `Kokkos_Abort.hpp` [\#6445](https://github.com/kokkos/kokkos/pull/6445) - `KOKKOS_ASSERT`, `KOKKOS_EXPECTS`, `KOKKOS_ENSURES` are moved to header `Kokkos_Assert.hpp` [\#6445](https://github.com/kokkos/kokkos/pull/6445) - Add a permuted-index mode to the gups benchmark [\#6378](https://github.com/kokkos/kokkos/pull/6378) - Check for overflow during backend initialization [\#6159](https://github.com/kokkos/kokkos/pull/6159) - Make constraints on `Kokkos::sort` more visible [\#6234](https://github.com/kokkos/kokkos/pull/6234) and cleanup API [\#6239](https://github.com/kokkos/kokkos/pull/6239) - Add converting assignment to `DualView`: [\#6474](https://github.com/kokkos/kokkos/pull/6474) ### Build System Changes - Export `Kokkos_CXX_COMPILER_VERSION` [\#6282](https://github.com/kokkos/kokkos/pull/6282) - Disable default oneDPL support in Trilinos [\#6342](https://github.com/kokkos/kokkos/pull/6342) ### Incompatibilities (i.e. 
breaking changes) - Ensure that `Kokkos::complex` only gets instantiated for cv-unqualified floating-point types [\#6251](https://github.com/kokkos/kokkos/pull/6251) - Removed (deprecated-3) support for volatile join operators in reductions [\#6385](https://github.com/kokkos/kokkos/pull/6385) - Enforce `ViewCtorArgs` restrictions for `create_mirror_view` [\#6304](https://github.com/kokkos/kokkos/pull/6304) - SIMD types for ARM NEON are not autodetected anymore but need `Kokkos_ARCH_ARM_NEON` or `Kokkos_ARCH_NATIVE=ON` [\#6394](https://github.com/kokkos/kokkos/pull/6394) - Remove `#include <iostream>` from headers where possible [\#6482](https://github.com/kokkos/kokkos/pull/6482) ### Deprecations - Deprecated `Kokkos::vector` [\#6252](https://github.com/kokkos/kokkos/pull/6252) - All host allocation mechanisms except for `STD_MALLOC` have been deprecated [\#6341](https://github.com/kokkos/kokkos/pull/6341) ### Bug Fixes - Missing memory fence in `RandomPool::free_state` functions [\#6290](https://github.com/kokkos/kokkos/pull/6290) - Fix for corner case in `Kokkos::Experimental::is_partitioned` algorithm [\#6257](https://github.com/kokkos/kokkos/pull/6257) - Fix initialization of scratch lock variables in the `Cuda` backend [\#6433](https://github.com/kokkos/kokkos/pull/6433) - Fixes for `Kokkos::Array` [\#6372](https://github.com/kokkos/kokkos/pull/6372) - Fixed symlink configure issue for Windows [\#6241](https://github.com/kokkos/kokkos/pull/6241) - OpenMPTarget init-join fix [\#6444](https://github.com/kokkos/kokkos/pull/6444) - Fix atomic operations bug for Min and Max [\#6435](https://github.com/kokkos/kokkos/pull/6435) - Fix implementation for `cyl_bessel_i0` [\#6484](https://github.com/kokkos/kokkos/pull/6484) - Fix various NVCC warnings in `BinSort`, `Array`, and bit manipulation function templates [\#6483](https://github.com/kokkos/kokkos/pull/6483) ## [4.1.00](https://github.com/kokkos/kokkos/tree/4.1.00) (2023-06-16) [Full
Changelog](https://github.com/kokkos/kokkos/compare/4.0.01...4.1.00) ### Features: * Add `<Kokkos_BitManipulation.hpp>` header [\#4577](https://github.com/kokkos/kokkos/pull/4577) [\#5907](https://github.com/kokkos/kokkos/pull/5907) [\#5967](https://github.com/kokkos/kokkos/pull/5967) [\#6101](https://github.com/kokkos/kokkos/pull/6101) * Add `UnorderedMapInsertOpTypes` [\#5877](https://github.com/kokkos/kokkos/pull/5877) and documentation [\#350](https://github.com/kokkos/kokkos-core-wiki/pull/350) * Add multiple reducers support for team-level parallel reduce [\#5727](https://github.com/kokkos/kokkos/pull/5727) ### Backend and Architecture Enhancements: #### CUDA: * Allow NVCC 12 to compile using C++20 flag [\#5977](https://github.com/kokkos/kokkos/pull/5977) * Remove ability to disable CMake option `Kokkos_ENABLE_CUDA_LAMBDA` and unconditionally enable CUDA extended lambda support. [\#5964](https://github.com/kokkos/kokkos/pull/5964) * Drop unnecessary fences around the memory allocation when using `CudaUVMSpace` in views [\#6008](https://github.com/kokkos/kokkos/pull/6008) #### HIP: * Improve performance for `parallel_reduce`.
Use different parameters for `LightWeight` kernels [\#6029](https://github.com/kokkos/kokkos/pull/6029) and [\#6160](https://github.com/kokkos/kokkos/pull/6160) #### SYCL: * Only pass one wrapper object in SYCL reductions [\#6047](https://github.com/kokkos/kokkos/pull/6047) * Improve and simplify parallel_scan implementation [\#6064](https://github.com/kokkos/kokkos/pull/6064) * Remove workaround for submit_barrier not being enqueued properly [\#5504](https://github.com/kokkos/kokkos/pull/5504) * Fix guards for using scratch space with SYCL [\#6003](https://github.com/kokkos/kokkos/pull/6003) * Fix compiling SYCL with KOKKOS_IMPL_DO_NOT_USE_PRINTF_USAGE [\#6219](https://github.com/kokkos/kokkos/pull/6219) #### OpenMPTarget: * Improve hierarchical parallelism for Intel architectures [\#6043](https://github.com/kokkos/kokkos/pull/6043) * Enable Cray compiler for the OpenMPTarget backend. [\#5889](https://github.com/kokkos/kokkos/pull/5889) #### HPX: * Update HPX backend to use HPX's sender/receiver functionality [\#5628](https://github.com/kokkos/kokkos/pull/5628) * Increase minimum required HPX version to 1.8.0 [\#6132](https://github.com/kokkos/kokkos/pull/6132) * Implement HPX::in_parallel [\#6143](https://github.com/kokkos/kokkos/pull/6143) ### General Enhancements * Export CMake `Kokkos_{CUDA,HIP}_ARCHITECTURES` variables [\#5919](https://github.com/kokkos/kokkos/pull/5919) [\#5925](https://github.com/kokkos/kokkos/pull/5925) * Add `Kokkos::Profiling::ScopedRegion` [\#5959](https://github.com/kokkos/kokkos/pull/5959) [\#5972](https://github.com/kokkos/kokkos/pull/5972) * Add support for `View::rank[_dynamic]()`[\#5870](https://github.com/kokkos/kokkos/pull/5870) * Detect incompatible relocatable device code mode to prevent ODR violations [\#5991](https://github.com/kokkos/kokkos/pull/5991) * Add (experimental) support for 32-bit Darwin and PPC [\#5916](https://github.com/kokkos/kokkos/pull/5916) * Add missing half and bhalf specialization of the infinity numeric 
trait [\#6055](https://github.com/kokkos/kokkos/pull/6055) * Add `is_dual_view` trait and align further with regular view [\#6120](https://github.com/kokkos/kokkos/pull/6120) * Allow templated functors in parallel_for, parallel_reduce and parallel_scan [\#5976](https://github.com/kokkos/kokkos/pull/5976) * Define KOKKOS_COMPILER_INTEL_LLVM and only define at most one KOKKOS_COMPILER* macro [\#5906](https://github.com/kokkos/kokkos/pull/5906) * Allow linking against build tree [\#6078](https://github.com/kokkos/kokkos/pull/6078) * Allow passing a temporary std::vector to partition_space [\#6167](https://github.com/kokkos/kokkos/pull/6167) * `Kokkos` can be used as an external dependency in `Trilinos` [\#6142](https://github.com/kokkos/kokkos/pull/6142), [\#6157](https://github.com/kokkos/kokkos/pull/6157) [\#6163](https://github.com/kokkos/kokkos/pull/6163) * Left align demangled stacktrace output [\#6191](https://github.com/kokkos/kokkos/pull/6191) * Improve OpenMP affinity warning to include MPI concerns [\#6185](https://github.com/kokkos/kokkos/pull/6185) ### Build System Changes * Drop `Kokkos_ENABLE_LAUNCH_COMPILER` option which had no effect [\#6148](https://github.com/kokkos/kokkos/pull/6148) * Export variables for relevant Kokkos options with cmake[\#6142](https://github.com/kokkos/kokkos/pull/6142) ### Incompatibilities (i.e. 
breaking changes) * Desul atomics always enabled [\#5801](https://github.com/kokkos/kokkos/pull/5801) * Drop `KOKKOS_ENABLE_CUDA_ASM*` and `KOKKOS_ENABLE_*_ATOMICS` macros [\#5940](https://github.com/kokkos/kokkos/pull/5940) * Drop `KOKKOS_ENABLE_RFO_PREFETCH` macro [\#5944](https://github.com/kokkos/kokkos/pull/5944) * Deprecate `Kokkos_ENABLE_CUDA_LAMBDA` configuration option and force it to `ON` [\#5964](https://github.com/kokkos/kokkos/pull/5964) * Remove TriBITS Kokkos subpackages [\#6104](https://github.com/kokkos/kokkos/pull/6104) * Cuda: Remove unused attach_texture_object [\#6129](https://github.com/kokkos/kokkos/pull/6129) * Drop Kokkos_ENABLE_PROFILING_LOAD_PRINT configuration option [\#6150](https://github.com/kokkos/kokkos/pull/6150) * Drop pointless Kokkos{Algorithms,Containers}_config.h files [\#6108](https://github.com/kokkos/kokkos/pull/6108) ### Deprecations * Deprecate `BinSort`, `BinOp1D`, and `BinOp3D` default constructors [\#6131](https://github.com/kokkos/kokkos/pull/6131) ### Bug Fixes * Fix `SYCLTeamMember` to take arguments for scratch sizes as `std::size_t` [\#5981](https://github.com/kokkos/kokkos/pull/5981) * Fix Kokkos_SIMD with AVX2 on 64-bit architectures [\#6075](https://github.com/kokkos/kokkos/pull/6075) * Fix an incorrectly returning size for SIMD uint64_t in AVX2 [\#6004](https://github.com/kokkos/kokkos/pull/6004) * Fix missing avx512 header file with gcc versions before 10 [\#6183](https://github.com/kokkos/kokkos/pull/6183) * Fix incorrect results of `parallel_reduce` of types smaller than `int` on CUDA and HIP: [\#5745](https://github.com/kokkos/kokkos/pull/5745) * CMake: update package compatibility mode when building within Trilinos [\#6012](https://github.com/kokkos/kokkos/pull/6012) * Fix warnings generated from internal uses of `ALL_t` rather than `Kokkos::ALL_t` [\#6028](https://github.com/kokkos/kokkos/pull/6028) * Fix bug in `hpcbind` script: check for correct Slurm variable 
[\#6116](https://github.com/kokkos/kokkos/pull/6116) * KokkosTools: Don't call callbacks before backends are initialized [\#6114](https://github.com/kokkos/kokkos/pull/6114) * Fix global fence in Kokkos::resize(DynRankView) [\#6184](https://github.com/kokkos/kokkos/pull/6184) * Fix `BinSort` support for strided views [\#6081](https://github.com/kokkos/kokkos/pull/6081) * Fix missing `is_*_view` traits in containers [\#6195](https://github.com/kokkos/kokkos/pull/6195) * Fix broken OpenMP target on NVHPC [\#6171](https://github.com/kokkos/kokkos/pull/6171) * Sorting an empty view should exit early and not fail [\#6130](https://github.com/kokkos/kokkos/pull/6130) ## [4.0.01](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-04-14) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.00...4.0.01) ### Backend and Architecture Enhancements: #### CUDA: - Allow NVCC 12 to compile using C++20 flag [\#6020](https://github.com/kokkos/kokkos/pull/6020) - Add CUDA Ada architecture support [\#6022](https://github.com/kokkos/kokkos/pull/6022) #### HIP: - Add support for AMDGPU target NAVI31 / RX 7900 XT(X): gfx1100 [\#6021](https://github.com/kokkos/kokkos/pull/6021) - HIP: Fix warning from `std::memcpy` [\#6019](https://github.com/kokkos/kokkos/pull/6019) #### SYCL: - Fix `SYCLTeamMember` to take arguments for scratch sizes as `std::size_t` [\#5986](https://github.com/kokkos/kokkos/pull/5986) ### General Enhancements - Fixup 4.0 change log [\#6023](https://github.com/kokkos/kokkos/pull/6023) ### Build System Changes - Cherry-pick TriBITS update from Trilinos [\#6037](https://github.com/kokkos/kokkos/pull/6037) - CMake: update package compatibility mode when building within Trilinos [\#6013](https://github.com/kokkos/kokkos/pull/6013) ### Bug Fixes - Fix an incorrectly returning size for SIMD uint64_t in AVX2 [\#6011](https://github.com/kokkos/kokkos/pull/6011) - Desul atomics: wrong value for `desul::Impl::numeric_limits_max` 
[\#6018](https://github.com/kokkos/kokkos/pull/6018) - Fix warning in some user code when using std::memcpy [\#6000](https://github.com/kokkos/kokkos/pull/6000) - Fix excessive build times using Makefile.kokkos [\#6068](https://github.com/kokkos/kokkos/pull/6068) ## [4.0.0](https://github.com/kokkos/kokkos/tree/4.0.00) (2023-02-21) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...4.0.00) ### Features: - Allow value types without default constructor in `Kokkos::View` with `Kokkos::WithoutInitializing` [\#5307](https://github.com/kokkos/kokkos/pull/5307) - `parallel_scan` with `View` as result type. [\#5146](https://github.com/kokkos/kokkos/pull/5146) - Introduced `SharedSpace`, an alias for a `MemorySpace` that is accessible by every `ExecutionSpace`. The memory is moved and then accessed locally. [\#5289](https://github.com/kokkos/kokkos/pull/5289) - Introduced `SharedHostPinnedSpace`, an alias for a `MemorySpace` that is accessible by every `ExecutionSpace`. The memory is pinned to the host and accessed via zero-copy access. [\#5405](https://github.com/kokkos/kokkos/pull/5405) - Add team- and thread-level `sort`, `sort_by_key` algorithms. [\#5317](https://github.com/kokkos/kokkos/pull/5317) - Groundwork for `MDSpan` integration. [\#4973](https://github.com/kokkos/kokkos/pull/4973) and [\#5304](https://github.com/kokkos/kokkos/pull/5304) - Introduced MD version of hierarchical parallelism: `TeamThreadMDRange`, `ThreadVectorMDRange` and `TeamVectorMDRange`. 
[\#5238](https://github.com/kokkos/kokkos/pull/5238) ### Backend and Architecture Enhancements: #### CUDA: - Allow CUDA PTX forward compatibility [\#3612](https://github.com/kokkos/kokkos/pull/3612) [\#5536](https://github.com/kokkos/kokkos/pull/5536) [\#5527](https://github.com/kokkos/kokkos/pull/5527) - Add support for NVIDIA Hopper GPU architecture [\#5538](https://github.com/kokkos/kokkos/pull/5538) - Don't rely on synchronization behavior of default stream in CUDA and HIP [\#5391](https://github.com/kokkos/kokkos/pull/5391) - Improve CUDA cache config settings [\#5706](https://github.com/kokkos/kokkos/pull/5706) #### HIP: - Move `HIP`, `HIPSpace`, `HIPHostPinnedSpace`, and `HIPManagedSpace` out of the `Experimental` namespace [\#5383](https://github.com/kokkos/kokkos/pull/5383) - Don't rely on synchronization behavior of default stream in CUDA and HIP [\#5391](https://github.com/kokkos/kokkos/pull/5391) - Export AMD architecture flag when using Trilinos [\#5528](https://github.com/kokkos/kokkos/pull/5528) - Fix linking error (see [OLCF issue](https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#olcfdev-1167-kokkos-build-failures-with-prgenv-amd)) when using `amdclang`: [\#5539](https://github.com/kokkos/kokkos/pull/5539) - Remove support for MI25 and added support for Navi 1030 [\#5522](https://github.com/kokkos/kokkos/pull/5522) - Fix race condition when using `HSA_XNACK=1` [\#5755](https://github.com/kokkos/kokkos/pull/5755) - Add parameter to force using GlobalMemory launch mechanism. This can be used when encountering compiler bugs with ROCm 5.3 and 5.4 [\#5796](https://github.com/kokkos/kokkos/pull/5796) #### SYCL: - Delegate choice of workgroup size for `parallel_reduce` with `RangePolicy` to the compiler. 
[\#5227](https://github.com/kokkos/kokkos/pull/5227) - SYCL `RangePolicy`: manually specify workgroup size through chunk size [\#4875](https://github.com/kokkos/kokkos/pull/4875) #### OpenMPTarget: - Select the right device [\#5492](https://github.com/kokkos/kokkos/pull/5492) #### OpenMP: - Add `partition_space` [\#5105](https://github.com/kokkos/kokkos/pull/5105) ### General Enhancements - Implement `OffsetView` constructor taking `pair`s and `ViewCtorProp` [\#5303](https://github.com/kokkos/kokkos/pull/5303) - Promote math constants to `Kokkos::numbers` namespace [\#5434](https://github.com/kokkos/kokkos/pull/5434) - Add overloads of `hypot` math function that take 3 arguments [\#5341](https://github.com/kokkos/kokkos/pull/5341) - Add `fma` fused multiply-add math function [\#5428](https://github.com/kokkos/kokkos/pull/5428) - Views using `MemoryTraits::Atomic` don't need `volatile` overloads for the value type anymore. [\#5455](https://github.com/kokkos/kokkos/pull/5455) - Added `is_team_handle` trait [\#5375](https://github.com/kokkos/kokkos/pull/5375) - Refactor desul atomics to support compiling CUDA with NVC++ [\#5431](https://github.com/kokkos/kokkos/pull/5431) [\#5497](https://github.com/kokkos/kokkos/pull/5497) [\#5498](https://github.com/kokkos/kokkos/pull/5498) - Support finding `libquadmath` with native compiler support [\#5286](https://github.com/kokkos/kokkos/pull/5286) - Add architecture flags for MSVC [\#5673](https://github.com/kokkos/kokkos/pull/5673) - SIMD backend for ARM NEON [\#5829](https://github.com/kokkos/kokkos/pull/5829) ### Build System Changes - Let CMake determine OpenMP flags. [\#4105](https://github.com/kokkos/kokkos/pull/4105) - Update minimum compiler versions. 
[\#5323](https://github.com/kokkos/kokkos/pull/5323) - Makefile and CMake support for C++23 [\#5283](https://github.com/kokkos/kokkos/pull/5283) - Do not add `-cuda` to the link line with NVHPC compiler when the CUDA backend is not actually enabled [\#5485](https://github.com/kokkos/kokkos/pull/5485) - Only add `-latomic` in generated GNU makefiles when OpenMPTarget backend is enabled [\#5501](https://github.com/kokkos/kokkos/pull/5501) [\#5537](https://github.com/kokkos/kokkos/pull/5537) (3.7 patch release candidate) - `Kokkos_ENABLE_CUDA_LAMBDA` now `ON` by default with NVCC [\#5580](https://github.com/kokkos/kokkos/pull/5580) - Fix enabling of relocatable device code when using CUDA as CMake language [\#5564](https://github.com/kokkos/kokkos/pull/5564) - Fix cmake configuration with CUDA 12 [\#5691](https://github.com/kokkos/kokkos/pull/5691) ### Incompatibilities (i.e. breaking changes) - ***Require C++17*** [\#5277](https://github.com/kokkos/kokkos/pull/5277) - Turn setting `Kokkos_CXX_STANDARD` into an error [\#5293](https://github.com/kokkos/kokkos/pull/5293) - Remove all deprecations in Kokkos 3 [\#5297](https://github.com/kokkos/kokkos/pull/5297) - Remove `KOKKOS_COMPILER_CUDA_VERSION` [\#5430](https://github.com/kokkos/kokkos/pull/5430) - Drop `reciprocal_overflow_threshold` numeric trait [\#5326](https://github.com/kokkos/kokkos/pull/5326) - Move `reduction_identity` out of `<Kokkos_NumericTraits.hpp>` into a new `<Kokkos_ReductionIdentity.hpp>` header [\#5450](https://github.com/kokkos/kokkos/pull/5450) - Reduction and scan routines will report an error if the `join()` operator they would use takes `volatile`-qualified parameters [\#5409](https://github.com/kokkos/kokkos/pull/5409) - `ENABLE_CUDA_UVM` is dropped in favor of using `SharedSpace` as `MemorySpace` explicitly [\#5608](https://github.com/kokkos/kokkos/pull/5608) - Remove Kokkos_ENABLE_CUDA_LDG_INTRINSIC option [\#5623](https://github.com/kokkos/kokkos/pull/5623) - Don't rely on synchronization behavior of default stream in CUDA and HIP - this 
potentially will break unintended implicit synchronization with other libraries such as MPI [\#5391](https://github.com/kokkos/kokkos/pull/5391) - Make ExecutionSpace::concurrency() a non-static member function [\#5655](https://github.com/kokkos/kokkos/pull/5655) and related PRs - Remove code guarded by `KOKKOS_ENABLE_DEPRECATED_CODE_3` ### Deprecations - Deprecate `CudaUVMSpace::available()` which always returned `true` [\#5614](https://github.com/kokkos/kokkos/pull/5614) - Deprecate `volatile`-qualified members from `Kokkos::pair` and `Kokkos::complex` [\#5412](https://github.com/kokkos/kokkos/pull/5412) - Deprecate `KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_*` macros [\#5824](https://github.com/kokkos/kokkos/pull/5824) (oversight in 3.6) ### Bug Fixes - Avoid allocating memory for `UniqueToken` [\#5300](https://github.com/kokkos/kokkos/pull/5300) - Fix `pragma ivdep` in `Kokkos_OpenMP_Parallel.hpp` [\#5356](https://github.com/kokkos/kokkos/pull/5356) - Fix configuring with Threads support when rerunning CMake [\#5486](https://github.com/kokkos/kokkos/pull/5486) - Fix View assignment between `LayoutLeft` and `LayoutRight` with static extents [\#5535](https://github.com/kokkos/kokkos/pull/5535) (3.7 patch release candidate) - Add `fence()` calls to sorting routine overloads that don't take an execution space parameter [\#5389](https://github.com/kokkos/kokkos/pull/5389) - `ClockTic` changed to 64 bit to fix overflow on Power [\#5577](https://github.com/kokkos/kokkos/pull/5577) (incl. in 3.7.01 patch release) - Fix incorrect offset in CUDA and HIP `parallel_scan` for < 4 byte types [\#5555](https://github.com/kokkos/kokkos/pull/5555) (3.7 patch release candidate) - Fix incorrect alignment behavior of scratch allocations in some corner cases (e.g. 
very small allocations) [\#5687](https://github.com/kokkos/kokkos/pull/5687) (3.7 patch release candidate) - Add missing `ReductionIdentity` specialization [\#5798](https://github.com/kokkos/kokkos/pull/5798) - Don't install standard algorithms headers multiple times [\#5670](https://github.com/kokkos/kokkos/pull/5670) - Fix max scratch size calculation for level 0 scratch in CUDA and HIP [\#5718](https://github.com/kokkos/kokkos/pull/5718) ## [3.7.02](https://github.com/kokkos/kokkos/tree/3.7.02) (2023-05-17) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...3.7.02) ### Backends and Archs Enhancements: #### CUDA - Add Hopper support and update nvcc_wrapper to work with CUDA-12 [\#5693](https://github.com/kokkos/kokkos/pull/5693) ### General Enhancements: - sprintf -> snprintf [\#5787](https://github.com/kokkos/kokkos/pull/5787) ### Build System: - Add error message when not using `hipcc` and when `CMAKE_CXX_STANDARD` is not set [\#5945](https://github.com/kokkos/kokkos/pull/5945) ### Bug Fixes: - Fix Scratch allocation alignment issues [\#5692](https://github.com/kokkos/kokkos/pull/5692) - Fix Intel Classic Compiler ICE [\#5710](https://github.com/kokkos/kokkos/pull/5710) - Don't install std algorithm headers multiple times [\#5711](https://github.com/kokkos/kokkos/pull/5711) - Fix static init order issue in InitializationSettings [\#5721](https://github.com/kokkos/kokkos/pull/5721) - Fix src/dst Properties in deep_copy(DynamicView,View) [\#5732](https://github.com/kokkos/kokkos/pull/5732) - Fix build on Fedora Rawhide [\#5782](https://github.com/kokkos/kokkos/pull/5782) - Finalize HIP lock arrays [\#5694](https://github.com/kokkos/kokkos/pull/5694) - Fix CUDA lock arrays for current Desul [\#5812](https://github.com/kokkos/kokkos/pull/5812) - Set the correct device/context in InterOp tests [\#5701](https://github.com/kokkos/kokkos/pull/5701) ## [3.7.01](https://github.com/kokkos/kokkos/tree/3.7.01) (2022-12-01) [Full 
Changelog](https://github.com/kokkos/kokkos/compare/3.7.00...3.7.01) ### Bug Fixes: - Add fences to all sorting routines not taking an execution space instance argument [\#5547](https://github.com/kokkos/kokkos/pull/5547) - Fix repeated `team_reduce` without barrier [\#5552](https://github.com/kokkos/kokkos/pull/5552) - Fix memory spaces in `create_mirror_view` overloads using `view_alloc` [\#5521](https://github.com/kokkos/kokkos/pull/5521) - Allow `as_view_of_rank_n()` to be overloaded for "special" scalar types [\#5553](https://github.com/kokkos/kokkos/pull/5553) - Fix warning about calling a `__host__` function from a `__host__ __device__` function in `View::as_view_of_rank_n` [\#5591](https://github.com/kokkos/kokkos/pull/5591) - OpenMPTarget: adding implementation to set device id. [\#5557](https://github.com/kokkos/kokkos/pull/5557) - Use `Kokkos::atomic_load` to Correct Race Condition Giving Rise to Seg Faulting Error in OpenMP tests [\#5559](https://github.com/kokkos/kokkos/pull/5559) - cmake: define `KOKKOS_ARCH_A64FX` [\#5561](https://github.com/kokkos/kokkos/pull/5561) - Only link against libatomic in gnu-make OpenMPTarget build [\#5565](https://github.com/kokkos/kokkos/pull/5565) - Fix static extents assignment for LayoutLeft/LayoutRight assignment [\#5566](https://github.com/kokkos/kokkos/pull/5566) - Do not add -cuda to the link line with NVHPC compiler when the CUDA backend is not actually enabled [\#5569](https://github.com/kokkos/kokkos/pull/5569) - Export the flags in `KOKKOS_AMDGPU_OPTIONS` when using Trilinos [\#5571](https://github.com/kokkos/kokkos/pull/5571) - Add support for detecting MPI local rank with MPICH and PMI [\#5570](https://github.com/kokkos/kokkos/pull/5570) [\#5582](https://github.com/kokkos/kokkos/pull/5582) - Remove listing of undefined TPL dependencies [\#5573](https://github.com/kokkos/kokkos/pull/5573) - ClockTic changed to 64 bit to fix overflow on Power [\#5592](https://github.com/kokkos/kokkos/pull/5592) - Fix incorrect offset in 
CUDA and HIP parallel scan for < 4 byte types [\#5607](https://github.com/kokkos/kokkos/pull/5607) - Fix initialization of Cuda lock arrays [\#5622](https://github.com/kokkos/kokkos/pull/5622) ## [3.7.00](https://github.com/kokkos/kokkos/tree/3.7.00) (2022-08-22) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.01...3.7.00) ### Features: - Use non-volatile `join()` member functions and `operator+=` in `parallel_reduce/scan` [\#4931](https://github.com/kokkos/kokkos/pull/4931) [\#4954](https://github.com/kokkos/kokkos/pull/4954) [\#4951](https://github.com/kokkos/kokkos/pull/4951) - Add `SIMD` sub package (requires C++17) [\#5016](https://github.com/kokkos/kokkos/pull/5016) - Add `is_finalized()` [\#5247](https://github.com/kokkos/kokkos/pull/5247) - Promote mathematical functions from `namespace Kokkos::Experimental` to `namespace Kokkos` [\#4791](https://github.com/kokkos/kokkos/pull/4791) - Promote `min`, `max`, `clamp`, `minmax` functions from `namespace Kokkos::Experimental` to `namespace Kokkos` [\#5170](https://github.com/kokkos/kokkos/pull/5170) - Add `round`, `logb`, `nextafter`, `copysign`, and `signbit` math functions [\#4768](https://github.com/kokkos/kokkos/pull/4768) - Add `HIPManagedSpace`, similar to `CudaUVMSpace` [\#5112](https://github.com/kokkos/kokkos/pull/5112) - Accept view construction allocation properties in `create_mirror[_view,_view_and_copy]` and `resize/realloc` [\#5125](https://github.com/kokkos/kokkos/pull/5125) [\#5095](https://github.com/kokkos/kokkos/pull/5095) [\#5035](https://github.com/kokkos/kokkos/pull/5035) [\#4805](https://github.com/kokkos/kokkos/pull/4805) [\#4844](https://github.com/kokkos/kokkos/pull/4844) - Allow `MemorySpace::allocate()` to be called with execution space [\#4826](https://github.com/kokkos/kokkos/pull/4826) - Experimental: Compile time view subscriber [\#4197](https://github.com/kokkos/kokkos/pull/4197) ### Backends and Archs Enhancements: - Add support for Sapphire Rapids Intel 
architecture [\#5015](https://github.com/kokkos/kokkos/pull/5015) - Add support for ICX, SKL and ICL Intel architectures [\#5013](https://github.com/kokkos/kokkos/pull/5013) [\#4929](https://github.com/kokkos/kokkos/pull/4929) - Add arch flags for Intel GPU Ponte Vecchio [\#4932](https://github.com/kokkos/kokkos/pull/4932) - SYCL: require GPU if GPU architecture was set at configuration time (i.e. do not allow fallback to CPU device) [\#5264](https://github.com/kokkos/kokkos/pull/5264) [\#5222](https://github.com/kokkos/kokkos/pull/5222) - SYCL: Add `SYCL::sycl_queue()` for interoperability [\#5241](https://github.com/kokkos/kokkos/pull/5241) - SYCL: Loosen restriction for using built-in `sycl::group_broadcast` [\#4552](https://github.com/kokkos/kokkos/pull/4552) - SYCL: preserve address space [\#4396](https://github.com/kokkos/kokkos/pull/4396) - OpenMPTarget: Adding a workaround for team scan [\#5219](https://github.com/kokkos/kokkos/pull/5219) - OpenMPTarget: Adding logic to skip the kernel launch if `league_size=0` [\#5067](https://github.com/kokkos/kokkos/pull/5067) - OpenMPTarget: Make sure `Kokkos::abort()` causes abnormal program termination when called on the host-side [\#4808](https://github.com/kokkos/kokkos/pull/4808) - HIP: Make HIPHostPinnedSpace coarse-grained [\#5152](https://github.com/kokkos/kokkos/pull/5152) - Refactor OpenMP `parallel_for` implementation to use more native OpenMP constructs [\#4664](https://github.com/kokkos/kokkos/pull/4664) - Add option to optimize for local CPU architecture `Kokkos_ARCH_NATIVE` [\#4930](https://github.com/kokkos/kokkos/pull/4930) ### Implemented enhancements - Add command line argument/environment variable to print the configuration [\#5233](https://github.com/kokkos/kokkos/pull/5233) - Improve error message in view memory access violations [\#4950](https://github.com/kokkos/kokkos/pull/4950) - Remove unnecessary fences in View initialization [\#4823](https://github.com/kokkos/kokkos/pull/4823) - Make 
`View::shmem_size()` device-callable [\#4936](https://github.com/kokkos/kokkos/pull/4936) - Update numerics support for `__float128` [\#5081](https://github.com/kokkos/kokkos/pull/5081) - Add `log10` overload for `Kokkos::complex` [\#5009](https://github.com/kokkos/kokkos/pull/5009) - Add `[[nodiscard]]` to `ScopeGuard` [\#5224](https://github.com/kokkos/kokkos/pull/5224) - Add structured binding support for `Kokkos::Array` [\#4962](https://github.com/kokkos/kokkos/pull/4962) - Enable accessing `Kokkos::Array` elements in constant expressions [\#4916](https://github.com/kokkos/kokkos/pull/4916) - Mark `as_view_of_rank_n` as KOKKOS_FUNCTION [\#5248](https://github.com/kokkos/kokkos/pull/5248) - Cleanup/rework fence overloads [\#5148](https://github.com/kokkos/kokkos/pull/5148) - Assert that `Layout` construction from extents is valid in functions taking integer extents [\#5209](https://github.com/kokkos/kokkos/pull/5209) - Add `fill_random` overload that takes an execution space as first argument [\#5181](https://github.com/kokkos/kokkos/pull/5181) - Avoid some unnecessary fences in `parallel_reduce/scan` [\#5154](https://github.com/kokkos/kokkos/pull/5154) - Include `KOKKOS_ENABLE_LIBDL` in options when printing configuration [\#5086](https://github.com/kokkos/kokkos/pull/5086) - DynRankView: make `layout()` return the same as a corresponding static View [\#5026](https://github.com/kokkos/kokkos/pull/5026) - Use `_mm_malloc` for icpx [\#5012](https://github.com/kokkos/kokkos/pull/5012) - Avoid forcing matching execution spaces in `BinSort` constructor and `sort()` [\#4919](https://github.com/kokkos/kokkos/pull/4919) - Check number of bins in `BinSort` [\#4890](https://github.com/kokkos/kokkos/pull/4890) - Improve performance in parallel STL-like algorithms [\#4887](https://github.com/kokkos/kokkos/pull/4887) [\#4886](https://github.com/kokkos/kokkos/pull/4886) - Disable `memset` on A64FX and launch `parallel_for` instead (performance) 
[\#4884](https://github.com/kokkos/kokkos/pull/4884) - Allow non-power-of-two team sizes for team reductions and scans [\#4809](https://github.com/kokkos/kokkos/pull/4809) #### Harmonization of Kokkos execution environment initialization: - Warn when unable to detect local MPI rank and user explicitly asked for it [\#5263](https://github.com/kokkos/kokkos/pull/5263) - Refactor parsing of command line arguments and environment variables [\#5221](https://github.com/kokkos/kokkos/pull/5221) - Refactor device selection at initialization [\#5211](https://github.com/kokkos/kokkos/pull/5211) - Rename tools settings for consistency [\#5201](https://github.com/kokkos/kokkos/pull/5201) - Print help only once [\#5128](https://github.com/kokkos/kokkos/pull/5128) - Update precedence rule in initialization [\#5130](https://github.com/kokkos/kokkos/pull/5130) - Warn instead of just ignoring user settings when kokkos-tools is disabled [\#5088](https://github.com/kokkos/kokkos/pull/5088) - Drop numa args in threads backend initialization [\#5127](https://github.com/kokkos/kokkos/pull/5127) - Warn users when a flag prefixed with -[-]kokkos is not recognized and do not remove it [\#5256](https://github.com/kokkos/kokkos/pull/5256) - Give back to Core what belongs to Core (aka moving tune_internals option from Tools back to Core) [\#5202](https://github.com/kokkos/kokkos/pull/5202) #### Build system updates: - `nvcc_wrapper`: filter out -pedantic-errors from nvcc options [\#5235](https://github.com/kokkos/kokkos/pull/5235) - `nvcc_wrapper`: add known nvcc option --source-in-ptx [\#5052](https://github.com/kokkos/kokkos/pull/5052) - Link libdl as interface library [\#5179](https://github.com/kokkos/kokkos/pull/5179) - Only show GPU architectures with enabled corresponding backend [\#5119](https://github.com/kokkos/kokkos/pull/5119) - Enable optional external desul build [\#5021](https://github.com/kokkos/kokkos/pull/5021) [\#5132](https://github.com/kokkos/kokkos/pull/5132) - Export 
`Kokkos_CXX_STANDARD` variable with CMake [\#5068](https://github.com/kokkos/kokkos/pull/5068) - Suppress warnings with nvc++ [\#5031](https://github.com/kokkos/kokkos/pull/5031) - Disallow multiple host architectures in CMake [\#4996](https://github.com/kokkos/kokkos/pull/4996) - Do not include compiler warning flags in the compile option of the cmake target [\#4989](https://github.com/kokkos/kokkos/pull/4989) - AOT flags for OpenMPTarget targeting Intel GPUs [\#4915](https://github.com/kokkos/kokkos/pull/4915) - Repurpose `Kokkos_ARCH_INTEL_GEN` for SYCL to mean JIT to be conforming with OMPT [\#4894](https://github.com/kokkos/kokkos/pull/4894) - Replace amdgpu-target with offload-arch [\#4874](https://github.com/kokkos/kokkos/pull/4874) - Do not enable `kokkos_launch_compiler` when `CMAKE_CXX_COMPILER_LAUNCHER` is set [\#4870](https://github.com/kokkos/kokkos/pull/4870) - Move CMake version check up [\#4797](https://github.com/kokkos/kokkos/pull/4797) ### Incompatibilities: - Remove `KOKKOS_THREAD_LOCAL` [\#5064](https://github.com/kokkos/kokkos/pull/5064) - Remove `KOKKOS_ENABLE_POSIX_MEMALIGN` [\#5011](https://github.com/kokkos/kokkos/pull/5011) - Remove unused `KOKKOS_ENABLE_TM` [\#4995](https://github.com/kokkos/kokkos/pull/4995) - Remove unused cmakedefine `KOKKOS_ENABLE_COMPILER_WARNINGS` [\#4883](https://github.com/kokkos/kokkos/pull/4883) - Remove unused `KOKKOS_ENABLE_DUALVIEW_MODIFY_CHECK` [\#4882](https://github.com/kokkos/kokkos/pull/4882) - Drop Instruction Set Architecture (ISA) macros [\#4981](https://github.com/kokkos/kokkos/pull/4981) - Warn in `ScopeGuard` about illegal usage [\#5250](https://github.com/kokkos/kokkos/pull/5250) ### Deprecations: - Guard against non-public header inclusion [\#5178](https://github.com/kokkos/kokkos/pull/5178) - Raise deprecation warnings if non empty WorkTag class is used [\#5230](https://github.com/kokkos/kokkos/pull/5230) - Deprecate `parallel_*` overloads taking the label as trailing argument 
[\#5141](https://github.com/kokkos/kokkos/pull/5141) - Deprecate nested types in functional [\#5185](https://github.com/kokkos/kokkos/pull/5185) - Deprecate `InitArguments` struct and replace it with `InitializationSettings` [\#5135](https://github.com/kokkos/kokkos/pull/5135) - Deprecate `finalize_all()` [\#5134](https://github.com/kokkos/kokkos/pull/5134) - Deprecate command line arguments (other than `--help`) that are not prefixed with `kokkos-*` [\#5120](https://github.com/kokkos/kokkos/pull/5120) - Deprecate `--[kokkos-]numa` cmdline arg and `KOKKOS_NUMA` env var [\#5117](https://github.com/kokkos/kokkos/pull/5117) - Deprecate `--[kokkos-]threads` command line argument in favor of `--[kokkos-]num-threads` [\#5111](https://github.com/kokkos/kokkos/pull/5111) - Deprecate `Kokkos::is_reducer_type` [\#4957](https://github.com/kokkos/kokkos/pull/4957) - Deprecate `OffsetView` constructors taking `index_list_type` [\#4810](https://github.com/kokkos/kokkos/pull/4810) - Deprecate overloads of `Kokkos::sort` taking a parameter `bool always_use_kokkos_sort` [\#5382](https://github.com/kokkos/kokkos/issues/5382) - Warn about `parallel_reduce` cases that call `join()` with volatile-qualified arguments [\#5215](https://github.com/kokkos/kokkos/pull/5215) ### Bug Fixes: - CUDA Reductions: Fix data races reported by Nvidia `compute-sanitizer` [\#4855](https://github.com/kokkos/kokkos/pull/4855) - Work around Intel compiler bug [\#5301](https://github.com/kokkos/kokkos/pull/5301) - Avoid allocating memory for UniqueToken [\#5300](https://github.com/kokkos/kokkos/pull/5300) - DynamicView: Properly resize mirror instances after construction [\#5276](https://github.com/kokkos/kokkos/pull/5276) - Remove Kokkos::Rank limit of 6 ranks [\#5271](https://github.com/kokkos/kokkos/pull/5271) - Do not forget to set last element to nullptr when removing a flag in `Kokkos::initialize` [\#5272](https://github.com/kokkos/kokkos/pull/5272) - Fix CUDA+MSVC build issue 
[\#5261](https://github.com/kokkos/kokkos/pull/5261) - Fix `DynamicView::resize_serial` [\#5220](https://github.com/kokkos/kokkos/pull/5220) - Fix cmake default compiler flags for unknown compiler [\#5217](https://github.com/kokkos/kokkos/pull/5217) - Fix `move_backward` [\#5191](https://github.com/kokkos/kokkos/pull/5191) - Fixing issue 5196 - missing symbol with intel compiler [\#5207](https://github.com/kokkos/kokkos/pull/5207) - Preserve `KOKKOS_INVALID_INDEX` in ViewDimension and ArrayLayout construction [\#5188](https://github.com/kokkos/kokkos/pull/5188) - Finalize `deep_copy_space` early avoiding printing to `std::cerr` for Cuda [\#5151](https://github.com/kokkos/kokkos/pull/5151) - Use correct policy in Threads MDRange `parallel_reduce` [\#5123](https://github.com/kokkos/kokkos/pull/5123) - Fix building with NVCC as the CXX compiler while the CUDA backend is not enabled [\#5115](https://github.com/kokkos/kokkos/pull/5115) - OpenMPTarget Index range fix for MDRange. [\#5089](https://github.com/kokkos/kokkos/pull/5089) - Fix bug with CUDA's team reduction for empty ranges [\#5079](https://github.com/kokkos/kokkos/pull/5079) - Fix using `ZeroMemset` for Serial [\#5077](https://github.com/kokkos/kokkos/pull/5077) - Fix `Kokkos::Vector::push_back` for default execution space [\#5047](https://github.com/kokkos/kokkos/pull/5047) - ScatterView: Fix ScatterMin/ScatterMax to use proper atomics [\#5045](https://github.com/kokkos/kokkos/pull/5045) - Fix calling `ZeroMemset` in `deep_copy` [\#5040](https://github.com/kokkos/kokkos/pull/5040) - Make View self-assignment not produce double-free [\#5024](https://github.com/kokkos/kokkos/pull/5024) - Guard against unrecognized pragma with intel compilers [\#5019](https://github.com/kokkos/kokkos/pull/5019) - Fix racing condition in `HIPParallelLaunch` [\#5008](https://github.com/kokkos/kokkos/pull/5008) - KokkosP: Fix `device_id` in profiling [\#4997](https://github.com/kokkos/kokkos/pull/4997) - Fix for 
`Kokkos::vector::insert` into empty vector with begin and end iterators [\#4988](https://github.com/kokkos/kokkos/pull/4988) - Fix Core header files installation [\#4984](https://github.com/kokkos/kokkos/pull/4984) - Fix bounds errors with `Kokkos::sort` [\#4980](https://github.com/kokkos/kokkos/pull/4980) - Fixup let `RangePolicy::set_chunk_size` return a reference to self [\#4918](https://github.com/kokkos/kokkos/pull/4918) - Fix allocating large Views [\#4907](https://github.com/kokkos/kokkos/pull/4907) - Fix combined reductions with `Kokkos::View` [\#4896](https://github.com/kokkos/kokkos/pull/4896) - Fixed `_CUDA_ARCH__` to `__CUDA_ARCH__` for CUDA LDG [\#4893](https://github.com/kokkos/kokkos/pull/4893) - Fixup `View::access()` truncate parameter pack [\#4876](https://github.com/kokkos/kokkos/pull/4876) - Fix `abort` with HIP backend for ROCm 5.0.2 and beyond [\#4873](https://github.com/kokkos/kokkos/pull/4873) - Fix HIP version when printing the configuration [\#4872](https://github.com/kokkos/kokkos/pull/4872) - Fix scratch lock array when using scratch level 1 [\#4871](https://github.com/kokkos/kokkos/pull/4871) - Fix Makefile.kokkos to work with fujitsu compiler [\#4867](https://github.com/kokkos/kokkos/pull/4867) - cmake: Correct link THREADS link option [\#4854](https://github.com/kokkos/kokkos/pull/4854) - UniqueToken `impl_acquire` function should be device only [\#4819](https://github.com/kokkos/kokkos/pull/4819) - Fix example calls to non existing static `print_configuration` [\#4806](https://github.com/kokkos/kokkos/pull/4806) - Fix requests for large team scratch sizes [\#4728](https://github.com/kokkos/kokkos/pull/4728) ## [3.6.01](https://github.com/kokkos/kokkos/tree/3.6.01) (2022-05-23) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.00...3.6.01) ### Bug Fixes: - Fix Threads: Fix serial resizing scratch space (3.6.01 cherry-pick) [\#5109](https://github.com/kokkos/kokkos/pull/5109) - Fix ScatterMin/ScatterMax to use proper 
atomics (3.6.01 cherry-pick) [\#5046](https://github.com/kokkos/kokkos/pull/5046) - Fix allocating large Views [\#4907](https://github.com/kokkos/kokkos/pull/4907) - Fix bounds errors with Kokkos::sort [\#4980](https://github.com/kokkos/kokkos/pull/4980) - Fix HIP version when printing the configuration [\#4872](https://github.com/kokkos/kokkos/pull/4872) - Fixed `_CUDA_ARCH__` to `__CUDA_ARCH__` for CUDA LDG [\#4893](https://github.com/kokkos/kokkos/pull/4893) - Fixed an incorrect struct initialization [\#5028](https://github.com/kokkos/kokkos/pull/5028) - Fix racing condition in `HIPParallelLaunch` [\#5008](https://github.com/kokkos/kokkos/pull/5008) - Avoid deprecation warnings with `OpenMPExec::validate_partition` [\#4982](https://github.com/kokkos/kokkos/pull/4982) - Make View self-assignment not produce double-free [\#5024](https://github.com/kokkos/kokkos/pull/5024) ## [3.6.00](https://github.com/kokkos/kokkos/tree/3.6.00) (2022-02-18) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.5.00...3.6.00) ### Features: - Add C++ standard algorithms [\#4315](https://github.com/kokkos/kokkos/pull/4315) - Implement `fill_random` for `DynRankView` [\#4763](https://github.com/kokkos/kokkos/pull/4763) - Add `bhalf_t` [\#4543](https://github.com/kokkos/kokkos/pull/4543) [\#4653](https://github.com/kokkos/kokkos/pull/4653) - Add mathematical constants [\#4519](https://github.com/kokkos/kokkos/pull/4519) - Allow `Kokkos::{create_mirror*,resize,realloc}` to be used with `WithoutInitializing` [\#4486](https://github.com/kokkos/kokkos/pull/4486) [\#4337](https://github.com/kokkos/kokkos/pull/4337) - Implement `KOKKOS_IF_ON_{HOST,DEVICE}` macros [\#4660](https://github.com/kokkos/kokkos/pull/4660) - Allow setting the CMake language for Kokkos [\#4323](https://github.com/kokkos/kokkos/pull/4323) #### Perf bug fix - Desul: Add ScopeCaller [\#4690](https://github.com/kokkos/kokkos/pull/4690) - Enable Desul atomics by default when using Makefiles 
[\#4606](https://github.com/kokkos/kokkos/pull/4606) - Unique token improvement [\#4741](https://github.com/kokkos/kokkos/pull/4741) [\#4748](https://github.com/kokkos/kokkos/pull/4748) #### Other improvements: - Add math function long double overload on the host side [\#4712](https://github.com/kokkos/kokkos/pull/4712) ### Deprecations: - Array reductions with pointer return types [\#4756](https://github.com/kokkos/kokkos/pull/4756) - Deprecate `partition_master`, `validate_partition` [\#4737](https://github.com/kokkos/kokkos/pull/4737) - Deprecate `Kokkos_ENABLE_PTHREAD` in favor of `Kokkos_ENABLE_THREADS` [\#4619](https://github.com/kokkos/kokkos/pull/4619) ** pair with use std::threads ** - Deprecate `log2(unsigned) -> int` (removing in next release) [\#4595](https://github.com/kokkos/kokkos/pull/4595) - Deprecate `Kokkos::Impl::is_view` [\#4592](https://github.com/kokkos/kokkos/pull/4592) - Deprecate `KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_*` macros and the `ActiveExecutionMemorySpace` alias [\#4668](https://github.com/kokkos/kokkos/issues/4668) ### Backends and Archs Enhancements: #### SYCL: - Update required SYCL compiler version [\#4749](https://github.com/kokkos/kokkos/pull/4749) - Cap vector size to kernel maximum for SYCL [\#4704](https://github.com/kokkos/kokkos/pull/4704) - Improve check for compatibility of vector size and subgroup size in SYCL [\#4579](https://github.com/kokkos/kokkos/pull/4579) - Provide `chunk_size` for SYCL [\#4635](https://github.com/kokkos/kokkos/pull/4635) - Use host-pinned memory for SYCL kernel memory [\#4627](https://github.com/kokkos/kokkos/pull/4627) - Use shuffle-based algorithm for scalar reduction [\#4608](https://github.com/kokkos/kokkos/pull/4608) - Implement pool of USM IndirectKernelMemory [\#4596](https://github.com/kokkos/kokkos/pull/4596) - Provide valid default team size for SYCL [\#4481](https://github.com/kokkos/kokkos/pull/4481) #### CUDA: - Add checks for shmem usage in `parallel_reduce` 
[\#4548](https://github.com/kokkos/kokkos/pull/4548) #### HIP: - Add support for fp16 in the HIP backend [\#4688](https://github.com/kokkos/kokkos/pull/4688) - Disable multiple kernel instantiations when using HIP (configure with `-DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS=ON` to use) [\#4644](https://github.com/kokkos/kokkos/pull/4644) - Fix HIP scratch use per instance [\#4439](https://github.com/kokkos/kokkos/pull/4439) - Change allocation header to 256B alignment for AMD VEGA architecture [\#4753](https://github.com/kokkos/kokkos/pull/4753) - Add generic `KOKKOS_ARCH_VEGA` macro [\#4782](https://github.com/kokkos/kokkos/pull/4782) - Require ROCm 4.5 [\#4689](https://github.com/kokkos/kokkos/pull/4689) ### HPX: - Adapt to HPX 1.7.0 which is now required [\#4241](https://github.com/kokkos/kokkos/pull/4241) #### OpenMP: - Fix thread deduction for OpenMP for `thread_count==0` [\#4541](https://github.com/kokkos/kokkos/pull/4541) #### OpenMPTarget: - Update memory space `size_type` to improve performance (`size_t -> unsigned`) [\#4779](https://github.com/kokkos/kokkos/pull/4779) #### Other Improvements: - Improve NVHPC support [\#4599](https://github.com/kokkos/kokkos/pull/4599) - Add `Kokkos::Experimental::{min,max,minmax,clamp}` [\#4629](https://github.com/kokkos/kokkos/pull/4629) [\#4506](https://github.com/kokkos/kokkos/pull/4506) - Use device type as template argument in Containers and Algorithms [\#4724](https://github.com/kokkos/kokkos/pull/4724) [\#4675](https://github.com/kokkos/kokkos/pull/4675) - Implement `Kokkos::sort` with execution space [\#4490](https://github.com/kokkos/kokkos/pull/4490) - `Kokkos::resize` always error out for mismatch in runtime rank [\#4681](https://github.com/kokkos/kokkos/pull/4681) - Print current call stack when calling `Kokkos::abort()` from the host [\#4672](https://github.com/kokkos/kokkos/pull/4672) [\#4671](https://github.com/kokkos/kokkos/pull/4671) - Detect mismatch of execution spaces in functors 
[\#4655](https://github.com/kokkos/kokkos/pull/4655) - Improve view label access on host [\#4647](https://github.com/kokkos/kokkos/pull/4647) - Error out for `const` scalar return type in reduction [\#4645](https://github.com/kokkos/kokkos/pull/4645) - Don't allow calling `UnorderedMap::value_at` for a set [\#4639](https://github.com/kokkos/kokkos/pull/4639) - Add `KOKKOS_COMPILER_NVHPC` macro, disable `quiet_NaN` and `signaling_NaN` [\#4586](https://github.com/kokkos/kokkos/pull/4586) - Improve performance of `local_deep_copy` [\#4511](https://github.com/kokkos/kokkos/pull/4511) - Improve performance when sorting integers [\#4464](https://github.com/kokkos/kokkos/pull/4464) - Add missing numeric traits (`denorm_min`, `reciprocal_overflow_threshold`, `{quiet,signaling}_NaN`) and make them work on cv-qualified types [\#4466](https://github.com/kokkos/kokkos/pull/4466) [\#4415](https://github.com/kokkos/kokkos/pull/4415) [\#4473](https://github.com/kokkos/kokkos/pull/4473) [\#4443](https://github.com/kokkos/kokkos/pull/4443) ### Implemented enhancements BuildSystem - Manually compute IntelLLVM compiler version for older CMake versions [\#4760](https://github.com/kokkos/kokkos/pull/4760) - Add Xptxas without = to `nvcc_wrapper` [\#4646](https://github.com/kokkos/kokkos/pull/4646) - Use external GoogleTest optionally [\#4563](https://github.com/kokkos/kokkos/pull/4563) - Silence warnings about multiple optimization flags with `nvcc_wrapper` [\#4502](https://github.com/kokkos/kokkos/pull/4502) - Use the same flags in Makefile.kokkos for POWER7/8/9 as for CMake [\#4483](https://github.com/kokkos/kokkos/pull/4483) - Fix support for A64FX architecture [\#4745](https://github.com/kokkos/kokkos/pull/4745) ### Incompatibilities: - Drop `KOKKOS_ARCH_HIP` macro when using generated GNU makefiles [\#4786](https://github.com/kokkos/kokkos/pull/4786) - Remove gcc-toolchain auto add for clang in Makefile.kokkos [\#4762](https://github.com/kokkos/kokkos/pull/4762) ### Bug Fixes: - Lock 
constant memory in Cuda/HIP kernel launch with a mutex (thread safety) [\#4525](https://github.com/kokkos/kokkos/pull/4525) - Fix overflow for large requested scratch allocation [\#4551](https://github.com/kokkos/kokkos/pull/4551) - Fix Windows build in mingw [\#4564](https://github.com/kokkos/kokkos/pull/4564) - Fix `kokkos_launch_compiler`: escape `$` character [\#4769](https://github.com/kokkos/kokkos/pull/4769) [\#4703](https://github.com/kokkos/kokkos/pull/4703) - Fix math functions with NVCC and GCC 5 as host compiler [\#4733](https://github.com/kokkos/kokkos/pull/4733) - Fix shared build with Intel19 [\#4725](https://github.com/kokkos/kokkos/pull/4725) - Do not install empty `desul/src/` directory [\#4714](https://github.com/kokkos/kokkos/pull/4714) - Fix wrong `device_id` computation in `identifier_from_devid` (Profiling Interface) [\#4694](https://github.com/kokkos/kokkos/pull/4694) - Fix a bug in CUDA scratch memory pool (abnormally high memory consumption) [\#4673](https://github.com/kokkos/kokkos/pull/4673) - Remove eval of command args in `hpcbind` [\#4630](https://github.com/kokkos/kokkos/pull/4630) - SYCL fix to run when no GPU is detected [\#4623](https://github.com/kokkos/kokkos/pull/4623) - Fix `layout_strides::span` for rank-0 views [\#4605](https://github.com/kokkos/kokkos/pull/4605) - Fix SYCL atomics for local memory [\#4585](https://github.com/kokkos/kokkos/pull/4585) - Hotfix `mdrange_large_deep_copy` for SYCL [\#4581](https://github.com/kokkos/kokkos/pull/4581) - Fix bug when sorting integer using the HIP backend [\#4570](https://github.com/kokkos/kokkos/pull/4570) - Fix compilation error when using HIP with RDC [\#4553](https://github.com/kokkos/kokkos/pull/4553) - `DynamicView`: Fix deallocation extent [\#4533](https://github.com/kokkos/kokkos/pull/4533) - SYCL fix running parallel_reduce with TeamPolicy for large ranges [\#4532](https://github.com/kokkos/kokkos/pull/4532) - Fix bash syntax error in `nvcc_wrapper` 
[\#4524](https://github.com/kokkos/kokkos/pull/4524) - OpenMPTarget `team_policy` reduce fixes for `init/join` reductions [\#4521](https://github.com/kokkos/kokkos/pull/4521) - Avoid hangs in the Threads backend [\#4499](https://github.com/kokkos/kokkos/pull/4499) - OpenMPTarget fix reduction bug in `parallel_reduce` for `TeamPolicy` [\#4491](https://github.com/kokkos/kokkos/pull/4491) - HIP fix scratch space per instance [\#4439](https://github.com/kokkos/kokkos/pull/4439) - OpenMPTarget fix team scratch allocation [\#4431](https://github.com/kokkos/kokkos/pull/4431) ## [3.5.00](https://github.com/kokkos/kokkos/tree/3.5.00) (2021-10-19) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.4.01...3.5.00) ### Features: - Add support for quad-precision math functions/traits [\#4098](https://github.com/kokkos/kokkos/pull/4098) - Adding ExecutionSpace partitioning function [\#4096](https://github.com/kokkos/kokkos/pull/4096) - Improve Python Interop Capabilities [\#4065](https://github.com/kokkos/kokkos/pull/4065) - Add half_t Kokkos::rand specialization [\#3922](https://github.com/kokkos/kokkos/pull/3922) - Add math special functions: erf, erfcx, expint1, Bessel functions, Hankel functions [\#3920](https://github.com/kokkos/kokkos/pull/3920) - Add missing common mathematical functions [\#4043](https://github.com/kokkos/kokkos/pull/4043) [\#4036](https://github.com/kokkos/kokkos/pull/4036) [\#4034](https://github.com/kokkos/kokkos/pull/4034) - Let the numeric traits be SFINAE-friendly [\#4038](https://github.com/kokkos/kokkos/pull/4038) - Add Desul atomics - enabling memory-order and memory-scope parameters [\#3247](https://github.com/kokkos/kokkos/pull/3247) - Add detection idiom from the C++ standard library extension version 2 [\#3980](https://github.com/kokkos/kokkos/pull/3980) - Fence Profiling Support in all backends [\#3966](https://github.com/kokkos/kokkos/pull/3966) [\#4304](https://github.com/kokkos/kokkos/pull/4304) 
[\#4258](https://github.com/kokkos/kokkos/pull/4258) [\#4232](https://github.com/kokkos/kokkos/pull/4232) - Significant SYCL enhancements (see below) ### Deprecations: - Deprecate CUDA_SAFE_CALL and HIP_SAFE_CALL [\#4249](https://github.com/kokkos/kokkos/pull/4249) - Deprecate Kokkos::Impl::Timer (Kokkos::Timer has been available for a long time) [\#4201](https://github.com/kokkos/kokkos/pull/4201) - Deprecate Experimental::MasterLock [\#4094](https://github.com/kokkos/kokkos/pull/4094) - Deprecate Kokkos_TaskPolicy.hpp (headers got reorganized, doesn't remove functionality) [\#4011](https://github.com/kokkos/kokkos/pull/4011) - Deprecate backward compatibility features [\#3978](https://github.com/kokkos/kokkos/pull/3978) - Update and deprecate is_space::host_memory/execution/mirror_space [\#3973](https://github.com/kokkos/kokkos/pull/3973) ### Backends and Archs Enhancements: - Enabling constbitset constructors in kernels [\#4296](https://github.com/kokkos/kokkos/pull/4296) - Use ZeroMemset in View constructor to improve performance [\#4226](https://github.com/kokkos/kokkos/pull/4226) - Use memset in deep_copy [\#3944](https://github.com/kokkos/kokkos/pull/3944) - Add missing fence() calls in resize(View) that effectively do deep_copy(resized, orig) [\#4212](https://github.com/kokkos/kokkos/pull/4212) - Avoid allocations in resize and realloc [\#4207](https://github.com/kokkos/kokkos/pull/4207) - StaticCsrGraph: use device type instead of execution space to construct views [\#3991](https://github.com/kokkos/kokkos/pull/3991) - Consider std::sort when view is accessible from host [\#3929](https://github.com/kokkos/kokkos/pull/3929) - Fix CPP20 warnings except for volatile [\#4312](https://github.com/kokkos/kokkos/pull/4312) #### SYCL: - Introduce SYCLHostUSMSpace [\#4268](https://github.com/kokkos/kokkos/pull/4268) - Implement SYCL TeamPolicy for vector_size > 1 [\#4183](https://github.com/kokkos/kokkos/pull/4183) - Enable 64bit ranges for SYCL 
[\#4211](https://github.com/kokkos/kokkos/pull/4211) - Don't print SYCL device info in execution space initialization [\#4168](https://github.com/kokkos/kokkos/pull/4168) - Improve SYCL MDRangePolicy performance [\#4161](https://github.com/kokkos/kokkos/pull/4161) - Use sub_groups in SYCL parallel_scan [\#4147](https://github.com/kokkos/kokkos/pull/4147) - Implement subgroup reduction for SYCL RangePolicy parallel_reduce [\#3940](https://github.com/kokkos/kokkos/pull/3940) - Use DPC++ broadcast extension in SYCL team_broadcast [\#4103](https://github.com/kokkos/kokkos/pull/4103) - Only fence in SYCL parallel_reduce for non-device-accessible result_ptr [\#4089](https://github.com/kokkos/kokkos/pull/4089) - Improve fencing behavior in SYCL backend [\#4088](https://github.com/kokkos/kokkos/pull/4088) - Fence all registered SYCL queues before deallocating memory [\#4086](https://github.com/kokkos/kokkos/pull/4086) - Implement SYCL::print_configuration [\#3992](https://github.com/kokkos/kokkos/pull/3992) - Reuse scratch memory in parallel_scan and TeamPolicy (decreases memory footprint) [\#3899](https://github.com/kokkos/kokkos/pull/3899) [\#3889](https://github.com/kokkos/kokkos/pull/3889) #### CUDA: - Cuda improve heuristic for blocksize [\#4271](https://github.com/kokkos/kokkos/pull/4271) - Don't use [[deprecated]] for nvcc [\#4229](https://github.com/kokkos/kokkos/pull/4229) - Improve error message for NVHPC as host compiler [\#4227](https://github.com/kokkos/kokkos/pull/4227) - Update support for cuda reductions to work with types < 4bytes [\#4156](https://github.com/kokkos/kokkos/pull/4156) - Fix incompatible team size deduction in rare cases parallel_reduce [\#4142](https://github.com/kokkos/kokkos/pull/4142) - Remove UVM usage in DynamicView [\#4129](https://github.com/kokkos/kokkos/pull/4129) - Remove dependency between core and containers [\#4114](https://github.com/kokkos/kokkos/pull/4114) - Adding opt-in CudaMallocSync support when using CUDA version >= 11.2 
[\#4026](https://github.com/kokkos/kokkos/pull/4026) [\#4233](https://github.com/kokkos/kokkos/pull/4233) - Fix a potential race condition in the CUDA backend [\#3999](https://github.com/kokkos/kokkos/pull/3999) #### HIP: - Implement new blocksize deduction method for HIP Backend [\#3953](https://github.com/kokkos/kokkos/pull/3953) - Add multiple LaunchMechanism [\#3820](https://github.com/kokkos/kokkos/pull/3820) - Make HIP backend thread-safe [\#4170](https://github.com/kokkos/kokkos/pull/4170) #### Serial: - Refactor Serial backend and fix thread-safety issue [\#4053](https://github.com/kokkos/kokkos/pull/4053) #### OpenMPTarget: - OpenMPTarget: support array reductions in RangePolicy [\#4040](https://github.com/kokkos/kokkos/pull/4040) - OpenMPTarget: add MDRange parallel_reduce [\#4032](https://github.com/kokkos/kokkos/pull/4032) - OpenMPTarget: Fix bug in for the case of a reducer. [\#4044](https://github.com/kokkos/kokkos/pull/4044) - OpenMPTarget: verify process fix [\#4041](https://github.com/kokkos/kokkos/pull/4041) ### Implemented enhancements BuildSystem #### Important BuildSystem Updates: - Use hipcc architecture autodetection when Kokkos_ARCH is not set [\#3941](https://github.com/kokkos/kokkos/pull/3941) - Introduce Kokkos_ENABLE_DEPRECATION_WARNINGS and remove deprecated code with Kokkos_ENABLE_DEPRECATED_CODE_3 [\#4106](https://github.com/kokkos/kokkos/pull/4106) [\#3855](https://github.com/kokkos/kokkos/pull/3855) #### Other Improvements: - Add allow-unsupported-compiler flag to nvcc-wrapper [\#4298](https://github.com/kokkos/kokkos/pull/4298) - nvcc_wrapper: fix errors in argument handling [\#3993](https://github.com/kokkos/kokkos/pull/3993) - Adds support for -time= and -time in nvcc_wrapper [\#4015](https://github.com/kokkos/kokkos/pull/4015) - nvcc_wrapper: suppress duplicates of GPU architecture and RDC flags [\#3968](https://github.com/kokkos/kokkos/pull/3968) - Fix TMPDIR support in nvcc_wrapper 
[\#3792](https://github.com/kokkos/kokkos/pull/3792) - NVHPC: update PGI compiler arch flags [\#4133](https://github.com/kokkos/kokkos/pull/4133) - Replace PGI with NVHPC (works for both) [\#4196](https://github.com/kokkos/kokkos/pull/4196) - Make sure that KOKKOS_CXX_HOST_COMPILER_ID is defined [\#4235](https://github.com/kokkos/kokkos/pull/4235) - Add options to Makefile builds for deprecated code and warnings [\#4215](https://github.com/kokkos/kokkos/pull/4215) - Use KOKKOS_CXX_HOST_COMPILER_ID for identifying CPU arch flags [\#4199](https://github.com/kokkos/kokkos/pull/4199) - Added support for Cray Clang to Makefile.kokkos [\#4176](https://github.com/kokkos/kokkos/pull/4176) - Add XLClang as compiler [\#4120](https://github.com/kokkos/kokkos/pull/4120) - Keep quoted compiler flags when passing to Trilinos [\#3987](https://github.com/kokkos/kokkos/pull/3987) - Add support for AMD Zen3 CPU architecture [\#3972](https://github.com/kokkos/kokkos/pull/3972) - Rename IntelClang to IntelLLVM [\#3945](https://github.com/kokkos/kokkos/pull/3945) - Add cppcoreguidelines-pro-type-cstyle-cast to clang-tidy [\#3522](https://github.com/kokkos/kokkos/pull/3522) - Add sve bit size definition for A64FX [\#3947](https://github.com/kokkos/kokkos/pull/3947) [\#3946](https://github.com/kokkos/kokkos/pull/3946) - Remove KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES [\#4150](https://github.com/kokkos/kokkos/pull/4150) ### Other Changes: #### Tool Enhancements: - Retrieve original value from a point in a MultidimensionalSparseTuningProblem [\#3977](https://github.com/kokkos/kokkos/pull/3977) - Allow extension of built-in tuners with additional tuning axes [\#3961](https://github.com/kokkos/kokkos/pull/3961) - Added a categorical tuner [\#3955](https://github.com/kokkos/kokkos/pull/3955) #### Miscellaneous: - hpcbind: Use double quotes around $@ when invoking user command [\#4284](https://github.com/kokkos/kokkos/pull/4284) - Add file and line to error message 
[\#3985](https://github.com/kokkos/kokkos/pull/3985) - Fix compiler warnings when compiling with nvc++ [\#4198](https://github.com/kokkos/kokkos/pull/4198) - Add OpenMPTarget CI build on AMD GPUs [\#4055](https://github.com/kokkos/kokkos/pull/4055) - CI: icpx is now part of intel container [\#4002](https://github.com/kokkos/kokkos/pull/4002) ### Incompatibilities: - Remove pre CUDA 9 KOKKOS_IMPL_CUDA_* macros [\#4138](https://github.com/kokkos/kokkos/pull/4138) ### Bug Fixes: - UnorderedMap::clear() should zero the size() [\#4130](https://github.com/kokkos/kokkos/pull/4130) - Add memory fence for HostSharedPtr::cleanup() [\#4144](https://github.com/kokkos/kokkos/pull/4144) - SYCL: Fix race conditions in TeamPolicy::parallel_reduce [\#4418](https://github.com/kokkos/kokkos/pull/4418) - Adding missing memory fence to serial exec space fence. [\#4292](https://github.com/kokkos/kokkos/pull/4292) - Fix using external SYCL queues in tests [\#4291](https://github.com/kokkos/kokkos/pull/4291) - Fix digits10 bug [\#4281](https://github.com/kokkos/kokkos/pull/4281) - Fixes constexpr errors with frounding-math on gcc < 10. 
[\#4278](https://github.com/kokkos/kokkos/pull/4278) - Fix compiler flags for PGI/NVHPC [\#4264](https://github.com/kokkos/kokkos/pull/4264) - Fix Zen2/3 also implying Zen Arch with Makefiles [\#4260](https://github.com/kokkos/kokkos/pull/4260) - Kokkos_Cuda.hpp: Fix shadow warning with cuda/11.0 [\#4252](https://github.com/kokkos/kokkos/pull/4252) - Fix issue w/ static initialization of function attributes [\#4242](https://github.com/kokkos/kokkos/pull/4242) - Disable long double hypot test on Power systems [\#4221](https://github.com/kokkos/kokkos/pull/4221) - Fix false sharing in random pool [\#4218](https://github.com/kokkos/kokkos/pull/4218) - Fix a missing memory_fence for debug shared alloc code [\#4216](https://github.com/kokkos/kokkos/pull/4216) - Fix two xl issues [\#4179](https://github.com/kokkos/kokkos/pull/4179) - Makefile.kokkos: fix (standard_in) 1: syntax error [\#4173](https://github.com/kokkos/kokkos/pull/4173) - Fixes for query_device example [\#4172](https://github.com/kokkos/kokkos/pull/4172) - Fix a bug when using HIP atomic with Kokkos::Complex [\#4159](https://github.com/kokkos/kokkos/pull/4159) - Fix mistaken logic in pthread creation [\#4157](https://github.com/kokkos/kokkos/pull/4157) - Define KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION when requesting Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION=ON [\#4107](https://github.com/kokkos/kokkos/pull/4107) - Fix compilation with latest MSVC version [\#4102](https://github.com/kokkos/kokkos/pull/4102) - Fix incorrect macro definitions when compiling with Intel compiler on Windows [\#4087](https://github.com/kokkos/kokkos/pull/4087) - Fixup global buffer overflow in hand rolled string manipulation [\#4070](https://github.com/kokkos/kokkos/pull/4070) - Fixup heap buffer overflow in cmd line args parsing unit tests [\#4069](https://github.com/kokkos/kokkos/pull/4069) - Only add quotes in compiler flags for Trilinos if necessary [\#4067](https://github.com/kokkos/kokkos/pull/4067) - Fixed invocation of tools 
init callbacks [\#4061](https://github.com/kokkos/kokkos/pull/4061) - Work around SYCL JIT compiler issues with static variables [\#4013](https://github.com/kokkos/kokkos/pull/4013) - Fix TestDetectionIdiom.cpp test inclusion for Trilinos/TriBITS [\#4010](https://github.com/kokkos/kokkos/pull/4010) - Fixup allocation headers with OpenMPTarget backend [\#4003](https://github.com/kokkos/kokkos/pull/4003) - Add missing specialization for OMPT to Kokkos Random [\#3967](https://github.com/kokkos/kokkos/pull/3967) - Disable hypot long double test on power arches [\#3962](https://github.com/kokkos/kokkos/pull/3962) - Use different EBO workaround for MSVC (rebased) [\#3924](https://github.com/kokkos/kokkos/pull/3924) - Fix SYCL Kokkos::Profiling::(de)allocateData calls [\#3928](https://github.com/kokkos/kokkos/pull/3928) ## [3.4.01](https://github.com/kokkos/kokkos/tree/3.4.01) (2021-05-19) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.4.00...3.4.01) **Bug Fixes:** - Windows: Remove atomic_compare_exchange_strong overload conflicts with Windows [\#4024](https://github.com/kokkos/kokkos/pull/4024) - OpenMPTarget: Fixup allocation headers with OpenMPTarget backend [\#4020](https://github.com/kokkos/kokkos/pull/4020) - OpenMPTarget: Add missing specialization for OMPT to Kokkos Random [\#4022](https://github.com/kokkos/kokkos/pull/4022) - AMD: Add support for AMD Zen3 CPU architecture [\#4021](https://github.com/kokkos/kokkos/pull/4021) - SYCL: Implement SYCL::print_configuration [\#4012](https://github.com/kokkos/kokkos/pull/4012) - Containers: staticcsrgraph: use device type instead of execution space to construct views [\#3998](https://github.com/kokkos/kokkos/pull/3998) - nvcc_wrapper: fix errors in argument handling, suppress duplicates of GPU architecture and RDC flags [\#4006](https://github.com/kokkos/kokkos/pull/4006) - CI: Add icpx testing to intel container [\#4004](https://github.com/kokkos/kokkos/pull/4004) - CMake/TRIBITS: Keep quoted compiler 
flags when passing to Trilinos [\#4007](https://github.com/kokkos/kokkos/pull/4007) - CMake: Rename IntelClang to IntelLLVM [\#3945](https://github.com/kokkos/kokkos/pull/3945) ## [3.4.00](https://github.com/kokkos/kokkos/tree/3.4.00) (2021-04-25) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.3.01...3.4.00) **Highlights:** - SYCL Backend Almost Feature Complete - OpenMPTarget Backend Almost Feature Complete - Performance Improvements for HIP backend - Require CMake 3.16 or newer - Tool Callback Interface Enhancements - cmath wrapper functions available now in Kokkos::Experimental **Features:** - Implement parallel_scan with ThreadVectorRange and Reducer [\#3861](https://github.com/kokkos/kokkos/pull/3861) - Implement SYCL Random [\#3849](https://github.com/kokkos/kokkos/pull/3849) - OpenMPTarget: Adding Implementation for nested reducers [\#3845](https://github.com/kokkos/kokkos/pull/3845) - Implement UniqueToken for SYCL [\#3833](https://github.com/kokkos/kokkos/pull/3833) - OpenMPTarget: UniqueToken::Global implementation [\#3823](https://github.com/kokkos/kokkos/pull/3823) - DualView sync's on ExecutionSpaces [\#3822](https://github.com/kokkos/kokkos/pull/3822) - SYCL outer TeamPolicy parallel_reduce [\#3818](https://github.com/kokkos/kokkos/pull/3818) - SYCL TeamPolicy::team_scan [\#3815](https://github.com/kokkos/kokkos/pull/3815) - SYCL MDRangePolicy parallel_reduce [\#3801](https://github.com/kokkos/kokkos/pull/3801) - Enable use of execution space instances in ScatterView [\#3786](https://github.com/kokkos/kokkos/pull/3786) - SYCL TeamPolicy nested parallel_reduce [\#3783](https://github.com/kokkos/kokkos/pull/3783) - OpenMPTarget: MDRange with TagType for parallel_for [\#3781](https://github.com/kokkos/kokkos/pull/3781) - Adding OpenMPTarget parallel_scan [\#3655](https://github.com/kokkos/kokkos/pull/3655) - SYCL basic TeamPolicy [\#3654](https://github.com/kokkos/kokkos/pull/3654) - OpenMPTarget: scratch memory implementation 
[\#3611](https://github.com/kokkos/kokkos/pull/3611) **Implemented enhancements Backends and Archs:** - SYCL choose a specific GPU [\#3918](https://github.com/kokkos/kokkos/pull/3918) - [HIP] Lock access to scratch memory when using Teams [\#3916](https://github.com/kokkos/kokkos/pull/3916) - [HIP] fix multithreaded access to get_next_driver [\#3908](https://github.com/kokkos/kokkos/pull/3908) - Forward declare HIPHostPinnedSpace and SYCLSharedUSMSpace [\#3902](https://github.com/kokkos/kokkos/pull/3902) - Let SYCL USMObjectMem use SharedAllocationRecord [\#3898](https://github.com/kokkos/kokkos/pull/3898) - Implement clock_tic for SYCL [\#3893](https://github.com/kokkos/kokkos/pull/3893) - Don't use a static variable in HIPInternal::scratch_space [\#3866](https://github.com/kokkos/kokkos/pull/3866) - Reuse memory for SYCL parallel_reduce [\#3873](https://github.com/kokkos/kokkos/pull/3873) - Update SYCL compiler in CI [\#3826](https://github.com/kokkos/kokkos/pull/3826) - Introduce HostSharedPtr to manage m_space_instance for Cuda/HIP/SYCL [\#3824](https://github.com/kokkos/kokkos/pull/3824) - [HIP] Use shuffle for range reduction [\#3811](https://github.com/kokkos/kokkos/pull/3811) - OpenMPTarget: Changes to the hierarchical parallelism [\#3808](https://github.com/kokkos/kokkos/pull/3808) - Remove ExtendedReferenceWrapper for SYCL parallel_reduce [\#3802](https://github.com/kokkos/kokkos/pull/3802) - Eliminate sycl_indirect_launch [\#3777](https://github.com/kokkos/kokkos/pull/3777) - OpenMPTarget: scratch implementation for parallel_reduce [\#3776](https://github.com/kokkos/kokkos/pull/3776) - Allow initializing SYCL execution space from sycl::queue and SYCL::impl_static_fence [\#3767](https://github.com/kokkos/kokkos/pull/3767) - SYCL TeamPolicy scratch memory alternative [\#3763](https://github.com/kokkos/kokkos/pull/3763) - Alternative implementation for SYCL TeamPolicy 
[\#3759](https://github.com/kokkos/kokkos/pull/3759) - Unify handling of synchronous errors in SYCL [\#3754](https://github.com/kokkos/kokkos/pull/3754) - core/Cuda: Half_t updates for cgsolve [\#3746](https://github.com/kokkos/kokkos/pull/3746) - Unify HIPParallelLaunch structures [\#3733](https://github.com/kokkos/kokkos/pull/3733) - Improve performance for SYCL parallel_reduce [\#3732](https://github.com/kokkos/kokkos/pull/3732) - Use consistent types in Kokkos_OpenMPTarget_Parallel.hpp [\#3703](https://github.com/kokkos/kokkos/pull/3703) - Implement non-blocking kernel launches for HIP backend [\#3697](https://github.com/kokkos/kokkos/pull/3697) - Change SYCLInternal::m_queue std::unique_ptr -> std::optional [\#3677](https://github.com/kokkos/kokkos/pull/3677) - Use alternative SYCL parallel_reduce implementation [\#3671](https://github.com/kokkos/kokkos/pull/3671) - Use runtime values in KokkosExp_MDRangePolicy.hpp [\#3626](https://github.com/kokkos/kokkos/pull/3626) - Clean up AnalyzePolicy [\#3564](https://github.com/kokkos/kokkos/pull/3564) - Changes for indirect launch of SYCL parallel reduce [\#3511](https://github.com/kokkos/kokkos/pull/3511) **Implemented enhancements BuildSystem:** - Also require C++14 when building gtest [\#3912](https://github.com/kokkos/kokkos/pull/3912) - Fix compiling SYCL with OpenMP [\#3874](https://github.com/kokkos/kokkos/pull/3874) - Require C++17 for SYCL (at configuration time) [\#3869](https://github.com/kokkos/kokkos/pull/3869) - Add COMPILE_DEFINITIONS argument to kokkos_create_imported_tpl [\#3862](https://github.com/kokkos/kokkos/pull/3862) - Do not pass arch flags to the linker with no rdc [\#3846](https://github.com/kokkos/kokkos/pull/3846) - Try compiling C++14 check with C++14 support and print error message [\#3843](https://github.com/kokkos/kokkos/pull/3843) - Enable HIP with Cray Clang [\#3842](https://github.com/kokkos/kokkos/pull/3842) - Add an option to disable header self containment tests 
[\#3834](https://github.com/kokkos/kokkos/pull/3834) - CMake check for C++14 [\#3809](https://github.com/kokkos/kokkos/pull/3809) - Prefer -std=* over --std=* [\#3779](https://github.com/kokkos/kokkos/pull/3779) - Kokkos launch compiler updates [\#3778](https://github.com/kokkos/kokkos/pull/3778) - Updated comments and enabled no-op for kokkos_launch_compiler [\#3774](https://github.com/kokkos/kokkos/pull/3774) - Apple's Clang not correctly recognised [\#3772](https://github.com/kokkos/kokkos/pull/3772) - kokkos_launch_compiler + CUDA auto-detect arch [\#3770](https://github.com/kokkos/kokkos/pull/3770) - Add Spack test support for Kokkos [\#3753](https://github.com/kokkos/kokkos/pull/3753) - Split SYCL tests for aot compilation [\#3741](https://github.com/kokkos/kokkos/pull/3741) - Use consistent OpenMP flag for IntelClang [\#3735](https://github.com/kokkos/kokkos/pull/3735) - Add support for -Wno-deprecated-gpu-targets [\#3722](https://github.com/kokkos/kokkos/pull/3722) - Add configuration to target CUDA compute capability 8.6 [\#3713](https://github.com/kokkos/kokkos/pull/3713) - Added VERSION and SOVERSION to KOKKOS_INTERNAL_ADD_LIBRARY [\#3706](https://github.com/kokkos/kokkos/pull/3706) - Add fast-math to known NVCC flags [\#3699](https://github.com/kokkos/kokkos/pull/3699) - Add MI-100 arch string [\#3698](https://github.com/kokkos/kokkos/pull/3698) - Require CMake >=3.16 [\#3679](https://github.com/kokkos/kokkos/pull/3679) - KokkosCI.cmake, KokkosCTest.cmake.in, CTestConfig.cmake.in + CI updates [\#2844](https://github.com/kokkos/kokkos/pull/2844) **Implemented enhancements Tools:** - Improve readability of the callback invocation in profiling [\#3860](https://github.com/kokkos/kokkos/pull/3860) - V1.1 Tools Interface: incremental, action-based [\#3812](https://github.com/kokkos/kokkos/pull/3812) - Enable launch latency simulations [\#3721](https://github.com/kokkos/kokkos/pull/3721) - Added metadata callback to tools interface 
[\#3711](https://github.com/kokkos/kokkos/pull/3711) - MDRange Tile Size Tuning [\#3688](https://github.com/kokkos/kokkos/pull/3688) - Added support for command-line args for kokkos-tools [\#3627](https://github.com/kokkos/kokkos/pull/3627) - Query max tile sizes for an MDRangePolicy, and set tile sizes on an existing policy [\#3481](https://github.com/kokkos/kokkos/pull/3481) **Implemented enhancements Other:** - Try detecting ndevices in get_gpu [\#3921](https://github.com/kokkos/kokkos/pull/3921) - Use strcmp to compare names() [\#3909](https://github.com/kokkos/kokkos/pull/3909) - Add execution space arguments for constructor overloads that might allocate a new underlying View [\#3904](https://github.com/kokkos/kokkos/pull/3904) - Prefix labels in internal use of kokkos_malloc [\#3891](https://github.com/kokkos/kokkos/pull/3891) - Prefix labels for internal uses of SharedAllocationRecord [\#3890](https://github.com/kokkos/kokkos/pull/3890) - Add missing hypot math function [\#3880](https://github.com/kokkos/kokkos/pull/3880) - Unify algorithm unit tests to avoid code duplication [\#3851](https://github.com/kokkos/kokkos/pull/3851) - DualView.template view() better matches for Devices in UVMSpace cases [\#3857](https://github.com/kokkos/kokkos/pull/3857) - More extensive disentangling of Policy Traits [\#3829](https://github.com/kokkos/kokkos/pull/3829) - Replaced nanosleep and sched_yield with STL routines [\#3825](https://github.com/kokkos/kokkos/pull/3825) - Constructing Atomic Subviews [\#3810](https://github.com/kokkos/kokkos/pull/3810) - Metadata Declaration in Core [\#3729](https://github.com/kokkos/kokkos/pull/3729) - Allow using tagged final functor in parallel_reduce [\#3714](https://github.com/kokkos/kokkos/pull/3714) - Major duplicate code removal in SharedAllocationRecord specializations [\#3658](https://github.com/kokkos/kokkos/pull/3658) **Fixed bugs:** - Provide forward declarations in Kokkos_ViewLayoutTiled.hpp for XL 
[\#3911](https://github.com/kokkos/kokkos/pull/3911) - Fixup absolute value of floating points in Kokkos complex [\#3882](https://github.com/kokkos/kokkos/pull/3882) - Address intel 17 ICE [\#3881](https://github.com/kokkos/kokkos/pull/3881) - Add missing pow(Kokkos::complex) overloads [\#3868](https://github.com/kokkos/kokkos/pull/3868) - Fix bug {pow, log}(Kokkos::complex) [\#3866](https://github.com/kokkos/kokkos/pull/3866) - Cleanup writing to output streams in Cuda [\#3859](https://github.com/kokkos/kokkos/pull/3859) - Fixup cache CUDA fallback execution space instance used by DualView::sync [\#3856](https://github.com/kokkos/kokkos/pull/3856) - Fix cmake warning with pthread [\#3854](https://github.com/kokkos/kokkos/pull/3854) - Fix typo FOUND_CUDA_{DRIVVER -> DRIVER} [\#3852](https://github.com/kokkos/kokkos/pull/3852) - Fix bug in SYCL team_reduce [\#3848](https://github.com/kokkos/kokkos/pull/3848) - Atrocious bug in MDRange tuning [\#3803](https://github.com/kokkos/kokkos/pull/3803) - Fix compiling SYCL with Kokkos_ENABLE_TUNING=ON [\#3800](https://github.com/kokkos/kokkos/pull/3800) - Fixed command line parsing bug [\#3797](https://github.com/kokkos/kokkos/pull/3797) - Workaround race condition in SYCL parallel_reduce [\#3782](https://github.com/kokkos/kokkos/pull/3782) - Fix Atomic{Min,Max} for Kepler30 [\#3780](https://github.com/kokkos/kokkos/pull/3780) - Fix SYCL typo [\#3755](https://github.com/kokkos/kokkos/pull/3755) - Fixed Kokkos_install_additional_files macro [\#3752](https://github.com/kokkos/kokkos/pull/3752) - Fix a typo for Kokkos_ARCH_A64FX [\#3751](https://github.com/kokkos/kokkos/pull/3751) - OpenMPTarget: fixes and workarounds to work with "Release" build type [\#3748](https://github.com/kokkos/kokkos/pull/3748) - Fix parsing bug for number of devices command line argument [\#3724](https://github.com/kokkos/kokkos/pull/3724) - Avoid more warnings with clang and C++20
[\#3719](https://github.com/kokkos/kokkos/pull/3719) - Fix gcc-10.1 C++20 warnings [\#3718](https://github.com/kokkos/kokkos/pull/3718) - Fix cuda cache config not being set correct [\#3712](https://github.com/kokkos/kokkos/pull/3712) - Fix dualview deepcopy perftools [\#3701](https://github.com/kokkos/kokkos/pull/3701) - use drand instead of frand in drand [\#3696](https://github.com/kokkos/kokkos/pull/3696) **Incompatibilities:** - Remove unimplemented member functions of SYCLDevice [\#3919](https://github.com/kokkos/kokkos/pull/3919) - Replace cl::sycl [\#3896](https://github.com/kokkos/kokkos/pull/3896) - Get rid of SYCL workaround in Kokkos_Complex.hpp [\#3884](https://github.com/kokkos/kokkos/pull/3884) - Replace most uses of if_c [\#3883](https://github.com/kokkos/kokkos/pull/3883) - Remove Impl::enable_if_type [\#3863](https://github.com/kokkos/kokkos/pull/3863) - Remove HostBarrier test [\#3847](https://github.com/kokkos/kokkos/pull/3847) - Avoid (void) interface [\#3836](https://github.com/kokkos/kokkos/pull/3836) - Remove VerifyExecutionCanAccessMemorySpace [\#3813](https://github.com/kokkos/kokkos/pull/3813) - Avoid duplicated code in ScratchMemorySpace [\#3793](https://github.com/kokkos/kokkos/pull/3793) - Remove superfluous FunctorFinal specialization [\#3788](https://github.com/kokkos/kokkos/pull/3788) - Rename cl::sycl -> sycl in Kokkos_MathematicalFunctions.hpp [\#3678](https://github.com/kokkos/kokkos/pull/3678) - Remove integer_sequence backward compatibility implementation [\#3533](https://github.com/kokkos/kokkos/pull/3533) **Enabled tests:** - Fixup re-enable core performance tests [\#3903](https://github.com/kokkos/kokkos/pull/3903) - Enable more SYCL tests [\#3900](https://github.com/kokkos/kokkos/pull/3900) - Restrict MDRange Policy tests for Intel GPUs [\#3853](https://github.com/kokkos/kokkos/pull/3853) - Disable death tests for rawhide [\#3844](https://github.com/kokkos/kokkos/pull/3844) - OpenMPTarget: Block unit tests that do not pass 
with the nvidia compiler [\#3839](https://github.com/kokkos/kokkos/pull/3839) - Enable Bitset container test for SYCL [\#3830](https://github.com/kokkos/kokkos/pull/3830) - Enable some more SYCL tests [\#3744](https://github.com/kokkos/kokkos/pull/3744) - Enable SYCL atomic tests [\#3742](https://github.com/kokkos/kokkos/pull/3742) - Enable more SYCL perf_tests [\#3692](https://github.com/kokkos/kokkos/pull/3692) - Enable examples for SYCL [\#3691](https://github.com/kokkos/kokkos/pull/3691) ## [3.3.01](https://github.com/kokkos/kokkos/tree/3.3.01) (2021-01-06) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.3.00...3.3.01) **Bug Fixes:** - Fix severe performance bug in DualView which added memcpys for sync and modify [\#3693](https://github.com/kokkos/kokkos/issues/3693) - Fix performance bug in CUDA backend, where the CUDA cache config was not set correctly. ## [3.3.00](https://github.com/kokkos/kokkos/tree/3.3.00) (2020-12-16) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.2.01...3.3.00) **Features:** - Require C++14 as minimum C++ standard. C++17 and C++20 are supported too. - HIP backend is nearly feature complete. Kokkos Dynamic Task Graphs are missing. - Major update for OpenMPTarget: many capabilities now work. For details contact us. - Added DPC++/SYCL backend: primary capabilities are working. - Added Kokkos Graph API analogous to CUDA Graphs.
- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/3536) - Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/3546) - Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/3439) - Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/3379) **Implemented enhancements Backends and Archs:** - Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/3614) - Adding support for AMD gfx908 architecture [\#3375](https://github.com/kokkos/kokkos/pull/3375) - SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/3583) - SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/3577) - SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/3544) - SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/3550) - SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/3480) - SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/3474) - SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/3451) - SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/3447) - OpenMPTarget: Hierarchical reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/3504) - OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/3411) - HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/3440) - HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/3418) - HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/3366) **Implemented enhancements Policies:** - MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/3494) - MDRangePolicy: Check narrowing conversion in construction
[\#3527](https://github.com/kokkos/kokkos/pull/3527) - MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/3395) - Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/3362) - Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/3369) - TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/3206) - RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/3509) **Implemented enhancements BuildSystem:** - Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/3488) - Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/3548) - RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/3136) - Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/3434) - Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/3402) - cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/3457) **Implemented enhancements Tools:** - Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/3455) - Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/3530) - Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/3518) - Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/3459) - Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/3326) **Implemented enhancements Other:** - Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/3528) - Enable C++14 macros 
unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/3449) - Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/3436) - Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/3435) - Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/3422) - Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/3416) - Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/3388) - Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/3359) - Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/3357) - Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/3340) - Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/3339) - Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/3338) - Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/3309) - Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/3265) - Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/2941) **Fixed bugs:** - Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/3591) - Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/3588) - Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/3566) - Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/3565) - Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/3532) - HIP Fix atomics of large data types and clean up lock arrays 
[\#3529](https://github.com/kokkos/kokkos/pull/3529) - Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/3510) - Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/3503) - Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/3467) - Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/3458) - Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/3398) - HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/3393) - Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/3390) - Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/3378) - Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/3348) - Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/3345) - Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/3343) - Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/3260) **Incompatibilities:** - Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/3535) - Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/3534) - HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/3301) - Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/3264) - Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/3148) ## [3.2.01](https://github.com/kokkos/kokkos/tree/3.2.01) (2020-11-17) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.2.00...3.2.01) **Fixed bugs:** - Disallow KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE in shared library builds 
[\#3332](https://github.com/kokkos/kokkos/pull/3332) - Do not install libprinter-tool when testing is enabled [\#3313](https://github.com/kokkos/kokkos/pull/3313) - Fix restrict/alignment following refactor [\#3373](https://github.com/kokkos/kokkos/pull/3373) - Intel fix: workaround compiler issue with using statement [\#3383](https://github.com/kokkos/kokkos/pull/3383) - Fix zero-length reductions [\#3364](https://github.com/kokkos/kokkos/pull/3364) - Pthread zero-length reduction fix [\#3452](https://github.com/kokkos/kokkos/pull/3452) - HPX zero-length reduction fix [\#3470](https://github.com/kokkos/kokkos/pull/3470) - cuda/9.2 zero-length reduction fix [\#3580](https://github.com/kokkos/kokkos/pull/3580) - Fix multi-stream scratch [\#3269](https://github.com/kokkos/kokkos/pull/3269) - Guard KOKKOS_ALL_COMPILE_OPTIONS if Cuda is not enabled [\#3387](https://github.com/kokkos/kokkos/pull/3387) - Do not include link flags for Fortran linkage [\#3384](https://github.com/kokkos/kokkos/pull/3384) - Fix NVIDIA GPU arch macro with autodetection [\#3473](https://github.com/kokkos/kokkos/pull/3473) - Fix libdl/test issues with Trilinos [\#3543](https://github.com/kokkos/kokkos/pull/3543) - Register Pthread as Tribits option to be enabled with Trilinos [\#3558](https://github.com/kokkos/kokkos/pull/3558) **Implemented enhancements:** - Separate Cuda timing-based tests into their own executable [\#3407](https://github.com/kokkos/kokkos/pull/3407) ## [3.2.00](https://github.com/kokkos/kokkos/tree/3.2.00) (2020-08-19) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.01...3.2.00) **Implemented enhancements:** - HIP:Enable stream in HIP [\#3163](https://github.com/kokkos/kokkos/issues/3163) - HIP:Add support for shuffle reduction for the HIP backend [\#3154](https://github.com/kokkos/kokkos/issues/3154) - HIP:Add implementations of missing HIPHostPinnedSpace methods for LAMMPS [\#3137](https://github.com/kokkos/kokkos/issues/3137) - HIP:Require HIP 3.5.0 or
higher [\#3099](https://github.com/kokkos/kokkos/issues/3099) - HIP:WorkGraphPolicy for HIP [\#3096](https://github.com/kokkos/kokkos/issues/3096) - OpenMPTarget: Significant update to the new experimental backend. Requires C++17, works on Intel GPUs, reference counting fixes. [\#3169](https://github.com/kokkos/kokkos/issues/3169) - Windows Cuda support [\#3018](https://github.com/kokkos/kokkos/issues/3018) - Pass `-Wext-lambda-captures-this` to NVCC when support for `__host__ __device__` lambda is enabled from CUDA 11 [\#3241](https://github.com/kokkos/kokkos/issues/3241) - Use explicit staging buffer for constant memory kernel launches and cleanup host/device synchronization [\#3234](https://github.com/kokkos/kokkos/issues/3234) - Various fixup to policies including making TeamPolicy default constructible and making RangePolicy and TeamPolicy assignable: [\#3202](https://github.com/kokkos/kokkos/issues/3202) , [\#3203](https://github.com/kokkos/kokkos/issues/3203) , [\#3196](https://github.com/kokkos/kokkos/issues/3196) - Annotations for `DefaultExectutionSpace` and `DefaultHostExectutionSpace` to use in static analysis [\#3189](https://github.com/kokkos/kokkos/issues/3189) - Add documentation on using Spack to install Kokkos and developing packages that depend on Kokkos [\#3187](https://github.com/kokkos/kokkos/issues/3187) - Add OpenMPTarget backend flags for NVC++ compiler [\#3185](https://github.com/kokkos/kokkos/issues/3185) - Move deep\_copy/create\_mirror\_view on Experimental::OffsetView into Kokkos:: namespace [\#3166](https://github.com/kokkos/kokkos/issues/3166) - Allow for larger block size in HIP [\#3165](https://github.com/kokkos/kokkos/issues/3165) - View: Added names of Views to the different View initialize/free kernels [\#3159](https://github.com/kokkos/kokkos/issues/3159) - Cuda: Caching cudaFunctorAttributes and whether L1/Shmem prefer was set [\#3151](https://github.com/kokkos/kokkos/issues/3151) - BuildSystem: Improved performance in default 
configuration by defaulting to Release build [\#3131](https://github.com/kokkos/kokkos/issues/3131) - Cuda: Update CUDA occupancy calculation [\#3124](https://github.com/kokkos/kokkos/issues/3124) - Vector: Adding data() to Vector [\#3123](https://github.com/kokkos/kokkos/issues/3123) - BuildSystem: Add CUDA Ampere configuration support [\#3122](https://github.com/kokkos/kokkos/issues/3122) - General: Apply [[noreturn]] to Kokkos::abort when applicable [\#3106](https://github.com/kokkos/kokkos/issues/3106) - TeamPolicy: Validate storage level argument passed to TeamPolicy::set\_scratch\_size() [\#3098](https://github.com/kokkos/kokkos/issues/3098) - BuildSystem: Make kokkos\_has\_string() function in Makefile.kokkos case insensitive [\#3091](https://github.com/kokkos/kokkos/issues/3091) - Modify KOKKOS\_FUNCTION macro for clang-tidy analysis [\#3087](https://github.com/kokkos/kokkos/issues/3087) - Move allocation profiling to allocate/deallocate calls [\#3084](https://github.com/kokkos/kokkos/issues/3084) - BuildSystem: FATAL\_ERROR when attempting in-source build [\#3082](https://github.com/kokkos/kokkos/issues/3082) - Change enums in ScatterView to types [\#3076](https://github.com/kokkos/kokkos/issues/3076) - HIP: Changes for new compiler/runtime [\#3067](https://github.com/kokkos/kokkos/issues/3067) - Extract and use get\_gpu [\#3061](https://github.com/kokkos/kokkos/issues/3061) , [\#3048](https://github.com/kokkos/kokkos/issues/3048) - Add is\_allocated to View-like containers [\#3059](https://github.com/kokkos/kokkos/issues/3059) - Combined reducers for scalar references [\#3052](https://github.com/kokkos/kokkos/issues/3052) - Add configurable capacity for UniqueToken [\#3051](https://github.com/kokkos/kokkos/issues/3051) - Add installation testing [\#3034](https://github.com/kokkos/kokkos/issues/3034) - HIP: Add UniqueToken [\#3020](https://github.com/kokkos/kokkos/issues/3020) - Autodetect number of devices 
[\#3013](https://github.com/kokkos/kokkos/issues/3013) **Fixed bugs:** - Check error code from `cudaStreamSynchronize` in CUDA fences [\#3255](https://github.com/kokkos/kokkos/issues/3255) - Fix issue with C++ standard flags when using `nvcc\_wrapper` with PGI [\#3254](https://github.com/kokkos/kokkos/issues/3254) - Add missing threadfence in lock-based atomics [\#3208](https://github.com/kokkos/kokkos/issues/3208) - Fix dedup of linker flags for shared lib on CMake <=3.12 [\#3176](https://github.com/kokkos/kokkos/issues/3176) - Fix memory leak with CUDA streams [\#3170](https://github.com/kokkos/kokkos/issues/3170) - BuildSystem: Fix OpenMP Target flags for Cray [\#3161](https://github.com/kokkos/kokkos/issues/3161) - ScatterView: fix for OpenmpTarget remove inheritance from reducers [\#3162](https://github.com/kokkos/kokkos/issues/3162) - BuildSystem: Set OpenMP flags according to host compiler [\#3127](https://github.com/kokkos/kokkos/issues/3127) - OpenMP: Fix logic for nested omp in partition\_master bug [\#3101](https://github.com/kokkos/kokkos/issues/3101) - nvcc\_wrapper: send --cudart to nvcc instead of host compiler [\#3092](https://github.com/kokkos/kokkos/issues/3092) - BuildSystem: Fixes for Cuda/11 and c++17 [\#3085](https://github.com/kokkos/kokkos/issues/3085) - HIP: Fix print\_configuration [\#3080](https://github.com/kokkos/kokkos/issues/3080) - Conditionally define get\_gpu [\#3072](https://github.com/kokkos/kokkos/issues/3072) - Fix bounds for ranges in random number generator [\#3069](https://github.com/kokkos/kokkos/issues/3069) - Fix Cuda minor arch check [\#3035](https://github.com/kokkos/kokkos/issues/3035) - BuildSystem: Add -expt-relaxed-constexpr flag to nvcc\_wrapper [\#3021](https://github.com/kokkos/kokkos/issues/3021) **Incompatibilities:** - Remove ETI support [\#3157](https://github.com/kokkos/kokkos/issues/3157) - Remove KOKKOS\_INTERNAL\_ENABLE\_NON\_CUDA\_BACKEND [\#3147](https://github.com/kokkos/kokkos/issues/3147) - Remove 
core/unit\_test/config [\#3146](https://github.com/kokkos/kokkos/issues/3146) - Removed the preprocessor branch for KOKKOS\_ENABLE\_PROFILING [\#3115](https://github.com/kokkos/kokkos/issues/3115) - Disable profiling with MSVC [\#3066](https://github.com/kokkos/kokkos/issues/3066) **Closed issues:** - Silent error (Validate storage level arg to set_scratch_size) [\#3097](https://github.com/kokkos/kokkos/issues/3097) - Remove KOKKKOS\_ENABLE\_PROFILING Option [\#3095](https://github.com/kokkos/kokkos/issues/3095) - Cuda 11 -\> allow C++17 [\#3083](https://github.com/kokkos/kokkos/issues/3083) - In source build failure not explained [\#3081](https://github.com/kokkos/kokkos/issues/3081) - Allow naming of Views for initialization kernel [\#3070](https://github.com/kokkos/kokkos/issues/3070) - DefaultInit tests failing when using CTest resource allocation feature [\#3040](https://github.com/kokkos/kokkos/issues/3040) - Add installation testing. [\#3037](https://github.com/kokkos/kokkos/issues/3037) - nvcc\_wrapper needs to handle `-expt-relaxed-constexpr` flag [\#3017](https://github.com/kokkos/kokkos/issues/3017) - CPU core oversubscription warning on macOS with OpenMP backend [\#2996](https://github.com/kokkos/kokkos/issues/2996) - Default behavior of KOKKOS\_NUM\_DEVICES to use all devices available [\#2975](https://github.com/kokkos/kokkos/issues/2975) - Assert blocksize \> 0 [\#2974](https://github.com/kokkos/kokkos/issues/2974) - Add ability to assign kokkos profile function from executable [\#2973](https://github.com/kokkos/kokkos/issues/2973) - ScatterView Support for the pre/post increment operator [\#2967](https://github.com/kokkos/kokkos/issues/2967) - Compiler issue: Cuda build with clang 10 has errors with the atomic unit tests [\#3237](https://github.com/kokkos/kokkos/issues/3237) - Incompatibility of flags for C++ standard with PGI v20.4 on Power9/NVIDIA V100 system [\#3252](https://github.com/kokkos/kokkos/issues/3252) - Error configuring as subproject 
[\#3140](https://github.com/kokkos/kokkos/issues/3140) - CMake fails with Nvidia compilers when the GPU architecture option is not supplied (Fix configure with OMPT and Cuda) [\#3207](https://github.com/kokkos/kokkos/issues/3207) - PGI compiler being passed the gcc -fopenmp flag [\#3125](https://github.com/kokkos/kokkos/issues/3125) - Cuda: Memory leak when using CUDA stream [\#3167](https://github.com/kokkos/kokkos/issues/3167) - RangePolicy has an implicitly deleted assignment operator [\#3192](https://github.com/kokkos/kokkos/issues/3192) - MemorySpace::allocate needs to have memory pool counting. [\#3064](https://github.com/kokkos/kokkos/issues/3064) - Missing write fence for lock based atomics on CUDA [\#3038](https://github.com/kokkos/kokkos/issues/3038) - CUDA compute capability version check problem [\#3026](https://github.com/kokkos/kokkos/issues/3026) - Make DynRankView fencing consistent [\#3014](https://github.com/kokkos/kokkos/issues/3014) - nvcc\_wrapper cant handle -Xcompiler -o out.o [\#2993](https://github.com/kokkos/kokkos/issues/2993) - Reductions of non-trivial types of size 4 fail in CUDA shfl operations [\#2990](https://github.com/kokkos/kokkos/issues/2990) - complex\_double misalignment in reduce, clang+CUDA [\#2989](https://github.com/kokkos/kokkos/issues/2989) - Span of degenerated \(zero-length\) subviews is not zero in some special cases [\#2979](https://github.com/kokkos/kokkos/issues/2979) - Rank 1 custom layouts dont work as expected. 
[\#2840](https://github.com/kokkos/kokkos/issues/2840) ## [3.1.01](https://github.com/kokkos/kokkos/tree/3.1.1) (2020-04-14) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.1.00...3.1.1) **Fixed bugs:** - Fix complex_double misalignment in reduce, clang+CUDA [\#2989](https://github.com/kokkos/kokkos/issues/2989) - Fix compilation fails when profiling disabled and CUDA enabled [\#3001](https://github.com/kokkos/kokkos/issues/3001) - Fix cuda reduction of non-trivial scalars of size 4 [\#2990](https://github.com/kokkos/kokkos/issues/2990) - Configure and install version file when building in Trilinos [\#2957](https://github.com/kokkos/kokkos/pull/2957) - Fix OpenMPTarget build missing include and namespace [\#3000](https://github.com/kokkos/kokkos/issues/3000) - fix typo in KOKKOS_SET_EXE_PROPERTY() [\#2959](https://github.com/kokkos/kokkos/issues/2959) - Fix non-zero span subviews of zero sized subviews [\#2979](https://github.com/kokkos/kokkos/issues/2979) ## [3.1.00](https://github.com/kokkos/kokkos/tree/3.1.00) (2020-04-14) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.0.00...3.1.00) **Features:** - HIP Support for AMD - OpenMPTarget Support with clang - Windows VS19 (Serial) Support [\#1533](https://github.com/kokkos/kokkos/issues/1533) **Implemented enhancements:** - generate\_makefile.bash should allow tests to be disabled [\#2886](https://github.com/kokkos/kokkos/issues/2886) - clang/7+cuda/9 build -Werror-unused parameter error in nightly test [\#2884](https://github.com/kokkos/kokkos/issues/2884) - ScatterView memory space is not user settable [\#2826](https://github.com/kokkos/kokkos/issues/2826) - clang/8+cuda/10.0 build error with c++17 [\#2809](https://github.com/kokkos/kokkos/issues/2809) - warnings.... 
[\#2805](https://github.com/kokkos/kokkos/issues/2805) - Kokkos version in cpp define [\#2787](https://github.com/kokkos/kokkos/issues/2787) - Remove Defunct QThreads Backend [\#2751](https://github.com/kokkos/kokkos/issues/2751) - Improve Kokkos::fence behavior with multiple execution spaces [\#2659](https://github.com/kokkos/kokkos/issues/2659) - polylithic\(?\) initialization of Kokkos [\#2658](https://github.com/kokkos/kokkos/issues/2658) - Unnecessary\(?\) check for host execution space initialization from Cuda initialization [\#2652](https://github.com/kokkos/kokkos/issues/2652) - Kokkos error reporting failures with CUDA GPUs in exclusive mode [\#2471](https://github.com/kokkos/kokkos/issues/2471) - atomicMax equivalent \(and other atomics\) [\#2401](https://github.com/kokkos/kokkos/issues/2401) - Fix alignment for Kokkos::complex [\#2255](https://github.com/kokkos/kokkos/issues/2255) - Warnings with Cuda 10.1 [\#2206](https://github.com/kokkos/kokkos/issues/2206) - dual view with Kokkos::ViewAllocateWithoutInitializing [\#2188](https://github.com/kokkos/kokkos/issues/2188) - Check error code from cudaOccupancyMaxActiveBlocksPerMultiprocessor [\#2172](https://github.com/kokkos/kokkos/issues/2172) - Add non-member Kokkos::resize/realloc for DualView [\#2170](https://github.com/kokkos/kokkos/issues/2170) - Construct DualView without initialization [\#2046](https://github.com/kokkos/kokkos/issues/2046) - Expose is\_assignable to determine if one view can be assigned to another [\#1936](https://github.com/kokkos/kokkos/issues/1936) - profiling label [\#1935](https://github.com/kokkos/kokkos/issues/1935) - team\_broadcast of bool failed on CUDA backend [\#1908](https://github.com/kokkos/kokkos/issues/1908) - View static\_extent [\#660](https://github.com/kokkos/kokkos/issues/660) - Misleading Kokkos::Cuda::initialize ERROR message when compiled for wrong GPU architecture [\#1944](https://github.com/kokkos/kokkos/issues/1944) - Cryptic Error When Malloc Fails 
[\#2164](https://github.com/kokkos/kokkos/issues/2164) - Drop support for intermediate standards in CMake [\#2336](https://github.com/kokkos/kokkos/issues/2336) **Fixed bugs:** - DualView sync\_device with length zero creates cuda errors [\#2946](https://github.com/kokkos/kokkos/issues/2946) - building with nvcc and clang \(or clang based XL\) as host compiler: "Kokkos::atomic\_fetch\_min\(volatile int \*, int\)" has already been defined [\#2903](https://github.com/kokkos/kokkos/issues/2903) - Cuda 9.1,10.1 debug builds failing due to -Werror=unused-parameter [\#2880](https://github.com/kokkos/kokkos/issues/2880) - clang -Werror: Kokkos\_FixedBufferMemoryPool.hpp:140:28: error: unused parameter 'alloc\_size' [\#2869](https://github.com/kokkos/kokkos/issues/2869) - intel/16.0.1, intel/17.0.1 nightly build failures with debugging enabled [\#2867](https://github.com/kokkos/kokkos/issues/2867) - intel/16.0.1 debug build errors [\#2863](https://github.com/kokkos/kokkos/issues/2863) - xl/16.1.1 with cpp14, openmp build, nightly test failures [\#2856](https://github.com/kokkos/kokkos/issues/2856) - Intel nightly test failures: team\_vector [\#2852](https://github.com/kokkos/kokkos/issues/2852) - Kokkos Views with intmax/2\\> for complex\ uses std::ostream, not std::istream [\#2313](https://github.com/kokkos/kokkos/issues/2313) - Macros: Restrict not honored for non-intel compilers [\#1922](https://github.com/kokkos/kokkos/issues/1922) ## [2.9.00](https://github.com/kokkos/kokkos/tree/2.9.00) (2019-06-24) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.8.00...2.9.00) **Implemented enhancements:** - Capability: CUDA Streams [\#1723](https://github.com/kokkos/kokkos/issues/1723) - Capability: CUDA Stream support for parallel\_reduce [\#2061](https://github.com/kokkos/kokkos/issues/2061) - Capability: Feature Request: TeamVectorRange [\#713](https://github.com/kokkos/kokkos/issues/713) - Capability: Adding HPX backend 
[\#2080](https://github.com/kokkos/kokkos/issues/2080) - Capability: TaskScheduler to have multiple queues [\#565](https://github.com/kokkos/kokkos/issues/565) - Capability: Support for additional reductions in ScatterView [\#1674](https://github.com/kokkos/kokkos/issues/1674) - Capability: Request: deep\_copy within parallel regions [\#689](https://github.com/kokkos/kokkos/issues/689) - Capability: Feature Request: `create_mirror_view_without_initializing` [\#1765](https://github.com/kokkos/kokkos/issues/1765) - View: Use SFINAE to restrict possible View type conversions [\#2127](https://github.com/kokkos/kokkos/issues/2127) - Deprecation: Deprecate ExecutionSpace::fence\(\) as static function and make it non-static [\#2140](https://github.com/kokkos/kokkos/issues/2140) - Deprecation: Deprecate LayoutTileLeft [\#2122](https://github.com/kokkos/kokkos/issues/2122) - Macros: KOKKOS\_RESTRICT defined for non-Intel compilers [\#2038](https://github.com/kokkos/kokkos/issues/2038) **Fixed bugs:** - Cuda: TeamThreadRange loop count on device is passed by reference to host static constexpr [\#1733](https://github.com/kokkos/kokkos/issues/1733) - Cuda: Build error with relocatable device code with CUDA 10.1 GCC 7.3 [\#2134](https://github.com/kokkos/kokkos/issues/2134) - Cuda: cudaFuncSetCacheConfig is setting CachePreferShared too often [\#2066](https://github.com/kokkos/kokkos/issues/2066) - Cuda: TeamPolicy doesn't throw when created with non-viable vector length and also doesn't backscale to viable one [\#2020](https://github.com/kokkos/kokkos/issues/2020) - Cuda: cudaMemcpy error for large league sizes on V100 [\#1991](https://github.com/kokkos/kokkos/issues/1991) - Cuda: illegal warp sync in parallel\_reduce by functor on Turing 75 [\#1958](https://github.com/kokkos/kokkos/issues/1958) - TeamThreadRange: Inconsistent results from TeamThreadRange reduction [\#1905](https://github.com/kokkos/kokkos/issues/1905) - Atomics: atomic\_fetch\_oper & atomic\_oper\_fetch 
don't build for complex\ [\#1964](https://github.com/kokkos/kokkos/issues/1964) - Views: Kokkos randomread Views leak memory [\#2155](https://github.com/kokkos/kokkos/issues/2155) - ScatterView: LayoutLeft overload currently non-functional [\#2165](https://github.com/kokkos/kokkos/issues/2165) - KNL: With intel 17.2.174 illegal instruction in random number test [\#2078](https://github.com/kokkos/kokkos/issues/2078) - Bitset: Enable copy constructor on device [\#2094](https://github.com/kokkos/kokkos/issues/2094) - Examples: do not compile due to template deduction error \(multi\_fem\) [\#1928](https://github.com/kokkos/kokkos/issues/1928) ## [2.8.00](https://github.com/kokkos/kokkos/tree/2.8.00) (2019-02-05) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.7.24...2.8.00) **Implemented enhancements:** - Capability, Tests: C++14 support and testing [\#1914](https://github.com/kokkos/kokkos/issues/1914) - Capability: Add environment variables for all command line arguments [\#1798](https://github.com/kokkos/kokkos/issues/1798) - Capability: --kokkos-ndevices not working for Slurm [\#1920](https://github.com/kokkos/kokkos/issues/1920) - View: Undefined behavior when deep copying from and to an empty unmanaged view [\#1967](https://github.com/kokkos/kokkos/issues/1967) - BuildSystem: nvcc\_wrapper should stop immediately if nvcc is not in PATH [\#1861](https://github.com/kokkos/kokkos/issues/1861) **Fixed bugs:** - Cuda: Fix Volta Issues 1 Non-deterministic behavior on Volta, runs fine on Pascal [\#1949](https://github.com/kokkos/kokkos/issues/1949) - Cuda: Fix Volta Issues 2 CUDA Team Scan gives wrong values on Volta with -G compile flag [\#1942](https://github.com/kokkos/kokkos/issues/1942) - Cuda: illegal warp sync in parallel\_reduce by functor on Turing 75 [\#1958](https://github.com/kokkos/kokkos/issues/1958) - Threads: Pthreads backend does not handle RangePolicy with offset correctly [\#1976](https://github.com/kokkos/kokkos/issues/1976) - Atomics: 
atomic\_fetch\_oper has no case for Kokkos::complex\<double\> or other 16-byte types [\#1951](https://github.com/kokkos/kokkos/issues/1951) - MDRangePolicy: Fix zero-length range [\#1948](https://github.com/kokkos/kokkos/issues/1948) - TeamThreadRange: TeamThreadRange MaxLoc reduce doesn't compile [\#1909](https://github.com/kokkos/kokkos/issues/1909) ## [2.7.24](https://github.com/kokkos/kokkos/tree/2.7.24) (2018-11-04) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.7.00...2.7.24) **Implemented enhancements:** - DualView: Add non-templated functions for sync, need\_sync, view, modify [\#1858](https://github.com/kokkos/kokkos/issues/1858) - DualView: Avoid needlessly allocating and initializing modify\_host and modify\_device flag views [\#1831](https://github.com/kokkos/kokkos/issues/1831) - DualView: Incorrect deduction of "not device type" [\#1659](https://github.com/kokkos/kokkos/issues/1659) - BuildSystem: Add KOKKOS\_ENABLE\_CXX14 and KOKKOS\_ENABLE\_CXX17 [\#1602](https://github.com/kokkos/kokkos/issues/1602) - BuildSystem: Installed kokkos\_generated\_settings.cmake contains build directories instead of install directories [\#1838](https://github.com/kokkos/kokkos/issues/1838) - BuildSystem: KOKKOS\_ARCH: add ticks to printout of improper arch setting [\#1649](https://github.com/kokkos/kokkos/issues/1649) - BuildSystem: Make core/src/Makefile for Cuda use needed nvcc\_wrapper [\#1296](https://github.com/kokkos/kokkos/issues/1296) - Build: Support PGI as host compiler for NVCC [\#1828](https://github.com/kokkos/kokkos/issues/1828) - Build: Many Warnings Fixed e.g. [\#1786](https://github.com/kokkos/kokkos/issues/1786) - Capability: OffsetView with non-zero begin index [\#567](https://github.com/kokkos/kokkos/issues/567) - Capability: Reductions into device side view [\#1788](https://github.com/kokkos/kokkos/issues/1788) - Capability: Add max\_size to Kokkos::Array [\#1760](https://github.com/kokkos/kokkos/issues/1760) - Capability: View Assignment: 
LayoutStride -\> LayoutLeft and LayoutStride -\> LayoutRight [\#1594](https://github.com/kokkos/kokkos/issues/1594) - Capability: Atomic function allow implicit conversion of update argument [\#1571](https://github.com/kokkos/kokkos/issues/1571) - Capability: Add team\_size\_max with tagged functors [\#663](https://github.com/kokkos/kokkos/issues/663) - Capability: Fix alignment of views from Kokkos\_ScratchSpace should use different alignment [\#1700](https://github.com/kokkos/kokkos/issues/1700) - Capability: create\_mirror\_view\_and\_copy for DynRankView [\#1651](https://github.com/kokkos/kokkos/issues/1651) - Capability: DeepCopy HBWSpace / HostSpace [\#548](https://github.com/kokkos/kokkos/issues/548) - ROCm: support team vector scan [\#1645](https://github.com/kokkos/kokkos/issues/1645) - ROCm: Merge from rocm-hackathon2 [\#1636](https://github.com/kokkos/kokkos/issues/1636) - ROCm: Add ParallelScanWithTotal [\#1611](https://github.com/kokkos/kokkos/issues/1611) - ROCm: Implement MDRange in ROCm [\#1314](https://github.com/kokkos/kokkos/issues/1314) - ROCm: Implement Reducers for Nested Parallelism Levels [\#963](https://github.com/kokkos/kokkos/issues/963) - ROCm: Add asynchronous deep copy [\#959](https://github.com/kokkos/kokkos/issues/959) - Tests: Memory pool test seems to allocate 8GB [\#1830](https://github.com/kokkos/kokkos/issues/1830) - Tests: Add unit\_test for team\_broadcast [\#734](https://github.com/kokkos/kokkos/issues/734) **Fixed bugs:** - BuildSystem: Makefile.kokkos gets gcc-toolchain wrong if gcc is cached [\#1841](https://github.com/kokkos/kokkos/issues/1841) - BuildSystem: kokkos\_generated\_settings.cmake placement is inconsistent [\#1771](https://github.com/kokkos/kokkos/issues/1771) - BuildSystem: Invalid escape sequence \. 
in kokkos\_functions.cmake [\#1661](https://github.com/kokkos/kokkos/issues/1661) - BuildSystem: Problem in Kokkos generated cmake file [\#1770](https://github.com/kokkos/kokkos/issues/1770) - BuildSystem: invalid file names on windows [\#1671](https://github.com/kokkos/kokkos/issues/1671) - Tests: reducers min/max\_loc test fails randomly due to multiple min values and thus multiple valid locations [\#1681](https://github.com/kokkos/kokkos/issues/1681) - Tests: cuda.scatterview unit test causes "Bus error" when force\_uvm and enable\_lambda are enabled [\#1852](https://github.com/kokkos/kokkos/issues/1852) - Tests: cuda.cxx11 unit test fails when force\_uvm and enable\_lambda are enabled [\#1850](https://github.com/kokkos/kokkos/issues/1850) - Tests: threads.reduce\_device\_view\_range\_policy failing with Cuda/8.0.44 and RDC [\#1836](https://github.com/kokkos/kokkos/issues/1836) - Build: compile error when compiling Kokkos with hwloc 2.0.1 \(on OSX 10.12.6, with g++ 7.2.0\) [\#1506](https://github.com/kokkos/kokkos/issues/1506) - Build: dual\_view.view broken with UVM [\#1834](https://github.com/kokkos/kokkos/issues/1834) - Build: White cuda/9.2 + gcc/7.2 warnings triggering errors [\#1833](https://github.com/kokkos/kokkos/issues/1833) - Build: warning: enum constant in boolean context [\#1813](https://github.com/kokkos/kokkos/issues/1813) - Capability: Fix overly conservative max\_team\_size thingy [\#1808](https://github.com/kokkos/kokkos/issues/1808) - DynRankView: Ctors taking ViewAllocateWithoutInitializing broken [\#1783](https://github.com/kokkos/kokkos/issues/1783) - Cuda: Apollo cuda.team\_broadcast test fail with clang-6.0 [\#1762](https://github.com/kokkos/kokkos/issues/1762) - Cuda: Clang spurious test failure in impl\_view\_accessible [\#1753](https://github.com/kokkos/kokkos/issues/1753) - Cuda: Kokkos::complex\ atomic deadlocks with Clang 6 Cuda build with -O0 [\#1752](https://github.com/kokkos/kokkos/issues/1752) - Cuda: LayoutStride Test fails 
for UVM as default memory space [\#1688](https://github.com/kokkos/kokkos/issues/1688) - Cuda: Scan wrong values on Volta [\#1676](https://github.com/kokkos/kokkos/issues/1676) - Cuda: Kokkos::deep\_copy error with CudaUVM and Kokkos::Serial spaces [\#1652](https://github.com/kokkos/kokkos/issues/1652) - Cuda: cudaErrorInvalidConfiguration with debug build [\#1647](https://github.com/kokkos/kokkos/issues/1647) - Cuda: parallel\_for with TeamPolicy::team\_size\_recommended with launch bounds not working -- reported by Daniel Holladay [\#1283](https://github.com/kokkos/kokkos/issues/1283) - Cuda: Using KOKKOS\_CLASS\_LAMBDA in a class with Kokkos::Random\_XorShift64\_Pool member data [\#1696](https://github.com/kokkos/kokkos/issues/1696) - Long Build Times on Darwin [\#1721](https://github.com/kokkos/kokkos/issues/1721) - Capability: Typo in Kokkos\_Sort.hpp - BinOp3D - wrong comparison [\#1720](https://github.com/kokkos/kokkos/issues/1720) - Buffer overflow in SharedAllocationRecord in Kokkos\_HostSpace.cpp [\#1673](https://github.com/kokkos/kokkos/issues/1673) - Serial unit test failure [\#1632](https://github.com/kokkos/kokkos/issues/1632) ## [2.7.00](https://github.com/kokkos/kokkos/tree/2.7.00) (2018-05-24) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.6.00...2.7.00) **Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.7** **Implemented enhancements:** - Deprecate team\_size auto adjusting to maximal value possible [\#1618](https://github.com/kokkos/kokkos/issues/1618) - DynamicView - remove restrictions to std::is\_trivial types and value\_type is power of two [\#1586](https://github.com/kokkos/kokkos/issues/1586) - Kokkos::StaticCrsGraph does not propagate memory traits \(e.g., Unmanaged\) [\#1581](https://github.com/kokkos/kokkos/issues/1581) - Adding ETI for DeepCopy / ViewFill etc. 
[\#1578](https://github.com/kokkos/kokkos/issues/1578) - Deprecate all the left over KOKKOS\_HAVE\_ Macros and Kokkos\_OldMacros.hpp [\#1572](https://github.com/kokkos/kokkos/issues/1572) - Error if Kokkos\_ARCH set in CMake [\#1555](https://github.com/kokkos/kokkos/issues/1555) - Deprecate ExecSpace::initialize / ExecSpace::finalize [\#1532](https://github.com/kokkos/kokkos/issues/1532) - New API for TeamPolicy property setting [\#1531](https://github.com/kokkos/kokkos/issues/1531) - clang 6.0 + cuda debug out-of-memory test failure [\#1521](https://github.com/kokkos/kokkos/issues/1521) - Cuda UniqueToken interface not consistent with other backends [\#1505](https://github.com/kokkos/kokkos/issues/1505) - Move Reducers out of Experimental namespace [\#1494](https://github.com/kokkos/kokkos/issues/1494) - Provide scope guard for initialize/finalize [\#1479](https://github.com/kokkos/kokkos/issues/1479) - Check Kokkos::is\_initialized in SharedAllocationRecord dtor [\#1465](https://github.com/kokkos/kokkos/issues/1465) - Remove static list of allocations [\#1464](https://github.com/kokkos/kokkos/issues/1464) - Makefiles: Support single compile/link line use case [\#1402](https://github.com/kokkos/kokkos/issues/1402) - ThreadVectorRange with a range [\#1400](https://github.com/kokkos/kokkos/issues/1400) - Exclusive scan + last value API [\#1358](https://github.com/kokkos/kokkos/issues/1358) - Install kokkos\_generated\_settings.cmake [\#1348](https://github.com/kokkos/kokkos/issues/1348) - Kokkos arrays \(not views!\) don't do bounds checking in debug mode [\#1342](https://github.com/kokkos/kokkos/issues/1342) - Expose round-robin GPU assignment outside of initialize\(int, char\*\*\) [\#1318](https://github.com/kokkos/kokkos/issues/1318) - DynamicView misses use\_count and label function [\#1298](https://github.com/kokkos/kokkos/issues/1298) - View constructor should check arguments [\#1286](https://github.com/kokkos/kokkos/issues/1286) - False Positive on 
Oversubscription Warning [\#1207](https://github.com/kokkos/kokkos/issues/1207) - Allow \(require\) execution space for 1st arg of VerifyExecutionCanAccessMemorySpace [\#1192](https://github.com/kokkos/kokkos/issues/1192) - ROCm: Add ROCmHostPinnedSpace [\#958](https://github.com/kokkos/kokkos/issues/958) - power of two functions [\#656](https://github.com/kokkos/kokkos/issues/656) - CUDA 8 has 64bit \_\_shfl [\#361](https://github.com/kokkos/kokkos/issues/361) - Add TriBITS/CMake configure information about node types [\#243](https://github.com/kokkos/kokkos/issues/243) **Fixed bugs:** - CUDA atomic\_fetch\_sub for doubles is hitting CAS instead of intrinsic [\#1624](https://github.com/kokkos/kokkos/issues/1624) - Bug: use of ballot on Volta [\#1612](https://github.com/kokkos/kokkos/issues/1612) - Kokkos::deep\_copy memory access failures [\#1583](https://github.com/kokkos/kokkos/issues/1583) - g++ -std option doubly set for cmake project [\#1548](https://github.com/kokkos/kokkos/issues/1548) - ViewFill for 1D Views of larger 32bit entries fails [\#1541](https://github.com/kokkos/kokkos/issues/1541) - CUDA Volta another warpsync bug [\#1520](https://github.com/kokkos/kokkos/issues/1520) - triple\_nested\_parallelism fails with KOKKOS\_DEBUG and CUDA [\#1513](https://github.com/kokkos/kokkos/issues/1513) - Jenkins errors in Kokkos\_SharedAlloc.cpp with debug build [\#1511](https://github.com/kokkos/kokkos/issues/1511) - Kokkos::Sort out-of-bounds with empty bins [\#1504](https://github.com/kokkos/kokkos/issues/1504) - Get rid of deprecated functions inside Kokkos [\#1484](https://github.com/kokkos/kokkos/issues/1484) - get\_work\_partition casts int64\_t to int, causing a seg fault [\#1481](https://github.com/kokkos/kokkos/issues/1481) - NVCC bug with \_\_device\_\_ on defaulted function [\#1470](https://github.com/kokkos/kokkos/issues/1470) - CMake example broken with CUDA backend [\#1468](https://github.com/kokkos/kokkos/issues/1468) ## 
[2.6.00](https://github.com/kokkos/kokkos/tree/2.6.00) (2018-03-07) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.5.00...2.6.00) **Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.6** **Implemented enhancements:** - Support NVIDIA Volta microarchitecture [\#1466](https://github.com/kokkos/kokkos/issues/1466) - Kokkos - Define empty functions when profiling disabled [\#1424](https://github.com/kokkos/kokkos/issues/1424) - Don't use \_\_constant\_\_ cache for lock arrays, enable once per run update instead of once per call [\#1385](https://github.com/kokkos/kokkos/issues/1385) - task dag enhancement. [\#1354](https://github.com/kokkos/kokkos/issues/1354) - Cuda task team collectives and stack size [\#1353](https://github.com/kokkos/kokkos/issues/1353) - Replace View operator acceptance of more than rank integers with 'access' function [\#1333](https://github.com/kokkos/kokkos/issues/1333) - Interoperability: Do not shut down backend execution space runtimes upon calling finalize. [\#1305](https://github.com/kokkos/kokkos/issues/1305) - shmem\_size for LayoutStride [\#1291](https://github.com/kokkos/kokkos/issues/1291) - Kokkos::resize performs poorly on 1D Views [\#1270](https://github.com/kokkos/kokkos/issues/1270) - stride\(\) is inconsistent with dimension\(\), extent\(\), etc. 
[\#1214](https://github.com/kokkos/kokkos/issues/1214) - Kokkos::sort defaults to std::sort on host [\#1208](https://github.com/kokkos/kokkos/issues/1208) - DynamicView with host size grow [\#1206](https://github.com/kokkos/kokkos/issues/1206) - Unmanaged View with Anonymous Memory Space [\#1175](https://github.com/kokkos/kokkos/issues/1175) - Sort subset of Kokkos::DynamicView [\#1160](https://github.com/kokkos/kokkos/issues/1160) - MDRange policy doesn't support lambda reductions [\#1054](https://github.com/kokkos/kokkos/issues/1054) - Add ability to set hook on Kokkos::finalize [\#714](https://github.com/kokkos/kokkos/issues/714) - Atomics with Serial Backend - Default should be Disable? [\#549](https://github.com/kokkos/kokkos/issues/549) - KOKKOS\_ENABLE\_DEPRECATED\_CODE [\#1359](https://github.com/kokkos/kokkos/issues/1359) **Fixed bugs:** - cuda\_internal\_maximum\_warp\_count returns 8, but I believe it should return 16 for P100 [\#1269](https://github.com/kokkos/kokkos/issues/1269) - Cuda: level 1 scratch memory bug \(reported by Stan Moore\) [\#1434](https://github.com/kokkos/kokkos/issues/1434) - MDRangePolicy Reduction requires value\_type typedef in Functor [\#1379](https://github.com/kokkos/kokkos/issues/1379) - Kokkos DeepCopy between empty views fails [\#1369](https://github.com/kokkos/kokkos/issues/1369) - Several issues with new CMake build infrastructure \(reported by Eric Phipps\) [\#1365](https://github.com/kokkos/kokkos/issues/1365) - deep\_copy between rank-1 host/device views of differing layouts without UVM no longer works \(reported by Eric Phipps\) [\#1363](https://github.com/kokkos/kokkos/issues/1363) - Profiling can't be disabled in CMake, and a parallel\_for is missing for tasks \(reported by Kyungjoo Kim\) [\#1349](https://github.com/kokkos/kokkos/issues/1349) - get\_work\_partition int overflow \(reported by berryj5\) [\#1327](https://github.com/kokkos/kokkos/issues/1327) - Kokkos::deep\_copy must fence even if the two views are the 
same [\#1303](https://github.com/kokkos/kokkos/issues/1303) - CudaUVMSpace::allocate/deallocate must fence [\#1302](https://github.com/kokkos/kokkos/issues/1302) - ViewResize on CUDA fails in Debug because of too many resources requested [\#1299](https://github.com/kokkos/kokkos/issues/1299) - Cuda 9 and intrepid2 calls from Panzer. [\#1183](https://github.com/kokkos/kokkos/issues/1183) - Slowdown due to tracking\_enabled\(\) in 2.04.00 \(found by Albany app\) [\#1016](https://github.com/kokkos/kokkos/issues/1016) - Bounds checking fails with zero-span Views \(reported by Stan Moore\) [\#1411](https://github.com/kokkos/kokkos/issues/1411) ## [2.5.00](https://github.com/kokkos/kokkos/tree/2.5.00) (2017-12-15) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.11...2.5.00) **Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.5** **Implemented enhancements:** - Provide Makefile.kokkos logic for CMake and TriBITS [\#878](https://github.com/kokkos/kokkos/issues/878) - Add Scatter View [\#825](https://github.com/kokkos/kokkos/issues/825) - Drop gcc 4.7 and intel 14 from supported compiler list [\#603](https://github.com/kokkos/kokkos/issues/603) - Enable construction of unmanaged view using common\_view\_alloc\_prop [\#1170](https://github.com/kokkos/kokkos/issues/1170) - Unused Function Warning with XL [\#1267](https://github.com/kokkos/kokkos/issues/1267) - Add memory pool parameter check [\#1218](https://github.com/kokkos/kokkos/issues/1218) - CUDA9: Fix warning for unsupported long double [\#1189](https://github.com/kokkos/kokkos/issues/1189) - CUDA9: fix warning on defaulted function marking [\#1188](https://github.com/kokkos/kokkos/issues/1188) - CUDA9: fix warnings for deprecated warp level functions [\#1187](https://github.com/kokkos/kokkos/issues/1187) - Add CUDA 9.0 nightly testing [\#1174](https://github.com/kokkos/kokkos/issues/1174) - {OMPI,MPICH}\_CXX hack breaks nvcc\_wrapper use case 
[\#1166](https://github.com/kokkos/kokkos/issues/1166) - KOKKOS\_HAVE\_CUDA\_LAMBDA became KOKKOS\_CUDA\_USE\_LAMBDA [\#1274](https://github.com/kokkos/kokkos/issues/1274) **Fixed bugs:** - MinMax Reducer with tagged operator doesn't compile [\#1251](https://github.com/kokkos/kokkos/issues/1251) - Reducers for Tagged operators give wrong answer [\#1250](https://github.com/kokkos/kokkos/issues/1250) - Kokkos not Compatible with Big Endian Machines? [\#1235](https://github.com/kokkos/kokkos/issues/1235) - Parallel Scan hangs forever on BG/Q [\#1234](https://github.com/kokkos/kokkos/issues/1234) - Threads backend doesn't compile with Clang on OS X [\#1232](https://github.com/kokkos/kokkos/issues/1232) - $\(shell date\) needs quote [\#1264](https://github.com/kokkos/kokkos/issues/1264) - Unqualified parallel\_for call conflicts with user-defined parallel\_for [\#1219](https://github.com/kokkos/kokkos/issues/1219) - KokkosAlgorithms: CMake issue in unit tests [\#1212](https://github.com/kokkos/kokkos/issues/1212) - Intel 18 Error: "simd pragma has been deprecated" [\#1210](https://github.com/kokkos/kokkos/issues/1210) - Memory leak in Kokkos::initialize [\#1194](https://github.com/kokkos/kokkos/issues/1194) - CUDA9: compiler error with static assert template arguments [\#1190](https://github.com/kokkos/kokkos/issues/1190) - Kokkos::Serial::is\_initialized returns always true [\#1184](https://github.com/kokkos/kokkos/issues/1184) - Triple nested parallelism still fails on bowman [\#1093](https://github.com/kokkos/kokkos/issues/1093) - OpenMP openmp.range on Develop Runs Forever on POWER7+ with RHEL7 and GCC4.8.5 [\#995](https://github.com/kokkos/kokkos/issues/995) - Rendezvous performance at global scope [\#985](https://github.com/kokkos/kokkos/issues/985) ## [2.04.11](https://github.com/kokkos/kokkos/tree/2.04.11) (2017-10-28) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.04...2.04.11) **Implemented enhancements:** - Add Subview pattern. 
[\#648](https://github.com/kokkos/kokkos/issues/648) - Add Kokkos "global" is\_initialized [\#1060](https://github.com/kokkos/kokkos/issues/1060) - Add create\_mirror\_view\_and\_copy [\#1161](https://github.com/kokkos/kokkos/issues/1161) - Add KokkosConcepts SpaceAccessibility function [\#1092](https://github.com/kokkos/kokkos/issues/1092) - Option to Disable Initialize Warnings [\#1142](https://github.com/kokkos/kokkos/issues/1142) - Mature task-DAG capability [\#320](https://github.com/kokkos/kokkos/issues/320) - Promote Work DAG from experimental [\#1126](https://github.com/kokkos/kokkos/issues/1126) - Implement new WorkGraph push/pop [\#1108](https://github.com/kokkos/kokkos/issues/1108) - Kokkos\_ENABLE\_Cuda\_Lambda should default ON [\#1101](https://github.com/kokkos/kokkos/issues/1101) - Add multidimensional parallel for example and improve unit test [\#1064](https://github.com/kokkos/kokkos/issues/1064) - Fix ROCm: Performance tests not building [\#1038](https://github.com/kokkos/kokkos/issues/1038) - Make KOKKOS\_ALIGN\_SIZE a configure-time option [\#1004](https://github.com/kokkos/kokkos/issues/1004) - Make alignment consistent [\#809](https://github.com/kokkos/kokkos/issues/809) - Improve subview construction on Cuda backend [\#615](https://github.com/kokkos/kokkos/issues/615) **Fixed bugs:** - Kokkos::vector fixes for application [\#1134](https://github.com/kokkos/kokkos/issues/1134) - DynamicView non-power of two value\_type [\#1177](https://github.com/kokkos/kokkos/issues/1177) - Memory pool bug [\#1154](https://github.com/kokkos/kokkos/issues/1154) - Cuda launch bounds performance regression bug [\#1140](https://github.com/kokkos/kokkos/issues/1140) - Significant performance regression in LAMMPS after updating Kokkos [\#1139](https://github.com/kokkos/kokkos/issues/1139) - CUDA compile error [\#1128](https://github.com/kokkos/kokkos/issues/1128) - MDRangePolicy neg idx test failure in debug mode 
[\#1113](https://github.com/kokkos/kokkos/issues/1113) - subview construction on Cuda backend [\#615](https://github.com/kokkos/kokkos/issues/615) ## [2.04.04](https://github.com/kokkos/kokkos/tree/2.04.04) (2017-09-11) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.00...2.04.04) **Implemented enhancements:** - OpenMP partition: set number of threads on nested level [\#1082](https://github.com/kokkos/kokkos/issues/1082) - Add StaticCrsGraph row\(\) method [\#1071](https://github.com/kokkos/kokkos/issues/1071) - Enhance Kokkos complex operator overloading [\#1052](https://github.com/kokkos/kokkos/issues/1052) - Tell Trilinos packages about host+device lambda [\#1019](https://github.com/kokkos/kokkos/issues/1019) - Function markup for defaulted class members [\#952](https://github.com/kokkos/kokkos/issues/952) - Add deterministic random number generator [\#857](https://github.com/kokkos/kokkos/issues/857) **Fixed bugs:** - Fix reduction\_identity\<T\>::max for floating point numbers [\#1048](https://github.com/kokkos/kokkos/issues/1048) - Fix MD iteration policy ignores lower bound on GPUs [\#1041](https://github.com/kokkos/kokkos/issues/1041) - (Experimental) HBWSpace Linking issues in KokkosKernels [\#1094](https://github.com/kokkos/kokkos/issues/1094) - (Experimental) ROCm: algorithms/unit\_tests test\_sort failing with segfault [\#1070](https://github.com/kokkos/kokkos/issues/1070) ## [2.04.00](https://github.com/kokkos/kokkos/tree/2.04.00) (2017-08-16) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.13...2.04.00) **Implemented enhancements:** - Added ROCm backend to support AMD GPUs - Kokkos::complex\<T\> behaves slightly differently from std::complex\<T\> [\#1011](https://github.com/kokkos/kokkos/issues/1011) - Kokkos::Experimental::Crs constructor arguments were in the wrong order [\#992](https://github.com/kokkos/kokkos/issues/992) - Work graph construction ease-of-use (one lambda for count and fill) 
[\#991](https://github.com/kokkos/kokkos/issues/991) - when\_all returns pointer of futures (improved interface) [\#990](https://github.com/kokkos/kokkos/issues/990) - Allow assignment of LayoutLeft to LayoutRight or vice versa for rank-0 Views [\#594](https://github.com/kokkos/kokkos/issues/594) - Changed the meaning of Kokkos\_ENABLE\_CXX11\_DISPATCH\_LAMBDA [\#1035](https://github.com/kokkos/kokkos/issues/1035) **Fixed bugs:** - memory pool default constructor does not properly set member variables. [\#1007](https://github.com/kokkos/kokkos/issues/1007) ## [2.03.13](https://github.com/kokkos/kokkos/tree/2.03.13) (2017-07-27) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.05...2.03.13) **Implemented enhancements:** - Disallow enabling both OpenMP and Threads in the same executable [\#406](https://github.com/kokkos/kokkos/issues/406) - Make Kokkos::OpenMP respect OMP environment even if hwloc is available [\#630](https://github.com/kokkos/kokkos/issues/630) - Improve Atomics Performance on KNL/Broadwell where PREFETCHW/RFO is Available [\#898](https://github.com/kokkos/kokkos/issues/898) - Kokkos::resize should test whether dimensions have changed before resizing [\#904](https://github.com/kokkos/kokkos/issues/904) - Develop performance-regression/acceptance tests [\#737](https://github.com/kokkos/kokkos/issues/737) - Make the deep\_copy Profiling hook a start/end system [\#890](https://github.com/kokkos/kokkos/issues/890) - Add deep\_copy Profiling hook [\#843](https://github.com/kokkos/kokkos/issues/843) - Append tag name to parallel construct name for Profiling [\#842](https://github.com/kokkos/kokkos/issues/842) - Add view label to `View bounds error` message for CUDA backend [\#870](https://github.com/kokkos/kokkos/issues/870) - Disable printing the loaded profiling library [\#824](https://github.com/kokkos/kokkos/issues/824) - "Declared but never referenced" warnings [\#853](https://github.com/kokkos/kokkos/issues/853) - Warnings about 
lock\_address\_cuda\_space [\#852](https://github.com/kokkos/kokkos/issues/852) - WorkGraph execution policy [\#771](https://github.com/kokkos/kokkos/issues/771) - Simplify makefiles by guarding compilation with appropriate KOKKOS\_ENABLE\_\#\#\# macros [\#716](https://github.com/kokkos/kokkos/issues/716) - Cmake build: wrong include install directory [\#668](https://github.com/kokkos/kokkos/issues/668) - Derived View type and allocation [\#566](https://github.com/kokkos/kokkos/issues/566) - Fix Compiler warnings when compiling core unit tests for Cuda [\#214](https://github.com/kokkos/kokkos/issues/214) **Fixed bugs:** - Out-of-bounds read in Kokkos\_Layout.hpp [\#975](https://github.com/kokkos/kokkos/issues/975) - CudaClang: Fix failing test with Clang 4.0 [\#941](https://github.com/kokkos/kokkos/issues/941) - Respawn when memory pool allocation fails \(not available memory\) [\#940](https://github.com/kokkos/kokkos/issues/940) - Memory pool aborts on zero allocation request, returns NULL for \< minimum [\#939](https://github.com/kokkos/kokkos/issues/939) - Error with TaskScheduler query of underlying memory pool [\#917](https://github.com/kokkos/kokkos/issues/917) - Profiling::\*Callee static variables declared in header [\#863](https://github.com/kokkos/kokkos/issues/863) - calling \*Space::name\(\) causes compile error [\#862](https://github.com/kokkos/kokkos/issues/862) - bug in Profiling::deallocateData [\#860](https://github.com/kokkos/kokkos/issues/860) - task\_depend test failing, CUDA 8.0 + Pascal + RDC [\#829](https://github.com/kokkos/kokkos/issues/829) - \[develop branch\] Standalone cmake issues [\#826](https://github.com/kokkos/kokkos/issues/826) - Kokkos CUDA fails to compile with OMPI\_CXX and MPICH\_CXX wrappers [\#776](https://github.com/kokkos/kokkos/issues/776) - Task Team reduction on Pascal [\#767](https://github.com/kokkos/kokkos/issues/767) - CUDA stack overflow with TaskDAG test [\#758](https://github.com/kokkos/kokkos/issues/758) - 
TeamVector test on Cuda [\#670](https://github.com/kokkos/kokkos/issues/670) - Clang 4.0 Cuda Build broken again [\#560](https://github.com/kokkos/kokkos/issues/560) ## [2.03.05](https://github.com/kokkos/kokkos/tree/2.03.05) (2017-05-27) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.00...2.03.05) **Implemented enhancements:** - Harmonize Custom Reductions over nesting levels [\#802](https://github.com/kokkos/kokkos/issues/802) - Prevent users directly including KokkosCore\_config.h [\#815](https://github.com/kokkos/kokkos/issues/815) - DualView aborts on concurrent host/device modify \(in debug mode\) [\#814](https://github.com/kokkos/kokkos/issues/814) - Abort when running on a NVIDIA CC5.0 or higher architecture with code compiled for CC \< 5.0 [\#813](https://github.com/kokkos/kokkos/issues/813) - Add "name" function to ExecSpaces [\#806](https://github.com/kokkos/kokkos/issues/806) - Allow null Future in task spawn dependences [\#795](https://github.com/kokkos/kokkos/issues/795) - Add Unit Tests for Kokkos::complex [\#785](https://github.com/kokkos/kokkos/issues/785) - Add pow function for Kokkos::complex [\#784](https://github.com/kokkos/kokkos/issues/784) - Square root of a complex [\#729](https://github.com/kokkos/kokkos/issues/729) - Command line processing of --threads argument prevents users from having any commandline arguments starting with --threads [\#760](https://github.com/kokkos/kokkos/issues/760) - Protected deprecated API with appropriate macro [\#756](https://github.com/kokkos/kokkos/issues/756) - Allow task scheduler memory pool to be used by tasks [\#747](https://github.com/kokkos/kokkos/issues/747) - View bounds checking on host-side performance: constructing a std::string [\#723](https://github.com/kokkos/kokkos/issues/723) - Add check for AppleClang as compiler distinct from check for Clang. [\#705](https://github.com/kokkos/kokkos/issues/705) - Uninclude source files for specific configurations to prevent link warning. 
[\#701](https://github.com/kokkos/kokkos/issues/701) - Add --small option to snapshot script [\#697](https://github.com/kokkos/kokkos/issues/697) - CMake Standalone Support [\#674](https://github.com/kokkos/kokkos/issues/674) - CMake build unit test and install [\#808](https://github.com/kokkos/kokkos/issues/808) - CMake: Fix having kokkos as a subdirectory in a pure cmake project [\#629](https://github.com/kokkos/kokkos/issues/629) - Tribits macro assumes build directory is in top level source directory [\#654](https://github.com/kokkos/kokkos/issues/654) - Use bin/nvcc\_wrapper, not config/nvcc\_wrapper [\#562](https://github.com/kokkos/kokkos/issues/562) - Allow MemoryPool::allocate\(\) to be called from multiple threads per warp. [\#487](https://github.com/kokkos/kokkos/issues/487) - Allow MemoryPool::allocate\(\) to be called from multiple threads per warp. [\#487](https://github.com/kokkos/kokkos/issues/487) - Move OpenMP 4.5 OpenMPTarget backend into Develop [\#456](https://github.com/kokkos/kokkos/issues/456) - Testing on ARM testbed [\#288](https://github.com/kokkos/kokkos/issues/288) **Fixed bugs:** - Fix label in OpenMP parallel\_reduce verify\_initialized [\#834](https://github.com/kokkos/kokkos/issues/834) - TeamScratch Level 1 on Cuda hangs [\#820](https://github.com/kokkos/kokkos/issues/820) - \[bug\] memory pool. [\#786](https://github.com/kokkos/kokkos/issues/786) - Some Reduction Tests fail on Intel 18 with aggressive vectorization on [\#774](https://github.com/kokkos/kokkos/issues/774) - Error copying dynamic view on copy of memory pool [\#773](https://github.com/kokkos/kokkos/issues/773) - CUDA stack overflow with TaskDAG test [\#758](https://github.com/kokkos/kokkos/issues/758) - ThreadVectorRange Customized Reduction Bug [\#739](https://github.com/kokkos/kokkos/issues/739) - set\_scratch\_size overflows [\#726](https://github.com/kokkos/kokkos/issues/726) - Get wrong results for compiler checks in Makefile on OS X. 
[\#706](https://github.com/kokkos/kokkos/issues/706) - Fix check if multiple host architectures enabled. [\#702](https://github.com/kokkos/kokkos/issues/702) - Threads Backend Does not Pass on Cray Compilers [\#609](https://github.com/kokkos/kokkos/issues/609) - Rare bug in memory pool where allocation can finish on superblock in empty state [\#452](https://github.com/kokkos/kokkos/issues/452) - LDFLAGS in core/unit\_test/Makefile: potential "undefined reference" to pthread lib [\#148](https://github.com/kokkos/kokkos/issues/148) ## [2.03.00](https://github.com/kokkos/kokkos/tree/2.03.00) (2017-04-25) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.15...2.03.00) **Implemented enhancements:** - UnorderedMap: make it accept Devices or MemorySpaces [\#711](https://github.com/kokkos/kokkos/issues/711) - sort to accept DynamicView and \[begin,end\) indices [\#691](https://github.com/kokkos/kokkos/issues/691) - ENABLE Macros should only be used via \#ifdef or \#if defined [\#675](https://github.com/kokkos/kokkos/issues/675) - Remove impl/Kokkos\_Synchronic\_\* [\#666](https://github.com/kokkos/kokkos/issues/666) - Turning off IVDEP for Intel 14. 
[\#638](https://github.com/kokkos/kokkos/issues/638) - Using an installed Kokkos in a target application using CMake [\#633](https://github.com/kokkos/kokkos/issues/633) - Create Kokkos Bill of Materials [\#632](https://github.com/kokkos/kokkos/issues/632) - MDRangePolicy and tagged evaluators [\#547](https://github.com/kokkos/kokkos/issues/547) - Add PGI support [\#289](https://github.com/kokkos/kokkos/issues/289) **Fixed bugs:** - Output from PerTeam fails [\#733](https://github.com/kokkos/kokkos/issues/733) - Cuda: architecture flag not added to link line [\#688](https://github.com/kokkos/kokkos/issues/688) - Getting large chunks of memory for a thread team in a universal way [\#664](https://github.com/kokkos/kokkos/issues/664) - Kokkos RNG normal\(\) function hangs for small seed value [\#655](https://github.com/kokkos/kokkos/issues/655) - Kokkos Tests Errors on Shepard/HSW Builds [\#644](https://github.com/kokkos/kokkos/issues/644) ## [2.02.15](https://github.com/kokkos/kokkos/tree/2.02.15) (2017-02-10) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.07...2.02.15) **Implemented enhancements:** - Containers: Adding block partitioning to StaticCrsGraph [\#625](https://github.com/kokkos/kokkos/issues/625) - Kokkos Make System can induce Errors on Cray Volta System [\#610](https://github.com/kokkos/kokkos/issues/610) - OpenMP: error out if KOKKOS\_HAVE\_OPENMP is defined but not \_OPENMP [\#605](https://github.com/kokkos/kokkos/issues/605) - CMake: fix standalone build with tests [\#604](https://github.com/kokkos/kokkos/issues/604) - Change README \(that GitHub shows when opening Kokkos project page\) to tell users how to submit PRs [\#597](https://github.com/kokkos/kokkos/issues/597) - Add correctness testing for all operators of Atomic View [\#420](https://github.com/kokkos/kokkos/issues/420) - Allow assignment of Views with compatible memory spaces [\#290](https://github.com/kokkos/kokkos/issues/290) - Build only one version of Kokkos library for 
tests [\#213](https://github.com/kokkos/kokkos/issues/213) - Clean out old KOKKOS\_HAVE\_CXX11 macros clauses [\#156](https://github.com/kokkos/kokkos/issues/156) - Harmonize Macro names [\#150](https://github.com/kokkos/kokkos/issues/150) **Fixed bugs:** - Cray and PGI: Kokkos\_Parallel\_Reduce [\#634](https://github.com/kokkos/kokkos/issues/634) - Kokkos Make System can induce Errors on Cray Volta System [\#610](https://github.com/kokkos/kokkos/issues/610) - Normal\(\) function random number generator doesn't give the expected distribution [\#592](https://github.com/kokkos/kokkos/issues/592) ## [2.02.07](https://github.com/kokkos/kokkos/tree/2.02.07) (2016-12-16) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.01...2.02.07) **Implemented enhancements:** - Add CMake option to enable Cuda Lambda support [\#589](https://github.com/kokkos/kokkos/issues/589) - Add CMake option to enable Cuda RDC support [\#588](https://github.com/kokkos/kokkos/issues/588) - Add Initial Intel Sky Lake Xeon-HPC Compiler Support to Kokkos Make System [\#584](https://github.com/kokkos/kokkos/issues/584) - Building Tutorial Examples [\#582](https://github.com/kokkos/kokkos/issues/582) - Internal way for using ThreadVectorRange without TeamHandle [\#574](https://github.com/kokkos/kokkos/issues/574) - Testing: Add testing for uvm and rdc [\#571](https://github.com/kokkos/kokkos/issues/571) - Profiling: Add Memory Tracing and Region Markers [\#557](https://github.com/kokkos/kokkos/issues/557) - nvcc\_wrapper not installed with Kokkos built with CUDA through CMake [\#543](https://github.com/kokkos/kokkos/issues/543) - Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541) - Benchmarks: Add Gather benchmark [\#536](https://github.com/kokkos/kokkos/issues/536) - Testing: add spot\_check option to test\_all\_sandia [\#535](https://github.com/kokkos/kokkos/issues/535) - Deprecate Kokkos::Impl::VerifyExecutionCanAccessMemorySpace 
[\#527](https://github.com/kokkos/kokkos/issues/527) - Add AtomicAdd support for 64bit float for Pascal [\#522](https://github.com/kokkos/kokkos/issues/522) - Add Restrict and Aligned memory trait [\#517](https://github.com/kokkos/kokkos/issues/517) - Kokkos Tests are Not Run using Compiler Optimization [\#501](https://github.com/kokkos/kokkos/issues/501) - Add support for clang 3.7 w/ openmp backend [\#393](https://github.com/kokkos/kokkos/issues/393) - Provide an error throw class [\#79](https://github.com/kokkos/kokkos/issues/79) **Fixed bugs:** - Cuda UVM Allocation test broken with UVM as default space [\#586](https://github.com/kokkos/kokkos/issues/586) - Bug \(develop branch only\): multiple tests are now failing when forcing uvm usage. [\#570](https://github.com/kokkos/kokkos/issues/570) - Error in generate\_makefile.sh for Kokkos when Compiler is Empty String/Fails [\#568](https://github.com/kokkos/kokkos/issues/568) - XL 13.1.4 incorrect C++11 flag [\#553](https://github.com/kokkos/kokkos/issues/553) - Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541) - Installing Library on MAC broken due to cp -u [\#539](https://github.com/kokkos/kokkos/issues/539) - Intel Nightly Testing with Debug enabled fails [\#534](https://github.com/kokkos/kokkos/issues/534) ## [2.02.01](https://github.com/kokkos/kokkos/tree/2.02.01) (2016-11-01) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.00...2.02.01) **Implemented enhancements:** - Add Changelog generation to our process. 
[\#506](https://github.com/kokkos/kokkos/issues/506) **Fixed bugs:** - Test scratch\_request fails in Serial with Debug enabled [\#520](https://github.com/kokkos/kokkos/issues/520) - Bug In BoundsCheck for DynRankView [\#516](https://github.com/kokkos/kokkos/issues/516) ## [2.02.00](https://github.com/kokkos/kokkos/tree/2.02.00) (2016-10-30) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.10...2.02.00) **Implemented enhancements:** - Add PowerPC assembly for grabbing clock register in memory pool [\#511](https://github.com/kokkos/kokkos/issues/511) - Add GCC 6.x support [\#508](https://github.com/kokkos/kokkos/issues/508) - Test install and build against installed library [\#498](https://github.com/kokkos/kokkos/issues/498) - Makefile.kokkos adds expt-extended-lambda to cuda build with clang [\#490](https://github.com/kokkos/kokkos/issues/490) - Add top-level makefile option to just test kokkos-core unit-test [\#485](https://github.com/kokkos/kokkos/issues/485) - Split and harmonize Object Files of Core UnitTests to increase build parallelism [\#484](https://github.com/kokkos/kokkos/issues/484) - LayoutLeft to LayoutLeft subview for 3D and 4D views [\#473](https://github.com/kokkos/kokkos/issues/473) - Add official Cuda 8.0 support [\#468](https://github.com/kokkos/kokkos/issues/468) - Allow C++1Z Flag for Class Lambda capture [\#465](https://github.com/kokkos/kokkos/issues/465) - Add Clang 4.0+ compilation of Cuda code [\#455](https://github.com/kokkos/kokkos/issues/455) - Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445) - Add name of view to "View bounds error" [\#432](https://github.com/kokkos/kokkos/issues/432) - Move Sort Binning Operators into Kokkos namespace [\#421](https://github.com/kokkos/kokkos/issues/421) - TaskPolicy - generate error when attempt to use uninitialized [\#396](https://github.com/kokkos/kokkos/issues/396) - Import WithoutInitializing and AllowPadding 
into Kokkos namespace [\#325](https://github.com/kokkos/kokkos/issues/325) - TeamThreadRange requires begin, end to be the same type [\#305](https://github.com/kokkos/kokkos/issues/305) - CudaUVMSpace should track \# allocations, due to CUDA limit on \# UVM allocations [\#300](https://github.com/kokkos/kokkos/issues/300) - Remove old View and its infrastructure [\#259](https://github.com/kokkos/kokkos/issues/259) **Fixed bugs:** - Bug in TestCuda\_Other.cpp: most likely assembly inserted into Device code [\#515](https://github.com/kokkos/kokkos/issues/515) - Cuda Compute Capability check of GPU is outdated [\#509](https://github.com/kokkos/kokkos/issues/509) - multi\_scratch test with hwloc and pthreads seg-faults. [\#504](https://github.com/kokkos/kokkos/issues/504) - generate\_makefile.bash: "make install" is broken [\#503](https://github.com/kokkos/kokkos/issues/503) - make clean in Out of Source Build/Tests Does Not Work Correctly [\#502](https://github.com/kokkos/kokkos/issues/502) - Makefiles for test and examples have issues in Cuda when CXX is not explicitly specified [\#497](https://github.com/kokkos/kokkos/issues/497) - Dispatch lambda test directly inside GTEST macro doesn't work with nvcc [\#491](https://github.com/kokkos/kokkos/issues/491) - UnitTests with HWLOC enabled fail if run with mpirun bound to a single core [\#489](https://github.com/kokkos/kokkos/issues/489) - Failing Reducer Test on Mac with Pthreads [\#479](https://github.com/kokkos/kokkos/issues/479) - make test Dumps Error with Clang Not Found [\#471](https://github.com/kokkos/kokkos/issues/471) - OpenMP TeamPolicy member broadcast not using correct volatile shared variable [\#424](https://github.com/kokkos/kokkos/issues/424) - TaskPolicy - generate error when attempt to use uninitialized [\#396](https://github.com/kokkos/kokkos/issues/396) - New task policy implementation is pulling in old experimental code. 
[\#372](https://github.com/kokkos/kokkos/issues/372) - MemoryPool unit test hangs on Power8 with GCC 6.1.0 [\#298](https://github.com/kokkos/kokkos/issues/298) ## [2.01.10](https://github.com/kokkos/kokkos/tree/2.01.10) (2016-09-27) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.06...2.01.10) **Implemented enhancements:** - Enable Profiling by default in Tribits build [\#438](https://github.com/kokkos/kokkos/issues/438) - parallel\_reduce\(0\), parallel\_scan\(0\) unit tests [\#436](https://github.com/kokkos/kokkos/issues/436) - data\(\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351) - Fix tutorials to track new Kokkos::View [\#323](https://github.com/kokkos/kokkos/issues/323) - Rename team policy set\_scratch\_size. [\#195](https://github.com/kokkos/kokkos/issues/195) **Fixed bugs:** - Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445) - Makefile spits syntax error [\#435](https://github.com/kokkos/kokkos/issues/435) - Kokkos::sort fails for view with all the same values [\#422](https://github.com/kokkos/kokkos/issues/422) - Generic Reducers: can't accept inline constructed reducer [\#404](https://github.com/kokkos/kokkos/issues/404) - data\\(\\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351) - const subview of const view with compile time dimensions on Cuda backend [\#310](https://github.com/kokkos/kokkos/issues/310) - Kokkos \(in Trilinos\) Causes Internal Compiler Error on CUDA 8.0.21-EA on POWER8 [\#307](https://github.com/kokkos/kokkos/issues/307) - Core Oversubscription Detection Broken? 
[\#159](https://github.com/kokkos/kokkos/issues/159) ## [2.01.06](https://github.com/kokkos/kokkos/tree/2.01.06) (2016-09-02) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.00...2.01.06) **Implemented enhancements:** - Add "standard" reducers for lambda-supportable customized reduce [\#411](https://github.com/kokkos/kokkos/issues/411) - TaskPolicy - single thread back-end execution [\#390](https://github.com/kokkos/kokkos/issues/390) - Kokkos master clone tag [\#387](https://github.com/kokkos/kokkos/issues/387) - Query memory requirements from task policy [\#378](https://github.com/kokkos/kokkos/issues/378) - Output order of test\_atomic.cpp is confusing [\#373](https://github.com/kokkos/kokkos/issues/373) - Missing testing for atomics [\#341](https://github.com/kokkos/kokkos/issues/341) - Feature request for Kokkos to provide Kokkos::atomic\_fetch\_max and atomic\_fetch\_min [\#336](https://github.com/kokkos/kokkos/issues/336) - TaskPolicy\ performance requires teams mapped to warps [\#218](https://github.com/kokkos/kokkos/issues/218) **Fixed bugs:** - Reduce with Teams broken for custom initialize [\#407](https://github.com/kokkos/kokkos/issues/407) - Failing Kokkos build on Debian [\#402](https://github.com/kokkos/kokkos/issues/402) - Failing Tests on NVIDIA Pascal GPUs [\#398](https://github.com/kokkos/kokkos/issues/398) - Algorithms: fill\_random assumes dimensions fit in unsigned int [\#389](https://github.com/kokkos/kokkos/issues/389) - Kokkos::subview with RandomAccess Memory Trait [\#385](https://github.com/kokkos/kokkos/issues/385) - Build warning \(signed / unsigned comparison\) in Cuda implementation [\#365](https://github.com/kokkos/kokkos/issues/365) - wrong results for a parallel\_reduce with CUDA8 / Maxwell50 [\#352](https://github.com/kokkos/kokkos/issues/352) - Hierarchical parallelism - 3 level unit test [\#344](https://github.com/kokkos/kokkos/issues/344) - Can I allocate a View w/ both WithoutInitializing & AllowPadding? 
[\#324](https://github.com/kokkos/kokkos/issues/324) - subview View layout determination [\#309](https://github.com/kokkos/kokkos/issues/309) - Unit tests with Cuda - Maxwell [\#196](https://github.com/kokkos/kokkos/issues/196) ## [2.01.00](https://github.com/kokkos/kokkos/tree/2.01.00) (2016-07-21) [Full Changelog](https://github.com/kokkos/kokkos/compare/End_C++98...2.01.00) **Implemented enhancements:** - Edit ViewMapping so assigning Views with the same custom layout compiles when const casting [\#327](https://github.com/kokkos/kokkos/issues/327) - DynRankView: Performance improvement for operator\(\) [\#321](https://github.com/kokkos/kokkos/issues/321) - Interoperability between static and dynamic rank views [\#295](https://github.com/kokkos/kokkos/issues/295) - subview member function ? [\#280](https://github.com/kokkos/kokkos/issues/280) - Interoperability between View and DynRankView. [\#245](https://github.com/kokkos/kokkos/issues/245) - \(Trilinos\) build warning in atomic\_assign, with Kokkos::complex [\#177](https://github.com/kokkos/kokkos/issues/177) - View\<\>::shmem\_size should runtime check for number of arguments equal to rank [\#176](https://github.com/kokkos/kokkos/issues/176) - Custom reduction join via lambda argument [\#99](https://github.com/kokkos/kokkos/issues/99) - DynRankView with 0 dimensions passed in at construction [\#293](https://github.com/kokkos/kokkos/issues/293) - Inject view\_alloc and friends into Kokkos namespace [\#292](https://github.com/kokkos/kokkos/issues/292) - Less restrictive TeamPolicy reduction on Cuda [\#286](https://github.com/kokkos/kokkos/issues/286) - deep\_copy using remap with source execution space [\#267](https://github.com/kokkos/kokkos/issues/267) - Suggestion: Enable opt-in L1 caching via nvcc-wrapper [\#261](https://github.com/kokkos/kokkos/issues/261) - More flexible create\_mirror functions [\#260](https://github.com/kokkos/kokkos/issues/260) - Rename View::memory\_span to 
View::required\_allocation\_size [\#256](https://github.com/kokkos/kokkos/issues/256) - Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237) - Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237) - Kokkos::Timer [\#234](https://github.com/kokkos/kokkos/issues/234) - Fence CudaUVMSpace allocations [\#230](https://github.com/kokkos/kokkos/issues/230) - View::operator\(\) accept std::is\_integral and std::is\_enum [\#227](https://github.com/kokkos/kokkos/issues/227) - Allocating zero size View [\#216](https://github.com/kokkos/kokkos/issues/216) - Thread scalable memory pool [\#212](https://github.com/kokkos/kokkos/issues/212) - Add a way to disable memory leak output [\#194](https://github.com/kokkos/kokkos/issues/194) - Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192) - Runtime rank wrapper for View [\#189](https://github.com/kokkos/kokkos/issues/189) - Profiling Interface [\#158](https://github.com/kokkos/kokkos/issues/158) - Fix View assignment \(of managed to unmanaged\) [\#153](https://github.com/kokkos/kokkos/issues/153) - Add unit test for assignment of managed View to unmanaged View [\#152](https://github.com/kokkos/kokkos/issues/152) - Check for oversubscription of threads with MPI in Kokkos::initialize [\#149](https://github.com/kokkos/kokkos/issues/149) - Dynamic resizeable 1dimensional view [\#143](https://github.com/kokkos/kokkos/issues/143) - Develop TaskPolicy for CUDA [\#142](https://github.com/kokkos/kokkos/issues/142) - New View : Test Compilation Downstream [\#138](https://github.com/kokkos/kokkos/issues/138) - New View Implementation [\#135](https://github.com/kokkos/kokkos/issues/135) - Add variant of subview that lets users add traits [\#134](https://github.com/kokkos/kokkos/issues/134) - NVCC-WRAPPER: Add --host-only flag [\#121](https://github.com/kokkos/kokkos/issues/121) - 
Address gtest issue with TriBITS Kokkos build outside of Trilinos [\#117](https://github.com/kokkos/kokkos/issues/117) - Make tests pass with -expt-extended-lambda on CUDA [\#108](https://github.com/kokkos/kokkos/issues/108) - Dynamic scheduling for parallel\_for and parallel\_reduce [\#106](https://github.com/kokkos/kokkos/issues/106) - Runtime or compile time error when reduce functor's join is not properly specified as const member function or with volatile arguments [\#105](https://github.com/kokkos/kokkos/issues/105) - Error out when the number of threads is modified after kokkos is initialized [\#104](https://github.com/kokkos/kokkos/issues/104) - Porting to POWER and remove assumption of X86 default [\#103](https://github.com/kokkos/kokkos/issues/103) - Dynamic scheduling option for RangePolicy [\#100](https://github.com/kokkos/kokkos/issues/100) - SharedMemory Support for Lambdas [\#81](https://github.com/kokkos/kokkos/issues/81) - Recommended TeamSize for Lambdas [\#80](https://github.com/kokkos/kokkos/issues/80) - Add Aggressive Vectorization Compilation mode [\#72](https://github.com/kokkos/kokkos/issues/72) - Dynamic scheduling team execution policy [\#53](https://github.com/kokkos/kokkos/issues/53) - UVM allocations in multi-GPU systems [\#50](https://github.com/kokkos/kokkos/issues/50) - Synchronic in Kokkos::Impl [\#44](https://github.com/kokkos/kokkos/issues/44) - index and dimension types in for loops [\#28](https://github.com/kokkos/kokkos/issues/28) - Subview assign of 1D Strided with stride 1 to LayoutLeft/Right [\#1](https://github.com/kokkos/kokkos/issues/1) **Fixed bugs:** - misspelled variable name in Kokkos\_Atomic\_Fetch + missing unit tests [\#340](https://github.com/kokkos/kokkos/issues/340) - seg fault Kokkos::Impl::CudaInternal::print\_configuration [\#338](https://github.com/kokkos/kokkos/issues/338) - Clang compiler error with named parallel\_reduce, tags, and TeamPolicy. 
[\#335](https://github.com/kokkos/kokkos/issues/335) - Shared Memory Allocation Error at parallel\_reduce [\#311](https://github.com/kokkos/kokkos/issues/311) - DynRankView: Fix resize and realloc [\#303](https://github.com/kokkos/kokkos/issues/303) - Scratch memory and dynamic scheduling [\#279](https://github.com/kokkos/kokkos/issues/279) - MemoryPool infinite loop when out of memory [\#312](https://github.com/kokkos/kokkos/issues/312) - Kokkos DynRankView changes break Sacado and Panzer [\#299](https://github.com/kokkos/kokkos/issues/299) - MemoryPool fails to compile on non-cuda non-x86 [\#297](https://github.com/kokkos/kokkos/issues/297) - Random Number Generator Fix [\#296](https://github.com/kokkos/kokkos/issues/296) - View template parameter ordering Bug [\#282](https://github.com/kokkos/kokkos/issues/282) - Serial task policy broken. [\#281](https://github.com/kokkos/kokkos/issues/281) - deep\_copy with LayoutStride should not memcpy [\#262](https://github.com/kokkos/kokkos/issues/262) - DualView::need\_sync should be a const method [\#248](https://github.com/kokkos/kokkos/issues/248) - Arbitrary-sized atomics on GPUs broken; loop forever [\#238](https://github.com/kokkos/kokkos/issues/238) - boolean reduction value\_type changes answer [\#225](https://github.com/kokkos/kokkos/issues/225) - Custom init\(\) function for parallel\_reduce with array value\_type [\#210](https://github.com/kokkos/kokkos/issues/210) - unit\_test Makefile is Broken - Recursively Calls itself until Machine Apocalypse. 
[\#202](https://github.com/kokkos/kokkos/issues/202) - nvcc\_wrapper Does Not Support -Xcompiler \ [\#198](https://github.com/kokkos/kokkos/issues/198) - Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192) - Kokkos Threads Backend impl\_shared\_alloc Broken on Intel 16.1 \(Shepard Haswell\) [\#186](https://github.com/kokkos/kokkos/issues/186) - pthread back end hangs if used uninitialized [\#182](https://github.com/kokkos/kokkos/issues/182) - parallel\_reduce of size 0, not calling init/join [\#175](https://github.com/kokkos/kokkos/issues/175) - Bug in Threads with OpenMP enabled [\#173](https://github.com/kokkos/kokkos/issues/173) - KokkosExp\_SharedAlloc, m\_team\_work\_index inaccessible [\#166](https://github.com/kokkos/kokkos/issues/166) - 128-bit CAS without Assembly Broken? [\#161](https://github.com/kokkos/kokkos/issues/161) - fatal error: Cuda/Kokkos\_Cuda\_abort.hpp: No such file or directory [\#157](https://github.com/kokkos/kokkos/issues/157) - Power8: Fix OpenMP backend [\#139](https://github.com/kokkos/kokkos/issues/139) - Data race in Kokkos OpenMP initialization [\#131](https://github.com/kokkos/kokkos/issues/131) - parallel\_launch\_local\_memory and cuda 7.5 [\#125](https://github.com/kokkos/kokkos/issues/125) - Resize can fail with Cuda due to asynchronous dispatch [\#119](https://github.com/kokkos/kokkos/issues/119) - Qthread taskpolicy initialization bug. 
[\#92](https://github.com/kokkos/kokkos/issues/92) - Windows: sys/mman.h [\#89](https://github.com/kokkos/kokkos/issues/89) - Windows: atomic\_fetch\_sub\(\) [\#88](https://github.com/kokkos/kokkos/issues/88) - Windows: snprintf [\#87](https://github.com/kokkos/kokkos/issues/87) - Parallel\_Reduce with TeamPolicy and league size of 0 returns garbage [\#85](https://github.com/kokkos/kokkos/issues/85) - Throw with Cuda when using \(2D\) team\_policy parallel\_reduce with less than a warp size [\#76](https://github.com/kokkos/kokkos/issues/76) - Scalar views don't work with Kokkos::Atomic memory trait [\#69](https://github.com/kokkos/kokkos/issues/69) - Reduce the number of threads per team for Cuda [\#63](https://github.com/kokkos/kokkos/issues/63) - Named Kernels fail for reductions with CUDA [\#60](https://github.com/kokkos/kokkos/issues/60) - Kokkos View dimension\_\(\) for long returning unsigned int [\#20](https://github.com/kokkos/kokkos/issues/20) - atomic test hangs with LLVM [\#6](https://github.com/kokkos/kokkos/issues/6) - OpenMP Test should set omp\_set\_num\_threads to 1 [\#4](https://github.com/kokkos/kokkos/issues/4) **Closed issues:** - develop branch broken with CUDA 8 and --expt-extended-lambda [\#354](https://github.com/kokkos/kokkos/issues/354) - --arch=KNL with Intel 2016 build failure [\#349](https://github.com/kokkos/kokkos/issues/349) - Error building with Cuda when passing -DKOKKOS\_CUDA\_USE\_LAMBDA to generate\_makefile.bash [\#343](https://github.com/kokkos/kokkos/issues/343) - Can I safely use int indices in a 2-D View with capacity \> 2B? 
[\#318](https://github.com/kokkos/kokkos/issues/318) - Kokkos::ViewAllocateWithoutInitializing is not working [\#317](https://github.com/kokkos/kokkos/issues/317) - Intel build on Mac OS X [\#277](https://github.com/kokkos/kokkos/issues/277) - deleted [\#271](https://github.com/kokkos/kokkos/issues/271) - Broken Mira build [\#268](https://github.com/kokkos/kokkos/issues/268) - 32-bit build [\#246](https://github.com/kokkos/kokkos/issues/246) - parallel\_reduce with RDC crashes linker [\#232](https://github.com/kokkos/kokkos/issues/232) - build of Kokkos\_Sparse\_MV\_impl\_spmv\_Serial.cpp.o fails if you use nvcc and have cuda disabled [\#209](https://github.com/kokkos/kokkos/issues/209) - Kokkos Serial execution space is not tested with TeamPolicy. [\#207](https://github.com/kokkos/kokkos/issues/207) - Unit test failure on Hansen KokkosCore\_UnitTest\_Cuda\_MPI\_1 [\#200](https://github.com/kokkos/kokkos/issues/200) - nvcc compiler warning: calling a \_\_host\_\_ function from a \_\_host\_\_ \_\_device\_\_ function is not allowed [\#180](https://github.com/kokkos/kokkos/issues/180) - Intel 15 build error with defaulted "move" operators [\#171](https://github.com/kokkos/kokkos/issues/171) - missing libkokkos.a during Trilinos 12.4.2 build, yet other libkokkos\*.a libs are there [\#165](https://github.com/kokkos/kokkos/issues/165) - Tie atomic updates to execution space or even to thread team? 
\(speculation\) [\#144](https://github.com/kokkos/kokkos/issues/144) - New View: Compiletime/size Test [\#137](https://github.com/kokkos/kokkos/issues/137) - New View : Performance Test [\#136](https://github.com/kokkos/kokkos/issues/136) - Signed/unsigned comparison warning in CUDA parallel [\#130](https://github.com/kokkos/kokkos/issues/130) - Kokkos::complex: Need op\* w/ std::complex & real [\#126](https://github.com/kokkos/kokkos/issues/126) - Use uintptr\_t for casting pointers [\#110](https://github.com/kokkos/kokkos/issues/110) - Default thread mapping behavior between P and Q threads. [\#91](https://github.com/kokkos/kokkos/issues/91) - Windows: Atomic\_Fetch\_Exchange\(\) return type [\#90](https://github.com/kokkos/kokkos/issues/90) - Synchronic unit test is way too long [\#84](https://github.com/kokkos/kokkos/issues/84) - nvcc\_wrapper -\> $\(NVCC\_WRAPPER\) [\#42](https://github.com/kokkos/kokkos/issues/42) - Check compiler version and print helpful message [\#39](https://github.com/kokkos/kokkos/issues/39) - Kokkos shared memory on Cuda uses a lot of registers [\#31](https://github.com/kokkos/kokkos/issues/31) - Can not pass unit test `cuda.space` without a GT 720 [\#25](https://github.com/kokkos/kokkos/issues/25) - Makefile.kokkos lacks bounds checking option that CMake has [\#24](https://github.com/kokkos/kokkos/issues/24) - Kokkos can not complete unit tests with CUDA UVM enabled [\#23](https://github.com/kokkos/kokkos/issues/23) - Simplify teams + shared memory histogram example to remove vectorization [\#21](https://github.com/kokkos/kokkos/issues/21) - Kokkos needs to rever to ${PROJECT\_NAME}\_ENABLE\_CXX11 not Trilinos\_ENABLE\_CXX11 [\#17](https://github.com/kokkos/kokkos/issues/17) - Kokkos Base Makefile adds AVX to KNC Build [\#16](https://github.com/kokkos/kokkos/issues/16) - MS Visual Studio 2013 Build Errors [\#9](https://github.com/kokkos/kokkos/issues/9) - subview\(X, ALL\(\), j\) for 2-D LayoutRight View X: should it view a column? 
[\#5](https://github.com/kokkos/kokkos/issues/5) ## [End_C++98](https://github.com/kokkos/kokkos/tree/End_C++98) (2015-04-15) \* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)* kokkos-4.3.01/CMakeLists.txt000066400000000000000000000333411461675637500156700ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.16 FATAL_ERROR) # Disable in-source builds to prevent source tree corruption. if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" ) message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files and delete CMakeCache.txt." ) endif() if (COMMAND TRIBITS_PACKAGE) TRIBITS_PACKAGE(Kokkos) endif() # We want to determine if options are given with the wrong case # In order to detect which arguments are given to compare against # the list of valid arguments, at the beginning here we need to # form a list of all the given variables. If it begins with any # case of KoKkOS, we add it to the list. GET_CMAKE_PROPERTY(_variableNames VARIABLES) SET(KOKKOS_GIVEN_VARIABLES) FOREACH (var ${_variableNames}) STRING(TOUPPER ${var} UC_VAR) STRING(FIND ${UC_VAR} KOKKOS IDX) IF (${IDX} EQUAL 0) LIST(APPEND KOKKOS_GIVEN_VARIABLES ${var}) ENDIF() ENDFOREACH() # Basic initialization (Used in KOKKOS_SETTINGS) SET(Kokkos_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) SET(KOKKOS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) SET(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) SET(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) # Is this a build as part of Trilinos? 
IF(COMMAND TRIBITS_PACKAGE_DECL) SET(KOKKOS_HAS_TRILINOS ON) ELSE() SET(KOKKOS_HAS_TRILINOS OFF) SET(PACKAGE_NAME Kokkos) SET(PACKAGE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") ENDIF() # Is this build a subdirectory of another project GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_pick_cxx_std.cmake) SET(KOKKOS_ENABLED_OPTIONS) #exported in config file SET(KOKKOS_ENABLED_DEVICES) #exported in config file SET(KOKKOS_ENABLED_TPLS) #exported in config file SET(KOKKOS_ENABLED_ARCH_LIST) #exported in config file #These are helper flags used for sanity checks during config #Certain features should depend on other features being configured first SET(KOKKOS_CFG_DAG_NONE On) #sentinel to indicate no dependencies SET(KOKKOS_CFG_DAG_DEVICES_DONE Off) SET(KOKKOS_CFG_DAG_OPTIONS_DONE Off) SET(KOKKOS_CFG_DAG_ARCH_DONE Off) SET(KOKKOS_CFG_DAG_CXX_STD_DONE Off) SET(KOKKOS_CFG_DAG_COMPILER_ID_DONE Off) FUNCTION(KOKKOS_CFG_DEPENDS SUCCESSOR PRECURSOR) SET(PRE_FLAG KOKKOS_CFG_DAG_${PRECURSOR}) SET(POST_FLAG KOKKOS_CFG_DAG_${SUCCESSOR}) IF (NOT ${PRE_FLAG}) MESSAGE(FATAL_ERROR "Bad CMake refactor: feature ${SUCCESSOR} cannot be configured until ${PRECURSOR} is configured") ENDIF() GLOBAL_SET(${POST_FLAG} On) ENDFUNCTION() LIST(APPEND CMAKE_MODULE_PATH cmake/Modules) IF(NOT KOKKOS_HAS_TRILINOS) set(CMAKE_DISABLE_SOURCE_CHANGES ON) set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) # What language are we compiling Kokkos as # downstream dependencies need to match this! SET(KOKKOS_COMPILE_LANGUAGE CXX) # use lower case here since we didn't parse options yet IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_CUDA) # Without this as a language for the package we would get a C++ compiler enabled. # but we still need a C++ compiler even if we build all our cpp files as CUDA only # because otherwise the C++ features don't work etc. 
# This is just the rather odd way CMake does this, since CUDA doesn't imply C++ even # though it is a C++ extension ... (but I guess it didn't use to be back in CUDA 4 or 5 # days. SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX) SET(KOKKOS_COMPILE_LANGUAGE CUDA) ENDIF() # use lower case here since we haven't parsed options yet IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_HIP) # Without this as a language for the package we would get a C++ compiler enabled. # but we still need a C++ compiler even if we build all our cpp files as HIP only # because otherwise the C++ features don't work etc. SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX) SET(KOKKOS_COMPILE_LANGUAGE HIP) ENDIF() IF (Spack_WORKAROUND) IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) MESSAGE(FATAL_ERROR "Can't currently use Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE in a spack installation!") ENDIF() #if we are explicitly using Spack for development, #nuke the Spack compiler SET(SPACK_CXX $ENV{SPACK_CXX}) IF(SPACK_CXX) SET(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE) SET(ENV{CXX} ${SPACK_CXX}) ENDIF() ENDIF() # Always call the project command to define Kokkos_ variables # and to make sure that C++ is an enabled language PROJECT(Kokkos ${KOKKOS_COMPILE_LANGUAGE} ${KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE}) IF(NOT HAS_PARENT) IF (NOT CMAKE_BUILD_TYPE) SET(DEFAULT_BUILD_TYPE "RelWithDebInfo") MESSAGE(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.") SET(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING "Choose the type of build, options are: Debug, Release, RelWithDebInfo and MinSizeRel." FORCE) ENDIF() ENDIF() ELSE() SET(KOKKOS_COMPILE_LANGUAGE CXX) ENDIF() IF (NOT CMAKE_SIZEOF_VOID_P) STRING(FIND ${CMAKE_CXX_COMPILER} nvcc_wrapper FIND_IDX) IF (NOT FIND_IDX STREQUAL -1) MESSAGE(FATAL_ERROR "Kokkos did not configure correctly and failed to validate compiler. The most likely cause is CUDA linkage using nvcc_wrapper.
Please ensure your CUDA environment is correctly configured.") ELSE() MESSAGE(FATAL_ERROR "Kokkos did not configure correctly and failed to validate compiler. The most likely cause is linkage errors during CMake compiler validation. Please consult the CMake error log shown below for the exact error during compiler validation") ENDIF() ELSEIF (NOT CMAKE_SIZEOF_VOID_P EQUAL 8) IF(CMAKE_SIZEOF_VOID_P EQUAL 4) MESSAGE(WARNING "32-bit builds are experimental and not officially supported.") SET(KOKKOS_IMPL_32BIT ON) ELSE() MESSAGE(FATAL_ERROR "Kokkos assumes a 64-bit build, i.e., 8-byte pointers, but found ${CMAKE_SIZEOF_VOID_P}-byte pointers instead;") ENDIF() ENDIF() set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MINOR 3) set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") # mathematical expressions below are not stricly necessary but they eliminate # the rather aggravating leading 0 in the releases patch version number, and, # in some way, are a sanity check for our arithmetic math(EXPR KOKKOS_VERSION_MAJOR "${KOKKOS_VERSION} / 10000") math(EXPR KOKKOS_VERSION_MINOR "${KOKKOS_VERSION} / 100 % 100") math(EXPR KOKKOS_VERSION_PATCH "${KOKKOS_VERSION} % 100") # Load either the real TriBITS or a TriBITS wrapper # for certain utility functions that are universal (like GLOBAL_SET) INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) IF (Kokkos_ENABLE_CUDA) # If we are building CUDA, we have tricked CMake because we declare a CXX project # If the default C++ standard for a given compiler matches the requested # standard, then CMake just omits the -std flag in later versions of CMake # This breaks CUDA compilation (CUDA compiler can have a different default # -std then the underlying host compiler by itself). 
Setting this variable # forces CMake to always add the -std flag even if it thinks it doesn't need it GLOBAL_SET(CMAKE_CXX_STANDARD_DEFAULT 98) ENDIF() # These are the variables we will append to as we go # I really wish these were regular variables # but scoping issues can make it difficult GLOBAL_SET(KOKKOS_COMPILE_OPTIONS) GLOBAL_SET(KOKKOS_LINK_OPTIONS) GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS) GLOBAL_SET(KOKKOS_CUDA_OPTIONS) GLOBAL_SET(KOKKOS_CUDAFE_OPTIONS) GLOBAL_SET(KOKKOS_XCOMPILER_OPTIONS) # We need to append text here for making sure TPLs # we import are available for an installed Kokkos GLOBAL_SET(KOKKOS_TPL_EXPORTS) # KOKKOS_DEPENDENCE is used by kokkos_launch_compiler GLOBAL_SET(KOKKOS_COMPILE_DEFINITIONS KOKKOS_DEPENDENCE) # MSVC never goes through kokkos_launch_compiler IF(NOT MSVC) GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -DKOKKOS_DEPENDENCE) ENDIF() IF(Kokkos_ENABLE_TESTS AND NOT KOKKOS_HAS_TRILINOS) find_package(GTest QUIET) ENDIF() # Include a set of Kokkos-specific wrapper functions that # will either call raw CMake or TriBITS # These are functions like KOKKOS_INCLUDE_DIRECTORIES INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) # Check the environment and set certain variables # to allow platform-specific checks INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake) IF(NOT KOKKOS_HAS_TRILINOS) # This does not work in Trilinos and we simply don't care # to fix it for Trilinos # Gather information about the runtime environment INCLUDE(${KOKKOS_SRC_PATH}/cmake/build_env_info.cmake) check_git_setup() ENDIF() # The build environment setup goes in the following steps # 1) Check all the enable options. 
This includes checking Kokkos_DEVICES # 2) Check the compiler ID (type and version) # 3) Check the CXX standard and select important CXX flags # 4) Check for any third-party libraries (TPLs) like hwloc # 5) Check if optimizing for a particular architecture and add arch-specific flags KOKKOS_SETUP_BUILD_ENVIRONMENT() # Finish off the build # 6) Recurse into subdirectories and configure individual libraries # 7) Export and install targets OPTION(BUILD_SHARED_LIBS "Build shared libraries" OFF) SET(KOKKOS_COMPONENT_LIBRARIES kokkoscore kokkoscontainers kokkosalgorithms kokkossimd) SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES kokkos ${KOKKOS_COMPONENT_LIBRARIES}) IF (KOKKOS_HAS_TRILINOS) SET(TRILINOS_INCDIR ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR}) SET(KOKKOS_IS_SUBDIRECTORY TRUE) ELSEIF(HAS_PARENT) SET(KOKKOS_HEADER_DIR "include/kokkos") SET(KOKKOS_IS_SUBDIRECTORY TRUE) ELSE() SET(KOKKOS_HEADER_DIR "${CMAKE_INSTALL_INCLUDEDIR}") SET(KOKKOS_IS_SUBDIRECTORY FALSE) ENDIF() #------------------------------------------------------------------------------ # # A) Forward declare the package so that certain options are also defined for # subpackages ## This restores the old behavior of ProjectCompilerPostConfig.cmake # We must do this before KOKKOS_PACKAGE_DECL IF (KOKKOS_HAS_TRILINOS) # Overwrite the old flags at the top-level # Because Tribits doesn't use lists, it uses spaces for the list of CXX flags # we have to match the annoying behavior, also we have to preserve quotes # which needs another workaround. 
SET(KOKKOS_COMPILE_OPTIONS_TMP) IF (KOKKOS_ENABLE_HIP) LIST(APPEND KOKKOS_COMPILE_OPTIONS ${KOKKOS_AMDGPU_OPTIONS}) ENDIF() FOREACH(OPTION ${KOKKOS_COMPILE_OPTIONS}) STRING(FIND "${OPTION}" " " OPTION_HAS_WHITESPACE) IF(OPTION_HAS_WHITESPACE EQUAL -1) LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP "${OPTION}") ELSE() LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP "\"${OPTION}\"") ENDIF() ENDFOREACH() STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS_TMP}") LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS}) IF (KOKKOS_ENABLE_CUDA) LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_CUDA_OPTIONS}) ENDIF() FOREACH(XCOMP_FLAG ${KOKKOS_XCOMPILER_OPTIONS}) SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}") LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG}) ENDFOREACH() IF (KOKKOS_ENABLE_CUDA) STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}") FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS}) SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}") LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcudafe ${CUDAFE_FLAG}) ENDFOREACH() ENDIF() #These flags get set up in KOKKOS_PACKAGE_DECL, which means they #must be configured before KOKKOS_PACKAGE_DECL SET(KOKKOS_ALL_COMPILE_OPTIONS $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_ALL_COMPILE_OPTIONS}>) ENDIF() #------------------------------------------------------------------------------ # # D) Process the subpackages (subdirectories) for Kokkos # KOKKOS_PROCESS_SUBPACKAGES() #------------------------------------------------------------------------------ # # E) If Kokkos itself is enabled, process the Kokkos package # KOKKOS_PACKAGE_POSTPROCESS() KOKKOS_CONFIGURE_CORE() IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING) ADD_LIBRARY(kokkos INTERFACE) #Make sure in-tree projects can reference this as Kokkos:: #to match the installed target names ADD_LIBRARY(Kokkos::kokkos ALIAS kokkos) # all_libs target is required for TriBITS-compliance
ADD_LIBRARY(Kokkos::all_libs ALIAS kokkos) TARGET_LINK_LIBRARIES(kokkos INTERFACE ${KOKKOS_COMPONENT_LIBRARIES}) KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos) ENDIF() INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) # nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler. # Kokkos needs nvcc_wrapper in order to build. Other libraries and # executables also need nvcc_wrapper. Thus, we need to install it. # If the argument of DESTINATION is a relative path, CMake computes it # as relative to ${CMAKE_INSTALL_PREFIX}. # KOKKOS_INSTALL_ADDITIONAL_FILES will install nvcc wrapper and other generated # files KOKKOS_INSTALL_ADDITIONAL_FILES() # Finally - if we are a subproject - make sure the enabled devices are visible IF (HAS_PARENT) FOREACH(DEV ${KOKKOS_ENABLED_DEVICES}) #I would much rather not make these cache variables or global properties, but I can't #make any guarantees on whether PARENT_SCOPE is good enough to make #these variables visible where I need them SET(Kokkos_ENABLE_${DEV} ON PARENT_SCOPE) SET_PROPERTY(GLOBAL PROPERTY Kokkos_ENABLE_${DEV} ON) ENDFOREACH() ENDIF() kokkos-4.3.01/CONTRIBUTING.md000066400000000000000000000011231461675637500153520ustar00rootroot00000000000000# Contributing to Kokkos ## Pull Requests We actively welcome pull requests. 1. Fork the repo and create your branch from `develop`. 2. If you've added code that should be tested, add tests. 3. If you've changed APIs, update the documentation. 4. Ensure the test suite passes. ## Issues We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue. ## License By contributing to Kokkos, you agree that your contributions will be licensed under the LICENSE file in the root directory of this source tree. kokkos-4.3.01/Copyright.txt000066400000000000000000000005131461675637500156340ustar00rootroot00000000000000************************************************************************ Kokkos v.
4.0 Copyright (2022) National Technology & Engineering Solutions of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights in this software. kokkos-4.3.01/HOW_TO_SNAPSHOT000066400000000000000000000052571461675637500154360ustar00rootroot00000000000000 Developers of Kokkos (those who commit modifications to Kokkos) must maintain the snapshot of Kokkos in the Trilinos repository. This file contains instructions for how to snapshot Kokkos from github.com/kokkos to Trilinos. ------------------------------------------------------------------------ *** EVERYTHING GOES RIGHT WORKFLOW *** 1) Given a 'git clone' of Kokkos and of Trilinos repositories. 1.1) Let ${KOKKOS} be the absolute path to the Kokkos clone. This path *must* terminate with the directory name 'kokkos'; e.g., ${HOME}/kokkos . 1.2) Let ${TRILINOS} be the absolute path to the Trilinos directory. 2) Given that the Kokkos build & test is clean and changes are committed to the Kokkos clone. 3) Snapshot the current commit in the Kokkos clone into the Trilinos clone. This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}: ${KOKKOS}/scripts/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages 4) Verify the snapshot commit happened as expected cd ${TRILINOS}/packages/kokkos git log -1 --name-only 5) Modify, build, and test Trilinos with the Kokkos snapshot. 6) Given that the Trilinos build & test is clean and changes are committed to the Trilinos clone. 7) Attempt to push to the Kokkos repository. If push fails then you must 'remove the Kokkos snapshot' from your Trilinos clone. See below. 8) Attempt to push to the Trilinos repository. If updating for a failed push requires you to change Kokkos you must 'remove the Kokkos snapshot' from your Trilinos clone. See below.
------------------------------------------------------------------------ *** WHEN SOMETHING GOES WRONG AND YOU MUST *** *** REMOVE THE KOKKOS SNAPSHOT FROM YOUR TRILINOS CLONE *** 1) Query the Trilinos clone commit log. git log --oneline 2) Note the <SHA1> of the commit to the Trilinos clone immediately BEFORE the Kokkos snapshot commit. Copy this <SHA1> for use in the next command. 3) IF more than one outstanding commit then you can remove just the Kokkos snapshot commit with 'git rebase -i'. Edit the rebase file. Remove or comment out the Kokkos snapshot commit entry. git rebase -i <SHA1> 4) IF the Kokkos snapshot commit is the one and only outstanding commit then remove just that commit. git reset --hard HEAD~1 ------------------------------------------------------------------------ *** REGARDING 'snapshot.py' TOOL *** The 'snapshot.py' tool is developed and maintained by the Center for Computing Research (CCR) Software Engineering, Maintenance, and Support (SEMS) team. Contact Brent Perschbacher for questions. ------------------------------------------------------------------------ kokkos-4.3.01/LICENSE000066400000000000000000000311271461675637500141350ustar00rootroot00000000000000 ============================================================================== Kokkos is under the Apache License v2.0 with LLVM Exceptions: ============================================================================== Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity.
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Apache 2.0 ---- LLVM Exceptions to the Apache 2.0 License ---- As an exception, if, as a result of your compiling your source code, portions of this Software are embedded into an Object form of such source code, you may redistribute such embedded portions in such Object form without complying with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
In addition, if you combine or link compiled forms of this Software with software that is licensed under the GPLv2 ("Combined Software") and if a court of competent jurisdiction determines that the patent provision (Section 3), the indemnity provision (Section 9) or other Section of the License conflicts with the conditions of the GPLv2, you may retroactively and prospectively choose to deem waived or otherwise exclude such Section(s) of the License, but only in their entirety and only with respect to the Combined Software. ============================================================================== Software from third parties included in Kokkos: ============================================================================== Kokkos contains third party software which is under different license terms. All such code will be identified clearly using at least one of two mechanisms: 1) It will be in a separate directory tree with its own `LICENSE.txt` or `LICENSE` file at the top containing the specific license and restrictions which apply to that software, or 2) It will contain specific license and restriction terms at the top of every file. THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Questions? Contact: Christian R. Trott (crtrott@sandia.gov) and Damien T. 
Lebrun-Grandie (lebrungrandt@ornl.gov) ************************************************************************ kokkos-4.3.01/LICENSE_FILE_HEADER000066400000000000000000000010701461675637500157360ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER kokkos-4.3.01/Makefile.kokkos000066400000000000000000002033651461675637500160750ustar00rootroot00000000000000# Default settings common options. KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MINOR = 3 KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial #KOKKOS_DEVICES ?= "OpenMP" KOKKOS_DEVICES ?= "Threads" # Options: # Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: Power8,Power9 # AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC KOKKOS_ARCH ?= "" # Options: yes,no KOKKOS_DEBUG ?= "no" # Options: hwloc KOKKOS_USE_TPLS ?= "" # Options: c++17,c++1z,c++20,c++2a,c++23,c++2b KOKKOS_CXX_STANDARD ?= "c++17" # Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings 
KOKKOS_OPTIONS ?= "" KOKKOS_CMAKE ?= "no" KOKKOS_TRIBITS ?= "no" KOKKOS_STANDALONE_CMAKE ?= "no" # Default settings specific options. # Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async KOKKOS_CUDA_OPTIONS ?= "" # Options: rdc KOKKOS_HIP_OPTIONS ?= "" # Default settings specific options. # Options: enable_async_dispatch KOKKOS_HPX_OPTIONS ?= "" # Helper functions for conversion to upper case uppercase_TABLE:=a,A b,B c,C d,D e,E f,F g,G h,H i,I j,J k,K l,L m,M n,N o,O p,P q,Q r,R s,S t,T u,U v,V w,W x,X y,Y z,Z uppercase_internal=$(if $1,$$(subst $(firstword $1),$(call uppercase_internal,$(wordlist 2,$(words $1),$1),$2)),$2) uppercase=$(eval uppercase_RESULT:=$(call uppercase_internal,$(uppercase_TABLE),$1))$(uppercase_RESULT) # Return a 1 if a string contains a substring and 0 if not # Note the search string should be without '"' # Example: $(call kokkos_has_string,"hwloc,libdl",hwloc) # Will return a 1 kokkos_has_string=$(if $(findstring $(call uppercase,$2),$(call uppercase,$1)),1,0) # Returns 1 if the path exists, 0 otherwise # Example: $(call kokkos_path_exists,/path/to/file) # Will return a 1 if /path/to/file exists kokkos_path_exists=$(if $(wildcard $1),1,0) # Check for general settings KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes) KOKKOS_INTERNAL_ENABLE_CXX17 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++17) KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z) KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++20) KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a) KOKKOS_INTERNAL_ENABLE_CXX23 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++23) KOKKOS_INTERNAL_ENABLE_CXX2B := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2b) KOKKOS_INTERNAL_ENABLE_CXX26 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++26) KOKKOS_INTERNAL_ENABLE_CXX2C := $(call 
kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2c) # Check for external libraries. KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc) # Check for advanced settings. KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings) KOKKOS_INTERNAL_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization) KOKKOS_INTERNAL_ENABLE_TUNING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_tuning) KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align) KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check) KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_large_mem_tests) # deprecated KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg) KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),force_uvm) KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr) KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),disable_malloc_async) KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) # deprecated KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics) # deprecated KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_desul_atomics) KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),impl_disable_bundled_mdspan) KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call 
kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code) KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecation_warnings) KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc) # Check for Kokkos Host Execution Spaces one of which must be on. KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP) KOKKOS_INTERNAL_USE_THREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Threads) KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX) KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 0) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) KOKKOS_INTERNAL_USE_SERIAL := 1 endif endif endif # Check for other Execution Spaces. KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda) KOKKOS_INTERNAL_USE_HIP := $(call kokkos_has_string,$(KOKKOS_DEVICES),HIP) KOKKOS_INTERNAL_USE_SYCL := $(call kokkos_has_string,$(KOKKOS_DEVICES),SYCL) KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget) KOKKOS_INTERNAL_USE_OPENACC := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenACC) KOKKOS_DEVICELIST = ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) KOKKOS_DEVICELIST += Serial endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) KOKKOS_DEVICELIST += OpenMP endif ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) KOKKOS_DEVICELIST += Threads endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) KOKKOS_DEVICELIST += HPX endif ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_DEVICELIST += Cuda endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) KOKKOS_DEVICELIST += HIP endif ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) KOKKOS_DEVICELIST += SYCL endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) KOKKOS_DEVICELIST += OPENMPTARGET endif ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) KOKKOS_DEVICELIST += OpenACC endif ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 
1) KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) ifeq ($(origin CUDA_PATH), undefined) CUDA_PATH = $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) endif ifeq ($(CUDA_PATH),) CUDA_PATH = $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) endif KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .) endif # Check OS. KOKKOS_OS := $(strip $(shell uname -s)) KOKKOS_INTERNAL_OS_CYGWIN := $(call kokkos_has_string,$(KOKKOS_OS),CYGWIN) KOKKOS_INTERNAL_OS_LINUX := $(call kokkos_has_string,$(KOKKOS_OS),Linux) KOKKOS_INTERNAL_OS_DARWIN := $(call kokkos_has_string,$(KOKKOS_OS),Darwin) # Check compiler. KOKKOS_CXX_VERSION := $(strip $(shell $(CXX) --version 2>&1)) KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Intel Corporation) KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep -c "CC-")) KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep -c nvcc)>0" | bc)) KOKKOS_INTERNAL_COMPILER_NVHPC := $(strip $(shell $(CXX) --version 2>&1 | grep -c "nvc++")) KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) KOKKOS_INTERNAL_COMPILER_CRAY_CLANG := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep -c "clang++")) KOKKOS_INTERNAL_COMPILER_INTEL_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),oneAPI) KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC) # TODO fujitsu can emulate gcc or clang. Only clang mode works at the moment. 
KOKKOS_INTERNAL_COMPILER_FUJITSU := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),FUJITSU)

# Check Host Compiler if using NVCC through nvcc_wrapper
# When $(CXX) is nvcc_wrapper, the Intel/Clang detection is redone against the
# host compiler banner printed by `--host-version`.
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
  KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER := $(strip $(shell echo $(CXX) | grep -c nvcc_wrapper))

  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER), 1)
    KOKKOS_CXX_HOST_VERSION := $(strip $(shell $(CXX) $(CXXFLAGS) --host-version 2>&1))
    KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_HOST_VERSION),Intel Corporation)
    KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_HOST_VERSION),clang)
  endif
endif

# Normalize a detection result of 2 to 1 so the `ifeq (..., 1)` tests below match.
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
  KOKKOS_INTERNAL_COMPILER_CLANG = 1
endif

# Apple Clang passes both clang and apple clang tests, so turn off clang.
ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
  KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif
# AMD HCC passes both clang and hcc test so turn off clang
ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
  KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif

# Fujitsu passes also as clang and gcc respectively
ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
    # TODO handle gcc flags and workaround for bug?
    # fujitsu (gcc mode) is bugged, see https://github.com/kokkos/kokkos/issues/4730
    $(warning Warning: ${CXX} in Trad Mode '-Nnoclang' (default) is not recommended. Use 'CXX = ${CXX} -Nclang' instead.)
    # HACK since fujitsu only accepts some gcc flags, disable gcc here?
    # KOKKOS_INTERNAL_COMPILER_GCC = 0
  endif
  # TODO handle clang flags
  # warnings: works fine as is
  # openmp: handled
  #KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif

ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
  # TODO empty variable if fujitsu (clang mode) passes as clang
  # Third word of the "version" line with the dots removed, e.g. 4.0.0 -> 400.
  KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell $(CXX) --version | grep version | cut -d ' ' -f3 | tr -d '.')

  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0)
      $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher)
    endif

    KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1
  endif
endif

# Set compiler warnings flags.
ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
    KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    # TODO check if cray accepts GNU style warnings
    KOKKOS_INTERNAL_COMPILER_WARNINGS =
  else
    #gcc
    KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
  endif
else
  KOKKOS_INTERNAL_COMPILER_WARNINGS =
endif

# Set OpenMP flags.
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1)
    KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1)
    # fujitsu (clang mode) fails with `=libomp`
    KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
  else
    KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
  endif
else
  ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
    KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    # OpenMP is turned on by default in Cray compiler environment.
    KOKKOS_INTERNAL_OPENMP_FLAG :=
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1)
    KOKKOS_INTERNAL_OPENMP_FLAG := -fiopenmp
  else
    KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
  endif
endif

# Set OpenMPTarget flags. A plain Clang (none of the cases below) gets no flag here.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1)
    KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1)
    KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -mp=gpu
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 0)
    #Assume GCC
    KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
  # Set OpenACC flags.
  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1)
    KOKKOS_INTERNAL_OPENACC_FLAG := -acc
  else
    $(error Makefile.kokkos: OpenACC is enabled but the compiler must be NVHPC (got version string $(KOKKOS_CXX_VERSION)))
  endif
endif

# Set C++ version flags.
# Each C++ standard has a final name (17/20/23) and a draft name (1z/2a/2b);
# both branches must define the same six variables.
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
  KOKKOS_INTERNAL_CXX17_FLAG := -hstd=c++17
  KOKKOS_INTERNAL_CXX1Z_FLAG := -hstd=c++1z
  KOKKOS_INTERNAL_CXX20_FLAG := -hstd=c++20
  KOKKOS_INTERNAL_CXX2A_FLAG := -hstd=c++2a
  KOKKOS_INTERNAL_CXX23_FLAG := -hstd=c++23
  KOKKOS_INTERNAL_CXX2B_FLAG := -hstd=c++2b
else
  KOKKOS_INTERNAL_CXX17_FLAG := -std=c++17
  KOKKOS_INTERNAL_CXX1Z_FLAG := -std=c++1z
  KOKKOS_INTERNAL_CXX20_FLAG := -std=c++20
  KOKKOS_INTERNAL_CXX2A_FLAG := -std=c++2a
  KOKKOS_INTERNAL_CXX23_FLAG := -std=c++23
  KOKKOS_INTERNAL_CXX2B_FLAG := -std=c++2b
endif

# Check for Kokkos Architecture settings.

# Intel based.
# Intel CPU keywords: the KOKKOS_ARCH keyword is also the variable suffix, so
# KOKKOS_INTERNAL_USE_ARCH_<keyword> is generated for each entry of one list.
$(foreach kokkos_arch_key,KNC SNB HSW BDW SKL SKX KNL ICL ICX SPR,\
  $(eval KOKKOS_INTERNAL_USE_ARCH_$(kokkos_arch_key) := $(call kokkos_has_string,$(KOKKOS_ARCH),$(kokkos_arch_key))))

# Intel GPU keywords: the variable suffix differs from the keyword, so these
# stay spelled out one by one.
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN     := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9    := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11   := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP)
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1     := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1)
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP    := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP)
KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC     := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC)

# NVIDIA based.
# Location of the nvcc_wrapper script shipped under $(KOKKOS_PATH)/bin.
NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper
# One probe per supported NVIDIA keyword in KOKKOS_ARCH.
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler30)
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler32)
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler35)
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler37)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell50)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell52)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell53)
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal61)
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60)
KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75)
KOKKOS_INTERNAL_USE_ARCH_AMPERE80 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere80)
KOKKOS_INTERNAL_USE_ARCH_AMPERE86 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere86)
KOKKOS_INTERNAL_USE_ARCH_ADA89 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ada89)
KOKKOS_INTERNAL_USE_ARCH_HOPPER90 := $(call kokkos_has_string,$(KOKKOS_ARCH),Hopper90)
# Sum of all NVIDIA probes, computed by the shell's expr.
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
  + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
  + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
  + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
  + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
  + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
  + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) \
  + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
  + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
  + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
  + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
  + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
  + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80) \
  + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE86) \
  + $(KOKKOS_INTERNAL_USE_ARCH_ADA89) \
  + $(KOKKOS_INTERNAL_USE_ARCH_HOPPER90))

#SEK: This seems like a bug to me
# Fallback when no keyword above matched: a bare "Maxwell" / "Kepler" in
# KOKKOS_ARCH is mapped onto the MAXWELL50 / KEPLER35 variables and the sum is
# recomputed from just those two.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
  KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell)
  KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler)
  KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
    + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50))
endif

# Exactly one NVIDIA architecture with Clang: derive CUDA_PATH from the nvcc
# found on PATH (unless CUDA_PATH is already set) and, for OpenMPTarget, append
# --cuda-path to the offload flag.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
    CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
    ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
      KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH)
    endif
  endif
endif
# ARM based.
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv80)
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv81)
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2)
KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
# Sum of the ARM probes, computed by piping the expression through bc.
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc))

# IBM based.
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power9)
# Sum of the IBM probes, computed by piping the expression through bc.
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))

# AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3)
KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
# Plain "Zen" is only probed when neither Zen2 nor Zen3 matched (presumably
# because the "Zen" probe would also match those longer keywords). Default to 0
# so the variable is always a number: it is later used as an arithmetic operand
# and in `ifeq (..., 1)` tests.
KOKKOS_INTERNAL_USE_ARCH_ZEN := 0
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0)
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0)
    KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
  endif
endif

# AMD GPUs. Each AMD_GFX* keyword that has a legacy VEGA*/NAVI* spelling falls
# back to probing that spelling when the AMD_GFX* keyword did not match.
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 0)
  KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 0)
  KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0)
  KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
  KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
  KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
endif

# Any AVX?
# Roll the individual host keywords up into instruction-set groups.
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))

# Incompatible flags?
# bc evaluates "<sum> > 1" and prints 1 or 0. The pipe must sit inside
# $(shell ...) so that bc actually runs; outside it, " | bc" is just literal
# text and the checks below can never fire.
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc))
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
  $(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
  $(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
endif

# Generating the list of Flags.

# Start from empty flag lists; include and library search paths are only added
# here when the build is not driven by CMake.
KOKKOS_CPPFLAGS =
KOKKOS_LIBDIRS =
ifneq ($(KOKKOS_CMAKE), yes)
  KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
endif
KOKKOS_TPL_INCLUDE_DIRS =
KOKKOS_TPL_LIBRARY_DIRS =
KOKKOS_TPL_LIBRARY_NAMES =

ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_WARNINGS)
endif

# libdl is always linked.
KOKKOS_LIBS = -ldl
KOKKOS_TPL_LIBRARY_NAMES += dl
ifneq ($(KOKKOS_CMAKE), yes)
  KOKKOS_LIBDIRS = -L$(shell pwd)
  # CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command
  KOKKOS_CXXLDFLAGS = -L$(shell pwd)
endif
KOKKOS_LINK_FLAGS =
KOKKOS_SRC =
KOKKOS_HEADERS =

# Generating the KokkosCore_config.h file.
# The configuration header is assembled line by line in a temporary file.
# Every `tmp := $(call kokkos_append_header,...)` below runs a shell `echo ... >>`
# at parse time, so the ORDER of these statements is the order of lines in the
# generated header. `tmp` itself is only a throw-away sink for the empty result.
KOKKOS_INTERNAL_CONFIG_TMP=KokkosCore_config.tmp
KOKKOS_CONFIG_HEADER=KokkosCore_config.h
# Functions for generating config header file
# $1 is handed to the shell's echo as written, so callers pass it pre-quoted.
kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP))

# assign hash sign to variable for compat. with make 4.3
H := \#

# Do not append first line
# NOTE(review): this truncating write uses the literal file name, which equals
# the value of KOKKOS_INTERNAL_CONFIG_TMP above; keep the two in sync.
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,"Makefile constructed configuration:")
tmp := $(call kokkos_append_header,"----------------------------------------------*/")

# Include guard: the header may only be pulled in through Kokkos_Macros.hpp.
tmp := $(call kokkos_append_header,'$H''if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
tmp := $(call kokkos_append_header,'$H''error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
tmp := $(call kokkos_append_header,'$H''else')
tmp := $(call kokkos_append_header,'$H''define KOKKOS_CORE_CONFIG_H')
tmp := $(call kokkos_append_header,'$H''endif')

tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION $(KOKKOS_VERSION)")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION_MAJOR $(KOKKOS_VERSION_MAJOR)")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION_MINOR $(KOKKOS_VERSION_MINOR)")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION_PATCH $(KOKKOS_VERSION_PATCH)")

tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"/* Execution Spaces */")

# One KOKKOS_ENABLE_<backend> define per enabled execution space.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA")
endif

ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_HIP')
endif

ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_SYCL')
endif

# OpenMPTarget additionally links libatomic and, with GCC, enables a workaround macro.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  KOKKOS_LIBS += -latomic
  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMPTARGET')
  ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_OPENACC")
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMP')
endif

ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_THREADS")
endif

ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX")
endif

ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL")
endif

#only add the c++ standard flags if this is not CMake
tmp := $(call kokkos_append_header,"/* General Settings */")
# Deprecated code stays enabled unless it was explicitly disabled.
ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEPRECATED_CODE_4")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEPRECATION_WARNINGS")
endif
# C++ standard selection. The draft names (1z/2a/2b/2c) emit the same
# KOKKOS_ENABLE_CXX* define as the corresponding final names (17/20/23/26).
# Only the CXX17 case skips the compiler flag when KOKKOS_STANDALONE_CMAKE is yes.
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1)
  ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG)
  endif
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX20), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX20_FLAG)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX23), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX23_FLAG)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX23")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2B), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2B_FLAG)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX23")
endif
# NOTE(review): no definition of KOKKOS_INTERNAL_CXX26_FLAG or
# KOKKOS_INTERNAL_CXX2C_FLAG is visible next to the other C++ standard flags;
# confirm they are set somewhere, otherwise these two cases add no compiler flag.
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX26), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX26_FLAG)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2C), 1)
  #I cannot make CMake add this in a good way - so add it here
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2C_FLAG)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26")
endif

# Debug builds: add -g (plus -lineinfo for NVCC) and enable the debug defines.
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
    KOKKOS_CXXFLAGS += -lineinfo
  endif

  KOKKOS_CXXFLAGS += -g
  KOKKOS_LDFLAGS += -g

  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG")
  ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
  endif
endif
ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_COMPLEX_ALIGN")
endif

ifeq ($(KOKKOS_INTERNAL_ENABLE_TUNING), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TUNING")
endif

# Emitted unconditionally.
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LIBDL")

# hwloc: search paths and -lhwloc are only added for non-CMake builds, and the
# paths only when HWLOC_PATH is non-empty; the define is emitted either way.
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
  ifneq ($(KOKKOS_CMAKE), yes)
    ifneq ($(HWLOC_PATH),)
      KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
      KOKKOS_LIBDIRS += -L$(HWLOC_PATH)/lib
      KOKKOS_CXXLDFLAGS += -L$(HWLOC_PATH)/lib
      KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include
      KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib
    endif
    KOKKOS_LIBS += -lhwloc
    KOKKOS_TPL_LIBRARY_NAMES += hwloc
  endif

  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC")
endif

ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS")
endif

tmp := $(call kokkos_append_header,"/* Optimization Settings */")

ifeq ($(KOKKOS_INTERNAL_AGGRESSIVE_VECTORIZATION), 1)
  # deprecated
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION")
endif

tmp := $(call kokkos_append_header,"/* Cuda Settings */")

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  #deprecated
  # The LDG define is emitted when requested explicitly, and always with Clang.
  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
  endif

  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
    # deprecated
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_UVM")
  endif

  # Relocatable device code: the flag spelling differs between Clang and the
  # other (NVCC) case.
  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
      KOKKOS_CXXFLAGS += -fcuda-rdc
      KOKKOS_LDFLAGS += -fcuda-rdc
    else
      KOKKOS_CXXFLAGS += --relocatable-device-code=true
      KOKKOS_LDFLAGS += --relocatable-device-code=true
    endif
  endif

  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
    ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -ge 90; echo $$?),0)
      # This diagnostic is just plain wrong in CUDA 9
      # See https://github.com/kokkos/kokkos/issues/1470
      KOKKOS_CXXFLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored
    endif
  endif

  # Lambda support is always enabled for CUDA; only NVCC needs an extra flag.
  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
    KOKKOS_CXXFLAGS += -extended-lambda
  endif

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
  endif

  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1)
    ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
      KOKKOS_CXXFLAGS += -expt-relaxed-constexpr
    endif

    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR")
    endif
  endif

  # When disabled, the macro is still recorded in the header as a commented-out undef.
  ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC")
  else
    tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC */")
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
  ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_HPX_ASYNC_DISPATCH")
  endif
endif

# Add Architecture flags.
# Each host architecture block emits its KOKKOS_ARCH_* defines and appends the
# matching tuning flags to both KOKKOS_CXXFLAGS and KOKKOS_LDFLAGS. Branches
# for the Cray compiler deliberately add no flags.

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
    KOKKOS_LDFLAGS +=
  else
    KOKKOS_CXXFLAGS += -march=armv8-a
    KOKKOS_LDFLAGS += -march=armv8-a
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
    KOKKOS_LDFLAGS +=
  else
    KOKKOS_CXXFLAGS += -march=armv8.1-a
    KOKKOS_LDFLAGS += -march=armv8.1-a
  endif
endif

# A64FX: the SVE vector width flag is appended once per matching compiler test
# (Clang and GCC are tested independently).
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_A64FX")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")

  KOKKOS_CXXFLAGS += -march=armv8.2-a+sve
  KOKKOS_LDFLAGS += -march=armv8.2-a+sve
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    KOKKOS_CXXFLAGS += -msve-vector-bits=512
    KOKKOS_LDFLAGS += -msve-vector-bits=512
  endif
  ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
    KOKKOS_CXXFLAGS += -msve-vector-bits=512
    KOKKOS_LDFLAGS += -msve-vector-bits=512
  endif
endif

# AMD Zen family: the Intel compiler only gets -mavx2, everything else gets
# the znverN -march/-mtune pair.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -mavx2
    KOKKOS_LDFLAGS += -mavx2
  else
    KOKKOS_CXXFLAGS += -march=znver1 -mtune=znver1
    KOKKOS_LDFLAGS += -march=znver1 -mtune=znver1
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -mavx2
    KOKKOS_LDFLAGS += -mavx2
  else
    KOKKOS_CXXFLAGS += -march=znver2 -mtune=znver2
    KOKKOS_LDFLAGS += -march=znver2 -mtune=znver2
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN3")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -mavx2
    KOKKOS_LDFLAGS += -mavx2
  else
    KOKKOS_CXXFLAGS += -march=znver3 -mtune=znver3
    KOKKOS_LDFLAGS += -march=znver3 -mtune=znver3
  endif
endif

# ThunderX also reports itself as ARMv8.0, ThunderX2 as ARMv8.1.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
    KOKKOS_LDFLAGS +=
  else
    KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx
    KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    KOKKOS_CXXFLAGS +=
    KOKKOS_LDFLAGS +=
  else
    KOKKOS_CXXFLAGS += -mtune=thunderx2t99 -mcpu=thunderx2t99
    KOKKOS_LDFLAGS += -mtune=thunderx2t99 -mcpu=thunderx2t99
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -mavx
    KOKKOS_LDFLAGS += -mavx
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    # Cray: no architecture flags are added.
  else
    # Assume that this is really a GNU compiler.
    KOKKOS_CXXFLAGS += -mavx
    KOKKOS_LDFLAGS += -mavx
  endif
endif

# NOTE(review): KOKKOS_INTERNAL_USE_ARCH_POWER7 is not assigned alongside the
# POWER8/POWER9 probes; confirm it is set elsewhere or that this branch is dead.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER7")

  # Assume that this is really a GNU compiler.
  KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7
  KOKKOS_LDFLAGS += -mcpu=power7 -mtune=power7
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER8")

  KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
  KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER9")

  KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
  KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX2
    KOKKOS_LDFLAGS += -xCORE-AVX2
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    # Cray: no architecture flags are added.
  else
    # Assume that this is really a GNU compiler.
    KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2
    KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2
  endif
endif

# Broadwell uses the Haswell flags plus -mrtm in the GNU-style branch.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX2
    KOKKOS_LDFLAGS += -xCORE-AVX2
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    # Cray: no architecture flags are added.
  else
    # Assume that this is really a GNU compiler.
    KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm
    KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512MIC")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xMIC-AVX512
    KOKKOS_LDFLAGS += -xMIC-AVX512
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    # Cray: no architecture flags are added.
  else
    # Assume that this is really a GNU compiler.
    KOKKOS_CXXFLAGS += -march=knl -mtune=knl
    KOKKOS_LDFLAGS += -march=knl -mtune=knl
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKL), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xSKYLAKE
    KOKKOS_LDFLAGS += -xSKYLAKE
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    # Cray: no architecture flags are added.
  else
    # Neither Intel nor Cray: use a GNU-style -march flag.
    KOKKOS_CXXFLAGS += -march=skylake
    KOKKOS_LDFLAGS += -march=skylake
  endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKX), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX512
    KOKKOS_LDFLAGS += -xCORE-AVX512
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    # Cray: no architecture flags are added.
  else
    # Neither Intel nor Cray: use GNU-style -march/-mtune flags.
    KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512
    KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512
  endif
endif

# ICL, ICX and SPR add GNU-style flags regardless of which compiler was detected.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICL), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
  KOKKOS_CXXFLAGS += -march=icelake-client -mtune=icelake-client
  KOKKOS_LDFLAGS += -march=icelake-client -mtune=icelake-client
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICX), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
  KOKKOS_CXXFLAGS += -march=icelake-server -mtune=icelake-server
  KOKKOS_LDFLAGS += -march=icelake-server -mtune=icelake-server
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SPR), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
  KOKKOS_CXXFLAGS += -march=sapphirerapids -mtune=sapphirerapids
  KOKKOS_LDFLAGS += -march=sapphirerapids -mtune=sapphirerapids
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC")
  KOKKOS_CXXFLAGS += -mmic
  KOKKOS_LDFLAGS += -mmic
endif

# Figure out the architecture flag for Cuda.
# ---------------------------------------------------------------------------
# NVIDIA GPU architecture flag.
#
# This section
#   1. decides whether an NVIDIA architecture flag is needed at all,
#   2. picks the compiler-specific spelling of that flag and stores it in
#      KOKKOS_INTERNAL_CUDA_ARCH_FLAG,
#   3. appends "=sm_XX" for the selected architecture and records the matching
#      KOKKOS_ARCH_* defines through kokkos_append_header, and
#   4. adds the finished flag to the compile (and, for some compilers, link)
#      flags.
#
# kokkos_append_header and $H are defined earlier in this file (not visible
# here); presumably the former appends one line to the generated config header
# and $H expands to '#'.
# ---------------------------------------------------------------------------

# The CUDA backend always needs the flag ...
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  KOKKOS_INTERNAL_USE_CUDA_ARCH=1
endif
# ... and so does the HIP backend, but only when the compiler is NVCC.
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
    KOKKOS_INTERNAL_USE_CUDA_ARCH=1
  endif
endif

# Compiler-specific spelling of the architecture option. Any compiler other
# than NVCC or Clang is a hard configuration error at this point.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
    KOKKOS_CXXFLAGS += -x cuda
  else
    $(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) )
  endif
  # NOTE(review): redundant - this conditional is only entered when the
  # variable already equals 1.
  KOKKOS_INTERNAL_USE_CUDA_ARCH = 1
endif

# With OpenMPTarget the flag is replaced by an OpenMP offload spelling; this
# assignment comes last, so it overrides whatever was chosen above.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1)
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp
  else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp --offload-arch
  endif
endif

# Do not append the architecture suffix (or record the architecture defines)
# when the compiler is Cray Clang or NVHPC.
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 0)
  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)

    # Let's start with adding architecture defines.
    # Each block below records the family define (where one exists) and the
    # specific define, then appends "=sm_XX" to the flag. The suffixes simply
    # concatenate, so presumably at most one KOKKOS_INTERNAL_USE_ARCH_* value
    # is 1 - TODO confirm against the architecture checks earlier in the file.
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
    endif
    # Turing has no separate family define; only the specific one is recorded.
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86
    endif
    # Ada likewise records only the specific define.
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89
    endif
    ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1)
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER")
      tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90")
      KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_90
    endif
  endif
endif

# Publish the finished flag. KOKKOS_INTERNAL_USE_ARCH_NVIDIA is computed
# elsewhere in this file. The flag always goes to the compile flags; it is
# added to the link flags only for NVCC, or for Clang when OpenMPTarget is on.
ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)

  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
    KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
  endif
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
      KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
    endif
  endif
endif

# Figure out the architecture flag for ROCm.
# ---------------------------------------------------------------------------
# AMD GPU architectures.
# Each block records the specific KOKKOS_ARCH_AMD_GFX* define plus the generic
# KOKKOS_ARCH_AMD_GPU define, and selects the matching --offload-arch value.
# kokkos_append_header and $H are defined earlier in this file (not visible
# here); presumably $H expands to '#'.
# ---------------------------------------------------------------------------
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX906")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX908")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX90A")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX940")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx940
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx942
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1030
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1100")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100
endif

# HIP backend: sources, headers, the desul lock-array source, the architecture
# flag selected above, and the relocatable-device-code switch.
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp)
  KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp)

  KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
  KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)

  ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
    tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
    KOKKOS_CXXFLAGS+=-fgpu-rdc
    KOKKOS_LDFLAGS+=-fgpu-rdc
  else
    KOKKOS_CXXFLAGS+=-fno-gpu-rdc
    KOKKOS_LDFLAGS+=-fno-gpu-rdc
  endif
endif

# ---------------------------------------------------------------------------
# Figure out Intel architecture flags.
# KOKKOS_INTERNAL_LC_BACKEND is the lower-case backend name spliced into the
# "-f<backend>-targets" / "-X<backend>-target-backend" options below. If both
# SYCL and OpenMPTarget are enabled the later assignment (openmp) wins.
# ---------------------------------------------------------------------------
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
  KOKKOS_INTERNAL_LC_BACKEND := sycl
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  KOKKOS_INTERNAL_LC_BACKEND := openmp
endif

# Generic Intel GPU: plain spir64 target, no specific device.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN")
  KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64
endif
# Specific Intel GPUs: spir64_gen target plus an explicit "-device" selection.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9")
  KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen9"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11")
  KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen11"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP")
  KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen12lp"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1")
  KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device dg1"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP")
  KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device 12.50.4"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_PVC")
  KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device 12.60.7"
endif

# SYCL backend: sources, headers, SYCL compile/link options and the Intel
# architecture flag chosen above.
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.hpp)

  KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda -fsycl-dead-args-optimization
  KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
  KOKKOS_LDFLAGS+=-fsycl
  KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
endif

# OpenMPTarget also receives the Intel architecture flag (empty unless an
# Intel GPU architecture was selected) and defines __STRICT_ANSI__.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) -D__STRICT_ANSI__
  KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
endif

# The desul-atomics toggles no longer do anything: warn the user and, for the
# "disable" request, reset the internal switch.
ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 1)
  $(warning disable_desul_atomics option has been removed. Desul atomics cannot be disabled.)
  KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS := 0
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1)
  $(warning enable_desul_atomics option has been removed. Desul atomics are always enabled.)
endif

# Bundled third-party include directories.
KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/desul/include

ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0)
  KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/mdspan/include
endif
# Recorded unconditionally, i.e. also when the bundled mdspan copy is disabled.
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_MDSPAN")

# ---------------------------------------------------------------------------
# Install the freshly generated KokkosCore_config.tmp as KokkosCore_config.h,
# but only when the header is missing or the diff between the two contains at
# least one line mentioning "define" - presumably so that an unchanged
# configuration does not touch the header and force a rebuild.
# `ls` prints exactly the file name when the header exists, which is what the
# ifeq below tests for.
# ---------------------------------------------------------------------------
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)

ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
  KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep -c define))
else
  KOKKOS_INTERNAL_NEW_CONFIG := 1
endif

ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
  tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)

  # Functions for generating config header file
  #   kokkos_start_config_header  $1 = output file; copies the HeaderSet
  #                               template with @INCLUDE_NEXT_FILE@ removed.
  #   kokkos_update_config_header $1 = header-guard tag, $2 = input file,
  #                               $3 = output file; substitutes
  #                               @HEADER_GUARD_TAG@.
  #   kokkos_append_config_header $1 = text to echo, $2 = file to append to.
  kokkos_start_config_header = $(shell sed 's~@INCLUDE_NEXT_FILE@~~g' $(KOKKOS_PATH)/cmake/KokkosCore_Config_HeaderSet.in > $1)
  kokkos_update_config_header = $(shell sed 's~@HEADER_GUARD_TAG@~$1~g' $2 > $3)
  # Parentheses must balance here: a surplus ')' would become part of the
  # macro's value and be returned by every $(call ...).
  kokkos_append_config_header = $(shell echo $1 >> $2)

  # Start all four backend headers from the template ...
  tmp := $(call kokkos_start_config_header, "KokkosCore_Config_FwdBackend.tmp")
  tmp := $(call kokkos_start_config_header, "KokkosCore_Config_SetupBackend.tmp")
  tmp := $(call kokkos_start_config_header, "KokkosCore_Config_DeclareBackend.tmp")
  tmp := $(call kokkos_start_config_header, "KokkosCore_Config_PostInclude.tmp")
  # ... and turn three of them into .hpp files with their header guards.
  # KokkosCore_Config_PostInclude.tmp is created above but not processed any
  # further in this section.
  tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp")
  tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp")
  tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp")

  # Append one include line per enabled backend to the Fwd and Declare headers
  # (and to the Setup header for CUDA, SYCL and HIP).
  # NOTE(review): in every call below the text after `include` is empty, so a
  # bare include directive without a header name would be appended. The header
  # name (presumably an angle-bracketed path) appears to be missing - restore
  # it from the upstream file before relying on the generated headers.
  ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp")
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp")
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp")
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp")
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp")
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp")
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp")
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp")
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp")
    tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp")
  endif
endif

# Backend-independent headers and sources.
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)

KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)

# CUDA backend: sources, headers, and - when CUDA_PATH is set - the toolkit's
# include and library directories (lib64 is preferred over lib; having neither
# is a hard error).
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
  KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
  ifneq ($(CUDA_PATH),)
    # The header search path belongs in the preprocessor flags
    # (KOKKOS_CPPFLAGS); a misspelled variable name here would silently
    # discard it.
    KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include
    ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib64), 1)
      KOKKOS_LIBDIRS += -L$(CUDA_PATH)/lib64
      KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64
      KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64
    else ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib), 1)
      KOKKOS_LIBDIRS += -L$(CUDA_PATH)/lib
      KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib
      KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib
    else
      $(error Can't find CUDA library directory: no lib64 or lib directory in $(CUDA_PATH))
    endif
    KOKKOS_TPL_INCLUDE_DIRS += $(CUDA_PATH)/include
    # Clang additionally has to be told where the CUDA installation lives.
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
      KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH)
    endif
  endif
  KOKKOS_LIBS += -lcudart -lcuda
  KOKKOS_TPL_LIBRARY_NAMES += cudart cuda
endif

# OpenMPTarget backend. When CUDA is enabled as well, the OpenMPTarget flag is
# passed to the compiler behind -Xcompiler.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp)
  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
  else
    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
  endif
  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
  KOKKOS_LIBS += $(KOKKOS_INTERNAL_OPENMPTARGET_LIB)
endif
# ---------------------------------------------------------------------------
# Per-backend sources, headers, flags and libraries for the host-side and
# directive-based backends. Each block is active only when the corresponding
# KOKKOS_INTERNAL_USE_* switch equals 1.
# ---------------------------------------------------------------------------

# OpenACC backend.
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenACC/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenACC/*.hpp)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENACC_FLAG)
  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENACC_FLAG)
  KOKKOS_LIBS += $(KOKKOS_INTERNAL_OPENACC_LIB)
endif

# OpenMP backend. With NVCC the OpenMP flag is passed behind -Xcompiler.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)

  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
    KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
  else
    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
  endif

  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
  KOKKOS_LINK_FLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif

# Threads backend; links against pthread.
ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
  KOKKOS_LIBS += -lpthread
  KOKKOS_TPL_LIBRARY_NAMES += pthread
endif

# Serial backend; sources and headers only.
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.hpp)
endif

# HPX backend. Compile and link flags come from pkg-config: the
# hpx_application_debug package in debug builds, hpx_application otherwise.
# When HPX_PATH is set, pkg-config is pointed at $(HPX_PATH)/lib64/pkgconfig;
# otherwise pkg-config's default search path is used.
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp)
  ifneq ($(HPX_PATH),)
    ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
      KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application_debug)
      KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug)
      KOKKOS_LIBS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug)
    else
      KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application)
      KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application)
      KOKKOS_LIBS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application)
    endif
  else
    ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
      KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application_debug)
      KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application_debug)
      KOKKOS_LIBS += $(shell pkg-config --libs hpx_application_debug)
    else
      KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application)
      KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application)
      KOKKOS_LIBS += $(shell pkg-config --libs hpx_application)
    endif
  endif
  KOKKOS_TPL_LIBRARY_NAMES += hpx
endif

# With Cygwin, functions such as fdopen and fileno are not defined
# when strict ANSI is enabled. Strict ANSI gets enabled with -std=c++14
# though, so we hard-undefine it here. Not sure if that has any bad side
# effects. This is needed for gtest actually, not for Kokkos itself!
ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1)
  KOKKOS_CXXFLAGS += -U__STRICT_ANSI__
endif

# Set KokkosExtraLibs and add -lkokkos to link line.
# KOKKOS_EXTRA_LIBS keeps the library list as collected so far;
# KOKKOS_LIBS is the same list with -lkokkos put in front.
KOKKOS_EXTRA_LIBS := ${KOKKOS_LIBS}
KOKKOS_LIBS := -lkokkos ${KOKKOS_LIBS}

# ---------------------------------------------------------------------------
# Generating the header (desul/atomics/Config.hpp).
# The content is first written line by line to a temporary file and copied to
# its final location further below only if it changed.
# $H is defined elsewhere in this file - presumably it expands to '#'.
# ---------------------------------------------------------------------------
DESUL_INTERNAL_CONFIG_TMP=Desul_Config.tmp
DESUL_CONFIG_HEADER=desul/atomics/Config.hpp
# desul_append_header: $1 = text to echo; appended to the temporary file.
desul_append_header = $(shell echo $1 >> $(DESUL_INTERNAL_CONFIG_TMP))
# The first call carries an extra "> file" redirection inside its argument, so
# the resulting shell command has both ">" and ">>" on the same file;
# presumably the ">" is there to truncate a stale temporary file before the
# first line is written - TODO confirm.
tmp := $(call desul_append_header, "// generated by on-demand build system by crtrott" > $(DESUL_INTERNAL_CONFIG_TMP))
tmp := $(call desul_append_header, "$H""ifndef DESUL_ATOMICS_CONFIG_HPP_")
tmp := $(call desul_append_header, "$H""define DESUL_ATOMICS_CONFIG_HPP_")
tmp := $(call desul_append_header, "")

# One entry per feature: a define when the feature is enabled, otherwise the
# same name as a commented-out undef.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_CUDA")
else
  tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_CUDA */")
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
  tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION")
else
  tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION */")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
  tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_HIP")
else
  tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_HIP */")
endif
ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
  tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION")
else
  tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION */")
endif
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
  tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_SYCL")
else
  tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_SYCL */")
endif
# NOTE(review): the desul OPENMP switch is keyed on the Kokkos OpenMPTarget
# backend, not on the OpenMP backend - presumably intentional; confirm.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
  tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENMP")
else
  tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENMP */")
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
  tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENACC")
else
  tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */")
endif
tmp := $(call desul_append_header, "")
tmp := $(call desul_append_header, "$H""endif")

# Install the temporary file as the real header only when the header is
# missing or when the diff between the two contains at least one line
# mentioning "define". `ls` prints exactly the path when the header exists,
# which is what the ifeq tests for.
DESUL_INTERNAL_LS_CONFIG := $(shell ls $(DESUL_CONFIG_HEADER) 2>&1)

ifeq ($(DESUL_INTERNAL_LS_CONFIG), $(DESUL_CONFIG_HEADER))
  DESUL_INTERNAL_NEW_CONFIG := $(strip $(shell diff $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) | grep -c define))
else
  DESUL_INTERNAL_NEW_CONFIG := 1
endif

ifneq ($(DESUL_INTERNAL_NEW_CONFIG), 0)
  tmp := $(shell mkdir -p desul/atomics)
  tmp := $(shell cp $(DESUL_INTERNAL_CONFIG_TMP) $(DESUL_CONFIG_HEADER))
endif

# Setting up dependencies.
# The two generated headers are produced as a side effect of parsing this
# makefile, so they get empty rules: make then treats them as targets it knows
# about instead of failing with "no rule to make target".
KokkosCore_config.h:

$(DESUL_CONFIG_HEADER):

# Everything an object file depends on besides its own source file.
KOKKOS_CPP_DEPENDS := $(DESUL_CONFIG_HEADER) KokkosCore_config.h $(KOKKOS_HEADERS)

# Objects are built in the current directory, hence the notdir for the names
# used when archiving.
KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o)
KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))

include $(KOKKOS_PATH)/Makefile.targets

# Remove the objects, the static library and every generated configuration
# file, including KokkosCore_Config_PostInclude.tmp, which the config-header
# generation earlier in this file creates alongside the other *.tmp files.
kokkos-clean:
	rm -f $(KOKKOS_OBJ_LINK) $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a KokkosCore_Config_SetupBackend.hpp \
	KokkosCore_Config_FwdBackend.hpp KokkosCore_Config_DeclareBackend.hpp KokkosCore_Config_DeclareBackend.tmp \
	KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_SetupBackend.tmp KokkosCore_Config_PostInclude.tmp

# Static library built from all Kokkos objects.
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
	ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
	ranlib libkokkos.a

# Print the accumulated compile flags.
print-cxx-flags:
	echo "$(KOKKOS_CXXFLAGS)"

KOKKOS_LINK_DEPENDS=libkokkos.a

# We have carefully separated LDFLAGS from LIBS and LIBDIRS,
# and we have also separated CPPFLAGS from CXXFLAGS.
# If this is not CMake, then for backwards compatibility
# we just jam everything together into the CXXFLAGS and LDFLAGS.
ifneq ($(KOKKOS_CMAKE), yes)
  KOKKOS_CXXFLAGS += $(KOKKOS_CPPFLAGS)
  KOKKOS_LDFLAGS += $(KOKKOS_LIBDIRS)
endif
kokkos-4.3.01/Makefile.targets000066400000000000000000000246351461675637500162460ustar00rootroot00000000000000
# ---------------------------------------------------------------------------
# Explicit compile rules, one per Kokkos object file.
# Every rule depends on $(KOKKOS_CPP_DEPENDS) plus its own source file and
# compiles that same source file into the current directory. The source named
# in the recipe must be the one listed as prerequisite; otherwise the compiler
# would write an object whose name does not match the rule's target.
# ---------------------------------------------------------------------------

# Backend-independent objects.
Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
Kokkos_CPUDiscovery.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp
Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
Kokkos_Stacktrace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Stacktrace.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Stacktrace.cpp
Kokkos_ExecPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp
Kokkos_Command_Line_Parsing.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Command_Line_Parsing.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Command_Line_Parsing.cpp
Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
Kokkos_Abort.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp

# Serial backend.
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp
Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp
endif

# CUDA backend (including the desul lock array).
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
endif

# SYCL backend.
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
Kokkos_SYCL.o : $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL.cpp
Kokkos_SYCL_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Space.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Space.cpp
Kokkos_SYCL_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Instance.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Instance.cpp
endif

# HIP backend (including the desul lock array).
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
Kokkos_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP.cpp
Kokkos_HIP_SharedAllocationRecord.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp
Kokkos_HIP_DeepCopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_DeepCopy.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_DeepCopy.cpp
Kokkos_HIP_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp
Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
endif

# Threads backend.
ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
# The recipe compiles the prerequisite Kokkos_Threads_Spinwait.cpp so that the
# compiler's output file is the target Kokkos_Threads_Spinwait.o.
Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
endif

# OpenMP backend.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
Kokkos_OpenMP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP.cpp
Kokkos_OpenMP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
endif

# HPX backend.
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
Kokkos_HPX.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp
Kokkos_HPX_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp
endif

# OpenMPTarget backend.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
Kokkos_OpenMPTarget_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
endif

# OpenACC backend.
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
Kokkos_OpenACC.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC.cpp
Kokkos_OpenACCSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACCSpace.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACCSpace.cpp
Kokkos_OpenACC_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_Instance.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_Instance.cpp
Kokkos_OpenACC_SharedAllocationRecord.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp
endif
kokkos-4.3.01/README.md000066400000000000000000000060511461675637500144050ustar00rootroot00000000000000![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) # Kokkos: Core Libraries Kokkos Core implements a programming model in C++ for writing performance portable applications targeting all major HPC platforms. For that purpose it provides abstractions for both parallel execution of code and data management. Kokkos is designed to target complex node architectures with N-level memory hierarchies and multiple types of execution resources. It currently can use CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other backends in development. **Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.** For the complete documentation, click below: # [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki) # Learning about Kokkos To start learning about Kokkos: - [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities. - [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. - [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html). 
- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability. For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue. For non-public questions send an email to: *crtrott(at)sandia.gov* # Contributing to Kokkos Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute. # Requirements, Building and Installing All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html). Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html). # Citing Kokkos Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html). # License [![License](https://img.shields.io/badge/License-Apache--2.0_WITH_LLVM--exception-blue)](https://spdx.org/licenses/LLVM-exception.html) Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights in this software. The full license statement used in all headers is available [here](https://kokkos.org/kokkos-core-wiki/license.html) or [here](https://github.com/kokkos/kokkos/blob/develop/LICENSE). kokkos-4.3.01/SECURITY.md000066400000000000000000000010771461675637500147220ustar00rootroot00000000000000# Reporting Security Issues To report a security issue, please email [lebrungrandt@ornl.gov](mailto:lebrungrandt@ornl.gov) and [crtrott@sandia.gov](mailto:crtrott@sandia.gov) with a description of the issue, the steps you took to create the issue, affected versions, and, if known, mitigations for the issue. Our vulnerability management team will respond within 5 working days of your email. If the issue is confirmed as a vulnerability, we will open a Security Advisory and acknowledge your contributions as part of it. 
This project follows a 90 day disclosure timeline. kokkos-4.3.01/Spack.md000066400000000000000000000310311461675637500145050ustar00rootroot00000000000000![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) # Kokkos Spack This gives instructions for using Spack to install Kokkos and to develop packages that depend on Kokkos. ## Getting Started Make sure you have downloaded [Spack](https://github.com/spack/spack). The easiest way to configure the Spack environment is: ````bash > source spack/share/spack/setup-env.sh ```` with other scripts available for other shells. You can display information about how to install packages with: ````bash > spack info kokkos ```` This will print all the information about how to install Kokkos with Spack. For detailed instructions on how to use Spack, see the [User Manual](https://spack.readthedocs.io). ## Setting Up Spack: Avoiding the Package Cascade By default, Spack doesn't 'see' anything on your system - including things like CMake and CUDA. This can be limited by adding a `packages.yaml` to your `$HOME/.spack` folder that includes CMake (and CUDA, if applicable). For example, your `packages.yaml` file could be: ````yaml packages: cuda: buildable: false externals: - prefix: /opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243 spec: cuda@10.1.243 - modules: - cuda/10.1.243 spec: cuda@10.1.243 cmake: buildable: false externals: - prefix: /opt/local/ppc64le/cmake/3.16.8 spec: cmake@3.16.8 - modules: - cmake/3.16.8 spec: cmake@3.16.8 ```` The `modules` entry is only necessary on systems that require loading Modules (i.e. most DOE systems). The `buildable` flag is useful to make sure Spack crashes if there is a path error, rather than having a typo and Spack rebuilding everything because `cmake` isn't found. You can verify your environment is set up correctly by running `spack graph` or `spack spec`. 
For example: ````bash > spack graph kokkos +cuda o kokkos |\ o | cuda / o cmake ```` Without the existing CUDA and CMake being identified in `packages.yaml`, a (subset!) of the output would be: ````bash o kokkos |\ | o cmake | |\ | | | |\ | | | | | |\ | | | | | | | |\ | | | | | | | | | |\ | | | | | | | o | | | libarchive | | | | | | | |\ \ \ \ | | | | | | | | | |\ \ \ \ | | | | | | | | | | | | |_|/ | | | | | | | | | | | |/| | | | | | | | | | | | | | | o curl | | |_|_|_|_|_|_|_|_|_|_|/| | |/| | | |_|_|_|_|_|_|_|/ | | | | |/| | | | | | | | | | | | o | | | | | | | | openssl | |/| | | | | | | | | | | | | | | | | | | | | o | | libxml2 | | |_|_|_|_|_|_|_|/| | | | | | | | | | | | | |\ \ \ | o | | | | | | | | | | | | zlib | / / / / / / / / / / / / | o | | | | | | | | | | | xz | / / / / / / / / / / / | o | | | | | | | | | | rhash | / / / / / / / / / / | | | | o | | | | | | nettle | | | | |\ \ \ \ \ \ \ | | | o | | | | | | | | libuv | | | | o | | | | | | | autoconf | | |_|/| | | | | | | | | | | | |/ / / / / / / | o | | | | | | | | | perl | o | | | | | | | | | gdbm | o | | | | | | | | | readline ```` ## Configuring Kokkos as a Project Dependency Say you have a project "SuperScience" which needs to use Kokkos. In your `package.py` file, you would generally include something like: ````python class SuperScience(CMakePackage): ... depends_on("kokkos") ```` Often projects want to tweak behavior when using certain features, e.g. ````python depends_on("kokkos+cuda", when="+cuda") ```` if your project needs CUDA-specific logic to configure and build. This illustrates the general principle in Spack of "flowing-up". A user requests a feature in the final app: ````bash > spack install superscience+cuda ```` This flows upstream to the Kokkos dependency, causing the `kokkos+cuda` variant to build. The downstream app (SuperScience) tells the upstream app (Kokkos) how to build. Because Kokkos is a performance portability library, it somewhat inverts this principle. 
Kokkos "flows-down", telling your application how best to configure for performance. Rather than a downstream app (SuperScience) telling the upstream (Kokkos) what variants to build, a pre-built Kokkos should be telling the downstream app SuperScience what variants to use. Kokkos works best when there is an "expert" configuration installed on your system. Your build should simply request `-DKokkos_ROOT=` and configure appropriately based on the Kokkos it finds. Kokkos has many, many build variants. Where possible, projects should only depend on a general Kokkos, not specific variants. We recommend instead adding a Kokkos configuration to your `packages.yaml` file (usually found in `~/.spack` for specific users) for each system you build on. For a Xeon + Volta system, this could look like: ````yaml kokkos: variants: +cuda +openmp +cuda_lambda +wrapper ^cuda@10.1 cuda_arch=70 compiler: [gcc@7.2.0] ```` which gives the "best" Kokkos configuration as CUDA+OpenMP optimized for a Volta 70 architecture using CUDA 10.1. It also enables support for CUDA Lambdas. The `+wrapper` option tells Kokkos to build with the special `nvcc_wrapper` (more below). Note here that we use the built-in `cuda_arch` variant of Spack to specify the architecture. For a Haswell system, we use ````yaml kokkos: variants: +openmp std=14 target=haswell compiler: [intel@18] ```` which uses the built-in microarchitecture variants of Spack. Consult the Spack documentation for more details of Spack microarchitectures and CUDA architectures. Spack does not currently provide an AMD GPU microarchitecture option. If building for HIP or an AMD GPU, Kokkos provides an `amd_gpu_arch` similar to `cuda_arch`. ````yaml kokkos: variants: +hip amd_gpu_arch=vega900 ```` Without an optimal default in your `packages.yaml` file, it is highly likely that the default Kokkos configuration you get will not be what you want. 
For example, CUDA is not enabled by default (there is no easy logic to conditionally activate this for CUDA-enabled systems). If you don't specify a CUDA build variant in a `packages.yaml` and you build your Kokkos-dependent project: ````bash > spack install superscience ```` you may end up just getting the default Kokkos (i.e. Serial). Before running `spack install ` we recommend running `spack spec ` to confirm your dependency tree is correct. For example, with Kokkos Kernels: ````bash kokkos-kernels@3.0%gcc@8.3.0~blas build_type=RelWithDebInfo ~cblas~complex_double~complex_float~cublas~cuda cuda_arch=none ~cusparse~diy+double execspace_cuda=auto execspace_openmp=auto execspace_serial=auto execspace_threads=auto ~float~lapack~lapacke+layoutleft~layoutright memspace_cudaspace=auto memspace_cudauvmspace=auto +memspace_hostspace~mkl+offset_int+offset_size_t~openmp+ordinal_int~ordinal_int64_t~serial~superlu arch=linux-rhel7-skylake_avx512 ^cmake@3.16.2%gcc@8.3.0~doc+ncurses+openssl+ownlibs~qt arch=linux-rhel7-skylake_avx512 ^kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=14 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512 ^cuda@10.1%gcc@8.3.0 arch=linux-rhel7-skylake_avx512 ^kokkos-nvcc-wrapper@old%gcc@8.3.0 build_type=RelWithDebInfo +mpi arch=linux-rhel7-skylake_avx512 ^openmpi@4.0.2%gcc@8.3.0~cuda+cxx_exceptions fabrics=none ~java~legacylaunchers~memchecker 
patches=073477a76bba780c67c36e959cd3ee6910743e2735c7e76850ffba6791d498e4 ~pmi schedulers=none ~sqlite3~thread_multiple+vt arch=linux-rhel7-skylake_avx512 ```` The output can be very verbose, but we can verify the expected `kokkos`: ````bash kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=11 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512 ```` We see that we do have `+volta70` and `+wrapper`, e.g. ### Spack Environments The encouraged way to use Spack is with Spack environments ([more details here](https://spack-tutorial.readthedocs.io/en/latest/tutorial_environments.html#dealing-with-many-specs-at-once)). Rather than installing packages one-at-a-time, you add packages to an environment. After adding all packages, you concretize and install them all. Using environments, one can explicitly add a desired Kokkos for the environment, e.g. ````bash > spack add kokkos +cuda +cuda_lambda +volta70 > spack add my_project +my_variant > ... > spack install ```` All packages within the environment will build against the CUDA-enabled Kokkos, even if they only request a default Kokkos. ## NVCC Wrapper Kokkos is a C++ project, but often builds for the CUDA backend. This is particularly problematic with CMake. At this point, `nvcc` does not accept all the flags that normally get passed to a C++ compiler. 
Kokkos provides `nvcc_wrapper` that identifies correctly as a C++ compiler to CMake and accepts C++ flags, but uses `nvcc` as the underlying compiler. `nvcc` itself also uses an underlying host compiler, e.g. GCC. In Spack, the underlying host compiler is specified as below, e.g.: ````bash > spack install package %gcc@8.0.0 ```` This is still valid for Kokkos. To use the special wrapper for CUDA builds, request a desired compiler and simply add the `+wrapper` variant. ````bash > spack install kokkos +cuda +wrapper %gcc@7.2.0 ```` Downstream projects depending on Kokkos need to override their compiler. Kokkos provides the compiler in a `kokkos_cxx` variable, which points to either `nvcc_wrapper` when needed or the regular compiler otherwise. Spack projects already do this to use MPI compiler wrappers. ````python def cmake_args(self): options = [] ... options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["kokkos"].kokkos_cxx) ... return options ```` Note: `nvcc_wrapper` works with the MPI compiler wrappers. If building your project with MPI, do NOT set your compiler to `nvcc_wrapper`. Instead set your compiler to `mpicxx` and `nvcc_wrapper` will be used under the hood. ````python def cmake_args(self): options = [] ... options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["mpi"].mpicxx) ... return options ```` To accomplish this, `nvcc_wrapper` must depend on MPI (even though it uses no MPI). This has the unfortunate consequence that Kokkos CUDA projects not using MPI will implicitly depend on MPI anyway. This behavior is necessary for now, but will hopefully be removed later. When using environments, if MPI is not needed, you can remove the MPI dependency with: ````bash > spack add kokkos-nvcc-wrapper ~mpi ```` ## Developing With Spack Spack has historically been much more suited to *deployment* of mature packages than active testing or developing. However, recent features have improved support for development. 
Future releases are likely to make this even easier and incorporate Git integration. The most common commands will do a full build and install of the packages. If doing development, you may wish to merely set up a build environment. This allows you to modify the source and re-build. In this case, you can stop after configuring. Suppose you have a Kokkos checkout in the folder `kokkos-src`: ````bash > spack dev-build -d kokkos-src -u cmake kokkos@develop +wrapper +openmp ```` This sets up a development environment for you in `kokkos-src` which you can use (Bash example shown): Note: Always specify `develop` as the version when doing `dev-build`, except in rare cases. You are usually developing a feature branch that will merge into `develop`, hence you are making a new `develop` branch. ````bash > cd kokkos-src > source spack-build-env.txt > cd spack-build > make ```` Before sourcing the Spack development environment, you may wish to save your current environment: ````bash > declare -px > myenv.sh ```` When done with Spack, you can then restore your original environment: ````bash > source myenv.sh ```` kokkos-4.3.01/algorithms/000077500000000000000000000000001461675637500152755ustar00rootroot00000000000000kokkos-4.3.01/algorithms/CMakeLists.txt000066400000000000000000000004401461675637500200330ustar00rootroot00000000000000IF (NOT Kokkos_INSTALL_TESTING) ADD_SUBDIRECTORY(src) ENDIF() # FIXME_OPENACC: temporarily disabled due to unimplemented features IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC)) KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) ENDIF() kokkos-4.3.01/algorithms/src/000077500000000000000000000000001461675637500160645ustar00rootroot00000000000000kokkos-4.3.01/algorithms/src/CMakeLists.txt000066400000000000000000000021451461675637500206260ustar00rootroot00000000000000#I have to leave these here for tribits KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) 
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) #----------------------------------------------------------------------------- FILE(GLOB ALGO_HEADERS *.hpp) FILE(GLOB ALGO_SOURCES *.cpp) APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp) APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/impl/*.hpp) INSTALL ( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" DESTINATION ${KOKKOS_HEADER_DIR} FILES_MATCHING PATTERN "*.hpp" ) #----------------------------------------------------------------------------- # We have to pass the sources in here for Tribits # These will get ignored for standalone CMake and a true interface library made KOKKOS_ADD_INTERFACE_LIBRARY( kokkosalgorithms NOINSTALLHEADERS ${ALGO_HEADERS} SOURCES ${ALGO_SOURCES} ) KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms ${KOKKOS_TOP_BUILD_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ) KOKKOS_LINK_TPL(kokkoscontainers PUBLIC ROCTHRUST) KOKKOS_LINK_TPL(kokkoscore PUBLIC ONEDPL) kokkos-4.3.01/algorithms/src/KokkosAlgorithms_dummy.cpp000066400000000000000000000011621461675637500232760ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER void KOKKOS_ALGORITHMS_SRC_DUMMY_PREVENT_LINK_ERROR() {} kokkos-4.3.01/algorithms/src/Kokkos_NestedSort.hpp000066400000000000000000000017261461675637500222160ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 
4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_NESTED_SORT_HPP_ #define KOKKOS_NESTED_SORT_HPP_ #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE #define KOKKOS_IMPL_PUBLIC_INCLUDE #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT #endif #include "sorting/Kokkos_NestedSortPublicAPI.hpp" #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT #undef KOKKOS_IMPL_PUBLIC_INCLUDE #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT #endif #endif kokkos-4.3.01/algorithms/src/Kokkos_Random.hpp000066400000000000000000001553451461675637500213530ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_RANDOM_HPP #define KOKKOS_RANDOM_HPP #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE #define KOKKOS_IMPL_PUBLIC_INCLUDE #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM #endif #include #include #include #include #include /// \file Kokkos_Random.hpp /// \brief Pseudorandom number generators /// /// These generators are based on Vigna, Sebastiano (2014). "An /// experimental exploration of Marsaglia's xorshift generators, /// scrambled." 
See: http://arxiv.org/abs/1402.6246 namespace Kokkos { // clang-format off /*Template functions to get equidistributed random numbers from a generator for a specific Scalar type template struct rand{ //Max value returned by draw(Generator& gen) KOKKOS_INLINE_FUNCTION static Scalar max(); //Returns a value between zero and max() KOKKOS_INLINE_FUNCTION static Scalar draw(Generator& gen); //Returns a value between zero and range() //Note: for floating point values range can be larger than max() KOKKOS_INLINE_FUNCTION static Scalar draw(Generator& gen, const Scalar& range){} //Return value between start and end KOKKOS_INLINE_FUNCTION static Scalar draw(Generator& gen, const Scalar& start, const Scalar& end); }; The Random number generators themselves have two components: a state-pool and the actual generator. A state-pool manages a number of generators, so that each active thread is able to grab its own. This allows the generation of random numbers which are independent between threads. Note that in contrast to CuRand none of the functions of the pool (or the generator) are collectives, i.e. all functions can be called inside conditionals. template class Pool { public: //The Kokkos device type using device_type = Device; //The actual generator type using generator_type = Generator; //Default constructor: does not initialize a pool Pool(); //Initializing constructor: calls init(seed,Device_Specific_Number); Pool(unsigned int seed); //Initialize Pool with seed as a starting seed with a pool_size of num_states //The Random_XorShift64 generator is used in serial to initialize all states, //thus the initialization process is platform independent and deterministic. void init(unsigned int seed, int num_states); //Get a generator. This will lock one of the states, guaranteeing that each thread //will have its private generator. Note: on Cuda getting a state involves atomics, //and is thus not deterministic! generator_type get_state(); //Give a state back to the pool. 
This unlocks the state, and writes the modified //state of the generator back to the pool. void free_state(generator_type gen); } template class Generator { public: //The Kokkos device type using device_type = DeviceType; //Max return values of respective [X]rand[S]() functions enum {MAX_URAND = 0xffffffffU}; enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; enum {MAX_RAND = static_cast(0xffffffffU/2)}; enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; //Init with a state and the idx with respect to pool. Note: in serial the //Generator can be used by just giving it the necessary state arguments KOKKOS_INLINE_FUNCTION Generator (STATE_ARGUMENTS, int state_idx = 0); //Draw a equidistributed uint32_t in the range [0,MAX_URAND) KOKKOS_INLINE_FUNCTION uint32_t urand(); //Draw a equidistributed uint64_t in the range [0,MAX_URAND64) KOKKOS_INLINE_FUNCTION uint64_t urand64(); //Draw a equidistributed uint32_t in the range [0,range) KOKKOS_INLINE_FUNCTION uint32_t urand(const uint32_t& range); //Draw a equidistributed uint32_t in the range [start,end) KOKKOS_INLINE_FUNCTION uint32_t urand(const uint32_t& start, const uint32_t& end ); //Draw a equidistributed uint64_t in the range [0,range) KOKKOS_INLINE_FUNCTION uint64_t urand64(const uint64_t& range); //Draw a equidistributed uint64_t in the range [start,end) KOKKOS_INLINE_FUNCTION uint64_t urand64(const uint64_t& start, const uint64_t& end ); //Draw a equidistributed int in the range [0,MAX_RAND) KOKKOS_INLINE_FUNCTION int rand(); //Draw a equidistributed int in the range [0,range) KOKKOS_INLINE_FUNCTION int rand(const int& range); //Draw a equidistributed int in the range [start,end) KOKKOS_INLINE_FUNCTION int rand(const int& start, const int& end ); //Draw a equidistributed int64_t in the range [0,MAX_RAND64) KOKKOS_INLINE_FUNCTION int64_t rand64(); //Draw a equidistributed int64_t in the range [0,range) KOKKOS_INLINE_FUNCTION int64_t rand64(const int64_t& range); //Draw a equidistributed int64_t in the range 
[start,end) KOKKOS_INLINE_FUNCTION int64_t rand64(const int64_t& start, const int64_t& end ); //Draw a equidistributed float in the range [0,1.0) KOKKOS_INLINE_FUNCTION float frand(); //Draw a equidistributed float in the range [0,range) KOKKOS_INLINE_FUNCTION float frand(const float& range); //Draw a equidistributed float in the range [start,end) KOKKOS_INLINE_FUNCTION float frand(const float& start, const float& end ); //Draw a equidistributed double in the range [0,1.0) KOKKOS_INLINE_FUNCTION double drand(); //Draw a equidistributed double in the range [0,range) KOKKOS_INLINE_FUNCTION double drand(const double& range); //Draw a equidistributed double in the range [start,end) KOKKOS_INLINE_FUNCTION double drand(const double& start, const double& end ); //Draw a standard normal distributed double KOKKOS_INLINE_FUNCTION double normal() ; //Draw a normal distributed double with given mean and standard deviation KOKKOS_INLINE_FUNCTION double normal(const double& mean, const double& std_dev=1.0); } //Additional Functions: //Fills view with random numbers in the range [0,range) template void fill_random(ViewType view, PoolType pool, ViewType::value_type range); //Fills view with random numbers in the range [start,end) template void fill_random(ViewType view, PoolType pool, ViewType::value_type start, ViewType::value_type end); */ // clang-format on template struct rand; template struct rand { KOKKOS_INLINE_FUNCTION static short max() { return 127; } KOKKOS_INLINE_FUNCTION static short draw(Generator& gen) { return short((gen.rand() & 0xff + 256) % 256); } KOKKOS_INLINE_FUNCTION static short draw(Generator& gen, const char& range) { return char(gen.rand(range)); } KOKKOS_INLINE_FUNCTION static short draw(Generator& gen, const char& start, const char& end) { return char(gen.rand(start, end)); } }; template struct rand { KOKKOS_INLINE_FUNCTION static short max() { return 32767; } KOKKOS_INLINE_FUNCTION static short draw(Generator& gen) { return short((gen.rand() & 0xffff 
+ 65536) % 32768); } KOKKOS_INLINE_FUNCTION static short draw(Generator& gen, const short& range) { return short(gen.rand(range)); } KOKKOS_INLINE_FUNCTION static short draw(Generator& gen, const short& start, const short& end) { return short(gen.rand(start, end)); } }; template struct rand { KOKKOS_INLINE_FUNCTION static int max() { return Generator::MAX_RAND; } KOKKOS_INLINE_FUNCTION static int draw(Generator& gen) { return gen.rand(); } KOKKOS_INLINE_FUNCTION static int draw(Generator& gen, const int& range) { return gen.rand(range); } KOKKOS_INLINE_FUNCTION static int draw(Generator& gen, const int& start, const int& end) { return gen.rand(start, end); } }; template struct rand { KOKKOS_INLINE_FUNCTION static unsigned int max() { return Generator::MAX_URAND; } KOKKOS_INLINE_FUNCTION static unsigned int draw(Generator& gen) { return gen.urand(); } KOKKOS_INLINE_FUNCTION static unsigned int draw(Generator& gen, const unsigned int& range) { return gen.urand(range); } KOKKOS_INLINE_FUNCTION static unsigned int draw(Generator& gen, const unsigned int& start, const unsigned int& end) { return gen.urand(start, end); } }; template struct rand { KOKKOS_INLINE_FUNCTION static long max() { // FIXME (mfh 26 Oct 2014) It would be better to select the // return value at compile time, using something like enable_if. return sizeof(long) == 4 ? static_cast(Generator::MAX_RAND) : static_cast(Generator::MAX_RAND64); } KOKKOS_INLINE_FUNCTION static long draw(Generator& gen) { // FIXME (mfh 26 Oct 2014) It would be better to select the // return value at compile time, using something like enable_if. return sizeof(long) == 4 ? static_cast(gen.rand()) : static_cast(gen.rand64()); } KOKKOS_INLINE_FUNCTION static long draw(Generator& gen, const long& range) { // FIXME (mfh 26 Oct 2014) It would be better to select the // return value at compile time, using something like enable_if. return sizeof(long) == 4 ? 
static_cast(gen.rand(static_cast(range))) : static_cast(gen.rand64(range)); } KOKKOS_INLINE_FUNCTION static long draw(Generator& gen, const long& start, const long& end) { // FIXME (mfh 26 Oct 2014) It would be better to select the // return value at compile time, using something like enable_if. return sizeof(long) == 4 ? static_cast( gen.rand(static_cast(start), static_cast(end))) : static_cast(gen.rand64(start, end)); } }; template struct rand { KOKKOS_INLINE_FUNCTION static unsigned long max() { // FIXME (mfh 26 Oct 2014) It would be better to select the // return value at compile time, using something like enable_if. return sizeof(unsigned long) == 4 ? static_cast(Generator::MAX_URAND) : static_cast(Generator::MAX_URAND64); } KOKKOS_INLINE_FUNCTION static unsigned long draw(Generator& gen) { // FIXME (mfh 26 Oct 2014) It would be better to select the // return value at compile time, using something like enable_if. return sizeof(unsigned long) == 4 ? static_cast(gen.urand()) : static_cast(gen.urand64()); } KOKKOS_INLINE_FUNCTION static unsigned long draw(Generator& gen, const unsigned long& range) { // FIXME (mfh 26 Oct 2014) It would be better to select the // return value at compile time, using something like enable_if. return sizeof(unsigned long) == 4 ? static_cast( gen.urand(static_cast(range))) : static_cast(gen.urand64(range)); } KOKKOS_INLINE_FUNCTION static unsigned long draw(Generator& gen, const unsigned long& start, const unsigned long& end) { // FIXME (mfh 26 Oct 2014) It would be better to select the // return value at compile time, using something like enable_if. return sizeof(unsigned long) == 4 ? static_cast( gen.urand(static_cast(start), static_cast(end))) : static_cast(gen.urand64(start, end)); } }; // NOTE (mfh 26 oct 2014) This is a partial specialization for long // long, a C99 / C++11 signed type which is guaranteed to be at // least 64 bits. Do NOT write a partial specialization for // int64_t!!! This is just an alias! 
It could be either long or // long long. We don't know which a priori, and I've seen both. // The types long and long long are guaranteed to differ, so it's // always safe to specialize for both. template struct rand { KOKKOS_INLINE_FUNCTION static long long max() { // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. return Generator::MAX_RAND64; } KOKKOS_INLINE_FUNCTION static long long draw(Generator& gen) { // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. return gen.rand64(); } KOKKOS_INLINE_FUNCTION static long long draw(Generator& gen, const long long& range) { // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. return gen.rand64(range); } KOKKOS_INLINE_FUNCTION static long long draw(Generator& gen, const long long& start, const long long& end) { // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. return gen.rand64(start, end); } }; // NOTE (mfh 26 oct 2014) This is a partial specialization for // unsigned long long, a C99 / C++11 unsigned type which is // guaranteed to be at least 64 bits. Do NOT write a partial // specialization for uint64_t!!! This is just an alias! It could // be either unsigned long or unsigned long long. We don't know // which a priori, and I've seen both. The types unsigned long and // unsigned long long are guaranteed to differ, so it's always safe // to specialize for both. template struct rand { KOKKOS_INLINE_FUNCTION static unsigned long long max() { // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 // bits. return Generator::MAX_URAND64; } KOKKOS_INLINE_FUNCTION static unsigned long long draw(Generator& gen) { // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 // bits. return gen.urand64(); } KOKKOS_INLINE_FUNCTION static unsigned long long draw(Generator& gen, const unsigned long long& range) { // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. 
return gen.urand64(range); } KOKKOS_INLINE_FUNCTION static unsigned long long draw(Generator& gen, const unsigned long long& start, const unsigned long long& end) { // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. return gen.urand64(start, end); } }; #if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT template struct rand { using half = Kokkos::Experimental::half_t; KOKKOS_INLINE_FUNCTION static half max() { return half(1.0); } KOKKOS_INLINE_FUNCTION static half draw(Generator& gen) { return half(gen.frand()); } KOKKOS_INLINE_FUNCTION static half draw(Generator& gen, const half& range) { return half(gen.frand(float(range))); } KOKKOS_INLINE_FUNCTION static half draw(Generator& gen, const half& start, const half& end) { return half(gen.frand(float(start), float(end))); } }; #endif // defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT template struct rand { using bhalf = Kokkos::Experimental::bhalf_t; KOKKOS_INLINE_FUNCTION static bhalf max() { return bhalf(1.0); } KOKKOS_INLINE_FUNCTION static bhalf draw(Generator& gen) { return bhalf(gen.frand()); } KOKKOS_INLINE_FUNCTION static bhalf draw(Generator& gen, const bhalf& range) { return bhalf(gen.frand(float(range))); } KOKKOS_INLINE_FUNCTION static bhalf draw(Generator& gen, const bhalf& start, const bhalf& end) { return bhalf(gen.frand(float(start), float(end))); } }; #endif // defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT template struct rand { KOKKOS_INLINE_FUNCTION static float max() { return 1.0f; } KOKKOS_INLINE_FUNCTION static float draw(Generator& gen) { return gen.frand(); } KOKKOS_INLINE_FUNCTION static float draw(Generator& gen, const float& range) { return gen.frand(range); } KOKKOS_INLINE_FUNCTION static float draw(Generator& gen, const float& start, const float& end) { return gen.frand(start, end); } }; template struct rand { KOKKOS_INLINE_FUNCTION static double max() { return 1.0; } 
KOKKOS_INLINE_FUNCTION static double draw(Generator& gen) { return gen.drand(); } KOKKOS_INLINE_FUNCTION static double draw(Generator& gen, const double& range) { return gen.drand(range); } KOKKOS_INLINE_FUNCTION static double draw(Generator& gen, const double& start, const double& end) { return gen.drand(start, end); } }; template struct rand> { KOKKOS_INLINE_FUNCTION static Kokkos::complex max() { return Kokkos::complex(1.0, 1.0); } KOKKOS_INLINE_FUNCTION static Kokkos::complex draw(Generator& gen) { const float re = gen.frand(); const float im = gen.frand(); return Kokkos::complex(re, im); } KOKKOS_INLINE_FUNCTION static Kokkos::complex draw(Generator& gen, const Kokkos::complex& range) { const float re = gen.frand(real(range)); const float im = gen.frand(imag(range)); return Kokkos::complex(re, im); } KOKKOS_INLINE_FUNCTION static Kokkos::complex draw(Generator& gen, const Kokkos::complex& start, const Kokkos::complex& end) { const float re = gen.frand(real(start), real(end)); const float im = gen.frand(imag(start), imag(end)); return Kokkos::complex(re, im); } }; template struct rand> { KOKKOS_INLINE_FUNCTION static Kokkos::complex max() { return Kokkos::complex(1.0, 1.0); } KOKKOS_INLINE_FUNCTION static Kokkos::complex draw(Generator& gen) { const double re = gen.drand(); const double im = gen.drand(); return Kokkos::complex(re, im); } KOKKOS_INLINE_FUNCTION static Kokkos::complex draw(Generator& gen, const Kokkos::complex& range) { const double re = gen.drand(real(range)); const double im = gen.drand(imag(range)); return Kokkos::complex(re, im); } KOKKOS_INLINE_FUNCTION static Kokkos::complex draw(Generator& gen, const Kokkos::complex& start, const Kokkos::complex& end) { const double re = gen.drand(real(start), real(end)); const double im = gen.drand(imag(start), imag(end)); return Kokkos::complex(re, im); } }; template class Random_XorShift1024_Pool; namespace Impl { template struct Random_XorShift1024_State { uint64_t state_[16]; KOKKOS_DEFAULTED_FUNCTION 
Random_XorShift1024_State() = default; template KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v, int state_idx) { for (int i = 0; i < 16; i++) state_[i] = v(state_idx, i); } KOKKOS_FUNCTION uint64_t operator[](const int i) const { return state_[i]; } KOKKOS_FUNCTION uint64_t& operator[](const int i) { return state_[i]; } }; template <> struct Random_XorShift1024_State { uint64_t* state_; const int stride_; KOKKOS_FUNCTION Random_XorShift1024_State() : state_(nullptr), stride_(1){}; template KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v, int state_idx) : state_(&v(state_idx, 0)), stride_(v.stride_1()) {} KOKKOS_FUNCTION uint64_t operator[](const int i) const { return state_[i * stride_]; } KOKKOS_FUNCTION uint64_t& operator[](const int i) { return state_[i * stride_]; } }; template struct Random_XorShift1024_UseCArrayState : std::true_type {}; #ifdef KOKKOS_ENABLE_CUDA template <> struct Random_XorShift1024_UseCArrayState : std::false_type {}; #endif #ifdef KOKKOS_ENABLE_HIP template <> struct Random_XorShift1024_UseCArrayState : std::false_type {}; #endif #ifdef KOKKOS_ENABLE_OPENMPTARGET template <> struct Random_XorShift1024_UseCArrayState : std::false_type {}; #endif template struct Random_UniqueIndex { using locks_view_type = View; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type) { KOKKOS_IF_ON_HOST( (return DeviceType::execution_space::impl_hardware_thread_id();)) KOKKOS_IF_ON_DEVICE((return 0;)) } }; #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) #if defined(KOKKOS_ENABLE_CUDA) #define KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP Kokkos::Cuda #elif defined(KOKKOS_ENABLE_HIP) #define KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP Kokkos::HIP #endif template struct Random_UniqueIndex< Kokkos::Device> { using locks_view_type = View>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks_) { KOKKOS_IF_ON_DEVICE(( const int i_offset = (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + 
threadIdx.z; int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * blockDim.x * blockDim.y * blockDim.z + i_offset) % locks_.extent(0); while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) { i += blockDim.x * blockDim.y * blockDim.z; if (i >= static_cast(locks_.extent(0))) { i = i_offset; } } return i;)) KOKKOS_IF_ON_HOST(((void)locks_; return 0;)) } }; #undef KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP #endif #ifdef KOKKOS_ENABLE_SYCL template struct Random_UniqueIndex< Kokkos::Device> { using locks_view_type = View>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks_) { auto item = sycl::ext::oneapi::experimental::this_nd_item<3>(); std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1), item.get_local_id(0)}; std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1), item.get_group(0)}; std::size_t blockDim[3] = {item.get_local_range(2), item.get_local_range(1), item.get_local_range(0)}; std::size_t gridDim[3] = { item.get_global_range(2) / item.get_local_range(2), item.get_global_range(1) / item.get_local_range(1), item.get_global_range(0) / item.get_local_range(0)}; const int i_offset = (threadIdx[0] * blockDim[1] + threadIdx[1]) * blockDim[2] + threadIdx[2]; int i = (((blockIdx[0] * gridDim[1] + blockIdx[1]) * gridDim[2] + blockIdx[2]) * blockDim[0] * blockDim[1] * blockDim[2] + i_offset) % locks_.extent(0); while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) { i += blockDim[0] * blockDim[1] * blockDim[2]; if (i >= static_cast(locks_.extent(0))) { i = i_offset; } } return i; } }; #endif #ifdef KOKKOS_ENABLE_OPENMPTARGET template struct Random_UniqueIndex< Kokkos::Device> { using locks_view_type = View>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks) { const int team_size = omp_get_num_threads(); int i = omp_get_team_num() * team_size + omp_get_thread_num(); const int lock_size = locks.extent_int(0); while (Kokkos::atomic_compare_exchange(&locks(i, 0), 0, 1)) { 
i = (i + 1) % lock_size; } return i; } }; #endif } // namespace Impl template class Random_XorShift64_Pool; template class Random_XorShift64 { private: uint64_t state_; const int state_idx_; friend class Random_XorShift64_Pool; public: using device_type = DeviceType; constexpr static uint32_t MAX_URAND = std::numeric_limits::max(); constexpr static uint64_t MAX_URAND64 = std::numeric_limits::max(); constexpr static int32_t MAX_RAND = std::numeric_limits::max(); constexpr static int64_t MAX_RAND64 = std::numeric_limits::max(); KOKKOS_INLINE_FUNCTION Random_XorShift64(uint64_t state, int state_idx = 0) : state_(state == 0 ? uint64_t(1318319) : state), state_idx_(state_idx) {} KOKKOS_INLINE_FUNCTION uint32_t urand() { state_ ^= state_ >> 12; state_ ^= state_ << 25; state_ ^= state_ >> 27; uint64_t tmp = state_ * 2685821657736338717ULL; tmp = tmp >> 16; return static_cast(tmp & MAX_URAND); } KOKKOS_INLINE_FUNCTION uint64_t urand64() { state_ ^= state_ >> 12; state_ ^= state_ << 25; state_ ^= state_ >> 27; return (state_ * 2685821657736338717ULL) - 1; } KOKKOS_INLINE_FUNCTION uint32_t urand(const uint32_t& range) { const uint32_t max_val = (MAX_URAND / range) * range; uint32_t tmp = urand(); while (tmp >= max_val) tmp = urand(); return tmp % range; } KOKKOS_INLINE_FUNCTION uint32_t urand(const uint32_t& start, const uint32_t& end) { return urand(end - start) + start; } KOKKOS_INLINE_FUNCTION uint64_t urand64(const uint64_t& range) { const uint64_t max_val = (MAX_URAND64 / range) * range; uint64_t tmp = urand64(); while (tmp >= max_val) tmp = urand64(); return tmp % range; } KOKKOS_INLINE_FUNCTION uint64_t urand64(const uint64_t& start, const uint64_t& end) { return urand64(end - start) + start; } KOKKOS_INLINE_FUNCTION int rand() { return static_cast(urand() / 2); } KOKKOS_INLINE_FUNCTION int rand(const int& range) { const int max_val = (MAX_RAND / range) * range; int tmp = rand(); while (tmp >= max_val) tmp = rand(); return tmp % range; } KOKKOS_INLINE_FUNCTION int 
rand(const int& start, const int& end) { return rand(end - start) + start; } KOKKOS_INLINE_FUNCTION int64_t rand64() { return static_cast(urand64() / 2); } KOKKOS_INLINE_FUNCTION int64_t rand64(const int64_t& range) { const int64_t max_val = (MAX_RAND64 / range) * range; int64_t tmp = rand64(); while (tmp >= max_val) tmp = rand64(); return tmp % range; } KOKKOS_INLINE_FUNCTION int64_t rand64(const int64_t& start, const int64_t& end) { return rand64(end - start) + start; } KOKKOS_INLINE_FUNCTION float frand() { return urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION float frand(const float& range) { return range * urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION float frand(const float& start, const float& end) { return frand(end - start) + start; } KOKKOS_INLINE_FUNCTION double drand() { return urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION double drand(const double& range) { return range * urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION double drand(const double& start, const double& end) { return drand(end - start) + start; } // Box-muller method for drawing a standard normal distributed random // number KOKKOS_INLINE_FUNCTION double normal() { constexpr auto two_pi = 2 * Kokkos::numbers::pi_v; const double u = drand(); const double v = drand(); const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u)); const double theta = v * two_pi; return r * Kokkos::cos(theta); } KOKKOS_INLINE_FUNCTION double normal(const double& mean, const double& std_dev = 1.0) { return mean + normal() * std_dev; } }; template class Random_XorShift64_Pool { public: using device_type = typename DeviceType::device_type; private: using execution_space = typename device_type::execution_space; using locks_type = View; using state_data_type = View; locks_type locks_ = {}; state_data_type state_ = {}; int num_states_ = {}; int padding_ = {}; public: using generator_type = Random_XorShift64; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 
KOKKOS_DEFAULTED_FUNCTION Random_XorShift64_Pool() = default; KOKKOS_DEFAULTED_FUNCTION Random_XorShift64_Pool( Random_XorShift64_Pool const&) = default; KOKKOS_DEFAULTED_FUNCTION Random_XorShift64_Pool& operator=( Random_XorShift64_Pool const&) = default; #else Random_XorShift64_Pool() = default; #endif Random_XorShift64_Pool(uint64_t seed) { num_states_ = 0; init(seed, execution_space().concurrency()); } void init(uint64_t seed, int num_states) { if (seed == 0) seed = uint64_t(1318319); // I only want to pad on CPU like archs (less than 1000 threads). 64 is a // magic number, or random number I just wanted something not too large and // not too small. 64 sounded fine. padding_ = num_states < 1000 ? 64 : 1; num_states_ = num_states; locks_ = locks_type("Kokkos::Random_XorShift64::locks", num_states, padding_); state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_, padding_); typename state_data_type::HostMirror h_state = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, state_); typename locks_type::HostMirror h_lock = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, locks_); // Execute on the HostMirror's default execution space. 
Random_XorShift64 gen(seed, 0); for (int i = 0; i < 17; i++) gen.rand(); for (int i = 0; i < num_states_; i++) { int n1 = gen.rand(); int n2 = gen.rand(); int n3 = gen.rand(); int n4 = gen.rand(); h_state(i, 0) = (((static_cast(n1)) & 0xffff) << 00) | (((static_cast(n2)) & 0xffff) << 16) | (((static_cast(n3)) & 0xffff) << 32) | (((static_cast(n4)) & 0xffff) << 48); h_lock(i, 0) = 0; } deep_copy(state_, h_state); deep_copy(locks_, h_lock); } KOKKOS_INLINE_FUNCTION Random_XorShift64 get_state() const { KOKKOS_EXPECTS(num_states_ > 0); const int i = Impl::Random_UniqueIndex::get_state_idx(locks_); return Random_XorShift64(state_(i, 0), i); } // NOTE: state_idx MUST be unique and less than num_states KOKKOS_INLINE_FUNCTION Random_XorShift64 get_state(const int state_idx) const { return Random_XorShift64(state_(state_idx, 0), state_idx); } KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift64& state) const { state_(state.state_idx_, 0) = state.state_; // Release the lock only after the state has been updated in memory Kokkos::memory_fence(); locks_(state.state_idx_, 0) = 0; } }; template class Random_XorShift1024 { using execution_space = typename DeviceType::execution_space; private: int p_; const int state_idx_; Impl::Random_XorShift1024_State< Impl::Random_XorShift1024_UseCArrayState::value> state_; friend class Random_XorShift1024_Pool; public: using pool_type = Random_XorShift1024_Pool; using device_type = DeviceType; constexpr static uint32_t MAX_URAND = std::numeric_limits::max(); constexpr static uint64_t MAX_URAND64 = std::numeric_limits::max(); constexpr static int32_t MAX_RAND = std::numeric_limits::max(); constexpr static int64_t MAX_RAND64 = std::numeric_limits::max(); KOKKOS_INLINE_FUNCTION Random_XorShift1024(const typename pool_type::state_data_type& state, int p, int state_idx = 0) : p_(p), state_idx_(state_idx), state_(state, state_idx) {} KOKKOS_INLINE_FUNCTION uint32_t urand() { uint64_t state_0 = state_[p_]; uint64_t state_1 = state_[p_ = 
(p_ + 1) & 15]; state_1 ^= state_1 << 31; state_1 ^= state_1 >> 11; state_0 ^= state_0 >> 30; uint64_t tmp = (state_[p_] = state_0 ^ state_1) * 1181783497276652981ULL; tmp = tmp >> 16; return static_cast(tmp & MAX_URAND); } KOKKOS_INLINE_FUNCTION uint64_t urand64() { uint64_t state_0 = state_[p_]; uint64_t state_1 = state_[p_ = (p_ + 1) & 15]; state_1 ^= state_1 << 31; state_1 ^= state_1 >> 11; state_0 ^= state_0 >> 30; return ((state_[p_] = state_0 ^ state_1) * 1181783497276652981LL) - 1; } KOKKOS_INLINE_FUNCTION uint32_t urand(const uint32_t& range) { const uint32_t max_val = (MAX_URAND / range) * range; uint32_t tmp = urand(); while (tmp >= max_val) tmp = urand(); return tmp % range; } KOKKOS_INLINE_FUNCTION uint32_t urand(const uint32_t& start, const uint32_t& end) { return urand(end - start) + start; } KOKKOS_INLINE_FUNCTION uint64_t urand64(const uint64_t& range) { const uint64_t max_val = (MAX_URAND64 / range) * range; uint64_t tmp = urand64(); while (tmp >= max_val) tmp = urand64(); return tmp % range; } KOKKOS_INLINE_FUNCTION uint64_t urand64(const uint64_t& start, const uint64_t& end) { return urand64(end - start) + start; } KOKKOS_INLINE_FUNCTION int rand() { return static_cast(urand() / 2); } KOKKOS_INLINE_FUNCTION int rand(const int& range) { const int max_val = (MAX_RAND / range) * range; int tmp = rand(); while (tmp >= max_val) tmp = rand(); return tmp % range; } KOKKOS_INLINE_FUNCTION int rand(const int& start, const int& end) { return rand(end - start) + start; } KOKKOS_INLINE_FUNCTION int64_t rand64() { return static_cast(urand64() / 2); } KOKKOS_INLINE_FUNCTION int64_t rand64(const int64_t& range) { const int64_t max_val = (MAX_RAND64 / range) * range; int64_t tmp = rand64(); while (tmp >= max_val) tmp = rand64(); return tmp % range; } KOKKOS_INLINE_FUNCTION int64_t rand64(const int64_t& start, const int64_t& end) { return rand64(end - start) + start; } KOKKOS_INLINE_FUNCTION float frand() { return urand64() / static_cast(MAX_URAND64); } 
KOKKOS_INLINE_FUNCTION float frand(const float& range) { return range * urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION float frand(const float& start, const float& end) { return frand(end - start) + start; } KOKKOS_INLINE_FUNCTION double drand() { return urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION double drand(const double& range) { return range * urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION double drand(const double& start, const double& end) { return drand(end - start) + start; } // Box-muller method for drawing a standard normal distributed random // number KOKKOS_INLINE_FUNCTION double normal() { constexpr auto two_pi = 2 * Kokkos::numbers::pi_v; const double u = drand(); const double v = drand(); const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u)); const double theta = v * two_pi; return r * Kokkos::cos(theta); } KOKKOS_INLINE_FUNCTION double normal(const double& mean, const double& std_dev = 1.0) { return mean + normal() * std_dev; } }; template class Random_XorShift1024_Pool { public: using device_type = typename DeviceType::device_type; private: using execution_space = typename device_type::execution_space; using locks_type = View; using int_view_type = View; using state_data_type = View; locks_type locks_ = {}; state_data_type state_ = {}; int_view_type p_ = {}; int num_states_ = {}; int padding_ = {}; friend class Random_XorShift1024; public: using generator_type = Random_XorShift1024; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 KOKKOS_DEFAULTED_FUNCTION Random_XorShift1024_Pool() = default; KOKKOS_DEFAULTED_FUNCTION Random_XorShift1024_Pool( Random_XorShift1024_Pool const&) = default; KOKKOS_DEFAULTED_FUNCTION Random_XorShift1024_Pool& operator=( Random_XorShift1024_Pool const&) = default; #else Random_XorShift1024_Pool() = default; #endif Random_XorShift1024_Pool(uint64_t seed) { num_states_ = 0; init(seed, execution_space().concurrency()); } void init(uint64_t seed, int num_states) { if (seed == 0) seed 
= uint64_t(1318319); // I only want to pad on CPU like archs (less than 1000 threads). 64 is a // magic number, or random number I just wanted something not too large and // not too small. 64 sounded fine. padding_ = num_states < 1000 ? 64 : 1; num_states_ = num_states; locks_ = locks_type("Kokkos::Random_XorShift1024::locks", num_states_, padding_); state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_); p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_, padding_); typename state_data_type::HostMirror h_state = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, state_); typename locks_type::HostMirror h_lock = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, locks_); typename int_view_type::HostMirror h_p = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, p_); // Execute on the HostMirror's default execution space. Random_XorShift64 gen(seed, 0); for (int i = 0; i < 17; i++) gen.rand(); for (int i = 0; i < num_states_; i++) { for (int j = 0; j < 16; j++) { int n1 = gen.rand(); int n2 = gen.rand(); int n3 = gen.rand(); int n4 = gen.rand(); h_state(i, j) = (((static_cast(n1)) & 0xffff) << 00) | (((static_cast(n2)) & 0xffff) << 16) | (((static_cast(n3)) & 0xffff) << 32) | (((static_cast(n4)) & 0xffff) << 48); } h_p(i, 0) = 0; h_lock(i, 0) = 0; } deep_copy(state_, h_state); deep_copy(locks_, h_lock); } KOKKOS_INLINE_FUNCTION Random_XorShift1024 get_state() const { KOKKOS_EXPECTS(num_states_ > 0); const int i = Impl::Random_UniqueIndex::get_state_idx(locks_); return Random_XorShift1024(state_, p_(i, 0), i); }; // NOTE: state_idx MUST be unique and less than num_states KOKKOS_INLINE_FUNCTION Random_XorShift1024 get_state(const int state_idx) const { return Random_XorShift1024(state_, p_(state_idx, 0), state_idx); } KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift1024& state) const { for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; p_(state.state_idx_, 0) = state.p_; // Release 
the lock only after the state has been updated in memory Kokkos::memory_fence(); locks_(state.state_idx_, 0) = 0; } }; namespace Impl { template struct fill_random_functor_begin_end; template struct fill_random_functor_begin_end { ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; using Rand = rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_) : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION void operator()(IndexType) const { typename RandomPool::generator_type gen = rand_pool.get_state(); a() = Rand::draw(gen, begin, end); rand_pool.free_state(gen); } }; template struct fill_random_functor_begin_end { ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; using Rand = rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_) : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); for (IndexType j = 0; j < loops; j++) { const IndexType idx = i * loops + j; if (idx < static_cast(a.extent(0))) a(idx) = Rand::draw(gen, begin, end); } rand_pool.free_state(gen); } }; template struct fill_random_functor_begin_end { ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; using Rand = rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_) : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); for (IndexType j = 0; j < loops; j++) { const IndexType idx = i * loops + j; if (idx < 
static_cast(a.extent(0))) { for (IndexType k = 0; k < static_cast(a.extent(1)); k++) a(idx, k) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; template struct fill_random_functor_begin_end { ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; using Rand = rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_) : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); for (IndexType j = 0; j < loops; j++) { const IndexType idx = i * loops + j; if (idx < static_cast(a.extent(0))) { for (IndexType k = 0; k < static_cast(a.extent(1)); k++) for (IndexType l = 0; l < static_cast(a.extent(2)); l++) a(idx, k, l) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; template struct fill_random_functor_begin_end { ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; using Rand = rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_) : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); for (IndexType j = 0; j < loops; j++) { const IndexType idx = i * loops + j; if (idx < static_cast(a.extent(0))) { for (IndexType k = 0; k < static_cast(a.extent(1)); k++) for (IndexType l = 0; l < static_cast(a.extent(2)); l++) for (IndexType m = 0; m < static_cast(a.extent(3)); m++) a(idx, k, l, m) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; template struct fill_random_functor_begin_end { ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; using Rand = rand; fill_random_functor_begin_end(ViewType 
a_, RandomPool rand_pool_, typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_) : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); for (IndexType j = 0; j < loops; j++) { const IndexType idx = i * loops + j; if (idx < static_cast(a.extent(0))) { for (IndexType l = 0; l < static_cast(a.extent(1)); l++) for (IndexType m = 0; m < static_cast(a.extent(2)); m++) for (IndexType n = 0; n < static_cast(a.extent(3)); n++) for (IndexType o = 0; o < static_cast(a.extent(4)); o++) a(idx, l, m, n, o) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; template struct fill_random_functor_begin_end { ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; using Rand = rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_) : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); for (IndexType j = 0; j < loops; j++) { const IndexType idx = i * loops + j; if (idx < static_cast(a.extent(0))) { for (IndexType k = 0; k < static_cast(a.extent(1)); k++) for (IndexType l = 0; l < static_cast(a.extent(2)); l++) for (IndexType m = 0; m < static_cast(a.extent(3)); m++) for (IndexType n = 0; n < static_cast(a.extent(4)); n++) for (IndexType o = 0; o < static_cast(a.extent(5)); o++) a(idx, k, l, m, n, o) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; template struct fill_random_functor_begin_end { ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; using Rand = rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, typename ViewType::const_value_type begin_, typename ViewType::const_value_type 
end_) : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); for (IndexType j = 0; j < loops; j++) { const IndexType idx = i * loops + j; if (idx < static_cast(a.extent(0))) { for (IndexType k = 0; k < static_cast(a.extent(1)); k++) for (IndexType l = 0; l < static_cast(a.extent(2)); l++) for (IndexType m = 0; m < static_cast(a.extent(3)); m++) for (IndexType n = 0; n < static_cast(a.extent(4)); n++) for (IndexType o = 0; o < static_cast(a.extent(5)); o++) for (IndexType p = 0; p < static_cast(a.extent(6)); p++) a(idx, k, l, m, n, o, p) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; template struct fill_random_functor_begin_end { ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; using Rand = rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_) : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); for (IndexType j = 0; j < loops; j++) { const IndexType idx = i * loops + j; if (idx < static_cast(a.extent(0))) { for (IndexType k = 0; k < static_cast(a.extent(1)); k++) for (IndexType l = 0; l < static_cast(a.extent(2)); l++) for (IndexType m = 0; m < static_cast(a.extent(3)); m++) for (IndexType n = 0; n < static_cast(a.extent(4)); n++) for (IndexType o = 0; o < static_cast(a.extent(5)); o++) for (IndexType p = 0; p < static_cast(a.extent(6)); p++) for (IndexType q = 0; q < static_cast(a.extent(7)); q++) a(idx, k, l, m, n, o, p, q) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; template void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, typename ViewType::const_value_type begin, typename ViewType::const_value_type end) 
{ int64_t LDA = a.extent(0); if (LDA > 0) parallel_for( "Kokkos::fill_random", Kokkos::RangePolicy(exec, 0, (LDA + 127) / 128), Impl::fill_random_functor_begin_end( a, g, begin, end)); } } // namespace Impl template void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, typename ViewType::const_value_type begin, typename ViewType::const_value_type end) { Impl::apply_to_view_of_static_rank( [&](auto dst) { Kokkos::Impl::fill_random(exec, dst, g, begin, end); }, a); } template void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, typename ViewType::const_value_type range) { fill_random(exec, a, g, 0, range); } template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin, typename ViewType::const_value_type end) { Kokkos::fence( "fill_random: fence before since no execution space instance provided"); typename ViewType::execution_space exec; fill_random(exec, a, g, begin, end); exec.fence( "fill_random: fence after since no execution space instance provided"); } template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) { Kokkos::fence( "fill_random: fence before since no execution space instance provided"); typename ViewType::execution_space exec; fill_random(exec, a, g, 0, range); exec.fence( "fill_random: fence after since no execution space instance provided"); } } // namespace Kokkos #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM #undef KOKKOS_IMPL_PUBLIC_INCLUDE #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM #endif #endif kokkos-4.3.01/algorithms/src/Kokkos_Sort.hpp000066400000000000000000000020771461675637500210530ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. 
Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_SORT_HPP_ #define KOKKOS_SORT_HPP_ #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE #define KOKKOS_IMPL_PUBLIC_INCLUDE #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #endif #include "sorting/Kokkos_BinSortPublicAPI.hpp" #include "sorting/Kokkos_SortPublicAPI.hpp" #include "sorting/Kokkos_SortByKeyPublicAPI.hpp" #include "sorting/Kokkos_NestedSortPublicAPI.hpp" #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #undef KOKKOS_IMPL_PUBLIC_INCLUDE #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #endif #endif kokkos-4.3.01/algorithms/src/Kokkos_StdAlgorithms.hpp000066400000000000000000000105061461675637500227040ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_HPP #define KOKKOS_STD_ALGORITHMS_HPP #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE #define KOKKOS_IMPL_PUBLIC_INCLUDE #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS #endif /// \file Kokkos_StdAlgorithms.hpp /// \brief Kokkos counterparts for Standard C++ Library algorithms #include "std_algorithms/impl/Kokkos_Constraints.hpp" #include "std_algorithms/impl/Kokkos_RandomAccessIterator.hpp" #include "std_algorithms/Kokkos_BeginEnd.hpp" // distance #include "std_algorithms/Kokkos_Distance.hpp" // note that we categorize below the headers // following the std classification. // modifying ops #include "std_algorithms/Kokkos_IterSwap.hpp" // non-modifying sequence #include "std_algorithms/Kokkos_AdjacentFind.hpp" #include "std_algorithms/Kokkos_Count.hpp" #include "std_algorithms/Kokkos_CountIf.hpp" #include "std_algorithms/Kokkos_AllOf.hpp" #include "std_algorithms/Kokkos_AnyOf.hpp" #include "std_algorithms/Kokkos_NoneOf.hpp" #include "std_algorithms/Kokkos_Equal.hpp" #include "std_algorithms/Kokkos_Find.hpp" #include "std_algorithms/Kokkos_FindIf.hpp" #include "std_algorithms/Kokkos_FindIfNot.hpp" #include "std_algorithms/Kokkos_FindEnd.hpp" #include "std_algorithms/Kokkos_FindFirstOf.hpp" #include "std_algorithms/Kokkos_ForEach.hpp" #include "std_algorithms/Kokkos_ForEachN.hpp" #include "std_algorithms/Kokkos_LexicographicalCompare.hpp" #include "std_algorithms/Kokkos_Mismatch.hpp" #include "std_algorithms/Kokkos_Search.hpp" #include "std_algorithms/Kokkos_SearchN.hpp" // modifying sequence #include "std_algorithms/Kokkos_Fill.hpp" #include "std_algorithms/Kokkos_FillN.hpp" #include "std_algorithms/Kokkos_Replace.hpp" #include "std_algorithms/Kokkos_ReplaceIf.hpp" #include "std_algorithms/Kokkos_ReplaceCopyIf.hpp" #include "std_algorithms/Kokkos_ReplaceCopy.hpp" #include "std_algorithms/Kokkos_Copy.hpp" #include "std_algorithms/Kokkos_CopyN.hpp" #include 
"std_algorithms/Kokkos_CopyBackward.hpp" #include "std_algorithms/Kokkos_CopyIf.hpp" #include "std_algorithms/Kokkos_Transform.hpp" #include "std_algorithms/Kokkos_Generate.hpp" #include "std_algorithms/Kokkos_GenerateN.hpp" #include "std_algorithms/Kokkos_Reverse.hpp" #include "std_algorithms/Kokkos_ReverseCopy.hpp" #include "std_algorithms/Kokkos_Move.hpp" #include "std_algorithms/Kokkos_MoveBackward.hpp" #include "std_algorithms/Kokkos_SwapRanges.hpp" #include "std_algorithms/Kokkos_Unique.hpp" #include "std_algorithms/Kokkos_UniqueCopy.hpp" #include "std_algorithms/Kokkos_Rotate.hpp" #include "std_algorithms/Kokkos_RotateCopy.hpp" #include "std_algorithms/Kokkos_Remove.hpp" #include "std_algorithms/Kokkos_RemoveIf.hpp" #include "std_algorithms/Kokkos_RemoveCopy.hpp" #include "std_algorithms/Kokkos_RemoveCopyIf.hpp" #include "std_algorithms/Kokkos_ShiftLeft.hpp" #include "std_algorithms/Kokkos_ShiftRight.hpp" // sorting #include "std_algorithms/Kokkos_IsSortedUntil.hpp" #include "std_algorithms/Kokkos_IsSorted.hpp" // min/max element #include "std_algorithms/Kokkos_MinElement.hpp" #include "std_algorithms/Kokkos_MaxElement.hpp" #include "std_algorithms/Kokkos_MinMaxElement.hpp" // partitioning #include "std_algorithms/Kokkos_IsPartitioned.hpp" #include "std_algorithms/Kokkos_PartitionCopy.hpp" #include "std_algorithms/Kokkos_PartitionPoint.hpp" // numeric #include "std_algorithms/Kokkos_AdjacentDifference.hpp" #include "std_algorithms/Kokkos_Reduce.hpp" #include "std_algorithms/Kokkos_TransformReduce.hpp" #include "std_algorithms/Kokkos_ExclusiveScan.hpp" #include "std_algorithms/Kokkos_TransformExclusiveScan.hpp" #include "std_algorithms/Kokkos_InclusiveScan.hpp" #include "std_algorithms/Kokkos_TransformInclusiveScan.hpp" #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS #undef KOKKOS_IMPL_PUBLIC_INCLUDE #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS #endif #endif 
kokkos-4.3.01/algorithms/src/sorting/000077500000000000000000000000001461675637500175515ustar00rootroot00000000000000kokkos-4.3.01/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp000066400000000000000000000104231461675637500244660ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_BIN_OPS_PUBLIC_API_HPP_ #define KOKKOS_BIN_OPS_PUBLIC_API_HPP_ #include #include namespace Kokkos { template struct BinOp1D { int max_bins_ = {}; double mul_ = {}; double min_ = {}; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 KOKKOS_DEPRECATED BinOp1D() = default; #else BinOp1D() = delete; #endif // Construct BinOp with number of bins, minimum value and maximum value BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, typename KeyViewType::const_value_type max) : max_bins_(max_bins__ + 1), // Cast to double to avoid possible overflow when using integer mul_(static_cast(max_bins__) / (static_cast(max) - static_cast(min))), min_(static_cast(min)) { // For integral types the number of bins may be larger than the range // in which case we can exactly have one unique value per bin // and then don't need to sort bins. 
if (std::is_integral::value && (static_cast(max) - static_cast(min)) <= static_cast(max_bins__)) { mul_ = 1.; } } // Determine bin index from key value template KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { return static_cast(mul_ * (static_cast(keys(i)) - min_)); } // Return maximum bin index + 1 KOKKOS_INLINE_FUNCTION int max_bins() const { return max_bins_; } // Compare to keys within a bin if true new_val will be put before old_val template KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, iType2& i2) const { return keys(i1) < keys(i2); } }; template struct BinOp3D { int max_bins_[3] = {}; double mul_[3] = {}; double min_[3] = {}; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 KOKKOS_DEPRECATED BinOp3D() = default; #else BinOp3D() = delete; #endif BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], typename KeyViewType::const_value_type max[]) { max_bins_[0] = max_bins__[0]; max_bins_[1] = max_bins__[1]; max_bins_[2] = max_bins__[2]; mul_[0] = static_cast(max_bins__[0]) / (static_cast(max[0]) - static_cast(min[0])); mul_[1] = static_cast(max_bins__[1]) / (static_cast(max[1]) - static_cast(min[1])); mul_[2] = static_cast(max_bins__[2]) / (static_cast(max[2]) - static_cast(min[2])); min_[0] = static_cast(min[0]); min_[1] = static_cast(min[1]); min_[2] = static_cast(min[2]); } template KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + int(mul_[1] * (keys(i, 1) - min_[1]))) * max_bins_[2]) + int(mul_[2] * (keys(i, 2) - min_[2]))); } KOKKOS_INLINE_FUNCTION int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } template KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, iType2& i2) const { if (keys(i1, 0) > keys(i2, 0)) return true; else if (keys(i1, 0) == keys(i2, 0)) { if (keys(i1, 1) > keys(i2, 1)) return true; else if (keys(i1, 1) == keys(i2, 1)) { if (keys(i1, 2) > keys(i2, 2)) return 
true; } } return false; } }; } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp000066400000000000000000000352601461675637500246620ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_BIN_SORT_PUBLIC_API_HPP_ #define KOKKOS_BIN_SORT_PUBLIC_API_HPP_ #include "Kokkos_BinOpsPublicAPI.hpp" #include "impl/Kokkos_CopyOpsForBinSortImpl.hpp" #include #include namespace Kokkos { template class BinSort { public: template struct copy_functor { using src_view_type = typename SrcViewType::const_type; using copy_op = Impl::CopyOp; DstViewType dst_values; src_view_type src_values; int dst_offset; copy_functor(DstViewType const& dst_values_, int const& dst_offset_, SrcViewType const& src_values_) : dst_values(dst_values_), src_values(src_values_), dst_offset(dst_offset_) {} KOKKOS_INLINE_FUNCTION void operator()(const int& i) const { copy_op::copy(dst_values, i + dst_offset, src_values, i); } }; template struct copy_permute_functor { // If a Kokkos::View then can generate constant random access // otherwise can only use the constant type. 
using src_view_type = std::conditional_t< Kokkos::is_view::value, Kokkos::View= 230700) , Kokkos::MemoryTraits #endif >, typename SrcViewType::const_type>; using perm_view_type = typename PermuteViewType::const_type; using copy_op = Impl::CopyOp; DstViewType dst_values; perm_view_type sort_order; src_view_type src_values; int src_offset; copy_permute_functor(DstViewType const& dst_values_, PermuteViewType const& sort_order_, SrcViewType const& src_values_, int const& src_offset_) : dst_values(dst_values_), sort_order(sort_order_), src_values(src_values_), src_offset(src_offset_) {} KOKKOS_INLINE_FUNCTION void operator()(const int& i) const { copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); } }; // Naming this alias "execution_space" would be problematic since it would be // considered as execution space for the various functors which might use // another execution space through sort() or create_permute_vector(). using exec_space = typename Space::execution_space; using bin_op_type = BinSortOp; struct bin_count_tag {}; struct bin_offset_tag {}; struct bin_binning_tag {}; struct bin_sort_bins_tag {}; public: using size_type = SizeType; using value_type = size_type; using offset_type = Kokkos::View; using bin_count_type = Kokkos::View; using const_key_view_type = typename KeyViewType::const_type; // If a Kokkos::View then can generate constant random access // otherwise can only use the constant type. 
using const_rnd_key_view_type = std::conditional_t< Kokkos::is_view::value, Kokkos::View >, const_key_view_type>; using non_const_key_scalar = typename KeyViewType::non_const_value_type; using const_key_scalar = typename KeyViewType::const_value_type; using bin_count_atomic_type = Kokkos::View >; private: const_key_view_type keys; const_rnd_key_view_type keys_rnd; public: BinSortOp bin_op; offset_type bin_offsets; bin_count_atomic_type bin_count_atomic; bin_count_type bin_count_const; offset_type sort_order; int range_begin; int range_end; bool sort_within_bins; public: #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 KOKKOS_DEPRECATED BinSort() = default; #else BinSort() = delete; #endif //---------------------------------------- // Constructor: takes the keys, the binning_operator and optionally whether to // sort within bins (default false) template BinSort(const ExecutionSpace& exec, const_key_view_type keys_, int range_begin_, int range_end_, BinSortOp bin_op_, bool sort_within_bins_ = false) : keys(keys_), keys_rnd(keys_), bin_op(bin_op_), bin_offsets(), bin_count_atomic(), bin_count_const(), sort_order(), range_begin(range_begin_), range_end(range_end_), sort_within_bins(sort_within_bins_) { static_assert( Kokkos::SpaceAccessibility::accessible, "The provided execution space must be able to access the memory space " "BinSort was initialized with!"); if (bin_op.max_bins() <= 0) Kokkos::abort( "The number of bins in the BinSortOp object must be greater than 0!"); bin_count_atomic = Kokkos::View( "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); bin_count_const = bin_count_atomic; bin_offsets = offset_type(view_alloc(exec, WithoutInitializing, "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), bin_op.max_bins()); sort_order = offset_type(view_alloc(exec, WithoutInitializing, "Kokkos::SortImpl::BinSortFunctor::sort_order"), range_end - range_begin); } BinSort(const_key_view_type keys_, int range_begin_, int range_end_, BinSortOp bin_op_, bool 
sort_within_bins_ = false) : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, sort_within_bins_) {} template BinSort(const ExecutionSpace& exec, const_key_view_type keys_, BinSortOp bin_op_, bool sort_within_bins_ = false) : BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} BinSort(const_key_view_type keys_, BinSortOp bin_op_, bool sort_within_bins_ = false) : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} //---------------------------------------- // Create the permutation vector, the bin_offset array and the bin_count // array. Can be called again if keys changed template void create_permute_vector(const ExecutionSpace& exec) { static_assert( Kokkos::SpaceAccessibility::accessible, "The provided execution space must be able to access the memory space " "BinSort was initialized with!"); const size_t len = range_end - range_begin; Kokkos::parallel_for( "Kokkos::Sort::BinCount", Kokkos::RangePolicy(exec, 0, len), *this); Kokkos::parallel_scan("Kokkos::Sort::BinOffset", Kokkos::RangePolicy( exec, 0, bin_op.max_bins()), *this); Kokkos::deep_copy(exec, bin_count_atomic, 0); Kokkos::parallel_for( "Kokkos::Sort::BinBinning", Kokkos::RangePolicy(exec, 0, len), *this); if (sort_within_bins) Kokkos::parallel_for( "Kokkos::Sort::BinSort", Kokkos::RangePolicy( exec, 0, bin_op.max_bins()), *this); } // Create the permutation vector, the bin_offset array and the bin_count // array. 
Can be called again if keys changed void create_permute_vector() { Kokkos::fence("Kokkos::Binsort::create_permute_vector: before"); exec_space e{}; create_permute_vector(e); e.fence("Kokkos::Binsort::create_permute_vector: after"); } // Sort a subset of a view with respect to the first dimension using the // permutation array template void sort(const ExecutionSpace& exec, ValuesViewType const& values, int values_range_begin, int values_range_end) const { if (values.extent(0) == 0) { return; } static_assert( Kokkos::SpaceAccessibility::accessible, "The provided execution space must be able to access the memory space " "BinSort was initialized with!"); static_assert( Kokkos::SpaceAccessibility< ExecutionSpace, typename ValuesViewType::memory_space>::accessible, "The provided execution space must be able to access the memory space " "of the View argument!"); const size_t len = range_end - range_begin; const size_t values_len = values_range_end - values_range_begin; if (len != values_len) { Kokkos::abort( "BinSort::sort: values range length != permutation vector length"); } using scratch_view_type = Kokkos::View; scratch_view_type sorted_values( view_alloc(exec, WithoutInitializing, "Kokkos::SortImpl::BinSortFunctor::sorted_values"), values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, values.rank_dynamic > 1 ? values.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, values.rank_dynamic > 2 ? values.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, values.rank_dynamic > 3 ? values.extent(3) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, values.rank_dynamic > 4 ? values.extent(4) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, values.rank_dynamic > 5 ? values.extent(5) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, values.rank_dynamic > 6 ? values.extent(6) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, values.rank_dynamic > 7 ? 
values.extent(7) : KOKKOS_IMPL_CTOR_DEFAULT_ARG); { copy_permute_functor functor(sorted_values, sort_order, values, values_range_begin - range_begin); parallel_for("Kokkos::Sort::CopyPermute", Kokkos::RangePolicy(exec, 0, len), functor); } { copy_functor functor( values, range_begin, sorted_values); parallel_for("Kokkos::Sort::Copy", Kokkos::RangePolicy(exec, 0, len), functor); } } // Sort a subset of a view with respect to the first dimension using the // permutation array template void sort(ValuesViewType const& values, int values_range_begin, int values_range_end) const { Kokkos::fence("Kokkos::Binsort::sort: before"); exec_space exec; sort(exec, values, values_range_begin, values_range_end); exec.fence("Kokkos::BinSort:sort: after"); } template void sort(ExecutionSpace const& exec, ValuesViewType const& values) const { this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin); } template void sort(ValuesViewType const& values) const { this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); } // Get the permutation vector KOKKOS_INLINE_FUNCTION offset_type get_permute_vector() const { return sort_order; } // Get the start offsets for each bin KOKKOS_INLINE_FUNCTION offset_type get_bin_offsets() const { return bin_offsets; } // Get the count for each bin KOKKOS_INLINE_FUNCTION bin_count_type get_bin_count() const { return bin_count_const; } public: KOKKOS_INLINE_FUNCTION void operator()(const bin_count_tag& /*tag*/, const int i) const { const int j = range_begin + i; bin_count_atomic(bin_op.bin(keys, j))++; } KOKKOS_INLINE_FUNCTION void operator()(const bin_offset_tag& /*tag*/, const int i, value_type& offset, const bool& final) const { if (final) { bin_offsets(i) = offset; } offset += bin_count_const(i); } KOKKOS_INLINE_FUNCTION void operator()(const bin_binning_tag& /*tag*/, const int i) const { const int j = range_begin + i; const int bin = bin_op.bin(keys, j); const int count = bin_count_atomic(bin)++; sort_order(bin_offsets(bin) 
+ count) = j; } KOKKOS_INLINE_FUNCTION void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { auto bin_size = bin_count_const(i); if (bin_size <= 1) return; constexpr bool use_std_sort = std::is_same_v; int lower_bound = bin_offsets(i); int upper_bound = lower_bound + bin_size; // Switching to std::sort for more than 10 elements has been found // reasonable experimentally. if (use_std_sort && bin_size > 10) { KOKKOS_IF_ON_HOST( (std::sort(&sort_order(lower_bound), &sort_order(upper_bound), [this](int p, int q) { return bin_op(keys_rnd, p, q); });)) } else { for (int k = lower_bound + 1; k < upper_bound; ++k) { int old_idx = sort_order(k); int j = k - 1; while (j >= lower_bound) { int new_idx = sort_order(j); if (!bin_op(keys_rnd, old_idx, new_idx)) break; sort_order(j + 1) = new_idx; --j; } sort_order(j + 1) = old_idx; } } } }; } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp000066400000000000000000000105151461675637500253700ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ #define KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ #include "impl/Kokkos_NestedSortImpl.hpp" #include #include namespace Kokkos { namespace Experimental { template KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, const ViewType& view) { Impl::sort_nested_impl(t, view, nullptr, Experimental::Impl::StdAlgoLessThanBinaryPredicate< typename ViewType::non_const_value_type>(), Impl::NestedRange()); } template KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, const ViewType& view, const Comparator& comp) { Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); } template KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, const KeyViewType& keyView, const ValueViewType& valueView) { Impl::sort_nested_impl(t, keyView, valueView, Experimental::Impl::StdAlgoLessThanBinaryPredicate< typename KeyViewType::non_const_value_type>(), Impl::NestedRange()); } template KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, const KeyViewType& keyView, const ValueViewType& valueView, const Comparator& comp) { Impl::sort_nested_impl(t, keyView, valueView, comp, Impl::NestedRange()); } template KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, const ViewType& view) { Impl::sort_nested_impl(t, view, nullptr, Experimental::Impl::StdAlgoLessThanBinaryPredicate< typename ViewType::non_const_value_type>(), Impl::NestedRange()); } template KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, const ViewType& view, const Comparator& comp) { Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); } template KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, const KeyViewType& keyView, const ValueViewType& valueView) { Impl::sort_nested_impl(t, keyView, valueView, Experimental::Impl::StdAlgoLessThanBinaryPredicate< typename KeyViewType::non_const_value_type>(), Impl::NestedRange()); } 
template KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, const KeyViewType& keyView, const ValueViewType& valueView, const Comparator& comp) { Impl::sort_nested_impl(t, keyView, valueView, comp, Impl::NestedRange()); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp000066400000000000000000000115731461675637500251760ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_ #define KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_ #include "./impl/Kokkos_SortByKeyImpl.hpp" #include #include namespace Kokkos::Experimental { // --------------------------------------------------------------- // basic overloads // --------------------------------------------------------------- template void sort_by_key( const ExecutionSpace& exec, const Kokkos::View& keys, const Kokkos::View& values) { // constraints using KeysType = Kokkos::View; using ValuesType = Kokkos::View; ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys); ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values); static_assert(SpaceAccessibility::accessible, "Kokkos::sort: execution space instance is not able to access " "the memory space of the keys View argument!"); static_assert( SpaceAccessibility::accessible, "Kokkos::sort: execution space instance is not able to access " "the memory space of the values View argument!"); static_assert(KeysType::static_extent(0) 
== 0 || ValuesType::static_extent(0) == 0 || KeysType::static_extent(0) == ValuesType::static_extent(0)); if (values.size() != keys.size()) Kokkos::abort((std::string("values and keys extents must be the same. The " "values extent is ") + std::to_string(values.size()) + ", and the keys extent is " + std::to_string(keys.size()) + ".") .c_str()); if (keys.extent(0) <= 1) { return; } ::Kokkos::Impl::sort_by_key_device_view_without_comparator(exec, keys, values); } // --------------------------------------------------------------- // overloads supporting a custom comparator // --------------------------------------------------------------- template void sort_by_key( const ExecutionSpace& exec, const Kokkos::View& keys, const Kokkos::View& values, const ComparatorType& comparator) { // constraints using KeysType = Kokkos::View; using ValuesType = Kokkos::View; ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys); ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values); static_assert(SpaceAccessibility::accessible, "Kokkos::sort: execution space instance is not able to access " "the memory space of the keys View argument!"); static_assert( SpaceAccessibility::accessible, "Kokkos::sort: execution space instance is not able to access " "the memory space of the values View argument!"); static_assert(KeysType::static_extent(0) == 0 || ValuesType::static_extent(0) == 0 || KeysType::static_extent(0) == ValuesType::static_extent(0)); if (values.size() != keys.size()) Kokkos::abort((std::string("values and keys extents must be the same. 
The " "values extent is ") + std::to_string(values.size()) + ", and the keys extent is " + std::to_string(keys.size()) + ".") .c_str()); if (keys.extent(0) <= 1) { return; } ::Kokkos::Impl::sort_by_key_device_view_with_comparator(exec, keys, values, comparator); } } // namespace Kokkos::Experimental #endif kokkos-4.3.01/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp000066400000000000000000000156561461675637500242400ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_SORT_PUBLIC_API_HPP_ #define KOKKOS_SORT_PUBLIC_API_HPP_ #include "./impl/Kokkos_SortImpl.hpp" #include #include #include namespace Kokkos { // --------------------------------------------------------------- // basic overloads // --------------------------------------------------------------- template void sort(const ExecutionSpace& exec, const Kokkos::View& view) { // constraints using ViewType = Kokkos::View; using MemSpace = typename ViewType::memory_space; static_assert( ViewType::rank == 1 && (std::is_same_v || std::is_same_v || std::is_same_v), "Kokkos::sort without comparator: supports 1D Views with LayoutRight, " "LayoutLeft or LayoutStride."); static_assert(SpaceAccessibility::accessible, "Kokkos::sort: execution space instance is not able to access " "the memory space of the " "View argument!"); if (view.extent(0) <= 1) { return; } if constexpr (Impl::better_off_calling_std_sort_v) { exec.fence("Kokkos::sort without comparator use std::sort"); auto first = 
::Kokkos::Experimental::begin(view); auto last = ::Kokkos::Experimental::end(view); std::sort(first, last); } else { Impl::sort_device_view_without_comparator(exec, view); } } template void sort(const Kokkos::View& view) { using ViewType = Kokkos::View; static_assert(ViewType::rank == 1, "Kokkos::sort: currently only supports rank-1 Views."); Kokkos::fence("Kokkos::sort: before"); if (view.extent(0) <= 1) { return; } typename ViewType::execution_space exec; sort(exec, view); exec.fence("Kokkos::sort: fence after sorting"); } // --------------------------------------------------------------- // overloads supporting a custom comparator // --------------------------------------------------------------- template void sort(const ExecutionSpace& exec, const Kokkos::View& view, const ComparatorType& comparator) { // constraints using ViewType = Kokkos::View; using MemSpace = typename ViewType::memory_space; static_assert( ViewType::rank == 1 && (std::is_same_v || std::is_same_v || std::is_same_v), "Kokkos::sort with comparator: supports 1D Views with LayoutRight, " "LayoutLeft or LayoutStride."); static_assert(SpaceAccessibility::accessible, "Kokkos::sort: execution space instance is not able to access " "the memory space of the View argument!"); if (view.extent(0) <= 1) { return; } if constexpr (Impl::better_off_calling_std_sort_v) { exec.fence("Kokkos::sort with comparator use std::sort"); auto first = ::Kokkos::Experimental::begin(view); auto last = ::Kokkos::Experimental::end(view); std::sort(first, last, comparator); } else { Impl::sort_device_view_with_comparator(exec, view, comparator); } } template void sort(const Kokkos::View& view, const ComparatorType& comparator) { using ViewType = Kokkos::View; static_assert( ViewType::rank == 1 && (std::is_same_v || std::is_same_v || std::is_same_v), "Kokkos::sort with comparator: supports 1D Views with LayoutRight, " "LayoutLeft or LayoutStride."); Kokkos::fence("Kokkos::sort with comparator: before"); if (view.extent(0) <= 
1) { return; } typename ViewType::execution_space exec; sort(exec, view, comparator); exec.fence("Kokkos::sort with comparator: fence after sorting"); } // --------------------------------------------------------------- // overloads for sorting a view with a subrange // specified via integers begin, end // --------------------------------------------------------------- template std::enable_if_t::value> sort( const ExecutionSpace& exec, ViewType view, size_t const begin, size_t const end) { // view must be rank-1 because the Impl::min_max_functor // used below only works for rank-1 views for now static_assert(ViewType::rank == 1, "Kokkos::sort: currently only supports rank-1 Views."); if (view.extent(0) <= 1) { return; } using range_policy = Kokkos::RangePolicy; using CompType = BinOp1D; Kokkos::MinMaxScalar result; Kokkos::MinMax reducer(result); parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end), Impl::min_max_functor(view), reducer); if (result.min_val == result.max_val) return; BinSort bin_sort( exec, view, begin, end, CompType((end - begin) / 2, result.min_val, result.max_val), true); bin_sort.create_permute_vector(exec); bin_sort.sort(exec, view, begin, end); } template void sort(ViewType view, size_t const begin, size_t const end) { // same constraints as the overload above which this gets dispatched to static_assert(ViewType::rank == 1, "Kokkos::sort: currently only supports rank-1 Views."); Kokkos::fence("Kokkos::sort: before"); if (view.extent(0) <= 1) { return; } typename ViewType::execution_space exec; sort(exec, view, begin, end); exec.fence("Kokkos::Sort: fence after sorting"); } } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/sorting/impl/000077500000000000000000000000001461675637500205125ustar00rootroot00000000000000kokkos-4.3.01/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp000066400000000000000000000035101461675637500267310ustar00rootroot00000000000000//@HEADER // 
************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ #define KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ #include #include namespace Kokkos { namespace Impl { template struct CopyOp; template struct CopyOp { KOKKOS_INLINE_FUNCTION static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, size_t i_src) { dst(i_dst) = src(i_src); } }; template struct CopyOp { KOKKOS_INLINE_FUNCTION static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, size_t i_src) { for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); } }; template struct CopyOp { KOKKOS_INLINE_FUNCTION static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, size_t i_src) { for (int j = 0; j < dst.extent(1); j++) for (int k = 0; k < dst.extent(2); k++) dst(i_dst, j, k) = src(i_src, j, k); } }; } // namespace Impl } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp000066400000000000000000000101401461675637500254540ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_NESTED_SORT_IMPL_HPP_ #define KOKKOS_NESTED_SORT_IMPL_HPP_ #include namespace Kokkos { namespace Experimental { namespace Impl { // true for TeamVectorRange, false for ThreadVectorRange template struct NestedRange {}; // Specialization for team-level template <> struct NestedRange { template KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { return Kokkos::TeamVectorRange(t, len); } template KOKKOS_FUNCTION static void barrier(const TeamMember& t) { t.team_barrier(); } }; // Specialization for thread-level template <> struct NestedRange { template KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { return Kokkos::ThreadVectorRange(t, len); } // Barrier is no-op, as vector lanes of a thread are implicitly synchronized // after parallel region template KOKKOS_FUNCTION static void barrier(const TeamMember&) {} }; // When just doing sort (not sort_by_key), use nullptr_t for ValueViewType. // This only takes the NestedRange instance for template arg deduction. template KOKKOS_INLINE_FUNCTION void sort_nested_impl( const TeamMember& t, const KeyViewType& keyView, [[maybe_unused]] const ValueViewType& valueView, const Comparator& comp, const NestedRange) { using SizeType = typename KeyViewType::size_type; using KeyType = typename KeyViewType::non_const_value_type; using Range = NestedRange; SizeType n = keyView.extent(0); SizeType npot = 1; SizeType levels = 0; // FIXME: ceiling power-of-two is a common thing to need - make it a utility while (npot < n) { levels++; npot <<= 1; } for (SizeType i = 0; i < levels; i++) { for (SizeType j = 0; j <= i; j++) { // n/2 pairs of items are compared in parallel Kokkos::parallel_for(Range::create(t, npot / 2), [=](const SizeType k) { // How big are the brown/pink boxes? 
// (Terminology comes from Wikipedia diagram) // https://commons.wikimedia.org/wiki/File:BitonicSort.svg#/media/File:BitonicSort.svg SizeType boxSize = SizeType(2) << (i - j); // Which box contains this thread? SizeType boxID = k >> (i - j); // k * 2 / boxSize; SizeType boxStart = boxID << (1 + i - j); // boxID * boxSize SizeType boxOffset = k - (boxStart >> 1); // k - boxID * boxSize / 2; SizeType elem1 = boxStart + boxOffset; // In first phase (j == 0, brown box): within a box, compare with the // opposite value in the box. // In later phases (j > 0, pink box): within a box, compare with fixed // distance (boxSize / 2) apart. SizeType elem2 = (j == 0) ? (boxStart + boxSize - 1 - boxOffset) : (elem1 + boxSize / 2); if (elem2 < n) { KeyType key1 = keyView(elem1); KeyType key2 = keyView(elem2); if (comp(key2, key1)) { keyView(elem1) = key2; keyView(elem2) = key1; if constexpr (!std::is_same_v) { Kokkos::kokkos_swap(valueView(elem1), valueView(elem2)); } } } }); Range::barrier(t); } } } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp000066400000000000000000000357431461675637500252750ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_ #define KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_ #include #if defined(KOKKOS_ENABLE_CUDA) // Workaround for `Instruction 'shfl' without '.sync' is not supported on // .target sm_70 and higher from PTX ISA version 6.4`. // Also see https://github.com/NVIDIA/cub/pull/170. #if !defined(CUB_USE_COOPERATIVE_GROUPS) #define CUB_USE_COOPERATIVE_GROUPS #endif #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" #if defined(KOKKOS_COMPILER_CLANG) // Some versions of Clang fail to compile Thrust, failing with errors like // this: // /thrust/system/cuda/detail/core/agent_launcher.h:557:11: // error: use of undeclared identifier 'va_printf' // The exact combination of versions for Clang and Thrust (or CUDA) for this // failure was not investigated, however even very recent version combination // (Clang 10.0.0 and Cuda 10.0) demonstrated failure. // // Defining _CubLog here locally allows us to avoid that code path, however // disabling some debugging diagnostics #pragma push_macro("_CubLog") #ifdef _CubLog #undef _CubLog #endif #define _CubLog #include #include #pragma pop_macro("_CubLog") #else #include #include #endif #pragma GCC diagnostic pop #endif #if defined(KOKKOS_ENABLE_ROCTHRUST) #include #include #endif #if defined(KOKKOS_ENABLE_ONEDPL) && \ (ONEDPL_VERSION_MAJOR > 2022 || \ (ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2)) #define KOKKOS_ONEDPL_HAS_SORT_BY_KEY #include #include #endif namespace Kokkos::Impl { template constexpr inline bool is_admissible_to_kokkos_sort_by_key = ::Kokkos::is_view::value&& T::rank() == 1 && (std::is_same::value || std::is_same::value || std::is_same::value); template KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_admissible_to_kokkos_sort_by_key(const ViewType& /* view */) { static_assert(is_admissible_to_kokkos_sort_by_key, "Kokkos::sort_by_key only accepts 1D values View with " 
"LayoutRight, LayoutLeft or LayoutStride."); } // For the fallback implementation for sort_by_key using Kokkos::sort, we need // to consider if Kokkos::sort defers to the fallback implementation that copies // the array to the host and uses std::sort, see // copy_to_host_run_stdsort_copy_back() in impl/Kokkos_SortImpl.hpp. If // sort_on_device_v is true, we assume that std::sort doesn't copy data. // Otherwise, we manually copy all data to the host and provide Kokkos::sort // with a host execution space. template inline constexpr bool sort_on_device_v = false; #if defined(KOKKOS_ENABLE_CUDA) template inline constexpr bool sort_on_device_v = true; template void sort_by_key_cudathrust( const Kokkos::Cuda& exec, const Kokkos::View& keys, const Kokkos::View& values, MaybeComparator&&... maybeComparator) { const auto policy = thrust::cuda::par.on(exec.cuda_stream()); auto keys_first = ::Kokkos::Experimental::begin(keys); auto keys_last = ::Kokkos::Experimental::end(keys); auto values_first = ::Kokkos::Experimental::begin(values); thrust::sort_by_key(policy, keys_first, keys_last, values_first, std::forward(maybeComparator)...); } #endif #if defined(KOKKOS_ENABLE_ROCTHRUST) template inline constexpr bool sort_on_device_v = true; template void sort_by_key_rocthrust( const Kokkos::HIP& exec, const Kokkos::View& keys, const Kokkos::View& values, MaybeComparator&&... 
maybeComparator) { const auto policy = thrust::hip::par.on(exec.hip_stream()); auto keys_first = ::Kokkos::Experimental::begin(keys); auto keys_last = ::Kokkos::Experimental::end(keys); auto values_first = ::Kokkos::Experimental::begin(values); thrust::sort_by_key(policy, keys_first, keys_last, values_first, std::forward(maybeComparator)...); } #endif #if defined(KOKKOS_ENABLE_ONEDPL) template inline constexpr bool sort_on_device_v = std::is_same_v || std::is_same_v; #ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY template void sort_by_key_onedpl( const Kokkos::Experimental::SYCL& exec, const Kokkos::View& keys, const Kokkos::View& values, MaybeComparator&&... maybeComparator) { if (keys.stride(0) != 1 && values.stride(0) != 1) { Kokkos::abort( "SYCL sort_by_key only supports rank-1 Views with stride(0) = 1."); } // Can't use Experimental::begin/end here since the oneDPL then assumes that // the data is on the host. auto queue = exec.sycl_queue(); auto policy = oneapi::dpl::execution::make_device_policy(queue); const int n = keys.extent(0); oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(), std::forward(maybeComparator)...); } #endif #endif template void applyPermutation(const ExecutionSpace& space, const PermutationView& permutation, const ViewType& view) { static_assert(std::is_integral::value); auto view_copy = Kokkos::create_mirror( Kokkos::view_alloc(space, typename ExecutionSpace::memory_space{}, Kokkos::WithoutInitializing), view); Kokkos::deep_copy(space, view_copy, view); Kokkos::parallel_for( "Kokkos::sort_by_key_via_sort::permute_" + view.label(), Kokkos::RangePolicy(space, 0, view.extent(0)), KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); }); } template void sort_by_key_via_sort( const ExecutionSpace& exec, const Kokkos::View& keys, const Kokkos::View& values, MaybeComparator&&... 
maybeComparator) { static_assert(sizeof...(MaybeComparator) <= 1); auto const n = keys.size(); Kokkos::View permute( Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "Kokkos::sort_by_key_via_sort::permute"), n); // iota Kokkos::parallel_for( "Kokkos::sort_by_key_via_sort::iota", Kokkos::RangePolicy(exec, 0, n), KOKKOS_LAMBDA(int i) { permute(i) = i; }); using Layout = typename Kokkos::View::array_layout; if constexpr (!sort_on_device_v) { auto host_keys = Kokkos::create_mirror_view( Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing), keys); auto host_permute = Kokkos::create_mirror_view( Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing), permute); Kokkos::deep_copy(exec, host_keys, keys); Kokkos::deep_copy(exec, host_permute, permute); exec.fence("Kokkos::Impl::sort_by_key_via_sort: before host sort"); Kokkos::DefaultHostExecutionSpace host_exec; if constexpr (sizeof...(MaybeComparator) == 0) { Kokkos::sort( host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) { return host_keys(i) < host_keys(j); }); } else { auto keys_comparator = std::get<0>(std::tuple(maybeComparator...)); Kokkos::sort( host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) { return keys_comparator(host_keys(i), host_keys(j)); }); } host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort"); Kokkos::deep_copy(exec, permute, host_permute); } else { #ifdef KOKKOS_ENABLE_SYCL auto* raw_keys_in_comparator = keys.data(); auto stride = keys.stride(0); if constexpr (sizeof...(MaybeComparator) == 0) { Kokkos::sort( exec, permute, KOKKOS_LAMBDA(int i, int j) { return raw_keys_in_comparator[i * stride] < raw_keys_in_comparator[j * stride]; }); } else { auto keys_comparator = std::get<0>(std::tuple(maybeComparator...)); Kokkos::sort( exec, permute, KOKKOS_LAMBDA(int i, int j) { return keys_comparator(raw_keys_in_comparator[i * stride], raw_keys_in_comparator[j * stride]); }); } #else if constexpr (sizeof...(MaybeComparator) == 0) { Kokkos::sort( exec, 
permute, KOKKOS_LAMBDA(int i, int j) { return keys(i) < keys(j); }); } else { auto keys_comparator = std::get<0>(std::tuple(maybeComparator...)); Kokkos::sort( exec, permute, KOKKOS_LAMBDA(int i, int j) { return keys_comparator(keys(i), keys(j)); }); } #endif } applyPermutation(exec, permute, keys); applyPermutation(exec, permute, values); } // ------------------------------------------------------ // // specialize cases for sorting by key without comparator // // ------------------------------------------------------ #if defined(KOKKOS_ENABLE_CUDA) template void sort_by_key_device_view_without_comparator( const Kokkos::Cuda& exec, const Kokkos::View& keys, const Kokkos::View& values) { sort_by_key_cudathrust(exec, keys, values); } #endif #if defined(KOKKOS_ENABLE_ROCTHRUST) template void sort_by_key_device_view_without_comparator( const Kokkos::HIP& exec, const Kokkos::View& keys, const Kokkos::View& values) { sort_by_key_rocthrust(exec, keys, values); } #endif #if defined(KOKKOS_ENABLE_ONEDPL) template void sort_by_key_device_view_without_comparator( const Kokkos::Experimental::SYCL& exec, const Kokkos::View& keys, const Kokkos::View& values) { #ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY if (keys.stride(0) == 1 && values.stride(0) == 1) sort_by_key_onedpl(exec, keys, values); else #endif sort_by_key_via_sort(exec, keys, values); } #endif // fallback case template std::enable_if_t::value> sort_by_key_device_view_without_comparator( const ExecutionSpace& exec, const Kokkos::View& keys, const Kokkos::View& values) { sort_by_key_via_sort(exec, keys, values); } // --------------------------------------------------- // // specialize cases for sorting by key with comparator // // --------------------------------------------------- #if defined(KOKKOS_ENABLE_CUDA) template void sort_by_key_device_view_with_comparator( const Kokkos::Cuda& exec, const Kokkos::View& keys, const Kokkos::View& values, const ComparatorType& comparator) { sort_by_key_cudathrust(exec, keys, values, 
comparator); } #endif #if defined(KOKKOS_ENABLE_ROCTHRUST) template void sort_by_key_device_view_with_comparator( const Kokkos::HIP& exec, const Kokkos::View& keys, const Kokkos::View& values, const ComparatorType& comparator) { sort_by_key_rocthrust(exec, keys, values, comparator); } #endif #if defined(KOKKOS_ENABLE_ONEDPL) template void sort_by_key_device_view_with_comparator( const Kokkos::Experimental::SYCL& exec, const Kokkos::View& keys, const Kokkos::View& values, const ComparatorType& comparator) { #ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY if (keys.stride(0) == 1 && values.stride(0) == 1) sort_by_key_onedpl(exec, keys, values, comparator); else #endif sort_by_key_via_sort(exec, keys, values, comparator); } #endif // fallback case template std::enable_if_t::value> sort_by_key_device_view_with_comparator( const ExecutionSpace& exec, const Kokkos::View& keys, const Kokkos::View& values, const ComparatorType& comparator) { sort_by_key_via_sort(exec, keys, values, comparator); } #undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY } // namespace Kokkos::Impl #endif kokkos-4.3.01/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp000066400000000000000000000356641461675637500243330ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ #define KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ #include "../Kokkos_BinOpsPublicAPI.hpp" #include "../Kokkos_BinSortPublicAPI.hpp" #include #include #include #if defined(KOKKOS_ENABLE_CUDA) // Workaround for `Instruction 'shfl' without '.sync' is not supported on // .target sm_70 and higher from PTX ISA version 6.4`. // Also see https://github.com/NVIDIA/cub/pull/170. #if !defined(CUB_USE_COOPERATIVE_GROUPS) #define CUB_USE_COOPERATIVE_GROUPS #endif #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" #if defined(KOKKOS_COMPILER_CLANG) // Some versions of Clang fail to compile Thrust, failing with errors like // this: // /thrust/system/cuda/detail/core/agent_launcher.h:557:11: // error: use of undeclared identifier 'va_printf' // The exact combination of versions for Clang and Thrust (or CUDA) for this // failure was not investigated, however even very recent version combination // (Clang 10.0.0 and Cuda 10.0) demonstrated failure. 
// // Defining _CubLog here locally allows us to avoid that code path, however // disabling some debugging diagnostics #pragma push_macro("_CubLog") #ifdef _CubLog #undef _CubLog #endif #define _CubLog #include #include #pragma pop_macro("_CubLog") #else #include #include #endif #pragma GCC diagnostic pop #endif #if defined(KOKKOS_ENABLE_ROCTHRUST) #include #include #endif #if defined(KOKKOS_ENABLE_ONEDPL) #include #include #endif namespace Kokkos { namespace Impl { template struct better_off_calling_std_sort : std::false_type {}; #if defined KOKKOS_ENABLE_SERIAL template <> struct better_off_calling_std_sort : std::true_type {}; #endif #if defined KOKKOS_ENABLE_OPENMP template <> struct better_off_calling_std_sort : std::true_type {}; #endif #if defined KOKKOS_ENABLE_THREADS template <> struct better_off_calling_std_sort : std::true_type {}; #endif #if defined KOKKOS_ENABLE_HPX template <> struct better_off_calling_std_sort : std::true_type { }; #endif template inline constexpr bool better_off_calling_std_sort_v = better_off_calling_std_sort::value; template struct min_max_functor { using minmax_scalar = Kokkos::MinMaxScalar; ViewType view; min_max_functor(const ViewType& view_) : view(view_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_t& i, minmax_scalar& minmax) const { if (view(i) < minmax.min_val) minmax.min_val = view(i); if (view(i) > minmax.max_val) minmax.max_val = view(i); } }; template void sort_via_binsort(const ExecutionSpace& exec, const Kokkos::View& view) { // Although we are using BinSort below, which could work on rank-2 views, // for now view must be rank-1 because the min_max_functor // used below only works for rank-1 views using ViewType = Kokkos::View; static_assert(ViewType::rank == 1, "Kokkos::sort: currently only supports rank-1 Views."); if (view.extent(0) <= 1) { return; } Kokkos::MinMaxScalar result; Kokkos::MinMax reducer(result); parallel_reduce("Kokkos::Sort::FindExtent", Kokkos::RangePolicy( exec, 0, view.extent(0)), 
min_max_functor(view), reducer); if (result.min_val == result.max_val) return; // For integral types the number of bins may be larger than the range // in which case we can exactly have one unique value per bin // and then don't need to sort bins. bool sort_in_bins = true; // TODO: figure out better max_bins then this ... int64_t max_bins = view.extent(0) / 2; if (std::is_integral::value) { // Cast to double to avoid possible overflow when using integer auto const max_val = static_cast(result.max_val); auto const min_val = static_cast(result.min_val); // using 10M as the cutoff for special behavior (roughly 40MB for the count // array) if ((max_val - min_val) < 10000000) { max_bins = max_val - min_val + 1; sort_in_bins = false; } } if (std::is_floating_point::value) { KOKKOS_ASSERT(std::isfinite(static_cast(result.max_val) - static_cast(result.min_val))); } using CompType = BinOp1D; BinSort bin_sort( view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); bin_sort.create_permute_vector(exec); bin_sort.sort(exec, view); } #if defined(KOKKOS_ENABLE_CUDA) template void sort_cudathrust(const Cuda& space, const Kokkos::View& view, MaybeComparator&&... maybeComparator) { using ViewType = Kokkos::View; static_assert(ViewType::rank == 1, "Kokkos::sort: currently only supports rank-1 Views."); if (view.extent(0) <= 1) { return; } const auto exec = thrust::cuda::par.on(space.cuda_stream()); auto first = ::Kokkos::Experimental::begin(view); auto last = ::Kokkos::Experimental::end(view); thrust::sort(exec, first, last, std::forward(maybeComparator)...); } #endif #if defined(KOKKOS_ENABLE_ROCTHRUST) template void sort_rocthrust(const HIP& space, const Kokkos::View& view, MaybeComparator&&... 
maybeComparator) { using ViewType = Kokkos::View; static_assert(ViewType::rank == 1, "Kokkos::sort: currently only supports rank-1 Views."); if (view.extent(0) <= 1) { return; } const auto exec = thrust::hip::par.on(space.hip_stream()); auto first = ::Kokkos::Experimental::begin(view); auto last = ::Kokkos::Experimental::end(view); thrust::sort(exec, first, last, std::forward(maybeComparator)...); } #endif #if defined(KOKKOS_ENABLE_ONEDPL) template void sort_onedpl(const Kokkos::Experimental::SYCL& space, const Kokkos::View& view, MaybeComparator&&... maybeComparator) { using ViewType = Kokkos::View; static_assert(SpaceAccessibility::accessible, "SYCL execution space is not able to access the memory space " "of the View argument!"); static_assert( (ViewType::rank == 1) && (std::is_same_v || std::is_same_v || std::is_same_v), "SYCL sort only supports contiguous rank-1 Views with LayoutLeft, " "LayoutRight or LayoutStride" "For the latter, this means the View must have stride(0) = 1, enforced " "at runtime."); if (view.stride(0) != 1) { Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1."); } if (view.extent(0) <= 1) { return; } // Can't use Experimental::begin/end here since the oneDPL then assumes that // the data is on the host. auto queue = space.sycl_queue(); auto policy = oneapi::dpl::execution::make_device_policy(queue); const int n = view.extent(0); oneapi::dpl::sort(policy, view.data(), view.data() + n, std::forward(maybeComparator)...); } #endif template void copy_to_host_run_stdsort_copy_back( const ExecutionSpace& exec, const Kokkos::View& view, MaybeComparator&&... 
maybeComparator) { namespace KE = ::Kokkos::Experimental; using ViewType = Kokkos::View; using layout = typename ViewType::array_layout; if constexpr (std::is_same_v) { // for strided views we cannot just deep_copy from device to host, // so we need to do a few more jumps using view_value_type = typename ViewType::non_const_value_type; using view_exespace = typename ViewType::execution_space; using view_deep_copyable_t = Kokkos::View; view_deep_copyable_t view_dc("view_dc", view.extent(0)); KE::copy(exec, view, view_dc); // run sort on the mirror of view_dc auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); auto first = KE::begin(mv_h); auto last = KE::end(mv_h); std::sort(first, last, std::forward(maybeComparator)...); Kokkos::deep_copy(exec, view_dc, mv_h); // copy back to argument view KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view)); } else { auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view); auto first = KE::begin(view_h); auto last = KE::end(view_h); std::sort(first, last, std::forward(maybeComparator)...); Kokkos::deep_copy(exec, view, view_h); } } // -------------------------------------------------- // // specialize cases for sorting without comparator // // -------------------------------------------------- #if defined(KOKKOS_ENABLE_CUDA) template void sort_device_view_without_comparator( const Cuda& exec, const Kokkos::View& view) { sort_cudathrust(exec, view); } #endif #if defined(KOKKOS_ENABLE_ROCTHRUST) template void sort_device_view_without_comparator( const HIP& exec, const Kokkos::View& view) { sort_rocthrust(exec, view); } #endif #if defined(KOKKOS_ENABLE_ONEDPL) template void sort_device_view_without_comparator( const Kokkos::Experimental::SYCL& exec, const Kokkos::View& view) { using ViewType = Kokkos::View; static_assert( (ViewType::rank == 1) && (std::is_same_v || std::is_same_v || std::is_same_v), "sort_device_view_without_comparator: supports rank-1 Views " "with LayoutLeft, 
LayoutRight or LayoutStride"); if (view.stride(0) == 1) { sort_onedpl(exec, view); } else { copy_to_host_run_stdsort_copy_back(exec, view); } } #endif // fallback case template std::enable_if_t::value> sort_device_view_without_comparator( const ExecutionSpace& exec, const Kokkos::View& view) { sort_via_binsort(exec, view); } // -------------------------------------------------- // // specialize cases for sorting with comparator // // -------------------------------------------------- #if defined(KOKKOS_ENABLE_CUDA) template void sort_device_view_with_comparator( const Cuda& exec, const Kokkos::View& view, const ComparatorType& comparator) { sort_cudathrust(exec, view, comparator); } #endif #if defined(KOKKOS_ENABLE_ROCTHRUST) template void sort_device_view_with_comparator( const HIP& exec, const Kokkos::View& view, const ComparatorType& comparator) { sort_rocthrust(exec, view, comparator); } #endif #if defined(KOKKOS_ENABLE_ONEDPL) template void sort_device_view_with_comparator( const Kokkos::Experimental::SYCL& exec, const Kokkos::View& view, const ComparatorType& comparator) { using ViewType = Kokkos::View; static_assert( (ViewType::rank == 1) && (std::is_same_v || std::is_same_v || std::is_same_v), "sort_device_view_with_comparator: supports rank-1 Views " "with LayoutLeft, LayoutRight or LayoutStride"); if (view.stride(0) == 1) { sort_onedpl(exec, view, comparator); } else { copy_to_host_run_stdsort_copy_back(exec, view, comparator); } } #endif template std::enable_if_t::value> sort_device_view_with_comparator( const ExecutionSpace& exec, const Kokkos::View& view, const ComparatorType& comparator) { // This is a fallback case if a more specialized overload does not exist: // for now, this fallback copies data to host, runs std::sort // and then copies data back. Potentially, this can later be changed // with a better solution like our own quicksort on device or similar. 
using ViewType = Kokkos::View; using MemSpace = typename ViewType::memory_space; // Note with HIP unified memory this code path is still the right thing to do // if we end up here when RocThrust is not enabled. // The create_mirror_view_and_copy will do the right thing (no copy). #ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY static_assert(!SpaceAccessibility::accessible, "Impl::sort_device_view_with_comparator: should not be called " "on a view that is already accessible on the host"); #endif copy_to_host_run_stdsort_copy_back(exec, view, comparator); } } // namespace Impl } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/000077500000000000000000000000001461675637500211075ustar00rootroot00000000000000kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp000066400000000000000000000304521461675637500266310ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP #define KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP #include "impl/Kokkos_AdjacentDifference.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIteratorType, typename OutputIteratorType, std::enable_if_t::value && ::Kokkos::is_execution_space::value, int> = 0> OutputIteratorType adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from, first_dest, binary_op()); } template < typename ExecutionSpace, typename InputIteratorType, typename OutputIteratorType, typename BinaryOp, std::enable_if_t::value && ::Kokkos::is_execution_space::value, int> = 0> OutputIteratorType adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOp bin_op) { return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from, first_dest, bin_op); } template < typename ExecutionSpace, typename InputIteratorType, typename OutputIteratorType, std::enable_if_t::value && ::Kokkos::is_execution_space::value, int> = 0> OutputIteratorType adjacent_difference(const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op 
= Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; return Impl::adjacent_difference_exespace_impl( label, ex, first_from, last_from, first_dest, binary_op()); } template < typename ExecutionSpace, typename InputIteratorType, typename OutputIteratorType, typename BinaryOp, std::enable_if_t::value && ::Kokkos::is_execution_space::value, int> = 0> OutputIteratorType adjacent_difference(const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOp bin_op) { return Impl::adjacent_difference_exespace_impl(label, ex, first_from, last_from, first_dest, bin_op); } template ::value, int> = 0> auto adjacent_difference( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); using view_type1 = ::Kokkos::View; using view_type2 = ::Kokkos::View; using value_type1 = typename view_type1::value_type; using value_type2 = typename view_type2::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op()); } template ::value, int> = 0> auto adjacent_difference( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp bin_op) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bin_op); } template ::value, int> = 0> auto adjacent_difference( const std::string& label, 
const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); using view_type1 = ::Kokkos::View; using view_type2 = ::Kokkos::View; using value_type1 = typename view_type1::value_type; using value_type2 = typename view_type2::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; return Impl::adjacent_difference_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op()); } template ::value, int> = 0> auto adjacent_difference( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp bin_op) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); return Impl::adjacent_difference_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bin_op); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template ::value && ::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION OutputIteratorType adjacent_difference( const TeamHandleType& teamHandle, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; return Impl::adjacent_difference_team_impl(teamHandle, first_from, last_from, first_dest, binary_op()); } template ::value && ::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION OutputIteratorType adjacent_difference(const TeamHandleType& teamHandle, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOp bin_op) { return Impl::adjacent_difference_team_impl(teamHandle, first_from, last_from, first_dest, bin_op); } template < typename TeamHandleType, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION auto adjacent_difference( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); using view_type1 = ::Kokkos::View; using view_type2 = ::Kokkos::View; using value_type1 = typename view_type1::value_type; using value_type2 = typename view_type2::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; return Impl::adjacent_difference_team_impl(teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op()); } template < typename TeamHandleType, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename BinaryOp, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION auto adjacent_difference( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp bin_op) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); return Impl::adjacent_difference_team_impl(teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bin_op); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp000066400000000000000000000155021461675637500254560ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP #define KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP #include "impl/Kokkos_AdjacentFind.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // // overload set1 template < typename ExecutionSpace, typename IteratorType, std::enable_if_t, int> = 0> IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::adjacent_find_exespace_impl( "Kokkos::adjacent_find_iterator_api_default", ex, first, last); } template < typename ExecutionSpace, typename IteratorType, std::enable_if_t, int> = 0> IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::adjacent_find_exespace_impl(label, ex, first, last); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t, int> = 0> auto adjacent_find(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::adjacent_find_exespace_impl( "Kokkos::adjacent_find_view_api_default", ex, KE::begin(v), KE::end(v)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, std::enable_if_t, int> = 0> auto adjacent_find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::adjacent_find_exespace_impl(label, ex, KE::begin(v), KE::end(v)); } // overload set2 template < typename ExecutionSpace, typename IteratorType, typename BinaryPredicateType, std::enable_if_t, int> = 0> IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicateType pred) { return Impl::adjacent_find_exespace_impl( "Kokkos::adjacent_find_iterator_api_default", ex, first, last, pred); } template < typename ExecutionSpace, typename IteratorType, typename BinaryPredicateType, std::enable_if_t, int> = 0> IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicateType pred) { return Impl::adjacent_find_exespace_impl(label, ex, first, last, pred); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename BinaryPredicateType, std::enable_if_t, int> = 0> auto adjacent_find(const ExecutionSpace& ex, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::adjacent_find_exespace_impl( "Kokkos::adjacent_find_view_api_default", ex, KE::begin(v), KE::end(v), pred); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename BinaryPredicateType, std::enable_if_t, int> = 0> auto adjacent_find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::adjacent_find_exespace_impl(label, ex, KE::begin(v), KE::end(v), pred); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // // overload set1 template , int> = 0> KOKKOS_FUNCTION IteratorType adjacent_find(const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { return Impl::adjacent_find_team_impl(teamHandle, first, last); } template , int> = 0> KOKKOS_FUNCTION auto adjacent_find( const TeamHandleType& teamHandle, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::adjacent_find_team_impl(teamHandle, KE::begin(v), KE::end(v)); } // overload set2 template , int> = 0> KOKKOS_FUNCTION IteratorType adjacent_find(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, BinaryPredicateType pred) { return Impl::adjacent_find_team_impl(teamHandle, first, last, pred); } template , int> = 0> KOKKOS_FUNCTION auto adjacent_find( const TeamHandleType& teamHandle, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::adjacent_find_team_impl(teamHandle, KE::begin(v), KE::end(v), pred); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_AllOf.hpp000066400000000000000000000100051461675637500241320ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 
4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_ALL_OF_HPP #define KOKKOS_STD_ALGORITHMS_ALL_OF_HPP #include "impl/Kokkos_AllOfAnyOfNoneOf.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { return Impl::all_of_exespace_impl("Kokkos::all_of_iterator_api_default", ex, first, last, predicate); } template < typename ExecutionSpace, typename InputIterator, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { return Impl::all_of_exespace_impl(label, ex, first, last, predicate); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::all_of_exespace_impl("Kokkos::all_of_view_api_default", ex, KE::cbegin(v), KE::cend(v), std::move(predicate)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::all_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), std::move(predicate)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION bool all_of(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, Predicate predicate) { return Impl::all_of_team_impl(teamHandle, first, last, predicate); } template , int> = 0> KOKKOS_FUNCTION bool all_of(const TeamHandleType& teamHandle, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::all_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), std::move(predicate)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp000066400000000000000000000100011461675637500241450ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_ANY_OF_HPP #define KOKKOS_STD_ALGORITHMS_ANY_OF_HPP #include "impl/Kokkos_AllOfAnyOfNoneOf.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { return Impl::any_of_exespace_impl("Kokkos::any_of_view_api_default", ex, first, last, predicate); } template < typename ExecutionSpace, typename InputIterator, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { return Impl::any_of_exespace_impl(label, ex, first, last, predicate); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::any_of_exespace_impl("Kokkos::any_of_view_api_default", ex, KE::cbegin(v), KE::cend(v), std::move(predicate)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::any_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), std::move(predicate)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION bool any_of(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, Predicate predicate) { return Impl::any_of_team_impl(teamHandle, first, last, predicate); } template , int> = 0> KOKKOS_FUNCTION bool any_of(const TeamHandleType& teamHandle, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::any_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), std::move(predicate)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp000066400000000000000000000045351461675637500246230ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_BEGIN_END_HPP #define KOKKOS_BEGIN_END_HPP #include #include "impl/Kokkos_RandomAccessIterator.hpp" #include "impl/Kokkos_Constraints.hpp" /// \file Kokkos_BeginEnd.hpp /// \brief Kokkos begin, end, cbegin, cend namespace Kokkos { namespace Experimental { template KOKKOS_INLINE_FUNCTION auto begin( const Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); using it_t = Impl::RandomAccessIterator>; return it_t(v); } template KOKKOS_INLINE_FUNCTION auto end( const Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); using it_t = Impl::RandomAccessIterator>; return it_t(v, v.extent(0)); } template KOKKOS_INLINE_FUNCTION auto cbegin( const Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); using ViewConstType = typename Kokkos::View::const_type; const ViewConstType cv = v; using it_t = Impl::RandomAccessIterator; return it_t(cv); } template KOKKOS_INLINE_FUNCTION auto cend( const Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); using ViewConstType = typename Kokkos::View::const_type; const ViewConstType cv = v; using it_t = Impl::RandomAccessIterator; return it_t(cv, cv.extent(0)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Copy.hpp000066400000000000000000000105061461675637500240550ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_COPY_HPP #define KOKKOS_STD_ALGORITHMS_COPY_HPP #include "impl/Kokkos_CopyCopyN.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::copy_exespace_impl("Kokkos::copy_iterator_api_default", ex, first, last, d_first); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::copy_exespace_impl(label, ex, first, last, d_first); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; return Impl::copy_exespace_impl("Kokkos::copy_view_api_default", ex, KE::cbegin(source), KE::cend(source), KE::begin(dest)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; return Impl::copy_exespace_impl(label, ex, KE::cbegin(source), KE::cend(source), KE::begin(dest)); } // // overload set accepting team handle // template , int> = 0> KOKKOS_FUNCTION OutputIterator copy(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::copy_team_impl(teamHandle, first, last, d_first); } template , int> = 0> KOKKOS_FUNCTION auto copy( const TeamHandleType& teamHandle, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; return Impl::copy_team_impl(teamHandle, KE::cbegin(source), KE::cend(source), KE::begin(dest)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp000066400000000000000000000105351461675637500255160ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP #define KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP #include "impl/Kokkos_CopyBackward.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 copy_backward(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { return Impl::copy_backward_exespace_impl( "Kokkos::copy_backward_iterator_api_default", ex, first, last, d_last); } template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 copy_backward(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { return Impl::copy_backward_exespace_impl(label, ex, first, last, d_last); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_backward(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::copy_backward_exespace_impl( "Kokkos::copy_backward_view_api_default", ex, cbegin(source), cend(source), end(dest)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_backward(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::copy_backward_exespace_impl(label, ex, cbegin(source), cend(source), end(dest)); } // // overload set accepting team handle // template , int> = 0> KOKKOS_FUNCTION IteratorType2 copy_backward(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { return Impl::copy_backward_team_impl(teamHandle, first, last, d_last); } template , int> = 0> KOKKOS_FUNCTION auto copy_backward( const TeamHandleType& teamHandle, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::copy_backward_team_impl(teamHandle, cbegin(source), cend(source), end(dest)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp000066400000000000000000000112731461675637500243360ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_COPY_IF_HPP #define KOKKOS_STD_ALGORITHMS_COPY_IF_HPP #include "impl/Kokkos_CopyIf.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_if(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { return Impl::copy_if_exespace_impl("Kokkos::copy_if_iterator_api_default", ex, first, last, d_first, std::move(pred)); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { return Impl::copy_if_exespace_impl(label, ex, first, last, d_first, std::move(pred)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_if(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::copy_if_exespace_impl("Kokkos::copy_if_view_api_default", ex, cbegin(source), cend(source), begin(dest), std::move(pred)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::copy_if_exespace_impl(label, ex, cbegin(source), cend(source), begin(dest), std::move(pred)); } // // overload set accepting team handle // template , int> = 0> KOKKOS_FUNCTION OutputIterator copy_if(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { return Impl::copy_if_team_impl(teamHandle, first, last, d_first, std::move(pred)); } template , int> = 0> KOKKOS_FUNCTION auto copy_if( const TeamHandleType& teamHandle, const ::Kokkos::View& source, const ::Kokkos::View& dest, Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::copy_if_team_impl(teamHandle, cbegin(source), cend(source), begin(dest), std::move(pred)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_CopyN.hpp000066400000000000000000000106221461675637500241720ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_COPY_N_HPP #define KOKKOS_STD_ALGORITHMS_COPY_N_HPP #include "impl/Kokkos_CopyCopyN.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename Size, typename OutputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_n(const ExecutionSpace& ex, InputIterator first, Size count, OutputIterator result) { return Impl::copy_n_exespace_impl("Kokkos::copy_n_iterator_api_default", ex, first, count, result); } template < typename ExecutionSpace, typename InputIterator, typename Size, typename OutputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_n(const std::string& label, const ExecutionSpace& ex, InputIterator first, Size count, OutputIterator result) { return Impl::copy_n_exespace_impl(label, ex, first, count, result); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename Size, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_n(const ExecutionSpace& ex, const ::Kokkos::View& source, Size count, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; return Impl::copy_n_exespace_impl("Kokkos::copy_n_view_api_default", ex, KE::cbegin(source), count, KE::begin(dest)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename Size, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, Size count, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; return Impl::copy_n_exespace_impl(label, ex, KE::cbegin(source), count, KE::begin(dest)); } // // overload set accepting team handle // template , int> = 0> KOKKOS_FUNCTION OutputIterator copy_n(const TeamHandleType& teamHandle, InputIterator first, Size count, OutputIterator result) { return Impl::copy_n_team_impl(teamHandle, first, count, result); } template , int> = 0> KOKKOS_FUNCTION auto copy_n( const TeamHandleType& teamHandle, const ::Kokkos::View& source, Size count, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; return Impl::copy_n_team_impl(teamHandle, KE::cbegin(source), count, KE::begin(dest)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Count.hpp000066400000000000000000000101311461675637500242250ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_COUNT_HPP #define KOKKOS_STD_ALGORITHMS_COUNT_HPP #include "impl/Kokkos_CountCountIf.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count(const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { return Impl::count_exespace_impl("Kokkos::count_iterator_api_default", ex, first, last, value); } template < typename ExecutionSpace, typename IteratorType, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { return Impl::count_exespace_impl(label, ex, first, last, value); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count(const ExecutionSpace& ex, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::count_exespace_impl("Kokkos::count_view_api_default", ex, KE::cbegin(v), KE::cend(v), value); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::count_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), value); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION typename IteratorType::difference_type count( const TeamHandleType& teamHandle, IteratorType first, IteratorType last, const T& value) { return Impl::count_team_impl(teamHandle, first, last, value); } template , int> = 0> KOKKOS_FUNCTION auto count(const TeamHandleType& teamHandle, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::count_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), value); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_CountIf.hpp000066400000000000000000000106071461675637500245140ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP #define KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP #include "impl/Kokkos_CountCountIf.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count_if(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { return Impl::count_if_exespace_impl("Kokkos::count_if_iterator_api_default", ex, first, last, std::move(predicate)); } template < typename ExecutionSpace, typename IteratorType, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count_if(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { return Impl::count_if_exespace_impl(label, ex, first, last, std::move(predicate)); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count_if(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::count_if_exespace_impl("Kokkos::count_if_view_api_default", ex, KE::cbegin(v), KE::cend(v), std::move(predicate)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::count_if_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), std::move(predicate)); } // // overload set accepting team handle // template , int> = 0> KOKKOS_FUNCTION typename IteratorType::difference_type count_if( const TeamHandleType& teamHandle, IteratorType first, IteratorType last, Predicate predicate) { return Impl::count_if_team_impl(teamHandle, first, last, std::move(predicate)); } template , int> = 0> KOKKOS_FUNCTION auto count_if(const TeamHandleType& teamHandle, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::count_if_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), std::move(predicate)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Distance.hpp000066400000000000000000000023271461675637500246770ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_DISTANCE_HPP #define KOKKOS_STD_ALGORITHMS_DISTANCE_HPP #include "impl/Kokkos_Constraints.hpp" #include "impl/Kokkos_RandomAccessIterator.hpp" namespace Kokkos { namespace Experimental { template KOKKOS_INLINE_FUNCTION constexpr typename IteratorType::difference_type distance(IteratorType first, IteratorType last) { static_assert( ::Kokkos::Experimental::Impl::are_random_access_iterators< IteratorType>::value, "Kokkos::Experimental::distance: only implemented for random access " "iterators."); return last - first; } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Equal.hpp000066400000000000000000000326661461675637500242250ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_EQUAL_HPP #define KOKKOS_STD_ALGORITHMS_EQUAL_HPP #include "impl/Kokkos_Equal.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template && Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, first1, last1, first2); } template && ::Kokkos:: is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { return Impl::equal_exespace_impl(label, ex, first1, last1, first2); } template && ::Kokkos:: is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, BinaryPredicateType predicate) { return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, first1, last1, first2, std::move(predicate)); } template && ::Kokkos:: is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, BinaryPredicateType predicate) { return Impl::equal_exespace_impl(label, ex, first1, last1, first2, std::move(predicate)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::equal_exespace_impl("Kokkos::equal_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::equal_exespace_impl(label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, BinaryPredicateType predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::equal_exespace_impl("Kokkos::equal_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), std::move(predicate)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, BinaryPredicateType predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::equal_exespace_impl(label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), std::move(predicate)); } template && ::Kokkos:: is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, first1, last1, first2, last2); } template && ::Kokkos:: is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { return Impl::equal_exespace_impl(label, ex, first1, last1, first2, last2); } template && ::Kokkos:: is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType predicate) { return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, first1, last1, first2, last2, std::move(predicate)); } template && ::Kokkos:: is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType predicate) { return Impl::equal_exespace_impl(label, ex, first1, last1, first2, last2, std::move(predicate)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template && ::Kokkos:: is_team_handle_v, int> = 0> KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { return Impl::equal_team_impl(teamHandle, first1, last1, first2); } template && ::Kokkos:: is_team_handle_v, int> = 0> KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, BinaryPredicateType predicate) { return Impl::equal_team_impl(teamHandle, first1, last1, first2, std::move(predicate)); } template , int> = 0> KOKKOS_FUNCTION bool equal( const TeamHandleType& teamHandle, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::equal_team_impl(teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2)); } template , int> = 0> KOKKOS_FUNCTION bool equal( const TeamHandleType& teamHandle, const ::Kokkos::View& view1, const ::Kokkos::View& view2, BinaryPredicateType predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::equal_team_impl(teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), std::move(predicate)); } template && ::Kokkos:: is_team_handle_v, int> = 0> KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { return Impl::equal_team_impl(teamHandle, first1, last1, first2, last2); } template && ::Kokkos:: is_team_handle_v, int> = 0> KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType predicate) { return Impl::equal_team_impl(teamHandle, first1, last1, first2, 
last2, std::move(predicate)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp000066400000000000000000000310071461675637500257160ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP #define KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP #include "impl/Kokkos_ExclusiveScan.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // // overload set 1 template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, ValueType init_value) { static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::exclusive_scan_default_op_exespace_impl( "Kokkos::exclusive_scan_default_functors_iterator_api", ex, first, last, first_dest, std::move(init_value)); } template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType exclusive_scan(const std::string& label, const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, ValueType init_value) { static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::exclusive_scan_default_op_exespace_impl( label, ex, first, last, first_dest, std::move(init_value)); } template < typename ExecutionSpace, 
typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::exclusive_scan_default_op_exespace_impl( "Kokkos::exclusive_scan_default_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), std::move(init_value)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::exclusive_scan_default_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), std::move(init_value)); } // overload set 2 template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return 
Impl::exclusive_scan_custom_op_exespace_impl( "Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, std::move(init_value), bop); } template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType exclusive_scan(const std::string& label, const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::exclusive_scan_custom_op_exespace_impl( label, ex, first, last, first_dest, std::move(init_value), bop); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename ValueType, typename BinaryOpType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::exclusive_scan_custom_op_exespace_impl( "Kokkos::exclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), std::move(init_value), bop); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename ValueType, typename BinaryOpType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::exclusive_scan_custom_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), std::move(init_value), bop); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // // overload set 1 template && Kokkos::is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIteratorType exclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, ValueType init_value) { static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::exclusive_scan_default_op_team_impl( teamHandle, first, last, first_dest, std::move(init_value)); } template , int> = 0> KOKKOS_FUNCTION auto exclusive_scan( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::exclusive_scan_default_op_team_impl( teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), std::move(init_value)); } // 
overload set 2 template && Kokkos::is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIteratorType exclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(teamHandle); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::exclusive_scan_custom_op_team_impl( teamHandle, first, last, first_dest, std::move(init_value), bop); } template , int> = 0> KOKKOS_FUNCTION auto exclusive_scan( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(teamHandle); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::exclusive_scan_custom_op_team_impl( teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), std::move(init_value), bop); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Fill.hpp000066400000000000000000000067201461675637500240340ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FILL_HPP #define KOKKOS_STD_ALGORITHMS_FILL_HPP #include "impl/Kokkos_FillFillN.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { Impl::fill_exespace_impl("Kokkos::fill_iterator_api_default", ex, first, last, value); } template < typename ExecutionSpace, typename IteratorType, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { Impl::fill_exespace_impl(label, ex, first, last, value); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::fill_exespace_impl("Kokkos::fill_view_api_default", ex, begin(view), end(view), value); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::fill_exespace_impl(label, ex, begin(view), end(view), value); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION void fill(const TeamHandleType& th, IteratorType first, IteratorType last, const T& value) { Impl::fill_team_impl(th, first, last, value); } template , int> = 0> KOKKOS_FUNCTION void fill(const TeamHandleType& th, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::fill_team_impl(th, begin(view), end(view), value); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_FillN.hpp000066400000000000000000000073521461675637500241540ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FILL_N_HPP #define KOKKOS_STD_ALGORITHMS_FILL_N_HPP #include "impl/Kokkos_FillFillN.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { template < typename ExecutionSpace, typename IteratorType, typename SizeType, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType fill_n(const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { return Impl::fill_n_exespace_impl("Kokkos::fill_n_iterator_api_default", ex, first, n, value); } template < typename ExecutionSpace, typename IteratorType, typename SizeType, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType fill_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { return Impl::fill_n_exespace_impl(label, ex, first, n, value); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename SizeType, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto fill_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::fill_n_exespace_impl("Kokkos::fill_n_view_api_default", ex, begin(view), n, value); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename SizeType, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto fill_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::fill_n_exespace_impl(label, ex, begin(view), n, value); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION IteratorType fill_n(const TeamHandleType& th, IteratorType first, SizeType n, const T& value) { return Impl::fill_n_team_impl(th, first, n, value); } template , int> = 0> KOKKOS_FUNCTION auto fill_n(const TeamHandleType& th, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::fill_n_team_impl(th, begin(view), n, value); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Find.hpp000066400000000000000000000075421461675637500240310ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FIND_HPP #define KOKKOS_STD_ALGORITHMS_FIND_HPP #include "impl/Kokkos_FindIfOrNot.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> InputIterator find(const ExecutionSpace& ex, InputIterator first, InputIterator last, const T& value) { return Impl::find_exespace_impl("Kokkos::find_iterator_api_default", ex, first, last, value); } template < typename ExecutionSpace, typename InputIterator, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> InputIterator find(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, const T& value) { return Impl::find_exespace_impl(label, ex, first, last, value); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find(const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::find_exespace_impl("Kokkos::find_view_api_default", ex, KE::begin(view), KE::end(view), value); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename T, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::find_exespace_impl(label, ex, KE::begin(view), KE::end(view), value); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION InputIterator find(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, const T& value) { return Impl::find_team_impl(teamHandle, first, last, value); } template , int> = 0> KOKKOS_FUNCTION auto find(const TeamHandleType& teamHandle, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::find_team_impl(teamHandle, KE::begin(view), KE::end(view), value); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp000066400000000000000000000223501461675637500244520ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FIND_END_HPP #define KOKKOS_STD_ALGORITHMS_FIND_END_HPP #include "impl/Kokkos_FindEnd.hpp" #include "Kokkos_Equal.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // // overload set 1: no binary predicate passed template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { return Impl::find_end_exespace_impl("Kokkos::find_end_iterator_api_default", ex, first, last, s_first, s_last); } template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { return Impl::find_end_exespace_impl(label, ex, first, last, s_first, s_last); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_end_exespace_impl("Kokkos::find_end_view_api_default", ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_end_exespace_impl(label, ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { return Impl::find_end_exespace_impl("Kokkos::find_end_iterator_api_default", ex, first, last, s_first, s_last, pred); } template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { return Impl::find_end_exespace_impl(label, ex, first, last, s_first, s_last, pred); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_end_exespace_impl("Kokkos::find_end_view_api_default", ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_end_exespace_impl(label, ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// // overload set 1: no binary predicate passed template , int> = 0> KOKKOS_FUNCTION IteratorType1 find_end(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { return Impl::find_end_team_impl(teamHandle, first, last, s_first, s_last); } template , int> = 0> KOKKOS_FUNCTION auto find_end( const TeamHandleType& teamHandle, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_end_team_impl(teamHandle, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed template , int> = 0> KOKKOS_FUNCTION IteratorType1 find_end(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { return Impl::find_end_team_impl(teamHandle, first, last, s_first, s_last, pred); } template , int> = 0> KOKKOS_FUNCTION auto find_end( const TeamHandleType& teamHandle, const ::Kokkos::View& view, const ::Kokkos::View& s_view, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_end_team_impl(teamHandle, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp000066400000000000000000000233001461675637500253140ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). 
// // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_HPP #define KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_HPP #include "impl/Kokkos_FindFirstOf.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // // overload set 1: no binary predicate passed template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { return Impl::find_first_of_exespace_impl( "Kokkos::find_first_of_iterator_api_default", ex, first, last, s_first, s_last); } template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { return Impl::find_first_of_exespace_impl(label, ex, first, last, s_first, s_last); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_first_of_exespace_impl( "Kokkos::find_first_of_view_api_default", ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_first_of_exespace_impl(label, ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { return Impl::find_first_of_exespace_impl( "Kokkos::find_first_of_iterator_api_default", ex, first, last, s_first, s_last, pred); } template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const 
BinaryPredicateType& pred) { return Impl::find_first_of_exespace_impl(label, ex, first, last, s_first, s_last, pred); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_first_of_exespace_impl( "Kokkos::find_first_of_view_api_default", ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_first_of_exespace_impl(label, ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// // overload set 1: no binary predicate passed template , int> = 0> KOKKOS_FUNCTION IteratorType1 find_first_of(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { return Impl::find_first_of_team_impl(teamHandle, first, last, s_first, s_last); } template , int> = 0> KOKKOS_FUNCTION auto find_first_of( const TeamHandleType& teamHandle, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_first_of_team_impl(teamHandle, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed template , int> = 0> KOKKOS_FUNCTION IteratorType1 find_first_of(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { return Impl::find_first_of_team_impl(teamHandle, first, last, s_first, s_last, pred); } template , int> = 0> KOKKOS_FUNCTION auto find_first_of( const TeamHandleType& teamHandle, const ::Kokkos::View& view, const ::Kokkos::View& s_view, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::find_first_of_team_impl(teamHandle, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_FindIf.hpp000066400000000000000000000104611461675637500243020ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). 
// // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_HPP #define KOKKOS_STD_ALGORITHMS_FIND_IF_HPP #include "impl/Kokkos_FindIfOrNot.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, typename PredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if(const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType predicate) { return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_iterator_api_default", ex, first, last, std::move(predicate)); } template < typename ExecutionSpace, typename IteratorType, typename PredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType predicate) { return Impl::find_if_or_not_exespace_impl(label, ex, first, last, std::move(predicate)); } template ::value, int> = 0> auto find_if(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_view_api_default", ex, KE::begin(v), KE::end(v), std::move(predicate)); } template ::value, int> = 0> auto find_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::find_if_or_not_exespace_impl( label, ex, KE::begin(v), KE::end(v), 
std::move(predicate)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION IteratorType find_if(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, PredicateType predicate) { return Impl::find_if_or_not_team_impl(teamHandle, first, last, std::move(predicate)); } template < typename TeamHandleType, typename DataType, typename... Properties, typename Predicate, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION auto find_if(const TeamHandleType& teamHandle, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::find_if_or_not_team_impl(teamHandle, KE::begin(v), KE::end(v), std::move(predicate)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp000066400000000000000000000104161461675637500247630ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_NOT_HPP #define KOKKOS_STD_ALGORITHMS_FIND_IF_NOT_HPP #include "impl/Kokkos_FindIfOrNot.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if_not(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_not_iterator_api_default", ex, first, last, std::move(predicate)); } template < typename ExecutionSpace, typename IteratorType, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if_not(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { return Impl::find_if_or_not_exespace_impl(label, ex, first, last, std::move(predicate)); } template ::value, int> = 0> auto find_if_not(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_not_view_api_default", ex, KE::begin(v), KE::end(v), std::move(predicate)); } template ::value, int> = 0> auto find_if_not(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::find_if_or_not_exespace_impl( label, ex, KE::begin(v), KE::end(v), std::move(predicate)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION IteratorType find_if_not(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, Predicate predicate) { return Impl::find_if_or_not_team_impl(teamHandle, first, last, std::move(predicate)); } template < typename TeamHandleType, typename DataType, typename... Properties, typename Predicate, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION auto find_if_not( const TeamHandleType& teamHandle, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::find_if_or_not_team_impl( teamHandle, KE::begin(v), KE::end(v), std::move(predicate)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_ForEach.hpp000066400000000000000000000104041461675637500244470ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_HPP #define KOKKOS_STD_ALGORITHMS_FOR_EACH_HPP #include "impl/Kokkos_ForEachForEachN.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < class ExecutionSpace, class IteratorType, class UnaryFunctorType, std::enable_if_t, int> = 0> UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryFunctorType functor) { return Impl::for_each_exespace_impl(label, ex, first, last, std::move(functor)); } template < class ExecutionSpace, class IteratorType, class UnaryFunctorType, std::enable_if_t, int> = 0> UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryFunctorType functor) { return Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", ex, first, last, std::move(functor)); } template < class ExecutionSpace, class DataType, class... Properties, class UnaryFunctorType, std::enable_if_t, int> = 0> UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v), std::move(functor)); } template < class ExecutionSpace, class DataType, class... 
Properties, class UnaryFunctorType, std::enable_if_t, int> = 0> UnaryFunctorType for_each(const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex, KE::begin(v), KE::end(v), std::move(functor)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION UnaryFunctorType for_each(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, UnaryFunctorType functor) { return Impl::for_each_team_impl(teamHandle, first, last, std::move(functor)); } template , int> = 0> KOKKOS_FUNCTION UnaryFunctorType for_each(const TeamHandleType& teamHandle, const ::Kokkos::View& v, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v), std::move(functor)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp000066400000000000000000000104131461675637500245650ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_N_HPP #define KOKKOS_STD_ALGORITHMS_FOR_EACH_N_HPP #include "impl/Kokkos_ForEachForEachN.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < class ExecutionSpace, class IteratorType, class SizeType, class UnaryFunctorType, std::enable_if_t, int> = 0> IteratorType for_each_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { return Impl::for_each_n_exespace_impl(label, ex, first, n, std::move(functor)); } template < class ExecutionSpace, class IteratorType, class SizeType, class UnaryFunctorType, std::enable_if_t, int> = 0> IteratorType for_each_n(const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { return Impl::for_each_n_exespace_impl( "Kokkos::for_each_n_iterator_api_default", ex, first, n, std::move(functor)); } template < class ExecutionSpace, class DataType, class... Properties, class SizeType, class UnaryFunctorType, std::enable_if_t, int> = 0> auto for_each_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::for_each_n_exespace_impl(label, ex, KE::begin(v), n, std::move(functor)); } template < class ExecutionSpace, class DataType, class... 
Properties, class SizeType, class UnaryFunctorType, std::enable_if_t, int> = 0> auto for_each_n(const ExecutionSpace& ex, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::for_each_n_exespace_impl("Kokkos::for_each_n_view_api_default", ex, KE::begin(v), n, std::move(functor)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION IteratorType for_each_n(const TeamHandleType& teamHandle, IteratorType first, SizeType n, UnaryFunctorType functor) { return Impl::for_each_n_team_impl(teamHandle, first, n, std::move(functor)); } template , int> = 0> KOKKOS_FUNCTION auto for_each_n( const TeamHandleType& teamHandle, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::for_each_n_team_impl(teamHandle, KE::begin(v), n, std::move(functor)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Generate.hpp000066400000000000000000000072011461675637500246730ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_GENERATE_HPP #define KOKKOS_STD_ALGORITHMS_GENERATE_HPP #include "impl/Kokkos_GenerateGenerateN.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template , int> = 0> void generate(const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { Impl::generate_exespace_impl("Kokkos::generate_iterator_api_default", ex, first, last, std::move(g)); } template , int> = 0> void generate(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { Impl::generate_exespace_impl(label, ex, first, last, std::move(g)); } template , int> = 0> void generate(const ExecutionSpace& ex, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::generate_exespace_impl("Kokkos::generate_view_api_default", ex, begin(view), end(view), std::move(g)); } template , int> = 0> void generate(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::generate_exespace_impl(label, ex, begin(view), end(view), std::move(g)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION void generate(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, Generator g) { Impl::generate_team_impl(teamHandle, first, last, std::move(g)); } template , int> = 0> KOKKOS_FUNCTION void generate( const TeamHandleType& teamHandle, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::generate_team_impl(teamHandle, begin(view), end(view), std::move(g)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp000066400000000000000000000077301461675637500250200ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_GENERATE_N_HPP #define KOKKOS_STD_ALGORITHMS_GENERATE_N_HPP #include "impl/Kokkos_GenerateGenerateN.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template , int> = 0> IteratorType generate_n(const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { return Impl::generate_n_exespace_impl( "Kokkos::generate_n_iterator_api_default", ex, first, count, std::move(g)); } template , int> = 0> IteratorType generate_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { return Impl::generate_n_exespace_impl(label, ex, first, count, std::move(g)); } template , int> = 0> auto generate_n(const ExecutionSpace& ex, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::generate_n_exespace_impl("Kokkos::generate_n_view_api_default", ex, begin(view), count, std::move(g)); } template , int> = 0> auto generate_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::generate_n_exespace_impl(label, ex, begin(view), count, std::move(g)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION IteratorType generate_n(const TeamHandleType& teamHandle, IteratorType first, Size count, Generator g) { return Impl::generate_n_team_impl(teamHandle, first, count, std::move(g)); } template , int> = 0> KOKKOS_FUNCTION auto generate_n( const TeamHandleType& teamHandle, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::generate_n_team_impl(teamHandle, begin(view), count, std::move(g)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp000066400000000000000000000373131461675637500257160ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_HPP #define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_HPP #include "impl/Kokkos_InclusiveScan.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // // overload set 1 template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest) { return Impl::inclusive_scan_default_op_exespace_impl( "Kokkos::inclusive_scan_default_functors_iterator_api", ex, first, last, first_dest); } template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType inclusive_scan(const std::string& label, const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest) { return Impl::inclusive_scan_default_op_exespace_impl(label, ex, first, last, first_dest); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::inclusive_scan_default_op_exespace_impl( "Kokkos::inclusive_scan_default_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::inclusive_scan_default_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); } // overload set 2 (accepting custom binary op) template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op); } template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType inclusive_scan( const std::string& label, const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, first, last, first_dest, binary_op); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename BinaryOp, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename BinaryOp, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } // overload set 3 template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op, ValueType init_value) { static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op, std::move(init_value)); } template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType inclusive_scan(const std::string& label, const ExecutionSpace& ex, InputIteratorType first, 
InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op, ValueType init_value) { static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, first, last, first_dest, binary_op, std::move(init_value)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename BinaryOp, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, std::move(init_value)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename BinaryOp, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, std::move(init_value)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // // overload set 1 template && :: Kokkos::is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIteratorType inclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest) { return Impl::inclusive_scan_default_op_team_impl(teamHandle, first, last, first_dest); } template , int> = 0> KOKKOS_FUNCTION auto inclusive_scan( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::inclusive_scan_default_op_team_impl( teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); } // overload set 2 (accepting custom binary op) template && :: Kokkos::is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIteratorType inclusive_scan( const TeamHandleType& teamHandle, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { return 
Impl::inclusive_scan_custom_binary_op_team_impl( teamHandle, first, last, first_dest, binary_op); } template , int> = 0> KOKKOS_FUNCTION auto inclusive_scan( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::inclusive_scan_custom_binary_op_team_impl( teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } // overload set 3 template && :: Kokkos::is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIteratorType inclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op, ValueType init_value) { static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::inclusive_scan_custom_binary_op_team_impl( teamHandle, first, last, first_dest, binary_op, std::move(init_value)); } template , int> = 0> KOKKOS_FUNCTION auto inclusive_scan( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::inclusive_scan_custom_binary_op_team_impl( teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, std::move(init_value)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp000066400000000000000000000077331461675637500257310ustar00rootroot00000000000000//@HEADER // 
************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_HPP #define KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_HPP #include "impl/Kokkos_IsPartitioned.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, typename PredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType p) { return Impl::is_partitioned_exespace_impl( "Kokkos::is_partitioned_iterator_api_default", ex, first, last, std::move(p)); } template < typename ExecutionSpace, typename IteratorType, typename PredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType p) { return Impl::is_partitioned_exespace_impl(label, ex, first, last, std::move(p)); } template < typename ExecutionSpace, typename PredicateType, typename DataType, typename... 
Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const ExecutionSpace& ex, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::is_partitioned_exespace_impl( "Kokkos::is_partitioned_view_api_default", ex, cbegin(v), cend(v), std::move(p)); } template < typename ExecutionSpace, typename PredicateType, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::is_partitioned_exespace_impl(label, ex, cbegin(v), cend(v), std::move(p)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION bool is_partitioned(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, PredicateType p) { return Impl::is_partitioned_team_impl(teamHandle, first, last, std::move(p)); } template , int> = 0> KOKKOS_FUNCTION bool is_partitioned( const TeamHandleType& teamHandle, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::is_partitioned_team_impl(teamHandle, cbegin(v), cend(v), std::move(p)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp000066400000000000000000000157541461675637500247110ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. 
// // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_HPP #define KOKKOS_STD_ALGORITHMS_IS_SORTED_HPP #include "impl/Kokkos_IsSorted.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_iterator_api_default", ex, first, last); } template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::is_sorted_exespace_impl(label, ex, first, last); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_view_api_default", ex, KE::cbegin(view), KE::cend(view)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_exespace_impl(label, ex, KE::cbegin(view), KE::cend(view)); } template < typename ExecutionSpace, typename IteratorType, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_iterator_api_default", ex, first, last, std::move(comp)); } template < typename ExecutionSpace, typename IteratorType, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); return Impl::is_sorted_exespace_impl(label, ex, first, last, std::move(comp)); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_view_api_default", ex, KE::cbegin(view), KE::cend(view), std::move(comp)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_exespace_impl(label, ex, KE::cbegin(view), KE::cend(view), std::move(comp)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION bool is_sorted(const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { return Impl::is_sorted_team_impl(teamHandle, first, last); } template , int> = 0> KOKKOS_FUNCTION bool is_sorted( const TeamHandleType& teamHandle, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_team_impl(teamHandle, KE::cbegin(view), KE::cend(view)); } template , int> = 0> KOKKOS_FUNCTION bool is_sorted(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(teamHandle); return Impl::is_sorted_team_impl(teamHandle, first, last, std::move(comp)); } template , int> = 0> KOKKOS_FUNCTION bool is_sorted( const TeamHandleType& teamHandle, const ::Kokkos::View& view, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_not_openmptarget(teamHandle); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_team_impl(teamHandle, KE::cbegin(view), KE::cend(view), std::move(comp)); } } // namespace Experimental } // namespace Kokkos #endif 
kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp000066400000000000000000000164411461675637500257170ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_HPP #define KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_HPP #include "impl/Kokkos_IsSortedUntil.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_iterator_api_default", ex, first, last); } template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::is_sorted_until_exespace_impl(label, ex, first, last); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_view_api_default", ex, KE::begin(view), KE::end(view)); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_until_exespace_impl(label, ex, KE::begin(view), KE::end(view)); } template < typename ExecutionSpace, typename IteratorType, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); return Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_iterator_api_default", ex, first, last, std::move(comp)); } template < typename ExecutionSpace, typename IteratorType, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); return Impl::is_sorted_until_exespace_impl(label, ex, first, last, std::move(comp)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_view_api_default", ex, KE::begin(view), KE::end(view), std::move(comp)); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_until_exespace_impl(label, ex, KE::begin(view), KE::end(view), std::move(comp)); } // // overload set accepting team handle // template , int> = 0> KOKKOS_FUNCTION IteratorType is_sorted_until(const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { return Impl::is_sorted_until_team_impl(teamHandle, first, last); } template , int> = 0> KOKKOS_FUNCTION auto is_sorted_until( const TeamHandleType& teamHandle, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_until_team_impl(teamHandle, KE::begin(view), KE::end(view)); } template , int> = 0> KOKKOS_FUNCTION IteratorType is_sorted_until(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(teamHandle); return Impl::is_sorted_until_team_impl(teamHandle, first, last, std::move(comp)); } template , int> = 0> KOKKOS_FUNCTION auto is_sorted_until( const TeamHandleType& teamHandle, const 
::Kokkos::View& view, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_not_openmptarget(teamHandle); namespace KE = ::Kokkos::Experimental; return Impl::is_sorted_until_team_impl(teamHandle, KE::begin(view), KE::end(view), std::move(comp)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp000066400000000000000000000041271461675637500247030ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_ITER_SWAP_HPP #define KOKKOS_STD_ALGORITHMS_ITER_SWAP_HPP #include #include "impl/Kokkos_Constraints.hpp" namespace Kokkos { namespace Experimental { namespace Impl { template struct StdIterSwapFunctor { IteratorType1 m_a; IteratorType2 m_b; KOKKOS_FUNCTION void operator()(int i) const { (void)i; ::Kokkos::kokkos_swap(*m_a, *m_b); } KOKKOS_FUNCTION StdIterSwapFunctor(IteratorType1 _a, IteratorType2 _b) : m_a(std::move(_a)), m_b(std::move(_b)) {} }; template void iter_swap_impl(IteratorType1 a, IteratorType2 b) { // is there a better way to do this maybe? 
::Kokkos::parallel_for( 1, StdIterSwapFunctor(a, b)); Kokkos::DefaultExecutionSpace().fence( "Kokkos::iter_swap: fence after operation"); } } // namespace Impl //---------------------------------------------------------------------------- // iter_swap template void iter_swap(IteratorType1 a, IteratorType2 b) { Impl::iter_swap_impl(a, b); } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 template KOKKOS_DEPRECATED_WITH_COMMENT("Use Kokkos::kokkos_swap instead!") KOKKOS_FUNCTION void swap(T& a, T& b) noexcept(::Kokkos::kokkos_swap(std::declval(), std::declval())) { ::Kokkos::kokkos_swap(a, b); } #endif } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp000066400000000000000000000216401461675637500275510ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_HPP #define KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_HPP #include "impl/Kokkos_LexicographicalCompare.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < class ExecutionSpace, class IteratorType1, class IteratorType2, std::enable_if_t, int> = 0> bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, first2, last2); } template < class ExecutionSpace, class IteratorType1, class IteratorType2, std::enable_if_t, int> = 0> bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { return Impl::lexicographical_compare_exespace_impl(label, ex, first1, last1, first2, last2); } template < class ExecutionSpace, class DataType1, class... Properties1, class DataType2, class... Properties2, std::enable_if_t, int> = 0> bool lexicographical_compare( const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2)); } template < class ExecutionSpace, class DataType1, class... Properties1, class DataType2, class... 
Properties2, std::enable_if_t, int> = 0> bool lexicographical_compare( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::lexicographical_compare_exespace_impl( label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2)); } template < class ExecutionSpace, class IteratorType1, class IteratorType2, class ComparatorType, std::enable_if_t, int> = 0> bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, first2, last2, comp); } template < class ExecutionSpace, class IteratorType1, class IteratorType2, class ComparatorType, std::enable_if_t, int> = 0> bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { return Impl::lexicographical_compare_exespace_impl(label, ex, first1, last1, first2, last2, comp); } template < class ExecutionSpace, class DataType1, class... Properties1, class DataType2, class... 
Properties2, class ComparatorType, std::enable_if_t, int> = 0> bool lexicographical_compare( const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp); } template < class ExecutionSpace, class DataType1, class... Properties1, class DataType2, class... Properties2, class ComparatorType, std::enable_if_t, int> = 0> bool lexicographical_compare( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::lexicographical_compare_exespace_impl( label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION bool lexicographical_compare(const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { return Impl::lexicographical_compare_team_impl(teamHandle, first1, last1, first2, last2); } template , int> = 0> KOKKOS_FUNCTION bool lexicographical_compare( const TeamHandleType& teamHandle, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::lexicographical_compare_team_impl( teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2)); } template , int> = 0> KOKKOS_FUNCTION bool lexicographical_compare( const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { return Impl::lexicographical_compare_team_impl(teamHandle, first1, last1, first2, last2, comp); } template , int> = 0> KOKKOS_FUNCTION bool lexicographical_compare( const TeamHandleType& teamHandle, const ::Kokkos::View& view1, const ::Kokkos::View& view2, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::lexicographical_compare_team_impl( teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp000066400000000000000000000157751461675637500252170ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). 
// // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_MAX_ELEMENT_HPP #define KOKKOS_STD_ALGORITHMS_MAX_ELEMENT_HPP #include "impl/Kokkos_MinMaxMinmaxElement.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_iterator_api_default", ex, first, last); } template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::min_or_max_element_exespace_impl(label, ex, first, last); } template < typename ExecutionSpace, typename IteratorType, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_iterator_api_default", ex, first, last, std::move(comp)); } template < typename ExecutionSpace, typename IteratorType, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); return Impl::min_or_max_element_exespace_impl( label, 
ex, first, last, std::move(comp)); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_view_api_default", ex, begin(v), end(v)); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::min_or_max_element_exespace_impl(label, ex, begin(v), end(v)); } template < typename ExecutionSpace, typename DataType, typename ComparatorType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } template < typename ExecutionSpace, typename DataType, typename ComparatorType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); return Impl::min_or_max_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION auto max_element(const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { return Impl::min_or_max_element_team_impl(teamHandle, first, last); } template , int> = 0> KOKKOS_FUNCTION auto max_element( const TeamHandleType& teamHandle, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::min_or_max_element_team_impl(teamHandle, begin(v), end(v)); } template , int> = 0> KOKKOS_FUNCTION auto max_element(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(teamHandle); return Impl::min_or_max_element_team_impl( teamHandle, first, last, std::move(comp)); } template , int> = 0> KOKKOS_FUNCTION auto max_element( const TeamHandleType& teamHandle, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(teamHandle); return Impl::min_or_max_element_team_impl( teamHandle, begin(v), end(v), std::move(comp)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_MinElement.hpp000066400000000000000000000157751461675637500252150ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_MIN_ELEMENT_HPP #define KOKKOS_STD_ALGORITHMS_MIN_ELEMENT_HPP #include "impl/Kokkos_MinMaxMinmaxElement.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_iterator_api_default", ex, first, last); } template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::min_or_max_element_exespace_impl(label, ex, first, last); } template < typename ExecutionSpace, typename IteratorType, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_iterator_api_default", ex, first, last, std::move(comp)); } template < typename ExecutionSpace, typename IteratorType, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); return Impl::min_or_max_element_exespace_impl( label, ex, first, last, std::move(comp)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_view_api_default", ex, begin(v), end(v)); } template < typename ExecutionSpace, typename DataType, typename ComparatorType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::min_or_max_element_exespace_impl(label, ex, begin(v), end(v)); } template < typename ExecutionSpace, typename DataType, typename ComparatorType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); return Impl::min_or_max_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION auto min_element(const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { return Impl::min_or_max_element_team_impl(teamHandle, first, last); } template , int> = 0> KOKKOS_FUNCTION auto min_element( const TeamHandleType& teamHandle, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::min_or_max_element_team_impl(teamHandle, begin(v), end(v)); } template , int> = 0> KOKKOS_FUNCTION auto min_element(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(teamHandle); return Impl::min_or_max_element_team_impl( teamHandle, first, last, std::move(comp)); } template , int> = 0> KOKKOS_FUNCTION auto min_element( const TeamHandleType& teamHandle, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(teamHandle); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::min_or_max_element_team_impl( teamHandle, begin(v), end(v), std::move(comp)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp000066400000000000000000000161471461675637500256550ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_MINMAX_ELEMENT_HPP #define KOKKOS_STD_ALGORITHMS_MINMAX_ELEMENT_HPP #include "impl/Kokkos_MinMaxMinmaxElement.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_iterator_api_default", ex, first, last); } template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::minmax_element_exespace_impl(label, ex, first, last); } template < typename ExecutionSpace, typename IteratorType, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_iterator_api_default", ex, first, last, std::move(comp)); } template < typename ExecutionSpace, typename IteratorType, typename ComparatorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); return Impl::minmax_element_exespace_impl( label, ex, first, last, std::move(comp)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v)); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::minmax_element_exespace_impl( label, ex, begin(v), end(v)); } template < typename ExecutionSpace, typename DataType, typename ComparatorType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } template < typename ExecutionSpace, typename DataType, typename ComparatorType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); return Impl::minmax_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION auto minmax_element(const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { return Impl::minmax_element_team_impl(teamHandle, first, last); } template , int> = 0> KOKKOS_FUNCTION auto minmax_element(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(teamHandle); return Impl::minmax_element_team_impl( teamHandle, first, last, std::move(comp)); } template , int> = 0> KOKKOS_FUNCTION auto minmax_element( const TeamHandleType& teamHandle, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::minmax_element_team_impl(teamHandle, begin(v), end(v)); } template , int> = 0> KOKKOS_FUNCTION auto minmax_element( const TeamHandleType& teamHandle, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(teamHandle); return Impl::minmax_element_team_impl( teamHandle, begin(v), end(v), std::move(comp)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp000066400000000000000000000223441461675637500247130ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_MISMATCH_HPP #define KOKKOS_STD_ALGORITHMS_MISMATCH_HPP #include "impl/Kokkos_Mismatch.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // FIXME: add mismatch overloads accepting 3 iterators. // An overload consistent with other algorithms: // // auto mismatch(const ExecSpace& ex, It1 first1, It1 last1, It2 first2) {...} // // makes API ambiguous (with the overload accepting views). // // overload set accepting execution space // template < class ExecutionSpace, class IteratorType1, class IteratorType2, std::enable_if_t, int> = 0> ::Kokkos::pair mismatch(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { return Impl::mismatch_exespace_impl("Kokkos::mismatch_iterator_api_default", ex, first1, last1, first2, last2); } template < class ExecutionSpace, class IteratorType1, class IteratorType2, class BinaryPredicateType, std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType&& predicate) { return Impl::mismatch_exespace_impl( "Kokkos::mismatch_iterator_api_default", ex, first1, last1, first2, last2, std::forward(predicate)); } template < class ExecutionSpace, class IteratorType1, class IteratorType2, std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { return Impl::mismatch_exespace_impl(label, ex, first1, last1, first2, last2); } template < class ExecutionSpace, class IteratorType1, class IteratorType2, class BinaryPredicateType, std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 
last2, BinaryPredicateType&& predicate) { return Impl::mismatch_exespace_impl( label, ex, first1, last1, first2, last2, std::forward(predicate)); } template < class ExecutionSpace, class DataType1, class... Properties1, class DataType2, class... Properties2, std::enable_if_t, int> = 0> auto mismatch(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::mismatch_exespace_impl("Kokkos::mismatch_view_api_default", ex, KE::begin(view1), KE::end(view1), KE::begin(view2), KE::end(view2)); } template < class ExecutionSpace, class DataType1, class... Properties1, class DataType2, class... Properties2, class BinaryPredicateType, std::enable_if_t, int> = 0> auto mismatch(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, BinaryPredicateType&& predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::mismatch_exespace_impl( "Kokkos::mismatch_view_api_default", ex, KE::begin(view1), KE::end(view1), KE::begin(view2), KE::end(view2), std::forward(predicate)); } template < class ExecutionSpace, class DataType1, class... Properties1, class DataType2, class... Properties2, std::enable_if_t, int> = 0> auto mismatch(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::mismatch_exespace_impl(label, ex, KE::begin(view1), KE::end(view1), KE::begin(view2), KE::end(view2)); } template < class ExecutionSpace, class DataType1, class... 
Properties1, class DataType2, class... Properties2, class BinaryPredicateType, std::enable_if_t, int> = 0> auto mismatch(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, BinaryPredicateType&& predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::mismatch_exespace_impl( label, ex, KE::begin(view1), KE::end(view1), KE::begin(view2), KE::end(view2), std::forward(predicate)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION ::Kokkos::pair mismatch( const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { return Impl::mismatch_team_impl(teamHandle, first1, last1, first2, last2); } template , int> = 0> KOKKOS_FUNCTION ::Kokkos::pair mismatch( const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType&& predicate) { return Impl::mismatch_team_impl(teamHandle, first1, last1, first2, last2, std::forward(predicate)); } template , int> = 0> KOKKOS_FUNCTION auto mismatch( const TeamHandleType& teamHandle, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::mismatch_team_impl(teamHandle, KE::begin(view1), KE::end(view1), KE::begin(view2), KE::end(view2)); } template , int> = 0> KOKKOS_FUNCTION auto mismatch( const TeamHandleType& teamHandle, const ::Kokkos::View& view1, const ::Kokkos::View& view2, BinaryPredicateType&& predicate) { 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; return Impl::mismatch_team_impl(teamHandle, KE::begin(view1), KE::end(view1), KE::begin(view2), KE::end(view2), std::forward(predicate)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Move.hpp000066400000000000000000000103711461675637500240510ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_MOVE_HPP #define KOKKOS_STD_ALGORITHMS_MOVE_HPP #include "impl/Kokkos_Move.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator move(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::move_exespace_impl("Kokkos::move_iterator_api_default", ex, first, last, d_first); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator move(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::move_exespace_impl(label, ex, first, last, d_first); } template < typename 
ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::move_exespace_impl("Kokkos::move_view_api_default", ex, begin(source), end(source), begin(dest)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::move_exespace_impl(label, ex, begin(source), end(source), begin(dest)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION OutputIterator move(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::move_team_impl(teamHandle, first, last, d_first); } template , int> = 0> KOKKOS_FUNCTION auto move( const TeamHandleType& teamHandle, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::move_team_impl(teamHandle, begin(source), end(source), begin(dest)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp000066400000000000000000000107261461675637500255140ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_MOVE_BACKWARD_HPP #define KOKKOS_STD_ALGORITHMS_MOVE_BACKWARD_HPP #include "impl/Kokkos_MoveBackward.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 move_backward(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { return Impl::move_backward_exespace_impl( "Kokkos::move_backward_iterator_api_default", ex, first, last, d_last); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move_backward(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::move_backward_exespace_impl( "Kokkos::move_backward_view_api_default", ex, begin(source), end(source), end(dest)); } template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 move_backward(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { return Impl::move_backward_exespace_impl(label, ex, first, last, d_last); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move_backward(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::move_backward_exespace_impl(label, ex, begin(source), end(source), end(dest)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION IteratorType2 move_backward(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { return Impl::move_backward_team_impl(teamHandle, first, last, d_last); } template , int> = 0> KOKKOS_FUNCTION auto move_backward( const TeamHandleType& teamHandle, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::move_backward_team_impl(teamHandle, begin(source), end(source), end(dest)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp000066400000000000000000000076771461675637500243460ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_NONE_OF_HPP #define KOKKOS_STD_ALGORITHMS_NONE_OF_HPP #include "impl/Kokkos_AllOfAnyOfNoneOf.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { return Impl::none_of_exespace_impl("Kokkos::none_of_iterator_api_default", ex, first, last, predicate); } template < typename ExecutionSpace, typename IteratorType, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { return Impl::none_of_exespace_impl(label, ex, first, last, predicate); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::none_of_exespace_impl("Kokkos::none_of_view_api_default", ex, KE::cbegin(v), KE::cend(v), std::move(predicate)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename Predicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::none_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), std::move(predicate)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template KOKKOS_FUNCTION std::enable_if_t<::Kokkos::is_team_handle::value, bool> none_of(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, Predicate predicate) { return Impl::none_of_team_impl(teamHandle, first, last, predicate); } template KOKKOS_FUNCTION std::enable_if_t<::Kokkos::is_team_handle::value, bool> none_of(const TeamHandleType& teamHandle, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; return Impl::none_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), std::move(predicate)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp000066400000000000000000000130601461675637500257450ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_PARTITION_COPY_HPP #define KOKKOS_STD_ALGORITHMS_PARTITION_COPY_HPP #include "impl/Kokkos_PartitionCopy.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIteratorType, typename OutputIteratorTrueType, typename OutputIteratorFalseType, typename PredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> ::Kokkos::pair partition_copy( const ExecutionSpace& ex, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { return Impl::partition_copy_exespace_impl( "Kokkos::partition_copy_iterator_api_default", ex, from_first, from_last, to_first_true, to_first_false, std::move(p)); } template < typename ExecutionSpace, typename InputIteratorType, typename OutputIteratorTrueType, typename OutputIteratorFalseType, typename PredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> ::Kokkos::pair partition_copy( const std::string& label, const ExecutionSpace& ex, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { return Impl::partition_copy_exespace_impl(label, ex, from_first, from_last, to_first_true, to_first_false, std::move(p)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename DataType3, typename... 
Properties3, typename PredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_copy( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { return Impl::partition_copy_exespace_impl( "Kokkos::partition_copy_view_api_default", ex, cbegin(view_from), cend(view_from), begin(view_dest_true), begin(view_dest_false), std::move(p)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename DataType3, typename... Properties3, typename PredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_copy( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { return Impl::partition_copy_exespace_impl( label, ex, cbegin(view_from), cend(view_from), begin(view_dest_true), begin(view_dest_false), std::move(p)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION ::Kokkos::pair partition_copy(const TeamHandleType& teamHandle, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { return Impl::partition_copy_team_impl(teamHandle, from_first, from_last, to_first_true, to_first_false, std::move(p)); } template , int> = 0> KOKKOS_FUNCTION auto partition_copy( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { return Impl::partition_copy_team_impl(teamHandle, cbegin(view_from), cend(view_from), begin(view_dest_true), begin(view_dest_false), std::move(p)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp000066400000000000000000000101771461675637500261320ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_PARTITION_POINT_HPP #define KOKKOS_STD_ALGORITHMS_PARTITION_POINT_HPP #include "impl/Kokkos_PartitionPoint.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, typename UnaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType partition_point(const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryPredicate p) { return Impl::partition_point_exespace_impl( "Kokkos::partitioned_point_iterator_api_default", ex, first, last, std::move(p)); } template < typename ExecutionSpace, typename IteratorType, typename UnaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType partition_point(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryPredicate p) { return Impl::partition_point_exespace_impl(label, ex, first, last, std::move(p)); } template < typename ExecutionSpace, typename UnaryPredicate, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_point(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::partition_point_exespace_impl(label, ex, begin(v), end(v), std::move(p)); } template < typename ExecutionSpace, typename UnaryPredicate, typename DataType, typename... 
Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_point(const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::partition_point_exespace_impl( "Kokkos::partition_point_view_api_default", ex, begin(v), end(v), std::move(p)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION IteratorType partition_point(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, UnaryPredicate p) { return Impl::partition_point_team_impl(teamHandle, first, last, std::move(p)); } template , int> = 0> KOKKOS_FUNCTION auto partition_point( const TeamHandleType& teamHandle, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); return Impl::partition_point_team_impl(teamHandle, begin(v), end(v), std::move(p)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Reduce.hpp000066400000000000000000000322271461675637500243560ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REDUCE_HPP #define KOKKOS_STD_ALGORITHMS_REDUCE_HPP #include "impl/Kokkos_Reduce.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // // // overload set 1 // template ::value, int> = 0> typename IteratorType::value_type reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, typename IteratorType::value_type()); } template ::value, int> = 0> typename IteratorType::value_type reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::reduce_default_functors_exespace_impl( label, ex, first, last, typename IteratorType::value_type()); } template ::value, int> = 0> auto reduce(const ExecutionSpace& ex, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); using view_type = ::Kokkos::View; using value_type = typename view_type::value_type; return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view), KE::cend(view), value_type()); } template ::value, int> = 0> auto reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); using view_type = ::Kokkos::View; using value_type = typename view_type::value_type; return Impl::reduce_default_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), value_type()); } // // overload set2: // template ::value, int> = 0> ValueType reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType 
must be move constructible."); return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, init_reduction_value); } template ::value, int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::reduce_default_functors_exespace_impl(label, ex, first, last, init_reduction_value); } template ::value, int> = 0> ValueType reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view), KE::cend(view), init_reduction_value); } template ::value, int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::reduce_default_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value); } // // overload set 3 // template ::value, int> = 0> ValueType reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::reduce_custom_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, init_reduction_value, joiner); } template ::value, int> = 0> ValueType reduce(const std::string& label, 
const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::reduce_custom_functors_exespace_impl( label, ex, first, last, init_reduction_value, joiner); } template ::value, int> = 0> ValueType reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::reduce_custom_functors_exespace_impl( "Kokkos::reduce_custom_functors_view_api", ex, KE::cbegin(view), KE::cend(view), init_reduction_value, joiner); } template ::value, int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value, joiner); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // // // overload set 1 // template < typename TeamHandleType, typename IteratorType, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION typename IteratorType::value_type reduce( const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { return Impl::reduce_default_functors_team_impl( teamHandle, first, last, typename IteratorType::value_type()); } template < typename TeamHandleType, typename DataType, typename... 
Properties, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION auto reduce( const TeamHandleType& teamHandle, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); using view_type = ::Kokkos::View; using value_type = typename view_type::value_type; return Impl::reduce_default_functors_team_impl(teamHandle, KE::cbegin(view), KE::cend(view), value_type()); } // // overload set2: // template < typename TeamHandleType, typename IteratorType, typename ValueType, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::reduce_default_functors_team_impl(teamHandle, first, last, init_reduction_value); } template < typename TeamHandleType, typename DataType, typename... 
Properties, typename ValueType, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, const ::Kokkos::View& view, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::reduce_default_functors_team_impl( teamHandle, KE::cbegin(view), KE::cend(view), init_reduction_value); } // // overload set 3 // template < typename TeamHandleType, typename IteratorType, typename ValueType, typename BinaryOp, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::reduce_custom_functors_team_impl(teamHandle, first, last, init_reduction_value, joiner); } template < typename TeamHandleType, typename DataType, typename... 
Properties, typename ValueType, typename BinaryOp, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::reduce_custom_functors_team_impl(teamHandle, KE::cbegin(view), KE::cend(view), init_reduction_value, joiner); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Remove.hpp000066400000000000000000000100161461675637500243740ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REMOVE_HPP #define KOKKOS_STD_ALGORITHMS_REMOVE_HPP #include "impl/Kokkos_RemoveAllVariants.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename Iterator, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove(const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& value) { return Impl::remove_exespace_impl("Kokkos::remove_iterator_api_default", ex, first, last, value); } template < typename ExecutionSpace, typename Iterator, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& value) { return Impl::remove_exespace_impl(label, ex, first, last, value); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove(const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::remove_exespace_impl("Kokkos::remove_iterator_api_default", ex, ::Kokkos::Experimental::begin(view), ::Kokkos::Experimental::end(view), value); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::remove_exespace_impl(label, ex, ::Kokkos::Experimental::begin(view), ::Kokkos::Experimental::end(view), value); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION Iterator remove(const TeamHandleType& teamHandle, Iterator first, Iterator last, const ValueType& value) { return Impl::remove_team_impl(teamHandle, first, last, value); } template , int> = 0> KOKKOS_FUNCTION auto remove(const TeamHandleType& teamHandle, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::remove_team_impl(teamHandle, ::Kokkos::Experimental::begin(view), ::Kokkos::Experimental::end(view), value); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp000066400000000000000000000124351461675637500252360ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REMOVE_COPY_HPP #define KOKKOS_STD_ALGORITHMS_REMOVE_COPY_HPP #include "impl/Kokkos_RemoveAllVariants.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { return Impl::remove_copy_exespace_impl( "Kokkos::remove_copy_iterator_api_default", ex, first_from, last_from, first_dest, value); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { return Impl::remove_copy_exespace_impl(label, ex, first_from, last_from, first_dest, value); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); return Impl::remove_copy_exespace_impl( "Kokkos::remove_copy_iterator_api_default", ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), value); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); return Impl::remove_copy_exespace_impl( label, ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), value); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION OutputIterator remove_copy(const TeamHandleType& teamHandle, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { return Impl::remove_copy_team_impl(teamHandle, first_from, last_from, first_dest, value); } template , int> = 0> KOKKOS_FUNCTION auto remove_copy( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); return Impl::remove_copy_team_impl( teamHandle, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), value); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp000066400000000000000000000130071461675637500255110ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REMOVE_COPY_IF_HPP #define KOKKOS_STD_ALGORITHMS_REMOVE_COPY_IF_HPP #include "impl/Kokkos_RemoveAllVariants.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename UnaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy_if(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { return Impl::remove_copy_if_exespace_impl( "Kokkos::remove_copy_if_iterator_api_default", ex, first_from, last_from, first_dest, pred); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename UnaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { return Impl::remove_copy_if_exespace_impl(label, ex, first_from, last_from, first_dest, pred); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename UnaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy_if(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, const UnaryPredicate& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); return Impl::remove_copy_if_exespace_impl( "Kokkos::remove_copy_if_iterator_api_default", ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename UnaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, const UnaryPredicate& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); return Impl::remove_copy_if_exespace_impl( label, ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION OutputIterator remove_copy_if(const TeamHandleType& teamHandle, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { return Impl::remove_copy_if_team_impl(teamHandle, first_from, last_from, first_dest, pred); } template , int> = 0> KOKKOS_FUNCTION auto remove_copy_if( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, const UnaryPredicate& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); return Impl::remove_copy_if_team_impl( teamHandle, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp000066400000000000000000000101441461675637500246550ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REMOVE_IF_HPP #define KOKKOS_STD_ALGORITHMS_REMOVE_IF_HPP #include "impl/Kokkos_RemoveAllVariants.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename Iterator, typename UnaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove_if(const ExecutionSpace& ex, Iterator first, Iterator last, UnaryPredicate pred) { return Impl::remove_if_exespace_impl("Kokkos::remove_if_iterator_api_default", ex, first, last, pred); } template < typename ExecutionSpace, typename Iterator, typename UnaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove_if(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, UnaryPredicate pred) { return Impl::remove_if_exespace_impl(label, ex, first, last, pred); } template < typename ExecutionSpace, typename DataType, typename... Properties, typename UnaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_if(const ExecutionSpace& ex, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::remove_if_exespace_impl("Kokkos::remove_if_iterator_api_default", ex, ::Kokkos::Experimental::begin(view), ::Kokkos::Experimental::end(view), pred); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename UnaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::remove_if_exespace_impl(label, ex, ::Kokkos::Experimental::begin(view), ::Kokkos::Experimental::end(view), pred); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION Iterator remove_if(const TeamHandleType& teamHandle, Iterator first, Iterator last, UnaryPredicate pred) { return Impl::remove_if_team_impl(teamHandle, first, last, pred); } template , int> = 0> KOKKOS_FUNCTION auto remove_if( const TeamHandleType& teamHandle, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::remove_if_team_impl(teamHandle, ::Kokkos::Experimental::begin(view), ::Kokkos::Experimental::end(view), pred); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Replace.hpp000066400000000000000000000101551461675637500245160ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_HPP #include "impl/Kokkos_Replace.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename Iterator, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { Impl::replace_exespace_impl("Kokkos::replace_iterator_api", ex, first, last, old_value, new_value); } template < typename ExecutionSpace, typename Iterator, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { Impl::replace_exespace_impl(label, ex, first, last, old_value, new_value); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; Impl::replace_exespace_impl("Kokkos::replace_view_api", ex, KE::begin(view), KE::end(view), old_value, new_value); } template < typename ExecutionSpace, typename DataType1, typename... 
Properties1, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; Impl::replace_exespace_impl(label, ex, KE::begin(view), KE::end(view), old_value, new_value); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION void replace(const TeamHandleType& teamHandle, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { Impl::replace_team_impl(teamHandle, first, last, old_value, new_value); } template , int> = 0> KOKKOS_FUNCTION void replace( const TeamHandleType& teamHandle, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; Impl::replace_team_impl(teamHandle, KE::begin(view), KE::end(view), old_value, new_value); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp000066400000000000000000000133571461675637500253600ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_COPY_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_COPY_HPP #include "impl/Kokkos_ReplaceCopy.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { return Impl::replace_copy_exespace_impl("Kokkos::replace_copy_iterator_api", ex, first_from, last_from, first_dest, old_value, new_value); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { return Impl::replace_copy_exespace_impl(label, ex, first_from, last_from, first_dest, old_value, new_value); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::replace_copy_exespace_impl( "Kokkos::replace_copy_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), old_value, new_value); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::replace_copy_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), old_value, new_value); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION OutputIterator replace_copy(const TeamHandleType& teamHandle, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { return Impl::replace_copy_team_impl(teamHandle, first_from, last_from, first_dest, old_value, new_value); } template , int> = 0> KOKKOS_FUNCTION auto replace_copy( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::replace_copy_team_impl(teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), old_value, new_value); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp000066400000000000000000000136561461675637500256410ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IF_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IF_HPP #include "impl/Kokkos_ReplaceCopyIf.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename PredicateType, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy_if(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { return Impl::replace_copy_if_exespace_impl( "Kokkos::replace_copy_if_iterator_api", ex, first_from, last_from, first_dest, pred, new_value); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename PredicateType, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { return Impl::replace_copy_if_exespace_impl(label, ex, first_from, last_from, first_dest, pred, new_value); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename PredicateType, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy_if(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, PredicateType pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::replace_copy_if_exespace_impl( "Kokkos::replace_copy_if_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), pred, new_value); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename PredicateType, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, PredicateType pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::replace_copy_if_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), pred, new_value); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION OutputIterator replace_copy_if(const TeamHandleType& teamHandle, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { return Impl::replace_copy_if_team_impl(teamHandle, first_from, last_from, first_dest, pred, new_value); } template , int> = 0> KOKKOS_FUNCTION auto replace_copy_if( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, PredicateType pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::replace_copy_if_team_impl(teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), pred, new_value); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp000066400000000000000000000105101461675637500247700ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_IF_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_IF_HPP #include "impl/Kokkos_ReplaceIf.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename Predicate, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { Impl::replace_if_exespace_impl("Kokkos::replace_if_iterator_api", ex, first, last, pred, new_value); } template < typename ExecutionSpace, typename InputIterator, typename Predicate, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { Impl::replace_if_exespace_impl(label, ex, first, last, pred, new_value); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename Predicate, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const ExecutionSpace& ex, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; Impl::replace_if_exespace_impl("Kokkos::replace_if_view_api", ex, KE::begin(view), KE::end(view), pred, new_value); } template < typename ExecutionSpace, typename DataType1, typename... 
Properties1, typename Predicate, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; Impl::replace_if_exespace_impl(label, ex, KE::begin(view), KE::end(view), pred, new_value); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION void replace_if(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { Impl::replace_if_team_impl(teamHandle, first, last, pred, new_value); } template , int> = 0> KOKKOS_FUNCTION void replace_if( const TeamHandleType& teamHandle, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; Impl::replace_if_team_impl(teamHandle, KE::begin(view), KE::end(view), pred, new_value); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Reverse.hpp000066400000000000000000000067701461675637500245660ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REVERSE_HPP #define KOKKOS_STD_ALGORITHMS_REVERSE_HPP #include "impl/Kokkos_Reverse.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const ExecutionSpace& ex, InputIterator first, InputIterator last) { return Impl::reverse_exespace_impl("Kokkos::reverse_iterator_api_default", ex, first, last); } template < typename ExecutionSpace, typename InputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last) { return Impl::reverse_exespace_impl(label, ex, first, last); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::reverse_exespace_impl("Kokkos::reverse_view_api_default", ex, KE::begin(view), KE::end(view)); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::reverse_exespace_impl(label, ex, KE::begin(view), KE::end(view)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION void reverse(const TeamHandleType& teamHandle, InputIterator first, InputIterator last) { return Impl::reverse_team_impl(teamHandle, first, last); } template , int> = 0> KOKKOS_FUNCTION void reverse( const TeamHandleType& teamHandle, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::reverse_team_impl(teamHandle, KE::begin(view), KE::end(view)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp000066400000000000000000000107321461675637500254120ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_REVERSE_COPY_HPP #define KOKKOS_STD_ALGORITHMS_REVERSE_COPY_HPP #include "impl/Kokkos_ReverseCopy.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator reverse_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::reverse_copy_exespace_impl( "Kokkos::reverse_copy_iterator_api_default", ex, first, last, d_first); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator reverse_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::reverse_copy_exespace_impl(label, ex, first, last, d_first); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto reverse_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::reverse_copy_exespace_impl( "Kokkos::reverse_copy_view_api_default", ex, cbegin(source), cend(source), begin(dest)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto reverse_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::reverse_copy_exespace_impl(label, ex, cbegin(source), cend(source), begin(dest)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION OutputIterator reverse_copy(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::reverse_copy_team_impl(teamHandle, first, last, d_first); } template , int> = 0> KOKKOS_FUNCTION auto reverse_copy( const TeamHandleType& teamHandle, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::reverse_copy_team_impl(teamHandle, cbegin(source), cend(source), begin(dest)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Rotate.hpp000066400000000000000000000075161461675637500244100ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_ROTATE_HPP #define KOKKOS_STD_ALGORITHMS_ROTATE_HPP #include "impl/Kokkos_Rotate.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType rotate(const ExecutionSpace& ex, IteratorType first, IteratorType n_first, IteratorType last) { return Impl::rotate_exespace_impl("Kokkos::rotate_iterator_api_default", ex, first, n_first, last); } template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType rotate(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType n_first, IteratorType last) { return Impl::rotate_exespace_impl(label, ex, first, n_first, last); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate(const ExecutionSpace& ex, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::rotate_exespace_impl("Kokkos::rotate_view_api_default", ex, begin(view), begin(view) + n_location, end(view)); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::rotate_exespace_impl(label, ex, begin(view), begin(view) + n_location, end(view)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION IteratorType rotate(const TeamHandleType& teamHandle, IteratorType first, IteratorType n_first, IteratorType last) { return Impl::rotate_team_impl(teamHandle, first, n_first, last); } template , int> = 0> KOKKOS_FUNCTION auto rotate(const TeamHandleType& teamHandle, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::rotate_team_impl(teamHandle, begin(view), begin(view) + n_location, end(view)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp000066400000000000000000000116451461675637500252410ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_ROTATE_COPY_HPP #define KOKKOS_STD_ALGORITHMS_ROTATE_COPY_HPP #include "impl/Kokkos_RotateCopy.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator rotate_copy(const ExecutionSpace& ex, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { return Impl::rotate_copy_exespace_impl( "Kokkos::rotate_copy_iterator_api_default", ex, first, n_first, last, d_first); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator rotate_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { return Impl::rotate_copy_exespace_impl(label, ex, first, n_first, last, d_first); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, std::size_t n_location, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::rotate_copy_exespace_impl( "Kokkos::rotate_copy_view_api_default", ex, cbegin(source), cbegin(source) + n_location, cend(source), begin(dest)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, std::size_t n_location, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::rotate_copy_exespace_impl(label, ex, cbegin(source), cbegin(source) + n_location, cend(source), begin(dest)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION OutputIterator rotate_copy(const TeamHandleType& teamHandle, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { return Impl::rotate_copy_team_impl(teamHandle, first, n_first, last, d_first); } template , int> = 0> KOKKOS_FUNCTION auto rotate_copy( const TeamHandleType& teamHandle, const ::Kokkos::View& source, std::size_t n_location, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::rotate_copy_team_impl(teamHandle, cbegin(source), cbegin(source) + n_location, cend(source), begin(dest)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Search.hpp000066400000000000000000000220431461675637500243470ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_SEARCH_HPP #define KOKKOS_STD_ALGORITHMS_SEARCH_HPP #include "impl/Kokkos_Search.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // // overload set 1: no binary predicate passed template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { return Impl::search_exespace_impl("Kokkos::search_iterator_api_default", ex, first, last, s_first, s_last); } template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { return Impl::search_exespace_impl(label, ex, first, last, s_first, s_last); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::search_exespace_impl("Kokkos::search_view_api_default", ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::search_exespace_impl(label, ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { return Impl::search_exespace_impl("Kokkos::search_iterator_api_default", ex, first, last, s_first, s_last, pred); } template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { return Impl::search_exespace_impl(label, ex, first, last, s_first, s_last, pred); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::search_exespace_impl("Kokkos::search_view_api_default", ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::search_exespace_impl(label, ex, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// // overload set 1: no binary predicate passed template , int> = 0> KOKKOS_FUNCTION IteratorType1 search(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { return Impl::search_team_impl(teamHandle, first, last, s_first, s_last); } template , int> = 0> KOKKOS_FUNCTION auto search( const TeamHandleType& teamHandle, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::search_team_impl(teamHandle, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed template , int> = 0> KOKKOS_FUNCTION IteratorType1 search(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { return Impl::search_team_impl(teamHandle, first, last, s_first, s_last, pred); } template , int> = 0> KOKKOS_FUNCTION auto search( const TeamHandleType& teamHandle, const ::Kokkos::View& view, const ::Kokkos::View& s_view, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; return Impl::search_team_impl(teamHandle, KE::begin(view), KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_SearchN.hpp000066400000000000000000000205251461675637500244700ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). 
// // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_SEARCH_N_HPP #define KOKKOS_STD_ALGORITHMS_SEARCH_N_HPP #include "impl/Kokkos_SearchN.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // // overload set 1: no binary predicate passed template < class ExecutionSpace, class IteratorType, class SizeType, class ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { return Impl::search_n_exespace_impl("Kokkos::search_n_iterator_api_default", ex, first, last, count, value); } template < class ExecutionSpace, class IteratorType, class SizeType, class ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { return Impl::search_n_exespace_impl(label, ex, first, last, count, value); } template ::value, int> = 0> auto search_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::search_n_exespace_impl("Kokkos::search_n_view_api_default", ex, KE::begin(view), KE::end(view), count, value); } template ::value, int> = 0> auto search_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = 
::Kokkos::Experimental; return Impl::search_n_exespace_impl(label, ex, KE::begin(view), KE::end(view), count, value); } // overload set 2: binary predicate passed template < class ExecutionSpace, class IteratorType, class SizeType, class ValueType, class BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { return Impl::search_n_exespace_impl("Kokkos::search_n_iterator_api_default", ex, first, last, count, value, pred); } template < class ExecutionSpace, class IteratorType, class SizeType, class ValueType, class BinaryPredicateType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { return Impl::search_n_exespace_impl(label, ex, first, last, count, value, pred); } template ::value, int> = 0> auto search_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::search_n_exespace_impl("Kokkos::search_n_view_api_default", ex, KE::begin(view), KE::end(view), count, value, pred); } template ::value, int> = 0> auto search_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::search_n_exespace_impl(label, ex, KE::begin(view), KE::end(view), count, value, pred); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on 
device because of the string allocation. // // overload set 1: no binary predicate passed template , int> = 0> KOKKOS_FUNCTION IteratorType search_n(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { return Impl::search_n_team_impl(teamHandle, first, last, count, value); } template < class TeamHandleType, class DataType, class... Properties, class SizeType, class ValueType, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION auto search_n( const TeamHandleType& teamHandle, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::search_n_team_impl(teamHandle, KE::begin(view), KE::end(view), count, value); } // overload set 2: binary predicate passed template , int> = 0> KOKKOS_FUNCTION IteratorType search_n(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { return Impl::search_n_team_impl(teamHandle, first, last, count, value, pred); } template < class TeamHandleType, class DataType, class... 
Properties, class SizeType, class ValueType, class BinaryPredicateType, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION auto search_n( const TeamHandleType& teamHandle, const ::Kokkos::View& view, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; return Impl::search_n_team_impl(teamHandle, KE::begin(view), KE::end(view), count, value, pred); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp000066400000000000000000000074221461675637500250360ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_SHIFT_LEFT_HPP #define KOKKOS_STD_ALGORITHMS_SHIFT_LEFT_HPP #include "impl/Kokkos_ShiftLeft.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_left(const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { return Impl::shift_left_exespace_impl( "Kokkos::shift_left_iterator_api_default", ex, first, last, n); } template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_left(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { return Impl::shift_left_exespace_impl(label, ex, first, last, n); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_left(const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::shift_left_exespace_impl("Kokkos::shift_left_view_api_default", ex, begin(view), end(view), n); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_left(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::shift_left_exespace_impl(label, ex, begin(view), end(view), n); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION IteratorType shift_left(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { return Impl::shift_left_team_impl(teamHandle, first, last, n); } template , int> = 0> KOKKOS_FUNCTION auto shift_left( const TeamHandleType& teamHandle, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::shift_left_team_impl(teamHandle, begin(view), end(view), n); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp000066400000000000000000000074551461675637500252270ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_SHIFT_RIGHT_HPP #define KOKKOS_STD_ALGORITHMS_SHIFT_RIGHT_HPP #include "impl/Kokkos_ShiftRight.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_right(const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { return Impl::shift_right_exespace_impl( "Kokkos::shift_right_iterator_api_default", ex, first, last, n); } template < typename ExecutionSpace, typename IteratorType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_right(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { return Impl::shift_right_exespace_impl(label, ex, first, last, n); } template < typename ExecutionSpace, typename DataType, typename... Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_right(const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::shift_right_exespace_impl("Kokkos::shift_right_view_api_default", ex, begin(view), end(view), n); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_right(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::shift_right_exespace_impl(label, ex, begin(view), end(view), n); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION IteratorType shift_right(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { return Impl::shift_right_team_impl(teamHandle, first, last, n); } template , int> = 0> KOKKOS_FUNCTION auto shift_right( const TeamHandleType& teamHandle, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::shift_right_team_impl(teamHandle, begin(view), end(view), n); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp000066400000000000000000000111451461675637500252150ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_SWAP_RANGES_HPP #define KOKKOS_STD_ALGORITHMS_SWAP_RANGES_HPP #include "impl/Kokkos_SwapRanges.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template , int> = 0> IteratorType2 swap_ranges(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { return Impl::swap_ranges_exespace_impl( "Kokkos::swap_ranges_iterator_api_default", ex, first1, last1, first2); } template , int> = 0> auto swap_ranges(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); return Impl::swap_ranges_exespace_impl("Kokkos::swap_ranges_view_api_default", ex, begin(source), end(source), begin(dest)); } template , int> = 0> IteratorType2 swap_ranges(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { return Impl::swap_ranges_exespace_impl(label, ex, first1, last1, first2); } template , int> = 0> auto swap_ranges(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); return Impl::swap_ranges_exespace_impl(label, ex, begin(source), end(source), begin(dest)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template , int> = 0> KOKKOS_FUNCTION IteratorType2 swap_ranges(const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { return Impl::swap_ranges_team_impl(teamHandle, first1, last1, first2); } template , int> = 0> KOKKOS_FUNCTION auto swap_ranges( const TeamHandleType& teamHandle, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); return Impl::swap_ranges_team_impl(teamHandle, begin(source), end(source), begin(dest)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Transform.hpp000066400000000000000000000251351461675637500251220ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_HPP #define KOKKOS_STD_ALGORITHMS_TRANSFORM_HPP #include "impl/Kokkos_Transform.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename UnaryOperation, std::enable_if_t && is_execution_space_v, int> = 0> OutputIterator transform(const ExecutionSpace& ex, InputIterator first1, InputIterator last1, OutputIterator d_first, UnaryOperation unary_op) { return Impl::transform_exespace_impl("Kokkos::transform_iterator_api_default", ex, first1, last1, d_first, std::move(unary_op)); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename UnaryOperation, std::enable_if_t && is_execution_space_v, int> = 0> OutputIterator transform(const std::string& label, const ExecutionSpace& ex, InputIterator first1, InputIterator last1, OutputIterator d_first, UnaryOperation unary_op) { return Impl::transform_exespace_impl(label, ex, first1, last1, d_first, std::move(unary_op)); } template , int> = 0> auto transform(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, UnaryOperation unary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::transform_exespace_impl("Kokkos::transform_view_api_default", ex, begin(source), end(source), begin(dest), std::move(unary_op)); } template , int> = 0> auto transform(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, UnaryOperation unary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::transform_exespace_impl(label, ex, begin(source), end(source), 
begin(dest), std::move(unary_op)); } template < typename ExecutionSpace, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryOperation, std::enable_if_t< Impl::are_iterators_v && is_execution_space_v, int> = 0> OutputIterator transform(const ExecutionSpace& ex, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator d_first, BinaryOperation binary_op) { return Impl::transform_exespace_impl("Kokkos::transform_iterator_api_default", ex, first1, last1, first2, d_first, std::move(binary_op)); } template < typename ExecutionSpace, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryOperation, std::enable_if_t< Impl::are_iterators_v && is_execution_space_v, int> = 0> OutputIterator transform(const std::string& label, const ExecutionSpace& ex, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator d_first, BinaryOperation binary_op) { return Impl::transform_exespace_impl(label, ex, first1, last1, first2, d_first, std::move(binary_op)); } template , int> = 0> auto transform(const ExecutionSpace& ex, const ::Kokkos::View& source1, const ::Kokkos::View& source2, const ::Kokkos::View& dest, BinaryOperation binary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::transform_exespace_impl( "Kokkos::transform_view_api_default", ex, begin(source1), end(source1), begin(source2), begin(dest), std::move(binary_op)); } template , int> = 0> auto transform(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source1, const ::Kokkos::View& source2, const ::Kokkos::View& dest, BinaryOperation binary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::transform_exespace_impl(label, ex, begin(source1), end(source1), begin(source2), begin(dest), std::move(binary_op)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template < typename TeamHandleType, typename InputIterator, typename OutputIterator, typename UnaryOperation, std::enable_if_t && is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIterator transform(const TeamHandleType& teamHandle, InputIterator first1, InputIterator last1, OutputIterator d_first, UnaryOperation unary_op) { return Impl::transform_team_impl(teamHandle, first1, last1, d_first, std::move(unary_op)); } template , int> = 0> KOKKOS_FUNCTION auto transform( const TeamHandleType& teamHandle, const ::Kokkos::View& source, const ::Kokkos::View& dest, UnaryOperation unary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::transform_team_impl(teamHandle, begin(source), end(source), begin(dest), std::move(unary_op)); } template < typename TeamHandleType, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryOperation, std::enable_if_t< Impl::are_iterators_v && is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIterator transform(const TeamHandleType& teamHandle, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator d_first, BinaryOperation binary_op) { return Impl::transform_team_impl(teamHandle, first1, last1, first2, d_first, std::move(binary_op)); } template , int> = 0> KOKKOS_FUNCTION auto transform( const TeamHandleType& teamHandle, const ::Kokkos::View& source1, const ::Kokkos::View& source2, const ::Kokkos::View& dest, BinaryOperation binary_op) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::transform_team_impl(teamHandle, begin(source1), end(source1), begin(source2), begin(dest), std::move(binary_op)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp000066400000000000000000000162541461675637500276210ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_TRASFORM_EXCLUSIVE_SCAN_HPP #define KOKKOS_STD_ALGORITHMS_TRASFORM_EXCLUSIVE_SCAN_HPP #include "impl/Kokkos_TransformExclusiveScan.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType transform_exclusive_scan( const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::transform_exclusive_scan_exespace_impl( "Kokkos::transform_exclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, std::move(init_value), binary_op, unary_op); } template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType transform_exclusive_scan( const 
std::string& label, const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::transform_exclusive_scan_exespace_impl( label, ex, first, last, first_dest, std::move(init_value), binary_op, unary_op); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename ValueType, typename BinaryOpType, typename UnaryOpType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_exclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::transform_exclusive_scan_exespace_impl( "Kokkos::transform_exclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), std::move(init_value), binary_op, unary_op); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename ValueType, typename BinaryOpType, typename UnaryOpType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_exclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::transform_exclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), std::move(init_value), binary_op, unary_op); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template && :: Kokkos::is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIteratorType transform_exclusive_scan( const TeamHandleType& teamHandle, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(teamHandle); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::transform_exclusive_scan_team_impl( teamHandle, first, last, first_dest, std::move(init_value), binary_op, unary_op); } template , int> = 0> KOKKOS_FUNCTION auto transform_exclusive_scan( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(teamHandle); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::transform_exclusive_scan_team_impl( teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), std::move(init_value), binary_op, unary_op); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp000066400000000000000000000310541461675637500276060ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_INCLUSIVE_SCAN_HPP #define KOKKOS_STD_ALGORITHMS_TRANSFORM_INCLUSIVE_SCAN_HPP #include "impl/Kokkos_TransformInclusiveScan.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // // overload set 1 (no init value) template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op, unary_op); } template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); return Impl::transform_inclusive_scan_exespace_impl( label, ex, first, last, first_dest, binary_op, unary_op); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename BinaryOpType, typename UnaryOpType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename BinaryOpType, typename UnaryOpType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::transform_inclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } // overload set 2 (init value) template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType transform_inclusive_scan( const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return 
Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op, unary_op, std::move(init_value)); } template && :: Kokkos::is_execution_space_v, int> = 0> OutputIteratorType transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::transform_inclusive_scan_exespace_impl( label, ex, first, last, first_dest, binary_op, unary_op, std::move(init_value)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename BinaryOpType, typename UnaryOpType, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename BinaryOpType, typename UnaryOpType, typename ValueType, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::transform_inclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// // overload set 1 (no init value) template && Kokkos::is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIteratorType transform_inclusive_scan( const TeamHandleType& teamHandle, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(teamHandle); return Impl::transform_inclusive_scan_team_impl( teamHandle, first, last, first_dest, binary_op, unary_op); } template , int> = 0> KOKKOS_FUNCTION auto transform_inclusive_scan( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(teamHandle); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; return Impl::transform_inclusive_scan_team_impl( teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } // overload set 2 (init value) template && Kokkos::is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIteratorType transform_inclusive_scan( const TeamHandleType& teamHandle, InputIteratorType first, InputIteratorType last, OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { Impl::static_assert_is_not_openmptarget(teamHandle); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); return Impl::transform_inclusive_scan_team_impl( teamHandle, first, last, first_dest, binary_op, unary_op, std::move(init_value)); } template , int> = 0> KOKKOS_FUNCTION auto transform_inclusive_scan( const TeamHandleType& teamHandle, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { Impl::static_assert_is_not_openmptarget(teamHandle); 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; return Impl::transform_inclusive_scan_team_impl( teamHandle, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp000066400000000000000000000457751461675637500262660ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_REDUCE_HPP #define KOKKOS_STD_ALGORITHMS_TRANSFORM_REDUCE_HPP #include "impl/Kokkos_TransformReduce.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set accepting execution space // // ---------------------------- // overload set1: // no custom functors passed, so equivalent to // transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>()); // ---------------------------- template ::value, int> = 0> ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { return Impl::transform_reduce_default_functors_exespace_impl( "Kokkos::transform_reduce_default_functors_iterator_api", ex, first1, last1, first2, std::move(init_reduction_value)); } template ::value, int> = 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { return Impl::transform_reduce_default_functors_exespace_impl( label, ex, first1, last1, first2, std::move(init_reduction_value)); } // overload1 accepting views template ::value, int> = 0> ValueType transform_reduce( const ExecutionSpace& ex, const ::Kokkos::View& first_view, const ::Kokkos::View& second_view, ValueType init_reduction_value) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); return Impl::transform_reduce_default_functors_exespace_impl( "Kokkos::transform_reduce_default_functors_iterator_api", ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } template ::value, int> = 0> ValueType transform_reduce( const std::string& label, const ExecutionSpace& ex, const 
::Kokkos::View& first_view, const ::Kokkos::View& second_view, ValueType init_reduction_value) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); return Impl::transform_reduce_default_functors_exespace_impl( label, ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } // // overload set2: // accepts a custom transform and joiner functor // // Note the std refers to the arg BinaryReductionOp // but in the Kokkos naming convention, it corresponds // to a "joiner" that knows how to join two values // NOTE: "joiner/transformer" need to be commutative. // https://en.cppreference.com/w/cpp/algorithm/transform_reduce // api accepting iterators template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, typename ValueType, typename BinaryJoinerType, typename BinaryTransform, std::enable_if_t<::Kokkos::is_execution_space::value, int> = 0> ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, BinaryJoinerType joiner, BinaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } template < typename ExecutionSpace, typename IteratorType1, typename IteratorType2, typename ValueType, typename BinaryJoinerType, typename BinaryTransform, std::enable_if_t<::Kokkos::is_execution_space::value, int> = 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, BinaryJoinerType 
joiner, BinaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views template ::value, int> = 0> ValueType transform_reduce( const ExecutionSpace& ex, const ::Kokkos::View& first_view, const ::Kokkos::View& second_view, ValueType init_reduction_value, BinaryJoinerType joiner, BinaryTransform transformer) { namespace KE = ::Kokkos::Experimental; static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } template ::value, int> = 0> ValueType transform_reduce( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& first_view, const ::Kokkos::View& second_view, ValueType init_reduction_value, BinaryJoinerType joiner, BinaryTransform transformer) { namespace KE = ::Kokkos::Experimental; static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // // overload set3: // // accepting iterators template ::value && is_execution_space::value, int> = 0> ValueType 
transform_reduce(const ExecutionSpace& ex, IteratorType first1, IteratorType last1, ValueType init_reduction_value, BinaryJoinerType joiner, UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } template ::value && is_execution_space::value, int> = 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first1, IteratorType last1, ValueType init_reduction_value, BinaryJoinerType joiner, UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views template < typename ExecutionSpace, typename DataType, typename... Properties, typename ValueType, typename BinaryJoinerType, typename UnaryTransform, std::enable_if_t<::Kokkos::is_execution_space::value, int> = 0> ValueType transform_reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryJoinerType joiner, UnaryTransform transformer) { namespace KE = ::Kokkos::Experimental; static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } template < typename ExecutionSpace, typename DataType, typename... 
Properties, typename ValueType, typename BinaryJoinerType, typename UnaryTransform, std::enable_if_t<::Kokkos::is_execution_space::value, int> = 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryJoinerType joiner, UnaryTransform transformer) { namespace KE = ::Kokkos::Experimental; static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // // overload set accepting a team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // // ---------------------------- // overload set1: // no custom functors passed, so equivalent to // transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>()); // ---------------------------- template < typename TeamHandleType, typename IteratorType1, typename IteratorType2, typename ValueType, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { return Impl::transform_reduce_default_functors_team_impl( teamHandle, first1, last1, first2, std::move(init_reduction_value)); } // overload1 accepting views template < typename TeamHandleType, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename ValueType, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, const ::Kokkos::View& first_view, const ::Kokkos::View& second_view, ValueType init_reduction_value) { namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); return Impl::transform_reduce_default_functors_team_impl( teamHandle, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } // // overload set2: // accepts a custom transform and joiner functor // // Note the std refers to the arg BinaryReductionOp // but in the Kokkos naming convention, it corresponds // to a "joiner" that knows how to join two values // NOTE: "joiner/transformer" need to be commutative. // https://en.cppreference.com/w/cpp/algorithm/transform_reduce // api accepting iterators template < typename TeamHandleType, typename IteratorType1, typename IteratorType2, typename ValueType, typename BinaryJoinerType, typename BinaryTransform, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION ValueType transform_reduce( const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, BinaryJoinerType joiner, BinaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::transform_reduce_custom_functors_team_impl( teamHandle, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views template < typename TeamHandleType, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename ValueType, typename BinaryJoinerType, typename BinaryTransform, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, const ::Kokkos::View& first_view, const ::Kokkos::View& second_view, ValueType init_reduction_value, BinaryJoinerType joiner, BinaryTransform transformer) { namespace KE = ::Kokkos::Experimental; static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); return Impl::transform_reduce_custom_functors_team_impl( teamHandle, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // // overload set3: // // accepting iterators template ::value && is_team_handle::value, int> = 0> KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, IteratorType first1, IteratorType last1, ValueType init_reduction_value, BinaryJoinerType joiner, UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); return Impl::transform_reduce_custom_functors_team_impl( teamHandle, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views template < typename TeamHandleType, typename DataType, typename... 
Properties, typename ValueType, typename BinaryJoinerType, typename UnaryTransform, std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryJoinerType joiner, UnaryTransform transformer) { namespace KE = ::Kokkos::Experimental; static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::transform_reduce_custom_functors_team_impl( teamHandle, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_Unique.hpp000066400000000000000000000153171461675637500244160ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_UNIQUE_HPP #define KOKKOS_STD_ALGORITHMS_UNIQUE_HPP #include "impl/Kokkos_Unique.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set1: default predicate, accepting execution space // template && is_execution_space::value, int> = 0> IteratorType unique(const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::unique_exespace_impl("Kokkos::unique_iterator_api_default", ex, first, last); } template && is_execution_space::value, int> = 0> IteratorType unique(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { return Impl::unique_exespace_impl(label, ex, first, last); } template ::value, int> = 0> auto unique(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::unique_exespace_impl("Kokkos::unique_view_api_default", ex, begin(view), end(view)); } template ::value, int> = 0> auto unique(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::unique_exespace_impl(label, ex, begin(view), end(view)); } // // overload set2: custom predicate, accepting execution space // template ::value, int> = 0> IteratorType unique(const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicate pred) { return Impl::unique_exespace_impl("Kokkos::unique_iterator_api_default", ex, first, last, pred); } template ::value, int> = 0> IteratorType unique(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicate pred) { return Impl::unique_exespace_impl(label, ex, first, last, pred); } template ::value, int> = 0> auto unique(const ExecutionSpace& ex, const ::Kokkos::View& view, BinaryPredicate pred) { 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::unique_exespace_impl("Kokkos::unique_view_api_default", ex, begin(view), end(view), std::move(pred)); } template ::value, int> = 0> auto unique(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); return Impl::unique_exespace_impl(label, ex, begin(view), end(view), std::move(pred)); } // // overload set3: default predicate, accepting team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template && is_team_handle::value, int> = 0> KOKKOS_FUNCTION IteratorType unique(const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { return Impl::unique_team_impl(teamHandle, first, last); } template ::value, int> = 0> KOKKOS_FUNCTION auto unique( const TeamHandleType& teamHandle, const ::Kokkos::View& view) { return Impl::unique_team_impl(teamHandle, begin(view), end(view)); } // // overload set4: custom predicate, accepting team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template ::value, int> = 0> KOKKOS_FUNCTION IteratorType unique(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, BinaryPredicate pred) { return Impl::unique_team_impl(teamHandle, first, last, std::move(pred)); } template ::value, int> = 0> KOKKOS_FUNCTION auto unique(const TeamHandleType& teamHandle, const ::Kokkos::View& view, BinaryPredicate pred) { return Impl::unique_team_impl(teamHandle, begin(view), end(view), std::move(pred)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp000066400000000000000000000217341461675637500252510ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_UNIQUE_COPY_HPP #define KOKKOS_STD_ALGORITHMS_UNIQUE_COPY_HPP #include "impl/Kokkos_UniqueCopy.hpp" #include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { // // overload set1: default predicate, accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, std::enable_if_t && is_execution_space_v, int> = 0> OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::unique_copy_exespace_impl( "Kokkos::unique_copy_iterator_api_default", ex, first, last, d_first); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, std::enable_if_t && is_execution_space_v, int> = 0> OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::unique_copy_exespace_impl(label, ex, first, last, d_first); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::unique_copy_exespace_impl("Kokkos::unique_copy_view_api_default", ex, cbegin(source), cend(source), begin(dest)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::unique_copy_exespace_impl(label, ex, cbegin(source), cend(source), begin(dest)); } // // overload set2: custom predicate, accepting execution space // template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename BinaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { return Impl::unique_copy_exespace_impl( "Kokkos::unique_copy_iterator_api_default", ex, first, last, d_first, pred); } template < typename ExecutionSpace, typename InputIterator, typename OutputIterator, typename BinaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { return Impl::unique_copy_exespace_impl(label, ex, first, last, d_first, pred); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... 
Properties2, typename BinaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::unique_copy_exespace_impl("Kokkos::unique_copy_view_api_default", ex, cbegin(source), cend(source), begin(dest), std::move(pred)); } template < typename ExecutionSpace, typename DataType1, typename... Properties1, typename DataType2, typename... Properties2, typename BinaryPredicate, std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::unique_copy_exespace_impl( label, ex, cbegin(source), cend(source), begin(dest), std::move(pred)); } // // overload set3: default predicate, accepting team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. 
// template < typename TeamHandleType, typename InputIterator, typename OutputIterator, std::enable_if_t && Kokkos::is_team_handle_v, int> = 0> KOKKOS_FUNCTION OutputIterator unique_copy(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, OutputIterator d_first) { return Impl::unique_copy_team_impl(teamHandle, first, last, d_first); } template , int> = 0> KOKKOS_FUNCTION auto unique_copy( const TeamHandleType& teamHandle, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::unique_copy_team_impl(teamHandle, cbegin(source), cend(source), begin(dest)); } // // overload set4: custom predicate, accepting team handle // Note: for now omit the overloads accepting a label // since they cause issues on device because of the string allocation. // template , int> = 0> KOKKOS_FUNCTION OutputIterator unique_copy(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { return Impl::unique_copy_team_impl(teamHandle, first, last, d_first, pred); } template , int> = 0> KOKKOS_FUNCTION auto unique_copy( const TeamHandleType& teamHandle, const ::Kokkos::View& source, const ::Kokkos::View& dest, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); return Impl::unique_copy_team_impl(teamHandle, cbegin(source), cend(source), begin(dest), std::move(pred)); } } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/000077500000000000000000000000001461675637500220505ustar00rootroot00000000000000kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp000066400000000000000000000102641461675637500275710ustar00rootroot00000000000000//@HEADER // 
************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdAdjacentDifferenceDefaultBinaryOpFunctor { KOKKOS_FUNCTION constexpr RetType operator()(const ValueType1& a, const ValueType2& b) const { return a - b; } }; template struct StdAdjacentDiffFunctor { using index_type = typename InputIteratorType::difference_type; const InputIteratorType m_first_from; const OutputIteratorType m_first_dest; BinaryOperator m_op; KOKKOS_FUNCTION void operator()(const index_type i) const { const auto& my_value = m_first_from[i]; if (i == 0) { m_first_dest[i] = my_value; } else { const auto& left_value = m_first_from[i - 1]; m_first_dest[i] = m_op(my_value, left_value); } } KOKKOS_FUNCTION StdAdjacentDiffFunctor(InputIteratorType first_from, OutputIteratorType first_dest, BinaryOperator op) : m_first_from(std::move(first_from)), m_first_dest(std::move(first_dest)), m_op(std::move(op)) {} }; // // exespace impl // template OutputIteratorType adjacent_difference_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOp bin_op) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); 
Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); if (first_from == last_from) { return first_dest; } // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_for( label, RangePolicy(ex, 0, num_elements), StdAdjacentDiffFunctor(first_from, first_dest, bin_op)); ex.fence("Kokkos::adjacent_difference: fence after operation"); // return return first_dest + num_elements; } // // team impl // template KOKKOS_FUNCTION OutputIteratorType adjacent_difference_team_impl( const TeamHandleType& teamHandle, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOp bin_op) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); if (first_from == last_from) { return first_dest; } // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_for( TeamThreadRange(teamHandle, 0, num_elements), StdAdjacentDiffFunctor(first_from, first_dest, bin_op)); teamHandle.team_barrier(); // return return first_dest + num_elements; } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp000066400000000000000000000127231461675637500264210ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdAdjacentFindFunctor { using index_type = typename IteratorType::difference_type; using red_value_type = typename ReducerType::value_type; IteratorType m_first; ReducerType m_reducer; PredicateType m_p; KOKKOS_FUNCTION void operator()(const index_type i, red_value_type& red_value) const { const auto& my_value = m_first[i]; const auto& next_value = m_first[i + 1]; const bool are_equal = m_p(my_value, next_value); // FIXME_NVHPC using a ternary operator causes problems red_value_type value = {::Kokkos::reduction_identity::min()}; if (are_equal) { value.min_loc_true = i; } m_reducer.join(red_value, value); } KOKKOS_FUNCTION StdAdjacentFindFunctor(IteratorType first, ReducerType reducer, PredicateType p) : m_first(std::move(first)), m_reducer(std::move(reducer)), m_p(std::move(p)) {} }; // // exespace impl // template IteratorType adjacent_find_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); const auto num_elements = Kokkos::Experimental::distance(first, last); if (num_elements <= 1) { return last; } using index_type = typename IteratorType::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; reduction_value_type red_result; reducer_type reducer(red_result); // note that we use below num_elements-1 because // each index i in the reduction checks i and (i+1). 
::Kokkos::parallel_reduce( label, RangePolicy(ex, 0, num_elements - 1), // use CTAD StdAdjacentFindFunctor(first, reducer, pred), reducer); // fence not needed because reducing into scalar if (red_result.min_loc_true == ::Kokkos::reduction_identity::min()) { return last; } else { return first + red_result.min_loc_true; } } template IteratorType adjacent_find_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using default_pred_t = StdAlgoEqualBinaryPredicate; return adjacent_find_exespace_impl(label, ex, first, last, default_pred_t()); } // // team impl // template KOKKOS_FUNCTION IteratorType adjacent_find_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); const auto num_elements = Kokkos::Experimental::distance(first, last); if (num_elements <= 1) { return last; } using index_type = typename IteratorType::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; reduction_value_type red_result; reducer_type reducer(red_result); // note that we use below num_elements-1 because // each index i in the reduction checks i and (i+1). 
::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements - 1), // use CTAD StdAdjacentFindFunctor(first, reducer, pred), reducer); teamHandle.team_barrier(); if (red_result.min_loc_true == ::Kokkos::reduction_identity::min()) { return last; } else { return first + red_result.min_loc_true; } } template KOKKOS_FUNCTION IteratorType adjacent_find_team_impl( const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using default_pred_t = StdAlgoEqualBinaryPredicate; return adjacent_find_team_impl(teamHandle, first, last, default_pred_t()); } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp000066400000000000000000000063031461675637500271430ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_ALL_OF_ANY_OF_NONE_OF_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_ALL_OF_ANY_OF_NONE_OF_IMPL_HPP #include "Kokkos_FindIfOrNot.hpp" namespace Kokkos { namespace Experimental { namespace Impl { // // exespace impl // template bool all_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { return (find_if_or_not_exespace_impl(label, ex, first, last, predicate) == last); } template bool any_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { return (find_if_or_not_exespace_impl(label, ex, first, last, predicate) != last); } template bool none_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { return (find_if_or_not_exespace_impl(label, ex, first, last, predicate) == last); } // // team impl // template KOKKOS_FUNCTION bool all_of_team_impl(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, Predicate predicate) { return (find_if_or_not_team_impl(teamHandle, first, last, predicate) == last); } template KOKKOS_FUNCTION bool any_of_team_impl(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, Predicate predicate) { return (find_if_or_not_team_impl(teamHandle, first, last, predicate) != last); } template KOKKOS_FUNCTION bool none_of_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, Predicate predicate) { return (find_if_or_not_team_impl(teamHandle, first, last, predicate) == last); } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp000066400000000000000000000163201461675637500264130ustar00rootroot00000000000000//@HEADER // 
************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_CONSTRAINTS_HPP_ #define KOKKOS_STD_ALGORITHMS_CONSTRAINTS_HPP_ #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct is_admissible_to_kokkos_std_algorithms : std::false_type {}; template struct is_admissible_to_kokkos_std_algorithms< T, std::enable_if_t< ::Kokkos::is_view::value && T::rank() == 1 && (std::is_same::value || std::is_same::value || std::is_same::value)> > : std::true_type {}; template KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_admissible_to_kokkos_std_algorithms( const ViewType& /* view */) { static_assert(is_admissible_to_kokkos_std_algorithms::value, "Currently, Kokkos standard algorithms only accept 1D Views."); } // // is_iterator // template using iterator_category_t = typename T::iterator_category; template using is_iterator = Kokkos::is_detected; template inline constexpr bool is_iterator_v = is_iterator::value; // // are_iterators // template struct are_iterators; template struct are_iterators { static constexpr bool value = is_iterator_v; }; template struct are_iterators { static constexpr bool value = are_iterators::value && (are_iterators::value && ... 
&& true); }; template inline constexpr bool are_iterators_v = are_iterators::value; // // are_random_access_iterators // template struct are_random_access_iterators; template struct are_random_access_iterators { static constexpr bool value = is_iterator_v && std::is_base_of::value; }; template struct are_random_access_iterators { static constexpr bool value = are_random_access_iterators::value && (are_random_access_iterators::value && ... && true); }; template inline constexpr bool are_random_access_iterators_v = are_random_access_iterators::value; // // iterators_are_accessible_from // template struct iterators_are_accessible_from; template struct iterators_are_accessible_from { using view_type = typename IteratorType::view_type; static constexpr bool value = SpaceAccessibility::accessible; }; template struct iterators_are_accessible_from { static constexpr bool value = iterators_are_accessible_from::value && iterators_are_accessible_from::value; }; template KOKKOS_INLINE_FUNCTION constexpr void static_assert_random_access_and_accessible( const ExecutionSpaceOrTeamHandleType& /* ex_or_th*/, IteratorTypes... 
/* iterators */) { static_assert( are_random_access_iterators::value, "Currently, Kokkos standard algorithms require random access iterators."); static_assert(iterators_are_accessible_from< typename ExecutionSpaceOrTeamHandleType::execution_space, IteratorTypes...>::value, "Incompatible view/iterator and execution space"); } // // have matching difference_type // template struct iterators_have_matching_difference_type; template struct iterators_have_matching_difference_type { static constexpr bool value = true; }; template struct iterators_have_matching_difference_type { static constexpr bool value = std::is_same::value; }; template struct iterators_have_matching_difference_type { static constexpr bool value = iterators_have_matching_difference_type::value && iterators_have_matching_difference_type::value; }; template KOKKOS_INLINE_FUNCTION constexpr void static_assert_iterators_have_matching_difference_type(IteratorType1 /* it1 */, IteratorType2 /* it2 */) { static_assert(iterators_have_matching_difference_type::value, "Iterators do not have matching difference_type"); } template KOKKOS_INLINE_FUNCTION constexpr void static_assert_iterators_have_matching_difference_type(IteratorType1 it1, IteratorType2 it2, IteratorType3 it3) { static_assert_iterators_have_matching_difference_type(it1, it2); static_assert_iterators_have_matching_difference_type(it2, it3); } // // not_openmptarget // template struct not_openmptarget { #ifndef KOKKOS_ENABLE_OPENMPTARGET static constexpr bool value = true; #else static constexpr bool value = !std::is_same, ::Kokkos::Experimental::OpenMPTarget>::value; #endif }; template KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget( const ExecutionSpaceOrTeamHandleType& /*ex_or_th*/) { static_assert(not_openmptarget::value, "Currently, Kokkos standard algorithms do not support custom " "comparators in OpenMPTarget"); } // // valid range // template KOKKOS_INLINE_FUNCTION void expect_valid_range(IteratorType first, 
IteratorType last) { // this is a no-op for release KOKKOS_EXPECTS(last >= first); // avoid compiler complaining when KOKKOS_EXPECTS is no-op (void)first; (void)last; } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp000066400000000000000000000065541461675637500264650ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdCopyBackwardFunctor { // we can use difference type from IteratorType1 since // the calling functions below already static assert that // the iterators have matching difference type using index_type = typename IteratorType1::difference_type; IteratorType1 m_last; IteratorType2 m_dest_last; KOKKOS_FUNCTION void operator()(index_type i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } KOKKOS_FUNCTION StdCopyBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last) : m_last(std::move(_last)), m_dest_last(std::move(_dest_last)) {} }; template IteratorType2 copy_backward_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { // checks Impl::static_assert_random_access_and_accessible(ex, first, d_last); 
Impl::static_assert_iterators_have_matching_difference_type(first, d_last); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), // use CTAD StdCopyBackwardFunctor(last, d_last)); ex.fence("Kokkos::copy_backward: fence after operation"); // return return d_last - num_elements; } // // team-level impl // template KOKKOS_FUNCTION IteratorType2 copy_backward_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first, d_last); Impl::static_assert_iterators_have_matching_difference_type(first, d_last); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), // use CTAD StdCopyBackwardFunctor(last, d_last)); teamHandle.team_barrier(); // return return d_last - num_elements; } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp000066400000000000000000000114511461675637500257670ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_COPY_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_COPY_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdCopyFunctor { // we can use difference type from InputIterator since // the calling functions below already static assert that // the iterators have matching difference type using index_type = typename InputIterator::difference_type; InputIterator m_first; OutputIterator m_dest_first; KOKKOS_FUNCTION void operator()(index_type i) const { m_dest_first[i] = m_first[i]; } KOKKOS_FUNCTION StdCopyFunctor(InputIterator _first, OutputIterator _dest_first) : m_first(std::move(_first)), m_dest_first(std::move(_dest_first)) {} }; template OutputIterator copy_exespace_impl(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { // checks Impl::static_assert_random_access_and_accessible(ex, first, d_first); Impl::static_assert_iterators_have_matching_difference_type(first, d_first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), // use CTAD StdCopyFunctor(first, d_first)); ex.fence("Kokkos::copy: fence after operation"); // return return d_first + num_elements; } template OutputIterator copy_n_exespace_impl(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, Size count, OutputIterator first_dest) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); if (count > 0) { return copy_exespace_impl(label, ex, first_from, first_from + count, first_dest); } else { return first_dest; } } // // team-level impl // 
template KOKKOS_FUNCTION OutputIterator copy_team_impl(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, OutputIterator d_first) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first, d_first); Impl::static_assert_iterators_have_matching_difference_type(first, d_first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), // use CTAD StdCopyFunctor(first, d_first)); teamHandle.team_barrier(); // return return d_first + num_elements; } template KOKKOS_FUNCTION OutputIterator copy_n_team_impl(const TeamHandleType& teamHandle, InputIterator first_from, Size count, OutputIterator first_dest) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); if (count > 0) { return copy_team_impl(teamHandle, first_from, first_from + count, first_dest); } else { return first_dest; } } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp000066400000000000000000000124521461675637500252770ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_COPY_IF_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_COPY_IF_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_MustUseKokkosSingleInTeam.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdCopyIfFunctor { using index_type = typename FirstFrom::difference_type; FirstFrom m_first_from; FirstDest m_first_dest; PredType m_pred; KOKKOS_FUNCTION StdCopyIfFunctor(FirstFrom first_from, FirstDest first_dest, PredType pred) : m_first_from(std::move(first_from)), m_first_dest(std::move(first_dest)), m_pred(std::move(pred)) {} KOKKOS_FUNCTION void operator()(const index_type i, index_type& update, const bool final_pass) const { const auto& myval = m_first_from[i]; if (final_pass) { if (m_pred(myval)) { m_first_dest[update] = myval; } } if (m_pred(myval)) { update += 1; } } }; template OutputIterator copy_if_exespace_impl(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, PredicateType pred) { /* To explain the impl, suppose that our data is: | 1 | 1 | 2 | 2 | 3 | -2 | 4 | 4 | 4 | 5 | 7 | -10 | and we want to copy only the even entries, We can use an exclusive scan where the "update" is incremented only for the elements that satisfy the predicate. This way, the update allows us to track where in the destination we need to copy the elements: In this case, counting only the even entries, the exlusive scan during the final pass would yield: | 0 | 0 | 0 | 1 | 2 | 2 | 3 | 4 | 5 | 6 | 6 | 6 | * * * * * * * which provides the indexing in the destination where each starred (*) element needs to be copied to since the starred elements are those that satisfy the predicate. 
*/ // checks Impl::static_assert_random_access_and_accessible(ex, first, d_first); Impl::static_assert_iterators_have_matching_difference_type(first, d_first); Impl::expect_valid_range(first, last); if (first == last) { return d_first; } else { // run const auto num_elements = Kokkos::Experimental::distance(first, last); typename InputIterator::difference_type count = 0; ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), // use CTAD StdCopyIfFunctor(first, d_first, pred), count); // fence not needed because of the scan accumulating into count return d_first + count; } } template KOKKOS_FUNCTION OutputIterator copy_if_team_impl( const TeamHandleType& teamHandle, InputIterator first, InputIterator last, OutputIterator d_first, PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first, d_first); Impl::static_assert_iterators_have_matching_difference_type(first, d_first); Impl::expect_valid_range(first, last); if (first == last) { return d_first; } const std::size_t num_elements = Kokkos::Experimental::distance(first, last); if constexpr (stdalgo_must_use_kokkos_single_for_team_scan_v< typename TeamHandleType::execution_space>) { std::size_t count = 0; Kokkos::single( Kokkos::PerTeam(teamHandle), [=](std::size_t& lcount) { lcount = 0; for (std::size_t i = 0; i < num_elements; ++i) { const auto& myval = first[i]; if (pred(myval)) { d_first[lcount++] = myval; } } }, count); // no barrier needed since single above broadcasts to all members return d_first + count; } else { typename InputIterator::difference_type count = 0; ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), StdCopyIfFunctor(first, d_first, pred), count); // no barrier needed because of the scan accumulating into count return d_first + count; } #if defined KOKKOS_COMPILER_INTEL || \ (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130) __builtin_unreachable(); #endif } } // namespace Impl } // namespace Experimental } // 
namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp000066400000000000000000000073571461675637500264760ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_COUNT_IF_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_COUNT_IF_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdCountIfFunctor { using index_type = typename IteratorType::difference_type; IteratorType m_first; Predicate m_predicate; KOKKOS_FUNCTION void operator()(index_type i, index_type& lsum) const { if (m_predicate(m_first[i])) { lsum++; } } KOKKOS_FUNCTION StdCountIfFunctor(IteratorType _first, Predicate _predicate) : m_first(std::move(_first)), m_predicate(std::move(_predicate)) {} }; template typename IteratorType::difference_type count_if_exespace_impl( const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); typename IteratorType::difference_type count = 0; ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), // use CTAD StdCountIfFunctor(first, predicate), count); ex.fence("Kokkos::count_if: fence after operation"); return count; } template auto 
count_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { return count_if_exespace_impl( label, ex, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } // // team-level impl // template KOKKOS_FUNCTION typename IteratorType::difference_type count_if_team_impl( const TeamHandleType& teamHandle, IteratorType first, IteratorType last, Predicate predicate) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); typename IteratorType::difference_type count = 0; ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), // use CTAD StdCountIfFunctor(first, predicate), count); teamHandle.team_barrier(); return count; } template KOKKOS_FUNCTION auto count_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, const T& value) { return count_if_team_impl( teamHandle, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp000066400000000000000000000161511461675637500251550ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_EQUAL_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_EQUAL_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdEqualFunctor { using index_type = typename IteratorType1::difference_type; IteratorType1 m_first1; IteratorType2 m_first2; BinaryPredicateType m_predicate; KOKKOS_FUNCTION void operator()(index_type i, std::size_t& lsum) const { if (!m_predicate(m_first1[i], m_first2[i])) { lsum = 1; } } KOKKOS_FUNCTION StdEqualFunctor(IteratorType1 _first1, IteratorType2 _first2, BinaryPredicateType _predicate) : m_first1(std::move(_first1)), m_first2(std::move(_first2)), m_predicate(std::move(_predicate)) {} }; // // exespace impl // template bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, BinaryPredicateType predicate) { // checks Impl::static_assert_random_access_and_accessible(ex, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); Impl::expect_valid_range(first1, last1); // run const auto num_elements = Kokkos::Experimental::distance(first1, last1); std::size_t different = 0; ::Kokkos::parallel_reduce( label, RangePolicy(ex, 0, num_elements), StdEqualFunctor(first1, first2, predicate), different); ex.fence("Kokkos::equal: fence after operation"); return !different; } template bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; return equal_exespace_impl(label, ex, first1, last1, first2, pred_t()); } template bool equal_exespace_impl(const std::string& label, const 
ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType predicate) { const auto d1 = ::Kokkos::Experimental::distance(first1, last1); const auto d2 = ::Kokkos::Experimental::distance(first2, last2); if (d1 != d2) { return false; } return equal_exespace_impl(label, ex, first1, last1, first2, predicate); } template bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { Impl::expect_valid_range(first1, last1); Impl::expect_valid_range(first2, last2); using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; return equal_exespace_impl(label, ex, first1, last1, first2, last2, pred_t()); } // // team impl // template KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, BinaryPredicateType predicate) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); Impl::expect_valid_range(first1, last1); // run const auto num_elements = Kokkos::Experimental::distance(first1, last1); std::size_t different = 0; ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), StdEqualFunctor(first1, first2, predicate), different); teamHandle.team_barrier(); return !different; } template KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; return equal_team_impl(teamHandle, first1, last1, first2, pred_t()); } template KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& 
teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType predicate) { const auto d1 = ::Kokkos::Experimental::distance(first1, last1); const auto d2 = ::Kokkos::Experimental::distance(first2, last2); if (d1 != d2) { return false; } return equal_team_impl(teamHandle, first1, last1, first2, predicate); } template KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { Impl::expect_valid_range(first1, last1); Impl::expect_valid_range(first2, last2); using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; return equal_team_impl(teamHandle, first1, last1, first2, last2, pred_t()); } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp000066400000000000000000000204431461675637500266610ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" #include "Kokkos_IdentityReferenceUnaryFunctor.hpp" #include "Kokkos_FunctorsForExclusiveScan.hpp" #include #include #include namespace Kokkos { namespace Experimental { namespace Impl { // // exespace impl // template OutputIteratorType exclusive_scan_default_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, ValueType init_value) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); // does it make sense to do this static_assert too? // using input_iterator_value_type = typename InputIteratorType::value_type; // static_assert // (std::is_convertible, // ValueType>::value, // "exclusive_scan: InputIteratorType::value_type not convertible to // ValueType"); // we are unnecessarily duplicating code, but this is on purpose // so that we can use the default_op for OpenMPTarget. // Originally, I had this implemented as: // ''' // using bop_type = StdExclusiveScanDefaultJoinFunctor; // call exclusive_scan_custom_op_impl(..., bop_type()); // ''' // which avoids duplicating the functors, but for OpenMPTarget // I cannot use a custom binary op. // This is the same problem that occurs for reductions. 
// aliases using index_type = typename InputIteratorType::difference_type; using func_type = std::conditional_t< ::Kokkos::is_detected::value, ExclusiveScanDefaultFunctorForKnownNeutralElement< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType>, ExclusiveScanDefaultFunctorWithValueWrapper>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan( label, RangePolicy(ex, 0, num_elements), func_type(std::move(init_value), first_from, first_dest)); ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); return first_dest + num_elements; } template OutputIteratorType exclusive_scan_custom_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); // aliases using index_type = typename InputIteratorType::difference_type; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = TransformExclusiveScanFunctorWithValueWrapper< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(std::move(init_value), first_from, first_dest, bop, unary_op_type())); ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); // return return first_dest + num_elements; } // // team impl // template KOKKOS_FUNCTION OutputIteratorType exclusive_scan_default_op_team_impl( const TeamHandleType& teamHandle, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, 
ValueType init_value) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); static_assert( ::Kokkos::is_detected_v, "The team-level impl of Kokkos::Experimental::exclusive_scan currently " "does not support types without reduction identity"); // aliases using exe_space = typename TeamHandleType::execution_space; using index_type = typename InputIteratorType::difference_type; using func_type = ExclusiveScanDefaultFunctorForKnownNeutralElement< exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType>; const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan( TeamThreadRange(teamHandle, 0, num_elements), func_type(std::move(init_value), first_from, first_dest)); teamHandle.team_barrier(); return first_dest + num_elements; } template KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl( const TeamHandleType& teamHandle, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); static_assert( ::Kokkos::is_detected_v, "The team-level impl of Kokkos::Experimental::exclusive_scan currently " "does not support types without reduction identity"); // aliases using exe_space = typename TeamHandleType::execution_space; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using index_type = typename InputIteratorType::difference_type; using func_type = TransformExclusiveScanFunctorWithoutValueWrapper< exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; const auto 
num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), func_type(std::move(init_value), first_from, first_dest, bop, unary_op_type())); teamHandle.team_barrier(); return first_dest + num_elements; } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp000066400000000000000000000072161461675637500257230ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FILL_AND_FILL_N_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FILL_AND_FILL_N_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdFillFunctor { using index_type = typename InputIterator::difference_type; InputIterator m_first; T m_value; KOKKOS_FUNCTION void operator()(index_type i) const { m_first[i] = m_value; } KOKKOS_FUNCTION StdFillFunctor(InputIterator _first, T _value) : m_first(std::move(_first)), m_value(std::move(_value)) {} }; // // exespace impl // template void fill_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); 
::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), StdFillFunctor(first, value)); ex.fence("Kokkos::fill: fence after operation"); } template IteratorType fill_n_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { auto last = first + n; Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); if (n <= 0) { return first; } fill_exespace_impl(label, ex, first, last, value); return last; } // // team-level impl // template KOKKOS_FUNCTION void fill_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, const T& value) { Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), StdFillFunctor(first, value)); teamHandle.team_barrier(); } template KOKKOS_FUNCTION IteratorType fill_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, SizeType n, const T& value) { auto last = first + n; Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); if (n <= 0) { return first; } fill_team_impl(teamHandle, first, last, value); return last; } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp000066400000000000000000000217121461675637500254140ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FIND_END_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FIND_END_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdFindEndFunctor { using red_value_type = typename ReducerType::value_type; IteratorType1 m_first; IteratorType1 m_last; IteratorType2 m_s_first; IteratorType2 m_s_last; ReducerType m_reducer; PredicateType m_p; KOKKOS_FUNCTION void operator()(const IndexType i, red_value_type& red_value) const { namespace KE = ::Kokkos::Experimental; auto myit = m_first + i; bool found = true; const auto search_count = KE::distance(m_s_first, m_s_last); for (IndexType k = 0; k < search_count; ++k) { // note that we add this EXPECT to check if we are in a valid range // but I think we can remvoe this beceause the guarantee we don't go // out of bounds is taken care of at the calling site // where we launch the par-reduce. 
KOKKOS_EXPECTS((myit + k) < m_last); if (!m_p(myit[k], m_s_first[k])) { found = false; break; } } // FIXME_NVHPC using a ternary operator causes problems red_value_type rv = {::Kokkos::reduction_identity::max()}; if (found) { rv.max_loc_true = i; } m_reducer.join(red_value, rv); } KOKKOS_FUNCTION StdFindEndFunctor(IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, ReducerType reducer, PredicateType p) : m_first(std::move(first)), m_last(std::move(last)), m_s_first(std::move(s_first)), m_s_last(std::move(s_last)), m_reducer(std::move(reducer)), m_p(std::move(p)) {} }; // // exespace impl // template IteratorType1 find_end_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); Impl::expect_valid_range(first, last); Impl::expect_valid_range(s_first, s_last); // the target sequence should not be larger than the range [first, last) namespace KE = ::Kokkos::Experimental; const auto num_elements = KE::distance(first, last); const auto s_count = KE::distance(s_first, s_last); KOKKOS_EXPECTS(num_elements >= s_count); if (s_first == s_last) { return last; } if (first == last) { return last; } // special case where the two ranges have equal size if (num_elements == s_count) { const auto equal_result = equal_exespace_impl(label, ex, first, last, s_first, pred); return (equal_result) ? 
first : last; } else { using index_type = typename IteratorType1::difference_type; using reducer_type = LastLoc; using reduction_value_type = typename reducer_type::value_type; using func_t = StdFindEndFunctor; // run reduction_value_type red_result; reducer_type reducer(red_result); // decide the size of the range policy of the par_red: // note that the last feasible index to start looking is the index // whose distance from the "last" is equal to the sequence count. // the +1 is because we need to include that location too. const auto range_size = num_elements - s_count + 1; // run par reduce ::Kokkos::parallel_reduce( label, RangePolicy(ex, 0, range_size), func_t(first, last, s_first, s_last, reducer, pred), reducer); // fence not needed because reducing into scalar // decide and return if (red_result.max_loc_true == ::Kokkos::reduction_identity::max()) { // if here, a subrange has not been found return last; } else { // a location has been found return first + red_result.max_loc_true; } } } template IteratorType1 find_end_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; return find_end_exespace_impl(label, ex, first, last, s_first, s_last, predicate_type()); } // // team impl // template KOKKOS_FUNCTION IteratorType1 find_end_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); Impl::expect_valid_range(first, last); Impl::expect_valid_range(s_first, s_last); // the target sequence should not be larger than the range [first, 
last) namespace KE = ::Kokkos::Experimental; const auto num_elements = KE::distance(first, last); const auto s_count = KE::distance(s_first, s_last); KOKKOS_EXPECTS(num_elements >= s_count); if (s_first == s_last) { return last; } if (first == last) { return last; } // special case where the two ranges have equal size if (num_elements == s_count) { const auto equal_result = equal_team_impl(teamHandle, first, last, s_first, pred); return (equal_result) ? first : last; } else { using index_type = typename IteratorType1::difference_type; using reducer_type = LastLoc; using reduction_value_type = typename reducer_type::value_type; using func_t = StdFindEndFunctor; // run reduction_value_type red_result; reducer_type reducer(red_result); // decide the size of the range policy of the par_red: // note that the last feasible index to start looking is the index // whose distance from the "last" is equal to the sequence count. // the +1 is because we need to include that location too. const auto range_size = num_elements - s_count + 1; // run par reduce ::Kokkos::parallel_reduce( TeamThreadRange(teamHandle, 0, range_size), func_t(first, last, s_first, s_last, reducer, pred), reducer); teamHandle.team_barrier(); // decide and return if (red_result.max_loc_true == ::Kokkos::reduction_identity::max()) { // if here, a subrange has not been found return last; } else { // a location has been found return first + red_result.max_loc_true; } } } template KOKKOS_FUNCTION IteratorType1 find_end_team_impl( const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; return find_end_team_impl(teamHandle, first, last, s_first, s_last, predicate_type()); } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif 
kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp000066400000000000000000000153261461675637500262660ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdFindFirstOfFunctor { using red_value_type = typename ReducerType::value_type; IteratorType1 m_first; IteratorType2 m_s_first; IteratorType2 m_s_last; ReducerType m_reducer; PredicateType m_p; KOKKOS_FUNCTION void operator()(const IndexType i, red_value_type& red_value) const { namespace KE = ::Kokkos::Experimental; const auto& myvalue = m_first[i]; bool found = false; const auto search_count = KE::distance(m_s_first, m_s_last); for (IndexType k = 0; k < search_count; ++k) { if (m_p(myvalue, m_s_first[k])) { found = true; break; } } // FIXME_NVHPC using a ternary operator causes problems red_value_type rv = {::Kokkos::reduction_identity::min()}; if (found) { rv.min_loc_true = i; } m_reducer.join(red_value, rv); } KOKKOS_FUNCTION StdFindFirstOfFunctor(IteratorType1 first, IteratorType2 s_first, IteratorType2 s_last, ReducerType reducer, PredicateType p) : m_first(std::move(first)), m_s_first(std::move(s_first)), m_s_last(std::move(s_last)), m_reducer(std::move(reducer)), m_p(std::move(p)) {} }; // // exespace impl // 
template IteratorType1 find_first_of_exespace_impl( const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); Impl::expect_valid_range(first, last); Impl::expect_valid_range(s_first, s_last); if ((s_first == s_last) || (first == last)) { return last; } using index_type = typename IteratorType1::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; using func_t = StdFindFirstOfFunctor; // run reduction_value_type red_result; reducer_type reducer(red_result); const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_reduce( label, RangePolicy(ex, 0, num_elements), func_t(first, s_first, s_last, reducer, pred), reducer); // fence not needed because reducing into scalar // decide and return if (red_result.min_loc_true == ::Kokkos::reduction_identity::min()) { // if here, nothing found return last; } else { // a location has been found return first + red_result.min_loc_true; } } template IteratorType1 find_first_of_exespace_impl( const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; return find_first_of_exespace_impl(label, ex, first, last, s_first, s_last, predicate_type()); } // // team impl // template KOKKOS_FUNCTION IteratorType1 find_first_of_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { // checks 
Impl::static_assert_random_access_and_accessible(teamHandle, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); Impl::expect_valid_range(first, last); Impl::expect_valid_range(s_first, s_last); if ((s_first == s_last) || (first == last)) { return last; } using index_type = typename IteratorType1::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; using func_t = StdFindFirstOfFunctor; // run reduction_value_type red_result; reducer_type reducer(red_result); const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), func_t(first, s_first, s_last, reducer, pred), reducer); teamHandle.team_barrier(); // decide and return if (red_result.min_loc_true == ::Kokkos::reduction_identity::min()) { // if here, nothing found return last; } else { // a location has been found return first + red_result.min_loc_true; } } template KOKKOS_FUNCTION IteratorType1 find_first_of_team_impl( const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; return find_first_of_team_impl(teamHandle, first, last, s_first, s_last, predicate_type()); } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp000066400000000000000000000136531461675637500262330ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. 
// // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_AND_FIND_IF_NOT_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FIND_IF_AND_FIND_IF_NOT_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdFindIfOrNotFunctor { using red_value_type = typename ReducerType::value_type; IteratorType m_first; ReducerType m_reducer; PredicateType m_p; KOKKOS_FUNCTION void operator()(const IndexType i, red_value_type& red_value) const { const auto& my_value = m_first[i]; // if doing find_if, look for when predicate is true // if doing find_if_not, look for when predicate is false const bool found_condition = is_find_if ? m_p(my_value) : !m_p(my_value); // FIXME_NVHPC using a ternary operator causes problems red_value_type rv = {::Kokkos::reduction_identity::min()}; if (found_condition) { rv.min_loc_true = i; } m_reducer.join(red_value, rv); } KOKKOS_FUNCTION StdFindIfOrNotFunctor(IteratorType first, ReducerType reducer, PredicateType p) : m_first(std::move(first)), m_reducer(std::move(reducer)), m_p(std::move(p)) {} }; // // exespace impl // template IteratorType find_if_or_not_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible( ex, first); // only need one It per type Impl::expect_valid_range(first, last); if (first == last) { return last; } // aliases using index_type = typename IteratorType::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; using func_t = StdFindIfOrNotFunctor; // run reduction_value_type red_result; reducer_type reducer(red_result); const auto num_elements = 
Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), func_t(first, reducer, pred), reducer); // fence not needed because reducing into scalar // decide and return if (red_result.min_loc_true == ::Kokkos::reduction_identity::min()) { // here, it means a valid loc has not been found, return last; } else { // a location has been found return first + red_result.min_loc_true; } } template InputIterator find_exespace_impl(const std::string& label, ExecutionSpace ex, InputIterator first, InputIterator last, const T& value) { return find_if_or_not_exespace_impl( label, ex, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } // // team impl // template KOKKOS_FUNCTION IteratorType find_if_or_not_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible( teamHandle, first); // only need one It per type Impl::expect_valid_range(first, last); if (first == last) { return last; } // aliases using index_type = typename IteratorType::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; using func_t = StdFindIfOrNotFunctor; // run reduction_value_type red_result; reducer_type reducer(red_result); const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), func_t(first, reducer, pred), reducer); teamHandle.team_barrier(); // decide and return if (red_result.min_loc_true == ::Kokkos::reduction_identity::min()) { // here, it means a valid loc has not been found, return last; } else { // a location has been found return first + red_result.min_loc_true; } } template KOKKOS_FUNCTION InputIterator find_team_impl(const TeamHandleType& teamHandle, InputIterator first, InputIterator last, const T& value) { return find_if_or_not_team_impl( teamHandle, first, 
last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp000066400000000000000000000101301461675637500267520ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FOR_EACH_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdForEachFunctor { using index_type = typename IteratorType::difference_type; IteratorType m_first; UnaryFunctorType m_functor; KOKKOS_FUNCTION void operator()(index_type i) const { m_functor(m_first[i]); } KOKKOS_FUNCTION StdForEachFunctor(IteratorType _first, UnaryFunctorType _functor) : m_first(std::move(_first)), m_functor(std::move(_functor)) {} }; template UnaryFunctorType for_each_exespace_impl(const std::string& label, const HandleType& handle, IteratorType first, IteratorType last, UnaryFunctorType functor) { // checks Impl::static_assert_random_access_and_accessible(handle, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for( label, RangePolicy(handle, 0, num_elements), StdForEachFunctor(first, functor)); handle.fence("Kokkos::for_each: fence after operation"); return 
functor; } template IteratorType for_each_n_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { auto last = first + n; Impl::static_assert_random_access_and_accessible(ex, first, last); Impl::expect_valid_range(first, last); if (n == 0) { return first; } for_each_exespace_impl(label, ex, first, last, std::move(functor)); // no neeed to fence since for_each_exespace_impl fences already return last; } // // team impl // template KOKKOS_FUNCTION UnaryFunctorType for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, UnaryFunctorType functor) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for( TeamThreadRange(teamHandle, 0, num_elements), StdForEachFunctor(first, functor)); teamHandle.team_barrier(); return functor; } template KOKKOS_FUNCTION IteratorType for_each_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, SizeType n, UnaryFunctorType functor) { auto last = first + n; Impl::static_assert_random_access_and_accessible(teamHandle, first, last); Impl::expect_valid_range(first, last); if (n == 0) { return first; } for_each_team_impl(teamHandle, first, last, std::move(functor)); // no neeed to fence since for_each_team_impl fences already return last; } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp000066400000000000000000000155621461675637500310620ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. 
Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP #include #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" namespace Kokkos { namespace Experimental { namespace Impl { template using ex_scan_has_reduction_identity_sum_t = decltype(Kokkos::reduction_identity::sum()); template struct ExclusiveScanDefaultFunctorForKnownNeutralElement { using execution_space = ExeSpace; ValueType m_init_value; FirstFrom m_first_from; FirstDest m_first_dest; KOKKOS_FUNCTION ExclusiveScanDefaultFunctorForKnownNeutralElement(ValueType init, FirstFrom first_from, FirstDest first_dest) : m_init_value(std::move(init)), m_first_from(std::move(first_from)), m_first_dest(std::move(first_dest)) {} KOKKOS_FUNCTION void operator()(const IndexType i, ValueType& update, const bool final_pass) const { const auto tmp = m_first_from[i]; if (final_pass) m_first_dest[i] = update + m_init_value; update += tmp; } }; template struct ExclusiveScanDefaultFunctorWithValueWrapper { using execution_space = ExeSpace; using value_type = ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; ValueType m_init_value; FirstFrom m_first_from; FirstDest m_first_dest; KOKKOS_FUNCTION ExclusiveScanDefaultFunctorWithValueWrapper(ValueType init, FirstFrom first_from, FirstDest first_dest) : m_init_value(std::move(init)), m_first_from(std::move(first_from)), m_first_dest(std::move(first_dest)) {} KOKKOS_FUNCTION void operator()(const IndexType i, value_type& update, const bool final_pass) const { const auto tmp = value_type{m_first_from[i], false}; if (final_pass) { if (i == 0) { m_first_dest[i] = m_init_value; } else { m_first_dest[i] = update.val + m_init_value; } } 
this->join(update, tmp); } KOKKOS_FUNCTION void init(value_type& update) const { update.val = {}; update.is_initial = true; } KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { if (input.is_initial) return; if (update.is_initial) { update.val = input.val; update.is_initial = false; } else { update.val = update.val + input.val; } } }; template struct TransformExclusiveScanFunctorWithValueWrapper { using execution_space = ExeSpace; using value_type = ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; ValueType m_init_value; FirstFrom m_first_from; FirstDest m_first_dest; BinaryOpType m_binary_op; UnaryOpType m_unary_op; KOKKOS_FUNCTION TransformExclusiveScanFunctorWithValueWrapper(ValueType init, FirstFrom first_from, FirstDest first_dest, BinaryOpType bop, UnaryOpType uop) : m_init_value(std::move(init)), m_first_from(std::move(first_from)), m_first_dest(std::move(first_dest)), m_binary_op(std::move(bop)), m_unary_op(std::move(uop)) {} KOKKOS_FUNCTION void operator()(const IndexType i, value_type& update, const bool final_pass) const { const auto tmp = value_type{m_unary_op(m_first_from[i]), false}; if (final_pass) { if (i == 0) { // for both ExclusiveScan and TransformExclusiveScan, // init is unmodified m_first_dest[i] = m_init_value; } else { m_first_dest[i] = m_binary_op(update.val, m_init_value); } } this->join(update, tmp); } KOKKOS_FUNCTION void init(value_type& value) const { value.val = {}; value.is_initial = true; } KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { if (input.is_initial) return; if (update.is_initial) { update.val = input.val; } else { update.val = m_binary_op(update.val, input.val); } update.is_initial = false; } }; template struct TransformExclusiveScanFunctorWithoutValueWrapper { using execution_space = ExeSpace; ValueType m_init_value; FirstFrom m_first_from; FirstDest m_first_dest; BinaryOpType m_binary_op; UnaryOpType m_unary_op; KOKKOS_FUNCTION 
TransformExclusiveScanFunctorWithoutValueWrapper(ValueType init, FirstFrom first_from, FirstDest first_dest, BinaryOpType bop, UnaryOpType uop) : m_init_value(std::move(init)), m_first_from(std::move(first_from)), m_first_dest(std::move(first_dest)), m_binary_op(std::move(bop)), m_unary_op(std::move(uop)) {} KOKKOS_FUNCTION void operator()(const IndexType i, ValueType& update, const bool final_pass) const { const auto tmp = ValueType{m_unary_op(m_first_from[i])}; if (final_pass) { if (i == 0) { // for both ExclusiveScan and TransformExclusiveScan, // init is unmodified m_first_dest[i] = m_init_value; } else { m_first_dest[i] = m_binary_op(update, m_init_value); } } this->join(update, tmp); } KOKKOS_FUNCTION void init(ValueType& update) const { update = {}; } KOKKOS_FUNCTION void join(ValueType& update, const ValueType& input) const { update = m_binary_op(update, input); } }; } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp000066400000000000000000000070051461675637500274270ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_GENERATE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_GENERATE_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdGenerateFunctor { using index_type = typename IteratorType::difference_type; IteratorType m_first; Generator m_generator; KOKKOS_FUNCTION void operator()(index_type i) const { m_first[i] = m_generator(); } KOKKOS_FUNCTION StdGenerateFunctor(IteratorType _first, Generator _g) : m_first(std::move(_first)), m_generator(std::move(_g)) {} }; // // generate impl // template void generate_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), StdGenerateFunctor(first, g)); ex.fence("Kokkos::generate: fence after operation"); } template KOKKOS_FUNCTION void generate_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, Generator g) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), StdGenerateFunctor(first, g)); teamHandle.team_barrier(); } // // generate_n impl // template IteratorType generate_n_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { if (count <= 0) { return first; } generate_exespace_impl(label, ex, first, first + count, g); return first + count; } template KOKKOS_FUNCTION IteratorType 
generate_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, Size count, Generator g) { if (count <= 0) { return first; } generate_team_impl(teamHandle, first, first + count, g); return first + count; } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_HelperPredicates.hpp000066400000000000000000000046301461675637500273300ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_HELPER_PREDICATES_HPP #define KOKKOS_STD_ALGORITHMS_HELPER_PREDICATES_HPP #include // naming convetion: // StdAlgoSomeExpressiveNameUnaryPredicate // StdAlgoSomeExpressiveNameBinaryPredicate namespace Kokkos { namespace Experimental { namespace Impl { // ------------------ // UNARY PREDICATES // ------------------ template struct StdAlgoEqualsValUnaryPredicate { T m_value; KOKKOS_FUNCTION constexpr bool operator()(const T& val) const { return val == m_value; } KOKKOS_FUNCTION constexpr explicit StdAlgoEqualsValUnaryPredicate(const T& _value) : m_value(_value) {} }; template struct StdAlgoNotEqualsValUnaryPredicate { T m_value; KOKKOS_FUNCTION constexpr bool operator()(const T& val) const { return !(val == m_value); } KOKKOS_FUNCTION constexpr explicit StdAlgoNotEqualsValUnaryPredicate(const T& _value) : m_value(_value) {} }; template struct StdAlgoNegateUnaryPredicateWrapper { PredicateType m_pred; KOKKOS_FUNCTION constexpr bool operator()(const ValueType& val) const { return 
!m_pred(val); } KOKKOS_FUNCTION constexpr explicit StdAlgoNegateUnaryPredicateWrapper( const PredicateType& pred) : m_pred(pred) {} }; // ------------------ // BINARY PREDICATES // ------------------ template struct StdAlgoEqualBinaryPredicate { KOKKOS_FUNCTION constexpr bool operator()(const ValueType1& a, const ValueType2& b) const { return a == b; } }; template struct StdAlgoLessThanBinaryPredicate { KOKKOS_FUNCTION constexpr bool operator()(const ValueType1& a, const ValueType2& b) const { return a < b; } }; } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp000066400000000000000000000020631461675637500320730ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_NUMERIC_IDENTITY_REFERENCE_UNARY_FUNCTOR_HPP #define KOKKOS_STD_ALGORITHMS_NUMERIC_IDENTITY_REFERENCE_UNARY_FUNCTOR_HPP #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdNumericScanIdentityReferenceUnaryFunctor { KOKKOS_FUNCTION constexpr const ValueType& operator()(const ValueType& a) const { return a; } }; } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp000066400000000000000000000323601461675637500266540ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 
4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include #include namespace Kokkos { namespace Experimental { namespace Impl { template using in_scan_has_reduction_identity_sum_t = decltype(Kokkos::reduction_identity::sum()); template struct InclusiveScanDefaultFunctorForKnownIdentityElement { using execution_space = ExeSpace; FirstFrom m_first_from; FirstDest m_first_dest; KOKKOS_FUNCTION InclusiveScanDefaultFunctorForKnownIdentityElement(FirstFrom first_from, FirstDest first_dest) : m_first_from(std::move(first_from)), m_first_dest(std::move(first_dest)) {} KOKKOS_FUNCTION void operator()(const IndexType i, ValueType& update, const bool final_pass) const { update += m_first_from[i]; if (final_pass) { m_first_dest[i] = update; } } }; template struct InclusiveScanDefaultFunctor { using execution_space = ExeSpace; using value_type = ValueWrapperForNoNeutralElement; FirstFrom m_first_from; FirstDest m_first_dest; KOKKOS_FUNCTION InclusiveScanDefaultFunctor(FirstFrom first_from, FirstDest first_dest) : m_first_from(std::move(first_from)), m_first_dest(std::move(first_dest)) {} KOKKOS_FUNCTION void operator()(const IndexType i, value_type& update, const bool final_pass) const { const auto tmp = value_type{m_first_from[i], false}; this->join(update, tmp); if (final_pass) { m_first_dest[i] = update.val; } } KOKKOS_FUNCTION void init(value_type& update) const { update.val = {}; update.is_initial = true; } 
KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { if (input.is_initial) return; if (update.is_initial) { update.val = input.val; } else { update.val = update.val + input.val; } update.is_initial = false; } }; // // exespace impl // template OutputIteratorType inclusive_scan_default_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); // aliases using index_type = typename InputIteratorType::difference_type; using value_type = std::remove_const_t; using func_type = std::conditional_t< ::Kokkos::is_detected::value, InclusiveScanDefaultFunctorForKnownIdentityElement< ExecutionSpace, index_type, value_type, InputIteratorType, OutputIteratorType>, InclusiveScanDefaultFunctor>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest)); ex.fence("Kokkos::inclusive_scan_default_op: fence after operation"); // return return first_dest + num_elements; } // ------------------------------------------------------------- // inclusive_scan_custom_binary_op_impl // ------------------------------------------------------------- template OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); // aliases using 
index_type = typename InputIteratorType::difference_type; using value_type = std::remove_const_t; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor< ExecutionSpace, index_type, value_type, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan( label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest, binary_op, unary_op_type())); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); // return return first_dest + num_elements; } // ------------------------------------------------------------- // inclusive_scan_custom_binary_op_impl with init_value // ------------------------------------------------------------- template OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op, ValueType init_value) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); // aliases using index_type = typename InputIteratorType::difference_type; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest, binary_op, unary_op_type(), std::move(init_value))); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); // 
return return first_dest + num_elements; } // // team impl // template KOKKOS_FUNCTION OutputIteratorType inclusive_scan_default_op_team_impl( const TeamHandleType& teamHandle, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); using value_type = std::remove_const_t; // #if defined(KOKKOS_ENABLE_CUDA) using exe_space = typename TeamHandleType::execution_space; using index_type = typename InputIteratorType::difference_type; using func_type = std::conditional_t< ::Kokkos::is_detected::value, InclusiveScanDefaultFunctorForKnownIdentityElement< exe_space, index_type, value_type, InputIteratorType, OutputIteratorType>, InclusiveScanDefaultFunctor>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), func_type(first_from, first_dest)); teamHandle.team_barrier(); // return return first_dest + num_elements; } // ------------------------------------------------------------- // inclusive_scan_custom_binary_op_impl // ------------------------------------------------------------- template KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( const TeamHandleType& teamHandle, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); using value_type = std::remove_const_t; static_assert( ::Kokkos::is_detected_v, "At the moment inclusive_scan doesn't support types without reduction " "identity"); // #if 
defined(KOKKOS_ENABLE_CUDA) // aliases using exe_space = typename TeamHandleType::execution_space; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = TeamTransformInclusiveScanNoInitValueFunctor< exe_space, value_type, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan( TeamThreadRange(teamHandle, 0, num_elements), func_type(first_from, first_dest, binary_op, unary_op_type())); teamHandle.team_barrier(); return first_dest + num_elements; } // ------------------------------------------------------------- // inclusive_scan_custom_binary_op_impl with init_value // ------------------------------------------------------------- template KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( const TeamHandleType& teamHandle, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op, ValueType init_value) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); Impl::expect_valid_range(first_from, last_from); static_assert( ::Kokkos::is_detected_v, "At the moment inclusive_scan doesn't support types without reduction " "identity"); // #if defined(KOKKOS_ENABLE_CUDA) // aliases using exe_space = typename TeamHandleType::execution_space; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = TeamTransformInclusiveScanWithInitValueFunctor< exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), func_type(first_from, first_dest, binary_op, unary_op_type(), std::move(init_value))); 
teamHandle.team_barrier(); // return return first_dest + num_elements; } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp000066400000000000000000000150241461675637500266620ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdIsPartitionedFunctor { using red_value_type = typename ReducerType::value_type; using index_type = typename IteratorType::difference_type; IteratorType m_first; ReducerType m_reducer; PredicateType m_p; KOKKOS_FUNCTION void operator()(const index_type i, red_value_type& redValue) const { const auto predicate_value = m_p(m_first[i]); constexpr index_type m_red_id_min = ::Kokkos::reduction_identity::min(); constexpr index_type m_red_id_max = ::Kokkos::reduction_identity::max(); // FIXME_NVHPC using a ternary operator causes problems red_value_type rv = {m_red_id_max, i}; if (predicate_value) { rv = {i, m_red_id_min}; } m_reducer.join(redValue, rv); } KOKKOS_FUNCTION StdIsPartitionedFunctor(IteratorType first, ReducerType reducer, PredicateType p) : m_first(std::move(first)), m_reducer(std::move(reducer)), m_p(std::move(p)) {} }; template bool 
is_partitioned_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType pred) { // true if all elements in the range [first, last) that satisfy // the predicate "pred" appear before all elements that don't. // Also returns true if [first, last) is empty. // also true if all elements satisfy the predicate. // we implement it by finding: // - the max location where predicate is true (max_loc_true) // - the min location where predicate is false (min_loc_false) // so the range is partitioned if max_loc_true < (min_loc_false) // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); // trivial case if (first == last) { return true; } // aliases using index_type = typename IteratorType::difference_type; using reducer_type = StdIsPartitioned; using reduction_value_type = typename reducer_type::value_type; using func_t = StdIsPartitionedFunctor; // run reduction_value_type red_result; reducer_type reducer(red_result); const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), func_t(first, reducer, pred), reducer); // fence not needed because reducing into scalar // decide and return constexpr index_type red_id_min = ::Kokkos::reduction_identity::min(); constexpr index_type red_id_max = ::Kokkos::reduction_identity::max(); if (red_result.max_loc_true != red_id_max && red_result.min_loc_false != red_id_min) { // this occurs when the reduction yields nontrivial values return red_result.max_loc_true < red_result.min_loc_false; } else if (red_result.max_loc_true == red_id_max && red_result.min_loc_false == 0) { // this occurs when all values do NOT satisfy // the predicate, and this corner case should also be true return true; } else if (first + red_result.max_loc_true == --last) { // this occurs when all values satisfy the predicate, // this corner case should also be true return true; 
} else { return false; } } template KOKKOS_FUNCTION bool is_partitioned_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, PredicateType pred) { /* see exespace impl for the description of the impl */ // checks Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); // trivial case if (first == last) { return true; } // aliases using index_type = typename IteratorType::difference_type; using reducer_type = StdIsPartitioned; using reduction_value_type = typename reducer_type::value_type; using func_t = StdIsPartitionedFunctor; // run reduction_value_type red_result; reducer_type reducer(red_result); const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), func_t(first, reducer, pred), reducer); // fence not needed because reducing into scalar // decide and return constexpr index_type red_id_min = ::Kokkos::reduction_identity::min(); constexpr index_type red_id_max = ::Kokkos::reduction_identity::max(); if (red_result.max_loc_true != red_id_max && red_result.min_loc_false != red_id_min) { // this occurs when the reduction yields nontrivial values return red_result.max_loc_true < red_result.min_loc_false; } else if (red_result.max_loc_true == red_id_max && red_result.min_loc_false == 0) { // this occurs when all values do NOT satisfy // the predicate, and this corner case should also be true return true; } else if (first + red_result.max_loc_true == --last) { // this occurs when all values satisfy the predicate, // this corner case should also be true return true; } else { return false; } } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp000066400000000000000000000104371461675637500256430ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // 
Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_IS_SORTED_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdIsSortedFunctor { using index_type = typename IteratorType::difference_type; IteratorType m_first; ComparatorType m_comparator; KOKKOS_FUNCTION void operator()(const index_type i, std::size_t& update) const { const auto& val_i = m_first[i]; const auto& val_ip1 = m_first[i + 1]; if (m_comparator(val_ip1, val_i)) { ++update; } } KOKKOS_FUNCTION StdIsSortedFunctor(IteratorType _first1, ComparatorType comparator) : m_first(std::move(_first1)), m_comparator(std::move(comparator)) {} }; // // exespace impl // template bool is_sorted_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); const auto num_elements = Kokkos::Experimental::distance(first, last); if (num_elements <= 1) { return true; } // use num_elements-1 because each index handles i and i+1 const auto num_elements_minus_one = num_elements - 1; using functor_type = StdIsSortedFunctor; // result is incremented by one if sorting breaks at index i std::size_t result = 0; ::Kokkos::parallel_reduce( label, RangePolicy(ex, 0, num_elements_minus_one), functor_type(first, std::move(comp)), result); return result == 0; } template bool 
is_sorted_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; return is_sorted_exespace_impl(label, ex, first, last, pred_t()); } // // team impl // template KOKKOS_FUNCTION bool is_sorted_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType last, ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); const auto num_elements = Kokkos::Experimental::distance(first, last); if (num_elements <= 1) { return true; } // use num_elements-1 because each index handles i and i+1 const auto num_elements_minus_one = num_elements - 1; // result is incremented by one if sorting breaks at index i std::size_t result = 0; ::Kokkos::parallel_reduce( TeamThreadRange(teamHandle, 0, num_elements_minus_one), // use CTAD here StdIsSortedFunctor(first, std::move(comp)), result); return result == 0; } template KOKKOS_FUNCTION bool is_sorted_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; return is_sorted_team_impl(teamHandle, first, last, pred_t()); } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp000066400000000000000000000140371461675637500266570ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdIsSortedUntilFunctor { using index_type = typename IteratorType::difference_type; using value_type = typename ReducerType::value_type; IteratorType m_first; ComparatorType m_comparator; ReducerType m_reducer; KOKKOS_FUNCTION void operator()(const index_type i, value_type& reduction_result) const { const auto& val_i = m_first[i]; const auto& val_ip1 = m_first[i + 1]; if (m_comparator(val_ip1, val_i)) { m_reducer.join(reduction_result, i); } } KOKKOS_FUNCTION StdIsSortedUntilFunctor(IteratorType first, ComparatorType comparator, ReducerType reducer) : m_first(std::move(first)), m_comparator(std::move(comparator)), m_reducer(std::move(reducer)) {} }; // // overloads accepting exespace // template IteratorType is_sorted_until_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); const auto num_elements = Kokkos::Experimental::distance(first, last); // trivial case if (num_elements <= 1) { return last; } /* Do a par_reduce computing the *min* index that breaks the sorting. If such an index is found, then the range is sorted until that element. If no such index is found, then the range is sorted until the end. 
*/ using index_type = typename IteratorType::difference_type; index_type reduction_result; ::Kokkos::Min reducer(reduction_result); ::Kokkos::parallel_reduce( label, // use num_elements-1 because each index handles i and i+1 RangePolicy(ex, 0, num_elements - 1), StdIsSortedUntilFunctor(first, comp, reducer), reducer); /* If the reduction result is equal to the initial value, it means the range is sorted until the end */ index_type reduction_result_init; reducer.init(reduction_result_init); if (reduction_result == reduction_result_init) { return last; } else { /* If such an index is found, then the range is sorted until there and we need to return an iterator past the element found so do +1 */ return first + (reduction_result + 1); } } template IteratorType is_sorted_until_exespace_impl(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; return is_sorted_until_exespace_impl(label, ex, first, last, pred_t()); } // // overloads accepting team handle // template KOKKOS_FUNCTION IteratorType is_sorted_until_team_impl(const ExecutionSpace& teamHandle, IteratorType first, IteratorType last, ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); const auto num_elements = Kokkos::Experimental::distance(first, last); // trivial case if (num_elements <= 1) { return last; } /* Do a par_reduce computing the *min* index that breaks the sorting. If one such index is found, then the range is sorted until that element, if no such index is found, then it means the range is sorted until the end. 
*/ using index_type = typename IteratorType::difference_type; index_type red_result; index_type red_result_init; ::Kokkos::Min reducer(red_result); reducer.init(red_result_init); ::Kokkos::parallel_reduce( // use num_elements-1 because each index handles i // and i+1 TeamThreadRange(teamHandle, 0, num_elements - 1), StdIsSortedUntilFunctor(first, comp, reducer), reducer); teamHandle.team_barrier(); /* If the reduction result is equal to the initial value, and it means the range is sorted until the end */ if (red_result == red_result_init) { return last; } else { /* If such index is found, then the range is sorted until there and we need to return an iterator past the element found so do +1 */ return first + (red_result + 1); } } template KOKKOS_FUNCTION IteratorType is_sorted_until_team_impl( const ExecutionSpace& teamHandle, IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; return is_sorted_until_team_impl(teamHandle, first, last, pred_t()); } } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp000066400000000000000000000203261461675637500305120ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdCompareFunctor { IteratorType1 m_it1; IteratorType2 m_it2; ComparatorType m_predicate; KOKKOS_FUNCTION void operator()(IndexType /* i is unused */, int& lsum) const { if (m_predicate(*m_it1, *m_it2)) { lsum = 1; } } KOKKOS_FUNCTION StdCompareFunctor(IteratorType1 _it1, IteratorType2 _it2, ComparatorType _predicate) : m_it1(std::move(_it1)), m_it2(std::move(_it2)), m_predicate(std::move(_predicate)) {} }; template struct StdLexicographicalCompareFunctor { using red_value_type = typename ReducerType::value_type; IteratorType1 m_first1; IteratorType2 m_first2; ReducerType m_reducer; ComparatorType m_comparator; KOKKOS_FUNCTION void operator()(const IndexType i, red_value_type& red_value) const { const auto& my_value1 = m_first1[i]; const auto& my_value2 = m_first2[i]; const bool different = m_comparator(my_value1, my_value2) || m_comparator(my_value2, my_value1); // FIXME_NVHPC using a ternary operator causes problems red_value_type rv = {::Kokkos::reduction_identity::min()}; if (different) { rv.min_loc_true = i; } m_reducer.join(red_value, rv); } KOKKOS_FUNCTION StdLexicographicalCompareFunctor(IteratorType1 _first1, IteratorType2 _first2, ReducerType _reducer, ComparatorType _comp) : m_first1(std::move(_first1)), m_first2(std::move(_first2)), m_reducer(std::move(_reducer)), m_comparator(std::move(_comp)) {} }; // // exespace impl // template bool lexicographical_compare_exespace_impl( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { // checks 
Impl::static_assert_random_access_and_accessible(ex, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); Impl::expect_valid_range(first1, last1); Impl::expect_valid_range(first2, last2); // aliases using index_type = typename IteratorType1::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; // run const auto d1 = Kokkos::Experimental::distance(first1, last1); const auto d2 = Kokkos::Experimental::distance(first2, last2); const auto range = Kokkos::min(d1, d2); reduction_value_type red_result; reducer_type reducer(red_result); using func1_t = StdLexicographicalCompareFunctor; ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, range), func1_t(first1, first2, reducer, comp), reducer); // fence not needed because reducing into scalar // no mismatch if (red_result.min_loc_true == ::Kokkos::reduction_identity::min()) { auto new_last1 = first1 + range; auto new_last2 = first2 + range; bool is_prefix = (new_last1 == last1) && (new_last2 != last2); return is_prefix; } // check mismatched int less = 0; auto it1 = first1 + red_result.min_loc_true; auto it2 = first2 + red_result.min_loc_true; using func2_t = StdCompareFunctor; ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, 1), func2_t(it1, it2, comp), less); // fence not needed because reducing into scalar return static_cast(less); } template bool lexicographical_compare_exespace_impl( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { using value_type_1 = typename IteratorType1::value_type; using value_type_2 = typename IteratorType2::value_type; using predicate_t = Impl::StdAlgoLessThanBinaryPredicate; return lexicographical_compare_exespace_impl(label, ex, first1, last1, first2, last2, predicate_t()); } // // team impl // template KOKKOS_FUNCTION bool lexicographical_compare_team_impl( const TeamHandleType& teamHandle, 
IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); Impl::expect_valid_range(first1, last1); Impl::expect_valid_range(first2, last2); // aliases using index_type = typename IteratorType1::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; // run const auto d1 = Kokkos::Experimental::distance(first1, last1); const auto d2 = Kokkos::Experimental::distance(first2, last2); const auto range = Kokkos::min(d1, d2); reduction_value_type red_result; reducer_type reducer(red_result); using func1_t = StdLexicographicalCompareFunctor; ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, range), func1_t(first1, first2, reducer, comp), reducer); teamHandle.team_barrier(); // no mismatch if (red_result.min_loc_true == ::Kokkos::reduction_identity::min()) { auto new_last1 = first1 + range; auto new_last2 = first2 + range; bool is_prefix = (new_last1 == last1) && (new_last2 != last2); return is_prefix; } // check mismatched int less = 0; auto it1 = first1 + red_result.min_loc_true; auto it2 = first2 + red_result.min_loc_true; using func2_t = StdCompareFunctor; ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, 1), func2_t(it1, it2, comp), less); teamHandle.team_barrier(); return static_cast(less); } template KOKKOS_FUNCTION bool lexicographical_compare_team_impl( const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { using value_type_1 = typename IteratorType1::value_type; using value_type_2 = typename IteratorType2::value_type; using predicate_t = Impl::StdAlgoLessThanBinaryPredicate; return lexicographical_compare_team_impl(teamHandle, first1, last1, first2, last2, predicate_t()); } } // namespace Impl } // namespace 
Experimental } // namespace Kokkos #endif kokkos-4.3.01/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp000066400000000000000000000167701461675637500277720ustar00rootroot00000000000000//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_STD_ALGORITHMS_MIN_MAX_MINMAX_ELEMENT_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_MIN_MAX_MINMAX_ELEMENT_IMPL_HPP #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include namespace Kokkos { namespace Experimental { namespace Impl { template struct StdMinOrMaxElemFunctor { using index_type = typename IteratorType::difference_type; using red_value_type = typename ReducerType::value_type; IteratorType m_first; ReducerType m_reducer; KOKKOS_FUNCTION void operator()(const index_type i, red_value_type& red_value) const { m_reducer.join(red_value, red_value_type{m_first[i], i}); } KOKKOS_FUNCTION StdMinOrMaxElemFunctor(IteratorType first, ReducerType reducer) : m_first(std::move(first)), m_reducer(std::move(reducer)) {} }; template struct StdMinMaxElemFunctor { using index_type = typename IteratorType::difference_type; using red_value_type = typename ReducerType::value_type; IteratorType m_first; ReducerType m_reducer; KOKKOS_FUNCTION void operator()(const index_type i, red_value_type& red_value) const { const auto& my_value = m_first[i]; m_reducer.join(red_value, red_value_type{my_value, my_value, i, i}); } KOKKOS_FUNCTION StdMinMaxElemFunctor(IteratorType first, ReducerType reducer) : 
m_first(std::move(first)), m_reducer(std::move(reducer)) {} }; // // exespace impl // template