pax_global_header00006660000000000000000000000064141331566510014517gustar00rootroot0000000000000052 comment=5c9a39f2f6a6c0d0ab3d09cc4da046d41e09c214 Oclgrind-21.10/000077500000000000000000000000001413315665100132635ustar00rootroot00000000000000Oclgrind-21.10/.clang-format000066400000000000000000000002261413315665100156360ustar00rootroot00000000000000Language: Cpp BasedOnStyle: LLVM AllowShortFunctionsOnASingleLine: Empty BreakBeforeBraces: Allman ContinuationIndentWidth: 2 PointerAlignment: Left Oclgrind-21.10/.github/000077500000000000000000000000001413315665100146235ustar00rootroot00000000000000Oclgrind-21.10/.github/workflows/000077500000000000000000000000001413315665100166605ustar00rootroot00000000000000Oclgrind-21.10/.github/workflows/check-format.sh000077500000000000000000000006331413315665100215640ustar00rootroot00000000000000#!/usr/bin/env bash # Run git-clang-format to check for violations OUTPUT=$(git-clang-format --diff origin/master --extensions c,cpp,h,hpp) # Check for no-ops grep '^no modified files to format$' <<<"$OUTPUT" && exit 0 grep '^clang-format did not modify any files$' <<<"$OUTPUT" && exit 0 # Dump formatting diff and signal failure echo -e "\n==== FORMATTING VIOLATIONS DETECTED ====\n" echo "$OUTPUT" exit 1 Oclgrind-21.10/.github/workflows/ci.yml000066400000000000000000000246411413315665100200050ustar00rootroot00000000000000name: CI on: [push, pull_request] env: RELEASE_VERSION: "21.10" RELEASE_LLVM: "13" jobs: check-format: name: Check formatting runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 with: fetch-depth: 0 - name: Install dependencies run: sudo apt-get install -y clang-format - name: Check formatting run: bash .github/workflows/check-format.sh build: name: Build ${{ matrix.os }} llvm-${{ matrix.llvm }} ${{ matrix.compiler }} ${{ matrix.build_type }} runs-on: ${{ matrix.os }} needs: [check-format] env: LLVM_VERSION: ${{ matrix.llvm }} INSTALL_DIR: "${{ github.workspace }}/install" strategy: matrix: os: [ubuntu-20.04, macos-11.0] compiler: [gcc, clang] build_type: [Release, Debug] llvm: [11, 12, 13] exclude: - os: ubuntu-20.04 llvm: 11 - os: macos-11.0 compiler: gcc steps: - uses: actions/checkout@v2 - name: Install dependencies run: bash .github/workflows/install-deps.sh - name: Set LLVM_DIR shell: bash run: | if [[ "${{ matrix.os }}" =~ ubuntu-.* ]]; then echo LLVM_DIR=/usr/lib/llvm-${{ matrix.llvm }}/lib/cmake/llvm >>$GITHUB_ENV elif [[ "${{ matrix.os }}" =~ macos-.* ]]; then echo LLVM_DIR=$PWD/llvm-${LLVM_VERSION}/lib/cmake/llvm >>$GITHUB_ENV fi - name: Set compiler shell: bash run: | if [ "${{ matrix.compiler }}" == "gcc" ]; then echo CC=gcc >>$GITHUB_ENV echo CXX=g++ >>$GITHUB_ENV elif [ "${{ matrix.compiler }}" == "clang" ]; then echo CC=clang >>$GITHUB_ENV echo CXX=clang++ >>$GITHUB_ENV fi - name: Create build dir run: cmake -E make_directory build - name: CMake run: > cmake .. -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ -DLLVM_DIR="$LLVM_DIR" working-directory: build - name: Build run: cmake --build . --config ${{ matrix.build_type }} working-directory: build - name: Test run: ctest -C ${{ matrix.build_type }} --output-on-failure working-directory: build - name: Install run: cmake --install . 
--config ${{ matrix.build_type }} working-directory: build - name: Prepare build artifact shell: bash run: | if [[ "${{ matrix.os }}" =~ ubuntu-.* ]]; then cp src/install/INSTALL.linux ${INSTALL_DIR} elif [[ "${{ matrix.os }}" =~ macos-.* ]]; then cp src/install/INSTALL.darwin ${INSTALL_DIR} fi mv ${INSTALL_DIR} oclgrind-${RELEASE_VERSION} mkdir -p artifact tar czf artifact/oclgrind-${RELEASE_VERSION}.tgz oclgrind-${RELEASE_VERSION} - name: Upload build artifact uses: actions/upload-artifact@v2 with: name: Oclgrind-${{ env.RELEASE_VERSION }}-${{ matrix.os }}-llvm${{ matrix.llvm }}-${{ matrix.build_type }}-${{ matrix.compiler }} path: artifact build-windows: name: Build ${{ matrix.platform }} llvm-${{ matrix.llvm }} runs-on: windows-2019 needs: [check-format] env: LLVM_VERSION: ${{ matrix.llvm }} BUILD_PLATFORM: ${{ matrix.platform }} strategy: matrix: platform: [Win32, x64] llvm: [11, 12, 13] steps: - uses: actions/checkout@v2 - name: cache-llvm uses: actions/cache@v2 with: path: llvm-${{ matrix.llvm }} key: llvm-${{ matrix.llvm }}-${{ matrix.platform }} - name: Install dependencies run: bash .github/workflows/install-deps.sh - name: Create build dir run: cmake -E make_directory build - name: CMake shell: bash run: > cmake .. -G "Visual Studio 16 2019" -A ${{ matrix.platform }} -DLLVM_DIR=$PWD/../llvm-${{ matrix.llvm }}/install/lib/cmake/llvm -DCMAKE_INSTALL_PREFIX=$PWD/../install/ working-directory: build - name: Build run: cmake --build . --config Release --target ALL_BUILD working-directory: build - name: Test run: > cmake --build . --config Release --target RUN_TESTS ; cat ./Testing/Temporary/LastTest.log working-directory: build - name: Install run: cmake --build . --config Release --target INSTALL working-directory: build - name: Upload build artifact uses: actions/upload-artifact@v2 with: name: Oclgrind-${{ env.RELEASE_VERSION }}-Windows-${{ matrix.platform }}-llvm${{ matrix.llvm }} path: install retention-days: 5 upload-windows-binaries: name: Upload Windows binaries runs-on: ubuntu-20.04 needs: [build-windows] steps: - uses: actions/checkout@v2 - uses: actions/download-artifact@v2 with: name: Oclgrind-${{ env.RELEASE_VERSION }}-Windows-Win32-llvm${{ env.RELEASE_LLVM }} path: artifact/oclgrind-${{ env.RELEASE_VERSION }}/x86 - uses: actions/download-artifact@v2 with: name: Oclgrind-${{ env.RELEASE_VERSION }}-Windows-x64-llvm${{ env.RELEASE_LLVM }} path: artifact/oclgrind-${{ env.RELEASE_VERSION }}/x64 - name: Prepare directory shell: bash run: | outdir=artifact/oclgrind-${{ env.RELEASE_VERSION }} cp src/install/INSTALL.windows $outdir/INSTALL.txt cp src/install/install.bat $outdir/ cp src/install/uninstall.bat $outdir/ cp src/install/oclgrind-icd.reg $outdir/ mkdir $outdir/include mv $outdir/x64/include/CL $outdir/include/ cp -r $outdir/x64/include/oclgrind $outdir/include/ rm -rf $outdir/include/oclgrind/*.pch rm -rf $outdir/x86/include/CL - name: Upload binaries uses: actions/upload-artifact@v2 with: name: Oclgrind-${{ env.RELEASE_VERSION }}-Windows path: artifact build-cts: name: Build OpenCL CTS runs-on: ubuntu-20.04 needs: [build] env: LLVM_VERSION: 13 cts_hash: 3dab3df48d7dbc22accf6c37c59e54e35a35de7f steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 id: cache-cts with: path: opencl-cts key: opencl-cts-${{ env.cts_hash }} - name: Install dependencies if: steps.cache-cts.outputs.cache-hit != 'true' run: bash .github/workflows/install-deps.sh - uses: actions/checkout@v2 if: steps.cache-cts.outputs.cache-hit != 'true' with: repository: KhronosGroup/OpenCL-CTS path: 
opencl-cts ref: ${{ env.cts_hash }} - uses: actions/checkout@v2 if: steps.cache-cts.outputs.cache-hit != 'true' with: repository: KhronosGroup/OpenCL-Headers path: opencl-cts/opencl-headers - uses: actions/checkout@v2 if: steps.cache-cts.outputs.cache-hit != 'true' with: repository: KhronosGroup/OpenCL-ICD-Loader path: opencl-cts/opencl-icd-loader - name: Create ICD loader build dir run: cmake -E make_directory opencl-cts/opencl-icd-loader/build - name: CMake ICD Loader if: steps.cache-cts.outputs.cache-hit != 'true' run: > cmake .. \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$PWD/../../install \ -DOPENCL_ICD_LOADER_HEADERS_DIR=$PWD/../../opencl-headers working-directory: opencl-cts/opencl-icd-loader/build - name: Build ICD Loader if: steps.cache-cts.outputs.cache-hit != 'true' run: > cmake --build . && cmake --install . working-directory: opencl-cts/opencl-icd-loader/build - name: Create CTS build dir run: cmake -E make_directory opencl-cts/build - name: CMake CTS if: steps.cache-cts.outputs.cache-hit != 'true' run: > cmake .. \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$PWD/../install \ -DCL_INCLUDE_DIR=$PWD/../opencl-headers \ -DCL_LIB_DIR=$PWD/../install/lib \ -DCL_LIBCLCXX_DIR=. \ -DCLConform_LIBRARIES=OpenCL working-directory: opencl-cts/build - name: Build CTS if: steps.cache-cts.outputs.cache-hit != 'true' run: cmake --build . working-directory: opencl-cts/build run-cts: name: Run OpenCL CTS ${{ matrix.suite[0] }} runs-on: ubuntu-20.04 needs: [build-cts] env: LLVM_VERSION: 13 cts_hash: 3dab3df48d7dbc22accf6c37c59e54e35a35de7f strategy: fail-fast: false matrix: suite: [ [allocations], [api], [atomics], [basic], [buffers], [commonfns], [compiler], [computeinfo], [contractions], [conversions, "-w -[4096]"], [events], [geometrics], [images_clCopyImage], [images_clFillImage], [images_clGetInfo], [images_clReadWriteImage], [images_kernel_image_methods], [images_kernel_read_write, read write small_images CL_FILTER_NEAREST CL_RGBA], # TODO: Enable all orders. [images_samplerlessReads], [half, -w], #[integer_ops], # TODO: Fix crash. #[math_brute_force, -w -m], # TODO: Fix crash when multi-threaded and access-when-mapped issue in CTS. [mem_host_flags], [multiple_device_context], [printf], [profiling], [relationals], [select, -w], #[thread_dimensions], # TODO: Takes too long. 
[vectors], ] steps: - uses: actions/checkout@v2 - name: Install dependencies run: bash .github/workflows/install-deps.sh - name: Download Oclgrind uses: actions/download-artifact@v2 with: name: Oclgrind-${{ env.RELEASE_VERSION }}-ubuntu-20.04-llvm${{ env.RELEASE_LLVM }}-Release-gcc - name: Unpack Oclgrind shell: bash run: | tar xf oclgrind-${RELEASE_VERSION}.tgz - name: cache-cts uses: actions/cache@v2 with: path: opencl-cts key: opencl-cts-${{ env.cts_hash }} - name: Run CTS suite run: bash .github/workflows/run-cts-suite.sh ${{ matrix.suite[0] }} ${{ matrix.suite[1] }} Oclgrind-21.10/.github/workflows/cts-xfail/000077500000000000000000000000001413315665100205525ustar00rootroot00000000000000Oclgrind-21.10/.github/workflows/cts-xfail/api000066400000000000000000000000651413315665100212470ustar00rootroot00000000000000negative_get_platform_info negative_get_platform_ids Oclgrind-21.10/.github/workflows/cts-xfail/compiler000066400000000000000000000000261413315665100223050ustar00rootroot00000000000000unload_build_threaded Oclgrind-21.10/.github/workflows/cts-xfail/conversions000066400000000000000000000000141413315665100230400ustar00rootroot00000000000000conversions Oclgrind-21.10/.github/workflows/install-deps.sh000077500000000000000000000044551413315665100216260ustar00rootroot00000000000000#!/bin/bash if [ "`uname`" == "Linux" ]; then # Add repositories wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - sudo add-apt-repository -y \ "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-${LLVM_VERSION} main" sudo apt-get update -qq # Install Clang + LLVM sudo apt-get install -y \ llvm-${LLVM_VERSION}-dev \ libclang-${LLVM_VERSION}-dev \ clang-${LLVM_VERSION} \ libomp-${LLVM_VERSION}-dev sudo update-alternatives --install \ /usr/bin/clang clang /usr/bin/clang-${LLVM_VERSION} 20 sudo update-alternatives --install \ /usr/bin/clang++ clang++ /usr/bin/clang++-${LLVM_VERSION} 20 # Other dependencies sudo apt-get install -y libedit-dev elif [ "`uname`" == "Darwin" ]; then URL="https://github.com/llvm/llvm-project/releases/download/llvmorg-${LLVM_VERSION}.0.0" ARCHIVE="clang+llvm-${LLVM_VERSION}.0.0-x86_64-apple-darwin.tar.xz" if [ ${LLVM_VERSION} -lt 13 ]; then ln -sfn /Applications/Xcode_12.4.app /Applications/Xcode.app fi mkdir -p llvm-${LLVM_VERSION} wget "$URL/$ARCHIVE" tar xf "$ARCHIVE" --strip-components 1 -C llvm-${LLVM_VERSION} elif [[ "`uname`" == "MINGW64"* ]]; then if [ ! -r llvm-${LLVM_VERSION}/install/lib/cmake/llvm/LLVMConfig.cmake ]; then URL="https://github.com/llvm/llvm-project/releases/download/llvmorg-${LLVM_VERSION}.0.0" # Get LLVM ARCHIVE="llvm-${LLVM_VERSION}.0.0.src.tar.xz" mkdir -p llvm-${LLVM_VERSION} curl -OL "$URL/$ARCHIVE" tar xf "$ARCHIVE" --strip-components 1 -C llvm-${LLVM_VERSION} # Get Clang ARCHIVE="clang-${LLVM_VERSION}.0.0.src.tar.xz" mkdir -p llvm-${LLVM_VERSION}/tools/clang curl -OL "$URL/$ARCHIVE" tar xf "$ARCHIVE" --strip-components 1 -C llvm-${LLVM_VERSION}/tools/clang # Build LLVM + Clang mkdir -p llvm-${LLVM_VERSION}/build cd llvm-${LLVM_VERSION}/build cmake .. \ -G "Visual Studio 16 2019" -A ${BUILD_PLATFORM} \ -DCMAKE_INSTALL_PREFIX=$PWD/../install \ -DLLVM_TARGETS_TO_BUILD=host cmake --build . --config Release --target ALL_BUILD cmake --build . 
--config Release --target INSTALL fi else echo "Unrecognized uname: `uname`" exit 1 fi Oclgrind-21.10/.github/workflows/run-cts-suite.sh000077500000000000000000000045361413315665100217510ustar00rootroot00000000000000#!/bin/bash suite="$1" shift ctsargs="$*" ctsdir="${suite}" ctsbin="test_${suite}" if [ "${suite}" == "multiple_device_context" ]; then ctsbin="test_multiples" elif [ "${suite}" == "math_brute_force" ]; then ctsbin="test_bruteforce" elif [ "${suite}" == "images_clCopyImage" ]; then ctsdir="images/${suite#images_}" ctsbin="test_cl_copy_images" elif [ "${suite}" == "images_clFillImage" ]; then ctsdir="images/${suite#images_}" ctsbin="test_cl_fill_images" elif [ "${suite}" == "images_clGetInfo" ]; then ctsdir="images/${suite#images_}" ctsbin="test_cl_get_info" elif [ "${suite}" == "images_clReadWriteImage" ]; then ctsdir="images/${suite#images_}" ctsbin="test_cl_read_write_images" elif [ "${suite}" == "images_kernel_image_methods" ]; then ctsdir="images/${suite#images_}" ctsbin="test_kernel_image_methods" elif [ "${suite}" == "images_kernel_read_write" ]; then ctsdir="images/${suite#images_}" ctsbin="test_image_streams" elif [ "${suite}" == "images_samplerlessReads" ]; then ctsdir="images/${suite#images_}" ctsbin="test_samplerless_reads" fi retcode=0 export PATH=$PWD/oclgrind-${RELEASE_VERSION}/bin:$PATH export CL_CONFORMANCE_RESULTS_FILENAME=$PWD/result.json echo oclgrind opencl-cts/build/test_conformance/${ctsdir}/${ctsbin} ${ctsargs} oclgrind opencl-cts/build/test_conformance/${ctsdir}/${ctsbin} ${ctsargs} echo if [ ! -r "${CL_CONFORMANCE_RESULTS_FILENAME}" ]; then echo "Conformance results file not found." exit 1 fi grep ': "fail"' "${CL_CONFORMANCE_RESULTS_FILENAME}" | \ awk -F '"' '{print $2}' >FAILED if [ -r ".github/workflows/cts-xfail/${suite}" ]; then new_fails=$(fgrep -xvf .github/workflows/cts-xfail/${suite} FAILED) new_passes=$(fgrep -xvf FAILED .github/workflows/cts-xfail/${suite}) else new_fails=$(cat FAILED) new_passes= fi if [ -n "${new_fails}" ]; then echo "-------------------" echo "Unexpected failures" echo "-------------------" echo "${new_fails}" echo retcode=1 fi if [ -n "${new_passes}" ]; then echo "-----------------" echo "Unexpected passes" echo "-----------------" echo "${new_passes}" echo retcode=1 fi if [ ${retcode} -eq 0 ]; then if [ -s ".github/workflows/cts-xfail/${suite}" ]; then echo "-----------------" echo "Expected failures" echo "-----------------" cat ".github/workflows/cts-xfail/${suite}" echo fi fi exit ${retcode} Oclgrind-21.10/.gitignore000066400000000000000000000006231413315665100152540ustar00rootroot00000000000000# CMake output *.cmake !src/core/gen_clc_h.cmake CMakeCache.txt CMakeFiles/ Makefile config.h .ninja_deps .ninja_log build.ninja rules.ninja # Compiler output *.o *.so *.dylib include /oclgrind oclgrind.icd oclgrind-kernel src/core/clc_h.cpp tests/apps/image/image tests/apps/vecadd/vecadd tests/runtime/map_buffer # Misc oclgrind-*.tar.gz oclgrind-*.zip .clang_complete .DS_Store *.kdev4 *.sublime-* Oclgrind-21.10/CMakeLists.txt000066400000000000000000000337251413315665100160350ustar00rootroot00000000000000# CMakeLists.txt (Oclgrind) # Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, # University of Bristol. All rights reserved. # # This program is provided under a three-clause BSD license. For full # license terms please see the LICENSE file distributed with this # source code. 
cmake_minimum_required(VERSION 3.1) project(Oclgrind) set(Oclgrind_VERSION_MAJOR 21) set(Oclgrind_VERSION_MINOR 10) include(CheckIncludeFiles) include(CheckIncludeFileCXX) include(CheckLibraryExists) include(TestBigEndian) # Enable C99 for GCC (required for tests) if (CMAKE_COMPILER_IS_GNUCC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") endif() # Require C++14 (hard requirement for LLVM >10) set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) # Enable rpath on OS X set(CMAKE_MACOSX_RPATH 1) # Enable WIP 3.0 support. option(ENABLE_EXPERIMENTAL_OPENCL_3, "Enable experimental OpenCL 3.0 support.") if (${ENABLE_EXPERIMENTAL_OPENCL_3}) add_definitions(-DENABLE_OPENCL_3) endif() if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-strict-aliasing") endif() # Disable min/max macros on Windows if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") add_definitions(-DNOMINMAX) endif() # Suppress warnings from OpenCL runtime API headers if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ignored-attributes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-gcc-compat") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-availability") endif() # Detect endianness test_big_endian(IS_BIG_ENDIAN) # Find LLVM find_package(LLVM REQUIRED CONFIG NO_CMAKE_BUILDS_PATH) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") # Check LLVM version if (${LLVM_PACKAGE_VERSION} VERSION_LESS "11.0") message(FATAL_ERROR "LLVM version must be >= 11.0") endif() # Add flags for LLVM add_definitions(${LLVM_DEFINITIONS}) include_directories(${LLVM_INCLUDE_DIRS}) link_directories(${LLVM_LIBRARY_DIRS}) # Get LLVM libraries for linking list(FIND LLVM_AVAILABLE_LIBS LLVM _LLVM_SHARED_INDEX) if (${_LLVM_SHARED_INDEX} GREATER -1) set(LLVM_LIBS LLVM) else() llvm_map_components_to_libnames(LLVM_LIBS bitreader bitwriter core coroutines coverage frontendopenmp instrumentation ipo irreader linker lto mcparser objcarcopts option target) endif() # https://bugs.llvm.org/show_bug.cgi?id=44870 list(FIND LLVM_AVAILABLE_LIBS Polly _POLLY_INDEX) if (${_POLLY_INDEX} GREATER -1) list(APPEND LLVM_LIBS Polly) endif() # Allow user to set path to Clang installation via CLANG_ROOT set (CLANG_ROOT " " CACHE PATH "Root of Clang installation") if (NOT ${CLANG_ROOT} STREQUAL " ") include_directories("${CLANG_ROOT}/include") link_directories("${CLANG_ROOT}/lib") set(CMAKE_REQUIRED_INCLUDES "${CMAKE_REQUIRED_INCLUDES};${CLANG_ROOT}/include") endif() set(CMAKE_REQUIRED_INCLUDES "${CMAKE_REQUIRED_INCLUDES};${LLVM_INCLUDE_DIRS}") set(CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS};${LLVM_DEFINITIONS}") # Check for Clang headers unset(CLANG_HEADER CACHE) find_path(CLANG_HEADER "clang/CodeGen/CodeGenAction.h" PATHS "${CLANG_ROOT}/include" "${LLVM_INCLUDE_DIRS}" NO_DEFAULT_PATH) find_path(CLANG_HEADER "clang/CodeGen/CodeGenAction.h") if ("${CLANG_HEADER}" STREQUAL "CLANG_HEADER-NOTFOUND") message(FATAL_ERROR "Clang headers not found (set CLANG_ROOT)") endif() # Check for Clang libraries unset(CLANG_LIB CACHE) find_library(CLANG_LIB "clangFrontend" PATHS "${CLANG_ROOT}/lib" "${LLVM_LIBRARY_DIRS}" NO_DEFAULT_PATH) find_library(CLANG_LIB "clangFrontend") set(CLANG_LIBS clangCodeGen clangFrontend clangSerialization clangDriver clangParse clangSema clangAnalysis clangEdit clangAST clangASTMatchers clangLex clangBasic) if 
("${CLANG_LIB}" STREQUAL "CLANG_LIB-NOTFOUND") # https://releases.llvm.org/10.0.0/tools/clang/docs/ReleaseNotes.html#build-system-changes find_library(CLANG_LIB "clang-cpp" PATHS "${CLANG_ROOT}/lib" "${LLVM_LIBRARY_DIRS}" NO_DEFAULT_PATH) find_library(CLANG_LIB "clang-cpp") set(CLANG_LIBS "clang-cpp") if ("${CLANG_LIB}" STREQUAL "CLANG_LIB-NOTFOUND") message(FATAL_ERROR "Clang libraries not found (set CLANG_ROOT)") endif() endif() # Get path to Clang's opencl-c.h header get_filename_component(CLANG_LIB_DIR "${CLANG_LIB}" DIRECTORY) set(CLANG_FULL_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}") set(CLANG_OPENCL_C_H "${CLANG_LIB_DIR}/clang/${CLANG_FULL_VERSION}/include/opencl-c.h") if (NOT EXISTS "${CLANG_OPENCL_C_H}") message(FATAL_ERROR "\nClang opencl-c.h not found:\n\t${CLANG_OPENCL_C_H}") else() message(STATUS "Using opencl-c.h: ${CLANG_OPENCL_C_H}") endif() if (EXISTS "${CLANG_LIB_DIR}/clang/${CLANG_FULL_VERSION}/include/opencl-c-base.h") set(CLANG_OPENCL_C_BASE_H "${CLANG_LIB_DIR}/clang/${CLANG_FULL_VERSION}/include/opencl-c-base.h") endif() # Check for clang find_program(CLANG clang PATHS "${CLANG_ROOT}/bin" "${LLVM_TOOLS_BINARY_DIR}" NO_DEFAULT_PATH) find_program(CLANG clang) if ("${CLANG}" STREQUAL "CLANG-NOTFOUND") message(FATAL_ERROR "Could not find clang binary") endif() # Check for GNU readline library if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set(READLINE_DIR "" CACHE PATH "Location of GNU readline library") set(CMAKE_REQUIRED_INCLUDES ${READLINE_DIR}/include) include_directories(${READLINE_DIR}/include) link_directories(${READLINE_DIR}/lib) check_include_files("stdio.h;readline/readline.h" HAVE_READLINE_H) check_include_files("stdio.h;readline/history.h" HAVE_HISTORY_H) check_library_exists(readline readline "${READLINE_DIR}/lib" HAVE_READLINE_LIB) check_library_exists(readline add_history "${READLINE_DIR}/lib" HAVE_HISTORY_LIB) if (HAVE_READLINE_H AND HAVE_HISTORY_H AND HAVE_READLINE_LIB AND HAVE_HISTORY_LIB) set(HAVE_READLINE 1) list(APPEND CORE_EXTRA_LIBS readline) else() set(HAVE_READLINE 0) message(WARNING "GNU readline library not found (set READLINE_DIR)\n" "The interactive debugger will not have a command history.") endif() else() set(HAVE_READLINE 0) endif() # Check for library directory suffixes set(_LIBDIR_SUFFIX "") get_property(USING_LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS) if (USING_LIB64 AND NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") set(_LIBDIR_SUFFIX "64") endif() set(LIBDIR_SUFFIX "${_LIBDIR_SUFFIX}" CACHE STRING "Suffix for installed library directory") # Generate stringified opencl-c.h add_custom_command( OUTPUT src/core/opencl-c.h.cpp COMMAND ${CMAKE_COMMAND} -DSOURCE_FILE=${CLANG_OPENCL_C_H} -P ${CMAKE_SOURCE_DIR}/src/core/gen_opencl-c.h.cmake DEPENDS ${CLANG_OPENCL_C_H} src/core/gen_opencl-c.h.cmake ) include_directories("src/" "${PROJECT_BINARY_DIR}") if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set(CORE_LIB_TYPE "SHARED") endif() set(CORE_HEADERS src/core/common.h src/core/Context.h src/core/Kernel.h src/core/KernelInvocation.h src/core/Memory.h src/core/Plugin.h src/core/Program.h src/core/Queue.h src/core/WorkItem.h src/core/WorkGroup.h) add_library(oclgrind ${CORE_LIB_TYPE} ${CORE_HEADERS} src/core/opencl-c.h.cpp src/core/common.cpp src/core/Context.cpp src/core/Kernel.cpp src/core/KernelInvocation.cpp src/core/Memory.cpp src/core/Plugin.cpp src/core/Program.cpp src/core/Queue.cpp src/core/WorkItem.cpp src/core/WorkItemBuiltins.cpp src/core/WorkGroup.cpp src/plugins/InstructionCounter.h 
src/plugins/InstructionCounter.cpp src/plugins/InteractiveDebugger.h src/plugins/InteractiveDebugger.cpp src/plugins/Logger.h src/plugins/Logger.cpp src/plugins/MemCheck.h src/plugins/MemCheck.cpp src/plugins/RaceDetector.h src/plugins/RaceDetector.cpp src/plugins/Uninitialized.h src/plugins/Uninitialized.cpp) target_link_libraries(oclgrind PRIVATE ${CORE_EXTRA_LIBS} ${CLANG_LIBS} PUBLIC ${LLVM_LIBS}) if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") target_link_libraries(oclgrind PRIVATE Version) endif() # Sources for OpenCL runtime API frontend set(RUNTIME_SOURCES src/runtime/async_queue.h src/runtime/async_queue.cpp src/runtime/icd.h src/runtime/runtime.cpp) # Add ICD exports on Windows if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set(DLL_EXPORTS src/runtime/icd.def) endif() add_library(oclgrind-rt-icd SHARED ${RUNTIME_SOURCES} ${DLL_EXPORTS}) set_target_properties(oclgrind-rt-icd PROPERTIES COMPILE_FLAGS -DOCLGRIND_ICD) target_link_libraries(oclgrind-rt-icd ${CMAKE_DL_LIBS} oclgrind) # Add runtime exports on Windows if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set(DLL_EXPORTS src/runtime/runtime.def) endif() add_library(oclgrind-rt SHARED ${RUNTIME_SOURCES} ${DLL_EXPORTS}) target_link_libraries(oclgrind-rt ${CMAKE_DL_LIBS} oclgrind) if (UNIX AND NOT APPLE) # Change the SONAME of the library so that it gets recognized by dlopen set_target_properties(oclgrind-rt PROPERTIES NO_SONAME ON LINK_FLAGS "-Wl,-soname,libOpenCL.so") endif() add_executable(oclgrind-exe src/runtime/oclgrind.cpp) set_target_properties(oclgrind-exe PROPERTIES OUTPUT_NAME oclgrind) target_compile_definitions(oclgrind-exe PRIVATE "-DLIBDIR_SUFFIX=\"${LIBDIR_SUFFIX}\"") add_executable(oclgrind-kernel src/kernel/oclgrind-kernel.cpp src/kernel/Simulation.h src/kernel/Simulation.cpp) target_link_libraries(oclgrind-kernel oclgrind) set(OPENCL_C_H ${CMAKE_BINARY_DIR}/include/oclgrind/opencl-c.h ${CMAKE_BINARY_DIR}/include/oclgrind/opencl-c-1.2-32.pch ${CMAKE_BINARY_DIR}/include/oclgrind/opencl-c-1.2-64.pch ${CMAKE_BINARY_DIR}/include/oclgrind/opencl-c-2.0-32.pch ${CMAKE_BINARY_DIR}/include/oclgrind/opencl-c-2.0-64.pch ) add_custom_target(OPENCL_C_HEADERS ALL DEPENDS ${OPENCL_C_H}) if (CLANG_OPENCL_C_BASE_H) add_custom_command( OUTPUT include/oclgrind/opencl-c-base.h POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CLANG_OPENCL_C_BASE_H} include/oclgrind/opencl-c-base.h DEPENDS ${CLANG_OPENCL_C_BASE_H}) set(OPENCL_C_H_DEPENDS include/oclgrind/opencl-c-base.h) set(OPENCL_C_H ${OPENCL_C_H} ${CMAKE_BINARY_DIR}/${OPENCL_C_H_DEPENDS}) endif() add_custom_command( OUTPUT include/oclgrind/opencl-c.h POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CLANG_OPENCL_C_H} include/oclgrind/opencl-c.h DEPENDS ${CLANG_OPENCL_C_H} ${OPENCL_C_H_DEPENDS}) # Generate precompiled headers for opencl-c.h set(OPENCL_C_H_SYSROOT "${CMAKE_BINARY_DIR}/include/oclgrind/") if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") string(REPLACE "/" "\\" OPENCL_C_H_SYSROOT "${OPENCL_C_H_SYSROOT}") endif() add_custom_command( OUTPUT include/oclgrind/opencl-c-1.2-32.pch POST_BUILD COMMAND ${CLANG} -cc1 -x cl -cl-std=CL1.2 -O0 -fno-builtin -fgnu89-inline -emit-pch -triple spir-unknown-unknown -relocatable-pch -isysroot "${OPENCL_C_H_SYSROOT}" include/oclgrind/opencl-c.h -o include/oclgrind/opencl-c-1.2-32.pch DEPENDS include/oclgrind/opencl-c.h ) add_custom_command( OUTPUT include/oclgrind/opencl-c-1.2-64.pch POST_BUILD COMMAND ${CLANG} -cc1 -x cl -cl-std=CL1.2 -O0 -fno-builtin -fgnu89-inline -emit-pch -triple spir64-unknown-unknown -relocatable-pch -isysroot 
"${OPENCL_C_H_SYSROOT}" include/oclgrind/opencl-c.h -o include/oclgrind/opencl-c-1.2-64.pch DEPENDS include/oclgrind/opencl-c.h ) add_custom_command( OUTPUT include/oclgrind/opencl-c-2.0-32.pch POST_BUILD COMMAND ${CLANG} -cc1 -x cl -cl-std=CL2.0 -O0 -fno-builtin -fgnu89-inline -emit-pch -triple spir-unknown-unknown -relocatable-pch -isysroot "${OPENCL_C_H_SYSROOT}" include/oclgrind/opencl-c.h -o include/oclgrind/opencl-c-2.0-32.pch DEPENDS include/oclgrind/opencl-c.h ) add_custom_command( OUTPUT include/oclgrind/opencl-c-2.0-64.pch POST_BUILD COMMAND ${CLANG} -cc1 -x cl -cl-std=CL2.0 -O0 -fno-builtin -fgnu89-inline -emit-pch -triple spir64-unknown-unknown -relocatable-pch -isysroot "${OPENCL_C_H_SYSROOT}" include/oclgrind/opencl-c.h -o include/oclgrind/opencl-c-2.0-64.pch DEPENDS include/oclgrind/opencl-c.h ) # Generate config.h set(LLVM_VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}) configure_file("config.h.in" "config.h") # Generate ICD loader if not on Windows if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/oclgrind.icd CONTENT "${CMAKE_INSTALL_PREFIX}/lib${LIBDIR_SUFFIX}/$\n") endif() install(TARGETS oclgrind-exe oclgrind-kernel DESTINATION bin) install(TARGETS oclgrind oclgrind-rt oclgrind-rt-icd DESTINATION "lib${LIBDIR_SUFFIX}") install(FILES ${CORE_HEADERS} ${OPENCL_C_H} DESTINATION include/oclgrind) if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") install(FILES src/CL/cl.h src/CL/cl_d3d10.h src/CL/cl_d3d11.h src/CL/cl_dx9_media_sharing.h src/CL/cl_egl.h src/CL/cl_ext.h src/CL/cl_gl.h src/CL/cl_gl_ext.h src/CL/cl_platform.h src/CL/opencl.h DESTINATION include/CL) endif() # Tests enable_testing() # Check for Python find_package(PythonInterp) if (PYTHONINTERP_FOUND) # Add test directories add_subdirectory(tests/apps) add_subdirectory(tests/kernels) add_subdirectory(tests/runtime) else() message(WARNING "Tests will not be run (Python required)") endif() # CPack config set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "OpenCL device simulator") set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_SOURCE_DIR}/src/install/cpack-description") set(CPACK_PACKAGE_VENDOR "University of Bristol") set(CPACK_PACKAGE_VERSION_MAJOR ${Oclgrind_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${Oclgrind_VERSION_MINOR}) set(CPACK_PACKAGE_VERSION "${Oclgrind_VERSION_MAJOR}.${Oclgrind_VERSION_MINOR}") set(CPACK_PACKAGE_VERSION_PATCH "0") # CPack RPM config set(CPACK_RPM_PACKAGE_GROUP "Development/Tools") set(CPACK_RPM_PACKAGE_LICENSE "BSD") include(CPack) Oclgrind-21.10/LICENSE000066400000000000000000000030001413315665100142610ustar00rootroot00000000000000Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, University of Bristol. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Oclgrind-21.10/NEWS

For more information, please visit the Oclgrind Wiki:

https://github.com/jrprice/Oclgrind/wiki

Oclgrind 21.10
==============
- Added initial support for OpenCL 3.0
- Added support for LLVM 11, 12, and 13
- Dropped support for older versions of LLVM
- Various minor bug fixes

Oclgrind 19.10
==============
- Added support for executing commands across multiple command queues
- Added support for out-of-order command queues
- Added support for LLVM 7.0, 8.0 and 9.0
- Dropped support for LLVM 3.9 and 4.0
- Various minor bug fixes

Oclgrind 18.3
=============
Added support for the following OpenCL 2.0 features:
- Program-scope global variables
- Non-uniform work-group sizes
- Several miscellaneous API and kernel functions

Other changes:
- Switched to using Clang's builtin OpenCL header (opencl-c.h)
- Added support for LLVM 4.0, 5.0 and 6.0
- Dropped support for LLVM 3.6, 3.7 and 3.8
- Added --{global,constant,local}-mem-size and --max-wgsize options
- Removed autotools build system
- Improved support for big-endian systems
- Various minor bug fixes

Oclgrind 16.10
==============
This release incorporates the following changes:
- Added plugin to detect use of uninitialized values (from Moritz Pflanzer)
- Added memoryMap and memoryUnmap plugin callbacks
- Added support for LLVM 3.7, 3.8, and 3.9
- Added oclgrind.exe command on Windows
- Report invalid uses of mapped buffers inside kernels
- Report invalid indices when accessing statically sized arrays
- Improved coverage of race detection plugin
- Fixed memcheck false-positive when writing to a write-only vector array
- Oclgrind will now appear with device type (CPU | GPU | ACCELERATOR | DEFAULT)
- Various minor bug fixes

Oclgrind 15.5
=============
This release updates to LLVM 3.6, which improves the OpenCL C compiler
and provides some additional performance enhancements. See README for
revised instructions on how to build Oclgrind from source.

- Fixed race conditions in atomic operations
- Interactive debugger breaks on Ctrl+C
- Various other minor bug fixes

Oclgrind 15.2
=============
This release significantly improves simulation performance, and fixes
several bugs impacting on usage and stability.
- Added detection for violations of read-only/write-only attributes
- Added --build-options argument to append additional compiler flags
- Added hostMemoryLoad and hostMemoryStore callbacks
- Added workGroupBegin and workItemBegin callbacks
- Split atomic callbacks into separate load and store
- Multi-threaded simulation to improve performance
- Various other performance improvements
- Several general bug fixes and stability improvements

Oclgrind 14.12
==============
This release incorporates a new plugin system, to allow third-party
developers to build tools that utilise Oclgrind. More information can
be found on the Wiki:

https://github.com/jrprice/Oclgrind/wiki/Creating-Plugins

In addition, this release contains the following changes:
- Interactive debugger now has a command history
- Detection for unaligned memory accesses
- Limit the number of error messages printed to avoid flooding output
- Various other bug fixes and improvements

Oclgrind 14.5
=============
Initial release (beta).

Implements a SPIR 1.2 interpreter which can be targeted either via an
OpenCL 1.2 runtime API implementation or using a standalone kernel
interface. Provides the following utilities:
- Memory access error detection
- Work-group divergence detection (barriers, async-copies)
- Data-race detection (--data-races)
- Simple interactive debugger (--interactive)
- Instruction histograms (--inst-counts)
- OpenCL runtime API error reporting (--check-api)

Oclgrind-21.10/README.md

Oclgrind
========

About
-----

This project implements a virtual OpenCL device simulator, including an
OpenCL runtime with ICD support. The goal is to provide a platform for
creating tools to aid OpenCL development. In particular, this project
currently implements utilities for debugging memory access errors,
detecting data-races and barrier divergence, collecting instruction
histograms, and for interactive OpenCL kernel debugging. The simulator
is built on an interpreter for LLVM IR. This project was originally
created by James Price and Simon McIntosh-Smith at the University of
Bristol.

Binary releases can be found on the GitHub releases page:
https://github.com/jrprice/Oclgrind/releases

Build dependencies
------------------

To build this project, you will need LLVM and Clang 11.0 (or newer)
development libraries and headers. If you build LLVM from source, it is
recommended to enable optimizations to significantly improve the
performance of Oclgrind (set `CMAKE_BUILD_TYPE` to `Release` or
`RelWithDebInfo`).

You will need to use a compiler that supports C++14. Python should also
be available in order to run the test suite.

Building on Linux and macOS (CMake)
-----------------------------------

The recommended method of building Oclgrind is via CMake.

When configuring the CMake build, you may be prompted to supply a value
for the `LLVM_DIR` parameter (this shouldn't be necessary if LLVM is
installed in a standard system location). This should be set to the
directory containing your LLVM installation's `LLVMConfig.cmake` file
(typically either `${LLVM_ROOT}/lib/cmake/llvm` or
`${LLVM_ROOT}/share/llvm/cmake/`). If Clang is installed separately to
LLVM, then you may also be prompted to supply a path for the
`CLANG_ROOT` parameter, which should be the root of your Clang
installation (containing the `bin/`, `lib/` and `include/` directories).

A typical CMake command-line might look like this:

    cmake ${OCLGRIND_SOURCE} \
          -DCMAKE_BUILD_TYPE=RelWithDebInfo \
          -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT} \
          -DLLVM_DIR=${LLVM_ROOT}/lib/cmake/llvm

where `${OCLGRIND_SOURCE}` is the path to the root directory containing
the Oclgrind source code, `${LLVM_ROOT}` is the path to the LLVM
installation, and `${INSTALL_ROOT}` is the desired installation root
directory (this can be omitted if installing to system directories).

Next, build and install with make:

    make
    make test
    make install

If installing to a non-system location, you should add the `bin/`
directory to the `PATH` environment variable in order to make use of
the `oclgrind` command.

If you wish to use Oclgrind via the OpenCL ICD loader (optional), then
you should create an ICD loading point by copying the `oclgrind.icd`
file from the build directory to `/etc/OpenCL/vendors/`.
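For example, assuming an out-of-tree build directory named `build` (the
directory name is illustrative, not required), the loading point could
be created with something like:

    # Create the vendors directory if needed, then register Oclgrind's ICD
    sudo mkdir -p /etc/OpenCL/vendors
    sudo cp build/oclgrind.icd /etc/OpenCL/vendors/
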
Building on Windows
-------------------

Building Oclgrind on Windows requires Visual Studio 2017 (or newer).

When configuring the CMake build, you may be prompted to supply a value
for the `LLVM_DIR` parameter. This should be set to the directory
containing your LLVM installation's `LLVMConfig.cmake` file (for
example `C:\Program Files\LLVM\lib\cmake\llvm`). If Clang is installed
separately to LLVM, then you may also be prompted to supply a path in
the `CLANG_ROOT` parameter, which should be the root of your Clang
installation (containing the `bin/`, `lib/` and `include/` directories).

You should add the `bin` directory of the Oclgrind installation to the
`PATH` environment variable in order to make use of the `oclgrind`
command.

If you wish to use Oclgrind via the OpenCL ICD loader (optional), then
you should also create an ICD loading point. To do this, you should add
a `REG_DWORD` value to the Windows Registry under one or both of the
registry keys below, with the name set to the absolute path of the
`oclgrind-rt-icd.dll` library and the value set to 0.

Key for 32-bit machines or 64-bit apps on a 64-bit machine:
`HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors`

Key for 32-bit apps on a 64-bit machine:
`HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Khronos\OpenCL\Vendors`
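For example, from an elevated command prompt, a 64-bit installation
might be registered with something like the following; the
`C:\Oclgrind` installation path is purely illustrative:

    rem Register the Oclgrind ICD for 64-bit applications (adjust the path)
    reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Oclgrind\lib\oclgrind-rt-icd.dll" /t REG_DWORD /d 0
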
Usage
-----

The recommended method of running an application with Oclgrind is to
use the `oclgrind` command, for example:

    oclgrind ./application

This command will ensure that the only OpenCL platform and device
available to your application is Oclgrind. If you need more control
over platform selection then installing an ICD loading point for
Oclgrind will cause it to appear when an application calls
`clGetPlatformIDs()`, alongside any other OpenCL platforms installed on
your system.

If it encounters any invalid memory accesses, Oclgrind will report the
details to stderr, for example:

    Invalid write of size 4 at global memory address 0x1000000000040
      Kernel: vecadd
      Entity: Global(16,0,0) Local(0,0,0) Group(16,0,0)
        store i32 %tmp9, i32 addrspace(1)* %tmp15, align 4
      At line 4 of input.cl
        c[i] = a[i] + b[i]

Since it is interpreting an abstract intermediate representation and
bounds-checking each memory access, Oclgrind will run quite slowly
(typically a couple of orders of magnitude slower than a regular CPU
implementation). Therefore, it is recommended to run your application
with a small problem if possible.

To enable an interactive, GDB-style debugging session, supply the `-i`
flag to the oclgrind command, or export the environment variable
`OCLGRIND_INTERACTIVE=1`. This will cause Oclgrind to automatically
break at the beginning of each kernel invocation, and upon encountering
an invalid memory access. Type `help` for details of available
commands.

For more detailed information about using Oclgrind please visit the
GitHub Wiki:

https://github.com/jrprice/Oclgrind/wiki/

Contact
-------

If you encounter any issues or have any questions, please use the
GitHub issues page:

https://github.com/jrprice/Oclgrind/issues

Oclgrind-21.10/config.h.in

#define PACKAGE_VERSION "@Oclgrind_VERSION_MAJOR@.@Oclgrind_VERSION_MINOR@"
#define HAVE_READLINE @HAVE_READLINE@
#define LLVM_VERSION @LLVM_VERSION@
#define IS_BIG_ENDIAN @IS_BIG_ENDIAN@

Oclgrind-21.10/src/CL/.clang-format

DisableFormat: true
SortIncludes: false

Oclgrind-21.10/src/CL/cl.h

/*******************************************************************************
 * Copyright (c) 2008-2020 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/

#ifndef __OPENCL_CL_H
#define __OPENCL_CL_H

#include <CL/cl_version.h>
#include <CL/cl_platform.h>

#ifdef __cplusplus
extern "C" {
#endif

/******************************************************************************/

typedef struct _cl_platform_id *    cl_platform_id;
typedef struct _cl_device_id *      cl_device_id;
typedef struct _cl_context *        cl_context;
typedef struct _cl_command_queue *  cl_command_queue;
typedef struct _cl_mem *            cl_mem;
typedef struct _cl_program *        cl_program;
typedef struct _cl_kernel *         cl_kernel;
typedef struct _cl_event *          cl_event;
typedef struct _cl_sampler *        cl_sampler;
typedef cl_uint                     cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels.
*/ typedef cl_ulong cl_bitfield; typedef cl_ulong cl_properties; typedef cl_bitfield cl_device_type; typedef cl_uint cl_platform_info; typedef cl_uint cl_device_info; typedef cl_bitfield cl_device_fp_config; typedef cl_uint cl_device_mem_cache_type; typedef cl_uint cl_device_local_mem_type; typedef cl_bitfield cl_device_exec_capabilities; #ifdef CL_VERSION_2_0 typedef cl_bitfield cl_device_svm_capabilities; #endif typedef cl_bitfield cl_command_queue_properties; #ifdef CL_VERSION_1_2 typedef intptr_t cl_device_partition_property; typedef cl_bitfield cl_device_affinity_domain; #endif typedef intptr_t cl_context_properties; typedef cl_uint cl_context_info; #ifdef CL_VERSION_2_0 typedef cl_properties cl_queue_properties; #endif typedef cl_uint cl_command_queue_info; typedef cl_uint cl_channel_order; typedef cl_uint cl_channel_type; typedef cl_bitfield cl_mem_flags; #ifdef CL_VERSION_2_0 typedef cl_bitfield cl_svm_mem_flags; #endif typedef cl_uint cl_mem_object_type; typedef cl_uint cl_mem_info; #ifdef CL_VERSION_1_2 typedef cl_bitfield cl_mem_migration_flags; #endif typedef cl_uint cl_image_info; #ifdef CL_VERSION_1_1 typedef cl_uint cl_buffer_create_type; #endif typedef cl_uint cl_addressing_mode; typedef cl_uint cl_filter_mode; typedef cl_uint cl_sampler_info; typedef cl_bitfield cl_map_flags; #ifdef CL_VERSION_2_0 typedef intptr_t cl_pipe_properties; typedef cl_uint cl_pipe_info; #endif typedef cl_uint cl_program_info; typedef cl_uint cl_program_build_info; #ifdef CL_VERSION_1_2 typedef cl_uint cl_program_binary_type; #endif typedef cl_int cl_build_status; typedef cl_uint cl_kernel_info; #ifdef CL_VERSION_1_2 typedef cl_uint cl_kernel_arg_info; typedef cl_uint cl_kernel_arg_address_qualifier; typedef cl_uint cl_kernel_arg_access_qualifier; typedef cl_bitfield cl_kernel_arg_type_qualifier; #endif typedef cl_uint cl_kernel_work_group_info; #ifdef CL_VERSION_2_1 typedef cl_uint cl_kernel_sub_group_info; #endif typedef cl_uint cl_event_info; typedef cl_uint cl_command_type; typedef cl_uint cl_profiling_info; #ifdef CL_VERSION_2_0 typedef cl_properties cl_sampler_properties; typedef cl_uint cl_kernel_exec_info; #endif #ifdef CL_VERSION_3_0 typedef cl_bitfield cl_device_atomic_capabilities; typedef cl_bitfield cl_device_device_enqueue_capabilities; typedef cl_uint cl_khronos_vendor_id; typedef cl_properties cl_mem_properties; typedef cl_uint cl_version; #endif typedef struct _cl_image_format { cl_channel_order image_channel_order; cl_channel_type image_channel_data_type; } cl_image_format; #ifdef CL_VERSION_1_2 typedef struct _cl_image_desc { cl_mem_object_type image_type; size_t image_width; size_t image_height; size_t image_depth; size_t image_array_size; size_t image_row_pitch; size_t image_slice_pitch; cl_uint num_mip_levels; cl_uint num_samples; #ifdef CL_VERSION_2_0 #if defined(__GNUC__) __extension__ /* Prevents warnings about anonymous union in -pedantic builds */ #endif #if defined(_MSC_VER) && !defined(__STDC__) #pragma warning( push ) #pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 builds */ #endif #if defined(_MSC_VER) && defined(__STDC__) /* Anonymous unions are not supported in /Za builds */ #else union { #endif #endif cl_mem buffer; #ifdef CL_VERSION_2_0 #if defined(_MSC_VER) && defined(__STDC__) /* Anonymous unions are not supported in /Za builds */ #else cl_mem mem_object; }; #endif #if defined(_MSC_VER) && !defined(__STDC__) #pragma warning( pop ) #endif #endif } cl_image_desc; #endif #ifdef CL_VERSION_1_1 typedef struct 
_cl_buffer_region { size_t origin; size_t size; } cl_buffer_region; #endif #ifdef CL_VERSION_3_0 #define CL_NAME_VERSION_MAX_NAME_SIZE 64 typedef struct _cl_name_version { cl_version version; char name[CL_NAME_VERSION_MAX_NAME_SIZE]; } cl_name_version; #endif /******************************************************************************/ /* Error Codes */ #define CL_SUCCESS 0 #define CL_DEVICE_NOT_FOUND -1 #define CL_DEVICE_NOT_AVAILABLE -2 #define CL_COMPILER_NOT_AVAILABLE -3 #define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 #define CL_OUT_OF_RESOURCES -5 #define CL_OUT_OF_HOST_MEMORY -6 #define CL_PROFILING_INFO_NOT_AVAILABLE -7 #define CL_MEM_COPY_OVERLAP -8 #define CL_IMAGE_FORMAT_MISMATCH -9 #define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 #define CL_BUILD_PROGRAM_FAILURE -11 #define CL_MAP_FAILURE -12 #ifdef CL_VERSION_1_1 #define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 #endif #ifdef CL_VERSION_1_2 #define CL_COMPILE_PROGRAM_FAILURE -15 #define CL_LINKER_NOT_AVAILABLE -16 #define CL_LINK_PROGRAM_FAILURE -17 #define CL_DEVICE_PARTITION_FAILED -18 #define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 #endif #define CL_INVALID_VALUE -30 #define CL_INVALID_DEVICE_TYPE -31 #define CL_INVALID_PLATFORM -32 #define CL_INVALID_DEVICE -33 #define CL_INVALID_CONTEXT -34 #define CL_INVALID_QUEUE_PROPERTIES -35 #define CL_INVALID_COMMAND_QUEUE -36 #define CL_INVALID_HOST_PTR -37 #define CL_INVALID_MEM_OBJECT -38 #define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 #define CL_INVALID_IMAGE_SIZE -40 #define CL_INVALID_SAMPLER -41 #define CL_INVALID_BINARY -42 #define CL_INVALID_BUILD_OPTIONS -43 #define CL_INVALID_PROGRAM -44 #define CL_INVALID_PROGRAM_EXECUTABLE -45 #define CL_INVALID_KERNEL_NAME -46 #define CL_INVALID_KERNEL_DEFINITION -47 #define CL_INVALID_KERNEL -48 #define CL_INVALID_ARG_INDEX -49 #define CL_INVALID_ARG_VALUE -50 #define CL_INVALID_ARG_SIZE -51 #define CL_INVALID_KERNEL_ARGS -52 #define CL_INVALID_WORK_DIMENSION -53 #define CL_INVALID_WORK_GROUP_SIZE -54 #define CL_INVALID_WORK_ITEM_SIZE -55 #define CL_INVALID_GLOBAL_OFFSET -56 #define CL_INVALID_EVENT_WAIT_LIST -57 #define CL_INVALID_EVENT -58 #define CL_INVALID_OPERATION -59 #define CL_INVALID_GL_OBJECT -60 #define CL_INVALID_BUFFER_SIZE -61 #define CL_INVALID_MIP_LEVEL -62 #define CL_INVALID_GLOBAL_WORK_SIZE -63 #ifdef CL_VERSION_1_1 #define CL_INVALID_PROPERTY -64 #endif #ifdef CL_VERSION_1_2 #define CL_INVALID_IMAGE_DESCRIPTOR -65 #define CL_INVALID_COMPILER_OPTIONS -66 #define CL_INVALID_LINKER_OPTIONS -67 #define CL_INVALID_DEVICE_PARTITION_COUNT -68 #endif #ifdef CL_VERSION_2_0 #define CL_INVALID_PIPE_SIZE -69 #define CL_INVALID_DEVICE_QUEUE -70 #endif #ifdef CL_VERSION_2_2 #define CL_INVALID_SPEC_ID -71 #define CL_MAX_SIZE_RESTRICTION_EXCEEDED -72 #endif /* cl_bool */ #define CL_FALSE 0 #define CL_TRUE 1 #ifdef CL_VERSION_1_2 #define CL_BLOCKING CL_TRUE #define CL_NON_BLOCKING CL_FALSE #endif /* cl_platform_info */ #define CL_PLATFORM_PROFILE 0x0900 #define CL_PLATFORM_VERSION 0x0901 #define CL_PLATFORM_NAME 0x0902 #define CL_PLATFORM_VENDOR 0x0903 #define CL_PLATFORM_EXTENSIONS 0x0904 #ifdef CL_VERSION_2_1 #define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 #endif #ifdef CL_VERSION_3_0 #define CL_PLATFORM_NUMERIC_VERSION 0x0906 #define CL_PLATFORM_EXTENSIONS_WITH_VERSION 0x0907 #endif /* cl_device_type - bitfield */ #define CL_DEVICE_TYPE_DEFAULT (1 << 0) #define CL_DEVICE_TYPE_CPU (1 << 1) #define CL_DEVICE_TYPE_GPU (1 << 2) #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) #ifdef CL_VERSION_1_2 
#define CL_DEVICE_TYPE_CUSTOM (1 << 4) #endif #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF /* cl_device_info */ #define CL_DEVICE_TYPE 0x1000 #define CL_DEVICE_VENDOR_ID 0x1001 #define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 #define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 #define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B #define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C #define CL_DEVICE_ADDRESS_BITS 0x100D #define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E #define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F #define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 #define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 #define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 #define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 #define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 #define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 #define CL_DEVICE_IMAGE_SUPPORT 0x1016 #define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 #define CL_DEVICE_MAX_SAMPLERS 0x1018 #define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 #define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A #define CL_DEVICE_SINGLE_FP_CONFIG 0x101B #define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C #define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D #define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E #define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F #define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 #define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 #define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 #define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 #define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 #define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 #define CL_DEVICE_ENDIAN_LITTLE 0x1026 #define CL_DEVICE_AVAILABLE 0x1027 #define CL_DEVICE_COMPILER_AVAILABLE 0x1028 #define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 #define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */ #ifdef CL_VERSION_2_0 #define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A #endif #define CL_DEVICE_NAME 0x102B #define CL_DEVICE_VENDOR 0x102C #define CL_DRIVER_VERSION 0x102D #define CL_DEVICE_PROFILE 0x102E #define CL_DEVICE_VERSION 0x102F #define CL_DEVICE_EXTENSIONS 0x1030 #define CL_DEVICE_PLATFORM 0x1031 #ifdef CL_VERSION_1_2 #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 #endif /* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */ #ifdef CL_VERSION_1_1 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 #define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C #define CL_DEVICE_OPENCL_C_VERSION 0x103D #endif #ifdef CL_VERSION_1_2 #define CL_DEVICE_LINKER_AVAILABLE 0x103E #define CL_DEVICE_BUILT_IN_KERNELS 0x103F #define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 #define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 #define CL_DEVICE_PARENT_DEVICE 0x1042 #define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 #define CL_DEVICE_PARTITION_PROPERTIES 0x1044 #define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 #define CL_DEVICE_PARTITION_TYPE 0x1046 #define CL_DEVICE_REFERENCE_COUNT 0x1047 #define 
CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 #define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 #endif #ifdef CL_VERSION_2_0 #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B #define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C #define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D #define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E #define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F #define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 #define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 #define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 #define CL_DEVICE_SVM_CAPABILITIES 0x1053 #define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 #define CL_DEVICE_MAX_PIPE_ARGS 0x1055 #define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 #define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 #define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 #define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 #define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A #endif #ifdef CL_VERSION_2_1 #define CL_DEVICE_IL_VERSION 0x105B #define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C #define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D #endif #ifdef CL_VERSION_3_0 #define CL_DEVICE_NUMERIC_VERSION 0x105E #define CL_DEVICE_EXTENSIONS_WITH_VERSION 0x1060 #define CL_DEVICE_ILS_WITH_VERSION 0x1061 #define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION 0x1062 #define CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES 0x1063 #define CL_DEVICE_ATOMIC_FENCE_CAPABILITIES 0x1064 #define CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT 0x1065 #define CL_DEVICE_OPENCL_C_ALL_VERSIONS 0x1066 #define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x1067 #define CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT 0x1068 #define CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT 0x1069 /* 0x106A to 0x106E - Reserved for upcoming KHR extension */ #define CL_DEVICE_OPENCL_C_FEATURES 0x106F #define CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES 0x1070 #define CL_DEVICE_PIPE_SUPPORT 0x1071 #define CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED 0x1072 #endif /* cl_device_fp_config - bitfield */ #define CL_FP_DENORM (1 << 0) #define CL_FP_INF_NAN (1 << 1) #define CL_FP_ROUND_TO_NEAREST (1 << 2) #define CL_FP_ROUND_TO_ZERO (1 << 3) #define CL_FP_ROUND_TO_INF (1 << 4) #define CL_FP_FMA (1 << 5) #ifdef CL_VERSION_1_1 #define CL_FP_SOFT_FLOAT (1 << 6) #endif #ifdef CL_VERSION_1_2 #define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) #endif /* cl_device_mem_cache_type */ #define CL_NONE 0x0 #define CL_READ_ONLY_CACHE 0x1 #define CL_READ_WRITE_CACHE 0x2 /* cl_device_local_mem_type */ #define CL_LOCAL 0x1 #define CL_GLOBAL 0x2 /* cl_device_exec_capabilities - bitfield */ #define CL_EXEC_KERNEL (1 << 0) #define CL_EXEC_NATIVE_KERNEL (1 << 1) /* cl_command_queue_properties - bitfield */ #define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) #define CL_QUEUE_PROFILING_ENABLE (1 << 1) #ifdef CL_VERSION_2_0 #define CL_QUEUE_ON_DEVICE (1 << 2) #define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3) #endif /* cl_context_info */ #define CL_CONTEXT_REFERENCE_COUNT 0x1080 #define CL_CONTEXT_DEVICES 0x1081 #define CL_CONTEXT_PROPERTIES 0x1082 #ifdef CL_VERSION_1_1 #define CL_CONTEXT_NUM_DEVICES 0x1083 #endif /* cl_context_properties */ #define CL_CONTEXT_PLATFORM 0x1084 #ifdef CL_VERSION_1_2 #define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 #endif #ifdef CL_VERSION_1_2 /* cl_device_partition_property */ #define CL_DEVICE_PARTITION_EQUALLY 0x1086 #define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 #define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 
#endif #ifdef CL_VERSION_1_2 /* cl_device_affinity_domain */ #define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) #define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) #define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) #define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) #define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) #define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) #endif #ifdef CL_VERSION_2_0 /* cl_device_svm_capabilities */ #define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) #define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) #define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) #define CL_DEVICE_SVM_ATOMICS (1 << 3) #endif /* cl_command_queue_info */ #define CL_QUEUE_CONTEXT 0x1090 #define CL_QUEUE_DEVICE 0x1091 #define CL_QUEUE_REFERENCE_COUNT 0x1092 #define CL_QUEUE_PROPERTIES 0x1093 #ifdef CL_VERSION_2_0 #define CL_QUEUE_SIZE 0x1094 #endif #ifdef CL_VERSION_2_1 #define CL_QUEUE_DEVICE_DEFAULT 0x1095 #endif #ifdef CL_VERSION_3_0 #define CL_QUEUE_PROPERTIES_ARRAY 0x1098 #endif /* cl_mem_flags and cl_svm_mem_flags - bitfield */ #define CL_MEM_READ_WRITE (1 << 0) #define CL_MEM_WRITE_ONLY (1 << 1) #define CL_MEM_READ_ONLY (1 << 2) #define CL_MEM_USE_HOST_PTR (1 << 3) #define CL_MEM_ALLOC_HOST_PTR (1 << 4) #define CL_MEM_COPY_HOST_PTR (1 << 5) /* reserved (1 << 6) */ #ifdef CL_VERSION_1_2 #define CL_MEM_HOST_WRITE_ONLY (1 << 7) #define CL_MEM_HOST_READ_ONLY (1 << 8) #define CL_MEM_HOST_NO_ACCESS (1 << 9) #endif #ifdef CL_VERSION_2_0 #define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */ #define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */ #define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12) #endif #ifdef CL_VERSION_1_2 /* cl_mem_migration_flags - bitfield */ #define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) #define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) #endif /* cl_channel_order */ #define CL_R 0x10B0 #define CL_A 0x10B1 #define CL_RG 0x10B2 #define CL_RA 0x10B3 #define CL_RGB 0x10B4 #define CL_RGBA 0x10B5 #define CL_BGRA 0x10B6 #define CL_ARGB 0x10B7 #define CL_INTENSITY 0x10B8 #define CL_LUMINANCE 0x10B9 #ifdef CL_VERSION_1_1 #define CL_Rx 0x10BA #define CL_RGx 0x10BB #define CL_RGBx 0x10BC #endif #ifdef CL_VERSION_1_2 #define CL_DEPTH 0x10BD #define CL_DEPTH_STENCIL 0x10BE #endif #ifdef CL_VERSION_2_0 #define CL_sRGB 0x10BF #define CL_sRGBx 0x10C0 #define CL_sRGBA 0x10C1 #define CL_sBGRA 0x10C2 #define CL_ABGR 0x10C3 #endif /* cl_channel_type */ #define CL_SNORM_INT8 0x10D0 #define CL_SNORM_INT16 0x10D1 #define CL_UNORM_INT8 0x10D2 #define CL_UNORM_INT16 0x10D3 #define CL_UNORM_SHORT_565 0x10D4 #define CL_UNORM_SHORT_555 0x10D5 #define CL_UNORM_INT_101010 0x10D6 #define CL_SIGNED_INT8 0x10D7 #define CL_SIGNED_INT16 0x10D8 #define CL_SIGNED_INT32 0x10D9 #define CL_UNSIGNED_INT8 0x10DA #define CL_UNSIGNED_INT16 0x10DB #define CL_UNSIGNED_INT32 0x10DC #define CL_HALF_FLOAT 0x10DD #define CL_FLOAT 0x10DE #ifdef CL_VERSION_1_2 #define CL_UNORM_INT24 0x10DF #endif #ifdef CL_VERSION_2_1 #define CL_UNORM_INT_101010_2 0x10E0 #endif /* cl_mem_object_type */ #define CL_MEM_OBJECT_BUFFER 0x10F0 #define CL_MEM_OBJECT_IMAGE2D 0x10F1 #define CL_MEM_OBJECT_IMAGE3D 0x10F2 #ifdef CL_VERSION_1_2 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 #define CL_MEM_OBJECT_IMAGE1D 0x10F4 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 #endif #ifdef CL_VERSION_2_0 #define CL_MEM_OBJECT_PIPE 0x10F7 #endif /* cl_mem_info */ #define CL_MEM_TYPE 0x1100 #define CL_MEM_FLAGS 0x1101 #define CL_MEM_SIZE 0x1102 #define CL_MEM_HOST_PTR 0x1103 
#define CL_MEM_MAP_COUNT 0x1104 #define CL_MEM_REFERENCE_COUNT 0x1105 #define CL_MEM_CONTEXT 0x1106 #ifdef CL_VERSION_1_1 #define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 #define CL_MEM_OFFSET 0x1108 #endif #ifdef CL_VERSION_2_0 #define CL_MEM_USES_SVM_POINTER 0x1109 #endif #ifdef CL_VERSION_3_0 #define CL_MEM_PROPERTIES 0x110A #endif /* cl_image_info */ #define CL_IMAGE_FORMAT 0x1110 #define CL_IMAGE_ELEMENT_SIZE 0x1111 #define CL_IMAGE_ROW_PITCH 0x1112 #define CL_IMAGE_SLICE_PITCH 0x1113 #define CL_IMAGE_WIDTH 0x1114 #define CL_IMAGE_HEIGHT 0x1115 #define CL_IMAGE_DEPTH 0x1116 #ifdef CL_VERSION_1_2 #define CL_IMAGE_ARRAY_SIZE 0x1117 #define CL_IMAGE_BUFFER 0x1118 #define CL_IMAGE_NUM_MIP_LEVELS 0x1119 #define CL_IMAGE_NUM_SAMPLES 0x111A #endif /* cl_pipe_info */ #ifdef CL_VERSION_2_0 #define CL_PIPE_PACKET_SIZE 0x1120 #define CL_PIPE_MAX_PACKETS 0x1121 #endif #ifdef CL_VERSION_3_0 #define CL_PIPE_PROPERTIES 0x1122 #endif /* cl_addressing_mode */ #define CL_ADDRESS_NONE 0x1130 #define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 #define CL_ADDRESS_CLAMP 0x1132 #define CL_ADDRESS_REPEAT 0x1133 #ifdef CL_VERSION_1_1 #define CL_ADDRESS_MIRRORED_REPEAT 0x1134 #endif /* cl_filter_mode */ #define CL_FILTER_NEAREST 0x1140 #define CL_FILTER_LINEAR 0x1141 /* cl_sampler_info */ #define CL_SAMPLER_REFERENCE_COUNT 0x1150 #define CL_SAMPLER_CONTEXT 0x1151 #define CL_SAMPLER_NORMALIZED_COORDS 0x1152 #define CL_SAMPLER_ADDRESSING_MODE 0x1153 #define CL_SAMPLER_FILTER_MODE 0x1154 #ifdef CL_VERSION_2_0 /* These enumerants are for the cl_khr_mipmap_image extension. They have since been added to cl_ext.h with an appropriate KHR suffix, but are left here for backwards compatibility. */ #define CL_SAMPLER_MIP_FILTER_MODE 0x1155 #define CL_SAMPLER_LOD_MIN 0x1156 #define CL_SAMPLER_LOD_MAX 0x1157 #endif #ifdef CL_VERSION_3_0 #define CL_SAMPLER_PROPERTIES 0x1158 #endif /* cl_map_flags - bitfield */ #define CL_MAP_READ (1 << 0) #define CL_MAP_WRITE (1 << 1) #ifdef CL_VERSION_1_2 #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) #endif /* cl_program_info */ #define CL_PROGRAM_REFERENCE_COUNT 0x1160 #define CL_PROGRAM_CONTEXT 0x1161 #define CL_PROGRAM_NUM_DEVICES 0x1162 #define CL_PROGRAM_DEVICES 0x1163 #define CL_PROGRAM_SOURCE 0x1164 #define CL_PROGRAM_BINARY_SIZES 0x1165 #define CL_PROGRAM_BINARIES 0x1166 #ifdef CL_VERSION_1_2 #define CL_PROGRAM_NUM_KERNELS 0x1167 #define CL_PROGRAM_KERNEL_NAMES 0x1168 #endif #ifdef CL_VERSION_2_1 #define CL_PROGRAM_IL 0x1169 #endif #ifdef CL_VERSION_2_2 #define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT 0x116A #define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT 0x116B #endif /* cl_program_build_info */ #define CL_PROGRAM_BUILD_STATUS 0x1181 #define CL_PROGRAM_BUILD_OPTIONS 0x1182 #define CL_PROGRAM_BUILD_LOG 0x1183 #ifdef CL_VERSION_1_2 #define CL_PROGRAM_BINARY_TYPE 0x1184 #endif #ifdef CL_VERSION_2_0 #define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185 #endif #ifdef CL_VERSION_1_2 /* cl_program_binary_type */ #define CL_PROGRAM_BINARY_TYPE_NONE 0x0 #define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 #define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 #define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 #endif /* cl_build_status */ #define CL_BUILD_SUCCESS 0 #define CL_BUILD_NONE -1 #define CL_BUILD_ERROR -2 #define CL_BUILD_IN_PROGRESS -3 /* cl_kernel_info */ #define CL_KERNEL_FUNCTION_NAME 0x1190 #define CL_KERNEL_NUM_ARGS 0x1191 #define CL_KERNEL_REFERENCE_COUNT 0x1192 #define CL_KERNEL_CONTEXT 0x1193 #define CL_KERNEL_PROGRAM 0x1194 #ifdef CL_VERSION_1_2 #define CL_KERNEL_ATTRIBUTES 0x1195 #endif #ifdef 
CL_VERSION_1_2 /* cl_kernel_arg_info */ #define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 #define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 #define CL_KERNEL_ARG_TYPE_NAME 0x1198 #define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 #define CL_KERNEL_ARG_NAME 0x119A #endif #ifdef CL_VERSION_1_2 /* cl_kernel_arg_address_qualifier */ #define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B #define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C #define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D #define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E #endif #ifdef CL_VERSION_1_2 /* cl_kernel_arg_access_qualifier */ #define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 #define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 #define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 #define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 #endif #ifdef CL_VERSION_1_2 /* cl_kernel_arg_type_qualifier */ #define CL_KERNEL_ARG_TYPE_NONE 0 #define CL_KERNEL_ARG_TYPE_CONST (1 << 0) #define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) #define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) #ifdef CL_VERSION_2_0 #define CL_KERNEL_ARG_TYPE_PIPE (1 << 3) #endif #endif /* cl_kernel_work_group_info */ #define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 #define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 #define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 #define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 #ifdef CL_VERSION_1_2 #define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 #endif #ifdef CL_VERSION_2_1 /* cl_kernel_sub_group_info */ #define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 0x2033 #define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 0x2034 #define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT 0x11B8 #define CL_KERNEL_MAX_NUM_SUB_GROUPS 0x11B9 #define CL_KERNEL_COMPILE_NUM_SUB_GROUPS 0x11BA #endif #ifdef CL_VERSION_2_0 /* cl_kernel_exec_info */ #define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6 #define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7 #endif /* cl_event_info */ #define CL_EVENT_COMMAND_QUEUE 0x11D0 #define CL_EVENT_COMMAND_TYPE 0x11D1 #define CL_EVENT_REFERENCE_COUNT 0x11D2 #define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 #ifdef CL_VERSION_1_1 #define CL_EVENT_CONTEXT 0x11D4 #endif /* cl_command_type */ #define CL_COMMAND_NDRANGE_KERNEL 0x11F0 #define CL_COMMAND_TASK 0x11F1 #define CL_COMMAND_NATIVE_KERNEL 0x11F2 #define CL_COMMAND_READ_BUFFER 0x11F3 #define CL_COMMAND_WRITE_BUFFER 0x11F4 #define CL_COMMAND_COPY_BUFFER 0x11F5 #define CL_COMMAND_READ_IMAGE 0x11F6 #define CL_COMMAND_WRITE_IMAGE 0x11F7 #define CL_COMMAND_COPY_IMAGE 0x11F8 #define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 #define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA #define CL_COMMAND_MAP_BUFFER 0x11FB #define CL_COMMAND_MAP_IMAGE 0x11FC #define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD #define CL_COMMAND_MARKER 0x11FE #define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF #define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 #ifdef CL_VERSION_1_1 #define CL_COMMAND_READ_BUFFER_RECT 0x1201 #define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 #define CL_COMMAND_COPY_BUFFER_RECT 0x1203 #define CL_COMMAND_USER 0x1204 #endif #ifdef CL_VERSION_1_2 #define CL_COMMAND_BARRIER 0x1205 #define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 #define CL_COMMAND_FILL_BUFFER 0x1207 #define CL_COMMAND_FILL_IMAGE 0x1208 #endif #ifdef CL_VERSION_2_0 #define CL_COMMAND_SVM_FREE 0x1209 #define CL_COMMAND_SVM_MEMCPY 0x120A #define CL_COMMAND_SVM_MEMFILL 0x120B #define CL_COMMAND_SVM_MAP 0x120C #define CL_COMMAND_SVM_UNMAP 0x120D #endif #ifdef CL_VERSION_3_0 #define CL_COMMAND_SVM_MIGRATE_MEM 0x120E #endif /* command execution status */ #define CL_COMPLETE 0x0 #define CL_RUNNING 0x1 #define CL_SUBMITTED 0x2 
#define CL_QUEUED 0x3 /* cl_buffer_create_type */ #ifdef CL_VERSION_1_1 #define CL_BUFFER_CREATE_TYPE_REGION 0x1220 #endif /* cl_profiling_info */ #define CL_PROFILING_COMMAND_QUEUED 0x1280 #define CL_PROFILING_COMMAND_SUBMIT 0x1281 #define CL_PROFILING_COMMAND_START 0x1282 #define CL_PROFILING_COMMAND_END 0x1283 #ifdef CL_VERSION_2_0 #define CL_PROFILING_COMMAND_COMPLETE 0x1284 #endif /* cl_device_atomic_capabilities - bitfield */ #ifdef CL_VERSION_3_0 #define CL_DEVICE_ATOMIC_ORDER_RELAXED (1 << 0) #define CL_DEVICE_ATOMIC_ORDER_ACQ_REL (1 << 1) #define CL_DEVICE_ATOMIC_ORDER_SEQ_CST (1 << 2) #define CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM (1 << 3) #define CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP (1 << 4) #define CL_DEVICE_ATOMIC_SCOPE_DEVICE (1 << 5) #define CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES (1 << 6) #endif /* cl_device_device_enqueue_capabilities - bitfield */ #ifdef CL_VERSION_3_0 #define CL_DEVICE_QUEUE_SUPPORTED (1 << 0) #define CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT (1 << 1) #endif /* cl_khronos_vendor_id */ #define CL_KHRONOS_VENDOR_ID_CODEPLAY 0x10004 #ifdef CL_VERSION_3_0 /* cl_version */ #define CL_VERSION_MAJOR_BITS (10) #define CL_VERSION_MINOR_BITS (10) #define CL_VERSION_PATCH_BITS (12) #define CL_VERSION_MAJOR_MASK ((1 << CL_VERSION_MAJOR_BITS) - 1) #define CL_VERSION_MINOR_MASK ((1 << CL_VERSION_MINOR_BITS) - 1) #define CL_VERSION_PATCH_MASK ((1 << CL_VERSION_PATCH_BITS) - 1) #define CL_VERSION_MAJOR(version) \ ((version) >> (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) #define CL_VERSION_MINOR(version) \ (((version) >> CL_VERSION_PATCH_BITS) & CL_VERSION_MINOR_MASK) #define CL_VERSION_PATCH(version) ((version) & CL_VERSION_PATCH_MASK) #define CL_MAKE_VERSION(major, minor, patch) \ ((((major) & CL_VERSION_MAJOR_MASK) \ << (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) | \ (((minor) & CL_VERSION_MINOR_MASK) << CL_VERSION_PATCH_BITS) | \ ((patch) & CL_VERSION_PATCH_MASK)) #endif /********************************************************************************************************/ /* Platform API */ extern CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; /* Device APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDs(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevices(cl_device_id in_device, const cl_device_partition_property * properties, cl_uint num_devices, cl_device_id * out_devices, cl_uint * num_devices_ret) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; #endif #ifdef CL_VERSION_2_1 extern CL_API_ENTRY cl_int CL_API_CALL clSetDefaultDeviceCommandQueue(cl_context context, cl_device_id device, cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; extern CL_API_ENTRY cl_int CL_API_CALL 
clGetDeviceAndHostTimer(cl_device_id device, cl_ulong* device_timestamp, cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1; extern CL_API_ENTRY cl_int CL_API_CALL clGetHostTimer(cl_device_id device, cl_ulong * host_timestamp) CL_API_SUFFIX__VERSION_2_1; #endif /* Context APIs */ extern CL_API_ENTRY cl_context CL_API_CALL clCreateContext(const cl_context_properties * properties, cl_uint num_devices, const cl_device_id * devices, void (CL_CALLBACK * pfn_notify)(const char * errinfo, const void * private_info, size_t cb, void * user_data), void * user_data, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties * properties, cl_device_type device_type, void (CL_CALLBACK * pfn_notify)(const char * errinfo, const void * private_info, size_t cb, void * user_data), void * user_data, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context context, cl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_3_0 extern CL_API_ENTRY cl_int CL_API_CALL clSetContextDestructorCallback(cl_context context, void (CL_CALLBACK* pfn_notify)(cl_context context, void* user_data), void* user_data) CL_API_SUFFIX__VERSION_3_0; #endif /* Command Queue APIs */ #ifdef CL_VERSION_2_0 extern CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(cl_context context, cl_device_id device, const cl_queue_properties * properties, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; #endif extern CL_API_ENTRY cl_int CL_API_CALL clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfo(cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; /* Memory Object APIs */ extern CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void * host_ptr, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_1 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBuffer(cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type buffer_create_type, const void * buffer_create_info, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; #endif #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateImage(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, const cl_image_desc * image_desc, void * host_ptr, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; #endif #ifdef CL_VERSION_2_0 extern CL_API_ENTRY cl_mem CL_API_CALL clCreatePipe(cl_context context, cl_mem_flags flags, cl_uint pipe_packet_size, cl_uint pipe_max_packets, const cl_pipe_properties * properties, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; #endif #ifdef CL_VERSION_3_0 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateBufferWithProperties(cl_context context, const cl_mem_properties * properties, cl_mem_flags flags, size_t size, void * host_ptr, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_3_0; extern CL_API_ENTRY 
cl_mem CL_API_CALL clCreateImageWithProperties(cl_context context, const cl_mem_properties * properties, cl_mem_flags flags, const cl_image_format * image_format, const cl_image_desc * image_desc, void * host_ptr, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_3_0; #endif extern CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormats(cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format * image_formats, cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfo(cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetImageInfo(cl_mem image, cl_image_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 extern CL_API_ENTRY cl_int CL_API_CALL clGetPipeInfo(cl_mem pipe, cl_pipe_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0; #endif #ifdef CL_VERSION_1_1 extern CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorCallback(cl_mem memobj, void (CL_CALLBACK * pfn_notify)(cl_mem memobj, void * user_data), void * user_data) CL_API_SUFFIX__VERSION_1_1; #endif /* SVM Allocation APIs */ #ifdef CL_VERSION_2_0 extern CL_API_ENTRY void * CL_API_CALL clSVMAlloc(cl_context context, cl_svm_mem_flags flags, size_t size, cl_uint alignment) CL_API_SUFFIX__VERSION_2_0; extern CL_API_ENTRY void CL_API_CALL clSVMFree(cl_context context, void * svm_pointer) CL_API_SUFFIX__VERSION_2_0; #endif /* Sampler APIs */ #ifdef CL_VERSION_2_0 extern CL_API_ENTRY cl_sampler CL_API_CALL clCreateSamplerWithProperties(cl_context context, const cl_sampler_properties * sampler_properties, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; #endif extern CL_API_ENTRY cl_int CL_API_CALL clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfo(cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; /* Program Object APIs */ extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSource(cl_context context, cl_uint count, const char ** strings, const size_t * lengths, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id * device_list, const size_t * lengths, const unsigned char ** binaries, cl_int * binary_status, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernels(cl_context context, cl_uint num_devices, const cl_device_id * device_list, const char * kernel_names, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; #endif #ifdef CL_VERSION_2_1 extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithIL(cl_context context, const void* il, size_t length, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; #endif extern CL_API_ENTRY cl_int CL_API_CALL 
clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program program, cl_uint num_devices, const cl_device_id * device_list, const char * options, void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), void * user_data) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_int CL_API_CALL clCompileProgram(cl_program program, cl_uint num_devices, const cl_device_id * device_list, const char * options, cl_uint num_input_headers, const cl_program * input_headers, const char ** header_include_names, void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), void * user_data) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_program CL_API_CALL clLinkProgram(cl_context context, cl_uint num_devices, const cl_device_id * device_list, const char * options, cl_uint num_input_programs, const cl_program * input_programs, void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), void * user_data, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; #endif #ifdef CL_VERSION_2_2 extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL clSetProgramReleaseCallback(cl_program program, void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), void * user_data) CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED; extern CL_API_ENTRY cl_int CL_API_CALL clSetProgramSpecializationConstant(cl_program program, cl_uint spec_id, size_t spec_size, const void* spec_value) CL_API_SUFFIX__VERSION_2_2; #endif #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_int CL_API_CALL clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; #endif extern CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfo(cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; /* Kernel Object APIs */ extern CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program, const char * kernel_name, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgram(cl_program program, cl_uint num_kernels, cl_kernel * kernels, cl_uint * num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_1 extern CL_API_ENTRY cl_kernel CL_API_CALL clCloneKernel(cl_kernel source_kernel, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; #endif extern CL_API_ENTRY cl_int CL_API_CALL clRetainKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void * arg_value) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint arg_index, const void * arg_value) CL_API_SUFFIX__VERSION_2_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelExecInfo(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void * param_value) CL_API_SUFFIX__VERSION_2_0; #endif extern CL_API_ENTRY cl_int CL_API_CALL 
clGetKernelInfo(cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfo(cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; #endif extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_1 extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelSubGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, size_t input_value_size, const void* input_value, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; #endif /* Event Object APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clWaitForEvents(cl_uint num_events, const cl_event * event_list) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetEventInfo(cl_event event, cl_event_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_1 extern CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent(cl_context context, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; #endif extern CL_API_ENTRY cl_int CL_API_CALL clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_1 extern CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatus(cl_event event, cl_int execution_status) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clSetEventCallback(cl_event event, cl_int command_exec_callback_type, void (CL_CALLBACK * pfn_notify)(cl_event event, cl_int event_command_status, void * user_data), void * user_data) CL_API_SUFFIX__VERSION_1_1; #endif /* Profiling APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfo(cl_event event, cl_profiling_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; /* Flush and Finish APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; /* Enqueued Commands APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_1 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRect(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, const size_t * buffer_origin, const size_t * host_origin, const size_t * region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_1; #endif extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t size, const void * ptr, cl_uint 
num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_1 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRect(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, const size_t * buffer_origin, const size_t * host_origin, const size_t * region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_1; #endif #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer, const void * pattern, size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; #endif extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBuffer(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_1 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferRect(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, const size_t * src_origin, const size_t * dst_origin, const size_t * region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_1; #endif extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImage(cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, const size_t * origin, const size_t * region, size_t row_pitch, size_t slice_pitch, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, const size_t * origin, const size_t * region, size_t input_row_pitch, size_t input_slice_pitch, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImage(cl_command_queue command_queue, cl_mem image, const void * fill_color, const size_t * origin, const size_t * region, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; #endif extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImage(cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, const size_t * src_origin, const size_t * dst_origin, const size_t * region, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBuffer(cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, const size_t * src_origin, const size_t * region, size_t dst_offset, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferToImage(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, size_t src_offset, const size_t * dst_origin, const size_t * region, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, 
cl_event * event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY void * CL_API_CALL clEnqueueMapBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY void * CL_API_CALL clEnqueueMapImage(cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, cl_map_flags map_flags, const size_t * origin, const size_t * region, size_t * image_row_pitch, size_t * image_slice_pitch, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObject(cl_command_queue command_queue, cl_mem memobj, void * mapped_ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjects(cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem * mem_objects, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; #endif extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t * global_work_offset, const size_t * global_work_size, const size_t * local_work_size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernel(cl_command_queue command_queue, void (CL_CALLBACK * user_func)(void *), void * args, size_t cb_args, cl_uint num_mem_objects, const cl_mem * mem_list, const void ** args_mem_loc, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitList(cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitList(cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; #endif #ifdef CL_VERSION_2_0 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFree(cl_command_queue command_queue, cl_uint num_svm_pointers, void * svm_pointers[], void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void * svm_pointers[], void * user_data), void * user_data, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_2_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpy(cl_command_queue command_queue, cl_bool blocking_copy, void * dst_ptr, const void * src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_2_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFill(cl_command_queue command_queue, void * svm_ptr, const void * pattern, size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_2_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMap(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags 
flags, void * svm_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_2_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMUnmap(cl_command_queue command_queue, void * svm_ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_2_0; #endif #ifdef CL_VERSION_2_1 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMigrateMem(cl_command_queue command_queue, cl_uint num_svm_pointers, const void ** svm_pointers, const size_t * sizes, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_2_1; #endif #ifdef CL_VERSION_1_2 /* Extension function access * * Returns the extension function address for the given function name, * or NULL if a valid function can not be found. The client must * check to make sure the address is not NULL, before using or * calling the returned function address. */ extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, const char * func_name) CL_API_SUFFIX__VERSION_1_2; #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS /* * WARNING: * This API introduces mutable state into the OpenCL implementation. It has been REMOVED * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. * * Software developers previously relying on this API are instructed to set the command queue * properties when creating the queue, instead. */ extern CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueueProperty(cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties * old_properties) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; #endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ /* Deprecated OpenCL 1.1 APIs */ extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateImage2D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, size_t image_width, size_t image_height, size_t image_row_pitch, void * host_ptr, cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateImage3D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, size_t image_width, size_t image_height, size_t image_depth, size_t image_row_pitch, size_t image_slice_pitch, void * host_ptr, cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueMarker(cl_command_queue command_queue, cl_event * event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueWaitForEvents(cl_command_queue command_queue, cl_uint num_events, const cl_event * event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueBarrier(cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL clGetExtensionFunctionAddress(const char * 
func_name) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; /* Deprecated OpenCL 2.0 APIs */ extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL clCreateSampler(cl_context context, cl_bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_H */ Oclgrind-21.10/src/CL/cl_d3d10.h000066400000000000000000000105441413315665100160160ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ #ifndef __OPENCL_CL_D3D10_H #define __OPENCL_CL_D3D10_H #if defined(_MSC_VER) #if _MSC_VER >=1500 #pragma warning( push ) #pragma warning( disable : 4201 ) #endif #endif #include #if defined(_MSC_VER) #if _MSC_VER >=1500 #pragma warning( pop ) #endif #endif #include #include #ifdef __cplusplus extern "C" { #endif /****************************************************************************** * cl_khr_d3d10_sharing */ #define cl_khr_d3d10_sharing 1 typedef cl_uint cl_d3d10_device_source_khr; typedef cl_uint cl_d3d10_device_set_khr; /******************************************************************************/ /* Error Codes */ #define CL_INVALID_D3D10_DEVICE_KHR -1002 #define CL_INVALID_D3D10_RESOURCE_KHR -1003 #define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004 #define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005 /* cl_d3d10_device_source_nv */ #define CL_D3D10_DEVICE_KHR 0x4010 #define CL_D3D10_DXGI_ADAPTER_KHR 0x4011 /* cl_d3d10_device_set_nv */ #define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012 #define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013 /* cl_context_info */ #define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 #define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C /* cl_mem_info */ #define CL_MEM_D3D10_RESOURCE_KHR 0x4015 /* cl_image_info */ #define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016 /* cl_command_type */ #define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017 #define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018 /******************************************************************************/ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void * d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id * devices, cl_uint 
* num_devices) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_D3D10_H */ Oclgrind-21.10/src/CL/cl_d3d11.h000066400000000000000000000105361413315665100160200ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
******************************************************************************/ #ifndef __OPENCL_CL_D3D11_H #define __OPENCL_CL_D3D11_H #if defined(_MSC_VER) #if _MSC_VER >=1500 #pragma warning( push ) #pragma warning( disable : 4201 ) #endif #endif #include #if defined(_MSC_VER) #if _MSC_VER >=1500 #pragma warning( pop ) #endif #endif #include #include #ifdef __cplusplus extern "C" { #endif /****************************************************************************** * cl_khr_d3d11_sharing */ #define cl_khr_d3d11_sharing 1 typedef cl_uint cl_d3d11_device_source_khr; typedef cl_uint cl_d3d11_device_set_khr; /******************************************************************************/ /* Error Codes */ #define CL_INVALID_D3D11_DEVICE_KHR -1006 #define CL_INVALID_D3D11_RESOURCE_KHR -1007 #define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008 #define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009 /* cl_d3d11_device_source */ #define CL_D3D11_DEVICE_KHR 0x4019 #define CL_D3D11_DXGI_ADAPTER_KHR 0x401A /* cl_d3d11_device_set */ #define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B #define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C /* cl_context_info */ #define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D #define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D /* cl_mem_info */ #define CL_MEM_D3D11_RESOURCE_KHR 0x401E /* cl_image_info */ #define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F /* cl_command_type */ #define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020 #define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021 /******************************************************************************/ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void * d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_D3D11_H */ Oclgrind-21.10/src/CL/cl_dx9_media_sharing.h000066400000000000000000000107161413315665100205620ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ #ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H #define __OPENCL_CL_DX9_MEDIA_SHARING_H #include #include #ifdef __cplusplus extern "C" { #endif /******************************************************************************/ /* cl_khr_dx9_media_sharing */ #define cl_khr_dx9_media_sharing 1 typedef cl_uint cl_dx9_media_adapter_type_khr; typedef cl_uint cl_dx9_media_adapter_set_khr; #if defined(_WIN32) #include typedef struct _cl_dx9_surface_info_khr { IDirect3DSurface9 *resource; HANDLE shared_handle; } cl_dx9_surface_info_khr; #endif /******************************************************************************/ /* Error Codes */ #define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010 #define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011 #define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012 #define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013 /* cl_media_adapter_type_khr */ #define CL_ADAPTER_D3D9_KHR 0x2020 #define CL_ADAPTER_D3D9EX_KHR 0x2021 #define CL_ADAPTER_DXVA_KHR 0x2022 /* cl_media_adapter_set_khr */ #define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023 #define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024 /* cl_context_info */ #define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025 #define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026 #define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027 /* cl_mem_info */ #define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028 #define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029 /* cl_image_info */ #define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A /* cl_command_type */ #define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B #define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C /******************************************************************************/ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr * media_adapter_type, void * media_adapters, cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, void * surface_info, cl_uint plane, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */ 
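/* Illustrative sketch (not part of the upstream Khronos header): the
 * cl_khr_dx9_media_sharing entry points above are exposed only through the
 * *_fn pointer typedefs, so an application would typically resolve them at
 * runtime with clGetExtensionFunctionAddressForPlatform before calling them.
 * The variables platform, adapter_type, adapter, devices, num_devices and err
 * below are assumed to be declared and initialised by the caller.
 *
 *   clGetDeviceIDsFromDX9MediaAdapterKHR_fn pfn =
 *     (clGetDeviceIDsFromDX9MediaAdapterKHR_fn)
 *       clGetExtensionFunctionAddressForPlatform(
 *         platform, "clGetDeviceIDsFromDX9MediaAdapterKHR");
 *   if (pfn)
 *     err = pfn(platform, 1, &adapter_type, &adapter,
 *               CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR,
 *               1, devices, &num_devices);
 */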
Oclgrind-21.10/src/CL/cl_dx9_media_sharing_intel.h000066400000000000000000000151661413315665100217610ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ /*****************************************************************************\ Copyright (c) 2013-2019 Intel Corporation All Rights Reserved. THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. File Name: cl_dx9_media_sharing_intel.h Abstract: Notes: \*****************************************************************************/ #ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H #define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /*************************************** * cl_intel_dx9_media_sharing extension * ****************************************/ #define cl_intel_dx9_media_sharing 1 typedef cl_uint cl_dx9_device_source_intel; typedef cl_uint cl_dx9_device_set_intel; /* error codes */ #define CL_INVALID_DX9_DEVICE_INTEL -1010 #define CL_INVALID_DX9_RESOURCE_INTEL -1011 #define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012 #define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013 /* cl_dx9_device_source_intel */ #define CL_D3D9_DEVICE_INTEL 0x4022 #define CL_D3D9EX_DEVICE_INTEL 0x4070 #define CL_DXVA_DEVICE_INTEL 0x4071 /* cl_dx9_device_set_intel */ #define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024 #define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025 /* cl_context_info */ #define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026 #define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072 #define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073 /* cl_mem_info */ #define CL_MEM_DX9_RESOURCE_INTEL 0x4027 #define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074 /* cl_image_info */ #define CL_IMAGE_DX9_PLANE_INTEL 0x4075 /* cl_command_type */ #define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A #define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B /******************************************************************************/ extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9INTEL( cl_platform_id platform, cl_dx9_device_source_intel dx9_device_source, void* dx9_object, cl_dx9_device_set_intel dx9_device_set, cl_uint 
num_entries, cl_device_id* devices, cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)( cl_platform_id platform, cl_dx9_device_source_intel dx9_device_source, void* dx9_object, cl_dx9_device_set_intel dx9_device_set, cl_uint num_entries, cl_device_id* devices, cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceINTEL( cl_context context, cl_mem_flags flags, IDirect3DSurface9* resource, HANDLE sharedHandle, UINT plane, cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)( cl_context context, cl_mem_flags flags, IDirect3DSurface9* resource, HANDLE sharedHandle, UINT plane, cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9ObjectsINTEL( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9ObjectsINTEL( cl_command_queue command_queue, cl_uint num_objects, cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)( cl_command_queue command_queue, cl_uint num_objects, cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */ Oclgrind-21.10/src/CL/cl_egl.h000066400000000000000000000106061413315665100157510ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
******************************************************************************/ #ifndef __OPENCL_CL_EGL_H #define __OPENCL_CL_EGL_H #include #ifdef __cplusplus extern "C" { #endif /* Command type for events created with clEnqueueAcquireEGLObjectsKHR */ #define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F #define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D #define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E /* Error type for clCreateFromEGLImageKHR */ #define CL_INVALID_EGL_OBJECT_KHR -1093 #define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092 /* CLeglImageKHR is an opaque handle to an EGLImage */ typedef void* CLeglImageKHR; /* CLeglDisplayKHR is an opaque handle to an EGLDisplay */ typedef void* CLeglDisplayKHR; /* CLeglSyncKHR is an opaque handle to an EGLSync object */ typedef void* CLeglSyncKHR; /* properties passed to clCreateFromEGLImageKHR */ typedef intptr_t cl_egl_image_properties_khr; #define cl_khr_egl_image 1 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromEGLImageKHR(cl_context context, CLeglDisplayKHR egldisplay, CLeglImageKHR eglimage, cl_mem_flags flags, const cl_egl_image_properties_khr * properties, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( cl_context context, CLeglDisplayKHR egldisplay, CLeglImageKHR eglimage, cl_mem_flags flags, const cl_egl_image_properties_khr * properties, cl_int * errcode_ret); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event); #define cl_khr_egl_event 1 extern CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromEGLSyncKHR(cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, cl_int * errcode_ret); #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_EGL_H */ Oclgrind-21.10/src/CL/cl_ext.h000066400000000000000000001073751413315665100160140ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ /* cl_ext.h contains OpenCL extensions which don't have external */ /* (OpenGL, D3D) dependencies. */ #ifndef __CL_EXT_H #define __CL_EXT_H #ifdef __cplusplus extern "C" { #endif #include /* cl_khr_fp64 extension - no extension #define since it has no functions */ /* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */ #if CL_TARGET_OPENCL_VERSION <= 110 #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 #endif /* cl_khr_fp16 extension - no extension #define since it has no functions */ #define CL_DEVICE_HALF_FP_CONFIG 0x1033 /* Memory object destruction * * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR * * Registers a user callback function that will be called when the memory object is deleted and its resources * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback * stack associated with memobj. The registered user callback functions are called in the reverse order in * which they were registered. The user callback functions are called and then the memory object is deleted * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be * notified when the memory referenced by host_ptr, specified when the memory object is created and used as * the storage bits for the memory object, can be reused or freed. * * The application may not call CL api's with the cl_mem object passed to the pfn_notify. * * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) * before using. */ #define cl_APPLE_SetMemObjectDestructor 1 cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj, void (* pfn_notify)(cl_mem memobj, void * user_data), void * user_data) CL_EXT_SUFFIX__VERSION_1_0; /* Context Logging Functions * * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) * before using. 
* * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger */ #define cl_APPLE_ContextLoggingFunctions 1 extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr, const void * private_info, size_t cb, void * user_data) CL_EXT_SUFFIX__VERSION_1_0; /* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr, const void * private_info, size_t cb, void * user_data) CL_EXT_SUFFIX__VERSION_1_0; /* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr, const void * private_info, size_t cb, void * user_data) CL_EXT_SUFFIX__VERSION_1_0; /************************ * cl_khr_icd extension * ************************/ #define cl_khr_icd 1 /* cl_platform_info */ #define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 /* Additional Error Codes */ #define CL_PLATFORM_NOT_FOUND_KHR -1001 extern CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms); typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms); /******************************* * cl_khr_il_program extension * *******************************/ #define cl_khr_il_program 1 /* New property to clGetDeviceInfo for retrieving supported intermediate * languages */ #define CL_DEVICE_IL_VERSION_KHR 0x105B /* New property to clGetProgramInfo for retrieving for retrieving the IL of a * program */ #define CL_PROGRAM_IL_KHR 0x1169 extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithILKHR(cl_context context, const void * il, size_t length, cl_int * errcode_ret); typedef CL_API_ENTRY cl_program (CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context, const void * il, size_t length, cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; /* Extension: cl_khr_image2d_from_buffer * * This extension allows a 2D image to be created from a cl_mem buffer without * a copy. The type associated with a 2D image created from a buffer in an * OpenCL program is image2d_t. Both the sampler and sampler-less read_image * built-in functions are supported for 2D images and 2D images created from * a buffer. Similarly, the write_image built-ins are also supported for 2D * images created from a buffer. * * When the 2D image from buffer is created, the client must specify the * width, height, image format (i.e. channel order and channel data type) * and optionally the row pitch. * * The pitch specified must be a multiple of * CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels. * The base address of the buffer must be aligned to * CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels. 
*/ #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B /************************************** * cl_khr_initialize_memory extension * **************************************/ #define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030 /************************************** * cl_khr_terminate_context extension * **************************************/ #define CL_CONTEXT_TERMINATED_KHR -1121 #define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031 #define CL_CONTEXT_TERMINATE_KHR 0x2032 #define cl_khr_terminate_context 1 extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; /* * Extension: cl_khr_spir * * This extension adds support to create an OpenCL program object from a * Standard Portable Intermediate Representation (SPIR) instance */ #define CL_DEVICE_SPIR_VERSIONS 0x40E0 #define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 /***************************************** * cl_khr_create_command_queue extension * *****************************************/ #define cl_khr_create_command_queue 1 typedef cl_properties cl_queue_properties_khr; extern CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueueWithPropertiesKHR(cl_context context, cl_device_id device, const cl_queue_properties_khr* properties, cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_command_queue (CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context, cl_device_id device, const cl_queue_properties_khr* properties, cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; /****************************************** * cl_nv_device_attribute_query extension * ******************************************/ /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 #define CL_DEVICE_WARP_SIZE_NV 0x4003 #define CL_DEVICE_GPU_OVERLAP_NV 0x4004 #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 /********************************* * cl_amd_device_attribute_query * *********************************/ #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 #define CL_DEVICE_TOPOLOGY_AMD 0x4037 #define CL_DEVICE_BOARD_NAME_AMD 0x4038 #define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 #define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 #define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 #define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 #define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 #define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 #define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 #define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 #define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 #define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 #define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A #define CL_DEVICE_GFXIP_MINOR_AMD 0x404B #define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C #define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030 #define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031 #define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033 #define CL_DEVICE_PCIE_ID_AMD 0x4034 /********************************* * cl_arm_printf extension *********************************/ #define CL_PRINTF_CALLBACK_ARM 0x40B0 #define CL_PRINTF_BUFFERSIZE_ARM 
0x40B1 /*********************************** * cl_ext_device_fission extension ***********************************/ #define cl_ext_device_fission 1 extern CL_API_ENTRY cl_int CL_API_CALL clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; typedef cl_ulong cl_device_partition_property_ext; extern CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevicesEXT(cl_device_id in_device, const cl_device_partition_property_ext * properties, cl_uint num_entries, cl_device_id * out_devices, cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int (CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device, const cl_device_partition_property_ext * properties, cl_uint num_entries, cl_device_id * out_devices, cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; /* cl_device_partition_property_ext */ #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 /* clDeviceGetInfo selectors */ #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 #define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 #define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 /* error codes */ #define CL_DEVICE_PARTITION_FAILED_EXT -1057 #define CL_INVALID_PARTITION_COUNT_EXT -1058 #define CL_INVALID_PARTITION_NAME_EXT -1059 /* CL_AFFINITY_DOMAINs */ #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 /* cl_device_partition_property_ext list terminators */ #define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) #define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) #define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) /*********************************** * cl_ext_migrate_memobject extension definitions ***********************************/ #define cl_ext_migrate_memobject 1 typedef cl_bitfield cl_mem_migration_flags_ext; #define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1 #define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem * mem_objects, cl_mem_migration_flags_ext flags, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event); typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem * mem_objects, cl_mem_migration_flags_ext flags, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event); /********************************* * cl_ext_cxx_for_opencl extension *********************************/ #define cl_ext_cxx_for_opencl 1 #define CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT 0x4230 /********************************* * cl_qcom_ext_host_ptr extension *********************************/ #define cl_qcom_ext_host_ptr 1 
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) #define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 #define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 #define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 #define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 #define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 #define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 #define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 #define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 typedef cl_uint cl_image_pitch_info_qcom; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceImageInfoQCOM(cl_device_id device, size_t image_width, size_t image_height, const cl_image_format *image_format, cl_image_pitch_info_qcom param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); typedef struct _cl_mem_ext_host_ptr { /* Type of external memory allocation. */ /* Legal values will be defined in layered extensions. */ cl_uint allocation_type; /* Host cache policy for this external memory allocation. */ cl_uint host_cache_policy; } cl_mem_ext_host_ptr; /******************************************* * cl_qcom_ext_host_ptr_iocoherent extension ********************************************/ /* Cache policy specifying io-coherence */ #define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9 /********************************* * cl_qcom_ion_host_ptr extension *********************************/ #define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 typedef struct _cl_mem_ion_host_ptr { /* Type of external memory allocation. */ /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */ cl_mem_ext_host_ptr ext_host_ptr; /* ION file descriptor */ int ion_filedesc; /* Host pointer to the ION allocated memory */ void* ion_hostptr; } cl_mem_ion_host_ptr; /********************************* * cl_qcom_android_native_buffer_host_ptr extension *********************************/ #define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6 typedef struct _cl_mem_android_native_buffer_host_ptr { /* Type of external memory allocation. */ /* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. 
*/ cl_mem_ext_host_ptr ext_host_ptr; /* Virtual pointer to the android native buffer */ void* anb_ptr; } cl_mem_android_native_buffer_host_ptr; /****************************************** * cl_img_yuv_image extension * ******************************************/ /* Image formats used in clCreateImage */ #define CL_NV21_IMG 0x40D0 #define CL_YV12_IMG 0x40D1 /****************************************** * cl_img_cached_allocations extension * ******************************************/ /* Flag values used by clCreateBuffer */ #define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26) #define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27) /****************************************** * cl_img_use_gralloc_ptr extension * ******************************************/ #define cl_img_use_gralloc_ptr 1 /* Flag values used by clCreateBuffer */ #define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28) /* To be used by clGetEventInfo: */ #define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2 #define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3 /* Error codes from clEnqueueAcquireGrallocObjectsIMG and clEnqueueReleaseGrallocObjectsIMG */ #define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4 #define CL_INVALID_GRALLOC_OBJECT_IMG 0x40D5 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; /****************************************** * cl_img_generate_mipmap extension * ******************************************/ #define cl_img_generate_mipmap 1 typedef cl_uint cl_mipmap_filter_mode_img; /* To be used by clEnqueueGenerateMipmapIMG */ #define CL_MIPMAP_FILTER_ANY_IMG 0x0 #define CL_MIPMAP_FILTER_BOX_IMG 0x1 /* To be used by clGetEventInfo */ #define CL_COMMAND_GENERATE_MIPMAP_IMG 0x40D6 extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueGenerateMipmapIMG(cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, cl_mipmap_filter_mode_img mipmap_filter_mode, const size_t *array_region, const size_t *mip_region, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_EXT_SUFFIX__VERSION_1_2; /****************************************** * cl_img_mem_properties extension * ******************************************/ #define cl_img_mem_properties 1 /* To be used by clCreateBufferWithProperties */ #define CL_MEM_ALLOC_FLAGS_IMG 0x40D7 /* To be used wiith the CL_MEM_ALLOC_FLAGS_IMG property */ typedef cl_bitfield cl_mem_alloc_flags_img; /* To be used with cl_mem_alloc_flags_img */ #define CL_MEM_ALLOC_RELAX_REQUIREMENTS_IMG (1 << 0) /********************************* * cl_khr_subgroups extension *********************************/ #define cl_khr_subgroups 1 #if !defined(CL_VERSION_2_1) /* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h. In hindsight, there should have been a khr suffix on this type for the extension, but keeping it un-suffixed to maintain backwards compatibility. 
*/ typedef cl_uint cl_kernel_sub_group_info; #endif /* cl_kernel_sub_group_info */ #define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033 #define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034 extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelSubGroupInfoKHR(cl_kernel in_kernel, cl_device_id in_device, cl_kernel_sub_group_info param_name, size_t input_value_size, const void * input_value, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel, cl_device_id in_device, cl_kernel_sub_group_info param_name, size_t input_value_size, const void * input_value, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; /********************************* * cl_khr_mipmap_image extension *********************************/ /* cl_sampler_properties */ #define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155 #define CL_SAMPLER_LOD_MIN_KHR 0x1156 #define CL_SAMPLER_LOD_MAX_KHR 0x1157 /********************************* * cl_khr_priority_hints extension *********************************/ /* This extension define is for backwards compatibility. It shouldn't be required since this extension has no new functions. */ #define cl_khr_priority_hints 1 typedef cl_uint cl_queue_priority_khr; /* cl_command_queue_properties */ #define CL_QUEUE_PRIORITY_KHR 0x1096 /* cl_queue_priority_khr */ #define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0) #define CL_QUEUE_PRIORITY_MED_KHR (1<<1) #define CL_QUEUE_PRIORITY_LOW_KHR (1<<2) /********************************* * cl_khr_throttle_hints extension *********************************/ /* This extension define is for backwards compatibility. It shouldn't be required since this extension has no new functions. */ #define cl_khr_throttle_hints 1 typedef cl_uint cl_queue_throttle_khr; /* cl_command_queue_properties */ #define CL_QUEUE_THROTTLE_KHR 0x1097 /* cl_queue_throttle_khr */ #define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0) #define CL_QUEUE_THROTTLE_MED_KHR (1<<1) #define CL_QUEUE_THROTTLE_LOW_KHR (1<<2) /********************************* * cl_khr_subgroup_named_barrier *********************************/ /* This extension define is for backwards compatibility. It shouldn't be required since this extension has no new functions. 
*/ #define cl_khr_subgroup_named_barrier 1 /* cl_device_info */ #define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035 /********************************* * cl_khr_extended_versioning *********************************/ #define cl_khr_extended_versioning 1 #define CL_VERSION_MAJOR_BITS_KHR (10) #define CL_VERSION_MINOR_BITS_KHR (10) #define CL_VERSION_PATCH_BITS_KHR (12) #define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) #define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) #define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1) #define CL_VERSION_MAJOR_KHR(version) ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) #define CL_VERSION_MINOR_KHR(version) (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR) #define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR) #define CL_MAKE_VERSION_KHR(major, minor, patch) \ ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \ (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ ((patch) & CL_VERSION_PATCH_MASK_KHR)) typedef cl_uint cl_version_khr; #define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64 typedef struct _cl_name_version_khr { cl_version_khr version; char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR]; } cl_name_version_khr; /* cl_platform_info */ #define CL_PLATFORM_NUMERIC_VERSION_KHR 0x0906 #define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR 0x0907 /* cl_device_info */ #define CL_DEVICE_NUMERIC_VERSION_KHR 0x105E #define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR 0x105F #define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR 0x1060 #define CL_DEVICE_ILS_WITH_VERSION_KHR 0x1061 #define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR 0x1062 /********************************* * cl_khr_device_uuid extension *********************************/ #define cl_khr_device_uuid 1 #define CL_UUID_SIZE_KHR 16 #define CL_LUID_SIZE_KHR 8 #define CL_DEVICE_UUID_KHR 0x106A #define CL_DRIVER_UUID_KHR 0x106B #define CL_DEVICE_LUID_VALID_KHR 0x106C #define CL_DEVICE_LUID_KHR 0x106D #define CL_DEVICE_NODE_MASK_KHR 0x106E /********************************** * cl_arm_import_memory extension * **********************************/ #define cl_arm_import_memory 1 typedef intptr_t cl_import_properties_arm; /* Default and valid proporties name for cl_arm_import_memory */ #define CL_IMPORT_TYPE_ARM 0x40B2 /* Host process memory type default value for CL_IMPORT_TYPE_ARM property */ #define CL_IMPORT_TYPE_HOST_ARM 0x40B3 /* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */ #define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4 /* Protected memory property */ #define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5 /* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */ #define CL_IMPORT_TYPE_ANDROID_HARDWARE_BUFFER_ARM 0x41E2 /* Data consistency with host property */ #define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3 /* Import memory size value to indicate a size for the whole buffer */ #define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX /* This extension adds a new function that allows for direct memory import into * OpenCL via the clImportMemoryARM function. * * Memory imported through this interface will be mapped into the device's page * tables directly, providing zero copy access. It will never fall back to copy * operations and aliased buffers. * * Types of memory supported for import are specified as additional extension * strings. 
* * This extension produces cl_mem allocations which are compatible with all other * users of cl_mem in the standard API. * * This extension maps pages with the same properties as the normal buffer creation * function clCreateBuffer. */ extern CL_API_ENTRY cl_mem CL_API_CALL clImportMemoryARM( cl_context context, cl_mem_flags flags, const cl_import_properties_arm *properties, void *memory, size_t size, cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; /****************************************** * cl_arm_shared_virtual_memory extension * ******************************************/ #define cl_arm_shared_virtual_memory 1 /* Used by clGetDeviceInfo */ #define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6 /* Used by clGetMemObjectInfo */ #define CL_MEM_USES_SVM_POINTER_ARM 0x40B7 /* Used by clSetKernelExecInfoARM: */ #define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8 #define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9 /* To be used by clGetEventInfo: */ #define CL_COMMAND_SVM_FREE_ARM 0x40BA #define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB #define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC #define CL_COMMAND_SVM_MAP_ARM 0x40BD #define CL_COMMAND_SVM_UNMAP_ARM 0x40BE /* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */ #define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0) #define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1) #define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2) #define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3) /* Flag values used by clSVMAllocARM: */ #define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10) #define CL_MEM_SVM_ATOMICS_ARM (1 << 11) typedef cl_bitfield cl_svm_mem_flags_arm; typedef cl_uint cl_kernel_exec_info_arm; typedef cl_bitfield cl_device_svm_capabilities_arm; extern CL_API_ENTRY void * CL_API_CALL clSVMAllocARM(cl_context context, cl_svm_mem_flags_arm flags, size_t size, cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY void CL_API_CALL clSVMFreeARM(cl_context context, void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFreeARM(cl_command_queue command_queue, cl_uint num_svm_pointers, void * svm_pointers[], void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void * svm_pointers[], void * user_data), void * user_data, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpyARM(cl_command_queue command_queue, cl_bool blocking_copy, void * dst_ptr, const void * src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFillARM(cl_command_queue command_queue, void * svm_ptr, const void * pattern, size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMapARM(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags flags, void * svm_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMUnmapARM(cl_command_queue command_queue, void * svm_ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerARM(cl_kernel kernel, 
cl_uint arg_index, const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelExecInfoARM(cl_kernel kernel, cl_kernel_exec_info_arm param_name, size_t param_value_size, const void * param_value) CL_EXT_SUFFIX__VERSION_1_2; /******************************** * cl_arm_get_core_id extension * ********************************/ #ifdef CL_VERSION_1_2 #define cl_arm_get_core_id 1 /* Device info property for bitfield of cores present */ #define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF #endif /* CL_VERSION_1_2 */ /********************************* * cl_arm_job_slot_selection *********************************/ #define cl_arm_job_slot_selection 1 /* cl_device_info */ #define CL_DEVICE_JOB_SLOTS_ARM 0x41E0 /* cl_command_queue_properties */ #define CL_QUEUE_JOB_SLOT_ARM 0x41E1 /********************************* * cl_arm_scheduling_controls *********************************/ #define cl_arm_scheduling_controls 1 /* cl_device_info */ #define CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM 0x41E4 #define CL_DEVICE_SCHEDULING_KERNEL_BATCHING_ARM (1 << 0) #define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_ARM (1 << 1) #define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_MODIFIER_ARM (1 << 2) /* cl_kernel_info */ #define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM 0x41E5 #define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM 0x41E6 /* cl_queue_properties */ #define CL_QUEUE_KERNEL_BATCHING_ARM 0x41E7 #ifdef __cplusplus } #endif #endif /* __CL_EXT_H */ Oclgrind-21.10/src/CL/cl_ext_intel.h000066400000000000000000000707111413315665100172000ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ /*****************************************************************************\ Copyright (c) 2013-2020 Intel Corporation All Rights Reserved. THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
File Name: cl_ext_intel.h Abstract: Notes: \*****************************************************************************/ #ifndef __CL_EXT_INTEL_H #define __CL_EXT_INTEL_H #include #include #ifdef __cplusplus extern "C" { #endif /*************************************** * cl_intel_thread_local_exec extension * ****************************************/ #define cl_intel_thread_local_exec 1 #define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31) /*********************************************** * cl_intel_device_partition_by_names extension * ************************************************/ #define cl_intel_device_partition_by_names 1 #define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 #define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1 /************************************************ * cl_intel_accelerator extension * * cl_intel_motion_estimation extension * * cl_intel_advanced_motion_estimation extension * *************************************************/ #define cl_intel_accelerator 1 #define cl_intel_motion_estimation 1 #define cl_intel_advanced_motion_estimation 1 typedef struct _cl_accelerator_intel* cl_accelerator_intel; typedef cl_uint cl_accelerator_type_intel; typedef cl_uint cl_accelerator_info_intel; typedef struct _cl_motion_estimation_desc_intel { cl_uint mb_block_type; cl_uint subpixel_mode; cl_uint sad_adjust_mode; cl_uint search_path_type; } cl_motion_estimation_desc_intel; /* error codes */ #define CL_INVALID_ACCELERATOR_INTEL -1094 #define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 #define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096 #define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 /* cl_accelerator_type_intel */ #define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 /* cl_accelerator_info_intel */ #define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 #define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091 #define CL_ACCELERATOR_CONTEXT_INTEL 0x4092 #define CL_ACCELERATOR_TYPE_INTEL 0x4093 /* cl_motion_detect_desc_intel flags */ #define CL_ME_MB_TYPE_16x16_INTEL 0x0 #define CL_ME_MB_TYPE_8x8_INTEL 0x1 #define CL_ME_MB_TYPE_4x4_INTEL 0x2 #define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 #define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 #define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 #define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 #define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 #define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 #define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 #define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 #define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0 #define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1 #define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2 #define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4 #define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1 #define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2 #define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3 #define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16 #define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21 #define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32 #define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43 #define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48 #define CL_ME_COST_PENALTY_NONE_INTEL 0x0 #define CL_ME_COST_PENALTY_LOW_INTEL 0x1 #define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2 #define CL_ME_COST_PENALTY_HIGH_INTEL 0x3 #define CL_ME_COST_PRECISION_QPEL_INTEL 0x0 #define CL_ME_COST_PRECISION_HPEL_INTEL 0x1 #define CL_ME_COST_PRECISION_PEL_INTEL 0x2 #define CL_ME_COST_PRECISION_DPEL_INTEL 0x3 #define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 #define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 #define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 #define 
CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 #define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 #define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 #define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 #define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 #define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 #define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 #define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 #define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 #define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 #define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 /* cl_device_info */ #define CL_DEVICE_ME_VERSION_INTEL 0x407E #define CL_ME_VERSION_LEGACY_INTEL 0x0 #define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1 #define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2 extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL clCreateAcceleratorINTEL( cl_context context, cl_accelerator_type_intel accelerator_type, size_t descriptor_size, const void* descriptor, cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)( cl_context context, cl_accelerator_type_intel accelerator_type, size_t descriptor_size, const void* descriptor, cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetAcceleratorInfoINTEL( cl_accelerator_intel accelerator, cl_accelerator_info_intel param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)( cl_accelerator_intel accelerator, cl_accelerator_info_intel param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clRetainAcceleratorINTEL( cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)( cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseAcceleratorINTEL( cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)( cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; /****************************************** * cl_intel_simultaneous_sharing extension * *******************************************/ #define cl_intel_simultaneous_sharing 1 #define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 #define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 /*********************************** * cl_intel_egl_image_yuv extension * ************************************/ #define cl_intel_egl_image_yuv 1 #define CL_EGL_YUV_PLANE_INTEL 0x4107 /******************************** * cl_intel_packed_yuv extension * *********************************/ #define cl_intel_packed_yuv 1 #define CL_YUYV_INTEL 0x4076 #define CL_UYVY_INTEL 0x4077 #define CL_YVYU_INTEL 0x4078 #define CL_VYUY_INTEL 0x4079 /******************************************** * cl_intel_required_subgroup_size extension * *********************************************/ #define cl_intel_required_subgroup_size 1 #define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 #define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109 #define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A /**************************************** * cl_intel_driver_diagnostics extension * *****************************************/ #define cl_intel_driver_diagnostics 1 typedef cl_uint 
cl_diagnostics_verbose_level; #define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106 #define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff ) #define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 ) #define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 ) #define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 ) /******************************** * cl_intel_planar_yuv extension * *********************************/ #define CL_NV12_INTEL 0x410E #define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 ) #define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 ) #define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E #define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F /******************************************************* * cl_intel_device_side_avc_motion_estimation extension * ********************************************************/ #define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B #define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C #define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D #define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */ #define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */ #define CL_AVC_ME_MAJOR_16x16_INTEL 0x0 #define CL_AVC_ME_MAJOR_16x8_INTEL 0x1 #define CL_AVC_ME_MAJOR_8x16_INTEL 0x2 #define CL_AVC_ME_MAJOR_8x8_INTEL 0x3 #define CL_AVC_ME_MINOR_8x8_INTEL 0x0 #define CL_AVC_ME_MINOR_8x4_INTEL 0x1 #define CL_AVC_ME_MINOR_4x8_INTEL 0x2 #define CL_AVC_ME_MINOR_4x4_INTEL 0x3 #define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0 #define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 #define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 #define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 #define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E #define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D #define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B #define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 #define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F #define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F #define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F #define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 #define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 #define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 #define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 #define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 #define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 #define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 #define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 #define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 #define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9 #define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2 #define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa #define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 #define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 #define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 #define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 #define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 #define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 #define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 #define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 #define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 #define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 #define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 #define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 #define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B #define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 #define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 #define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 #define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 #define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 #define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 #define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 
0x4000 #define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) #define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) #define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 ) #define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 ) #define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 #define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 #define CL_AVC_ME_INTRA_16x16_INTEL 0x0 #define CL_AVC_ME_INTRA_8x8_INTEL 0x1 #define CL_AVC_ME_INTRA_4x4_INTEL 0x2 #define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 #define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 #define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 #define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 #define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 #define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 #define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 #define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 #define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 #define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 #define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 #define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 #define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 #define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 #define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 #define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 #define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 #define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 #define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 #define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 #define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 #define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1 #define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2 #define CL_AVC_ME_FRAME_DUAL_INTEL 0x3 #define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 #define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 #define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 #define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 #define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 /******************************************* * cl_intel_unified_shared_memory extension * ********************************************/ /* These APIs are in sync with Revision Q of the cl_intel_unified_shared_memory spec! 
*/ #define cl_intel_unified_shared_memory 1 /* cl_device_info */ #define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190 #define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191 #define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192 #define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193 #define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194 typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel; /* cl_device_unified_shared_memory_capabilities_intel - bitfield */ #define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0) #define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1) #define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2) #define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3) typedef cl_properties cl_mem_properties_intel; /* cl_mem_properties_intel */ #define CL_MEM_ALLOC_FLAGS_INTEL 0x4195 typedef cl_bitfield cl_mem_alloc_flags_intel; /* cl_mem_alloc_flags_intel - bitfield */ #define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0) typedef cl_uint cl_mem_info_intel; /* cl_mem_alloc_info_intel */ #define CL_MEM_ALLOC_TYPE_INTEL 0x419A #define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B #define CL_MEM_ALLOC_SIZE_INTEL 0x419C #define CL_MEM_ALLOC_DEVICE_INTEL 0x419D /* Enum values 0x419E-0x419F are reserved for future queries. */ typedef cl_uint cl_unified_shared_memory_type_intel; /* cl_unified_shared_memory_type_intel */ #define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196 #define CL_MEM_TYPE_HOST_INTEL 0x4197 #define CL_MEM_TYPE_DEVICE_INTEL 0x4198 #define CL_MEM_TYPE_SHARED_INTEL 0x4199 typedef cl_uint cl_mem_advice_intel; /* cl_mem_advice_intel */ /* Enum values 0x4208-0x420F are reserved for future memory advices. */ /* cl_kernel_exec_info */ #define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200 #define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201 #define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202 #define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203 /* cl_command_type */ #define CL_COMMAND_MEMFILL_INTEL 0x4204 #define CL_COMMAND_MEMCPY_INTEL 0x4205 #define CL_COMMAND_MIGRATEMEM_INTEL 0x4206 #define CL_COMMAND_MEMADVISE_INTEL 0x4207 extern CL_API_ENTRY void* CL_API_CALL clHostMemAllocINTEL( cl_context context, const cl_mem_properties_intel* properties, size_t size, cl_uint alignment, cl_int* errcode_ret); typedef CL_API_ENTRY void* (CL_API_CALL * clHostMemAllocINTEL_fn)( cl_context context, const cl_mem_properties_intel* properties, size_t size, cl_uint alignment, cl_int* errcode_ret); extern CL_API_ENTRY void* CL_API_CALL clDeviceMemAllocINTEL( cl_context context, cl_device_id device, const cl_mem_properties_intel* properties, size_t size, cl_uint alignment, cl_int* errcode_ret); typedef CL_API_ENTRY void* (CL_API_CALL * clDeviceMemAllocINTEL_fn)( cl_context context, cl_device_id device, const cl_mem_properties_intel* properties, size_t size, cl_uint alignment, cl_int* errcode_ret); extern CL_API_ENTRY void* CL_API_CALL clSharedMemAllocINTEL( cl_context context, cl_device_id device, const cl_mem_properties_intel* properties, size_t size, cl_uint alignment, cl_int* errcode_ret); typedef CL_API_ENTRY void* (CL_API_CALL * clSharedMemAllocINTEL_fn)( cl_context context, cl_device_id device, const cl_mem_properties_intel* properties, size_t size, cl_uint alignment, cl_int* errcode_ret); extern CL_API_ENTRY cl_int CL_API_CALL clMemFreeINTEL( cl_context context, void* ptr); typedef CL_API_ENTRY cl_int (CL_API_CALL * clMemFreeINTEL_fn)( cl_context context, void* ptr); extern CL_API_ENTRY cl_int CL_API_CALL 
clMemBlockingFreeINTEL( cl_context context, void* ptr); typedef CL_API_ENTRY cl_int (CL_API_CALL * clMemBlockingFreeINTEL_fn)( cl_context context, void* ptr); extern CL_API_ENTRY cl_int CL_API_CALL clGetMemAllocInfoINTEL( cl_context context, const void* ptr, cl_mem_info_intel param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret); typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetMemAllocInfoINTEL_fn)( cl_context context, const void* ptr, cl_mem_info_intel param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret); extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL( cl_kernel kernel, cl_uint arg_index, const void* arg_value); typedef CL_API_ENTRY cl_int (CL_API_CALL * clSetKernelArgMemPointerINTEL_fn)( cl_kernel kernel, cl_uint arg_index, const void* arg_value); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemsetINTEL( /* Deprecated */ cl_command_queue command_queue, void* dst_ptr, cl_int value, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); typedef CL_API_ENTRY cl_int (CL_API_CALL * clEnqueueMemsetINTEL_fn)( /* Deprecated */ cl_command_queue command_queue, void* dst_ptr, cl_int value, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemFillINTEL( cl_command_queue command_queue, void* dst_ptr, const void* pattern, size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); typedef CL_API_ENTRY cl_int (CL_API_CALL * clEnqueueMemFillINTEL_fn)( cl_command_queue command_queue, void* dst_ptr, const void* pattern, size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemcpyINTEL( cl_command_queue command_queue, cl_bool blocking, void* dst_ptr, const void* src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); typedef CL_API_ENTRY cl_int (CL_API_CALL * clEnqueueMemcpyINTEL_fn)( cl_command_queue command_queue, cl_bool blocking, void* dst_ptr, const void* src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); #ifdef CL_VERSION_1_2 /* Because these APIs use cl_mem_migration_flags, they require OpenCL 1.2: */ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemINTEL( cl_command_queue command_queue, const void* ptr, size_t size, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); typedef CL_API_ENTRY cl_int (CL_API_CALL * clEnqueueMigrateMemINTEL_fn)( cl_command_queue command_queue, const void* ptr, size_t size, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); #endif extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMemAdviseINTEL( cl_command_queue command_queue, const void* ptr, size_t size, cl_mem_advice_intel advice, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); typedef CL_API_ENTRY cl_int (CL_API_CALL * clEnqueueMemAdviseINTEL_fn)( cl_command_queue command_queue, const void* ptr, size_t size, cl_mem_advice_intel advice, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event); /*************************************************** * cl_intel_create_buffer_with_properties extension * 
****************************************************/ #define cl_intel_create_buffer_with_properties 1 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateBufferWithPropertiesINTEL( cl_context context, const cl_mem_properties_intel* properties, cl_mem_flags flags, size_t size, void * host_ptr, cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateBufferWithPropertiesINTEL_fn)( cl_context context, const cl_mem_properties_intel* properties, cl_mem_flags flags, size_t size, void * host_ptr, cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; /****************************************** * cl_intel_mem_channel_property extension * *******************************************/ #define CL_MEM_CHANNEL_INTEL 0x4213 /********************************* * cl_intel_mem_force_host_memory * **********************************/ #define cl_intel_mem_force_host_memory 1 /* cl_mem_flags */ #define CL_MEM_FORCE_HOST_MEMORY_INTEL (1 << 20) #ifdef __cplusplus } #endif #endif /* __CL_EXT_INTEL_H */ Oclgrind-21.10/src/CL/cl_gl.h000066400000000000000000000145121413315665100156040ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
******************************************************************************/ #ifndef __OPENCL_CL_GL_H #define __OPENCL_CL_GL_H #include #ifdef __cplusplus extern "C" { #endif typedef cl_uint cl_gl_object_type; typedef cl_uint cl_gl_texture_info; typedef cl_uint cl_gl_platform_info; typedef struct __GLsync *cl_GLsync; /* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ #define CL_GL_OBJECT_BUFFER 0x2000 #define CL_GL_OBJECT_TEXTURE2D 0x2001 #define CL_GL_OBJECT_TEXTURE3D 0x2002 #define CL_GL_OBJECT_RENDERBUFFER 0x2003 #ifdef CL_VERSION_1_2 #define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E #define CL_GL_OBJECT_TEXTURE1D 0x200F #define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 #define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 #endif /* cl_gl_texture_info */ #define CL_GL_TEXTURE_TARGET 0x2004 #define CL_GL_MIPMAP_LEVEL 0x2005 #ifdef CL_VERSION_1_2 #define CL_GL_NUM_SAMPLES 0x2012 #endif extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLBuffer(cl_context context, cl_mem_flags flags, cl_GLuint bufobj, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; #endif extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLRenderbuffer(cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetGLObjectInfo(cl_mem memobj, cl_gl_object_type * gl_object_type, cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetGLTextureInfo(cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireGLObjects(cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseGLObjects(cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; /* Deprecated OpenCL 1.1 APIs */ extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateFromGLTexture2D(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateFromGLTexture3D(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; /* cl_khr_gl_sharing extension */ #define cl_khr_gl_sharing 1 typedef cl_uint cl_gl_context_info; /* Additional Error Codes */ #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 /* cl_gl_context_info */ #define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 #define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 /* Additional cl_context_properties */ #define CL_GL_CONTEXT_KHR 0x2008 #define CL_EGL_DISPLAY_KHR 0x2009 #define CL_GLX_DISPLAY_KHR 0x200A #define CL_WGL_HDC_KHR 0x200B #define CL_CGL_SHAREGROUP_KHR 0x200C extern CL_API_ENTRY cl_int CL_API_CALL clGetGLContextInfoKHR(const cl_context_properties * properties, 
cl_gl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( const cl_context_properties * properties, cl_gl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret); #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_GL_H */ Oclgrind-21.10/src/CL/cl_gl_ext.h000066400000000000000000000023561413315665100164670ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ #ifndef __OPENCL_CL_GL_EXT_H #define __OPENCL_CL_GL_EXT_H #ifdef __cplusplus extern "C" { #endif #include /* * cl_khr_gl_event extension */ #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D extern CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromGLsyncKHR(cl_context context, cl_GLsync sync, cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_GL_EXT_H */ Oclgrind-21.10/src/CL/cl_half.h000066400000000000000000000242761413315665100161240ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2019-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ /** * This is a header-only utility library that provides OpenCL host code with * routines for converting to/from cl_half values. * * Example usage: * * #include * ... * cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE); * cl_float f = cl_half_to_float(h); */ #ifndef OPENCL_CL_HALF_H #define OPENCL_CL_HALF_H #include #include #ifdef __cplusplus extern "C" { #endif /** * Rounding mode used when converting to cl_half. */ typedef enum { CL_HALF_RTE, // round to nearest even CL_HALF_RTZ, // round towards zero CL_HALF_RTP, // round towards positive infinity CL_HALF_RTN, // round towards negative infinity } cl_half_rounding_mode; /* Private utility macros. */ #define CL_HALF_EXP_MASK 0x7C00 #define CL_HALF_MAX_FINITE_MAG 0x7BFF /* * Utility to deal with values that overflow when converting to half precision. 
*/ static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode, uint16_t sign) { if (rounding_mode == CL_HALF_RTZ) { // Round overflow towards zero -> largest finite number (preserving sign) return (sign << 15) | CL_HALF_MAX_FINITE_MAG; } else if (rounding_mode == CL_HALF_RTP && sign) { // Round negative overflow towards positive infinity -> most negative finite number return (1 << 15) | CL_HALF_MAX_FINITE_MAG; } else if (rounding_mode == CL_HALF_RTN && !sign) { // Round positive overflow towards negative infinity -> largest finite number return CL_HALF_MAX_FINITE_MAG; } // Overflow to infinity return (sign << 15) | CL_HALF_EXP_MASK; } /* * Utility to deal with values that underflow when converting to half precision. */ static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode, uint16_t sign) { if (rounding_mode == CL_HALF_RTP && !sign) { // Round underflow towards positive infinity -> smallest positive value return (sign << 15) | 1; } else if (rounding_mode == CL_HALF_RTN && sign) { // Round underflow towards negative infinity -> largest negative value return (sign << 15) | 1; } // Flush to zero return (sign << 15); } /** * Convert a cl_float to a cl_half. */ static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode) { // Type-punning to get direct access to underlying bits union { cl_float f; uint32_t i; } f32; f32.f = f; // Extract sign bit uint16_t sign = f32.i >> 31; // Extract FP32 exponent and mantissa uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF; uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1); // Remove FP32 exponent bias int32_t exp = f_exp - CL_FLT_MAX_EXP + 1; // Add FP16 exponent bias uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1); // Position of the bit that will become the FP16 mantissa LSB uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG; // Check for NaN / infinity if (f_exp == 0xFF) { if (f_mant) { // NaN -> propagate mantissa and silence it uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos); h_mant |= 0x200; return (sign << 15) | CL_HALF_EXP_MASK | h_mant; } else { // Infinity -> zero mantissa return (sign << 15) | CL_HALF_EXP_MASK; } } // Check for zero if (!f_exp && !f_mant) { return (sign << 15); } // Check for overflow if (exp >= CL_HALF_MAX_EXP) { return cl_half_handle_overflow(rounding_mode, sign); } // Check for underflow if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) { return cl_half_handle_underflow(rounding_mode, sign); } // Check for value that will become denormal if (exp < -14) { // Denormal -> include the implicit 1 from the FP32 mantissa h_exp = 0; f_mant |= 1 << (CL_FLT_MANT_DIG - 1); // Mantissa shift amount depends on exponent lsb_pos = -exp + (CL_FLT_MANT_DIG - 25); } // Generate FP16 mantissa by shifting FP32 mantissa uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos); // Check whether we need to round uint32_t halfway = 1 << (lsb_pos - 1); uint32_t mask = (halfway << 1) - 1; switch (rounding_mode) { case CL_HALF_RTE: if ((f_mant & mask) > halfway) { // More than halfway -> round up h_mant += 1; } else if ((f_mant & mask) == halfway) { // Exactly halfway -> round to nearest even if (h_mant & 0x1) h_mant += 1; } break; case CL_HALF_RTZ: // Mantissa has already been truncated -> do nothing break; case CL_HALF_RTP: if ((f_mant & mask) && !sign) { // Round positive numbers up h_mant += 1; } break; case CL_HALF_RTN: if ((f_mant & mask) && sign) { // Round negative numbers down h_mant += 1; } break; } // Check for mantissa overflow if (h_mant & 
0x400) { h_exp += 1; h_mant = 0; } return (sign << 15) | (h_exp << 10) | h_mant; } /** * Convert a cl_double to a cl_half. */ static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode) { // Type-punning to get direct access to underlying bits union { cl_double d; uint64_t i; } f64; f64.d = d; // Extract sign bit uint16_t sign = f64.i >> 63; // Extract FP64 exponent and mantissa uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF; uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1); // Remove FP64 exponent bias int64_t exp = d_exp - CL_DBL_MAX_EXP + 1; // Add FP16 exponent bias uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1); // Position of the bit that will become the FP16 mantissa LSB uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG; // Check for NaN / infinity if (d_exp == 0x7FF) { if (d_mant) { // NaN -> propagate mantissa and silence it uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); h_mant |= 0x200; return (sign << 15) | CL_HALF_EXP_MASK | h_mant; } else { // Infinity -> zero mantissa return (sign << 15) | CL_HALF_EXP_MASK; } } // Check for zero if (!d_exp && !d_mant) { return (sign << 15); } // Check for overflow if (exp >= CL_HALF_MAX_EXP) { return cl_half_handle_overflow(rounding_mode, sign); } // Check for underflow if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) { return cl_half_handle_underflow(rounding_mode, sign); } // Check for value that will become denormal if (exp < -14) { // Include the implicit 1 from the FP64 mantissa h_exp = 0; d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1); // Mantissa shift amount depends on exponent lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25)); } // Generate FP16 mantissa by shifting FP64 mantissa uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); // Check whether we need to round uint64_t halfway = (uint64_t)1 << (lsb_pos - 1); uint64_t mask = (halfway << 1) - 1; switch (rounding_mode) { case CL_HALF_RTE: if ((d_mant & mask) > halfway) { // More than halfway -> round up h_mant += 1; } else if ((d_mant & mask) == halfway) { // Exactly halfway -> round to nearest even if (h_mant & 0x1) h_mant += 1; } break; case CL_HALF_RTZ: // Mantissa has already been truncated -> do nothing break; case CL_HALF_RTP: if ((d_mant & mask) && !sign) { // Round positive numbers up h_mant += 1; } break; case CL_HALF_RTN: if ((d_mant & mask) && sign) { // Round negative numbers down h_mant += 1; } break; } // Check for mantissa overflow if (h_mant & 0x400) { h_exp += 1; h_mant = 0; } return (sign << 15) | (h_exp << 10) | h_mant; } /** * Convert a cl_half to a cl_float. 
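 *
 * A minimal round-trip sketch using the two conversion routines in this
 * header (an illustrative example only):
 *
 *   cl_half h = cl_half_from_float(3.14159f, CL_HALF_RTE);
 *   cl_float f = cl_half_to_float(h);
 *
 * After the round trip, f holds the nearest representable half value,
 * roughly 3.1406, since only the 10 explicitly stored mantissa bits survive.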
*/ static inline cl_float cl_half_to_float(cl_half h) { // Type-punning to get direct access to underlying bits union { cl_float f; uint32_t i; } f32; // Extract sign bit uint16_t sign = h >> 15; // Extract FP16 exponent and mantissa uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F; uint16_t h_mant = h & 0x3FF; // Remove FP16 exponent bias int32_t exp = h_exp - CL_HALF_MAX_EXP + 1; // Add FP32 exponent bias uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1; // Check for NaN / infinity if (h_exp == 0x1F) { if (h_mant) { // NaN -> propagate mantissa and silence it uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG); f_mant |= 0x400000; f32.i = (sign << 31) | 0x7F800000 | f_mant; return f32.f; } else { // Infinity -> zero mantissa f32.i = (sign << 31) | 0x7F800000; return f32.f; } } // Check for zero / denormal if (h_exp == 0) { if (h_mant == 0) { // Zero -> zero exponent f_exp = 0; } else { // Denormal -> normalize it // - Shift mantissa to make most-significant 1 implicit // - Adjust exponent accordingly uint32_t shift = 0; while ((h_mant & 0x400) == 0) { h_mant <<= 1; shift++; } h_mant &= 0x3FF; f_exp -= shift - 1; } } f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13); return f32.f; } #undef CL_HALF_EXP_MASK #undef CL_HALF_MAX_FINITE_MAG #ifdef __cplusplus } #endif #endif /* OPENCL_CL_HALF_H */ Oclgrind-21.10/src/CL/cl_icd.h000066400000000000000000001517061413315665100157500ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2019-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ #ifndef OPENCL_CL_ICD_H #define OPENCL_CL_ICD_H #include #include #include #include #if defined(_WIN32) #include #include #include #endif #ifdef __cplusplus extern "C" { #endif /* * This file contains pointer type definitions for each of the CL API calls as * well as a type definition for the dispatch table used by the Khronos ICD * loader (see cl_khr_icd extension specification for background). 
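 *
 * As background, the cl_khr_icd extension requires every object returned by
 * a vendor implementation (platform, device, context, queue, and so on) to
 * begin with a pointer to this dispatch table, which is how the loader
 * forwards each entry point to the vendor that owns the object. A sketch of
 * such an object (illustrative only, not a normative layout):
 *
 *   struct _cl_platform_id
 *   {
 *     cl_icd_dispatch* dispatch;   // must be the first member
 *     // ... vendor-specific state ...
 *   };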
*/ /* API function pointer definitions */ // Platform APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Device APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( cl_device_id in_device, const cl_device_partition_property *partition_properties, cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDevice)( cl_device_id device) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDevice)( cl_device_id device) CL_API_SUFFIX__VERSION_1_2; #else typedef void *cl_api_clCreateSubDevices; typedef void *cl_api_clRetainDevice; typedef void *cl_api_clReleaseDevice; #endif // Context APIs typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContext)( const cl_context_properties *properties, cl_uint num_devices, const cl_device_id *devices, void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( const cl_context_properties *properties, cl_device_type device_type, void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainContext)( cl_context context) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseContext)( cl_context context) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetContextInfo)( cl_context context, cl_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Command Queue APIs typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueueWithProperties)( cl_context /* context */, cl_device_id /* device */, const cl_queue_properties * /* properties */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; #else typedef void *cl_api_clCreateCommandQueueWithProperties; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void 
*param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Memory Object APIs typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage)( cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; #else typedef void *cl_api_clCreateImage; #endif #ifdef CL_VERSION_3_0 typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBufferWithProperties)( cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImageWithProperties)( cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; typedef CL_API_ENTRY cl_int(CL_API_CALL* cl_api_clSetContextDestructorCallback)( cl_context context, void(CL_CALLBACK* pfn_notify)(cl_context context, void* user_data), void* user_data) CL_API_SUFFIX__VERSION_3_0; #else typedef void *cl_api_clCreateBufferWithProperties; typedef void *cl_api_clCreateImageWithProperties; typedef void *cl_api_clSetContextDestructorCallback; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainMemObject)( cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format *image_formats, cl_uint *num_image_formats) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetImageInfo)( cl_mem image, cl_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreatePipe)( cl_context /* context */, cl_mem_flags /* flags */, cl_uint /* pipe_packet_size */, cl_uint /* pipe_max_packets */, const cl_pipe_properties * /* properties */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( cl_mem /* pipe */, cl_pipe_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clSVMAlloc)( cl_context /* context */, cl_svm_mem_flags /* flags */, size_t /* size */, unsigned int /* alignment */)CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY void(CL_API_CALL *cl_api_clSVMFree)( cl_context /* context */, void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; #else typedef void *cl_api_clCreatePipe; typedef void *cl_api_clGetPipeInfo; typedef void *cl_api_clSVMAlloc; typedef void *cl_api_clSVMFree; #endif // Sampler APIs typedef CL_API_ENTRY cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( cl_context context, cl_bool 
normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainSampler)( cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseSampler)( cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 typedef CL_API_ENTRY cl_sampler(CL_API_CALL *cl_api_clCreateSamplerWithProperties)( cl_context /* context */, const cl_sampler_properties * /* sampler_properties */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; #else typedef void *cl_api_clCreateSamplerWithProperties; #endif // Program Object APIs typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithSource)( cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithBuiltInKernels)( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const char *kernel_names, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; #else typedef void *cl_api_clCreateProgramWithBuiltInKernels; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainProgram)( cl_program program) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseProgram)( cl_program program) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clBuildProgram)( cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), void *user_data) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCompileProgram)( cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), void *user_data) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clLinkProgram)( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_programs, const cl_program *input_programs, void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; #else typedef void *cl_api_clCompileProgram; typedef void *cl_api_clLinkProgram; #endif #ifdef CL_VERSION_2_2 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetProgramSpecializationConstant)( cl_program program, cl_uint spec_id, size_t spec_size, const void *spec_value) CL_API_SUFFIX__VERSION_2_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( cl_program program, void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), void *user_data) CL_API_SUFFIX__VERSION_2_2; #else typedef void 
*cl_api_clSetProgramSpecializationConstant; typedef void *cl_api_clSetProgramReleaseCallback; #endif #ifdef CL_VERSION_1_2 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; #else typedef void *cl_api_clUnloadPlatformCompiler; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Kernel Object APIs typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( cl_program program, const char *kernel_name, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( cl_program program, cl_uint num_kernels, cl_kernel *kernels, cl_uint *num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainKernel)( cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseKernel)( cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArg)( cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; #else typedef void *cl_api_clGetKernelArgInfo; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArgSVMPointer)( cl_kernel /* kernel */, cl_uint /* arg_index */, const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */, const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( cl_kernel /* in_kernel */, cl_device_id /*in_device*/, cl_kernel_sub_group_info /* param_name */, size_t /*input_value_size*/, const void * /*input_value*/, size_t /*param_value_size*/, void * /*param_value*/, size_t * /*param_value_size_ret*/) CL_EXT_SUFFIX__VERSION_2_0; #else typedef void *cl_api_clSetKernelArgSVMPointer; typedef void *cl_api_clSetKernelExecInfo; typedef void *cl_api_clGetKernelSubGroupInfoKHR; #endif // Event Object APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clWaitForEvents)( cl_uint num_events, const cl_event *event_list) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventInfo)( cl_event event, cl_event_info param_name, size_t param_value_size, void 
*param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) CL_API_SUFFIX__VERSION_1_0; // Profiling APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Flush and Finish APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFlush)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFinish)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; // Enqueued Commands APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t cb, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_1 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, const size_t *buffer_origin, const size_t *host_origin, const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_1; #else typedef void *cl_api_clEnqueueReadBufferRect; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t cb, const void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_1 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, const size_t *buffer_origin, const size_t *host_origin, const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_1; #else typedef void *cl_api_clEnqueueWriteBufferRect; #endif #ifdef CL_VERSION_1_2 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; #else typedef void *cl_api_clEnqueueFillBuffer; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_1 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, const size_t *src_origin, const size_t *dst_origin, const size_t *region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) 
CL_API_SUFFIX__VERSION_1_1; #else typedef void *cl_api_clEnqueueCopyBufferRect; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, const size_t *origin, const size_t *region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( cl_command_queue command_queue, cl_mem image, const void *fill_color, const size_t origin[3], const size_t region[3], cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; #else typedef void *cl_api_clEnqueueFillImage; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, const size_t *src_origin, const size_t *region, size_t dst_offset, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, size_t src_offset, const size_t *dst_origin, const size_t *region, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapBuffer)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapImage)( cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, cl_map_flags map_flags, const size_t *origin, const size_t *region, size_t *image_row_pitch, size_t *image_slice_pitch, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem *mem_objects, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; #else typedef void *cl_api_clEnqueueMigrateMemObjects; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNDRangeKernel)( 
cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueTask)( cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( cl_command_queue command_queue, void(CL_CALLBACK *user_func)(void *), void *args, size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, const void **args_mem_loc, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY void *( CL_API_CALL *cl_api_clGetExtensionFunctionAddressForPlatform)( cl_platform_id platform, const char *function_name)CL_API_SUFFIX__VERSION_1_2; #else typedef void *cl_api_clEnqueueMarkerWithWaitList; typedef void *cl_api_clEnqueueBarrierWithWaitList; typedef void *cl_api_clGetExtensionFunctionAddressForPlatform; #endif // Shared Virtual Memory APIs #ifdef CL_VERSION_2_0 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, void ** /* svm_pointers */, void(CL_CALLBACK *pfn_free_func)(cl_command_queue /* queue */, cl_uint /* num_svm_pointers */, void ** /* svm_pointers[] */, void * /* user_data */), void * /* user_data */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemcpy)( cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void * /* dst_ptr */, const void * /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( cl_command_queue /* command_queue */, void * /* svm_ptr */, const void * /* pattern */, size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMap)( cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, void * /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( cl_command_queue /* command_queue */, void * /* svm_ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; #else typedef void *cl_api_clEnqueueSVMFree; typedef void *cl_api_clEnqueueSVMMemcpy; typedef 
void *cl_api_clEnqueueSVMMemFill; typedef void *cl_api_clEnqueueSVMMap; typedef void *cl_api_clEnqueueSVMUnmap; #endif // Deprecated APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties *old_properties) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( cl_context context, cl_mem_flags flags, const cl_image_format *image_format, size_t image_width, size_t image_height, size_t image_row_pitch, void *host_ptr, cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( cl_context context, cl_mem_flags flags, const cl_image_format *image_format, size_t image_width, size_t image_height, size_t image_depth, size_t image_row_pitch, size_t image_slice_pitch, void *host_ptr, cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( cl_command_queue command_queue, cl_event *event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( cl_command_queue command_queue, cl_uint num_events, const cl_event *event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clGetExtensionFunctionAddress)( const char *function_name)CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; // GL and other APIs typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLBuffer)( cl_context context, cl_mem_flags flags, cl_GLuint bufobj, int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( cl_mem memobj, cl_gl_object_type *gl_object_type, cl_GLuint *gl_object_name) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( cl_command_queue command_queue, cl_uint 
num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; /* cl_khr_gl_sharing */ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( const cl_context_properties *properties, cl_gl_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); /* cl_khr_gl_event */ typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( cl_context context, cl_GLsync sync, cl_int *errcode_ret); #if defined(_WIN32) /* cl_khr_d3d10_sharing */ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D10ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D10ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10BufferKHR(cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcode_ret); extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, UINT subresource, cl_int *errcode_ret); extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, UINT subresource, cl_int *errcode_ret); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); /* cl_khr_d3d11_sharing */ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint 
num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D11ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; /* cl_khr_dx9_media_sharing */ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR)( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr *media_adapters_type, void *media_adapters, cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, cl_uint plane, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; /* cl_khr_d3d11_sharing */ extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11BufferKHR(cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcode_ret); extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, UINT subresource, cl_int *errcode_ret); extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, UINT subresource, cl_int *errcode_ret); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( 
cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); /* cl_khr_dx9_media_sharing */ extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr *media_adapter_type, void *media_adapters, cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, cl_uint plane, cl_int *errcode_ret); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); #else /* cl_khr_d3d10_sharing */ typedef void *cl_api_clGetDeviceIDsFromD3D10KHR; typedef void *cl_api_clCreateFromD3D10BufferKHR; typedef void *cl_api_clCreateFromD3D10Texture2DKHR; typedef void *cl_api_clCreateFromD3D10Texture3DKHR; typedef void *cl_api_clEnqueueAcquireD3D10ObjectsKHR; typedef void *cl_api_clEnqueueReleaseD3D10ObjectsKHR; /* cl_khr_d3d11_sharing */ typedef void *cl_api_clGetDeviceIDsFromD3D11KHR; typedef void *cl_api_clCreateFromD3D11BufferKHR; typedef void *cl_api_clCreateFromD3D11Texture2DKHR; typedef void *cl_api_clCreateFromD3D11Texture3DKHR; typedef void *cl_api_clEnqueueAcquireD3D11ObjectsKHR; typedef void *cl_api_clEnqueueReleaseD3D11ObjectsKHR; /* cl_khr_dx9_media_sharing */ typedef void *cl_api_clCreateFromDX9MediaSurfaceKHR; typedef void *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR; typedef void *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR; typedef void *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR; #endif /* OpenCL 1.1 */ #ifdef CL_VERSION_1_1 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetEventCallback)( cl_event /* event */, cl_int /* command_exec_callback_type */, void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, const void * /* buffer_create_info */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetMemObjectDestructorCallback)( cl_mem /* memobj */, void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, void * /*user_data*/), void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( cl_context /* context */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; #else typedef void *cl_api_clSetEventCallback; typedef void *cl_api_clCreateSubBuffer; typedef void *cl_api_clSetMemObjectDestructorCallback; typedef void *cl_api_clCreateUserEvent; typedef void *cl_api_clSetUserEventStatus; #endif typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( cl_device_id 
in_device, const cl_device_partition_property_ext *partition_properties, cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( cl_device_id device) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( cl_device_id device) CL_API_SUFFIX__VERSION_1_0; /* cl_khr_egl_image */ typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( cl_context context, CLeglDisplayKHR display, CLeglImageKHR image, cl_mem_flags flags, const cl_egl_image_properties_khr *properties, cl_int *errcode_ret); typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); /* cl_khr_egl_event */ typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, cl_int *errcode_ret); #ifdef CL_VERSION_2_1 typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetDefaultDeviceCommandQueue)( cl_context context, cl_device_id device, cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( cl_context context, const void *il, size_t length, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfo)( cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( cl_kernel source_kernel, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( cl_command_queue command_queue, cl_uint num_svm_pointers, const void **svm_pointers, const size_t *sizes, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( cl_device_id device, cl_ulong *device_timestamp, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetHostTimer)( cl_device_id device, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; #else typedef void *cl_api_clSetDefaultDeviceCommandQueue; typedef void *cl_api_clCreateProgramWithIL; typedef void *cl_api_clGetKernelSubGroupInfo; typedef void *cl_api_clCloneKernel; typedef void *cl_api_clEnqueueSVMMigrateMem; typedef void *cl_api_clGetDeviceAndHostTimer; typedef void *cl_api_clGetHostTimer; #endif /* Vendor dispatch table struture */ typedef struct _cl_icd_dispatch { /* OpenCL 1.0 */ cl_api_clGetPlatformIDs clGetPlatformIDs; cl_api_clGetPlatformInfo clGetPlatformInfo; cl_api_clGetDeviceIDs clGetDeviceIDs; cl_api_clGetDeviceInfo clGetDeviceInfo; cl_api_clCreateContext clCreateContext; cl_api_clCreateContextFromType clCreateContextFromType; cl_api_clRetainContext clRetainContext; cl_api_clReleaseContext clReleaseContext; cl_api_clGetContextInfo 
clGetContextInfo; cl_api_clCreateCommandQueue clCreateCommandQueue; cl_api_clRetainCommandQueue clRetainCommandQueue; cl_api_clReleaseCommandQueue clReleaseCommandQueue; cl_api_clGetCommandQueueInfo clGetCommandQueueInfo; cl_api_clSetCommandQueueProperty clSetCommandQueueProperty; cl_api_clCreateBuffer clCreateBuffer; cl_api_clCreateImage2D clCreateImage2D; cl_api_clCreateImage3D clCreateImage3D; cl_api_clRetainMemObject clRetainMemObject; cl_api_clReleaseMemObject clReleaseMemObject; cl_api_clGetSupportedImageFormats clGetSupportedImageFormats; cl_api_clGetMemObjectInfo clGetMemObjectInfo; cl_api_clGetImageInfo clGetImageInfo; cl_api_clCreateSampler clCreateSampler; cl_api_clRetainSampler clRetainSampler; cl_api_clReleaseSampler clReleaseSampler; cl_api_clGetSamplerInfo clGetSamplerInfo; cl_api_clCreateProgramWithSource clCreateProgramWithSource; cl_api_clCreateProgramWithBinary clCreateProgramWithBinary; cl_api_clRetainProgram clRetainProgram; cl_api_clReleaseProgram clReleaseProgram; cl_api_clBuildProgram clBuildProgram; cl_api_clUnloadCompiler clUnloadCompiler; cl_api_clGetProgramInfo clGetProgramInfo; cl_api_clGetProgramBuildInfo clGetProgramBuildInfo; cl_api_clCreateKernel clCreateKernel; cl_api_clCreateKernelsInProgram clCreateKernelsInProgram; cl_api_clRetainKernel clRetainKernel; cl_api_clReleaseKernel clReleaseKernel; cl_api_clSetKernelArg clSetKernelArg; cl_api_clGetKernelInfo clGetKernelInfo; cl_api_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; cl_api_clWaitForEvents clWaitForEvents; cl_api_clGetEventInfo clGetEventInfo; cl_api_clRetainEvent clRetainEvent; cl_api_clReleaseEvent clReleaseEvent; cl_api_clGetEventProfilingInfo clGetEventProfilingInfo; cl_api_clFlush clFlush; cl_api_clFinish clFinish; cl_api_clEnqueueReadBuffer clEnqueueReadBuffer; cl_api_clEnqueueWriteBuffer clEnqueueWriteBuffer; cl_api_clEnqueueCopyBuffer clEnqueueCopyBuffer; cl_api_clEnqueueReadImage clEnqueueReadImage; cl_api_clEnqueueWriteImage clEnqueueWriteImage; cl_api_clEnqueueCopyImage clEnqueueCopyImage; cl_api_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; cl_api_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; cl_api_clEnqueueMapBuffer clEnqueueMapBuffer; cl_api_clEnqueueMapImage clEnqueueMapImage; cl_api_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; cl_api_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; cl_api_clEnqueueTask clEnqueueTask; cl_api_clEnqueueNativeKernel clEnqueueNativeKernel; cl_api_clEnqueueMarker clEnqueueMarker; cl_api_clEnqueueWaitForEvents clEnqueueWaitForEvents; cl_api_clEnqueueBarrier clEnqueueBarrier; cl_api_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; cl_api_clCreateFromGLBuffer clCreateFromGLBuffer; cl_api_clCreateFromGLTexture2D clCreateFromGLTexture2D; cl_api_clCreateFromGLTexture3D clCreateFromGLTexture3D; cl_api_clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer; cl_api_clGetGLObjectInfo clGetGLObjectInfo; cl_api_clGetGLTextureInfo clGetGLTextureInfo; cl_api_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; cl_api_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; cl_api_clGetGLContextInfoKHR clGetGLContextInfoKHR; /* cl_khr_d3d10_sharing */ cl_api_clGetDeviceIDsFromD3D10KHR clGetDeviceIDsFromD3D10KHR; cl_api_clCreateFromD3D10BufferKHR clCreateFromD3D10BufferKHR; cl_api_clCreateFromD3D10Texture2DKHR clCreateFromD3D10Texture2DKHR; cl_api_clCreateFromD3D10Texture3DKHR clCreateFromD3D10Texture3DKHR; cl_api_clEnqueueAcquireD3D10ObjectsKHR clEnqueueAcquireD3D10ObjectsKHR; cl_api_clEnqueueReleaseD3D10ObjectsKHR 
clEnqueueReleaseD3D10ObjectsKHR; /* OpenCL 1.1 */ cl_api_clSetEventCallback clSetEventCallback; cl_api_clCreateSubBuffer clCreateSubBuffer; cl_api_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; cl_api_clCreateUserEvent clCreateUserEvent; cl_api_clSetUserEventStatus clSetUserEventStatus; cl_api_clEnqueueReadBufferRect clEnqueueReadBufferRect; cl_api_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; cl_api_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; /* cl_ext_device_fission */ cl_api_clCreateSubDevicesEXT clCreateSubDevicesEXT; cl_api_clRetainDeviceEXT clRetainDeviceEXT; cl_api_clReleaseDeviceEXT clReleaseDeviceEXT; /* cl_khr_gl_event */ cl_api_clCreateEventFromGLsyncKHR clCreateEventFromGLsyncKHR; /* OpenCL 1.2 */ cl_api_clCreateSubDevices clCreateSubDevices; cl_api_clRetainDevice clRetainDevice; cl_api_clReleaseDevice clReleaseDevice; cl_api_clCreateImage clCreateImage; cl_api_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; cl_api_clCompileProgram clCompileProgram; cl_api_clLinkProgram clLinkProgram; cl_api_clUnloadPlatformCompiler clUnloadPlatformCompiler; cl_api_clGetKernelArgInfo clGetKernelArgInfo; cl_api_clEnqueueFillBuffer clEnqueueFillBuffer; cl_api_clEnqueueFillImage clEnqueueFillImage; cl_api_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; cl_api_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; cl_api_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; cl_api_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform; cl_api_clCreateFromGLTexture clCreateFromGLTexture; /* cl_khr_d3d11_sharing */ cl_api_clGetDeviceIDsFromD3D11KHR clGetDeviceIDsFromD3D11KHR; cl_api_clCreateFromD3D11BufferKHR clCreateFromD3D11BufferKHR; cl_api_clCreateFromD3D11Texture2DKHR clCreateFromD3D11Texture2DKHR; cl_api_clCreateFromD3D11Texture3DKHR clCreateFromD3D11Texture3DKHR; cl_api_clCreateFromDX9MediaSurfaceKHR clCreateFromDX9MediaSurfaceKHR; cl_api_clEnqueueAcquireD3D11ObjectsKHR clEnqueueAcquireD3D11ObjectsKHR; cl_api_clEnqueueReleaseD3D11ObjectsKHR clEnqueueReleaseD3D11ObjectsKHR; /* cl_khr_dx9_media_sharing */ cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR clGetDeviceIDsFromDX9MediaAdapterKHR; cl_api_clEnqueueAcquireDX9MediaSurfacesKHR clEnqueueAcquireDX9MediaSurfacesKHR; cl_api_clEnqueueReleaseDX9MediaSurfacesKHR clEnqueueReleaseDX9MediaSurfacesKHR; /* cl_khr_egl_image */ cl_api_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; cl_api_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; cl_api_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; /* cl_khr_egl_event */ cl_api_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; /* OpenCL 2.0 */ cl_api_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; cl_api_clCreatePipe clCreatePipe; cl_api_clGetPipeInfo clGetPipeInfo; cl_api_clSVMAlloc clSVMAlloc; cl_api_clSVMFree clSVMFree; cl_api_clEnqueueSVMFree clEnqueueSVMFree; cl_api_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; cl_api_clEnqueueSVMMemFill clEnqueueSVMMemFill; cl_api_clEnqueueSVMMap clEnqueueSVMMap; cl_api_clEnqueueSVMUnmap clEnqueueSVMUnmap; cl_api_clCreateSamplerWithProperties clCreateSamplerWithProperties; cl_api_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; cl_api_clSetKernelExecInfo clSetKernelExecInfo; /* cl_khr_sub_groups */ cl_api_clGetKernelSubGroupInfoKHR clGetKernelSubGroupInfoKHR; /* OpenCL 2.1 */ cl_api_clCloneKernel clCloneKernel; cl_api_clCreateProgramWithIL clCreateProgramWithIL; cl_api_clEnqueueSVMMigrateMem clEnqueueSVMMigrateMem; 
cl_api_clGetDeviceAndHostTimer clGetDeviceAndHostTimer; cl_api_clGetHostTimer clGetHostTimer; cl_api_clGetKernelSubGroupInfo clGetKernelSubGroupInfo; cl_api_clSetDefaultDeviceCommandQueue clSetDefaultDeviceCommandQueue; /* OpenCL 2.2 */ cl_api_clSetProgramReleaseCallback clSetProgramReleaseCallback; cl_api_clSetProgramSpecializationConstant clSetProgramSpecializationConstant; /* OpenCL 3.0 */ cl_api_clCreateBufferWithProperties clCreateBufferWithProperties; cl_api_clCreateImageWithProperties clCreateImageWithProperties; cl_api_clSetContextDestructorCallback clSetContextDestructorCallback; } cl_icd_dispatch; #ifdef __cplusplus } #endif #endif /* #ifndef OPENCL_CL_ICD_H */ Oclgrind-21.10/src/CL/cl_platform.h000066400000000000000000001222421413315665100170260ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ #ifndef __CL_PLATFORM_H #define __CL_PLATFORM_H #include #ifdef __cplusplus extern "C" { #endif #if defined(_WIN32) #define CL_API_ENTRY #define CL_API_CALL __stdcall #define CL_CALLBACK __stdcall #else #define CL_API_ENTRY #define CL_API_CALL #define CL_CALLBACK #endif /* * Deprecation flags refer to the last version of the header in which the * feature was not deprecated. * * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without * deprecation but is deprecated in versions later than 1.1. 
*/ #define CL_EXTENSION_WEAK_LINK #define CL_API_SUFFIX__VERSION_1_0 #define CL_EXT_SUFFIX__VERSION_1_0 #define CL_API_SUFFIX__VERSION_1_1 #define CL_EXT_SUFFIX__VERSION_1_1 #define CL_API_SUFFIX__VERSION_1_2 #define CL_EXT_SUFFIX__VERSION_1_2 #define CL_API_SUFFIX__VERSION_2_0 #define CL_EXT_SUFFIX__VERSION_2_0 #define CL_API_SUFFIX__VERSION_2_1 #define CL_EXT_SUFFIX__VERSION_2_1 #define CL_API_SUFFIX__VERSION_2_2 #define CL_EXT_SUFFIX__VERSION_2_2 #define CL_API_SUFFIX__VERSION_3_0 #define CL_EXT_SUFFIX__VERSION_3_0 #define CL_API_SUFFIX__EXPERIMENTAL #define CL_EXT_SUFFIX__EXPERIMENTAL #ifdef __GNUC__ #define CL_EXT_SUFFIX_DEPRECATED __attribute__((deprecated)) #define CL_EXT_PREFIX_DEPRECATED #elif defined(_WIN32) #define CL_EXT_SUFFIX_DEPRECATED #define CL_EXT_PREFIX_DEPRECATED __declspec(deprecated) #else #define CL_EXT_SUFFIX_DEPRECATED #define CL_EXT_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED #else #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED #else #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED #else #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED #else #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED #else #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_2_APIS #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED #else #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED #endif #if (defined (_WIN32) && defined(_MSC_VER)) /* scalar types */ typedef signed __int8 cl_char; typedef unsigned __int8 cl_uchar; typedef signed __int16 cl_short; typedef unsigned __int16 cl_ushort; typedef signed __int32 cl_int; typedef unsigned __int32 cl_uint; typedef signed __int64 cl_long; typedef unsigned __int64 cl_ulong; typedef unsigned __int16 cl_half; typedef float cl_float; typedef double cl_double; /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 #define CL_SCHAR_MIN (-127-1) #define CL_CHAR_MAX CL_SCHAR_MAX #define CL_CHAR_MIN CL_SCHAR_MIN #define CL_UCHAR_MAX 255 #define CL_SHRT_MAX 32767 #define CL_SHRT_MIN (-32767-1) #define CL_USHRT_MAX 65535 #define CL_INT_MAX 2147483647 #define CL_INT_MIN (-2147483647-1) #define CL_UINT_MAX 0xffffffffU #define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) #define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 
1LL) #define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) #define CL_FLT_DIG 6 #define CL_FLT_MANT_DIG 24 #define CL_FLT_MAX_10_EXP +38 #define CL_FLT_MAX_EXP +128 #define CL_FLT_MIN_10_EXP -37 #define CL_FLT_MIN_EXP -125 #define CL_FLT_RADIX 2 #define CL_FLT_MAX 340282346638528859811704183484516925440.0f #define CL_FLT_MIN 1.175494350822287507969e-38f #define CL_FLT_EPSILON 1.1920928955078125e-7f #define CL_HALF_DIG 3 #define CL_HALF_MANT_DIG 11 #define CL_HALF_MAX_10_EXP +4 #define CL_HALF_MAX_EXP +16 #define CL_HALF_MIN_10_EXP -4 #define CL_HALF_MIN_EXP -13 #define CL_HALF_RADIX 2 #define CL_HALF_MAX 65504.0f #define CL_HALF_MIN 6.103515625e-05f #define CL_HALF_EPSILON 9.765625e-04f #define CL_DBL_DIG 15 #define CL_DBL_MANT_DIG 53 #define CL_DBL_MAX_10_EXP +308 #define CL_DBL_MAX_EXP +1024 #define CL_DBL_MIN_10_EXP -307 #define CL_DBL_MIN_EXP -1021 #define CL_DBL_RADIX 2 #define CL_DBL_MAX 1.7976931348623158e+308 #define CL_DBL_MIN 2.225073858507201383090e-308 #define CL_DBL_EPSILON 2.220446049250313080847e-16 #define CL_M_E 2.7182818284590452354 #define CL_M_LOG2E 1.4426950408889634074 #define CL_M_LOG10E 0.43429448190325182765 #define CL_M_LN2 0.69314718055994530942 #define CL_M_LN10 2.30258509299404568402 #define CL_M_PI 3.14159265358979323846 #define CL_M_PI_2 1.57079632679489661923 #define CL_M_PI_4 0.78539816339744830962 #define CL_M_1_PI 0.31830988618379067154 #define CL_M_2_PI 0.63661977236758134308 #define CL_M_2_SQRTPI 1.12837916709551257390 #define CL_M_SQRT2 1.41421356237309504880 #define CL_M_SQRT1_2 0.70710678118654752440 #define CL_M_E_F 2.718281828f #define CL_M_LOG2E_F 1.442695041f #define CL_M_LOG10E_F 0.434294482f #define CL_M_LN2_F 0.693147181f #define CL_M_LN10_F 2.302585093f #define CL_M_PI_F 3.141592654f #define CL_M_PI_2_F 1.570796327f #define CL_M_PI_4_F 0.785398163f #define CL_M_1_PI_F 0.318309886f #define CL_M_2_PI_F 0.636619772f #define CL_M_2_SQRTPI_F 1.128379167f #define CL_M_SQRT2_F 1.414213562f #define CL_M_SQRT1_2_F 0.707106781f #define CL_NAN (CL_INFINITY - CL_INFINITY) #define CL_HUGE_VALF ((cl_float) 1e50) #define CL_HUGE_VAL ((cl_double) 1e500) #define CL_MAXFLOAT CL_FLT_MAX #define CL_INFINITY CL_HUGE_VALF #else #include /* scalar types */ typedef int8_t cl_char; typedef uint8_t cl_uchar; typedef int16_t cl_short; typedef uint16_t cl_ushort; typedef int32_t cl_int; typedef uint32_t cl_uint; typedef int64_t cl_long; typedef uint64_t cl_ulong; typedef uint16_t cl_half; typedef float cl_float; typedef double cl_double; /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 #define CL_SCHAR_MIN (-127-1) #define CL_CHAR_MAX CL_SCHAR_MAX #define CL_CHAR_MIN CL_SCHAR_MIN #define CL_UCHAR_MAX 255 #define CL_SHRT_MAX 32767 #define CL_SHRT_MIN (-32767-1) #define CL_USHRT_MAX 65535 #define CL_INT_MAX 2147483647 #define CL_INT_MIN (-2147483647-1) #define CL_UINT_MAX 0xffffffffU #define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) #define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) #define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) #define CL_FLT_DIG 6 #define CL_FLT_MANT_DIG 24 #define CL_FLT_MAX_10_EXP +38 #define CL_FLT_MAX_EXP +128 #define CL_FLT_MIN_10_EXP -37 #define CL_FLT_MIN_EXP -125 #define CL_FLT_RADIX 2 #define CL_FLT_MAX 340282346638528859811704183484516925440.0f #define CL_FLT_MIN 1.175494350822287507969e-38f #define CL_FLT_EPSILON 1.1920928955078125e-7f #define CL_HALF_DIG 3 #define CL_HALF_MANT_DIG 11 #define CL_HALF_MAX_10_EXP +4 #define CL_HALF_MAX_EXP +16 #define 
CL_HALF_MIN_10_EXP -4 #define CL_HALF_MIN_EXP -13 #define CL_HALF_RADIX 2 #define CL_HALF_MAX 65504.0f #define CL_HALF_MIN 6.103515625e-05f #define CL_HALF_EPSILON 9.765625e-04f #define CL_DBL_DIG 15 #define CL_DBL_MANT_DIG 53 #define CL_DBL_MAX_10_EXP +308 #define CL_DBL_MAX_EXP +1024 #define CL_DBL_MIN_10_EXP -307 #define CL_DBL_MIN_EXP -1021 #define CL_DBL_RADIX 2 #define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 #define CL_DBL_MIN 2.225073858507201383090e-308 #define CL_DBL_EPSILON 2.220446049250313080847e-16 #define CL_M_E 2.7182818284590452354 #define CL_M_LOG2E 1.4426950408889634074 #define CL_M_LOG10E 0.43429448190325182765 #define CL_M_LN2 0.69314718055994530942 #define CL_M_LN10 2.30258509299404568402 #define CL_M_PI 3.14159265358979323846 #define CL_M_PI_2 1.57079632679489661923 #define CL_M_PI_4 0.78539816339744830962 #define CL_M_1_PI 0.31830988618379067154 #define CL_M_2_PI 0.63661977236758134308 #define CL_M_2_SQRTPI 1.12837916709551257390 #define CL_M_SQRT2 1.41421356237309504880 #define CL_M_SQRT1_2 0.70710678118654752440 #define CL_M_E_F 2.718281828f #define CL_M_LOG2E_F 1.442695041f #define CL_M_LOG10E_F 0.434294482f #define CL_M_LN2_F 0.693147181f #define CL_M_LN10_F 2.302585093f #define CL_M_PI_F 3.141592654f #define CL_M_PI_2_F 1.570796327f #define CL_M_PI_4_F 0.785398163f #define CL_M_1_PI_F 0.318309886f #define CL_M_2_PI_F 0.636619772f #define CL_M_2_SQRTPI_F 1.128379167f #define CL_M_SQRT2_F 1.414213562f #define CL_M_SQRT1_2_F 0.707106781f #if defined( __GNUC__ ) #define CL_HUGE_VALF __builtin_huge_valf() #define CL_HUGE_VAL __builtin_huge_val() #define CL_NAN __builtin_nanf( "" ) #else #define CL_HUGE_VALF ((cl_float) 1e50) #define CL_HUGE_VAL ((cl_double) 1e500) float nanf( const char * ); #define CL_NAN nanf( "" ) #endif #define CL_MAXFLOAT CL_FLT_MAX #define CL_INFINITY CL_HUGE_VALF #endif #include /* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */ typedef unsigned int cl_GLuint; typedef int cl_GLint; typedef unsigned int cl_GLenum; /* * Vector types * * Note: OpenCL requires that all types be naturally aligned. * This means that vector types must be naturally aligned. * For example, a vector of four floats must be aligned to * a 16 byte boundary (calculated as 4 * the natural 4-byte * alignment of the float). The alignment qualifiers here * will only function properly if your compiler supports them * and if you don't actively work to defeat them. For example, * in order for a cl_float4 to be 16 byte aligned in a struct, * the start of the struct must itself be 16-byte aligned. * * Maintaining proper alignment is the user's responsibility. */ /* Define basic vector types */ #if defined( __VEC__ ) #if !defined(__clang__) #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. 
*/ #endif typedef __vector unsigned char __cl_uchar16; typedef __vector signed char __cl_char16; typedef __vector unsigned short __cl_ushort8; typedef __vector signed short __cl_short8; typedef __vector unsigned int __cl_uint4; typedef __vector signed int __cl_int4; typedef __vector float __cl_float4; #define __CL_UCHAR16__ 1 #define __CL_CHAR16__ 1 #define __CL_USHORT8__ 1 #define __CL_SHORT8__ 1 #define __CL_UINT4__ 1 #define __CL_INT4__ 1 #define __CL_FLOAT4__ 1 #endif #if defined( __SSE__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef float __cl_float4 __attribute__((vector_size(16))); #else typedef __m128 __cl_float4; #endif #define __CL_FLOAT4__ 1 #endif #if defined( __SSE2__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); typedef cl_char __cl_char16 __attribute__((vector_size(16))); typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); typedef cl_short __cl_short8 __attribute__((vector_size(16))); typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); typedef cl_int __cl_int4 __attribute__((vector_size(16))); typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); typedef cl_long __cl_long2 __attribute__((vector_size(16))); typedef cl_double __cl_double2 __attribute__((vector_size(16))); #else typedef __m128i __cl_uchar16; typedef __m128i __cl_char16; typedef __m128i __cl_ushort8; typedef __m128i __cl_short8; typedef __m128i __cl_uint4; typedef __m128i __cl_int4; typedef __m128i __cl_ulong2; typedef __m128i __cl_long2; typedef __m128d __cl_double2; #endif #define __CL_UCHAR16__ 1 #define __CL_CHAR16__ 1 #define __CL_USHORT8__ 1 #define __CL_SHORT8__ 1 #define __CL_INT4__ 1 #define __CL_UINT4__ 1 #define __CL_ULONG2__ 1 #define __CL_LONG2__ 1 #define __CL_DOUBLE2__ 1 #endif #if defined( __MMX__ ) #include #if defined( __GNUC__ ) typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); typedef cl_char __cl_char8 __attribute__((vector_size(8))); typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); typedef cl_short __cl_short4 __attribute__((vector_size(8))); typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); typedef cl_int __cl_int2 __attribute__((vector_size(8))); typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); typedef cl_long __cl_long1 __attribute__((vector_size(8))); typedef cl_float __cl_float2 __attribute__((vector_size(8))); #else typedef __m64 __cl_uchar8; typedef __m64 __cl_char8; typedef __m64 __cl_ushort4; typedef __m64 __cl_short4; typedef __m64 __cl_uint2; typedef __m64 __cl_int2; typedef __m64 __cl_ulong1; typedef __m64 __cl_long1; typedef __m64 __cl_float2; #endif #define __CL_UCHAR8__ 1 #define __CL_CHAR8__ 1 #define __CL_USHORT4__ 1 #define __CL_SHORT4__ 1 #define __CL_INT2__ 1 #define __CL_UINT2__ 1 #define __CL_ULONG1__ 1 #define __CL_LONG1__ 1 #define __CL_FLOAT2__ 1 #endif #if defined( __AVX__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef cl_float __cl_float8 __attribute__((vector_size(32))); typedef cl_double __cl_double4 __attribute__((vector_size(32))); #else typedef __m256 __cl_float8; typedef __m256d __cl_double4; #endif #define __CL_FLOAT8__ 1 #define __CL_DOUBLE4__ 1 #endif /* Define capabilities for anonymous struct members. */ #if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define __CL_HAS_ANON_STRUCT__ 1 #define __CL_ANON_STRUCT__ #elif defined( __GNUC__) && ! 
defined( __STRICT_ANSI__ ) #define __CL_HAS_ANON_STRUCT__ 1 #define __CL_ANON_STRUCT__ __extension__ #elif defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__) #if _MSC_VER >= 1500 /* Microsoft Developer Studio 2008 supports anonymous structs, but * complains by default. */ #define __CL_HAS_ANON_STRUCT__ 1 #define __CL_ANON_STRUCT__ /* Disable warning C4201: nonstandard extension used : nameless * struct/union */ #pragma warning( push ) #pragma warning( disable : 4201 ) #endif #else #define __CL_HAS_ANON_STRUCT__ 0 #define __CL_ANON_STRUCT__ #endif /* Define alignment keys */ #if defined( __GNUC__ ) || defined(__INTEGRITY) #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) #elif defined( _WIN32) && (_MSC_VER) /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ /* #include */ /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ #define CL_ALIGNED(_x) #else #warning Need to implement some method to align data here #define CL_ALIGNED(_x) #endif /* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ #if __CL_HAS_ANON_STRUCT__ /* .xyzw and .s0123...{f|F} are supported */ #define CL_HAS_NAMED_VECTOR_FIELDS 1 /* .hi and .lo are supported */ #define CL_HAS_HI_LO_VECTOR_FIELDS 1 #endif /* Define cl_vector types */ /* ---- cl_charn ---- */ typedef union { cl_char CL_ALIGNED(2) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_char x, y; }; __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2; #endif }cl_char2; typedef union { cl_char CL_ALIGNED(4) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[2]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4; #endif }cl_char4; /* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. 
*/ typedef cl_char4 cl_char3; typedef union { cl_char CL_ALIGNED(8) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[4]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4[2]; #endif #if defined( __CL_CHAR8__ ) __cl_char8 v8; #endif }cl_char8; typedef union { cl_char CL_ALIGNED(16) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[8]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4[4]; #endif #if defined( __CL_CHAR8__ ) __cl_char8 v8[2]; #endif #if defined( __CL_CHAR16__ ) __cl_char16 v16; #endif }cl_char16; /* ---- cl_ucharn ---- */ typedef union { cl_uchar CL_ALIGNED(2) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; #endif #if defined( __cl_uchar2__) __cl_uchar2 v2; #endif }cl_uchar2; typedef union { cl_uchar CL_ALIGNED(4) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; #endif #if defined( __CL_UCHAR2__) __cl_uchar2 v2[2]; #endif #if defined( __CL_UCHAR4__) __cl_uchar4 v4; #endif }cl_uchar4; /* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. 
*/ typedef cl_uchar4 cl_uchar3; typedef union { cl_uchar CL_ALIGNED(8) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; }; #endif #if defined( __CL_UCHAR2__) __cl_uchar2 v2[4]; #endif #if defined( __CL_UCHAR4__) __cl_uchar4 v4[2]; #endif #if defined( __CL_UCHAR8__ ) __cl_uchar8 v8; #endif }cl_uchar8; typedef union { cl_uchar CL_ALIGNED(16) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; }; #endif #if defined( __CL_UCHAR2__) __cl_uchar2 v2[8]; #endif #if defined( __CL_UCHAR4__) __cl_uchar4 v4[4]; #endif #if defined( __CL_UCHAR8__ ) __cl_uchar8 v8[2]; #endif #if defined( __CL_UCHAR16__ ) __cl_uchar16 v16; #endif }cl_uchar16; /* ---- cl_shortn ---- */ typedef union { cl_short CL_ALIGNED(4) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_short x, y; }; __CL_ANON_STRUCT__ struct{ cl_short s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_short lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2; #endif }cl_short2; typedef union { cl_short CL_ALIGNED(8) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2[2]; #endif #if defined( __CL_SHORT4__) __cl_short4 v4; #endif }cl_short4; /* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. 
*/ typedef cl_short4 cl_short3; typedef union { cl_short CL_ALIGNED(16) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2[4]; #endif #if defined( __CL_SHORT4__) __cl_short4 v4[2]; #endif #if defined( __CL_SHORT8__ ) __cl_short8 v8; #endif }cl_short8; typedef union { cl_short CL_ALIGNED(32) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; }; #endif #if defined( __CL_SHORT2__) __cl_short2 v2[8]; #endif #if defined( __CL_SHORT4__) __cl_short4 v4[4]; #endif #if defined( __CL_SHORT8__ ) __cl_short8 v8[2]; #endif #if defined( __CL_SHORT16__ ) __cl_short16 v16; #endif }cl_short16; /* ---- cl_ushortn ---- */ typedef union { cl_ushort CL_ALIGNED(4) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_ushort x, y; }; __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2; #endif }cl_ushort2; typedef union { cl_ushort CL_ALIGNED(8) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2[2]; #endif #if defined( __CL_USHORT4__) __cl_ushort4 v4; #endif }cl_ushort4; /* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. 
*/ typedef cl_ushort4 cl_ushort3; typedef union { cl_ushort CL_ALIGNED(16) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2[4]; #endif #if defined( __CL_USHORT4__) __cl_ushort4 v4[2]; #endif #if defined( __CL_USHORT8__ ) __cl_ushort8 v8; #endif }cl_ushort8; typedef union { cl_ushort CL_ALIGNED(32) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; }; #endif #if defined( __CL_USHORT2__) __cl_ushort2 v2[8]; #endif #if defined( __CL_USHORT4__) __cl_ushort4 v4[4]; #endif #if defined( __CL_USHORT8__ ) __cl_ushort8 v8[2]; #endif #if defined( __CL_USHORT16__ ) __cl_ushort16 v16; #endif }cl_ushort16; /* ---- cl_halfn ---- */ typedef union { cl_half CL_ALIGNED(4) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_half x, y; }; __CL_ANON_STRUCT__ struct{ cl_half s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_half lo, hi; }; #endif #if defined( __CL_HALF2__) __cl_half2 v2; #endif }cl_half2; typedef union { cl_half CL_ALIGNED(8) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; }; #endif #if defined( __CL_HALF2__) __cl_half2 v2[2]; #endif #if defined( __CL_HALF4__) __cl_half4 v4; #endif }cl_half4; /* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. */ typedef cl_half4 cl_half3; typedef union { cl_half CL_ALIGNED(16) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; }; #endif #if defined( __CL_HALF2__) __cl_half2 v2[4]; #endif #if defined( __CL_HALF4__) __cl_half4 v4[2]; #endif #if defined( __CL_HALF8__ ) __cl_half8 v8; #endif }cl_half8; typedef union { cl_half CL_ALIGNED(32) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; }; #endif #if defined( __CL_HALF2__) __cl_half2 v2[8]; #endif #if defined( __CL_HALF4__) __cl_half4 v4[4]; #endif #if defined( __CL_HALF8__ ) __cl_half8 v8[2]; #endif #if defined( __CL_HALF16__ ) __cl_half16 v16; #endif }cl_half16; /* ---- cl_intn ---- */ typedef union { cl_int CL_ALIGNED(8) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_int x, y; }; __CL_ANON_STRUCT__ struct{ cl_int s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_int lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2; #endif }cl_int2; typedef union { cl_int CL_ALIGNED(16) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2[2]; #endif #if defined( __CL_INT4__) __cl_int4 v4; #endif }cl_int4; /* cl_int3 is identical in size, alignment and behavior to cl_int4. 
See section 6.1.5. */ typedef cl_int4 cl_int3; typedef union { cl_int CL_ALIGNED(32) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2[4]; #endif #if defined( __CL_INT4__) __cl_int4 v4[2]; #endif #if defined( __CL_INT8__ ) __cl_int8 v8; #endif }cl_int8; typedef union { cl_int CL_ALIGNED(64) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; }; #endif #if defined( __CL_INT2__) __cl_int2 v2[8]; #endif #if defined( __CL_INT4__) __cl_int4 v4[4]; #endif #if defined( __CL_INT8__ ) __cl_int8 v8[2]; #endif #if defined( __CL_INT16__ ) __cl_int16 v16; #endif }cl_int16; /* ---- cl_uintn ---- */ typedef union { cl_uint CL_ALIGNED(8) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_uint x, y; }; __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2; #endif }cl_uint2; typedef union { cl_uint CL_ALIGNED(16) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2[2]; #endif #if defined( __CL_UINT4__) __cl_uint4 v4; #endif }cl_uint4; /* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ typedef cl_uint4 cl_uint3; typedef union { cl_uint CL_ALIGNED(32) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2[4]; #endif #if defined( __CL_UINT4__) __cl_uint4 v4[2]; #endif #if defined( __CL_UINT8__ ) __cl_uint8 v8; #endif }cl_uint8; typedef union { cl_uint CL_ALIGNED(64) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; }; #endif #if defined( __CL_UINT2__) __cl_uint2 v2[8]; #endif #if defined( __CL_UINT4__) __cl_uint4 v4[4]; #endif #if defined( __CL_UINT8__ ) __cl_uint8 v8[2]; #endif #if defined( __CL_UINT16__ ) __cl_uint16 v16; #endif }cl_uint16; /* ---- cl_longn ---- */ typedef union { cl_long CL_ALIGNED(16) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_long x, y; }; __CL_ANON_STRUCT__ struct{ cl_long s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_long lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2; #endif }cl_long2; typedef union { cl_long CL_ALIGNED(32) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2[2]; #endif #if defined( __CL_LONG4__) __cl_long4 v4; #endif }cl_long4; /* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. 
*/ typedef cl_long4 cl_long3; typedef union { cl_long CL_ALIGNED(64) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2[4]; #endif #if defined( __CL_LONG4__) __cl_long4 v4[2]; #endif #if defined( __CL_LONG8__ ) __cl_long8 v8; #endif }cl_long8; typedef union { cl_long CL_ALIGNED(128) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; }; #endif #if defined( __CL_LONG2__) __cl_long2 v2[8]; #endif #if defined( __CL_LONG4__) __cl_long4 v4[4]; #endif #if defined( __CL_LONG8__ ) __cl_long8 v8[2]; #endif #if defined( __CL_LONG16__ ) __cl_long16 v16; #endif }cl_long16; /* ---- cl_ulongn ---- */ typedef union { cl_ulong CL_ALIGNED(16) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_ulong x, y; }; __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2; #endif }cl_ulong2; typedef union { cl_ulong CL_ALIGNED(32) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2[2]; #endif #if defined( __CL_ULONG4__) __cl_ulong4 v4; #endif }cl_ulong4; /* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. 
*/ typedef cl_ulong4 cl_ulong3; typedef union { cl_ulong CL_ALIGNED(64) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2[4]; #endif #if defined( __CL_ULONG4__) __cl_ulong4 v4[2]; #endif #if defined( __CL_ULONG8__ ) __cl_ulong8 v8; #endif }cl_ulong8; typedef union { cl_ulong CL_ALIGNED(128) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; }; #endif #if defined( __CL_ULONG2__) __cl_ulong2 v2[8]; #endif #if defined( __CL_ULONG4__) __cl_ulong4 v4[4]; #endif #if defined( __CL_ULONG8__ ) __cl_ulong8 v8[2]; #endif #if defined( __CL_ULONG16__ ) __cl_ulong16 v16; #endif }cl_ulong16; /* --- cl_floatn ---- */ typedef union { cl_float CL_ALIGNED(8) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_float x, y; }; __CL_ANON_STRUCT__ struct{ cl_float s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_float lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2; #endif }cl_float2; typedef union { cl_float CL_ALIGNED(16) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2[2]; #endif #if defined( __CL_FLOAT4__) __cl_float4 v4; #endif }cl_float4; /* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. 
*/ typedef cl_float4 cl_float3; typedef union { cl_float CL_ALIGNED(32) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2[4]; #endif #if defined( __CL_FLOAT4__) __cl_float4 v4[2]; #endif #if defined( __CL_FLOAT8__ ) __cl_float8 v8; #endif }cl_float8; typedef union { cl_float CL_ALIGNED(64) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; #endif #if defined( __CL_FLOAT2__) __cl_float2 v2[8]; #endif #if defined( __CL_FLOAT4__) __cl_float4 v4[4]; #endif #if defined( __CL_FLOAT8__ ) __cl_float8 v8[2]; #endif #if defined( __CL_FLOAT16__ ) __cl_float16 v16; #endif }cl_float16; /* --- cl_doublen ---- */ typedef union { cl_double CL_ALIGNED(16) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_double x, y; }; __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2; #endif }cl_double2; typedef union { cl_double CL_ALIGNED(32) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2[2]; #endif #if defined( __CL_DOUBLE4__) __cl_double4 v4; #endif }cl_double4; /* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */ typedef cl_double4 cl_double3; typedef union { cl_double CL_ALIGNED(64) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2[4]; #endif #if defined( __CL_DOUBLE4__) __cl_double4 v4[2]; #endif #if defined( __CL_DOUBLE8__ ) __cl_double8 v8; #endif }cl_double8; typedef union { cl_double CL_ALIGNED(128) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; #endif #if defined( __CL_DOUBLE2__) __cl_double2 v2[8]; #endif #if defined( __CL_DOUBLE4__) __cl_double4 v4[4]; #endif #if defined( __CL_DOUBLE8__ ) __cl_double8 v8[2]; #endif #if defined( __CL_DOUBLE16__ ) __cl_double16 v16; #endif }cl_double16; /* Macro to facilitate debugging * Usage: * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" * Each line thereafter of OpenCL C source must end with: \n\ * The last line ends in "; * * Example: * * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ * kernel void foo( int a, float * b ) \n\ * { \n\ * // my comment \n\ * *b[ get_global_id(0)] = a; \n\ * } \n\ * "; * * This should correctly set up the line, (column) and file information for your source * string so you can do source level debugging. 
*/ #define __CL_STRINGIFY( _x ) # _x #define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) #define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" #ifdef __cplusplus } #endif #if defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__) #if _MSC_VER >=1500 #pragma warning( pop ) #endif #endif #endif /* __CL_PLATFORM_H */ Oclgrind-21.10/src/CL/cl_va_api_media_sharing_intel.h000066400000000000000000000145751413315665100225170ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ /*****************************************************************************\ Copyright (c) 2013-2019 Intel Corporation All Rights Reserved. THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
File Name: cl_va_api_media_sharing_intel.h Abstract: Notes: \*****************************************************************************/ #ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H #define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H #include #include #include #ifdef __cplusplus extern "C" { #endif /****************************************** * cl_intel_va_api_media_sharing extension * *******************************************/ #define cl_intel_va_api_media_sharing 1 /* error codes */ #define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098 #define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099 #define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100 #define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101 /* cl_va_api_device_source_intel */ #define CL_VA_API_DISPLAY_INTEL 0x4094 /* cl_va_api_device_set_intel */ #define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095 #define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096 /* cl_context_info */ #define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097 /* cl_mem_info */ #define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098 /* cl_image_info */ #define CL_IMAGE_VA_API_PLANE_INTEL 0x4099 /* cl_command_type */ #define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A #define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B typedef cl_uint cl_va_api_device_source_intel; typedef cl_uint cl_va_api_device_set_intel; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromVA_APIMediaAdapterINTEL( cl_platform_id platform, cl_va_api_device_source_intel media_adapter_type, void* media_adapter, cl_va_api_device_set_intel media_adapter_set, cl_uint num_entries, cl_device_id* devices, cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)( cl_platform_id platform, cl_va_api_device_source_intel media_adapter_type, void* media_adapter, cl_va_api_device_set_intel media_adapter_set, cl_uint num_entries, cl_device_id* devices, cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromVA_APIMediaSurfaceINTEL( cl_context context, cl_mem_flags flags, VASurfaceID* surface, cl_uint plane, cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)( cl_context context, cl_mem_flags flags, VASurfaceID* surface, cl_uint plane, cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireVA_APIMediaSurfacesINTEL( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseVA_APIMediaSurfacesINTEL( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; #ifdef __cplusplus } #endif #endif 
/* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */ Oclgrind-21.10/src/CL/cl_version.h000066400000000000000000000060651413315665100166730ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2018-2020 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ #ifndef __CL_VERSION_H #define __CL_VERSION_H /* Detect which version to target */ #if !defined(CL_TARGET_OPENCL_VERSION) #pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 300 (OpenCL 3.0)") #define CL_TARGET_OPENCL_VERSION 300 #endif #if CL_TARGET_OPENCL_VERSION != 100 && \ CL_TARGET_OPENCL_VERSION != 110 && \ CL_TARGET_OPENCL_VERSION != 120 && \ CL_TARGET_OPENCL_VERSION != 200 && \ CL_TARGET_OPENCL_VERSION != 210 && \ CL_TARGET_OPENCL_VERSION != 220 && \ CL_TARGET_OPENCL_VERSION != 300 #pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 300 (OpenCL 3.0)") #undef CL_TARGET_OPENCL_VERSION #define CL_TARGET_OPENCL_VERSION 300 #endif /* OpenCL Version */ #if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0) #define CL_VERSION_3_0 1 #endif #if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2) #define CL_VERSION_2_2 1 #endif #if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1) #define CL_VERSION_2_1 1 #endif #if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0) #define CL_VERSION_2_0 1 #endif #if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2) #define CL_VERSION_1_2 1 #endif #if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1) #define CL_VERSION_1_1 1 #endif #if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0) #define CL_VERSION_1_0 1 #endif /* Allow deprecated APIs for older OpenCL versions. */ #if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) #define CL_USE_DEPRECATED_OPENCL_2_2_APIS #endif #if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) #define CL_USE_DEPRECATED_OPENCL_2_1_APIS #endif #if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) #define CL_USE_DEPRECATED_OPENCL_2_0_APIS #endif #if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #endif #if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) #define CL_USE_DEPRECATED_OPENCL_1_1_APIS #endif #if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) #define CL_USE_DEPRECATED_OPENCL_1_0_APIS #endif #endif /* __CL_VERSION_H */ Oclgrind-21.10/src/CL/opencl.h000066400000000000000000000017441413315665100160070ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2020 The Khronos Group Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ #ifndef __OPENCL_H #define __OPENCL_H #ifdef __cplusplus extern "C" { #endif #include #include #include #include #ifdef __cplusplus } #endif #endif /* __OPENCL_H */ Oclgrind-21.10/src/core/000077500000000000000000000000001413315665100150025ustar00rootroot00000000000000Oclgrind-21.10/src/core/Context.cpp000066400000000000000000000336741413315665100171470ustar00rootroot00000000000000// Context.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #include "config.h" #if defined(_WIN32) && !defined(__MINGW32__) #include #undef ERROR #else #include #endif #include #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "Context.h" #include "Kernel.h" #include "KernelInvocation.h" #include "Memory.h" #include "Program.h" #include "WorkGroup.h" #include "WorkItem.h" #include "plugins/InstructionCounter.h" #include "plugins/InteractiveDebugger.h" #include "plugins/Logger.h" #include "plugins/MemCheck.h" #include "plugins/RaceDetector.h" #include "plugins/Uninitialized.h" using namespace oclgrind; using namespace std; Context::Context() { m_llvmContext = new llvm::LLVMContext; m_globalMemory = new Memory(AddrSpaceGlobal, sizeof(size_t) == 8 ? 
16 : 8, this); m_kernelInvocation = NULL; loadPlugins(); } Context::~Context() { delete m_llvmContext; delete m_globalMemory; unloadPlugins(); } bool Context::isThreadSafe() const { for (const PluginEntry& p : m_plugins) { if (!p.first->isThreadSafe()) return false; } return true; } Memory* Context::getGlobalMemory() const { return m_globalMemory; } llvm::LLVMContext* Context::getLLVMContext() const { return m_llvmContext; } void Context::loadPlugins() { // Create core plugins m_plugins.push_back(make_pair(new Logger(this), true)); m_plugins.push_back(make_pair(new MemCheck(this), true)); if (checkEnv("OCLGRIND_INST_COUNTS")) m_plugins.push_back(make_pair(new InstructionCounter(this), true)); if (checkEnv("OCLGRIND_DATA_RACES")) m_plugins.push_back(make_pair(new RaceDetector(this), true)); if (checkEnv("OCLGRIND_UNINITIALIZED")) m_plugins.push_back(make_pair(new Uninitialized(this), true)); if (checkEnv("OCLGRIND_INTERACTIVE")) m_plugins.push_back(make_pair(new InteractiveDebugger(this), true)); // Load dynamic plugins const char* dynamicPlugins = getenv("OCLGRIND_PLUGINS"); if (dynamicPlugins) { std::istringstream ss(dynamicPlugins); std::string libpath; while (std::getline(ss, libpath, ':')) { #if defined(_WIN32) && !defined(__MINGW32__) HMODULE library = LoadLibraryA(libpath.c_str()); if (!library) { cerr << "Loading Oclgrind plugin failed (LoadLibrary): " << GetLastError() << endl; continue; } void* initialize = GetProcAddress(library, "initializePlugins"); if (!initialize) { cerr << "Loading Oclgrind plugin failed (GetProcAddress): " << GetLastError() << endl; continue; } #else void* library = dlopen(libpath.c_str(), RTLD_NOW); if (!library) { cerr << "Loading Oclgrind plugin failed (dlopen): " << dlerror() << endl; continue; } void* initialize = dlsym(library, "initializePlugins"); if (!initialize) { cerr << "Loading Oclgrind plugin failed (dlsym): " << dlerror() << endl; continue; } #endif ((void (*)(Context*))initialize)(this); m_pluginLibraries.push_back(library); } } } void Context::unloadPlugins() { // Release dynamic plugin libraries list::iterator plibItr; for (plibItr = m_pluginLibraries.begin(); plibItr != m_pluginLibraries.end(); plibItr++) { #if defined(_WIN32) && !defined(__MINGW32__) void* release = GetProcAddress((HMODULE)*plibItr, "releasePlugins"); if (release) { ((void (*)(Context*))release)(this); } FreeLibrary((HMODULE)*plibItr); #else void* release = dlsym(*plibItr, "releasePlugins"); if (release) { ((void (*)(Context*))release)(this); } dlclose(*plibItr); #endif } // Destroy internal plugins PluginList::iterator pItr; for (pItr = m_plugins.begin(); pItr != m_plugins.end(); pItr++) { if (pItr->second) delete pItr->first; } m_plugins.clear(); } void Context::registerPlugin(Plugin* plugin) { m_plugins.push_back(make_pair(plugin, false)); } void Context::unregisterPlugin(Plugin* plugin) { m_plugins.remove(make_pair(plugin, false)); } void Context::logError(const char* error) const { Message msg(ERROR, this); msg << error << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Entity: " << msg.CURRENT_ENTITY << endl << msg.CURRENT_LOCATION << endl; msg.send(); } #define NOTIFY(function, ...) 
\ { \ PluginList::const_iterator pluginItr; \ for (pluginItr = m_plugins.begin(); pluginItr != m_plugins.end(); \ pluginItr++) \ { \ pluginItr->first->function(__VA_ARGS__); \ } \ } void Context::notifyInstructionExecuted(const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) const { NOTIFY(instructionExecuted, workItem, instruction, result); } void Context::notifyKernelBegin(const KernelInvocation* kernelInvocation) const { assert(m_kernelInvocation == NULL); m_kernelInvocation = kernelInvocation; NOTIFY(kernelBegin, kernelInvocation); } void Context::notifyKernelEnd(const KernelInvocation* kernelInvocation) const { NOTIFY(kernelEnd, kernelInvocation); assert(m_kernelInvocation == kernelInvocation); m_kernelInvocation = NULL; } void Context::notifyMemoryAllocated(const Memory* memory, size_t address, size_t size, cl_mem_flags flags, const uint8_t* initData) const { NOTIFY(memoryAllocated, memory, address, size, flags, initData); } void Context::notifyMemoryAtomicLoad(const Memory* memory, AtomicOp op, size_t address, size_t size) const { if (m_kernelInvocation && m_kernelInvocation->getCurrentWorkItem()) { NOTIFY(memoryAtomicLoad, memory, m_kernelInvocation->getCurrentWorkItem(), op, address, size); } } void Context::notifyMemoryAtomicStore(const Memory* memory, AtomicOp op, size_t address, size_t size) const { if (m_kernelInvocation && m_kernelInvocation->getCurrentWorkItem()) { NOTIFY(memoryAtomicStore, memory, m_kernelInvocation->getCurrentWorkItem(), op, address, size); } } void Context::notifyMemoryDeallocated(const Memory* memory, size_t address) const { NOTIFY(memoryDeallocated, memory, address); } void Context::notifyMemoryLoad(const Memory* memory, size_t address, size_t size) const { if (m_kernelInvocation) { if (m_kernelInvocation->getCurrentWorkItem()) { NOTIFY(memoryLoad, memory, m_kernelInvocation->getCurrentWorkItem(), address, size); } else if (m_kernelInvocation->getCurrentWorkGroup()) { NOTIFY(memoryLoad, memory, m_kernelInvocation->getCurrentWorkGroup(), address, size); } } else { NOTIFY(hostMemoryLoad, memory, address, size); } } void Context::notifyMemoryMap(const Memory* memory, size_t address, size_t offset, size_t size, cl_mem_flags flags) const { NOTIFY(memoryMap, memory, address, offset, size, flags); } void Context::notifyMemoryStore(const Memory* memory, size_t address, size_t size, const uint8_t* storeData) const { if (m_kernelInvocation) { if (m_kernelInvocation->getCurrentWorkItem()) { NOTIFY(memoryStore, memory, m_kernelInvocation->getCurrentWorkItem(), address, size, storeData); } else if (m_kernelInvocation->getCurrentWorkGroup()) { NOTIFY(memoryStore, memory, m_kernelInvocation->getCurrentWorkGroup(), address, size, storeData); } } else { NOTIFY(hostMemoryStore, memory, address, size, storeData); } } void Context::notifyMessage(MessageType type, const char* message) const { NOTIFY(log, type, message); } void Context::notifyMemoryUnmap(const Memory* memory, size_t address, const void* ptr) const { NOTIFY(memoryUnmap, memory, address, ptr); } void Context::notifyWorkGroupBarrier(const WorkGroup* workGroup, uint32_t flags) const { NOTIFY(workGroupBarrier, workGroup, flags); } void Context::notifyWorkGroupBegin(const WorkGroup* workGroup) const { NOTIFY(workGroupBegin, workGroup); } void Context::notifyWorkGroupComplete(const WorkGroup* workGroup) const { NOTIFY(workGroupComplete, workGroup); } void Context::notifyWorkItemBegin(const WorkItem* workItem) const { NOTIFY(workItemBegin, workItem); } void 
Context::notifyWorkItemComplete(const WorkItem* workItem) const { NOTIFY(workItemComplete, workItem); } #undef NOTIFY Context::Message::Message(MessageType type, const Context* context) { m_type = type; m_context = context; m_kernelInvocation = context->m_kernelInvocation; } Context::Message& Context::Message::operator<<(const special& id) { switch (id) { case INDENT: m_indentModifiers.push_back(m_stream.tellp()); break; case UNINDENT: m_indentModifiers.push_back(-m_stream.tellp()); break; case CURRENT_KERNEL: *this << m_kernelInvocation->getKernel()->getName(); break; case CURRENT_WORK_ITEM_GLOBAL: { const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); if (workItem) { *this << workItem->getGlobalID(); } else { *this << "(none)"; } break; } case CURRENT_WORK_ITEM_LOCAL: { const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); if (workItem) { *this << workItem->getLocalID(); } else { *this << "(none)"; } break; } case CURRENT_WORK_GROUP: { const WorkGroup* workGroup = m_kernelInvocation->getCurrentWorkGroup(); if (workGroup) { *this << workGroup->getGroupID(); } else { *this << "(none)"; } break; } case CURRENT_ENTITY: { const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); const WorkGroup* workGroup = m_kernelInvocation->getCurrentWorkGroup(); if (workItem) { *this << "Global" << workItem->getGlobalID() << " Local" << workItem->getLocalID() << " "; } if (workGroup) { *this << "Group" << workGroup->getGroupID(); } if (!workItem && !workGroup) { *this << "(unknown)"; } break; } case CURRENT_LOCATION: { const llvm::Instruction* instruction = NULL; const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); const WorkGroup* workGroup = m_kernelInvocation->getCurrentWorkGroup(); if (workItem) { instruction = workItem->getCurrentInstruction(); } else if (workGroup) { instruction = workGroup->getCurrentBarrier(); } *this << instruction; break; } } return *this; } Context::Message& Context::Message::operator<<(const llvm::Instruction* instruction) { // Use mutex as some part of LLVM used by dumpInstruction() is not thread-safe static std::mutex mtx; std::lock_guard lock(mtx); if (instruction) { // Output instruction dumpInstruction(m_stream, instruction); *this << endl; // Output debug information llvm::MDNode* md = instruction->getMetadata("dbg"); if (!md) { *this << "Debugging information not available." 
<< endl; } else { llvm::DILocation* loc = (llvm::DILocation*)md; unsigned lineNumber = loc->getLine(); unsigned columnNumber = loc->getColumn(); llvm::StringRef filename = loc->getFilename(); *this << "At line " << dec << lineNumber << " (column " << columnNumber << ")" << " of " << filename.str() << ":" << endl; // Get source line const Program* program = m_kernelInvocation->getKernel()->getProgram(); const char* line = program->getSourceLine(lineNumber); if (line) { while (isspace(line[0])) line++; *this << " " << line; } else *this << " (source not available)"; } } else { *this << "(location unknown)"; } return *this; } Context::Message& Context::Message::operator<<(std::ostream& (*t)(std::ostream&)) { m_stream << t; return *this; } Context::Message& Context::Message::operator<<(std::ios& (*t)(std::ios&)) { m_stream << t; return *this; } Context::Message& Context::Message::operator<<(std::ios_base& (*t)(std::ios_base&)) { m_stream << t; return *this; } void Context::Message::send() const { string msg; string line; int currentIndent = 0; list::const_iterator itr = m_indentModifiers.begin(); m_stream.clear(); m_stream.seekg(0); while (m_stream.good()) { getline(m_stream, line); // Strip trailing carriage return if present if (!line.empty() && line[line.size() - 1] == '\r') line.pop_back(); // TODO: Wrap long lines msg += line; // Check for indentation modifiers long pos = m_stream.tellg(); if (itr != m_indentModifiers.end() && pos >= abs(*itr)) { if (*itr >= 0) currentIndent++; else currentIndent--; itr++; } if (!m_stream.eof()) { // Add newline and indentation msg += '\n'; for (int i = 0; i < currentIndent; i++) msg += '\t'; } } m_context->notifyMessage(m_type, msg.c_str()); } Oclgrind-21.10/src/core/Context.h000066400000000000000000000074431413315665100166070ustar00rootroot00000000000000// Context.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
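// A minimal sketch of the callback dispatch pattern used by the NOTIFY macro
// in Context.cpp above: every notify*() method walks the list of registered
// plugins and invokes the corresponding hook on each one. The names below
// (DemoPlugin, demoPlugins, demoNotifyWorkItemBegin) are illustrative only and
// are not part of the Oclgrind API; the real PluginEntry also carries a
// thread-safety flag, which this sketch omits.
#include <list>

struct DemoPlugin
{
  virtual ~DemoPlugin() = default;
  virtual void workItemBegin(int globalId) {}
};

static std::list<DemoPlugin*> demoPlugins;

static void demoNotifyWorkItemBegin(int globalId)
{
  // Same shape as NOTIFY(workItemBegin, workItem): forward the event to every
  // registered plugin, in registration order.
  for (DemoPlugin* plugin : demoPlugins)
    plugin->workItemBegin(globalId);
}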
#include "common.h" namespace llvm { class LLVMContext; } namespace oclgrind { class KernelInvocation; class Memory; class Plugin; class WorkGroup; class WorkItem; typedef std::pair PluginEntry; typedef std::list PluginList; class Context { public: Context(); virtual ~Context(); Memory* getGlobalMemory() const; llvm::LLVMContext* getLLVMContext() const; bool isThreadSafe() const; void logError(const char* error) const; // Simulation callbacks void notifyInstructionExecuted(const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) const; void notifyKernelBegin(const KernelInvocation* kernelInvocation) const; void notifyKernelEnd(const KernelInvocation* kernelInvocation) const; void notifyMemoryAllocated(const Memory* memory, size_t address, size_t size, cl_mem_flags flags, const uint8_t* initData) const; void notifyMemoryAtomicLoad(const Memory* memory, AtomicOp op, size_t address, size_t size) const; void notifyMemoryAtomicStore(const Memory* memory, AtomicOp op, size_t address, size_t size) const; void notifyMemoryDeallocated(const Memory* memory, size_t address) const; void notifyMemoryLoad(const Memory* memory, size_t address, size_t size) const; void notifyMemoryMap(const Memory* memory, size_t address, size_t offset, size_t size, cl_map_flags flags) const; void notifyMemoryStore(const Memory* memory, size_t address, size_t size, const uint8_t* storeData) const; void notifyMessage(MessageType type, const char* message) const; void notifyMemoryUnmap(const Memory* memory, size_t address, const void* ptr) const; void notifyWorkGroupBarrier(const WorkGroup* workGroup, uint32_t flags) const; void notifyWorkGroupBegin(const WorkGroup* workGroup) const; void notifyWorkGroupComplete(const WorkGroup* workGroup) const; void notifyWorkItemBegin(const WorkItem* workItem) const; void notifyWorkItemComplete(const WorkItem* workItem) const; // Plugins void registerPlugin(Plugin* plugin); void unregisterPlugin(Plugin* plugin); private: mutable const KernelInvocation* m_kernelInvocation; Memory* m_globalMemory; PluginList m_plugins; std::list m_pluginLibraries; void loadPlugins(); void unloadPlugins(); llvm::LLVMContext* m_llvmContext; public: class Message { public: enum special { INDENT, UNINDENT, CURRENT_KERNEL, CURRENT_WORK_ITEM_GLOBAL, CURRENT_WORK_ITEM_LOCAL, CURRENT_WORK_GROUP, CURRENT_ENTITY, CURRENT_LOCATION, }; Message(MessageType type, const Context* context); Message& operator<<(const special& id); Message& operator<<(const llvm::Instruction* instruction); template Message& operator<<(const T& t); Message& operator<<(std::ostream& (*t)(std::ostream&)); Message& operator<<(std::ios& (*t)(std::ios&)); Message& operator<<(std::ios_base& (*t)(std::ios_base&)); void send() const; private: MessageType m_type; const Context* m_context; const KernelInvocation* m_kernelInvocation; mutable std::stringstream m_stream; std::list m_indentModifiers; }; }; template Context::Message& Context::Message::operator<<(const T& t) { m_stream << t; return *this; } } // namespace oclgrind Oclgrind-21.10/src/core/Kernel.cpp000066400000000000000000000265111413315665100167330ustar00rootroot00000000000000// Kernel.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
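// Usage sketch for the Context::Message stream declared in Context.h above: a
// message is composed with operator<<, the special tokens (INDENT,
// CURRENT_ENTITY, CURRENT_LOCATION, ...) splice in execution state, and send()
// forwards the finished text to every plugin's log() hook. This sketch assumes
// WARNING is one of the MessageType values declared in common.h and is only
// meaningful while a kernel invocation is in progress.
#include <cstddef>
#include <ios>
#include <ostream>

#include "Context.h"

static void demoEmitWarning(const oclgrind::Context* context, size_t address)
{
  using oclgrind::Context;
  Context::Message msg(oclgrind::WARNING, context);
  msg << "Suspicious memory access at address 0x" << std::hex << address
      << std::endl
      << Context::Message::INDENT << "Entity: "
      << Context::Message::CURRENT_ENTITY << std::endl
      << Context::Message::CURRENT_LOCATION << std::endl;
  msg.send();
}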
#include "common.h" #include "config.h" #include #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" #include "llvm/Support/raw_os_ostream.h" #include "Kernel.h" #include "Program.h" using namespace oclgrind; using namespace std; Kernel::Kernel(const Program* program, const llvm::Function* function, const llvm::Module* module) : m_program(program), m_function(function), m_name(function->getName()) { // Set-up global variables llvm::Module::const_global_iterator itr; for (itr = module->global_begin(); itr != module->global_end(); itr++) { llvm::PointerType* type = itr->getType(); switch (type->getPointerAddressSpace()) { case AddrSpacePrivate: { // Get initializer data const llvm::Constant* init = itr->getInitializer(); unsigned size = getTypeSize(init->getType()); TypedValue value = {size, 1, new uint8_t[size]}; getConstantData(value.data, init); m_values[&*itr] = value; break; } case AddrSpaceGlobal: case AddrSpaceConstant: m_values[&*itr] = program->getProgramScopeVar(&*itr).clone(); break; case AddrSpaceLocal: { // Check that local memory variable belongs to this kernel if (!itr->getName().startswith(m_name)) continue; // Get size of allocation TypedValue allocSize = {getTypeSize(itr->getInitializer()->getType()), 1, NULL}; m_values[&*itr] = allocSize; break; } default: FATAL_ERROR("Unsupported GlobalVariable address space: %d", type->getPointerAddressSpace()); } } // Check whether the kernel requires uniform work-groups m_requiresUniformWorkGroups = false; for (auto& AS : m_function->getAttributes()) { if (AS.hasAttribute("uniform-work-group-size")) { const llvm::Attribute& A = AS.getAttribute("uniform-work-group-size"); if (A.getValueAsString().equals("true")) m_requiresUniformWorkGroups = true; break; } } // Get metadata node containing kernel arg info m_metadata = NULL; llvm::NamedMDNode* md = module->getNamedMetadata("opencl.kernels"); if (md) { for (unsigned i = 0; i < md->getNumOperands(); i++) { llvm::MDNode* node = md->getOperand(i); llvm::ConstantAsMetadata* cam = llvm::dyn_cast(node->getOperand(0).get()); if (!cam) continue; llvm::Function* function = ((llvm::Function*)cam->getValue()); if (function->getName() == m_name) { m_metadata = node; break; } } } } Kernel::Kernel(const Kernel& kernel) : m_program(kernel.m_program) { m_function = kernel.m_function; m_name = kernel.m_name; m_metadata = kernel.m_metadata; m_requiresUniformWorkGroups = kernel.m_requiresUniformWorkGroups; for (auto itr = kernel.m_values.begin(); itr != kernel.m_values.end(); itr++) { m_values[itr->first] = itr->second.clone(); } } Kernel::~Kernel() { TypedValueMap::iterator itr; for (itr = m_values.begin(); itr != m_values.end(); itr++) { delete[] itr->second.data; } } bool Kernel::allArgumentsSet() const { llvm::Function::const_arg_iterator itr; for (itr = m_function->arg_begin(); itr != m_function->arg_end(); itr++) { if (!m_values.count(&*itr)) { return false; } } return true; } const llvm::Argument* Kernel::getArgument(unsigned int index) const { assert(index < getNumArguments()); llvm::Function::const_arg_iterator argItr = m_function->arg_begin(); for (unsigned i = 0; i < index; i++) { argItr++; } return &*argItr; } unsigned int Kernel::getArgumentAccessQualifier(unsigned int index) const { assert(index < getNumArguments()); // Get metadata const llvm::Metadata* md = getArgumentMetadata("kernel_arg_access_qual", index); if (!md) { return -1; } // Get qualifier string const llvm::MDString* str = llvm::dyn_cast(md); llvm::StringRef access = str->getString(); if 
(access == "read_only") { return CL_KERNEL_ARG_ACCESS_READ_ONLY; } else if (access == "write_only") { return CL_KERNEL_ARG_ACCESS_WRITE_ONLY; } else if (access == "read_write") { return CL_KERNEL_ARG_ACCESS_READ_WRITE; } return CL_KERNEL_ARG_ACCESS_NONE; } unsigned int Kernel::getArgumentAddressQualifier(unsigned int index) const { assert(index < getNumArguments()); // Get metadata const llvm::Metadata* md = getArgumentMetadata("kernel_arg_addr_space", index); if (!md) { return -1; } switch (getMDAsConstInt(md)->getZExtValue()) { case AddrSpacePrivate: return CL_KERNEL_ARG_ADDRESS_PRIVATE; case AddrSpaceGlobal: return CL_KERNEL_ARG_ADDRESS_GLOBAL; case AddrSpaceConstant: return CL_KERNEL_ARG_ADDRESS_CONSTANT; case AddrSpaceLocal: return CL_KERNEL_ARG_ADDRESS_LOCAL; default: return -1; } } const llvm::Metadata* Kernel::getArgumentMetadata(string name, unsigned int index) const { llvm::MDNode* node = m_function->getMetadata(name); if (node) return node->getOperand(index); if (!m_metadata) { return NULL; } // Loop over all metadata nodes for this kernel for (unsigned i = 0; i < m_metadata->getNumOperands(); i++) { const llvm::MDOperand& op = m_metadata->getOperand(i); if (llvm::MDNode* node = llvm::dyn_cast(op.get())) { // Check if node matches target name if (node->getNumOperands() > 0 && ((llvm::MDString*)(node->getOperand(0).get()))->getString() == name) { return node->getOperand(index + 1).get(); } } } return NULL; } const llvm::StringRef Kernel::getArgumentName(unsigned int index) const { return getArgument(index)->getName(); } const llvm::StringRef Kernel::getArgumentTypeName(unsigned int index) const { assert(index < getNumArguments()); // Get metadata const llvm::Metadata* md = getArgumentMetadata("kernel_arg_type", index); if (!md) { return ""; } llvm::StringRef name = llvm::dyn_cast(md)->getString(); size_t imgStart = name.find(" image"); if (imgStart != llvm::StringRef::npos) { name = name.substr(imgStart + 1); } return name; } unsigned int Kernel::getArgumentTypeQualifier(unsigned int index) const { assert(index < getNumArguments()); // Get metadata const llvm::Metadata* md = getArgumentMetadata("kernel_arg_type_qual", index); if (!md) { return -1; } // Ignore type qualifiers for non-pointer arguments const llvm::Argument* arg = getArgument(index); if (!arg->getType()->isPointerTy() || arg->hasByValAttr()) return CL_KERNEL_ARG_TYPE_NONE; // Get qualifiers const llvm::MDString* str = llvm::dyn_cast(md); istringstream iss(str->getString().str()); unsigned int result = CL_KERNEL_ARG_TYPE_NONE; while (!iss.eof()) { string tok; iss >> tok; if (tok == "const") { result |= CL_KERNEL_ARG_TYPE_CONST; } else if (tok == "restrict") { result |= CL_KERNEL_ARG_TYPE_RESTRICT; } else if (tok == "volatile") { result |= CL_KERNEL_ARG_TYPE_VOLATILE; } } return result; } size_t Kernel::getArgumentSize(unsigned int index) const { const llvm::Argument* argument = getArgument(index); const llvm::Type* type = argument->getType(); // Check if pointer argument if (type->isPointerTy() && argument->hasByValAttr()) { return getTypeSize(type->getPointerElementType()); } return getTypeSize(type); } string Kernel::getAttributes() const { ostringstream attributes(""); llvm::MDNode* node; node = m_function->getMetadata("reqd_work_group_size"); if (node) { attributes << "reqd_work_group_size(" << getMDAsConstInt(node->getOperand(0))->getZExtValue() << "," << getMDAsConstInt(node->getOperand(1))->getZExtValue() << "," << getMDAsConstInt(node->getOperand(2))->getZExtValue() << ") "; } node = 
m_function->getMetadata("work_group_size_hint"); if (node) { attributes << "work_group_size_hint(" << getMDAsConstInt(node->getOperand(0))->getZExtValue() << "," << getMDAsConstInt(node->getOperand(1))->getZExtValue() << "," << getMDAsConstInt(node->getOperand(2))->getZExtValue() << ") "; } node = m_function->getMetadata("vec_type_hint"); if (node) { // Get type hint size_t n = 1; llvm::Metadata* md = node->getOperand(0).get(); llvm::ValueAsMetadata* vam = llvm::dyn_cast(md); const llvm::Type* type = vam->getType(); if (type->isVectorTy()) { auto vecType = llvm::cast(type); n = vecType->getNumElements(); type = vecType->getElementType(); } // Generate attribute string attributes << "vec_type_hint(" << flush; llvm::raw_os_ostream out(attributes); type->print(out); out.flush(); attributes << n << ") "; } return attributes.str(); } const llvm::Function* Kernel::getFunction() const { return m_function; } size_t Kernel::getLocalMemorySize() const { size_t sz = 0; for (auto value = m_values.begin(); value != m_values.end(); value++) { const llvm::Type* type = value->first->getType(); if (type->isPointerTy() && type->getPointerAddressSpace() == AddrSpaceLocal) { sz += value->second.size; } } return sz; } const std::string& Kernel::getName() const { return m_name; } unsigned int Kernel::getNumArguments() const { return m_function->arg_size(); } const Program* Kernel::getProgram() const { return m_program; } void Kernel::getRequiredWorkGroupSize(size_t reqdWorkGroupSize[3]) const { memset(reqdWorkGroupSize, 0, 3 * sizeof(size_t)); for (int j = 0; j < 3; j++) { const llvm::Metadata* md = getArgumentMetadata("reqd_work_group_size", j); if (md) reqdWorkGroupSize[j] = getMDAsConstInt(md)->getZExtValue(); } } bool Kernel::requiresUniformWorkGroups() const { return m_requiresUniformWorkGroups; } void Kernel::setArgument(unsigned int index, TypedValue value) { assert(index < m_function->arg_size()); const llvm::Value* argument = getArgument(index); // Deallocate existing argument if (m_values.count(argument)) { delete[] m_values[argument].data; } if (getArgumentTypeName(index).str() == "sampler_t") { // Get an llvm::ConstantInt that represents the sampler value llvm::Type* i32 = llvm::Type::getInt32Ty(m_program->getLLVMContext()); llvm::Constant* samplerValue = llvm::ConstantInt::get(i32, value.getSInt()); // A sampler argument is a pointer to the llvm::ConstantInt value TypedValue sampler; sampler.size = sizeof(size_t); sampler.num = 1; sampler.data = new unsigned char[sizeof(size_t)]; sampler.setPointer((size_t)samplerValue); m_values[argument] = sampler; } else { m_values[argument] = value.clone(); } } TypedValueMap::const_iterator Kernel::values_begin() const { return m_values.begin(); } TypedValueMap::const_iterator Kernel::values_end() const { return m_values.end(); } Oclgrind-21.10/src/core/Kernel.h000066400000000000000000000040451413315665100163760ustar00rootroot00000000000000// Kernel.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
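// The kernel_arg_* metadata parsed in Kernel.cpp above is what backs the
// clGetKernelArgInfo host API. A small host-side sketch querying the address
// qualifier and type name of argument 0; it assumes `kernel` is a valid
// cl_kernel built from source with -cl-kernel-arg-info (which Oclgrind passes
// by default), and error checking is omitted for brevity.
#include <CL/cl.h>
#include <cstdio>

static void demoPrintArgInfo(cl_kernel kernel)
{
  cl_kernel_arg_address_qualifier addr;
  clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(addr),
                     &addr, NULL);

  char typeName[64];
  clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_TYPE_NAME, sizeof(typeName),
                     typeName, NULL);

  printf("arg 0: %s (%s)\n", typeName,
         addr == CL_KERNEL_ARG_ADDRESS_GLOBAL     ? "global"
         : addr == CL_KERNEL_ARG_ADDRESS_LOCAL    ? "local"
         : addr == CL_KERNEL_ARG_ADDRESS_CONSTANT ? "constant"
                                                  : "private");
}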
#include "common.h" #include "llvm/ADT/StringRef.h" namespace llvm { class Argument; class Constant; class Function; class GlobalVariable; class MDNode; class Metadata; class Module; } // namespace llvm namespace oclgrind { class Memory; class Program; class Kernel { public: Kernel(const Program* program, const llvm::Function* function, const llvm::Module* module); Kernel(const Kernel& kernel); virtual ~Kernel(); TypedValueMap::const_iterator values_begin() const; TypedValueMap::const_iterator values_end() const; bool allArgumentsSet() const; unsigned int getArgumentAccessQualifier(unsigned int index) const; unsigned int getArgumentAddressQualifier(unsigned int index) const; const llvm::StringRef getArgumentName(unsigned int index) const; size_t getArgumentSize(unsigned int index) const; const llvm::StringRef getArgumentTypeName(unsigned int index) const; unsigned int getArgumentTypeQualifier(unsigned int index) const; std::string getAttributes() const; const llvm::Function* getFunction() const; size_t getLocalMemorySize() const; const std::string& getName() const; unsigned int getNumArguments() const; const Program* getProgram() const; void getRequiredWorkGroupSize(size_t reqdWorkGroupSize[3]) const; bool requiresUniformWorkGroups() const; void setArgument(unsigned int index, TypedValue value); private: const Program* m_program; const llvm::Function* m_function; const llvm::MDNode* m_metadata; std::string m_name; TypedValueMap m_values; bool m_requiresUniformWorkGroups; const llvm::Argument* getArgument(unsigned int index) const; const llvm::Metadata* getArgumentMetadata(std::string name, unsigned int index) const; }; } // namespace oclgrind Oclgrind-21.10/src/core/KernelInvocation.cpp000066400000000000000000000202011413315665100207530ustar00rootroot00000000000000// KernelInvocation.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #include #include #include #include "Context.h" #include "Kernel.h" #include "KernelInvocation.h" #include "Memory.h" #include "Program.h" #include "WorkGroup.h" #include "WorkItem.h" using namespace oclgrind; using namespace std; struct { int id; WorkGroup* workGroup; WorkItem* workItem; } static THREAD_LOCAL workerState; static atomic nextGroupIndex; KernelInvocation::KernelInvocation(const Context* context, const Kernel* kernel, unsigned int workDim, Size3 globalOffset, Size3 globalSize, Size3 localSize) : m_context(context), m_kernel(kernel) { m_workDim = workDim; m_globalOffset = globalOffset; m_globalSize = globalSize; m_localSize = localSize; m_numGroups.x = m_globalSize.x / m_localSize.x; m_numGroups.y = m_globalSize.y / m_localSize.y; m_numGroups.z = m_globalSize.z / m_localSize.z; if (!m_kernel->requiresUniformWorkGroups()) { m_numGroups.x += m_globalSize.x % m_localSize.x ? 1 : 0; m_numGroups.y += m_globalSize.y % m_localSize.y ? 1 : 0; m_numGroups.z += m_globalSize.z % m_localSize.z ? 
1 : 0; } // Check for user overriding number of threads m_numWorkers = getEnvInt("OCLGRIND_NUM_THREADS", thread::hardware_concurrency(), false); if (!m_numWorkers || !m_context->isThreadSafe()) m_numWorkers = 1; // Check for quick-mode environment variable if (checkEnv("OCLGRIND_QUICK")) { // Only run first and last work-groups in quick-mode Size3 firstGroup(0, 0, 0); Size3 lastGroup(m_numGroups.x - 1, m_numGroups.y - 1, m_numGroups.z - 1); m_workGroups.push_back(firstGroup); if (lastGroup != firstGroup) m_workGroups.push_back(lastGroup); } else { for (size_t k = 0; k < m_numGroups.z; k++) { for (size_t j = 0; j < m_numGroups.y; j++) { for (size_t i = 0; i < m_numGroups.x; i++) { m_workGroups.push_back(Size3(i, j, k)); } } } } } KernelInvocation::~KernelInvocation() { // Destroy any remaining work-groups while (!m_runningGroups.empty()) { delete m_runningGroups.front(); m_runningGroups.pop_front(); } } const Context* KernelInvocation::getContext() const { return m_context; } const WorkGroup* KernelInvocation::getCurrentWorkGroup() const { return workerState.workGroup; } const WorkItem* KernelInvocation::getCurrentWorkItem() const { return workerState.workItem; } Size3 KernelInvocation::getGlobalOffset() const { return m_globalOffset; } Size3 KernelInvocation::getGlobalSize() const { return m_globalSize; } const Kernel* KernelInvocation::getKernel() const { return m_kernel; } Size3 KernelInvocation::getLocalSize() const { return m_localSize; } Size3 KernelInvocation::getNumGroups() const { return m_numGroups; } size_t KernelInvocation::getWorkDim() const { return m_workDim; } void KernelInvocation::run(const Context* context, Kernel* kernel, unsigned int workDim, Size3 globalOffset, Size3 globalSize, Size3 localSize) { // Create kernel invocation KernelInvocation* ki = new KernelInvocation( context, kernel, workDim, globalOffset, globalSize, localSize); // Run kernel context->notifyKernelBegin(ki); ki->run(); context->notifyKernelEnd(ki); delete ki; } void KernelInvocation::run() { nextGroupIndex = 0; // Create worker threads // TODO: Run in main thread if only 1 worker vector threads; for (unsigned i = 0; i < m_numWorkers; i++) { threads.push_back(thread(&KernelInvocation::runWorker, this, i)); } // Wait for workers to complete for (unsigned i = 0; i < m_numWorkers; i++) { threads[i].join(); } } int KernelInvocation::getWorkerID() const { return workerState.id; } void KernelInvocation::runWorker(int id) { workerState.workGroup = NULL; workerState.workItem = NULL; workerState.id = id; try { while (true) { // Move to next work-group if (!m_runningGroups.empty()) { // Take next work-group from running pool workerState.workGroup = m_runningGroups.front(); m_runningGroups.pop_front(); } else { // Take next work-group from pending pool unsigned index = nextGroupIndex++; if (index >= m_workGroups.size()) // No more work to do break; Size3 wgid = m_workGroups[index]; Size3 wgsize = m_localSize; // Handle remainder work-groups for (unsigned i = 0; i < 3; i++) { if (wgsize[i] * (wgid[i] + 1) > m_globalSize[i]) wgsize[i] = m_globalSize[i] % wgsize[i]; } workerState.workGroup = new WorkGroup(this, wgid, wgsize); m_context->notifyWorkGroupBegin(workerState.workGroup); } // Execute work-group workerState.workItem = workerState.workGroup->getNextWorkItem(); while (workerState.workItem) { // Run work-item until complete or at barrier while (workerState.workItem->getState() == WorkItem::READY) { workerState.workItem->step(); } // Move to next work-item workerState.workItem = 
workerState.workGroup->getNextWorkItem(); if (workerState.workItem) continue; // No more work-items in READY state // Check if there are work-items at a barrier if (workerState.workGroup->hasBarrier()) { // Resume execution workerState.workGroup->clearBarrier(); workerState.workItem = workerState.workGroup->getNextWorkItem(); } } // Work-group has finished m_context->notifyWorkGroupComplete(workerState.workGroup); delete workerState.workGroup; workerState.workGroup = NULL; } } catch (FatalError& err) { ostringstream info; info << "OCLGRIND FATAL ERROR " << "(" << err.getFile() << ":" << err.getLine() << ")" << endl << err.what(); m_context->logError(info.str().c_str()); if (workerState.workGroup) delete workerState.workGroup; } } bool KernelInvocation::switchWorkItem(const Size3 gid) { assert(m_numWorkers == 1); // Compute work-group ID Size3 group(gid.x / m_localSize.x, gid.y / m_localSize.y, gid.z / m_localSize.z); bool found = false; WorkGroup* previousWorkGroup = workerState.workGroup; // Check if we're already running the work-group if (group == previousWorkGroup->getGroupID()) { found = true; } // Check if work-group is in running pool if (!found) { std::list::iterator rItr; for (rItr = m_runningGroups.begin(); rItr != m_runningGroups.end(); rItr++) { if (group == (*rItr)->getGroupID()) { workerState.workGroup = *rItr; m_runningGroups.erase(rItr); found = true; break; } } } // Check if work-group is in pending pool if (!found) { std::vector::iterator pItr; for (pItr = m_workGroups.begin() + nextGroupIndex; pItr != m_workGroups.end(); pItr++) { if (group == *pItr) { workerState.workGroup = new WorkGroup(this, group); m_context->notifyWorkGroupBegin(workerState.workGroup); found = true; // Re-order list of groups accordingly // Safe since this is not in a multi-threaded context m_workGroups.erase(pItr); m_workGroups.insert(m_workGroups.begin() + nextGroupIndex, group); nextGroupIndex++; break; } } } if (!found) { return false; } if (previousWorkGroup != workerState.workGroup) { m_runningGroups.push_back(previousWorkGroup); } // Get work-item Size3 lid(gid.x % m_localSize.x, gid.y % m_localSize.y, gid.z % m_localSize.z); workerState.workItem = workerState.workGroup->getWorkItem(lid); return true; } Oclgrind-21.10/src/core/KernelInvocation.h000066400000000000000000000031221413315665100204230ustar00rootroot00000000000000// KernelInvocation.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
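// Worked example of the work-group decomposition performed in
// KernelInvocation.cpp above: the group count rounds up when non-uniform
// work-groups are permitted, and the trailing group in each dimension is
// shrunk to the remainder. The names below are illustrative only.
#include <cstddef>

struct DemoDecomposition
{
  size_t numGroups;     // number of work-groups in one dimension
  size_t lastGroupSize; // size of the final (possibly partial) work-group
};

static DemoDecomposition demoDecompose(size_t globalSize, size_t localSize,
                                       bool uniformRequired)
{
  DemoDecomposition d;
  d.numGroups = globalSize / localSize;
  size_t remainder = globalSize % localSize;
  if (remainder && !uniformRequired)
    d.numGroups += 1; // an extra, smaller group covers the tail
  d.lastGroupSize = (remainder && !uniformRequired) ? remainder : localSize;
  return d;
}

// e.g. globalSize=100, localSize=32, uniformRequired=false
//      -> numGroups=4, lastGroupSize=4   (32 + 32 + 32 + 4 = 100)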
#include "common.h" namespace oclgrind { class Context; class Kernel; class WorkGroup; class WorkItem; class KernelInvocation { public: static void run(const Context* context, Kernel* kernel, unsigned int workDim, Size3 globalOffset, Size3 globalSize, Size3 localSize); const Context* getContext() const; const WorkGroup* getCurrentWorkGroup() const; const WorkItem* getCurrentWorkItem() const; Size3 getGlobalOffset() const; Size3 getGlobalSize() const; Size3 getLocalSize() const; const Kernel* getKernel() const; Size3 getNumGroups() const; size_t getWorkDim() const; bool switchWorkItem(const Size3 gid); int getWorkerID() const; private: KernelInvocation(const Context* context, const Kernel* kernel, unsigned int workDim, Size3 globalOffset, Size3 globalSize, Size3 localSize); virtual ~KernelInvocation(); void run(); // Kernel launch parameters const Context* m_context; const Kernel* m_kernel; size_t m_workDim; Size3 m_globalOffset; Size3 m_globalSize; Size3 m_localSize; Size3 m_numGroups; // Current execution state std::vector m_workGroups; std::list m_runningGroups; // Worker threads void runWorker(int id); unsigned m_numWorkers; }; } // namespace oclgrind Oclgrind-21.10/src/core/Memory.cpp000066400000000000000000000235551413315665100167700ustar00rootroot00000000000000// Memory.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #include #include #include #include #include "Context.h" #include "Memory.h" #include "WorkGroup.h" #include "WorkItem.h" using namespace oclgrind; using namespace std; // Multiple mutexes to mitigate risk of unnecessary synchronisation in atomics #define NUM_ATOMIC_MUTEXES 64 // Must be power of two mutex atomicMutex[NUM_ATOMIC_MUTEXES]; #define ATOMIC_MUTEX(offset) \ atomicMutex[(((offset) >> 2) & (NUM_ATOMIC_MUTEXES - 1))] Memory::Memory(unsigned addrSpace, unsigned bufferBits, const Context* context) { m_context = context; m_addressSpace = addrSpace; m_numBitsBuffer = bufferBits; m_numBitsAddress = ((sizeof(size_t) << 3) - m_numBitsBuffer); m_maxNumBuffers = ((size_t)1 << m_numBitsBuffer) - 1; // 0 reserved for NULL m_maxBufferSize = ((size_t)1 << m_numBitsAddress); clear(); } Memory::~Memory() { clear(); } size_t Memory::allocateBuffer(size_t size, cl_mem_flags flags, const uint8_t* initData) { // Check requested size doesn't exceed maximum if (size > m_maxBufferSize) { return 0; } // Find first unallocated buffer slot unsigned b = getNextBuffer(); if (b >= m_maxNumBuffers) { return 0; } // Create buffer Buffer* buffer = new Buffer; buffer->size = size; buffer->flags = flags; buffer->data = new unsigned char[size]; if (b >= m_memory.size()) { m_memory.push_back(buffer); } else { m_memory[b] = buffer; } m_totalAllocated += size; // Initialize contents of buffer if (initData) memcpy(buffer->data, initData, size); else memset(buffer->data, 0, size); size_t address = ((size_t)b) << m_numBitsAddress; m_context->notifyMemoryAllocated(this, address, size, flags, initData); return address; } template uint64_t Memory::atomic(AtomicOp op, size_t address, uint64_t value); template int64_t Memory::atomic(AtomicOp op, size_t address, int64_t value); template uint32_t Memory::atomic(AtomicOp op, size_t address, uint32_t value); template int32_t Memory::atomic(AtomicOp op, size_t address, int32_t value); template T 
Memory::atomic(AtomicOp op, size_t address, T value) { m_context->notifyMemoryAtomicLoad(this, op, address, sizeof(T)); m_context->notifyMemoryAtomicStore(this, op, address, sizeof(T)); // Bounds check if (!isAddressValid(address, sizeof(T))) { return 0; } // Get buffer size_t offset = extractOffset(address); Buffer* buffer = m_memory[extractBuffer(address)]; T* ptr = (T*)(buffer->data + offset); if (m_addressSpace == AddrSpaceGlobal) ATOMIC_MUTEX(offset).lock(); T old = *ptr; switch (op) { case AtomicAdd: *ptr = old + value; break; case AtomicAnd: *ptr = old & value; break; case AtomicCmpXchg: FATAL_ERROR("AtomicCmpXchg in generic atomic handler"); break; case AtomicDec: *ptr = old - 1; break; case AtomicInc: *ptr = old + 1; break; case AtomicMax: *ptr = old > value ? old : value; break; case AtomicMin: *ptr = old < value ? old : value; break; case AtomicOr: *ptr = old | value; break; case AtomicSub: *ptr = old - value; break; case AtomicXchg: *ptr = value; break; case AtomicXor: *ptr = old ^ value; break; } if (m_addressSpace == AddrSpaceGlobal) ATOMIC_MUTEX(offset).unlock(); return old; } template uint32_t Memory::atomicCmpxchg(size_t address, uint32_t cmp, uint32_t value); template uint64_t Memory::atomicCmpxchg(size_t address, uint64_t cmp, uint64_t value); template T Memory::atomicCmpxchg(size_t address, T cmp, T value) { m_context->notifyMemoryAtomicLoad(this, AtomicCmpXchg, address, sizeof(T)); // Bounds check if (!isAddressValid(address, sizeof(T))) { return 0; } // Get buffer size_t offset = extractOffset(address); Buffer* buffer = m_memory[extractBuffer(address)]; T* ptr = (T*)(buffer->data + offset); if (m_addressSpace == AddrSpaceGlobal) ATOMIC_MUTEX(offset).lock(); // Perform cmpxchg T old = *ptr; if (old == cmp) { *ptr = value; m_context->notifyMemoryAtomicStore(this, AtomicCmpXchg, address, sizeof(T)); } if (m_addressSpace == AddrSpaceGlobal) ATOMIC_MUTEX(offset).unlock(); return old; } void Memory::clear() { vector::iterator itr; for (itr = m_memory.begin(); itr != m_memory.end(); itr++) { if (*itr) { if (!((*itr)->flags & CL_MEM_USE_HOST_PTR)) { delete[](*itr)->data; } delete *itr; size_t address = (itr - m_memory.begin()) << m_numBitsAddress; m_context->notifyMemoryDeallocated(this, address); } } m_memory.resize(1); m_memory[0] = NULL; m_freeBuffers = queue(); m_totalAllocated = 0; } size_t Memory::createHostBuffer(size_t size, void* ptr, cl_mem_flags flags) { // Check requested size doesn't exceed maximum if (size > m_maxBufferSize) { return 0; } // Find first unallocated buffer slot unsigned b = getNextBuffer(); if (b >= m_maxNumBuffers) { return 0; } // Create buffer Buffer* buffer = new Buffer; buffer->size = size; buffer->flags = flags; buffer->data = (unsigned char*)ptr; if (b >= m_memory.size()) { m_memory.push_back(buffer); } else { m_memory[b] = buffer; } m_totalAllocated += size; size_t address = ((size_t)b) << m_numBitsAddress; m_context->notifyMemoryAllocated(this, address, size, flags, (uint8_t*)ptr); return address; } bool Memory::copy(size_t dst, size_t src, size_t size) { m_context->notifyMemoryLoad(this, src, size); // Check source address if (!isAddressValid(src, size)) { return false; } size_t src_offset = extractOffset(src); Buffer* src_buffer = m_memory.at(extractBuffer(src)); m_context->notifyMemoryStore(this, dst, size, src_buffer->data + src_offset); // Check destination address if (!isAddressValid(dst, size)) { return false; } size_t dst_offset = extractOffset(dst); Buffer* dst_buffer = m_memory.at(extractBuffer(dst)); // Copy data 
memcpy(dst_buffer->data + dst_offset, src_buffer->data + src_offset, size); return true; } void Memory::deallocateBuffer(size_t address) { unsigned buffer = extractBuffer(address); assert(buffer < m_memory.size() && m_memory[buffer]); if (!(m_memory[buffer]->flags & CL_MEM_USE_HOST_PTR)) { delete[] m_memory[buffer]->data; } m_totalAllocated -= m_memory[buffer]->size; m_freeBuffers.push(buffer); delete m_memory[buffer]; m_memory[buffer] = NULL; m_context->notifyMemoryDeallocated(this, address); } void Memory::dump() const { for (unsigned b = 1; b < m_memory.size(); b++) { if (!m_memory[b] || !m_memory[b]->data) { continue; } for (unsigned i = 0; i < m_memory[b]->size; i++) { if (i % 4 == 0) { cout << endl << hex << uppercase << setw(16) << setfill(' ') << right << ((((size_t)b) << m_numBitsAddress) | i) << ":"; } cout << " " << hex << uppercase << setw(2) << setfill('0') << (int)m_memory[b]->data[i]; } } cout << endl; } size_t Memory::extractBuffer(size_t address) const { return (address >> m_numBitsAddress); } size_t Memory::extractOffset(size_t address) const { return (address & (((size_t)-1) >> m_numBitsBuffer)); } unsigned int Memory::getAddressSpace() const { return m_addressSpace; } const Memory::Buffer* Memory::getBuffer(size_t address) const { size_t buf = extractBuffer(address); if (buf == 0 || buf >= m_memory.size() || !m_memory[buf]->data) { return NULL; } return m_memory[buf]; } size_t Memory::getMaxAllocSize() { return m_maxBufferSize; } unsigned Memory::getNextBuffer() { if (m_freeBuffers.empty()) { return m_memory.size(); } else { unsigned b = m_freeBuffers.front(); m_freeBuffers.pop(); return b; } } void* Memory::getPointer(size_t address) const { size_t buffer = extractBuffer(address); // Bounds check if (!isAddressValid(address)) { return NULL; } return m_memory[buffer]->data + extractOffset(address); } size_t Memory::getTotalAllocated() const { return m_totalAllocated; } bool Memory::isAddressValid(size_t address, size_t size) const { size_t buffer = extractBuffer(address); size_t offset = extractOffset(address); if (buffer == 0 || buffer >= m_memory.size() || !m_memory[buffer] || offset + size > m_memory[buffer]->size) { return false; } return true; } bool Memory::load(unsigned char* dest, size_t address, size_t size) const { m_context->notifyMemoryLoad(this, address, size); // Bounds check if (!isAddressValid(address, size)) { return false; } // Get buffer size_t offset = extractOffset(address); Buffer* src = m_memory[extractBuffer(address)]; // Load data memcpy(dest, src->data + offset, size); return true; } void* Memory::mapBuffer(size_t address, size_t offset, size_t size) { size_t buffer = extractBuffer(address); // Bounds check if (!isAddressValid(address, size)) { return NULL; } return m_memory[buffer]->data + offset + extractOffset(address); } bool Memory::store(const unsigned char* source, size_t address, size_t size) { m_context->notifyMemoryStore(this, address, size, source); // Bounds check if (!isAddressValid(address, size)) { return false; } // Get buffer size_t offset = extractOffset(address); Buffer* dst = m_memory[extractBuffer(address)]; // Store data memcpy(dst->data + offset, source, size); return true; } Oclgrind-21.10/src/core/Memory.h000066400000000000000000000036221413315665100164260ustar00rootroot00000000000000// Memory.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. 
For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" namespace oclgrind { class Context; class Memory { public: struct Buffer { size_t size; cl_mem_flags flags; unsigned char* data; }; public: Memory(unsigned addrSpace, unsigned bufferBits, const Context* context); virtual ~Memory(); size_t allocateBuffer(size_t size, cl_mem_flags flags = 0, const uint8_t* initData = NULL); template T atomic(AtomicOp op, size_t address, T value = 0); template T atomicCmpxchg(size_t address, T cmp, T value); void clear(); size_t createHostBuffer(size_t size, void* ptr, cl_mem_flags flags = 0); bool copy(size_t dest, size_t src, size_t size); void deallocateBuffer(size_t address); void dump() const; unsigned int getAddressSpace() const; const Buffer* getBuffer(size_t address) const; void* getPointer(size_t address) const; size_t getTotalAllocated() const; bool isAddressValid(size_t address, size_t size = 1) const; bool load(unsigned char* dst, size_t address, size_t size = 1) const; void* mapBuffer(size_t address, size_t offset, size_t size); bool store(const unsigned char* source, size_t address, size_t size = 1); size_t extractBuffer(size_t address) const; size_t extractOffset(size_t address) const; size_t getMaxAllocSize(); private: const Context* m_context; std::queue m_freeBuffers; std::vector m_memory; unsigned int m_addressSpace; size_t m_totalAllocated; unsigned m_numBitsBuffer; unsigned m_numBitsAddress; size_t m_maxNumBuffers; size_t m_maxBufferSize; unsigned getNextBuffer(); }; } // namespace oclgrind Oclgrind-21.10/src/core/Plugin.cpp000066400000000000000000000007431413315665100167500ustar00rootroot00000000000000// Plugin.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "Plugin.h" using namespace oclgrind; Plugin::Plugin(const Context* context) : m_context(context) {} Plugin::~Plugin() {} bool Plugin::isThreadSafe() const { return true; } Oclgrind-21.10/src/core/Plugin.h000066400000000000000000000060601413315665100164130ustar00rootroot00000000000000// Plugin.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
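// Worked example of the flat address encoding used by Memory above: the top
// `bufferBits` bits of an address select a buffer slot and the remaining bits
// are the byte offset within it, matching extractBuffer()/extractOffset() in
// Memory.cpp. The names and the 16-bit split below are illustrative only; the
// real buffer/offset split is chosen per address space.
#include <cstddef>

static const unsigned demoBufferBits = 16;
static const unsigned demoAddressBits = (sizeof(size_t) * 8) - demoBufferBits;

static size_t demoMakeAddress(size_t bufferIndex, size_t offset)
{
  return (bufferIndex << demoAddressBits) | offset;
}

static size_t demoExtractBuffer(size_t address)
{
  return address >> demoAddressBits;
}

static size_t demoExtractOffset(size_t address)
{
  return address & (((size_t)-1) >> demoBufferBits);
}

// demoMakeAddress(3, 0x40) -> buffer 3, offset 0x40; the value round-trips
// through the two extract helpers, and buffer index 0 stays reserved for NULL.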
#pragma once #include "common.h" namespace oclgrind { class Context; class Kernel; class KernelInvocation; class Memory; class WorkGroup; class WorkItem; class Plugin { public: Plugin(const Context* context); virtual ~Plugin(); virtual void hostMemoryLoad(const Memory* memory, size_t address, size_t size) { } virtual void hostMemoryStore(const Memory* memory, size_t address, size_t size, const uint8_t* storeData) { } virtual void instructionExecuted(const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) { } virtual void kernelBegin(const KernelInvocation* kernelInvocation) {} virtual void kernelEnd(const KernelInvocation* kernelInvocation) {} virtual void log(MessageType type, const char* message) {} virtual void memoryAllocated(const Memory* memory, size_t address, size_t size, cl_mem_flags flags, const uint8_t* initData) { } virtual void memoryAtomicLoad(const Memory* memory, const WorkItem* workItem, AtomicOp op, size_t address, size_t size) { } virtual void memoryAtomicStore(const Memory* memory, const WorkItem* workItem, AtomicOp op, size_t address, size_t size) { } virtual void memoryDeallocated(const Memory* memory, size_t address) {} virtual void memoryLoad(const Memory* memory, const WorkItem* workItem, size_t address, size_t size) { } virtual void memoryLoad(const Memory* memory, const WorkGroup* workGroup, size_t address, size_t size) { } virtual void memoryMap(const Memory* memory, size_t address, size_t offset, size_t size, cl_map_flags flags) { } virtual void memoryStore(const Memory* memory, const WorkItem* workItem, size_t address, size_t size, const uint8_t* storeData) { } virtual void memoryStore(const Memory* memory, const WorkGroup* workGroup, size_t address, size_t size, const uint8_t* storeData) { } virtual void memoryUnmap(const Memory* memory, size_t address, const void* ptr) { } virtual void workGroupBarrier(const WorkGroup* workGroup, uint32_t flags) {} virtual void workGroupBegin(const WorkGroup* workGroup) {} virtual void workGroupComplete(const WorkGroup* workGroup) {} virtual void workItemBegin(const WorkItem* workItem) {} virtual void workItemComplete(const WorkItem* workItem) {} virtual bool isThreadSafe() const; protected: const Context* m_context; }; } // namespace oclgrind Oclgrind-21.10/src/core/Program.cpp000066400000000000000000001000751413315665100171200ustar00rootroot00000000000000// Program.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
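// A minimal sketch of a Plugin subclass built against the interface declared
// in Plugin.h above: override only the callbacks of interest and leave the
// rest as no-ops. The class and include path are illustrative (they assume the
// Oclgrind src/ directory is on the include path); how the plugin is
// registered with the Context (registerPlugin) is not shown here.
#include <cstdio>

#include "core/Plugin.h"

namespace
{
class DemoCountingPlugin : public oclgrind::Plugin
{
public:
  DemoCountingPlugin(const oclgrind::Context* context)
      : oclgrind::Plugin(context), m_loads(0)
  {
  }

  // Called for every work-item load; just count them.
  virtual void memoryLoad(const oclgrind::Memory* memory,
                          const oclgrind::WorkItem* workItem, size_t address,
                          size_t size) override
  {
    m_loads++;
  }

  // Report the total when the kernel invocation finishes.
  virtual void kernelEnd(
    const oclgrind::KernelInvocation* kernelInvocation) override
  {
    printf("work-item loads observed: %lu\n", (unsigned long)m_loads);
  }

private:
  size_t m_loads;
};
} // namespace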
#include "common.h" #include "config.h" #include #include "clang/CodeGen/CodeGenAction.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Lex/PreprocessorOptions.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/Linker/Linker.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/Cloning.h" #if defined(_WIN32) && !defined(__MINGW32__) #include #else #include #endif #include "Context.h" #include "Kernel.h" #include "Memory.h" #include "Program.h" #include "WorkItem.h" #define ENV_DUMP_SPIR "OCLGRIND_DUMP_SPIR" #define CL_DUMP_NAME "/tmp/oclgrind_%lX.cl" #define IR_DUMP_NAME "/tmp/oclgrind_%lX.s" #define BC_DUMP_NAME "/tmp/oclgrind_%lX.bc" #if defined(_WIN32) #define REMAP_DIR "Z:/remapped/" #else #define REMAP_DIR "/remapped/" #endif #define REMAP_INPUT "input.cl" #define OPENCL_C_H_PATH REMAP_DIR "opencl-c.h" extern const char OPENCL_C_H_DATA[]; const char* EXTENSIONS[] = { "cl_khr_fp64", "cl_khr_3d_image_writes", "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics", "cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics", "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics", "cl_khr_byte_addressable_store", }; #define OCLGRIND_BINARY_TYPE "oclgrind_binary_type" using namespace oclgrind; using namespace std; namespace { void setBinaryType(llvm::Module& mod, cl_program_binary_type type) { llvm::LLVMContext& ctx = mod.getContext(); llvm::MDNode* binaryTypeMD = llvm::MDNode::get( ctx, llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( ctx, llvm::APInt(sizeof(cl_program_binary_type), type)))); llvm::NamedMDNode* md = mod.getOrInsertNamedMetadata(OCLGRIND_BINARY_TYPE); md->clearOperands(); md->addOperand(binaryTypeMD); } } // namespace Program::Program(const Context* context, llvm::Module* module) : m_module(module), m_context(context) { m_buildLog = ""; m_buildOptions = ""; m_buildStatus = CL_BUILD_SUCCESS; m_uid = generateUID(); m_totalProgramScopeVarSize = 0; allocateProgramScopeVars(); m_binaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; // Override binary based on module flag if present. 
llvm::NamedMDNode* md = m_module->getNamedMetadata(OCLGRIND_BINARY_TYPE); if (md && md->getNumOperands() > 0) { llvm::MDNode* node = llvm::dyn_cast(md->getOperand(0)); if (node && node->getNumOperands() > 0) { llvm::ConstantAsMetadata* cam = llvm::dyn_cast(node->getOperand(0).get()); if (cam) { llvm::ConstantInt* value = llvm::dyn_cast(cam->getValue()); if (value) { m_binaryType = static_cast( value->getValue().getZExtValue()); } } } } } Program::Program(const Context* context, const string& source) : m_context(context) { m_source = source; m_buildLog = ""; m_buildOptions = ""; m_buildStatus = CL_BUILD_NONE; m_uid = 0; m_totalProgramScopeVarSize = 0; // Split source into individual lines m_sourceLines.clear(); if (!source.empty()) { std::stringstream ss(source); std::string line; while (std::getline(ss, line, '\n')) { m_sourceLines.push_back(line); } } } Program::~Program() { clearInterpreterCache(); deallocateProgramScopeVars(); } void Program::allocateProgramScopeVars() { deallocateProgramScopeVars(); Memory* globalMemory = m_context->getGlobalMemory(); // Create the pointer values for each global variable llvm::Module::const_global_iterator itr; for (itr = m_module->global_begin(); itr != m_module->global_end(); itr++) { unsigned addrspace = itr->getType()->getPointerAddressSpace(); if (addrspace != AddrSpaceGlobal && addrspace != AddrSpaceConstant) continue; // Allocate global variable const llvm::Type* type = itr->getType()->getPointerElementType(); size_t size = getTypeSize(type); size_t ptr = globalMemory->allocateBuffer(size); m_totalProgramScopeVarSize += size; // Create pointer value TypedValue ptrValue = {sizeof(size_t), 1, new uint8_t[sizeof(size_t)]}; ptrValue.setPointer(ptr); m_programScopeVars[&*itr] = ptrValue; } try { // Initialize global variables for (auto itr = m_programScopeVars.begin(); itr != m_programScopeVars.end(); itr++) { auto var = llvm::cast(itr->first); const llvm::Constant* initializer = var->getInitializer(); if (!initializer) continue; size_t varptr = itr->second.getPointer(); if (initializer->getType()->getTypeID() == llvm::Type::PointerTyID) { size_t ptr = resolveConstantPointer(initializer, m_programScopeVars); globalMemory->store((uint8_t*)&ptr, varptr, sizeof(size_t)); } else { size_t size = getTypeSize(initializer->getType()); uint8_t* data = new uint8_t[size]; getConstantData((uint8_t*)data, (const llvm::Constant*)initializer); globalMemory->store(data, varptr, size); delete[] data; } } } catch (FatalError& err) { cerr << endl << "OCLGRIND FATAL ERROR " << "(" << err.getFile() << ":" << err.getLine() << ")" << endl << err.what() << endl << "When initializing program scope global variables" << endl; } } // Utility to split a string up to the next unquoted space // After this returns, input will point to the start of the next string (no // leading spaces), and next will point to where the next string will start. // Modifies the content of input in place. 
void split_token(char* input, char** next) { char* output = input; // Strip leading spaces while (*input == ' ') input++; // Loop until end of string bool quoted = false; while (*input != '\0') { // Stop at space, unless we're in quotes if (*input == ' ' && !quoted) break; if (*input == '"') { // Enter/exit quoted region, don't emit quote quoted = !quoted; } else { // Check for escaped space if (*input == '\\' && *(input + 1) == ' ') input++; // Copy character to output string *output = *input; output++; } input++; } // Set *next to start of next potential string *next = input; if (**next != '\0') (*next)++; // Split token with null terminator *output = '\0'; } bool Program::build(BuildType buildType, const char* options, list
headers) { m_buildStatus = CL_BUILD_IN_PROGRESS; m_buildOptions = options ? options : ""; // Create build log m_buildLog = ""; llvm::raw_string_ostream buildLog(m_buildLog); // Do nothing if program was created with binary if (m_source.empty() && m_module) { m_buildStatus = CL_BUILD_SUCCESS; allocateProgramScopeVars(); return true; } if (m_module) { clearInterpreterCache(); m_module.reset(); } m_binaryType = CL_PROGRAM_BINARY_TYPE_NONE; // Assign a new UID to this program m_uid = generateUID(); // Set compiler arguments vector args; args.push_back("-cl-kernel-arg-info"); args.push_back("-D__IMAGE_SUPPORT__=1"); args.push_back("-D__OPENCL_VERSION__=120"); args.push_back("-fno-builtin"); args.push_back("-fgnu89-inline"); args.push_back("-debug-info-kind=standalone"); args.push_back("-triple"); if (sizeof(size_t) == 4) args.push_back("spir-unknown-unknown"); else args.push_back("spir64-unknown-unknown"); #if !IS_BIG_ENDIAN args.push_back("-D__ENDIAN_LITTLE__=1"); #endif // Disable all extensions std::string cl_ext("-cl-ext=-all"); // Explicitly enable supported extensions for (unsigned i = 0; i < sizeof(EXTENSIONS) / sizeof(const char*); i++) { cl_ext += ",+" + std::string(EXTENSIONS[i]); } args.push_back(cl_ext.c_str()); bool defaultOptimization = true; const char* clstd = NULL; // Add OpenCL build options const char* mainOptions = options; const char* extraOptions = getenv("OCLGRIND_BUILD_OPTIONS"); if (!mainOptions) mainOptions = ""; if (!extraOptions) extraOptions = ""; char* tmpOptions = new char[strlen(mainOptions) + strlen(extraOptions) + 2]; sprintf(tmpOptions, "%s %s", mainOptions, extraOptions); char* opt = tmpOptions; char* next = NULL; while (strlen(opt) > 0) { // Split token up to next unquoted space if (next) opt = next; split_token(opt, &next); if (!strlen(opt)) break; // Ignore options that break PCH if (strcmp(opt, "-cl-fast-relaxed-math") != 0 && strcmp(opt, "-cl-finite-math-only") != 0 && strcmp(opt, "-cl-single-precision-constant") && strcmp(opt, "-cl-unsafe-math-optimizations") != 0) { // Check for optimization flags if (strncmp(opt, "-O", 2) == 0 || strcmp(opt, "-cl-opt-disable") == 0) { defaultOptimization = false; } // Clang no longer supports -cl-no-signed-zeros if (strcmp(opt, "-cl-no-signed-zeros") == 0) continue; // Handle -cl-denorms-are-zero if (strcmp(opt, "-cl-denorms-are-zero") == 0) { args.push_back("-fdenormal-fp-math=preserve-sign"); continue; } // Check for -cl-std flag if (strncmp(opt, "-cl-std=", 8) == 0) { clstd = opt; continue; } args.push_back(opt); } } if (defaultOptimization) { // Disable optimizations by default if in interactive mode if (checkEnv("OCLGRIND_INTERACTIVE")) args.push_back("-O0"); // Otherwise, default to optimizing for size else args.push_back("-Oz"); } if (!clstd) { clstd = "-cl-std=CL1.2"; } args.push_back(clstd); // Pre-compiled header char* pchdir = NULL; char* pch = NULL; if (!checkEnv("OCLGRIND_DISABLE_PCH") && (!strcmp(clstd, "-cl-std=CL1.2") || !strcmp(clstd, "-cl-std=CL2.0"))) { const char* pchdirOverride = getenv("OCLGRIND_PCH_DIR"); if (pchdirOverride) { pchdir = strdup(pchdirOverride); } else { // Get directory containing library #if defined(_WIN32) && !defined(__MINGW32__) char libpath[4096]; HMODULE dll; if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (LPCSTR)&Program::createFromBitcode, &dll) && GetModuleFileNameA(dll, libpath, sizeof(libpath))) { #else Dl_info dlinfo; if (dladdr((const void*)Program::createFromBitcode, &dlinfo)) { const char* libpath = 
dlinfo.dli_fname; #endif // Construct path to PCH directory const char* dirend; #if defined(_WIN32) && !defined(__MINGW32__) if ((dirend = strrchr(libpath, '\\'))) #else if ((dirend = strrchr(libpath, '/'))) #endif { const char* includes_relative = "/../include/oclgrind/"; size_t length = dirend - libpath; pchdir = new char[length + strlen(includes_relative) + 1]; strncpy(pchdir, libpath, length); strcpy(pchdir + length, includes_relative); } } } if (pchdir) { // Select precompiled header pch = new char[strlen(pchdir) + 24]; sprintf(pch, "%s/opencl-c-%s-%d.pch", pchdir, clstd + 10, (sizeof(size_t) == 4 ? 32 : 64)); // Check if precompiled header exists ifstream pchfile(pch); if (!pchfile.good()) { buildLog << "WARNING: Unable to find precompiled header:\n" << pch << "\n"; delete[] pch; pch = NULL; } pchfile.close(); } else { buildLog << "WARNING: Unable to determine precompiled header path\n"; } } if (pch) { args.push_back("-isysroot"); args.push_back(pchdir); args.push_back("-include-pch"); args.push_back(pch); args.push_back("-fno-validate-pch"); } else { // Fall back to embedded opencl-c.h args.push_back("-include"); args.push_back(OPENCL_C_H_PATH); } // Append input file to arguments (remapped later) args.push_back(REMAP_INPUT); // Create diagnostics engine clang::DiagnosticOptions* diagOpts = new clang::DiagnosticOptions(); llvm::IntrusiveRefCntPtr diagID( new clang::DiagnosticIDs()); clang::TextDiagnosticPrinter* diagConsumer = new clang::TextDiagnosticPrinter(buildLog, diagOpts); clang::DiagnosticsEngine diags(diagID, diagOpts, diagConsumer); // Create compiler instance clang::CompilerInstance compiler; compiler.createDiagnostics(diagConsumer, false); // Create compiler invocation std::shared_ptr invocation( new clang::CompilerInvocation); clang::CompilerInvocation::CreateFromArgs(*invocation, args, compiler.getDiagnostics()); compiler.setInvocation(invocation); // Remap include files std::unique_ptr buffer; compiler.getHeaderSearchOpts().AddPath(REMAP_DIR, clang::frontend::Quoted, false, true); list
::iterator itr; for (itr = headers.begin(); itr != headers.end(); itr++) { buffer = llvm::MemoryBuffer::getMemBuffer(itr->second->m_source, "", false); compiler.getPreprocessorOpts().addRemappedFile(REMAP_DIR + itr->first, buffer.release()); } // Remap opencl-c.h buffer = llvm::MemoryBuffer::getMemBuffer(OPENCL_C_H_DATA, "", false); compiler.getPreprocessorOpts().addRemappedFile(OPENCL_C_H_PATH, buffer.release()); // Remap input file buffer = llvm::MemoryBuffer::getMemBuffer(m_source, "", false); compiler.getPreprocessorOpts().addRemappedFile(REMAP_INPUT, buffer.release()); // Compile clang::EmitLLVMOnlyAction action(m_context->getLLVMContext()); if (compiler.ExecuteAction(action)) { // Retrieve module m_module = action.takeModule(); // Strip debug intrinsics if not in interactive mode if (!checkEnv("OCLGRIND_INTERACTIVE")) { stripDebugIntrinsics(); } removeLValueLoads(); allocateProgramScopeVars(); m_buildStatus = CL_BUILD_SUCCESS; if (buildType == BUILD) { m_binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; } else { m_binaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; } setBinaryType(*m_module, m_binaryType); } else { m_buildStatus = CL_BUILD_ERROR; } // Dump temps if required if (checkEnv(ENV_DUMP_SPIR)) { // Temporary directory #if defined(_WIN32) const char* tmpdir = getenv("TEMP"); #else const char* tmpdir = "/tmp"; #endif // Construct unique output filenames size_t sz = snprintf(NULL, 0, "%s/oclgrind_%lX.XX", tmpdir, m_uid) + 1; char* tempCL = new char[sz]; char* tempIR = new char[sz]; char* tempBC = new char[sz]; sprintf(tempCL, "%s/oclgrind_%lX.cl", tmpdir, m_uid); sprintf(tempIR, "%s/oclgrind_%lX.ll", tmpdir, m_uid); sprintf(tempBC, "%s/oclgrind_%lX.bc", tmpdir, m_uid); // Dump source ofstream cl; cl.open(tempCL); cl << m_source; cl.close(); if (m_buildStatus == CL_BUILD_SUCCESS) { // Dump IR std::error_code err; llvm::raw_fd_ostream ir(tempIR, err, llvm::sys::fs::OF_None); llvm::AssemblyAnnotationWriter asmWriter; m_module->print(ir, &asmWriter); ir.close(); // Dump bitcode llvm::raw_fd_ostream bc(tempBC, err, llvm::sys::fs::OF_None); llvm::WriteBitcodeToFile(*m_module, bc); bc.close(); } delete[] tempCL; delete[] tempIR; delete[] tempBC; } delete[] tmpOptions; delete[] pchdir; delete[] pch; return m_buildStatus == CL_BUILD_SUCCESS; } void Program::clearInterpreterCache() { InterpreterCacheMap::iterator itr; for (itr = m_interpreterCache.begin(); itr != m_interpreterCache.end(); itr++) { delete itr->second; } m_interpreterCache.clear(); } Program* Program::createFromBitcode(const Context* context, const unsigned char* bitcode, size_t length) { // Load bitcode from file llvm::StringRef data((const char*)bitcode, length); unique_ptr buffer = llvm::MemoryBuffer::getMemBuffer(data, "", false); if (!buffer) { return NULL; } // Parse bitcode into IR module llvm::Expected> module = parseBitcodeFile(buffer->getMemBufferRef(), *context->getLLVMContext()); if (!module) { return NULL; } return new Program(context, module.get().release()); } Program* Program::createFromBitcodeFile(const Context* context, const string filename) { // Load bitcode from file llvm::ErrorOr> buffer = llvm::MemoryBuffer::getFile(filename); if (!buffer) { return NULL; } // Parse bitcode into IR module llvm::Expected> module = parseBitcodeFile( buffer->get()->getMemBufferRef(), *context->getLLVMContext()); if (!module) { return NULL; } return new Program(context, module.get().release()); } Program* Program::createFromPrograms(const Context* context, list programs, const char* options) { llvm::Module* module = new 
llvm::Module("oclgrind_linked", *context->getLLVMContext()); llvm::Linker linker(*module); // Link modules list::iterator itr; for (itr = programs.begin(); itr != programs.end(); itr++) { unique_ptr m = llvm::CloneModule(*(*itr)->m_module); if (linker.linkInModule(std::move(m))) { return NULL; } } // Set program binary type cl_program_binary_type binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; if (options && strstr(options, "-create-library")) { binaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY; } setBinaryType(*module, binaryType); return new Program(context, module); } Kernel* Program::createKernel(const string name) { if (!m_module) return NULL; // Iterate over functions in module to find kernel llvm::Function* function = NULL; for (auto F = m_module->begin(); F != m_module->end(); F++) { if (F->getCallingConv() == llvm::CallingConv::SPIR_KERNEL && F->getName() == name) { function = &*F; break; } } if (function == NULL) { return NULL; } try { // Create cache if none already InterpreterCacheMap::iterator itr = m_interpreterCache.find(function); if (itr == m_interpreterCache.end()) { m_interpreterCache[function] = new InterpreterCache(function); } return new Kernel(this, function, m_module.get()); } catch (FatalError& err) { cerr << endl << "OCLGRIND FATAL ERROR " << "(" << err.getFile() << ":" << err.getLine() << ")" << endl << err.what() << endl << "When creating kernel '" << name << "'" << endl; return NULL; } } void Program::deallocateProgramScopeVars() { for (auto psv = m_programScopeVars.begin(); psv != m_programScopeVars.end(); psv++) { m_context->getGlobalMemory()->deallocateBuffer(psv->second.getPointer()); delete[] psv->second.data; } m_programScopeVars.clear(); m_totalProgramScopeVarSize = 0; } void Program::getBinary(unsigned char* binary) const { if (!m_module) return; std::string str; llvm::raw_string_ostream stream(str); llvm::WriteBitcodeToFile(*m_module, stream); stream.str(); memcpy(binary, str.c_str(), str.length()); } size_t Program::getBinarySize() const { if (!m_module) { return 0; } std::string str; llvm::raw_string_ostream stream(str); llvm::WriteBitcodeToFile(*m_module, stream); stream.str(); return str.length(); } cl_program_binary_type Program::getBinaryType() const { return m_binaryType; } const string& Program::getBuildLog() const { return m_buildLog; } const string& Program::getBuildOptions() const { return m_buildOptions; } unsigned int Program::getBuildStatus() const { return m_buildStatus; } const Context* Program::getContext() const { return m_context; } unsigned long Program::generateUID() const { srand(now()); return rand(); } const InterpreterCache* Program::getInterpreterCache(const llvm::Function* kernel) const { return m_interpreterCache[kernel]; } list Program::getKernelNames() const { list names; for (auto F = m_module->begin(); F != m_module->end(); F++) { if (F->getCallingConv() == llvm::CallingConv::SPIR_KERNEL) { names.push_back(F->getName().str()); } } return names; } llvm::LLVMContext& Program::getLLVMContext() const { return m_module->getContext(); } unsigned int Program::getNumKernels() const { assert(m_module); unsigned int num = 0; for (auto F = m_module->begin(); F != m_module->end(); F++) { if (F->getCallingConv() == llvm::CallingConv::SPIR_KERNEL) { num++; } } return num; } const TypedValue& Program::getProgramScopeVar(const llvm::Value* variable) const { return m_programScopeVars.at(variable); } const string& Program::getSource() const { return m_source; } const char* Program::getSourceLine(size_t lineNumber) const { if (!lineNumber || 
(lineNumber - 1) >= m_sourceLines.size()) return NULL; return m_sourceLines[lineNumber - 1].c_str(); } size_t Program::getNumSourceLines() const { return m_sourceLines.size(); } size_t Program::getTotalProgramScopeVarSize() const { return m_totalProgramScopeVarSize; } unsigned long Program::getUID() const { return m_uid; } void Program::pruneDeadCode(llvm::Instruction* instruction) { // Remove instructions that have no uses if (instruction->getNumUses() == 0) { // Get list of operands set operands; { llvm::Instruction::op_iterator op; for (op = instruction->op_begin(); op != instruction->op_end(); op++) { operands.insert(*op); } } // Remove instruction instruction->eraseFromParent(); // Prune operands set::iterator op; for (op = operands.begin(); op != operands.end(); op++) { if (auto inst = llvm::dyn_cast(*op)) pruneDeadCode(inst); } } } void Program::removeLValueLoads() { // Get list of aggregate store instructions set aggStores; for (llvm::Module::iterator F = m_module->begin(); F != m_module->end(); F++) { llvm::Function* f = &*F; for (llvm::inst_iterator I = inst_begin(f), E = inst_end(f); I != E; I++) { if (auto store = llvm::dyn_cast(&*I)) aggStores.insert(store); } } // Replace aggregate modify-write sequences with direct scalar writes set::iterator itr; for (itr = aggStores.begin(); itr != aggStores.end(); itr++) { scalarizeAggregateStore(*itr); } } void Program::scalarizeAggregateStore(llvm::StoreInst* store) { llvm::IntegerType* gepIndexType = (sizeof(size_t) == 8) ? llvm::Type::getInt64Ty(m_module.get()->getContext()) : llvm::Type::getInt32Ty(m_module.get()->getContext()); llvm::Value* storeValue = store->getValueOperand(); llvm::Value* vectorPtr = store->getPointerOperand(); if (auto insert = llvm::dyn_cast(storeValue)) { llvm::Value* vector = insert->getOperand(0); llvm::Value* value = insert->getOperand(1); llvm::Value* index = insert->getOperand(2); // Create GEP for scalar value llvm::GetElementPtrInst* scalarPtr = NULL; if (auto gep = llvm::dyn_cast(vectorPtr)) { // Create GEP from existing GEP std::vector indices; for (auto idx = gep->idx_begin(); idx != gep->idx_end(); idx++) { indices.push_back(*idx); } indices.push_back(index); scalarPtr = llvm::GetElementPtrInst::Create( gep->getPointerOperandType()->getPointerElementType(), gep->getPointerOperand(), indices); } else { // Create GEP from non-GEP pointer std::vector indices; indices.push_back(llvm::ConstantInt::getSigned(gepIndexType, 0)); indices.push_back(index); scalarPtr = llvm::GetElementPtrInst::Create( vectorPtr->getType()->getPointerElementType(), vectorPtr, indices); } scalarPtr->setDebugLoc(store->getDebugLoc()); scalarPtr->insertAfter(store); // Create direct scalar store llvm::StoreInst* scalarStore = new llvm::StoreInst(value, scalarPtr, store->isVolatile(), llvm::Align(getTypeAlignment(value->getType()))); scalarStore->setDebugLoc(store->getDebugLoc()); scalarStore->insertAfter(scalarPtr); // Check if the input to the insertelement instruction came from something // other than a load to the same address as the store llvm::LoadInst* load = llvm::dyn_cast(vector); if (!(load && load->getPointerOperand() == store->getPointerOperand())) { // Replace value in store with the input to the insertelement instruction llvm::StoreInst* _store = new llvm::StoreInst( vector, store->getPointerOperand(), store->isVolatile(), llvm::Align(store->getAlignment())); _store->setDebugLoc(store->getDebugLoc()); _store->insertAfter(store); // Repeat process with new store if (_store) scalarizeAggregateStore(_store); } // Remove 
vector store and any dead code store->eraseFromParent(); pruneDeadCode(insert); } else if (auto shuffle = llvm::dyn_cast(storeValue)) { llvm::Value* v1 = shuffle->getOperand(0); llvm::Value* v2 = shuffle->getOperand(1); unsigned maskSize = shuffle->getShuffleMask().size(); unsigned v1num = llvm::cast(v1->getType())->getNumElements(); // Check if shuffle sources came from a load with same address as the store llvm::LoadInst* load; bool v1SourceIsDest = false, v2SourceIsDest = false; if ((load = llvm::dyn_cast(v1)) && load->getPointerOperand() == vectorPtr) v1SourceIsDest = true; if ((load = llvm::dyn_cast(v2)) && load->getPointerOperand() == vectorPtr) v2SourceIsDest = true; // Get mask indices that don't correspond to the destination vector stack indices; for (unsigned i = 0; i < maskSize; i++) { int idx = shuffle->getMaskValue(i); // Skip undef indices if (idx == -1) continue; // Check if source is the store destination bool sourceIsDest = ((unsigned)idx < v1num ? v1SourceIsDest : v2SourceIsDest); // If destination is used in non-identity position, leave shuffle as is if (sourceIsDest && (unsigned)idx != i) return; // Add non-destination index if (!sourceIsDest) indices.push(i); } // Check if destination is actually used as a source in the mask if (indices.size() == maskSize) { // Check for any unused loads with the same address as the store // These would usually be caught by DCE, but if optimisations are // disabled we need to prune these manually list lvalueloads; for (auto user = vectorPtr->user_begin(); user != vectorPtr->user_end(); user++) { if (auto load = llvm::dyn_cast(*user)) { if (load->getNumUses() == 0) lvalueloads.push_back(load); } } for (auto load = lvalueloads.begin(); load != lvalueloads.end(); load++) { (*load)->eraseFromParent(); } return; } // Create a scalar store for each shuffle index while (!indices.empty()) { unsigned index = indices.top(); indices.pop(); // Create GEP for scalar value llvm::GetElementPtrInst* scalarPtr = NULL; if (auto gep = llvm::dyn_cast(vectorPtr)) { // Create GEP from existing GEP std::vector gepIndices; for (auto idx = gep->idx_begin(); idx != gep->idx_end(); idx++) { gepIndices.push_back(*idx); } gepIndices.push_back(llvm::ConstantInt::getSigned(gepIndexType, index)); scalarPtr = llvm::GetElementPtrInst::Create( gep->getPointerOperandType()->getPointerElementType(), gep->getPointerOperand(), gepIndices); } else { // Create GEP from non-GEP pointer std::vector gepIndices; gepIndices.push_back(llvm::ConstantInt::getSigned(gepIndexType, 0)); gepIndices.push_back(llvm::ConstantInt::getSigned(gepIndexType, index)); scalarPtr = llvm::GetElementPtrInst::Create( vectorPtr->getType()->getPointerElementType(), vectorPtr, gepIndices); } scalarPtr->setDebugLoc(store->getDebugLoc()); scalarPtr->insertAfter(store); // Get source vector and index unsigned idx = shuffle->getMaskValue(index); llvm::Value* src = v1; if (idx >= v1num) { idx -= v1num; src = v2; } // Create direct scalar store if (auto cnst = llvm::dyn_cast(src)) { // If source is a constant, extract scalar constant src = cnst->getAggregateElement(idx); llvm::StoreInst* scalarStore = new llvm::StoreInst(src, scalarPtr, store->isVolatile(), llvm::Align(getTypeAlignment(src->getType()))); scalarStore->setDebugLoc(store->getDebugLoc()); scalarStore->insertAfter(scalarPtr); } else { // If extracting from a shuffle, trace back to last non-shuffle while (auto shfl = llvm::dyn_cast(src)) { llvm::Value* v1 = shfl->getOperand(0); llvm::Value* v2 = shfl->getOperand(1); unsigned v1num = 
llvm::cast(v1->getType())->getNumElements(); // Get source vector and index idx = shfl->getMaskValue(idx); src = v1; if (idx >= v1num) { idx -= v1num; src = v2; } } llvm::ExtractElementInst* extract = llvm::ExtractElementInst::Create( src, llvm::ConstantInt::getSigned(gepIndexType, idx)); extract->setDebugLoc(shuffle->getDebugLoc()); extract->insertAfter(scalarPtr); llvm::StoreInst* scalarStore = new llvm::StoreInst( extract, scalarPtr, store->isVolatile(), llvm::Align(getTypeAlignment(extract->getType()))); scalarStore->setDebugLoc(store->getDebugLoc()); scalarStore->insertAfter(extract); } } // Prune old store and dead any code store->eraseFromParent(); pruneDeadCode(shuffle); } } void Program::stripDebugIntrinsics() { // Get list of llvm.dbg intrinsics set intrinsics; for (llvm::Module::iterator F = m_module->begin(); F != m_module->end(); F++) { llvm::Function* f = &*F; for (llvm::inst_iterator I = inst_begin(f), E = inst_end(f); I != E; I++) { if (I->getOpcode() == llvm::Instruction::Call) { llvm::CallInst* call = (llvm::CallInst*)&*I; llvm::Function* function = (llvm::Function*)call->getCalledFunction()->stripPointerCasts(); if (function->getName().startswith("llvm.dbg")) { intrinsics.insert(&*I); } } } } // Remove instructions set::iterator itr; for (itr = intrinsics.begin(); itr != intrinsics.end(); itr++) { (*itr)->eraseFromParent(); } } Oclgrind-21.10/src/core/Program.h000066400000000000000000000057641413315665100165760ustar00rootroot00000000000000// Program.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" namespace llvm { class Function; class LLVMContext; class Module; class StoreInst; } // namespace llvm namespace oclgrind { class Context; class InterpreterCache; class Kernel; class Program { public: typedef std::pair Header; enum BuildType { BUILD, COMPILE, }; public: Program(const Context* context, const std::string& source); virtual ~Program(); static Program* createFromBitcode(const Context* context, const unsigned char* bitcode, size_t length); static Program* createFromBitcodeFile(const Context* context, const std::string filename); static Program* createFromPrograms(const Context* context, std::list, const char* options); bool build(BuildType buildType, const char* options, std::list
<Header> headers = std::list<Header>
()); Kernel* createKernel(const std::string name); const std::string& getBuildLog() const; const std::string& getBuildOptions() const; void getBinary(unsigned char* binary) const; size_t getBinarySize() const; cl_program_binary_type getBinaryType() const; unsigned int getBuildStatus() const; const Context* getContext() const; const InterpreterCache* getInterpreterCache(const llvm::Function* kernel) const; std::list getKernelNames() const; llvm::LLVMContext& getLLVMContext() const; unsigned int getNumKernels() const; const std::string& getSource() const; const char* getSourceLine(size_t lineNumber) const; size_t getNumSourceLines() const; const TypedValue& getProgramScopeVar(const llvm::Value* var) const; size_t getTotalProgramScopeVarSize() const; unsigned long getUID() const; private: Program(const Context* context, llvm::Module* module); std::unique_ptr m_module; std::string m_source; std::string m_buildLog; std::string m_buildOptions; unsigned int m_buildStatus; const Context* m_context; std::vector m_sourceLines; cl_program_binary_type m_binaryType; TypedValueMap m_programScopeVars; size_t m_totalProgramScopeVarSize; unsigned long m_uid; unsigned long generateUID() const; void allocateProgramScopeVars(); void deallocateProgramScopeVars(); void pruneDeadCode(llvm::Instruction*); void removeLValueLoads(); void scalarizeAggregateStore(llvm::StoreInst* store); void stripDebugIntrinsics(); typedef std::map InterpreterCacheMap; mutable InterpreterCacheMap m_interpreterCache; void clearInterpreterCache(); }; } // namespace oclgrind Oclgrind-21.10/src/core/Queue.cpp000066400000000000000000000165571413315665100166100ustar00rootroot00000000000000// Queue.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
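// Note on the execution model implemented in this file: commands are only
// queued by enqueue(); nothing runs until execute() or finish() is called.
// For an in-order queue, execute() chains each command to the event of the
// command queued immediately before it, so executing the newest command
// transitively runs all of its predecessors first; finish() relies on this
// by executing only the most recent command with flush=true.
//
// Minimal host-side sketch (illustrative only; assumes 'context' is a valid
// Context* and cmdA/cmdB are previously constructed Commands):
//
//   Queue queue(context, /*out_of_order=*/false);
//   Event* e1 = queue.enqueue(cmdA);
//   Event* e2 = queue.enqueue(cmdB);
//   queue.finish();   // runs cmdA, then cmdB
//   // afterwards: e1->state == CL_COMPLETE and e2->state == CL_COMPLETE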
#include "common.h" #include #include #include "Context.h" #include "KernelInvocation.h" #include "Memory.h" #include "Queue.h" using namespace oclgrind; using namespace std; Queue::Queue(const Context* context, bool out_of_order) : m_context(context), m_out_of_order(out_of_order) { } Queue::~Queue() {} Event::Event() { state = CL_QUEUED; queueTime = now(); startTime = endTime = 0; } Event* Queue::enqueue(Command* cmd) { Event* event = new Event(); cmd->event = event; event->command = cmd; event->queue = this; m_queue.push_back(cmd); return event; } void Queue::executeCopyBuffer(CopyCommand* cmd) { m_context->getGlobalMemory()->copy(cmd->dst, cmd->src, cmd->size); } void Queue::executeCopyBufferRect(CopyRectCommand* cmd) { // Perform copy Memory* memory = m_context->getGlobalMemory(); for (unsigned z = 0; z < cmd->region[2]; z++) { for (unsigned y = 0; y < cmd->region[1]; y++) { // Compute addresses size_t src = cmd->src + cmd->src_offset[0] + y * cmd->src_offset[1] + z * cmd->src_offset[2]; size_t dst = cmd->dst + cmd->dst_offset[0] + y * cmd->dst_offset[1] + z * cmd->dst_offset[2]; // Copy data memory->copy(dst, src, cmd->region[0]); } } } void Queue::executeFillBuffer(FillBufferCommand* cmd) { Memory* memory = m_context->getGlobalMemory(); for (unsigned i = 0; i < cmd->size / cmd->pattern_size; i++) { memory->store(cmd->pattern, cmd->address + i * cmd->pattern_size, cmd->pattern_size); } } void Queue::executeFillImage(FillImageCommand* cmd) { Memory* memory = m_context->getGlobalMemory(); for (unsigned z = 0; z < cmd->region[2]; z++) { for (unsigned y = 0; y < cmd->region[1]; y++) { for (unsigned x = 0; x < cmd->region[0]; x++) { size_t address = cmd->base + (cmd->origin[0] + x) * cmd->pixelSize + (cmd->origin[1] + y) * cmd->rowPitch + (cmd->origin[2] + z) * cmd->slicePitch; memory->store(cmd->color, address, cmd->pixelSize); } } } } void Queue::executeKernel(KernelCommand* cmd) { // Run kernel KernelInvocation::run(m_context, cmd->kernel, cmd->work_dim, cmd->globalOffset, cmd->globalSize, cmd->localSize); } void Queue::executeMap(MapCommand* cmd) { m_context->notifyMemoryMap(m_context->getGlobalMemory(), cmd->address, cmd->offset, cmd->size, cmd->flags); } void Queue::executeNativeKernel(NativeKernelCommand* cmd) { // Run kernel cmd->func(cmd->args); } void Queue::executeReadBuffer(BufferCommand* cmd) { m_context->getGlobalMemory()->load(cmd->ptr, cmd->address, cmd->size); } void Queue::executeReadBufferRect(BufferRectCommand* cmd) { Memory* memory = m_context->getGlobalMemory(); for (unsigned z = 0; z < cmd->region[2]; z++) { for (unsigned y = 0; y < cmd->region[1]; y++) { unsigned char* host = cmd->ptr + cmd->host_offset[0] + y * cmd->host_offset[1] + z * cmd->host_offset[2]; size_t buff = cmd->address + cmd->buffer_offset[0] + y * cmd->buffer_offset[1] + z * cmd->buffer_offset[2]; memory->load(host, buff, cmd->region[0]); } } } void Queue::executeUnmap(UnmapCommand* cmd) { m_context->notifyMemoryUnmap(m_context->getGlobalMemory(), cmd->address, cmd->ptr); } void Queue::executeWriteBuffer(BufferCommand* cmd) { m_context->getGlobalMemory()->store(cmd->ptr, cmd->address, cmd->size); } void Queue::executeWriteBufferRect(BufferRectCommand* cmd) { // Perform write Memory* memory = m_context->getGlobalMemory(); for (unsigned z = 0; z < cmd->region[2]; z++) { for (unsigned y = 0; y < cmd->region[1]; y++) { const unsigned char* host = cmd->ptr + cmd->host_offset[0] + y * cmd->host_offset[1] + z * cmd->host_offset[2]; size_t buff = cmd->address + cmd->buffer_offset[0] + y * 
cmd->buffer_offset[1] + z * cmd->buffer_offset[2]; memory->store(host, buff, cmd->region[0]); } } } bool Queue::isEmpty() const { return m_queue.empty(); } void Queue::execute(Command* command, bool flush) { // Find command in queue auto it = std::find(m_queue.begin(), m_queue.end(), command); // If there is a previous (older) command in the queue AND either the queue // is not out of order OR needs to be flushed, then add event associated with // previous (older) command as a dependency if (it != m_queue.begin() && (!m_out_of_order || flush)) { command->waitList.push_back((*std::prev(it))->event); } // Make sure all events in the wait list are complete before executing // current command while (!command->waitList.empty()) { Event* evt = command->waitList.front(); command->waitList.pop_front(); if (evt->state < 0) { command->event->state = evt->state; m_queue.erase(it); return; } else if (evt->state != CL_COMPLETE) { if (evt->command) { // If it's not a user event, execute the associated command evt->queue->execute(evt->command, flush); command->execBefore.push_front(evt->command); } else { // If it's a user event then place it back at the of the wait list, and // check it later command->waitList.push_back(evt); } } } // Dispatch command command->event->startTime = now(); command->event->state = CL_RUNNING; switch (command->type) { case Command::COPY: executeCopyBuffer((CopyCommand*)command); break; case Command::COPY_RECT: executeCopyBufferRect((CopyRectCommand*)command); break; case Command::EMPTY: break; case Command::FILL_BUFFER: executeFillBuffer((FillBufferCommand*)command); break; case Command::FILL_IMAGE: executeFillImage((FillImageCommand*)command); break; case Command::READ: executeReadBuffer((BufferCommand*)command); break; case Command::READ_RECT: executeReadBufferRect((BufferRectCommand*)command); break; case Command::KERNEL: executeKernel((KernelCommand*)command); break; case Command::MAP: executeMap((MapCommand*)command); break; case Command::NATIVE_KERNEL: executeNativeKernel((NativeKernelCommand*)command); break; case Command::UNMAP: executeUnmap((UnmapCommand*)command); break; case Command::WRITE: executeWriteBuffer((BufferCommand*)command); break; case Command::WRITE_RECT: executeWriteBufferRect((BufferRectCommand*)command); break; default: assert(false && "Unhandled command type in queue."); } command->event->endTime = now(); command->event->state = CL_COMPLETE; // Remove command from its queue m_queue.erase(it); } Command* Queue::finish() { if (m_queue.empty()) { return NULL; } // Get most recent command in queue and execute it, triggering the execution // of all previous commands even if it's an out-of-order queue Command* cmd = m_queue.back(); execute(cmd, true); return cmd; } Oclgrind-21.10/src/core/Queue.h000066400000000000000000000103031413315665100162340ustar00rootroot00000000000000// Queue.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
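// Minimal sketch of describing a 2D kernel launch with the command structures
// declared in this header (values are hypothetical; 'kernel' and 'queue' are
// assumed to be a valid Kernel* and Queue*):
//
//   KernelCommand* cmd = new KernelCommand();
//   cmd->kernel       = kernel;
//   cmd->work_dim     = 2;
//   cmd->globalOffset = Size3(0, 0, 0);
//   cmd->globalSize   = Size3(64, 64, 1);
//   cmd->localSize    = Size3(8, 8, 1);
//   Event* event = queue->enqueue(cmd);   // run later by execute()/finish()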
#pragma once #include "common.h" namespace oclgrind { class Context; class Kernel; class Queue; struct Command; struct Event { int state; double queueTime, startTime, endTime; Command* command; Queue* queue; Event(); }; struct Command { enum CommandType { EMPTY, COPY, COPY_RECT, FILL_BUFFER, FILL_IMAGE, KERNEL, MAP, NATIVE_KERNEL, READ, READ_RECT, UNMAP, WRITE, WRITE_RECT }; CommandType type; std::list waitList; std::list execBefore; Command() { type = EMPTY; } virtual ~Command() {} private: Event* event; friend class Queue; }; struct BufferCommand : Command { unsigned char* ptr; size_t address, size; BufferCommand(CommandType t) { type = t; } }; struct BufferRectCommand : Command { unsigned char* ptr; size_t address; size_t region[3]; size_t host_offset[3]; size_t buffer_offset[3]; BufferRectCommand(CommandType t) { type = t; } }; struct CopyCommand : Command { size_t src, dst, size; CopyCommand() { type = COPY; } }; struct CopyRectCommand : Command { size_t src, dst; size_t region[3]; size_t src_offset[3]; size_t dst_offset[3]; CopyRectCommand() { type = COPY_RECT; } }; struct FillBufferCommand : Command { size_t address, size; size_t pattern_size; unsigned char* pattern; FillBufferCommand(const unsigned char* p, size_t sz) { type = FILL_BUFFER; pattern = new unsigned char[sz]; pattern_size = sz; memcpy(pattern, p, sz); } ~FillBufferCommand() { delete[] pattern; } }; struct FillImageCommand : Command { size_t base; size_t origin[3], region[3]; size_t rowPitch, slicePitch; size_t pixelSize; unsigned char* color; FillImageCommand(size_t b, const size_t o[3], const size_t r[3], size_t rp, size_t sp, size_t ps, const unsigned char* col) { type = FILL_IMAGE; base = b; memcpy(origin, o, sizeof(size_t) * 3); memcpy(region, r, sizeof(size_t) * 3); rowPitch = rp; slicePitch = sp; pixelSize = ps; color = new unsigned char[ps]; memcpy(color, col, ps); } ~FillImageCommand() { delete[] color; } }; struct KernelCommand : Command { Kernel* kernel; unsigned int work_dim; Size3 globalOffset; Size3 globalSize; Size3 localSize; KernelCommand() { type = KERNEL; } }; struct NativeKernelCommand : Command { void(CL_CALLBACK* func)(void*); void* args; NativeKernelCommand(void(CL_CALLBACK* f)(void*), void* a, size_t sz) { type = NATIVE_KERNEL; func = f; if (a) { args = malloc(sz); memcpy(args, a, sz); } else { args = NULL; } } ~NativeKernelCommand() { if (args) { free(args); } } }; struct MapCommand : Command { void* ptr; size_t address; size_t offset; size_t size; cl_map_flags flags; MapCommand() { type = MAP; } }; struct UnmapCommand : Command { const void* ptr; size_t address; UnmapCommand() { type = UNMAP; } }; class Queue { public: Queue(const Context* context, const bool out_of_order); virtual ~Queue(); Event* enqueue(Command* command); void execute(Command* command, bool flush); void executeCopyBuffer(CopyCommand* cmd); void executeCopyBufferRect(CopyRectCommand* cmd); void executeFillBuffer(FillBufferCommand* cmd); void executeFillImage(FillImageCommand* cmd); void executeKernel(KernelCommand* cmd); void executeMap(MapCommand* cmd); void executeNativeKernel(NativeKernelCommand* cmd); void executeReadBuffer(BufferCommand* cmd); void executeReadBufferRect(BufferRectCommand* cmd); void executeUnmap(UnmapCommand* cmd); void executeWriteBuffer(BufferCommand* cmd); void executeWriteBufferRect(BufferRectCommand* cmd); bool isEmpty() const; Command* finish(); private: const Context* m_context; const bool m_out_of_order; std::list m_queue; }; } // namespace oclgrind 
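// Note on BufferRectCommand/CopyRectCommand above: as interpreted in
// Queue.cpp, the offset arrays hold pre-computed byte quantities rather than
// element coordinates. offset[0] is the byte offset of the region origin,
// offset[1] the row pitch in bytes and offset[2] the slice pitch in bytes,
// while region[0] is the row length in bytes and region[1]/region[2] are row
// and slice counts.
//
// Worked example (hypothetical values): reading a 16x4 byte region whose
// origin lies at byte column 8 of row 2 in a host image with a 128-byte row
// pitch and 64 rows per slice would use
//
//   host_offset[0] = 2 * 128 + 8;   // 264, byte offset of the origin
//   host_offset[1] = 128;           // row pitch in bytes
//   host_offset[2] = 128 * 64;      // slice pitch in bytes
//   region[0] = 16; region[1] = 4; region[2] = 1;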
Oclgrind-21.10/src/core/WorkGroup.cpp000066400000000000000000000273401413315665100174530ustar00rootroot00000000000000// WorkGroup.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #include #include "llvm/IR/Module.h" #include "Context.h" #include "Kernel.h" #include "KernelInvocation.h" #include "Memory.h" #include "WorkGroup.h" #include "WorkItem.h" using namespace oclgrind; using namespace std; WorkGroup::WorkGroup(const KernelInvocation* kernelInvocation, Size3 wgid) : WorkGroup(kernelInvocation, wgid, kernelInvocation->getLocalSize()) { } WorkGroup::WorkGroup(const KernelInvocation* kernelInvocation, Size3 wgid, Size3 size) : m_context(kernelInvocation->getContext()) { m_groupID = wgid; m_groupSize = size; m_groupIndex = (m_groupID.x + (m_groupID.y + m_groupID.z * (kernelInvocation->getNumGroups().y) * kernelInvocation->getNumGroups().x)); // Allocate local memory m_localMemory = new Memory(AddrSpaceLocal, sizeof(size_t) == 8 ? 16 : 8, m_context); const Kernel* kernel = kernelInvocation->getKernel(); for (auto value = kernel->values_begin(); value != kernel->values_end(); value++) { const llvm::Type* type = value->first->getType(); if (type->isPointerTy() && type->getPointerAddressSpace() == AddrSpaceLocal) { size_t ptr = m_localMemory->allocateBuffer(value->second.size); m_localAddresses[value->first] = ptr; } } // Initialise work-items for (size_t k = 0; k < m_groupSize.z; k++) { for (size_t j = 0; j < m_groupSize.y; j++) { for (size_t i = 0; i < m_groupSize.x; i++) { WorkItem* workItem = new WorkItem(kernelInvocation, this, Size3(i, j, k)); m_workItems.push_back(workItem); m_running.insert(workItem); } } } m_nextEvent = 1; m_barrier = NULL; } WorkGroup::~WorkGroup() { // Delete work-items for (unsigned i = 0; i < m_workItems.size(); i++) { delete m_workItems[i]; } delete m_localMemory; } size_t WorkGroup::async_copy(const WorkItem* workItem, const llvm::Instruction* instruction, AsyncCopyType type, size_t dest, size_t src, size_t size, size_t num, size_t srcStride, size_t destStride, size_t event) { AsyncCopy copy = {instruction, type, dest, src, size, num, srcStride, destStride, event}; // Check if copy has already been registered by another work-item list>>::iterator itr; for (itr = m_asyncCopies.begin(); itr != m_asyncCopies.end(); itr++) { if (itr->second.count(workItem)) { continue; } // Check for divergence if ((itr->first.instruction->getDebugLoc() != copy.instruction->getDebugLoc()) || (itr->first.type != copy.type) || (itr->first.dest != copy.dest) || (itr->first.src != copy.src) || (itr->first.size != copy.size) || (itr->first.num != copy.num) || (itr->first.srcStride != copy.srcStride) || (itr->first.destStride != copy.destStride)) { Context::Message msg(ERROR, m_context); msg << "Work-group divergence detected (async copy)" << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Work-group: " << msg.CURRENT_WORK_GROUP << endl << endl << "Work-item: " << msg.CURRENT_ENTITY << endl << msg.CURRENT_LOCATION << endl << "dest=0x" << hex << copy.dest << ", " << "src=0x" << hex << copy.src << endl << "elem_size=" << dec << copy.size << ", " << "num_elems=" << dec << copy.num << ", " << "src_stride=" << dec << copy.srcStride << ", " << "dest_stride=" << dec << copy.destStride << endl << endl << "Previous work-items 
executed:" << endl << itr->first.instruction << endl << "dest=0x" << hex << itr->first.dest << ", " << "src=0x" << hex << itr->first.src << endl << "elem_size=" << dec << itr->first.size << ", " << "num_elems=" << dec << itr->first.num << ", " << "src_stride=" << dec << itr->first.srcStride << ", " << "dest_stride=" << dec << itr->first.destStride << endl; msg.send(); } itr->second.insert(workItem); return itr->first.event; } // Create new event if necessary if (copy.event == 0) { copy.event = m_nextEvent++; } // Register new copy and event m_asyncCopies.push_back(make_pair(copy, set())); m_asyncCopies.back().second.insert(workItem); if (!m_events.count(event)) { m_events[copy.event] = list(); } m_events[copy.event].push_back(copy); return copy.event; } void WorkGroup::clearBarrier() { assert(m_barrier); // Check for divergence if (m_barrier->workItems.size() != m_workItems.size()) { Context::Message msg(ERROR, m_context); msg << "Work-group divergence detected (barrier)" << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Work-group: " << msg.CURRENT_WORK_GROUP << endl << "Only " << dec << m_barrier->workItems.size() << " out of " << m_workItems.size() << " work-items executed barrier" << endl << m_barrier->instruction << endl; msg.send(); } // Move work-items to running state set::iterator itr; for (itr = m_barrier->workItems.begin(); itr != m_barrier->workItems.end(); itr++) { (*itr)->clearBarrier(); m_running.insert(*itr); } m_barrier->workItems.clear(); // Deal with events while (!m_barrier->events.empty()) { size_t event = m_barrier->events.front(); // Perform copy list copies = m_events[event]; list::iterator itr; for (itr = copies.begin(); itr != copies.end(); itr++) { Memory *destMem, *srcMem; if (itr->type == GLOBAL_TO_LOCAL) { destMem = m_localMemory; srcMem = m_context->getGlobalMemory(); } else { destMem = m_context->getGlobalMemory(); srcMem = m_localMemory; } size_t src = itr->src; size_t dest = itr->dest; unsigned char* buffer = new unsigned char[itr->size]; for (unsigned i = 0; i < itr->num; i++) { srcMem->load(buffer, src, itr->size); destMem->store(buffer, dest, itr->size); src += itr->srcStride * itr->size; dest += itr->destStride * itr->size; } delete[] buffer; } m_events.erase(event); // Remove copies from list for this event list>>::iterator cItr; for (cItr = m_asyncCopies.begin(); cItr != m_asyncCopies.end();) { if (cItr->first.event == event) { // Check that all work-items registered the copy if (cItr->second.size() != m_workItems.size()) { Context::Message msg(ERROR, m_context); msg << "Work-group divergence detected (async copy)" << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Work-group: " << msg.CURRENT_WORK_GROUP << endl << "Only " << dec << cItr->second.size() << " out of " << m_workItems.size() << " work-items executed copy" << endl << cItr->first.instruction << endl; msg.send(); } cItr = m_asyncCopies.erase(cItr); } else { cItr++; } } m_barrier->events.remove(event); } m_context->notifyWorkGroupBarrier(this, m_barrier->fence); delete m_barrier; m_barrier = NULL; } const llvm::Instruction* WorkGroup::getCurrentBarrier() const { return m_barrier ? 
m_barrier->instruction : NULL; } Size3 WorkGroup::getGroupID() const { return m_groupID; } size_t WorkGroup::getGroupIndex() const { return m_groupIndex; } Size3 WorkGroup::getGroupSize() const { return m_groupSize; } Memory* WorkGroup::getLocalMemory() const { return m_localMemory; } size_t WorkGroup::getLocalMemoryAddress(const llvm::Value* value) const { return m_localAddresses.at(value); } WorkItem* WorkGroup::getNextWorkItem() const { if (m_running.empty()) { return NULL; } return *m_running.begin(); } WorkItem* WorkGroup::getWorkItem(Size3 localID) const { return m_workItems[localID.x + (localID.y + localID.z * m_groupSize.y) * m_groupSize.x]; } bool WorkGroup::hasBarrier() const { return m_barrier; } void WorkGroup::notifyBarrier(WorkItem* workItem, const llvm::Instruction* instruction, uint64_t fence, list events) { if (!m_barrier) { // Create new barrier m_barrier = new Barrier; m_barrier->instruction = instruction; m_barrier->fence = fence; m_barrier->events = events; // Check for invalid events list::iterator itr; for (itr = events.begin(); itr != events.end(); itr++) { if (!m_events.count(*itr)) { m_context->logError("Invalid wait event"); } } } else { // Check for divergence bool divergence = false; if (instruction->getDebugLoc() != m_barrier->instruction->getDebugLoc() || fence != m_barrier->fence || events.size() != m_barrier->events.size()) { divergence = true; } // Check events are all the same int divergentEventIndex = -1; size_t newEvent = -1; size_t oldEvent = -1; if (!divergence) { int i = 0; list::iterator cItr = events.begin(); list::iterator pItr = m_barrier->events.begin(); for (; cItr != events.end(); cItr++, pItr++, i++) { if (*cItr != *pItr) { divergence = true; divergentEventIndex = i; newEvent = *cItr; oldEvent = *pItr; break; } } } if (divergence) { Context::Message msg(ERROR, m_context); msg << "Work-group divergence detected (barrier)" << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Work-group: " << msg.CURRENT_WORK_GROUP << endl << endl << "Work-item: " << msg.CURRENT_ENTITY << endl << msg.CURRENT_LOCATION << endl << "fence=0x" << hex << fence << ", " << "num_events=" << dec << events.size() << endl; if (divergentEventIndex >= 0) { msg << "events[" << dec << divergentEventIndex << "]=" << newEvent << endl; } msg << endl << "Previous work-items executed:" << endl << m_barrier->instruction << endl << "fence=0x" << hex << m_barrier->fence << ", " << "num_events=" << dec << m_barrier->events.size() << endl; if (divergentEventIndex >= 0) { msg << "events[" << dec << divergentEventIndex << "]=" << oldEvent << endl; } msg.send(); } } m_running.erase(workItem); m_barrier->workItems.insert(workItem); } void WorkGroup::notifyFinished(WorkItem* workItem) { m_running.erase(workItem); // Check if work-group finished without waiting for all events if (m_running.empty() && !m_barrier && !m_events.empty()) { m_context->logError("Work-item finished without waiting for events"); } } bool WorkGroup::WorkItemCmp::operator()(const WorkItem* lhs, const WorkItem* rhs) const { Size3 lgid = lhs->getGlobalID(); Size3 rgid = rhs->getGlobalID(); if (lgid.z != rgid.z) { return lgid.z < rgid.z; } if (lgid.y != rgid.y) { return lgid.y < rgid.y; } return lgid.x < rgid.x; } Oclgrind-21.10/src/core/WorkGroup.h000066400000000000000000000051401413315665100171120ustar00rootroot00000000000000// WorkGroup.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. 
// // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #define CLK_LOCAL_MEM_FENCE (1 << 0) #define CLK_GLOBAL_MEM_FENCE (1 << 1) namespace oclgrind { class Context; class Memory; class Kernel; class KernelInvocation; class WorkItem; class WorkGroup { public: enum AsyncCopyType { GLOBAL_TO_LOCAL, LOCAL_TO_GLOBAL }; private: // Comparator for ordering work-items struct WorkItemCmp { bool operator()(const WorkItem* lhs, const WorkItem* rhs) const; }; std::set m_running; struct AsyncCopy { const llvm::Instruction* instruction; AsyncCopyType type; size_t dest; size_t src; size_t size; size_t num; size_t srcStride; size_t destStride; size_t event; }; struct Barrier { const llvm::Instruction* instruction; std::set workItems; uint64_t fence; std::list events; }; public: WorkGroup(const KernelInvocation* kernelInvocation, Size3 wgid); WorkGroup(const KernelInvocation* kernelInvocation, Size3 wgid, Size3 size); virtual ~WorkGroup(); size_t async_copy(const WorkItem* workItem, const llvm::Instruction* instruction, AsyncCopyType type, size_t dest, size_t src, size_t size, size_t num, size_t srcStride, size_t destStride, size_t event); void clearBarrier(); const llvm::Instruction* getCurrentBarrier() const; Size3 getGroupID() const; size_t getGroupIndex() const; Size3 getGroupSize() const; Memory* getLocalMemory() const; size_t getLocalMemoryAddress(const llvm::Value* value) const; WorkItem* getNextWorkItem() const; WorkItem* getWorkItem(Size3 localID) const; bool hasBarrier() const; void notifyBarrier(WorkItem* workItem, const llvm::Instruction* instruction, uint64_t fence, std::list events = std::list()); void notifyFinished(WorkItem* workItem); private: size_t m_groupIndex; Size3 m_groupID; Size3 m_groupSize; const Context* m_context; Memory* m_localMemory; std::map m_localAddresses; std::vector m_workItems; Barrier* m_barrier; size_t m_nextEvent; std::list>> m_asyncCopies; std::map> m_events; }; } // namespace oclgrind Oclgrind-21.10/src/core/WorkItem.cpp000066400000000000000000001324071413315665100172560ustar00rootroot00000000000000// WorkItem.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved.` // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "common.h" #include "config.h" #include #include "llvm/IR/DebugInfo.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "Context.h" #include "Kernel.h" #include "KernelInvocation.h" #include "Memory.h" #include "Program.h" #include "WorkGroup.h" #include "WorkItem.h" using namespace oclgrind; using namespace std; struct WorkItem::Position { bool hasBegun; const llvm::BasicBlock* prevBlock; const llvm::BasicBlock* currBlock; const llvm::BasicBlock* nextBlock; llvm::BasicBlock::const_iterator currInst; std::stack callStack; std::stack> allocations; }; WorkItem::WorkItem(const KernelInvocation* kernelInvocation, WorkGroup* workGroup, Size3 lid) : m_context(kernelInvocation->getContext()), m_kernelInvocation(kernelInvocation), m_workGroup(workGroup) { m_localID = lid; // Compute global ID Size3 groupID = workGroup->getGroupID(); Size3 groupSize = kernelInvocation->getLocalSize(); Size3 globalOffset = kernelInvocation->getGlobalOffset(); m_globalID.x = lid.x + groupID.x * groupSize.x + globalOffset.x; m_globalID.y = lid.y + groupID.y * groupSize.y + globalOffset.y; m_globalID.z = lid.z + groupID.z * groupSize.z + globalOffset.z; Size3 globalSize = kernelInvocation->getGlobalSize(); m_globalIndex = (m_globalID.x + (m_globalID.y + m_globalID.z * globalSize.y) * globalSize.x); const Kernel* kernel = kernelInvocation->getKernel(); // Load interpreter cache m_cache = kernel->getProgram()->getInterpreterCache(kernel->getFunction()); // Set initial number of values to store based on cache m_values.resize(m_cache->getNumValues()); m_privateMemory = new Memory(AddrSpacePrivate, sizeof(size_t) == 8 ? 
32 : 16, m_context); // Initialise kernel arguments and global variables for (auto value = kernel->values_begin(); value != kernel->values_end(); value++) { pair size = getValueSize(value->first); TypedValue v = {size.first, size.second, m_pool.alloc(size.first * size.second)}; const llvm::Type* type = value->first->getType(); if (type->isPointerTy() && type->getPointerAddressSpace() == AddrSpacePrivate) { size_t sz = value->second.size * value->second.num; v.setPointer(m_privateMemory->allocateBuffer(sz, 0, value->second.data)); } else if (type->isPointerTy() && type->getPointerAddressSpace() == AddrSpaceLocal) { v.setPointer(m_workGroup->getLocalMemoryAddress(value->first)); } else { memcpy(v.data, value->second.data, v.size * v.num); } setValue(value->first, v); } // Initialize interpreter state m_state = READY; m_position = new Position; m_position->hasBegun = false; m_position->prevBlock = NULL; m_position->nextBlock = NULL; m_position->currBlock = &*kernel->getFunction()->begin(); m_position->currInst = m_position->currBlock->begin(); } WorkItem::~WorkItem() { delete m_privateMemory; delete m_position; } void WorkItem::clearBarrier() { if (m_state == BARRIER) { m_state = READY; } } void WorkItem::dispatch(const llvm::Instruction* instruction, TypedValue& result) { switch (instruction->getOpcode()) { case llvm::Instruction::Add: add(instruction, result); break; case llvm::Instruction::Alloca: alloc(instruction, result); break; case llvm::Instruction::And: bwand(instruction, result); break; case llvm::Instruction::AShr: ashr(instruction, result); break; case llvm::Instruction::BitCast: bitcast(instruction, result); break; case llvm::Instruction::Br: br(instruction, result); break; case llvm::Instruction::Call: call(instruction, result); break; case llvm::Instruction::ExtractElement: extractelem(instruction, result); break; case llvm::Instruction::ExtractValue: extractval(instruction, result); break; case llvm::Instruction::FAdd: fadd(instruction, result); break; case llvm::Instruction::FCmp: fcmp(instruction, result); break; case llvm::Instruction::FDiv: fdiv(instruction, result); break; case llvm::Instruction::FMul: fmul(instruction, result); break; case llvm::Instruction::FNeg: fneg(instruction, result); break; case llvm::Instruction::FPExt: fpext(instruction, result); break; case llvm::Instruction::FPToSI: fptosi(instruction, result); break; case llvm::Instruction::FPToUI: fptoui(instruction, result); break; case llvm::Instruction::FPTrunc: fptrunc(instruction, result); break; case llvm::Instruction::FRem: frem(instruction, result); break; case llvm::Instruction::FSub: fsub(instruction, result); break; case llvm::Instruction::GetElementPtr: gep(instruction, result); break; case llvm::Instruction::ICmp: icmp(instruction, result); break; case llvm::Instruction::InsertElement: insertelem(instruction, result); break; case llvm::Instruction::InsertValue: insertval(instruction, result); break; case llvm::Instruction::IntToPtr: inttoptr(instruction, result); break; case llvm::Instruction::Load: load(instruction, result); break; case llvm::Instruction::LShr: lshr(instruction, result); break; case llvm::Instruction::Mul: mul(instruction, result); break; case llvm::Instruction::Or: bwor(instruction, result); break; case llvm::Instruction::PHI: phi(instruction, result); break; case llvm::Instruction::PtrToInt: ptrtoint(instruction, result); break; case llvm::Instruction::Ret: ret(instruction, result); break; case llvm::Instruction::SDiv: sdiv(instruction, result); break; case 
llvm::Instruction::Select: select(instruction, result); break; case llvm::Instruction::SExt: sext(instruction, result); break; case llvm::Instruction::Shl: shl(instruction, result); break; case llvm::Instruction::ShuffleVector: shuffle(instruction, result); break; case llvm::Instruction::SIToFP: sitofp(instruction, result); break; case llvm::Instruction::SRem: srem(instruction, result); break; case llvm::Instruction::Store: store(instruction, result); break; case llvm::Instruction::Sub: sub(instruction, result); break; case llvm::Instruction::Switch: swtch(instruction, result); break; case llvm::Instruction::Trunc: itrunc(instruction, result); break; case llvm::Instruction::UDiv: udiv(instruction, result); break; case llvm::Instruction::UIToFP: uitofp(instruction, result); break; case llvm::Instruction::URem: urem(instruction, result); break; case llvm::Instruction::Unreachable: FATAL_ERROR("Encountered unreachable instruction"); case llvm::Instruction::Xor: bwxor(instruction, result); break; case llvm::Instruction::ZExt: zext(instruction, result); break; case llvm::Instruction::Freeze: freeze(instruction, result); break; default: FATAL_ERROR("Unsupported instruction: %s", instruction->getOpcodeName()); } } void WorkItem::execute(const llvm::Instruction* instruction) { // Prepare private variable for instruction result pair resultSize = getValueSize(instruction); // Prepare result TypedValue result = {resultSize.first, resultSize.second, NULL}; if (result.size) { result.data = m_pool.alloc(result.size * result.num); } if (instruction->getOpcode() != llvm::Instruction::PHI && m_phiTemps.size() > 0) { TypedValueMap::iterator itr; for (itr = m_phiTemps.begin(); itr != m_phiTemps.end(); itr++) { setValue(itr->first, itr->second); } m_phiTemps.clear(); } // Execute instruction dispatch(instruction, result); // Store result if (result.size) { if (instruction->getOpcode() != llvm::Instruction::PHI) { setValue(instruction, result); } else { m_phiTemps[instruction] = result; } } m_context->notifyInstructionExecuted(this, instruction, result); } const stack& WorkItem::getCallStack() const { return m_position->callStack; } const llvm::BasicBlock* WorkItem::getCurrentBlock() const { return m_position->currBlock; } const llvm::Instruction* WorkItem::getCurrentInstruction() const { return &*m_position->currInst; } Size3 WorkItem::getGlobalID() const { return m_globalID; } size_t WorkItem::getGlobalIndex() const { return m_globalIndex; } Size3 WorkItem::getLocalID() const { return m_localID; } Memory* WorkItem::getMemory(unsigned int addrSpace) const { switch (addrSpace) { case AddrSpacePrivate: return m_privateMemory; case AddrSpaceGlobal: case AddrSpaceConstant: return m_context->getGlobalMemory(); case AddrSpaceLocal: return m_workGroup->getLocalMemory(); default: FATAL_ERROR("Unsupported address space: %d", addrSpace); } } TypedValue WorkItem::getOperand(const llvm::Value* operand) const { if (llvm::isa(operand) || llvm::isa(operand) || llvm::isa(operand)) { return getValue(operand); } else if (llvm::isa(operand)) { pair size = getValueSize(operand); TypedValue result; result.size = size.first; result.num = size.second; result.data = m_pool.alloc(getTypeSize(operand->getType())); // Use of const_cast here is ugly, but ConstExpr instructions // shouldn't actually modify WorkItem state anyway const_cast(this)->dispatch(m_cache->getConstantExpr(operand), result); return result; } else if (llvm::isa(operand) || llvm::isa(operand)) { return m_cache->getConstant(operand); } else { FATAL_ERROR("Unhandled 
operand type: %d", operand->getValueID()); } // Unreachable assert(false); } const llvm::BasicBlock* WorkItem::getPreviousBlock() const { return m_position->prevBlock; } Memory* WorkItem::getPrivateMemory() const { return m_privateMemory; } WorkItem::State WorkItem::getState() const { return m_state; } TypedValue WorkItem::getValue(const llvm::Value* key) const { return m_values[m_cache->getValueID(key)]; } const unsigned char* WorkItem::getValueData(const llvm::Value* value) const { if (!hasValue(value)) { return NULL; } return getValue(value).data; } const WorkGroup* WorkItem::getWorkGroup() const { return m_workGroup; } bool WorkItem::hasValue(const llvm::Value* key) const { return m_cache->hasValue(key); } void WorkItem::printExpression(string expr) const { // Split base variable name from rest of expression size_t split; string basename; if ((split = expr.find_first_of(".-[")) != string::npos) { basename = expr.substr(0, split); expr = expr.substr(split); } else { basename = expr; expr = ""; } const llvm::Value* baseValue = NULL; const llvm::DIVariable* divar = NULL; // Check private variables VariableMap::const_iterator itr; itr = m_variables.find(basename); if (itr != m_variables.end()) { baseValue = itr->second.first; divar = itr->second.second; } // Check global variables string globalName = m_position->currBlock->getParent()->getName().str(); globalName += "."; globalName += basename; const llvm::Module* module = m_kernelInvocation->getKernel()->getFunction()->getParent(); for (auto global = module->global_begin(); global != module->global_end(); global++) { if (global->getName() == globalName) { baseValue = &*global; llvm::SmallVector GVEs; global->getDebugInfo(GVEs); if (GVEs.size() == 0) { cout << "global variable debug information not found"; return; } // TODO: Does it matter which GVE we pick? 
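// (Clang emits function-scope variables that live at module scope, such as
// __local arrays or constant tables declared inside a kernel, as LLVM
// globals named "<function>.<variable>", which is why the lookup above
// prefixes the variable name with the current function. Illustrative
// example, assuming a kernel 'foo' with a table 'lut': the search is for a
// global named "foo.lut". The DIGlobalVariable recovered from the debug
// expression below supplies the source-level type used when printing.)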
divar = llvm::dyn_cast(GVEs[0]->getRawVariable()); } } // Check that we found the target variable if (!baseValue) { cout << "not found"; return; } // Get variable data and type TypedValue result = getOperand(baseValue); unsigned char* data = result.data; const llvm::Type* type = baseValue->getType(); const llvm::Metadata* mdtype = divar->getRawType(); // Auto-dereference global variables and allocas if (baseValue->getValueID() == llvm::Value::GlobalVariableVal || ((const llvm::Instruction*)baseValue)->getOpcode() == llvm::Instruction::Alloca) { size_t address = result.getPointer(); Memory* memory = getMemory(type->getPointerAddressSpace()); data = (unsigned char*)memory->getPointer(address); type = type->getPointerElementType(); } // Handle rest of print expression while (!expr.empty()) { bool member = false; bool dereference = false; size_t subscript = 0; // Handle special characters if (expr[0] == '.') { expr = expr.substr(1); member = true; } else if (!expr.compare(0, 2, "->")) { expr = expr.substr(2); dereference = true; member = true; } else if (expr[0] == '[') { // Find end of subscript size_t end = expr.find(']'); if (end == string::npos) { cout << "missing ']'" << endl; return; } // Parse index value stringstream ss(expr.substr(1, end - 1)); ss >> subscript; if (!ss.eof()) { cout << "invalid subscript index" << endl; return; } expr = expr.substr(end + 1); dereference = true; } else { cout << "invalid print expression"; return; } // Deference a pointer if user requested if (dereference) { auto ptrtype = llvm::dyn_cast(mdtype); if (!ptrtype || ptrtype->getTag() != llvm::dwarf::DW_TAG_pointer_type) { cout << "not a pointer type"; return; } // Get pointer value size_t address = *(size_t*)data; Memory* memory = getMemory(type->getPointerAddressSpace()); // Check address is valid auto elemType = type->getPointerElementType(); size_t elemSize = getTypeSize(elemType); if (!memory->isAddressValid(address + subscript * elemSize, elemSize)) { cout << "invalid memory address"; return; } // Get pointer to data and add offset data = (unsigned char*)memory->getPointer(address); data += subscript * elemSize; // Update types mdtype = ptrtype->getRawBaseType(); type = elemType; } // Deal with structure elements if (member) { // Split at next special character size_t split; string element; if ((split = expr.find_first_of(".-[")) != string::npos) { element = expr.substr(0, split); expr = expr.substr(split); } else { element = expr; expr = ""; } // Deal with typedef auto ditype = llvm::dyn_cast(mdtype); if (ditype->getTag() == llvm::dwarf::DW_TAG_typedef) { mdtype = llvm::dyn_cast(ditype)->getRawBaseType(); } // Ensure we have a composite type auto composite_type = llvm::dyn_cast(mdtype); if (!composite_type) { cout << "not a composite type"; return; } // Find element with matching name bool found = false; auto elements = composite_type->getElements(); unsigned numElements = elements->getNumOperands(); for (unsigned i = 0; i < numElements; i++) { auto elem = llvm::dyn_cast(elements->getOperand(i)); if (elem->getName() == element) { // Increment data pointer by offset and update type type = type->getStructElementType(i); mdtype = elem->getRawBaseType(); data = data + elem->getOffsetInBits() / 8; found = true; } } if (!found) { cout << "no member named '" << element << "' found"; return; } } } printTypedData(type, data); } bool WorkItem::printValue(const llvm::Value* value) const { if (!hasValue(value)) { return false; } printTypedData(value->getType(), getValue(value).data); return true; } void 
WorkItem::setValue(const llvm::Value* key, TypedValue value) { m_values[m_cache->getValueID(key)] = value; } WorkItem::State WorkItem::step() { assert(m_state == READY); if (!m_position->hasBegun) { m_position->hasBegun = true; m_context->notifyWorkItemBegin(this); } // Execute the next instruction execute(&*m_position->currInst); // Check if we've reached the end of the block if (++m_position->currInst == m_position->currBlock->end() || m_position->nextBlock) { if (m_position->nextBlock) { // Move to next basic block m_position->prevBlock = m_position->currBlock; m_position->currBlock = m_position->nextBlock; m_position->nextBlock = NULL; m_position->currInst = m_position->currBlock->begin(); } } if (m_state == FINISHED) m_context->notifyWorkItemComplete(this); return m_state; } /////////////////////////////// //// Instruction execution //// /////////////////////////////// #define INSTRUCTION(name) \ void WorkItem::name(const llvm::Instruction* instruction, TypedValue& result) INSTRUCTION(add) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) + opB.getUInt(i), i); } } INSTRUCTION(alloc) { const llvm::AllocaInst* allocInst = ((const llvm::AllocaInst*)instruction); const llvm::Type* type = allocInst->getAllocatedType(); // Perform allocation unsigned size = getTypeSize(type); size_t address = m_privateMemory->allocateBuffer(size); if (!address) FATAL_ERROR("Insufficient private memory (alloca)"); // Create pointer to alloc'd memory result.setPointer(address); // Track allocation in stack frame if (!m_position->allocations.empty()) m_position->allocations.top().push_back(address); } INSTRUCTION(ashr) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); uint64_t shiftMask = (result.num > 1 ? result.size : max((size_t)result.size, sizeof(uint32_t))) * 8 - 1; for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getSInt(i) >> (opB.getUInt(i) & shiftMask), i); } } INSTRUCTION(bitcast) { TypedValue operand = getOperand(instruction->getOperand(0)); memcpy(result.data, operand.data, result.size * result.num); } INSTRUCTION(br) { if (instruction->getNumOperands() == 1) { // Unconditional branch m_position->nextBlock = (const llvm::BasicBlock*)instruction->getOperand(0); } else { // Conditional branch bool pred = getOperand(instruction->getOperand(0)).getUInt(); const llvm::Value* iftrue = instruction->getOperand(2); const llvm::Value* iffalse = instruction->getOperand(1); m_position->nextBlock = (const llvm::BasicBlock*)(pred ? 
iftrue : iffalse); } } INSTRUCTION(bwand) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) & opB.getUInt(i), i); } } INSTRUCTION(bwor) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) | opB.getUInt(i), i); } } INSTRUCTION(bwxor) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) ^ opB.getUInt(i), i); } } INSTRUCTION(call) { const llvm::CallInst* callInst = (const llvm::CallInst*)instruction; const llvm::Function* function = callInst->getCalledFunction(); // Check for indirect function calls if (!callInst->getCalledFunction()) { // Resolve indirect function pointer const llvm::Value* func = callInst->getCalledOperand(); const llvm::Value* funcPtr = ((const llvm::User*)func)->getOperand(0); function = (const llvm::Function*)funcPtr; } // Check if function has definition if (!function->isDeclaration()) { m_position->callStack.push(&*m_position->currInst); m_position->allocations.push(list()); m_position->nextBlock = &*function->begin(); // Set function arguments llvm::Function::const_arg_iterator argItr; for (argItr = function->arg_begin(); argItr != function->arg_end(); argItr++) { const llvm::Value* arg = callInst->getArgOperand(argItr->getArgNo()); TypedValue value = getOperand(arg); if (argItr->hasByValAttr()) { // Make new copy of value in private memory void* data = m_privateMemory->getPointer(value.getPointer()); size_t size = getTypeSize(argItr->getType()->getPointerElementType()); size_t ptr = m_privateMemory->allocateBuffer(size, 0, (uint8_t*)data); m_position->allocations.top().push_back(ptr); // Pass new allocation to function TypedValue address = {sizeof(size_t), 1, m_pool.alloc(sizeof(size_t))}; address.setPointer(ptr); setValue(&*argItr, address); } else { setValue(&*argItr, m_pool.clone(value)); } } return; } // Call builtin function InterpreterCache::Builtin builtin = m_cache->getBuiltin(function); builtin.function.func(this, callInst, builtin.name, builtin.overload, result, builtin.function.op); } INSTRUCTION(extractelem) { const llvm::ExtractElementInst* extract = (const llvm::ExtractElementInst*)instruction; unsigned index = getOperand(extract->getIndexOperand()).getUInt(); TypedValue operand = getOperand(extract->getVectorOperand()); memcpy(result.data, operand.data + result.size * index, result.size); } INSTRUCTION(extractval) { const llvm::ExtractValueInst* extract = (const llvm::ExtractValueInst*)instruction; const llvm::Value* agg = extract->getAggregateOperand(); llvm::ArrayRef indices = extract->getIndices(); // Compute offset for target value int offset = 0; const llvm::Type* type = agg->getType(); for (unsigned i = 0; i < indices.size(); i++) { if (type->isArrayTy()) { type = type->getArrayElementType(); offset += getTypeSize(type) * indices[i]; } else if (type->isStructTy()) { offset += getStructMemberOffset((const llvm::StructType*)type, indices[i]); type = type->getStructElementType(indices[i]); } else { FATAL_ERROR("Unsupported aggregate type: %d", type->getTypeID()) } } // Copy target value to result memcpy(result.data, getOperand(agg).data + offset, getTypeSize(type)); } INSTRUCTION(fadd) { TypedValue opA = 
getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(opA.getFloat(i) + opB.getFloat(i), i); } } INSTRUCTION(fcmp) { const llvm::CmpInst* cmpInst = (const llvm::CmpInst*)instruction; llvm::CmpInst::Predicate pred = cmpInst->getPredicate(); TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); uint64_t t = result.num > 1 ? -1 : 1; for (unsigned i = 0; i < result.num; i++) { double a = opA.getFloat(i); double b = opB.getFloat(i); uint64_t r; switch (pred) { case llvm::CmpInst::FCMP_OEQ: case llvm::CmpInst::FCMP_UEQ: r = a == b; break; case llvm::CmpInst::FCMP_ONE: case llvm::CmpInst::FCMP_UNE: r = a != b; break; case llvm::CmpInst::FCMP_OGT: case llvm::CmpInst::FCMP_UGT: r = a > b; break; case llvm::CmpInst::FCMP_OGE: case llvm::CmpInst::FCMP_UGE: r = a >= b; break; case llvm::CmpInst::FCMP_OLT: case llvm::CmpInst::FCMP_ULT: r = a < b; break; case llvm::CmpInst::FCMP_OLE: case llvm::CmpInst::FCMP_ULE: r = a <= b; break; case llvm::CmpInst::FCMP_FALSE: case llvm::CmpInst::FCMP_UNO: r = false; break; case llvm::CmpInst::FCMP_TRUE: case llvm::CmpInst::FCMP_ORD: r = true; break; default: FATAL_ERROR("Unsupported FCmp predicate: %d", pred); } // Deal with NaN operands if (std::isnan(a) || std::isnan(b)) { r = !llvm::CmpInst::isOrdered(pred); } result.setUInt(r ? t : 0, i); } } INSTRUCTION(fdiv) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(opA.getFloat(i) / opB.getFloat(i), i); } } INSTRUCTION(fmul) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(opA.getFloat(i) * opB.getFloat(i), i); } } INSTRUCTION(fneg) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(-op.getFloat(i), i); } } INSTRUCTION(fpext) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(op.getFloat(i), i); } } INSTRUCTION(fptosi) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setSInt((int64_t)op.getFloat(i), i); } } INSTRUCTION(fptoui) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setUInt((uint64_t)op.getFloat(i), i); } } INSTRUCTION(frem) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(fmod(opA.getFloat(i), opB.getFloat(i)), i); } } INSTRUCTION(fptrunc) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(op.getFloat(i), i); } } INSTRUCTION(fsub) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(opA.getFloat(i) - opB.getFloat(i), i); } } INSTRUCTION(gep) { const llvm::GetElementPtrInst* gepInst = (const llvm::GetElementPtrInst*)instruction; // Get base address size_t base = getOperand(gepInst->getPointerOperand()).getPointer(); const llvm::Type* ptrType = gepInst->getPointerOperandType(); // Get indices std::vector offsets; llvm::User::const_op_iterator 
opItr; for (opItr = gepInst->idx_begin(); opItr != gepInst->idx_end(); opItr++) { offsets.push_back(getOperand(opItr->get()).getSInt()); } result.setPointer(resolveGEP(base, ptrType, offsets)); } INSTRUCTION(icmp) { const llvm::CmpInst* cmpInst = (const llvm::CmpInst*)instruction; llvm::CmpInst::Predicate pred = cmpInst->getPredicate(); TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); uint64_t t = result.num > 1 ? -1 : 1; for (unsigned i = 0; i < result.num; i++) { // Load operands uint64_t ua = opA.getUInt(i); uint64_t ub = opB.getUInt(i); int64_t sa = opA.getSInt(i); int64_t sb = opB.getSInt(i); uint64_t r; switch (pred) { case llvm::CmpInst::ICMP_EQ: r = ua == ub; break; case llvm::CmpInst::ICMP_NE: r = ua != ub; break; case llvm::CmpInst::ICMP_UGT: r = ua > ub; break; case llvm::CmpInst::ICMP_UGE: r = ua >= ub; break; case llvm::CmpInst::ICMP_ULT: r = ua < ub; break; case llvm::CmpInst::ICMP_ULE: r = ua <= ub; break; case llvm::CmpInst::ICMP_SGT: r = sa > sb; break; case llvm::CmpInst::ICMP_SGE: r = sa >= sb; break; case llvm::CmpInst::ICMP_SLT: r = sa < sb; break; case llvm::CmpInst::ICMP_SLE: r = sa <= sb; break; default: FATAL_ERROR("Unsupported ICmp predicate: %d", pred); } result.setUInt(r ? t : 0, i); } } INSTRUCTION(insertelem) { TypedValue vector = getOperand(instruction->getOperand(0)); TypedValue element = getOperand(instruction->getOperand(1)); unsigned index = getOperand(instruction->getOperand(2)).getUInt(); memcpy(result.data, vector.data, result.size * result.num); memcpy(result.data + index * result.size, element.data, result.size); } INSTRUCTION(insertval) { const llvm::InsertValueInst* insert = (const llvm::InsertValueInst*)instruction; // Load original aggregate data const llvm::Value* agg = insert->getAggregateOperand(); memcpy(result.data, getOperand(agg).data, result.size * result.num); // Compute offset for inserted value int offset = 0; llvm::ArrayRef indices = insert->getIndices(); const llvm::Type* type = agg->getType(); for (unsigned i = 0; i < indices.size(); i++) { if (type->isArrayTy()) { type = type->getArrayElementType(); offset += getTypeSize(type) * indices[i]; } else if (type->isStructTy()) { offset += getStructMemberOffset((const llvm::StructType*)type, indices[i]); type = type->getStructElementType(indices[i]); } else { FATAL_ERROR("Unsupported aggregate type: %d", type->getTypeID()) } } // Copy inserted value into result const llvm::Value* value = insert->getInsertedValueOperand(); memcpy(result.data + offset, getOperand(value).data, getTypeSize(value->getType())); } INSTRUCTION(inttoptr) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setPointer(op.getUInt(i), i); } } INSTRUCTION(itrunc) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(op.getUInt(i), i); } } INSTRUCTION(load) { const llvm::LoadInst* loadInst = (const llvm::LoadInst*)instruction; unsigned addressSpace = loadInst->getPointerAddressSpace(); const llvm::Value* opPtr = loadInst->getPointerOperand(); size_t address = getOperand(opPtr).getPointer(); // Check address is correctly aligned unsigned alignment = loadInst->getAlignment(); if (!alignment) alignment = getTypeAlignment(opPtr->getType()->getPointerElementType()); if (address & (alignment - 1)) { m_context->logError("Invalid memory load - source pointer is " "not aligned to the pointed type"); } // Load data 
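// (The alignment check above relies on `alignment` being a power of two, so
//  `address & (alignment - 1)` is equivalent to `address % alignment`.
//  Illustrative example: a 4-byte-aligned int load from address 0x1006 is
//  reported, since 0x1006 & 0x3 == 0x2.)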
getMemory(addressSpace)->load(result.data, address, result.size * result.num); } INSTRUCTION(lshr) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); uint64_t shiftMask = (result.num > 1 ? result.size : max((size_t)result.size, sizeof(uint32_t))) * 8 - 1; for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) >> (opB.getUInt(i) & shiftMask), i); } } INSTRUCTION(mul) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) * opB.getUInt(i), i); } } INSTRUCTION(phi) { const llvm::PHINode* phiNode = (const llvm::PHINode*)instruction; const llvm::Value* value = phiNode->getIncomingValueForBlock( (const llvm::BasicBlock*)m_position->prevBlock); memcpy(result.data, getOperand(value).data, result.size * result.num); } INSTRUCTION(ptrtoint) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(op.getPointer(i), i); } } INSTRUCTION(ret) { const llvm::ReturnInst* retInst = (const llvm::ReturnInst*)instruction; if (!m_position->callStack.empty()) { m_position->currInst = llvm::BasicBlock::const_iterator(m_position->callStack.top()); m_position->currBlock = m_position->currInst->getParent(); m_position->callStack.pop(); // Set return value const llvm::Value* returnVal = retInst->getReturnValue(); if (returnVal) { setValue(&*m_position->currInst, m_pool.clone(getOperand(returnVal))); } // Clear stack allocations list& allocs = m_position->allocations.top(); list::iterator itr; for (itr = allocs.begin(); itr != allocs.end(); itr++) { m_privateMemory->deallocateBuffer(*itr); } m_position->allocations.pop(); } else { m_position->nextBlock = NULL; m_state = FINISHED; m_workGroup->notifyFinished(this); } } INSTRUCTION(sdiv) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { int64_t a = opA.getSInt(i); int64_t b = opB.getSInt(i); int64_t r = 0; if (b && !(a == INT64_MIN && b == -1)) { r = a / b; } result.setSInt(r, i); } } INSTRUCTION(select) { const llvm::SelectInst* selectInst = (const llvm::SelectInst*)instruction; TypedValue opCondition = getOperand(selectInst->getCondition()); for (unsigned i = 0; i < result.num; i++) { const bool cond = selectInst->getCondition()->getType()->isVectorTy() ? opCondition.getUInt(i) : opCondition.getUInt(); const llvm::Value* op = cond ? selectInst->getTrueValue() : selectInst->getFalseValue(); memcpy(result.data + i * result.size, getOperand(op).data + i * result.size, result.size); } } INSTRUCTION(sext) { const llvm::Value* operand = instruction->getOperand(0); TypedValue value = getOperand(operand); for (unsigned i = 0; i < result.num; i++) { int64_t val = value.getSInt(i); if (operand->getType()->getPrimitiveSizeInBits() == 1) { val = val ? -1 : 0; } result.setSInt(val, i); } } INSTRUCTION(shl) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); uint64_t shiftMask = (result.num > 1 ? 
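// (OpenCL C masks the shift amount to the bit width of the operand type:
//  for vectors that is the element width, while scalar char/short operands
//  are promoted to int first, hence the max() with sizeof(uint32_t) below.
//  Illustrative example: for a uchar4 element the mask is 7, so shifting by
//  9 behaves like shifting by 9 & 7 == 1.)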
result.size : max((size_t)result.size, sizeof(uint32_t))) * 8 - 1; for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) << (opB.getUInt(i) & shiftMask), i); } } INSTRUCTION(shuffle) { const llvm::ShuffleVectorInst* shuffle = (const llvm::ShuffleVectorInst*)instruction; const llvm::Value* v1 = shuffle->getOperand(0); const llvm::Value* v2 = shuffle->getOperand(1); unsigned num = llvm::cast(v1->getType())->getNumElements(); for (unsigned i = 0; i < result.num; i++) { const llvm::Value* src = v1; int index = shuffle->getMaskValue(i); if (index == llvm::UndefMaskElem) { // Don't care / undef continue; } if (index >= num) { index -= num; src = v2; } memcpy(result.data + i * result.size, getOperand(src).data + index * result.size, result.size); } } INSTRUCTION(sitofp) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(op.getSInt(i), i); } } INSTRUCTION(srem) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { int64_t a = opA.getSInt(i); int64_t b = opB.getSInt(i); int64_t r = 0; if (b && !(a == INT64_MIN && b == -1)) { r = a % b; } result.setSInt(r, i); } } INSTRUCTION(store) { const llvm::StoreInst* storeInst = (const llvm::StoreInst*)instruction; unsigned addressSpace = storeInst->getPointerAddressSpace(); const llvm::Value* opPtr = storeInst->getPointerOperand(); size_t address = getOperand(opPtr).getPointer(); // Check address is correctly aligned unsigned alignment = storeInst->getAlignment(); if (!alignment) alignment = getTypeAlignment(opPtr->getType()->getPointerElementType()); if (address & (alignment - 1)) { m_context->logError("Invalid memory store - source pointer is " "not aligned to the pointed type"); } // Store data TypedValue operand = getOperand(storeInst->getValueOperand()); getMemory(addressSpace) ->store(operand.data, address, operand.size * operand.num); } INSTRUCTION(sub) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) - opB.getUInt(i), i); } } INSTRUCTION(swtch) { const llvm::SwitchInst* swtch = (const llvm::SwitchInst*)instruction; const llvm::Value* cond = swtch->getCondition(); uint64_t val = getOperand(cond).getUInt(); // Look for case matching condition value for (auto C : swtch->cases()) { if (C.getCaseValue()->getZExtValue() == val) { m_position->nextBlock = C.getCaseSuccessor(); return; } } // No matching cases - use default m_position->nextBlock = swtch->getDefaultDest(); } INSTRUCTION(udiv) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { uint64_t a = opA.getUInt(i); uint64_t b = opB.getUInt(i); result.setUInt(b ? a / b : 0, i); } } INSTRUCTION(uitofp) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { uint64_t in = op.getUInt(i); if (result.size == 4) result.setFloat(in ? (float)in : 0.f, i); else result.setFloat(in ? (double)in : 0.0, i); } } INSTRUCTION(urem) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { uint64_t a = opA.getUInt(i); uint64_t b = opB.getUInt(i); result.setUInt(b ? 
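// (As with udiv/sdiv/srem above, a zero divisor does not trap: the
//  interpreter simply produces 0. The signed variants additionally guard the
//  INT64_MIN / -1 case, which would overflow in host arithmetic.)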
a % b : 0, i); } } INSTRUCTION(zext) { TypedValue operand = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(operand.getUInt(i), i); } } INSTRUCTION(freeze) { TypedValue operand = getOperand(instruction->getOperand(0)); memcpy(result.data, operand.data, result.size * result.num); } #undef INSTRUCTION //////////////////////////////// // WorkItem::InterpreterCache // //////////////////////////////// InterpreterCache::InterpreterCache(llvm::Function* kernel) { // TODO: Determine this number dynamically? m_valueIDs.reserve(1024); // Add global variables to cache // TODO: Only add variables that are used? const llvm::Module* module = kernel->getParent(); llvm::Module::const_global_iterator G; for (G = module->global_begin(); G != module->global_end(); G++) { addValueID(&*G); } set processed; set pending; pending.insert(kernel); while (!pending.empty()) { // Get next function to process llvm::Function* function = *pending.begin(); processed.insert(function); pending.erase(function); // Iterate through the function arguments llvm::Function::arg_iterator A; for (A = function->arg_begin(); A != function->arg_end(); A++) { addValueID(&*A); } // Iterate through instructions in function llvm::inst_iterator I; for (I = inst_begin(function); I != inst_end(function); I++) { addValueID(&*I); // Check for function calls if (I->getOpcode() == llvm::Instruction::Call) { const llvm::CallInst* call = ((const llvm::CallInst*)&*I); llvm::Function* callee = (llvm::Function*)call->getCalledFunction()->stripPointerCasts(); if (callee->isDeclaration()) { // Resolve builtin function calls addBuiltin(callee); } else if (!processed.count(callee)) { // Process called function pending.insert(callee); } } // Process operands for (llvm::User::value_op_iterator O = I->value_op_begin(); O != I->value_op_end(); O++) { addOperand(*O); } } } } InterpreterCache::~InterpreterCache() { ConstantMap::iterator constItr; for (constItr = m_constants.begin(); constItr != m_constants.end(); constItr++) { delete[] constItr->second.data; } ConstExprMap::iterator constExprItr; for (constExprItr = m_constExpressions.begin(); constExprItr != m_constExpressions.end(); constExprItr++) { constExprItr->second->deleteValue(); } } void InterpreterCache::addBuiltin(const llvm::Function* function) { // Check if already in cache InterpreterCache::BuiltinMap::iterator fItr = m_builtins.find(function); if (fItr != m_builtins.end()) { return; } // Extract unmangled name and overload string name, overload; const string fullname = function->getName().str(); if (fullname.compare(0, 2, "_Z") == 0) { int len = atoi(fullname.c_str() + 2); int start = fullname.find_first_not_of("0123456789", 2); name = fullname.substr(start, len); overload = fullname.substr(start + len); } else { name = fullname; overload = ""; } // Find builtin function in map BuiltinFunctionMap::iterator bItr = workItemBuiltins.find(name); if (bItr != workItemBuiltins.end()) { // Add builtin to cache const InterpreterCache::Builtin builtin = {bItr->second, name, overload}; m_builtins[function] = builtin; return; } // Check for builtin with matching prefix BuiltinFunctionPrefixList::iterator pItr; for (pItr = workItemPrefixBuiltins.begin(); pItr != workItemPrefixBuiltins.end(); pItr++) { if (name.compare(0, pItr->first.length(), pItr->first) == 0) { // Add builtin to cache const InterpreterCache::Builtin builtin = {pItr->second, name, overload}; m_builtins[function] = builtin; return; } } // Function didn't match any builtins 
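// (The name parsing above follows the Itanium mangling used for OpenCL C
//  builtins: "_Z" + <length> + <name> + <parameter encoding>. Illustrative
//  example: "_Z3maxii" yields len == 3, name == "max" and overload == "ii";
//  the overload string is what helpers such as getOverloadArgType() later
//  inspect to pick signed/unsigned/float behaviour.)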
FATAL_ERROR("Undefined external function: %s", name.c_str()); } InterpreterCache::Builtin InterpreterCache::getBuiltin(const llvm::Function* function) const { return m_builtins.at(function); } void InterpreterCache::addConstant(const llvm::Value* value) { // Check if constant already in cache if (m_constants.count(value)) { return; } // Create constant and add to cache pair size = getValueSize(value); TypedValue constant; constant.size = size.first; constant.num = size.second; constant.data = new unsigned char[getTypeSize(value->getType())]; getConstantData(constant.data, (const llvm::Constant*)value); m_constants[value] = constant; } TypedValue InterpreterCache::getConstant(const llvm::Value* operand) const { ConstantMap::const_iterator itr = m_constants.find(operand); if (itr == m_constants.end()) { FATAL_ERROR("Constant not found in cache (ID %d)", operand->getValueID()); } return itr->second; } const llvm::Instruction* InterpreterCache::getConstantExpr(const llvm::Value* expr) const { ConstExprMap::const_iterator itr = m_constExpressions.find(expr); if (itr == m_constExpressions.end()) { FATAL_ERROR("Constant expression not found in cache"); } return itr->second; } unsigned InterpreterCache::addValueID(const llvm::Value* value) { ValueMap::iterator itr = m_valueIDs.find(value); if (itr == m_valueIDs.end()) { // Assign next index to value unsigned pos = m_valueIDs.size(); itr = m_valueIDs.insert(make_pair(value, pos)).first; } return itr->second; } unsigned InterpreterCache::getValueID(const llvm::Value* value) const { ValueMap::const_iterator itr = m_valueIDs.find(value); if (itr == m_valueIDs.end()) { FATAL_ERROR("Value not found in cache (ID %d)", value->getValueID()); } return itr->second; } unsigned InterpreterCache::getNumValues() const { return m_valueIDs.size(); } bool InterpreterCache::hasValue(const llvm::Value* value) const { return m_valueIDs.count(value); } void InterpreterCache::addOperand(const llvm::Value* operand) { // Resolve constants if (llvm::isa(operand) || llvm::isa(operand)) { addConstant(operand); } else if (llvm::isa(operand)) { // Resolve constant expressions const llvm::ConstantExpr* expr = (const llvm::ConstantExpr*)operand; if (!m_constExpressions.count(expr)) { for (auto O = expr->op_begin(); O != expr->op_end(); O++) { addOperand(*O); } m_constExpressions[expr] = getConstExprAsInstruction(expr); // TODO: Resolve actual value? } } else { addValueID(operand); } } Oclgrind-21.10/src/core/WorkItem.h000066400000000000000000000131531413315665100167170ustar00rootroot00000000000000// WorkItem.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "common.h" namespace llvm { class BasicBlock; class CallInst; class ConstExpr; class DILocalVariable; class Function; class Module; } // namespace llvm namespace oclgrind { class Context; class Kernel; class KernelInvocation; class Memory; class WorkGroup; class WorkItem; class WorkItemBuiltins; // Data structures for builtin functions struct BuiltinFunction { void (*func)(WorkItem*, const llvm::CallInst*, const std::string&, const std::string&, TypedValue&, void*); void* op; BuiltinFunction(){}; BuiltinFunction(void (*f)(WorkItem*, const llvm::CallInst*, const std::string&, const std::string&, TypedValue&, void*), void* o) : func(f), op(o){}; }; typedef std::unordered_map BuiltinFunctionMap; typedef std::list> BuiltinFunctionPrefixList; extern BuiltinFunctionMap workItemBuiltins; extern BuiltinFunctionPrefixList workItemPrefixBuiltins; // Per-kernel cache for various interpreter state information class InterpreterCache { public: struct Builtin { BuiltinFunction function; std::string name, overload; }; InterpreterCache(llvm::Function* kernel); ~InterpreterCache(); void addBuiltin(const llvm::Function* function); Builtin getBuiltin(const llvm::Function* function) const; void addConstant(const llvm::Value* constant); TypedValue getConstant(const llvm::Value* operand) const; const llvm::Instruction* getConstantExpr(const llvm::Value* expr) const; unsigned addValueID(const llvm::Value* value); unsigned getValueID(const llvm::Value* value) const; unsigned getNumValues() const; bool hasValue(const llvm::Value* value) const; private: typedef std::unordered_map ValueMap; typedef std::unordered_map BuiltinMap; typedef std::unordered_map ConstantMap; typedef std::unordered_map ConstExprMap; BuiltinMap m_builtins; ConstantMap m_constants; ConstExprMap m_constExpressions; ValueMap m_valueIDs; void addOperand(const llvm::Value* value); }; class WorkItem { friend class WorkItemBuiltins; public: enum State { READY, BARRIER, FINISHED }; public: WorkItem(const KernelInvocation* kernelInvocation, WorkGroup* workGroup, Size3 lid); virtual ~WorkItem(); void clearBarrier(); void dispatch(const llvm::Instruction* instruction, TypedValue& result); void execute(const llvm::Instruction* instruction); const std::stack& getCallStack() const; const llvm::BasicBlock* getCurrentBlock() const; const llvm::Instruction* getCurrentInstruction() const; Size3 getGlobalID() const; size_t getGlobalIndex() const; Size3 getLocalID() const; TypedValue getOperand(const llvm::Value* operand) const; const llvm::BasicBlock* getPreviousBlock() const; Memory* getPrivateMemory() const; State getState() const; const unsigned char* getValueData(const llvm::Value* value) const; const WorkGroup* getWorkGroup() const; void printExpression(std::string expr) const; bool printValue(const llvm::Value* value) const; State step(); // SPIR instructions private: #define INSTRUCTION(name) \ void name(const llvm::Instruction* instruction, TypedValue& result) INSTRUCTION(add); INSTRUCTION(alloc); INSTRUCTION(ashr); INSTRUCTION(bitcast); INSTRUCTION(br); INSTRUCTION(bwand); INSTRUCTION(bwor); INSTRUCTION(bwxor); INSTRUCTION(call); INSTRUCTION(extractelem); INSTRUCTION(extractval); INSTRUCTION(fadd); INSTRUCTION(fcmp); INSTRUCTION(fdiv); INSTRUCTION(fmul); INSTRUCTION(fneg); INSTRUCTION(fpext); INSTRUCTION(fptosi); INSTRUCTION(fptoui); INSTRUCTION(fptrunc); INSTRUCTION(frem); INSTRUCTION(fsub); INSTRUCTION(gep); INSTRUCTION(icmp); INSTRUCTION(insertelem); INSTRUCTION(insertval); INSTRUCTION(inttoptr); INSTRUCTION(itrunc); INSTRUCTION(load); 
INSTRUCTION(lshr); INSTRUCTION(mul); INSTRUCTION(phi); INSTRUCTION(ptrtoint); INSTRUCTION(ret); INSTRUCTION(sdiv); INSTRUCTION(select); INSTRUCTION(sext); INSTRUCTION(shl); INSTRUCTION(shuffle); INSTRUCTION(sitofp); INSTRUCTION(srem); INSTRUCTION(store); INSTRUCTION(sub); INSTRUCTION(swtch); INSTRUCTION(udiv); INSTRUCTION(uitofp); INSTRUCTION(urem); INSTRUCTION(zext); INSTRUCTION(freeze); #undef INSTRUCTION private: typedef std::map> VariableMap; size_t m_globalIndex; Size3 m_globalID; Size3 m_localID; TypedValueMap m_phiTemps; VariableMap m_variables; const Context* m_context; const KernelInvocation* m_kernelInvocation; Memory* m_privateMemory; WorkGroup* m_workGroup; mutable MemoryPool m_pool; State m_state; struct Position; Position* m_position; Memory* getMemory(unsigned int addrSpace) const; // Store for instruction results and other operand values std::vector m_values; TypedValue getValue(const llvm::Value* key) const; bool hasValue(const llvm::Value* key) const; void setValue(const llvm::Value* key, TypedValue value); const InterpreterCache* m_cache; }; } // namespace oclgrind Oclgrind-21.10/src/core/WorkItemBuiltins.cpp000066400000000000000000003062041413315665100207660ustar00rootroot00000000000000// WorkItemBuiltins.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #include "config.h" #include #include #include #include #include #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "CL/cl.h" #include "CL/cl_half.h" #include "Context.h" #include "KernelInvocation.h" #include "Memory.h" #include "WorkGroup.h" #include "WorkItem.h" using namespace oclgrind; using namespace std; #ifndef M_PI #define M_PI 3.1415926535897932384626433832795 #endif namespace oclgrind { static mutex printfMutex; class WorkItemBuiltins { // Utility macros for creating builtins #define DEFINE_BUILTIN(name) \ static void name(WorkItem* workItem, const llvm::CallInst* callInst, \ const string& fnName, const string& overload, \ TypedValue& result, void*) #define ARG(i) (callInst->getArgOperand(i)) #define UARGV(i, v) workItem->getOperand(ARG(i)).getUInt(v) #define SARGV(i, v) workItem->getOperand(ARG(i)).getSInt(v) #define FARGV(i, v) workItem->getOperand(ARG(i)).getFloat(v) #define PARGV(i, v) workItem->getOperand(ARG(i)).getPointer(v) #define UARG(i) UARGV(i, 0) #define SARG(i) SARGV(i, 0) #define FARG(i) FARGV(i, 0) #define PARG(i) PARGV(i, 0) #define ARG_VLEN(i) \ llvm::cast(ARG(i)->getType())->getNumElements() // Functions that apply generic builtins to each component of a vector static void f1arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, double (*func)(double)) { for (unsigned i = 0; i < result.num; i++) { result.setFloat(func(FARGV(0, i)), i); } } static void f2arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, double (*func)(double, double)) { for (unsigned i = 0; i < result.num; i++) { result.setFloat(func(FARGV(0, i), FARGV(1, i)), i); } } static void f3arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, double (*func)(double, double, double)) { for 
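// (f1arg/f2arg/f3arg and the u*/s*/rel* helpers below simply apply a scalar
//  C function component-wise: builtins such as clamp(), max() and min()
//  further down pass _clamp_, fmax or fmin into these helpers instead of
//  re-implementing the per-element loop themselves.)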
(unsigned i = 0; i < result.num; i++) { result.setFloat(func(FARGV(0, i), FARGV(1, i), FARGV(2, i)), i); } } static void u1arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, uint64_t (*func)(uint64_t)) { for (unsigned i = 0; i < result.num; i++) { result.setUInt(func(UARGV(0, i)), i); } } static void u2arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, uint64_t (*func)(uint64_t, uint64_t)) { for (unsigned i = 0; i < result.num; i++) { result.setUInt(func(UARGV(0, i), UARGV(1, i)), i); } } static void u3arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, uint64_t (*func)(uint64_t, uint64_t, uint64_t)) { for (unsigned i = 0; i < result.num; i++) { result.setUInt(func(UARGV(0, i), UARGV(1, i), UARGV(2, i)), i); } } static void s1arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, int64_t (*func)(int64_t)) { for (unsigned i = 0; i < result.num; i++) { result.setSInt(func(SARGV(0, i)), i); } } static void s2arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, int64_t (*func)(int64_t, int64_t)) { for (unsigned i = 0; i < result.num; i++) { result.setSInt(func(SARGV(0, i), SARGV(1, i)), i); } } static void s3arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, int64_t (*func)(int64_t, int64_t, int64_t)) { for (unsigned i = 0; i < result.num; i++) { result.setSInt(func(SARGV(0, i), SARGV(1, i), SARGV(2, i)), i); } } static void rel1arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, int64_t (*func)(double)) { int64_t t = result.num > 1 ? -1 : 1; for (unsigned i = 0; i < result.num; i++) { result.setSInt(func(FARGV(0, i)) * t, i); } } static void rel2arg(WorkItem* workItem, const llvm::CallInst* callInst, const string& name, const string& overload, TypedValue& result, int64_t (*func)(double, double)) { int64_t t = result.num > 1 ? 
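// (Same convention as fcmp/icmp in the interpreter core: relational results
//  are 1 for scalars but -1 -- all bits set -- per component for vectors,
//  which is what OpenCL C requires of the vector relational builtins.)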
-1 : 1; for (unsigned i = 0; i < result.num; i++) { result.setSInt(func(FARGV(0, i), FARGV(1, i)) * t, i); } } // Extract the (first) argument type from an overload string static char getOverloadArgType(const string& overload) { char type = overload[0]; if (type == 'D') { char* typestr; strtol(overload.c_str() + 2, &typestr, 10); type = typestr[1]; } return type; } /////////////////////////////////////// // Async Copy and Prefetch Functions // /////////////////////////////////////// DEFINE_BUILTIN(async_work_group_copy) { int arg = 0; // Get src/dest addresses const llvm::Value* destOp = ARG(arg++); const llvm::Value* srcOp = ARG(arg++); size_t dest = workItem->getOperand(destOp).getPointer(); size_t src = workItem->getOperand(srcOp).getPointer(); // Get size of copy unsigned elemSize = getTypeSize(destOp->getType()->getPointerElementType()); uint64_t num = UARG(arg++); // Get stride uint64_t stride = 1; size_t srcStride = 1; size_t destStride = 1; if (fnName == "async_work_group_strided_copy") { stride = UARG(arg++); } size_t event = UARG(arg++); // Get type of copy WorkGroup::AsyncCopyType type; if (destOp->getType()->getPointerAddressSpace() == AddrSpaceLocal) { type = WorkGroup::GLOBAL_TO_LOCAL; srcStride = stride; } else { type = WorkGroup::LOCAL_TO_GLOBAL; destStride = stride; } // Register copy event = workItem->m_workGroup->async_copy(workItem, callInst, type, dest, src, elemSize, num, srcStride, destStride, event); result.setUInt(event); } DEFINE_BUILTIN(wait_group_events) { uint64_t num = UARG(0); size_t address = PARG(1); list events; for (unsigned i = 0; i < num; i++) { size_t event; if (!workItem->m_privateMemory->load((unsigned char*)&event, address, sizeof(size_t))) { return; } events.push_back(event); address += sizeof(size_t); } workItem->m_state = WorkItem::BARRIER; workItem->m_workGroup->notifyBarrier( workItem, callInst, CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, events); } DEFINE_BUILTIN(prefetch) { // Do nothing. } ////////////////////// // Atomic Functions // ////////////////////// static bool _is_signed_type(char c) { const string signed_vals("casilxn"); // CXXNameMangler return signed_vals.find(c) != string::npos; } DEFINE_BUILTIN(atomic_op) { const static map name_to_op = { {"atomic_add", AtomicAdd}, {"atom_add", AtomicAdd}, {"atomic_and", AtomicAnd}, {"atom_and", AtomicAnd}, {"atom_cmpxchg", AtomicCmpXchg}, {"atomic_cmpxchg", AtomicCmpXchg}, {"atom_dec", AtomicDec}, {"atomic_dec", AtomicDec}, {"atom_inc", AtomicInc}, {"atomic_inc", AtomicInc}, {"atom_max", AtomicMax}, {"atomic_max", AtomicMax}, {"atom_min", AtomicMin}, {"atomic_min", AtomicMin}, {"atom_or", AtomicOr}, {"atomic_or", AtomicOr}, {"atom_sub", AtomicSub}, {"atomic_sub", AtomicSub}, {"atom_xchg", AtomicXchg}, {"atomic_xchg", AtomicXchg}, {"atom_xor", AtomicXor}, {"atomic_xor", AtomicXor}, }; Memory* memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); const bool is_64bit( ARG(0)->getType()->getPointerElementType()->getScalarSizeInBits() == 64); const bool is_signed_type(_is_signed_type(overload.back())); const auto op(name_to_op.at(fnName)); size_t address = PARG(0); // Verify the address is 4/8-byte aligned if ((address & ((is_64bit ? 
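// (Atomics must be naturally aligned: 4 bytes for 32-bit objects and 8 bytes
//  for 64-bit ones, which is what the mask test here checks. Illustrative
//  example: address 0x1004 passes for a 32-bit atomic (0x1004 & 3 == 0) but
//  is flagged for a 64-bit one (0x1004 & 7 == 4).)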
8 : 4) - 1)) != 0) { workItem->m_context->logError(("Unaligned address on " + fnName).c_str()); } uint64_t old; if (op == AtomicCmpXchg) { if (is_64bit) { old = memory->atomicCmpxchg(address, UARG(1), UARG(2)); } else { old = memory->atomicCmpxchg(address, UARG(1), UARG(2)); } } else if (op == AtomicInc || op == AtomicDec) { if (is_64bit) { old = memory->atomic(op, address); } else { old = memory->atomic(op, address); } } else if (op == AtomicMax || op == AtomicMin) { if (is_64bit) { if (is_signed_type) { old = memory->atomic(op, address, SARG(1)); } else { old = memory->atomic(op, address, UARG(1)); } } else { if (is_signed_type) { old = memory->atomic(op, address, SARG(1)); } else { old = memory->atomic(op, address, UARG(1)); } } } else { if (is_64bit) { old = memory->atomic(op, address, UARG(1)); } else { old = memory->atomic(op, address, UARG(1)); } } result.setUInt(old); } ////////////////////// // Common Functions // ////////////////////// template T static _max_(T a, T b) { return a > b ? a : b; } template T static _min_(T a, T b) { return a < b ? a : b; } template T static _clamp_(T x, T min, T max) { return _min_(_max_(x, min), max); } static double _degrees_(double x) { return x * (180 / M_PI); } static double _radians_(double x) { return x * (M_PI / 180); } static double _sign_(double x) { if (std::isnan(x)) return 0.0; if (x > 0.0) return 1.0; if (x == -0.0) return -0.0; if (x == 0.0) return 0.0; if (x < 0.0) return -1.0; return 0.0; } DEFINE_BUILTIN(clamp) { switch (getOverloadArgType(overload)) { case 'f': case 'd': if (ARG(1)->getType()->isVectorTy()) { f3arg(workItem, callInst, fnName, overload, result, _clamp_); } else { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double minval = FARG(1); double maxval = FARG(2); result.setFloat(_clamp_(x, minval, maxval), i); } } break; case 'h': case 't': case 'j': case 'm': u3arg(workItem, callInst, fnName, overload, result, _clamp_); break; case 'c': case 's': case 'i': case 'l': s3arg(workItem, callInst, fnName, overload, result, _clamp_); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } DEFINE_BUILTIN(max) { switch (getOverloadArgType(overload)) { case 'f': case 'd': if (ARG(1)->getType()->isVectorTy()) { f2arg(workItem, callInst, fnName, overload, result, fmax); } else { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double y = FARG(1); result.setFloat(_max_(x, y), i); } } break; case 'h': case 't': case 'j': case 'm': u2arg(workItem, callInst, fnName, overload, result, _max_); break; case 'c': case 's': case 'i': case 'l': s2arg(workItem, callInst, fnName, overload, result, _max_); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } DEFINE_BUILTIN(min) { switch (getOverloadArgType(overload)) { case 'f': case 'd': if (ARG(1)->getType()->isVectorTy()) { f2arg(workItem, callInst, fnName, overload, result, fmin); } else { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double y = FARG(1); result.setFloat(_min_(x, y), i); } } break; case 'h': case 't': case 'j': case 'm': u2arg(workItem, callInst, fnName, overload, result, _min_); break; case 'c': case 's': case 'i': case 'l': s2arg(workItem, callInst, fnName, overload, result, _min_); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } DEFINE_BUILTIN(mix) { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double y = FARGV(1, i); double a = ARG(2)->getType()->isVectorTy() 
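// (mix() blends component-wise as x + (y - x) * a, so a == 0 yields x and
//  a == 1 yields y; the isVectorTy() check lets the blend factor be either a
//  single scalar or one value per component. Illustrative example:
//  mix(2.0f, 6.0f, 0.25f) == 3.0f.)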
? FARGV(2, i) : FARG(2); double r = x + (y - x) * a; result.setFloat(r, i); } } DEFINE_BUILTIN(smoothstep) { for (unsigned i = 0; i < result.num; i++) { double edge0 = ARG(0)->getType()->isVectorTy() ? FARGV(0, i) : FARG(0); double edge1 = ARG(1)->getType()->isVectorTy() ? FARGV(1, i) : FARG(1); double x = FARGV(2, i); double t = _clamp_((x - edge0) / (edge1 - edge0), 0, 1); double r = t * t * (3 - 2 * t); result.setFloat(r, i); } } DEFINE_BUILTIN(step) { for (unsigned i = 0; i < result.num; i++) { double edge = ARG(0)->getType()->isVectorTy() ? FARGV(0, i) : FARG(0); double x = FARGV(1, i); double r = (x < edge) ? 0.0 : 1.0; result.setFloat(r, i); } } ///////////////////////// // Geometric Functions // ///////////////////////// DEFINE_BUILTIN(cross) { double u1 = FARGV(0, 0); double u2 = FARGV(0, 1); double u3 = FARGV(0, 2); double v1 = FARGV(1, 0); double v2 = FARGV(1, 1); double v3 = FARGV(1, 2); result.setFloat(u2 * v3 - u3 * v2, 0); result.setFloat(u3 * v1 - u1 * v3, 1); result.setFloat(u1 * v2 - u2 * v1, 2); result.setFloat(0, 3); } DEFINE_BUILTIN(dot) { unsigned num = 1; if (ARG(0)->getType()->isVectorTy()) { num = ARG_VLEN(0); } double r = 0.f; for (unsigned i = 0; i < num; i++) { double a = FARGV(0, i); double b = FARGV(1, i); r += a * b; } result.setFloat(r); } static double geometric_length(double* values, unsigned num) { double lengthSq = 0.0; for (unsigned i = 0; i < num; i++) { lengthSq += values[i] * values[i]; } // Check for overflow/underflow double rescale = 1.0; if (lengthSq == INFINITY) { rescale = ldexp(1.0, -512); } else if (lengthSq < num * DBL_MIN / DBL_EPSILON) { rescale = ldexp(1.0, 640); } if (rescale != 1.0) { // Re-do calculations with a rescaling multiplier lengthSq = 0.0; for (unsigned i = 0; i < num; i++) { double f = values[i] * rescale; lengthSq += f * f; } } return sqrt(lengthSq) * (1.0 / rescale); } DEFINE_BUILTIN(distance) { unsigned num = 1; if (ARG(0)->getType()->isVectorTy()) { num = ARG_VLEN(0); } double values[4]; for (unsigned i = 0; i < num; i++) { values[i] = FARGV(0, i) - FARGV(1, i); } result.setFloat(geometric_length(values, num)); } DEFINE_BUILTIN(length) { unsigned num = 1; if (ARG(0)->getType()->isVectorTy()) { num = ARG_VLEN(0); } double values[4]; for (unsigned i = 0; i < num; i++) { values[i] = FARGV(0, i); } result.setFloat(geometric_length(values, num)); } DEFINE_BUILTIN(normalize) { double values[4]; double lengthSq = 0.0; for (unsigned i = 0; i < result.num; i++) { values[i] = FARGV(0, i); lengthSq += values[i] * values[i]; } if (lengthSq == INFINITY) { // Re-do calculations with a rescaling multiplier lengthSq = 0.0; double rescale = ldexp(1.0, -512); for (unsigned i = 0; i < result.num; i++) { values[i] = values[i] * rescale; lengthSq += values[i] * values[i]; } if (lengthSq == INFINITY) { // Infinities in input, set all other values to 0 lengthSq = 0.0; for (unsigned i = 0; i < result.num; i++) { if (std::isinf(values[i])) { values[i] = copysign(1.0, FARGV(0, i)); lengthSq += 1.0; } else { values[i] = copysign(0.0, FARGV(0, i)); } } } } else if (lengthSq < result.num * DBL_MIN / DBL_EPSILON) { // Re-do calculations with a rescaling multiplier lengthSq = 0.0; double rescale = ldexp(1.0, 640); for (unsigned i = 0; i < result.num; i++) { values[i] = values[i] * rescale; lengthSq += values[i] * values[i]; } if (lengthSq == 0.0) { // Zeros in input, copy vector unchanged for (unsigned i = 0; i < result.num; i++) { result.setFloat(FARGV(0, i), i); } return; } } double length = sqrt(lengthSq); for (unsigned i = 0; i < result.num; i++) { 
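// (The rescaling above mirrors geometric_length(): squaring very large or
//  very small components would overflow to infinity or underflow to zero, so
//  the sum of squares is recomputed with a power-of-two scale (2^-512 or
//  2^640). Because every component carries the same factor, dividing by the
//  rescaled length below still produces a unit-length result.)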
result.setFloat(values[i] / length, i); } } ///////////////////// // Image Functions // ///////////////////// static size_t getChannelSize(const cl_image_format& format) { switch (format.image_channel_data_type) { case CL_SNORM_INT8: case CL_UNORM_INT8: case CL_SIGNED_INT8: case CL_UNSIGNED_INT8: return 1; case CL_SNORM_INT16: case CL_UNORM_INT16: case CL_SIGNED_INT16: case CL_UNSIGNED_INT16: case CL_HALF_FLOAT: return 2; case CL_SIGNED_INT32: case CL_UNSIGNED_INT32: case CL_FLOAT: return 4; default: return 0; } } static size_t getNumChannels(const cl_image_format& format) { switch (format.image_channel_order) { case CL_R: case CL_Rx: case CL_A: case CL_INTENSITY: case CL_LUMINANCE: return 1; case CL_RG: case CL_RGx: case CL_RA: return 2; case CL_RGB: case CL_RGBx: return 3; case CL_RGBA: case CL_ARGB: case CL_BGRA: return 4; default: return 0; } } static bool hasZeroAlphaBorder(const cl_image_format& format) { switch (format.image_channel_order) { case CL_A: case CL_INTENSITY: case CL_Rx: case CL_RA: case CL_RGx: case CL_RGBx: case CL_ARGB: case CL_BGRA: case CL_RGBA: return true; default: return false; } } DEFINE_BUILTIN(get_image_array_size) { Image* image = *(Image**)(workItem->getValue(ARG(0)).data); result.setUInt(image->desc.image_array_size); } DEFINE_BUILTIN(get_image_channel_data_type) { Image* image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->format.image_channel_data_type); } DEFINE_BUILTIN(get_image_channel_order) { Image* image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->format.image_channel_order); } DEFINE_BUILTIN(get_image_dim) { Image* image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->desc.image_width, 0); result.setSInt(image->desc.image_height, 1); if (result.num > 2) { result.setSInt(image->desc.image_depth, 2); result.setSInt(0, 3); } } DEFINE_BUILTIN(get_image_depth) { Image* image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->desc.image_depth); } DEFINE_BUILTIN(get_image_height) { Image* image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->desc.image_height); } DEFINE_BUILTIN(get_image_width) { Image* image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->desc.image_width); } static inline float getCoordinate(const llvm::Value* value, int index, char type, WorkItem* workItem) { switch (type) { case 'i': return workItem->getOperand(value).getSInt(index); case 'f': return workItem->getOperand(value).getFloat(index); default: FATAL_ERROR("Unsupported coordinate type: '%c'", type); } } static inline int getNearestCoordinate(uint32_t sampler, float n, // Normalized float u, // Unormalized size_t size) { switch (sampler & CLK_ADDRESS_MASK) { case CLK_ADDRESS_NONE: return floor(u); case CLK_ADDRESS_CLAMP_TO_EDGE: return _clamp_(floor(u), 0, size - 1); case CLK_ADDRESS_CLAMP: return _clamp_(floor(u), -1, size); case CLK_ADDRESS_REPEAT: return (int)floorf((n - floorf(n)) * size) % size; case CLK_ADDRESS_MIRRORED_REPEAT: return _min_((int)floorf(fabsf(n - 2.f * rintf(0.5f * n)) * size), size - 1); default: FATAL_ERROR("Unsupported sampler addressing mode: %X", sampler & CLK_ADDRESS_MASK); } } static inline float getAdjacentCoordinates(uint32_t sampler, float n, // Normalized float u, // Unnormalized size_t size, int* c0, int* c1) { switch (sampler & CLK_ADDRESS_MASK) { case CLK_ADDRESS_NONE: *c0 = floor(u); *c1 = floor(u) + 1; return u; case CLK_ADDRESS_CLAMP_TO_EDGE: *c0 = _clamp_(floorf(u - 0.5f), 0, size - 1); *c1 = _clamp_(floorf(u - 
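// (For CLK_FILTER_LINEAR the two texels bracketing u - 0.5 are selected on
//  each axis (c0 = floor(u - 0.5), c1 = c0 + 1, subject to the addressing
//  mode); read_imagef() then blends the eight neighbours with the fractional
//  weights frac(u - 0.5), frac(v - 0.5) and frac(w - 0.5).)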
0.5f) + 1, 0, size - 1); return u; case CLK_ADDRESS_CLAMP: *c0 = _clamp_((floorf(u - 0.5f)), -1, size); *c1 = _clamp_((floorf(u - 0.5f)) + 1, -1, size); return u; case CLK_ADDRESS_REPEAT: { u = (n - floorf(n)) * size; *c0 = (int)floorf(u - 0.5f); *c1 = *c0 + 1; if (*c0 < 0) *c0 += size; if (*c1 >= size) *c1 -= size; return u; } case CLK_ADDRESS_MIRRORED_REPEAT: { u = fabsf(n - 2.0f * rintf(0.5f * n)) * size; *c0 = (int)floorf(u - 0.5f); *c1 = *c0 + 1; *c0 = _max_(*c0, 0); *c1 = _min_(*c1, (int)size - 1); return u; } default: FATAL_ERROR("Unsupported sampler addressing mode: %X", sampler & CLK_ADDRESS_MASK); } } static inline int getInputChannel(const cl_image_format& format, int output, float* ret) { int input = output; switch (format.image_channel_order) { case CL_R: case CL_Rx: if (output == 1) { *ret = 0.f; return -1; } case CL_RG: case CL_RGx: if (output == 2) { *ret = 0.f; return -1; } case CL_RGB: case CL_RGBx: if (output == 3) { *ret = 1.f; return -1; } break; case CL_RGBA: break; case CL_BGRA: if (output == 0) input = 2; if (output == 2) input = 0; break; case CL_ARGB: if (output == 0) input = 1; if (output == 1) input = 2; if (output == 2) input = 3; if (output == 3) input = 0; break; case CL_A: if (output == 3) input = 0; else { *ret = 0.f; return -1; } break; case CL_RA: if (output == 3) input = 1; else if (output != 0) { *ret = 0.f; return -1; } break; case CL_INTENSITY: input = 0; break; case CL_LUMINANCE: if (output == 3) { *ret = 1.f; return -1; } input = 0; break; default: FATAL_ERROR("Unsupported image channel order: %X", format.image_channel_order); } return input; } static inline float readNormalizedColor(const Image* image, WorkItem* workItem, int i, int j, int k, int layer, int c) { // Check for out-of-range coordinages if (i < 0 || i >= image->desc.image_width || j < 0 || j >= image->desc.image_height || k < 0 || k >= image->desc.image_depth) { // Return border color if (c == 3 && !hasZeroAlphaBorder(image->format)) { return 1.f; } return 0.f; } // Remap channels float ret = 0.f; int channel = getInputChannel(image->format, c, &ret); if (channel < 0) { return ret; } // Calculate pixel address size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize * numChannels; size_t address = image->address + (i + (j + (k + layer * image->desc.image_depth) * image->desc.image_height) * image->desc.image_width) * pixelSize + channel * channelSize; // Load channel data unsigned char* data = workItem->m_pool.alloc(channelSize); if (!workItem->getMemory(AddrSpaceGlobal)->load(data, address, channelSize)) { return 0.f; } // Compute normalized color value float color; switch (image->format.image_channel_data_type) { case CL_SNORM_INT8: color = _clamp_(*(int8_t*)data / 127.f, -1.f, 1.f); break; case CL_UNORM_INT8: color = _clamp_(*(uint8_t*)data / 255.f, 0.f, 1.f); break; case CL_SNORM_INT16: color = _clamp_(*(int16_t*)data / 32767.f, -1.f, 1.f); break; case CL_UNORM_INT16: color = _clamp_(*(uint16_t*)data / 65535.f, 0.f, 1.f); break; case CL_FLOAT: color = *(float*)data; break; case CL_HALF_FLOAT: color = cl_half_to_float(*(cl_half*)data); break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } return color; } static inline int32_t readSignedColor(const Image* image, WorkItem* workItem, int i, int j, int k, int layer, int c) { // Check for out-of-range coordinages if (i < 0 || i >= image->desc.image_width || j < 0 || j >= image->desc.image_height || k < 
0 || k >= image->desc.image_depth) { // Return border color if (c == 3 && !hasZeroAlphaBorder(image->format)) { return 1.f; } return 0.f; } // Remap channels float ret = 0.f; int channel = getInputChannel(image->format, c, &ret); if (channel < 0) { return ret; } // Calculate pixel address size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize * numChannels; size_t address = image->address + (i + (j + (k + layer * image->desc.image_depth) * image->desc.image_height) * image->desc.image_width) * pixelSize + channel * channelSize; // Load channel data unsigned char* data = workItem->m_pool.alloc(channelSize); if (!workItem->getMemory(AddrSpaceGlobal)->load(data, address, channelSize)) { return 0; } // Compute unnormalized color value int32_t color; switch (image->format.image_channel_data_type) { case CL_SIGNED_INT8: color = *(int8_t*)data; break; case CL_SIGNED_INT16: color = *(int16_t*)data; break; case CL_SIGNED_INT32: color = *(int32_t*)data; break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } return color; } static inline uint32_t readUnsignedColor(const Image* image, WorkItem* workItem, int i, int j, int k, int layer, int c) { // Check for out-of-range coordinages if (i < 0 || i >= image->desc.image_width || j < 0 || j >= image->desc.image_height || k < 0 || k >= image->desc.image_depth) { // Return border color if (c == 3 && !hasZeroAlphaBorder(image->format)) { return 1.f; } return 0.f; } // Remap channels float ret = 0.f; int channel = getInputChannel(image->format, c, &ret); if (channel < 0) { return ret; } // Calculate pixel address size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize * numChannels; size_t address = image->address + (i + (j + (k + layer * image->desc.image_depth) * image->desc.image_height) * image->desc.image_width) * pixelSize + channel * channelSize; // Load channel data unsigned char* data = workItem->m_pool.alloc(channelSize); if (!workItem->getMemory(AddrSpaceGlobal)->load(data, address, channelSize)) { return 0; } // Load color value uint32_t color; switch (image->format.image_channel_data_type) { case CL_UNSIGNED_INT8: color = *(uint8_t*)data; break; case CL_UNSIGNED_INT16: color = *(uint16_t*)data; break; case CL_UNSIGNED_INT32: color = *(uint32_t*)data; break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } return color; } static inline float frac(float x) { return x - floorf(x); } static inline float interpolate(float v000, float v010, float v100, float v110, float v001, float v011, float v101, float v111, float a, float b, float c) { return (1 - a) * (1 - b) * (1 - c) * v000 + a * (1 - b) * (1 - c) * v100 + (1 - a) * b * (1 - c) * v010 + a * b * (1 - c) * v110 + (1 - a) * (1 - b) * c * v001 + a * (1 - b) * c * v101 + (1 - a) * b * c * v011 + a * b * c * v111; } DEFINE_BUILTIN(translate_sampler_initializer) { // A sampler initializer is just a pointer to its ConstantInt value result.setPointer((size_t)ARG(0)); } DEFINE_BUILTIN(read_imagef) { const Image* image = *(Image**)(workItem->getValue(ARG(0)).data); uint32_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int coordIndex = 1; // Check for sampler version if (callInst->getNumArgOperands() > 2) { sampler = ((llvm::ConstantInt*)PARG(1))->getZExtValue(); coordIndex = 2; } // Get coordinates float s = 0.f, t = 0.f, r = 0.f; 
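// (The coordinate element type is recovered from the last character of the
//  mangled overload string -- 'i' for integer and 'f' for floating-point
//  coordinates -- which is exactly what getCoordinate() switches on. The
//  sampler-less read_image* forms keep the default
//  CLK_ADDRESS_NONE | CLK_FILTER_NEAREST sampler set up above.)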
char coordType = *overload.rbegin(); s = getCoordinate(ARG(coordIndex), 0, coordType, workItem); if (ARG(coordIndex)->getType()->isVectorTy()) { t = getCoordinate(ARG(coordIndex), 1, coordType, workItem); if (ARG_VLEN(coordIndex) > 2) { r = getCoordinate(ARG(coordIndex), 2, coordType, workItem); } } // Get unnormalized coordinates float u = 0.f, v = 0.f, w = 0.f; bool noormCoords = sampler & CLK_NORMALIZED_COORDS_TRUE; if (noormCoords) { u = s * image->desc.image_width; v = t * image->desc.image_height; w = r * image->desc.image_depth; } else { u = s; v = t; w = r; } // Get array layer index int layer = 0; if (image->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { layer = _clamp_(rintf(t), 0, image->desc.image_array_size - 1); v = t = 0.f; } else if (image->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { layer = _clamp_(rintf(r), 0, image->desc.image_array_size - 1); w = r = 0.f; } float values[4]; if (sampler & CLK_FILTER_LINEAR) { // Get coordinates of adjacent pixels int i0 = 0, i1 = 0, j0 = 0, j1 = 0, k0 = 0, k1 = 0; u = getAdjacentCoordinates(sampler, s, u, image->desc.image_width, &i0, &i1); v = getAdjacentCoordinates(sampler, t, v, image->desc.image_height, &j0, &j1); w = getAdjacentCoordinates(sampler, r, w, image->desc.image_depth, &k0, &k1); // Make sure y and z coordinates are equal for 1 and 2D images if (image->desc.image_type == CL_MEM_OBJECT_IMAGE1D || image->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { j0 = j1; k0 = k1; } else if (image->desc.image_type == CL_MEM_OBJECT_IMAGE2D || image->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { k0 = k1; } // Perform linear interpolation float a = frac(u - 0.5f); float b = frac(v - 0.5f); float c = frac(w - 0.5f); for (int i = 0; i < 4; i++) { values[i] = interpolate( readNormalizedColor(image, workItem, i0, j0, k0, layer, i), readNormalizedColor(image, workItem, i0, j1, k0, layer, i), readNormalizedColor(image, workItem, i1, j0, k0, layer, i), readNormalizedColor(image, workItem, i1, j1, k0, layer, i), readNormalizedColor(image, workItem, i0, j0, k1, layer, i), readNormalizedColor(image, workItem, i0, j1, k1, layer, i), readNormalizedColor(image, workItem, i1, j0, k1, layer, i), readNormalizedColor(image, workItem, i1, j1, k1, layer, i), a, b, c); } } else { // Read values from nearest pixel int i = getNearestCoordinate(sampler, s, u, image->desc.image_width); int j = getNearestCoordinate(sampler, t, v, image->desc.image_height); int k = getNearestCoordinate(sampler, r, w, image->desc.image_depth); values[0] = readNormalizedColor(image, workItem, i, j, k, layer, 0); values[1] = readNormalizedColor(image, workItem, i, j, k, layer, 1); values[2] = readNormalizedColor(image, workItem, i, j, k, layer, 2); values[3] = readNormalizedColor(image, workItem, i, j, k, layer, 3); } // Store values in result for (int i = 0; i < 4; i++) { result.setFloat(values[i], i); } } DEFINE_BUILTIN(read_imagei) { const Image* image = *(Image**)(workItem->getValue(ARG(0)).data); uint32_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int coordIndex = 1; // Check for sampler version if (callInst->getNumArgOperands() > 2) { sampler = ((llvm::ConstantInt*)PARG(1))->getZExtValue(); coordIndex = 2; } // Get coordinates float s = 0.f, t = 0.f, r = 0.f; char coordType = *overload.rbegin(); s = getCoordinate(ARG(coordIndex), 0, coordType, workItem); if (ARG(coordIndex)->getType()->isVectorTy()) { t = getCoordinate(ARG(coordIndex), 1, coordType, workItem); if (ARG_VLEN(coordIndex) > 2) { r = getCoordinate(ARG(coordIndex), 2, coordType, workItem); } } // 
Get unnormalized coordinates float u = 0.f, v = 0.f, w = 0.f; bool noormCoords = sampler & CLK_NORMALIZED_COORDS_TRUE; if (noormCoords) { u = s * image->desc.image_width; v = t * image->desc.image_height; w = r * image->desc.image_depth; } else { u = s; v = t; w = r; } // Get array layer index int layer = 0; if (image->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { layer = _clamp_(rintf(t), 0, image->desc.image_array_size - 1); v = t = 0.f; } else if (image->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { layer = _clamp_(rintf(r), 0, image->desc.image_array_size - 1); w = r = 0.f; } // Read values from nearest pixel int32_t values[4]; int i = getNearestCoordinate(sampler, s, u, image->desc.image_width); int j = getNearestCoordinate(sampler, t, v, image->desc.image_height); int k = getNearestCoordinate(sampler, r, w, image->desc.image_depth); values[0] = readSignedColor(image, workItem, i, j, k, layer, 0); values[1] = readSignedColor(image, workItem, i, j, k, layer, 1); values[2] = readSignedColor(image, workItem, i, j, k, layer, 2); values[3] = readSignedColor(image, workItem, i, j, k, layer, 3); // Store values in result for (int i = 0; i < 4; i++) { result.setSInt(values[i], i); } } DEFINE_BUILTIN(read_imageui) { const Image* image = *(Image**)(workItem->getValue(ARG(0)).data); uint32_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int coordIndex = 1; // Check for sampler version if (callInst->getNumArgOperands() > 2) { sampler = ((llvm::ConstantInt*)PARG(1))->getZExtValue(); coordIndex = 2; } // Get coordinates float s = 0.f, t = 0.f, r = 0.f; char coordType = *overload.rbegin(); s = getCoordinate(ARG(coordIndex), 0, coordType, workItem); if (ARG(coordIndex)->getType()->isVectorTy()) { t = getCoordinate(ARG(coordIndex), 1, coordType, workItem); if (ARG_VLEN(coordIndex) > 2) { r = getCoordinate(ARG(coordIndex), 2, coordType, workItem); } } // Get unnormalized coordinates float u = 0.f, v = 0.f, w = 0.f; bool noormCoords = sampler & CLK_NORMALIZED_COORDS_TRUE; if (noormCoords) { u = s * image->desc.image_width; v = t * image->desc.image_height; w = r * image->desc.image_depth; } else { u = s; v = t; w = r; } // Get array layer index int layer = 0; if (image->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { layer = _clamp_(rintf(t), 0, image->desc.image_array_size - 1); v = t = 0.f; } else if (image->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { layer = _clamp_(rintf(r), 0, image->desc.image_array_size - 1); w = r = 0.f; } // Read values from nearest pixel uint32_t values[4]; int i = getNearestCoordinate(sampler, s, u, image->desc.image_width); int j = getNearestCoordinate(sampler, t, v, image->desc.image_height); int k = getNearestCoordinate(sampler, r, w, image->desc.image_depth); values[0] = readUnsignedColor(image, workItem, i, j, k, layer, 0); values[1] = readUnsignedColor(image, workItem, i, j, k, layer, 1); values[2] = readUnsignedColor(image, workItem, i, j, k, layer, 2); values[3] = readUnsignedColor(image, workItem, i, j, k, layer, 3); // Store values in result for (int i = 0; i < 4; i++) { result.setUInt(values[i], i); } } DEFINE_BUILTIN(write_imagef) { Image* image = *(Image**)(workItem->getValue(ARG(0)).data); // Get pixel coordinates int x, y = 0, z = 0; x = SARGV(1, 0); if (ARG(1)->getType()->isVectorTy()) { y = SARGV(1, 1); if (ARG_VLEN(1) > 2) { z = SARGV(1, 2); } } // Get color data float values[4] = { (float)FARGV(2, 0), (float)FARGV(2, 1), (float)FARGV(2, 2), (float)FARGV(2, 3), }; // Re-order color values switch (image->format.image_channel_order) { case 
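// (The colour argument always arrives as (R, G, B, A); this switch permutes
//  it into the channel order the image actually stores, e.g. CL_BGRA swaps
//  the red and blue components and CL_ARGB rotates alpha into the first
//  position, mirroring the remapping getInputChannel() performs on reads.)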
CL_R: case CL_Rx: case CL_RG: case CL_RGx: case CL_RGB: case CL_RGBx: case CL_RGBA: case CL_INTENSITY: case CL_LUMINANCE: break; case CL_A: values[0] = values[3]; break; case CL_RA: values[1] = values[3]; break; case CL_ARGB: swap(values[2], values[3]); swap(values[1], values[2]); swap(values[0], values[1]); break; case CL_BGRA: swap(values[0], values[2]); break; default: FATAL_ERROR("Unsupported image channel order: %X", image->format.image_channel_order); } size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize * numChannels; size_t pixelAddress = image->address + (x + (y + z * image->desc.image_height) * image->desc.image_width) * pixelSize; // Generate channel values Memory* memory = workItem->getMemory(AddrSpaceGlobal); unsigned char* data = workItem->m_pool.alloc(channelSize * numChannels); for (unsigned i = 0; i < numChannels; i++) { switch (image->format.image_channel_data_type) { case CL_SNORM_INT8: ((int8_t*)data)[i] = rint(_clamp_(values[i] * 127.f, -128.f, 127.f)); break; case CL_UNORM_INT8: data[i] = rint(_clamp_(values[i] * 255.f, 0.f, 255.f)); break; case CL_SNORM_INT16: ((int16_t*)data)[i] = rint(_clamp_(values[i] * 32767.f, -32768.f, 32767.f)); break; case CL_UNORM_INT16: ((uint16_t*)data)[i] = rint(_clamp_(values[i] * 65535.f, 0.f, 65535.f)); break; case CL_FLOAT: ((float*)data)[i] = values[i]; break; case CL_HALF_FLOAT: ((uint16_t*)data)[i] = cl_half_from_float(values[i], CL_HALF_RTE); break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } } // Write pixel data memory->store(data, pixelAddress, channelSize * numChannels); } DEFINE_BUILTIN(write_imagei) { Image* image = *(Image**)(workItem->getValue(ARG(0)).data); // Get pixel coordinates int x, y = 0, z = 0; x = SARGV(1, 0); if (ARG(1)->getType()->isVectorTy()) { y = SARGV(1, 1); if (ARG_VLEN(1) > 2) { z = SARGV(1, 2); } } // Get color data int32_t values[4] = { (int32_t)SARGV(2, 0), (int32_t)SARGV(2, 1), (int32_t)SARGV(2, 2), (int32_t)SARGV(2, 3), }; // Re-order color values switch (image->format.image_channel_order) { case CL_R: case CL_Rx: case CL_RG: case CL_RGx: case CL_RGB: case CL_RGBx: case CL_RGBA: case CL_INTENSITY: case CL_LUMINANCE: break; case CL_A: values[0] = values[3]; break; case CL_RA: values[1] = values[3]; break; case CL_ARGB: swap(values[2], values[3]); swap(values[1], values[2]); swap(values[0], values[1]); break; case CL_BGRA: swap(values[0], values[2]); break; default: FATAL_ERROR("Unsupported image channel order: %X", image->format.image_channel_order); } size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize * numChannels; size_t pixelAddress = image->address + (x + (y + z * image->desc.image_height) * image->desc.image_width) * pixelSize; // Generate channel values Memory* memory = workItem->getMemory(AddrSpaceGlobal); unsigned char* data = workItem->m_pool.alloc(channelSize * numChannels); for (unsigned i = 0; i < numChannels; i++) { switch (image->format.image_channel_data_type) { case CL_SIGNED_INT8: ((int8_t*)data)[i] = _clamp_(values[i], -128, 127); break; case CL_SIGNED_INT16: ((int16_t*)data)[i] = _clamp_(values[i], -32768, 32767); break; case CL_SIGNED_INT32: ((int32_t*)data)[i] = values[i]; break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } } // Write pixel data memory->store(data, pixelAddress, channelSize * 
numChannels); } DEFINE_BUILTIN(write_imageui) { Image* image = *(Image**)(workItem->getValue(ARG(0)).data); // Get pixel coordinates int x, y = 0, z = 0; x = SARGV(1, 0); if (ARG(1)->getType()->isVectorTy()) { y = SARGV(1, 1); if (ARG_VLEN(1) > 2) { z = SARGV(1, 2); } } // Get color data uint32_t values[4] = { (uint32_t)SARGV(2, 0), (uint32_t)SARGV(2, 1), (uint32_t)SARGV(2, 2), (uint32_t)SARGV(2, 3), }; // Re-order color values switch (image->format.image_channel_order) { case CL_R: case CL_Rx: case CL_RG: case CL_RGx: case CL_RGB: case CL_RGBx: case CL_RGBA: case CL_INTENSITY: case CL_LUMINANCE: break; case CL_A: values[0] = values[3]; break; case CL_RA: values[1] = values[3]; break; case CL_ARGB: swap(values[2], values[3]); swap(values[1], values[2]); swap(values[0], values[1]); break; case CL_BGRA: swap(values[0], values[2]); break; default: FATAL_ERROR("Unsupported image channel order: %X", image->format.image_channel_order); } size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize * numChannels; size_t pixelAddress = image->address + (x + (y + z * image->desc.image_height) * image->desc.image_width) * pixelSize; // Generate channel values Memory* memory = workItem->getMemory(AddrSpaceGlobal); unsigned char* data = workItem->m_pool.alloc(channelSize * numChannels); for (unsigned i = 0; i < numChannels; i++) { switch (image->format.image_channel_data_type) { case CL_UNSIGNED_INT8: ((uint8_t*)data)[i] = _min_(values[i], UINT8_MAX); break; case CL_UNSIGNED_INT16: ((uint16_t*)data)[i] = _min_(values[i], UINT16_MAX); break; case CL_UNSIGNED_INT32: ((uint32_t*)data)[i] = values[i]; break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } } // Write pixel data memory->store(data, pixelAddress, channelSize * numChannels); } /////////////////////// // Integer Functions // /////////////////////// DEFINE_BUILTIN(abs_builtin) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': result.setUInt(UARGV(0, i), i); break; case 'c': case 's': case 'i': case 'l': result.setSInt(abs(SARGV(0, i)), i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(abs_diff) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t a = UARGV(0, i); uint64_t b = UARGV(1, i); result.setUInt(_max_(a, b) - _min_(a, b), i); break; } case 'c': case 's': case 'i': case 'l': { int64_t a = SARGV(0, i); int64_t b = SARGV(1, i); result.setSInt(_max_(a, b) - _min_(a, b), i); break; } default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(add_sat) { for (unsigned i = 0; i < result.num; i++) { uint64_t uresult = UARGV(0, i) + UARGV(1, i); int64_t sresult = SARGV(0, i) + SARGV(1, i); switch (getOverloadArgType(overload)) { case 'h': uresult = _min_(uresult, UINT8_MAX); result.setUInt(uresult, i); break; case 't': uresult = _min_(uresult, UINT16_MAX); result.setUInt(uresult, i); break; case 'j': uresult = _min_(uresult, UINT32_MAX); result.setUInt(uresult, i); break; case 'm': uresult = (UARGV(1, i) > uresult) ? 
UINT64_MAX : uresult; result.setUInt(uresult, i); break; case 'c': sresult = _clamp_(sresult, INT8_MIN, INT8_MAX); result.setSInt(sresult, i); break; case 's': sresult = _clamp_(sresult, INT16_MIN, INT16_MAX); result.setSInt(sresult, i); break; case 'i': sresult = _clamp_(sresult, INT32_MIN, INT32_MAX); result.setSInt(sresult, i); break; case 'l': if ((SARGV(0, i) > 0) == (SARGV(1, i) > 0) && (SARGV(0, i) > 0) != (sresult > 0)) { sresult = (SARGV(0, i) > 0) ? INT64_MAX : INT64_MIN; } result.setSInt(sresult, i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(clz) { for (unsigned i = 0; i < result.num; i++) { uint64_t x = UARGV(0, i); int nz = 0; while (x) { x >>= 1; nz++; } uint64_t r = ((result.size << 3) - nz); result.setUInt(r, i); } } DEFINE_BUILTIN(hadd) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t a = UARGV(0, i); uint64_t b = UARGV(1, i); uint64_t c = (a & b) & 1; result.setUInt((a >> 1) + (b >> 1) + c, i); break; } case 'c': case 's': case 'i': case 'l': { int64_t a = SARGV(0, i); int64_t b = SARGV(1, i); int64_t c = (a & b) & 1; result.setSInt((a >> 1) + (b >> 1) + c, i); break; } default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } static uint64_t _mad_(uint64_t a, uint64_t b, uint64_t c) { return a * b + c; } static uint64_t _umul_hi_(uint64_t x, uint64_t y, uint64_t bits) { if (bits == 64) { uint64_t xl = x & UINT32_MAX; uint64_t xh = x >> 32; uint64_t yl = y & UINT32_MAX; uint64_t yh = y >> 32; uint64_t xlyl = xl * yl; uint64_t xlyh = xl * yh; uint64_t xhyl = xh * yl; uint64_t xhyh = xh * yh; uint64_t a = xhyl + ((xlyl) >> 32); uint64_t al = a & UINT32_MAX; uint64_t ah = a >> 32; uint64_t b = ((al + xlyh) >> 32) + ah; return xhyh + b; } else { return (x * y) >> bits; } } static int64_t _smul_hi_(int64_t x, int64_t y, int64_t bits) { if (bits == 64) { int64_t xl = x & UINT32_MAX; int64_t xh = x >> 32; int64_t yl = y & UINT32_MAX; int64_t yh = y >> 32; int64_t xlyl = xl * yl; int64_t xlyh = xl * yh; int64_t xhyl = xh * yl; int64_t xhyh = xh * yh; int64_t a = xhyl + ((xlyl >> 32) & UINT32_MAX); int64_t al = a & UINT32_MAX; int64_t ah = a >> 32; int64_t b = ((al + xlyh) >> 32) + ah; return xhyh + b; } else { return (x * y) >> bits; } } DEFINE_BUILTIN(mad_hi) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t r = _umul_hi_(UARGV(0, i), UARGV(1, i), result.size << 3) + UARGV(2, i); result.setUInt(r, i); break; } case 'c': case 's': case 'i': case 'l': { int64_t r = _smul_hi_(SARGV(0, i), SARGV(1, i), result.size << 3) + SARGV(2, i); result.setSInt(r, i); break; } default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(mad_sat) { for (unsigned i = 0; i < result.num; i++) { uint64_t uresult = UARGV(0, i) * UARGV(1, i) + UARGV(2, i); int64_t sresult = SARGV(0, i) * SARGV(1, i) + SARGV(2, i); switch (getOverloadArgType(overload)) { case 'h': uresult = _min_(uresult, UINT8_MAX); result.setUInt(uresult, i); break; case 't': uresult = _min_(uresult, UINT16_MAX); result.setUInt(uresult, i); break; case 'j': uresult = _min_(uresult, UINT32_MAX); result.setUInt(uresult, i); break; case 'm': { uint64_t hi = _umul_hi_(UARGV(0, i), UARGV(1, i), 64); if (hi || UARGV(2, i) > uresult) { uresult = UINT64_MAX; } result.setUInt(uresult, i); break; } case 'c': sresult = 
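// Illustrative sketch, kept out of the build with #if 0: _umul_hi_ above
// builds the high 64 bits of a 64x64-bit unsigned product from four 32-bit
// partial products, splitting the middle terms so that no intermediate sum
// can overflow. Where the compiler provides the (non-standard) unsigned
// __int128 extension, the result can be cross-checked directly:
#if 0
#include <cassert>
#include <cstdint>

static uint64_t mul_hi_u64(uint64_t x, uint64_t y)
{
  uint64_t xl = x & 0xFFFFFFFFu, xh = x >> 32;
  uint64_t yl = y & 0xFFFFFFFFu, yh = y >> 32;
  uint64_t mid = xh * yl + ((xl * yl) >> 32); // cannot overflow
  uint64_t carry = ((mid & 0xFFFFFFFFu) + xl * yh) >> 32;
  return xh * yh + (mid >> 32) + carry;
}

int main()
{
  uint64_t x = 0xDEADBEEFCAFEBABEull, y = 0x0123456789ABCDEFull;
  assert(mul_hi_u64(x, y) == (uint64_t)(((unsigned __int128)x * y) >> 64));
  return 0;
}
#endif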
_clamp_(sresult, INT8_MIN, INT8_MAX); result.setSInt(sresult, i); break; case 's': sresult = _clamp_(sresult, INT16_MIN, INT16_MAX); result.setSInt(sresult, i); break; case 'i': sresult = _clamp_(sresult, INT32_MIN, INT32_MAX); result.setSInt(sresult, i); break; case 'l': // Check for overflow in multiplication if (_smul_hi_(SARGV(0, i), SARGV(1, i), 64)) { sresult = (SARGV(0, i) > 0) ^ (SARGV(1, i) > 0) ? INT64_MIN : INT64_MAX; } else { // Check for overflow in addition int64_t m = SARGV(0, i) * SARGV(1, i); if ((m > 0) == (SARGV(2, i) > 0) && (m > 0) != (sresult > 0)) { sresult = (m > 0) ? INT64_MAX : INT64_MIN; } } result.setSInt(sresult, i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } static uint64_t _mul_(uint64_t a, uint64_t b) { return a * b; } DEFINE_BUILTIN(mul_hi) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t r = _umul_hi_(UARGV(0, i), UARGV(1, i), result.size << 3); result.setUInt(r, i); break; } case 'c': case 's': case 'i': case 'l': { int64_t r = _smul_hi_(SARGV(0, i), SARGV(1, i), result.size << 3); result.setSInt(r, i); break; } default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } static uint64_t _popcount_(uint64_t x) { int i = 0; while (x) { i += (x & 0x1); x >>= 1; } return i; } DEFINE_BUILTIN(rhadd) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t a = UARGV(0, i); uint64_t b = UARGV(1, i); uint64_t c = (a | b) & 1; result.setUInt((a >> 1) + (b >> 1) + c, i); break; } case 'c': case 's': case 'i': case 'l': { int64_t a = SARGV(0, i); int64_t b = SARGV(1, i); int64_t c = (a | b) & 1; result.setSInt((a >> 1) + (b >> 1) + c, i); break; } default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(rotate) { for (unsigned i = 0; i < result.num; i++) { uint64_t width = (result.size << 3); uint64_t v = UARGV(0, i); uint64_t ls = UARGV(1, i) % width; uint64_t rs = width - ls; result.setUInt((v << ls) | (v >> rs), i); } } DEFINE_BUILTIN(sub_sat) { for (unsigned i = 0; i < result.num; i++) { uint64_t uresult = UARGV(0, i) - UARGV(1, i); int64_t sresult = SARGV(0, i) - SARGV(1, i); switch (getOverloadArgType(overload)) { case 'h': uresult = uresult > UINT8_MAX ? 0 : uresult; result.setUInt(uresult, i); break; case 't': uresult = uresult > UINT16_MAX ? 0 : uresult; result.setUInt(uresult, i); break; case 'j': uresult = uresult > UINT32_MAX ? 0 : uresult; result.setUInt(uresult, i); break; case 'm': uresult = (UARGV(1, i) > UARGV(0, i)) ? 0 : uresult; result.setUInt(uresult, i); break; case 'c': sresult = _clamp_(sresult, INT8_MIN, INT8_MAX); result.setSInt(sresult, i); break; case 's': sresult = _clamp_(sresult, INT16_MIN, INT16_MAX); result.setSInt(sresult, i); break; case 'i': sresult = _clamp_(sresult, INT32_MIN, INT32_MAX); result.setSInt(sresult, i); break; case 'l': if ((SARGV(0, i) > 0) != (SARGV(1, i) > 0) && (SARGV(0, i) > 0) != (sresult > 0)) { sresult = (SARGV(0, i) > 0) ? 
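// Illustrative sketch, kept out of the build with #if 0: 64-bit signed
// saturation in the spirit of the add_sat and mad_sat 'l' cases above.
// Overflow has occurred when both operands share a sign and the wrapped sum
// has the opposite sign. The sketch tests the sign bit (< 0) rather than the
// "> 0" comparisons used above, which also catches the corner case where the
// sum wraps exactly to zero (e.g. INT64_MIN + INT64_MIN); the wrapping add is
// done in unsigned arithmetic to keep the sketch free of undefined behaviour.
#if 0
#include <cstdint>

static int64_t add_sat_s64(int64_t a, int64_t b)
{
  int64_t r = (int64_t)((uint64_t)a + (uint64_t)b); // wrapping addition
  if ((a < 0) == (b < 0) && (a < 0) != (r < 0))
    r = (a < 0) ? INT64_MIN : INT64_MAX;
  return r;
}
#endif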
INT64_MAX : INT64_MIN; } result.setSInt(sresult, i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(upsample) { for (unsigned i = 0; i < result.num; i++) { uint64_t r = (UARGV(0, i) << (result.size << 2)) | UARGV(1, i); result.setUInt(r, i); } } //////////////////// // Math Functions // //////////////////// static double _acospi_(double x) { return (acos(x) / M_PI); } static double _asinpi_(double x) { return (asin(x) / M_PI); } static double _atanpi_(double x) { return (atan(x) / M_PI); } static double _atan2pi_(double x, double y) { return (atan2(x, y) / M_PI); } static double _cospi_(double x) { return (cos(x * M_PI)); } static double _exp10_(double x) { return pow(10, x); } static double _fdivide_(double x, double y) { return x / y; } static double _frecip_(double x) { return 1.0 / x; } static double _rsqrt_(double x) { return 1.0 / sqrt(x); } static double _sinpi_(double x) { return (sin(x * M_PI)); } static double _tanpi_(double x) { return (tan(x * M_PI)); } DEFINE_BUILTIN(fma_builtin) { for (unsigned i = 0; i < result.num; i++) { if (result.size == 4) result.setFloat(fmaf(FARGV(0, i), FARGV(1, i), FARGV(2, i)), i); else result.setFloat(fma(FARGV(0, i), FARGV(1, i), FARGV(2, i)), i); } } DEFINE_BUILTIN(fmax_builtin) { TypedValue a = workItem->getOperand(ARG(0)); TypedValue b = workItem->getOperand(ARG(1)); for (unsigned i = 0; i < result.num; i++) { double _b = b.num > 1 ? b.getFloat(i) : b.getFloat(); if (result.size == 4) result.setFloat(fmaxf(a.getFloat(i), _b), i); else result.setFloat(fmax(a.getFloat(i), _b), i); } } DEFINE_BUILTIN(fmin_builtin) { TypedValue a = workItem->getOperand(ARG(0)); TypedValue b = workItem->getOperand(ARG(1)); for (unsigned i = 0; i < result.num; i++) { double _b = b.num > 1 ? 
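// Illustrative sketch, kept out of the build with #if 0: upsample above
// shifts the high-half operand by half the result width in bits; result.size
// is in bytes, so (result.size << 2) == result.size * 8 / 2. For the
// (uint, uint) -> ulong overload this reduces to:
#if 0
#include <cstdint>

static uint64_t upsample_u32(uint32_t hi, uint32_t lo)
{
  return ((uint64_t)hi << 32) | lo;
}
#endif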
b.getFloat(i) : b.getFloat(); if (result.size == 4) result.setFloat(fminf(a.getFloat(i), _b), i); else result.setFloat(fmin(a.getFloat(i), _b), i); } } static double _maxmag_(double x, double y) { double _x = fabs(x); double _y = fabs(y); if (_x > _y) { return x; } else if (_y > _x) { return y; } else { return fmax(x, y); } } static double _minmag_(double x, double y) { double _x = fabs(x); double _y = fabs(y); if (_x < _y) { return x; } else if (_y < _x) { return y; } else { return fmin(x, y); } } DEFINE_BUILTIN(fract) { Memory* memory = workItem->getMemory(ARG(1)->getType()->getPointerAddressSpace()); size_t iptr = PARG(1); for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double fl, r; if (std::isnan(x)) { r = nan(""); fl = nan(""); } else { if (result.size == 4) { fl = floorf(x); r = fmin(x - fl, nextafterf(1, 0)); } else { fl = floor(x); r = fmin(x - fl, nextafter(1, 0)); } } size_t offset = i * result.size; result.setFloat(fl, i); memory->store(result.data + offset, iptr + offset, result.size); result.setFloat(r, i); } } DEFINE_BUILTIN(frexp_builtin) { Memory* memory = workItem->getMemory(ARG(1)->getType()->getPointerAddressSpace()); size_t iptr = PARG(1); for (unsigned i = 0; i < result.num; i++) { int32_t e; double r = frexp(FARGV(0, i), &e); memory->store((const unsigned char*)&e, iptr + i * 4, 4); result.setFloat(r, i); } } DEFINE_BUILTIN(ilogb_builtin) { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); if (std::isnan(x)) result.setSInt(INT_MAX, i); else result.setSInt(ilogb(x), i); } } DEFINE_BUILTIN(ldexp_builtin) { for (unsigned i = 0; i < result.num; i++) { result.setFloat(ldexp(FARGV(0, i), SARGV(1, i)), i); } } DEFINE_BUILTIN(lgamma_r) { Memory* memory = workItem->getMemory(ARG(1)->getType()->getPointerAddressSpace()); size_t signp = PARG(1); for (unsigned i = 0; i < result.num; i++) { double r = lgamma(FARGV(0, i)); int32_t s = (tgamma(FARGV(0, i)) < 0 ? -1 : 1); memory->store((const unsigned char*)&s, signp + i * 4, 4); result.setFloat(r, i); } } DEFINE_BUILTIN(modf_builtin) { Memory* memory = workItem->getMemory(ARG(1)->getType()->getPointerAddressSpace()); size_t iptr = PARG(1); for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double integral = trunc(x); double fractional = copysign(std::isinf(x) ? 
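// Illustrative sketch, kept out of the build with #if 0: fract above stores
// floor(x) through the pointer argument and returns the fractional part,
// clamped to the largest float below 1.0 so that a value such as a tiny
// negative x (whose exact fractional part would round up to 1.0f) never
// yields 1.0. The NaN handling shown above is omitted here.
#if 0
#include <cmath>

static float fract_ref(float x, float* iptr)
{
  *iptr = std::floor(x);
  return std::fmin(x - *iptr, std::nextafter(1.0f, 0.0f));
}
#endif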
0.0 : x - integral, x); size_t offset = i * result.size; result.setFloat(integral, i); memory->store(result.data + offset, iptr + offset, result.size); result.setFloat(fractional, i); } } DEFINE_BUILTIN(nan_builtin) { for (unsigned i = 0; i < result.num; i++) { result.setFloat(nan(""), i); } } DEFINE_BUILTIN(nextafter_builtin) { for (unsigned i = 0; i < result.num; i++) { if (result.size == 4) result.setFloat(nextafterf(FARGV(0, i), FARGV(1, i)), i); else result.setFloat(nextafter(FARGV(0, i), FARGV(1, i)), i); } } DEFINE_BUILTIN(pown) { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); int32_t y = SARGV(1, i); result.setFloat(pow(x, y), i); } } DEFINE_BUILTIN(powr) { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double y = FARGV(1, i); double r; if (x < 0.0) { r = nan(""); } else if (std::isnan(x) || std::isnan(y)) { r = nan(""); } else if (x == 1.0) { if (std::isinf(y)) r = nan(""); else r = 1.0; } else if (y == 0.0) { if (x == 0.0 || x == INFINITY) r = nan(""); else r = 1.0; } else if (x == 0.0) { if (y < 0.0) r = INFINITY; else r = 0.0; } else if (x == INFINITY) { if (y < 0.0) r = 0.0; else r = INFINITY; } else { r = pow(x, y); } result.setFloat(r, i); } } DEFINE_BUILTIN(remquo_builtin) { Memory* memory = workItem->getMemory(ARG(2)->getType()->getPointerAddressSpace()); size_t quop = PARG(2); for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double y = FARGV(1, i); int32_t quo; double rem = remquo(x, y, &quo); memory->store((const unsigned char*)&quo, quop + i * 4, 4); result.setFloat(rem, i); } } DEFINE_BUILTIN(rootn) { for (unsigned i = 0; i < result.num; i++) { long double x = FARGV(0, i); int n = SARGV(1, i); long double r; if (n == 0) { r = nan(""); } else if (x == 0) { if (n < 0) { if (n & 1) r = copysign(INFINITY, x); else r = INFINITY; } else { if (n & 1) r = x; else r = 0.0; } } else if (x < 0 && !(n & 1)) { r = nan(""); } else { r = pow(fabs(x), 1.0L / n); if (x < 0 && n & 1) r = -r; } result.setFloat(r, i); } } DEFINE_BUILTIN(sincos) { Memory* memory = workItem->getMemory(ARG(1)->getType()->getPointerAddressSpace()); size_t cv = PARG(1); for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); size_t offset = i * result.size; result.setFloat(cos(x), i); memory->store(result.data + offset, cv + offset, result.size); result.setFloat(sin(x), i); } } //////////////////////////// // Misc. 
Vector Functions // //////////////////////////// DEFINE_BUILTIN(shuffle_builtin) { for (unsigned i = 0; i < result.num; i++) { result.setUInt(UARGV(0, UARGV(1, i) % ARG_VLEN(0)), i); } } DEFINE_BUILTIN(shuffle2_builtin) { for (unsigned i = 0; i < result.num; i++) { uint64_t m = 1; if (ARG(0)->getType()->isVectorTy()) { m = ARG_VLEN(0); } uint64_t src = 0; uint64_t index = UARGV(2, i) % (2 * m); if (index >= m) { index -= m; src = 1; } result.setUInt(UARGV(src, index), i); } } ////////////////////////// // Relational Functions // ////////////////////////// static int64_t _iseq_(double x, double y) { return x == y; } static int64_t _isneq_(double x, double y) { return x != y; } static int64_t _isgt_(double x, double y) { return isgreater(x, y); } static int64_t _isge_(double x, double y) { return isgreaterequal(x, y); } static int64_t _islt_(double x, double y) { return isless(x, y); } static int64_t _isle_(double x, double y) { return islessequal(x, y); } static int64_t _islg_(double x, double y) { return islessgreater(x, y); } static int64_t _isfin_(double x) { return isfinite(x); } static int64_t _isinf_(double x) { return std::isinf(x); } static int64_t _isnan_(double x) { return std::isnan(x); } static int64_t _isnorm_(double x) { return isnormal(x); } static int64_t _isord_(double x, double y) { return !isunordered(x, y); } static int64_t _isuord_(double x, double y) { return isunordered(x, y); } static int64_t _signbit_(double x) { return signbit(x); } DEFINE_BUILTIN(all) { unsigned num = 1; if (ARG(0)->getType()->isVectorTy()) { num = ARG_VLEN(0); } for (unsigned i = 0; i < num; i++) { if (!(SARGV(0, i) & INT64_MIN)) { result.setSInt(0); return; } } result.setSInt(1); } DEFINE_BUILTIN(any) { unsigned num = 1; if (ARG(0)->getType()->isVectorTy()) { num = ARG_VLEN(0); } for (unsigned i = 0; i < num; i++) { if (SARGV(0, i) & INT64_MIN) { result.setSInt(1); return; } } result.setSInt(0); } static uint64_t _ibitselect_(uint64_t a, uint64_t b, uint64_t c) { return ((a & ~c) | (b & c)); } static double _fbitselect_(double a, double b, double c) { uint64_t _a = *(uint64_t*)&a; uint64_t _b = *(uint64_t*)&b; uint64_t _c = *(uint64_t*)&c; uint64_t _r = _ibitselect_(_a, _b, _c); return *(double*)&_r; } DEFINE_BUILTIN(bitselect) { switch (getOverloadArgType(overload)) { case 'f': case 'd': f3arg(workItem, callInst, fnName, overload, result, _fbitselect_); break; case 'h': case 't': case 'j': case 'm': case 'c': case 's': case 'i': case 'l': u3arg(workItem, callInst, fnName, overload, result, _ibitselect_); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } DEFINE_BUILTIN(select_builtin) { char type = getOverloadArgType(overload); for (unsigned i = 0; i < result.num; i++) { int64_t c = SARGV(2, i); bool _c = (result.num > 1) ? c & INT64_MIN : c; switch (type) { case 'f': case 'd': result.setFloat(_c ? FARGV(1, i) : FARGV(0, i), i); break; case 'h': case 't': case 'j': case 'm': case 'c': case 's': case 'i': case 'l': result.setSInt(_c ? SARGV(1, i) : SARGV(0, i), i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } /////////////////////////////// // Synchronization Functions // /////////////////////////////// DEFINE_BUILTIN(work_group_barrier) { workItem->m_state = WorkItem::BARRIER; workItem->m_workGroup->notifyBarrier(workItem, callInst, UARG(0)); } DEFINE_BUILTIN(mem_fence) { // TODO: Implement? 
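// Illustrative sketch, kept out of the build with #if 0: for vector
// arguments the OpenCL relational builtins treat the most-significant (sign)
// bit of each element as its truth value, because vector comparisons produce
// -1 (all bits set) for true. That is why all/any and the vector form of
// select above test "value & INT64_MIN" rather than "value != 0". bitselect
// assembles each result bit from a or b according to the matching bit of c.
#if 0
#include <cstdint>

static uint64_t bitselect_ref(uint64_t a, uint64_t b, uint64_t c)
{
  return (a & ~c) | (b & c); // bit i comes from b where bit i of c is set
}

static bool vectorElementIsTrue(int64_t element)
{
  return element < 0; // sign bit set, e.g. the -1 produced by a comparison
}
#endif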
} ////////////////////////////////////////// // Vector Data Load and Store Functions // ////////////////////////////////////////// DEFINE_BUILTIN(vload) { size_t base = PARG(1); unsigned int addressSpace = ARG(1)->getType()->getPointerAddressSpace(); uint64_t offset = UARG(0); size_t address = base + offset * result.size * result.num; size_t size = result.size * result.num; workItem->getMemory(addressSpace)->load(result.data, address, size); } DEFINE_BUILTIN(vstore) { const llvm::Value* value = ARG(0); unsigned size = getTypeSize(value->getType()); if (isVector3(value)) { // 3-element vectors are same size as 4-element vectors, // but vstore address offset shouldn't use this. size = (size / 4) * 3; } size_t base = PARG(2); unsigned int addressSpace = ARG(2)->getType()->getPointerAddressSpace(); uint64_t offset = UARG(1); size_t address = base + offset * size; unsigned char* data = workItem->getOperand(value).data; workItem->getMemory(addressSpace)->store(data, address, size); } DEFINE_BUILTIN(vload_half) { size_t base = PARG(1); unsigned int addressSpace = ARG(1)->getType()->getPointerAddressSpace(); uint64_t offset = UARG(0); size_t address; if (fnName.compare(0, 6, "vloada") == 0 && result.num == 3) { address = base + offset * sizeof(cl_half) * 4; } else { address = base + offset * sizeof(cl_half) * result.num; } size_t size = sizeof(cl_half) * result.num; uint16_t* halfData = (uint16_t*)workItem->m_pool.alloc(2 * result.num); workItem->getMemory(addressSpace) ->load((unsigned char*)halfData, address, size); // Convert to floats for (unsigned i = 0; i < result.num; i++) { ((float*)result.data)[i] = cl_half_to_float(halfData[i]); } } DEFINE_BUILTIN(vstore_half) { const llvm::Value* value = ARG(0); unsigned size = getTypeSize(value->getType()); if (isVector3(value)) { // 3-element vectors are same size as 4-element vectors, // but vstore address offset shouldn't use this. size = (size / 4) * 3; } size_t base = PARG(2); unsigned int addressSpace = ARG(2)->getType()->getPointerAddressSpace(); uint64_t offset = UARG(1); // Convert to halfs TypedValue op = workItem->getOperand(value); unsigned char* data = op.data; size = op.num * sizeof(cl_half); uint16_t* halfData = (uint16_t*)workItem->m_pool.alloc(2 * op.num); // Parse rounding mode (RTE is the default) cl_half_rounding_mode rmode = CL_HALF_RTE; if (fnName.find("_rtz") != std::string::npos) rmode = CL_HALF_RTZ; else if (fnName.find("_rtn") != std::string::npos) rmode = CL_HALF_RTN; else if (fnName.find("_rtp") != std::string::npos) rmode = CL_HALF_RTP; for (unsigned i = 0; i < op.num; i++) { if (op.size == 4) halfData[i] = cl_half_from_float(((float*)data)[i], rmode); else halfData[i] = cl_half_from_double(((double*)data)[i], rmode); } size_t address; if (fnName.compare(0, 7, "vstorea") == 0 && op.num == 3) { address = base + offset * sizeof(cl_half) * 4; } else { address = base + offset * sizeof(cl_half) * op.num; } workItem->getMemory(addressSpace) ->store((unsigned char*)halfData, address, size); } ///////////////////////// // Work-Item Functions // ///////////////////////// DEFINE_BUILTIN(get_global_id) { uint64_t dim = UARG(0); size_t r = dim < 3 ? workItem->m_globalID[dim] : 0; result.setUInt(r); } DEFINE_BUILTIN(get_global_size) { uint64_t dim = UARG(0); size_t r = dim < 3 ? workItem->m_kernelInvocation->getGlobalSize()[dim] : 0; result.setUInt(r); } DEFINE_BUILTIN(get_global_offset) { uint64_t dim = UARG(0); size_t r = dim < 3 ? 
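// Illustrative sketch, kept out of the build with #if 0: the address
// arithmetic used by the vector load/store builtins above. vloadn/vstoren
// index memory in units of n elements even for n == 3 (the in-register
// representation is padded to 4 elements, but the memory stride is not),
// whereas the "aligned" vloada_half3/vstorea_half3 forms use a stride of
// four 16-bit elements.
#if 0
#include <cstddef>
#include <cstdint>

static size_t vstore3_address(size_t base, uint64_t offset, size_t elemSize)
{
  return base + offset * elemSize * 3; // plain vstore3: 3-element stride
}

static size_t vstorea_half3_address(size_t base, uint64_t offset)
{
  return base + offset * sizeof(uint16_t) * 4; // aligned form: 4-element stride
}
#endif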
workItem->m_kernelInvocation->getGlobalOffset()[dim] : 0; result.setUInt(r); } DEFINE_BUILTIN(get_group_id) { uint64_t dim = UARG(0); size_t r = dim < 3 ? workItem->m_workGroup->getGroupID()[dim] : 0; result.setUInt(r); } DEFINE_BUILTIN(get_local_id) { uint64_t dim = UARG(0); size_t r = dim < 3 ? workItem->m_localID[dim] : 0; result.setUInt(r); } DEFINE_BUILTIN(get_local_size) { uint64_t dim = UARG(0); size_t r = dim < 3 ? workItem->m_workGroup->getGroupSize()[dim] : 0; result.setUInt(r); } DEFINE_BUILTIN(get_enqueued_local_size) { uint64_t dim = UARG(0); size_t r = dim < 3 ? workItem->m_kernelInvocation->getLocalSize()[dim] : 0; result.setUInt(r); } DEFINE_BUILTIN(get_num_groups) { uint64_t dim = UARG(0); size_t r = 0; if (dim < 3) { r = workItem->m_kernelInvocation->getNumGroups()[dim]; } result.setUInt(r); } DEFINE_BUILTIN(get_work_dim) { result.setUInt(workItem->m_kernelInvocation->getWorkDim()); } DEFINE_BUILTIN(get_global_linear_id) { Size3 globalID = workItem->m_globalID; Size3 globalSize = workItem->m_kernelInvocation->getGlobalSize(); Size3 globalOffset = workItem->m_kernelInvocation->getGlobalOffset(); size_t r = ((globalID.z - globalOffset.z) * globalSize.y + (globalID.y - globalOffset.y)) * globalSize.x + globalID.x - globalOffset.x; result.setUInt(r); } DEFINE_BUILTIN(get_local_linear_id) { Size3 localID = workItem->m_localID; Size3 localSize = workItem->m_workGroup->getGroupSize(); size_t r = (localID.z * localSize.y + localID.y) * localSize.x + localID.x; result.setUInt(r); } ///////////////////// // Other Functions // ///////////////////// DEFINE_BUILTIN(astype) { TypedValue src = workItem->getOperand(ARG(0)); memcpy(result.data, src.data, src.size * src.num); } static void setConvertRoundingMode(const string& name, int def) { size_t rpos = name.find("_rt"); if (rpos != string::npos) { switch (name[rpos + 3]) { case 'e': fesetround(FE_TONEAREST); break; case 'z': fesetround(FE_TOWARDZERO); break; case 'p': fesetround(FE_UPWARD); break; case 'n': fesetround(FE_DOWNWARD); break; default: FATAL_ERROR("Unsupported rounding mode: %c", name[rpos + 3]); } } else { fesetround(def); } } DEFINE_BUILTIN(convert_float) { // Use rounding mode const int origRnd = fegetround(); setConvertRoundingMode(fnName, FE_TONEAREST); for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t in = UARGV(0, i); if (result.size == 4) result.setFloat(in ? (float)in : 0.f, i); else result.setFloat(in ? 
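// Illustrative sketch, kept out of the build with #if 0: get_global_linear_id
// above linearises the offset-relative global ID in row-major order with the
// x dimension varying fastest: ((gz - oz) * Gy + (gy - oy)) * Gx + (gx - ox).
#if 0
#include <cstddef>

static size_t globalLinearId(const size_t id[3], const size_t size[3],
                             const size_t offset[3])
{
  return ((id[2] - offset[2]) * size[1] + (id[1] - offset[1])) * size[0] +
         (id[0] - offset[0]);
}
#endif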
(double)in : 0.0, i); break; } case 'c': case 's': case 'i': case 'l': result.setFloat(SARGV(0, i), i); break; case 'f': case 'd': result.setFloat(FARGV(0, i), i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } fesetround(origRnd); } DEFINE_BUILTIN(convert_half) { float f; cl_half_rounding_mode rmode = CL_HALF_RTE; if (fnName.find("_rtz") != std::string::npos) rmode = CL_HALF_RTZ; else if (fnName.find("_rtn") != std::string::npos) rmode = CL_HALF_RTN; else if (fnName.find("_rtp") != std::string::npos) rmode = CL_HALF_RTP; const char srcType = getOverloadArgType(overload); for (unsigned i = 0; i < result.num; i++) { switch (srcType) { case 'h': case 't': case 'j': case 'm': f = (float)UARGV(0, i); break; case 'c': case 's': case 'i': case 'l': f = (float)SARGV(0, i); break; case 'd': case 'f': f = FARGV(0, i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } result.setUInt(cl_half_from_float(f, rmode), i); } } DEFINE_BUILTIN(convert_uint) { // Check for saturation modifier bool sat = fnName.find("_sat") != string::npos; uint64_t max; switch (result.size) { case 1: max = UINT8_MAX; break; case 2: max = UINT16_MAX; break; case 4: max = UINT32_MAX; break; case 8: max = UINT64_MAX; break; default: FATAL_ERROR("Unsupported integer size %d", result.size); } // Use rounding mode const int origRnd = fegetround(); setConvertRoundingMode(fnName, FE_TOWARDZERO); for (unsigned i = 0; i < result.num; i++) { uint64_t r; switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': r = UARGV(0, i); if (sat) { r = _min_(r, max); } break; case 'c': case 's': case 'i': case 'l': { int64_t si = SARGV(0, i); r = si; if (sat) { if (si < 0) { r = 0; } else if (si > max) { r = max; } } break; } case 'f': case 'd': if (sat) { r = rint(_clamp_((long double)FARGV(0, i), 0.0L, (long double)max)); } else { r = rint(FARGV(0, i)); } break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } result.setUInt(r, i); } fesetround(origRnd); } DEFINE_BUILTIN(convert_sint) { // Check for saturation modifier bool sat = fnName.find("_sat") != string::npos; int64_t min, max; switch (result.size) { case 1: min = INT8_MIN; max = INT8_MAX; break; case 2: min = INT16_MIN; max = INT16_MAX; break; case 4: min = INT32_MIN; max = INT32_MAX; break; case 8: min = INT64_MIN; max = INT64_MAX; break; default: FATAL_ERROR("Unsupported integer size %d", result.size); } // Use rounding mode const int origRnd = fegetround(); setConvertRoundingMode(fnName, FE_TOWARDZERO); for (unsigned i = 0; i < result.num; i++) { int64_t r; switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': r = UARGV(0, i); if (sat) { r = _min_((uint64_t)r, (uint64_t)max); } break; case 'c': case 's': case 'i': case 'l': r = SARGV(0, i); if (sat) { r = _clamp_(r, min, max); } break; case 'f': case 'd': if (sat) { r = rint(_clamp_((long double)FARGV(0, i), (long double)min, (long double)max)); } else { r = rint(FARGV(0, i)); } break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } result.setSInt(r, i); } fesetround(origRnd); } DEFINE_BUILTIN(printf_builtin) { lock_guard lck(printfMutex); size_t formatPtr = workItem->getOperand(ARG(0)).getPointer(); Memory* memory = workItem->getMemory(AddrSpaceGlobal); int arg = 1; while (true) { char c; memory->load((unsigned char*)&c, formatPtr++); if (c == '\0') { break; } if (c == '%') { unsigned vectorWidth = 1; string format = 
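// Illustrative sketch, kept out of the build with #if 0: the convert_*
// builtins above pick the floating-point rounding mode from the
// "_rte/_rtz/_rtp/_rtn" suffix (defaulting to round-toward-zero for
// float-to-integer conversions) and, with the "_sat" modifier, clamp to the
// destination range before rounding with rint. A single fixed-mode case,
// with NaN handling omitted, might look like:
#if 0
#include <cfenv>
#include <cmath>
#include <cstdint>

static int32_t convert_int_sat_rtp(float x)
{
  const int origRound = std::fegetround();
  std::fesetround(FE_UPWARD);              // "_rtp": round toward +infinity
  long double clamped = x;                 // widen before clamping
  if (clamped < INT32_MIN)
    clamped = INT32_MIN;
  if (clamped > INT32_MAX)
    clamped = INT32_MAX;
  int32_t r = (int32_t)std::rint(clamped); // rint honours the current mode
  std::fesetround(origRound);
  return r;
}
#endif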
"%"; while (true) { memory->load((unsigned char*)&c, formatPtr++); if (c == '\0') { cout << format; break; } if (c == 'v') { // Load vector width specifier memory->load((unsigned char*)&c, formatPtr++); vectorWidth = c - '0'; if (vectorWidth == 1) { // Assume this is 16 vectorWidth = 16; formatPtr++; } continue; } // Ignore all 'h' specifiers if (c == 'h') continue; format += c; bool done = false; switch (c) { case 'c': case 'd': case 'i': for (unsigned i = 0; i < vectorWidth; i++) { if (i > 0) printf(","); printf(format.c_str(), SARGV(arg, i)); } arg++; done = true; break; case 'o': case 'u': case 'x': case 'X': case 'p': for (unsigned i = 0; i < vectorWidth; i++) { if (i > 0) printf(","); printf(format.c_str(), UARGV(arg, i)); } arg++; done = true; break; case 'f': case 'F': case 'e': case 'E': case 'g': case 'G': case 'a': case 'A': for (unsigned i = 0; i < vectorWidth; i++) { if (i > 0) printf(","); printf(format.c_str(), FARGV(arg, i)); } arg++; done = true; break; case 's': { size_t ptr = UARG(arg++); if (!ptr) { // Special case for printing NULL pointer printf(format.c_str(), NULL); } else { // Load string from memory char c; string str = ""; while (true) { if (!memory->load((unsigned char*)&c, ptr++)) break; if (c == '\0') break; str += c; } printf(format.c_str(), str.c_str()); } done = true; break; } case '%': printf("%%"); done = true; break; } if (done) { break; } } if (c == '\0') { break; } } else { cout << c; } } } ///////////////////// // LLVM Intrinsics // ///////////////////// DEFINE_BUILTIN(llvm_bswap) { uint64_t r = 0; uint64_t value = UARG(0); for (unsigned i = 0; i < result.size; i++) { r |= ((value >> (i * 8)) & 0xFF) << ((result.size - i - 1) * 8); } result.setUInt(r); } DEFINE_BUILTIN(llvm_dbg_declare) { const llvm::DbgDeclareInst* dbgInst = (const llvm::DbgDeclareInst*)callInst; const llvm::Value* addr = dbgInst->getAddress(); const llvm::DILocalVariable* var = dbgInst->getVariable(); workItem->m_variables[var->getName().str()] = {addr, var}; } DEFINE_BUILTIN(llvm_dbg_value) { const llvm::DbgValueInst* dbgInst = (const llvm::DbgValueInst*)callInst; const llvm::Value* value = dbgInst->getValue(); // TODO: Use offset? // uint64_t offset = dbgInst->getOffset(); const llvm::DILocalVariable* var = dbgInst->getVariable(); workItem->m_variables[var->getName().str()] = {value, var}; } DEFINE_BUILTIN(llvm_lifetime_start) { // TODO: Implement? } DEFINE_BUILTIN(llvm_lifetime_end) { // TODO: Implement? 
} DEFINE_BUILTIN(llvm_memcpy) { const llvm::MemCpyInst* memcpyInst = (const llvm::MemCpyInst*)callInst; size_t dest = workItem->getOperand(memcpyInst->getDest()).getPointer(); size_t src = workItem->getOperand(memcpyInst->getSource()).getPointer(); size_t size = workItem->getOperand(memcpyInst->getLength()).getUInt(); unsigned destAddrSpace = memcpyInst->getDestAddressSpace(); unsigned srcAddrSpace = memcpyInst->getSourceAddressSpace(); unsigned char* buffer = workItem->m_pool.alloc(size); workItem->getMemory(srcAddrSpace)->load(buffer, src, size); workItem->getMemory(destAddrSpace)->store(buffer, dest, size); } DEFINE_BUILTIN(llvm_memset) { const llvm::MemSetInst* memsetInst = (const llvm::MemSetInst*)callInst; size_t dest = workItem->getOperand(memsetInst->getDest()).getPointer(); size_t size = workItem->getOperand(memsetInst->getLength()).getUInt(); unsigned addressSpace = memsetInst->getDestAddressSpace(); unsigned char* buffer = workItem->m_pool.alloc(size); unsigned char value = UARG(1); memset(buffer, value, size); workItem->getMemory(addressSpace)->store(buffer, dest, size); } DEFINE_BUILTIN(llvm_trap) { FATAL_ERROR("Encountered trap instruction"); } public: static BuiltinFunctionMap initBuiltins(); }; // Utility macros for generating builtin function map #define CAST \ void (*)(WorkItem*, const llvm::CallInst*, const std::string&, \ const std::string&, TypedValue& result, void*) #define F1ARG(name) (double (*)(double)) name #define F2ARG(name) (double (*)(double, double)) name #define F3ARG(name) (double (*)(double, double, double)) name #define ADD_BUILTIN(name, func, op) \ builtins[name] = BuiltinFunction((CAST)func, (void*)op); #define ADD_PREFIX_BUILTIN(name, func, op) \ workItemPrefixBuiltins.push_back( \ make_pair(name, BuiltinFunction((CAST)func, (void*)op))); // Generate builtin function map BuiltinFunctionPrefixList workItemPrefixBuiltins; BuiltinFunctionMap workItemBuiltins = WorkItemBuiltins::initBuiltins(); BuiltinFunctionMap WorkItemBuiltins::initBuiltins() { BuiltinFunctionMap builtins; // Async Copy and Prefetch Functions ADD_BUILTIN("async_work_group_copy", async_work_group_copy, NULL); ADD_BUILTIN("async_work_group_strided_copy", async_work_group_copy, NULL); ADD_BUILTIN("wait_group_events", wait_group_events, NULL); ADD_BUILTIN("prefetch", prefetch, NULL); // Atomic Functions ADD_BUILTIN("atom_add", atomic_op, NULL); ADD_BUILTIN("atomic_add", atomic_op, NULL); ADD_BUILTIN("atom_and", atomic_op, NULL); ADD_BUILTIN("atomic_and", atomic_op, NULL); ADD_BUILTIN("atom_cmpxchg", atomic_op, NULL); ADD_BUILTIN("atomic_cmpxchg", atomic_op, NULL); ADD_BUILTIN("atom_dec", atomic_op, NULL); ADD_BUILTIN("atomic_dec", atomic_op, NULL); ADD_BUILTIN("atom_inc", atomic_op, NULL); ADD_BUILTIN("atomic_inc", atomic_op, NULL); ADD_BUILTIN("atom_max", atomic_op, NULL); ADD_BUILTIN("atomic_max", atomic_op, NULL); ADD_BUILTIN("atom_min", atomic_op, NULL); ADD_BUILTIN("atomic_min", atomic_op, NULL); ADD_BUILTIN("atom_or", atomic_op, NULL); ADD_BUILTIN("atomic_or", atomic_op, NULL); ADD_BUILTIN("atom_sub", atomic_op, NULL); ADD_BUILTIN("atomic_sub", atomic_op, NULL); ADD_BUILTIN("atom_xchg", atomic_op, NULL); ADD_BUILTIN("atomic_xchg", atomic_op, NULL); ADD_BUILTIN("atom_xor", atomic_op, NULL); ADD_BUILTIN("atomic_xor", atomic_op, NULL); // Common Functions ADD_BUILTIN("clamp", clamp, NULL); ADD_BUILTIN("degrees", f1arg, _degrees_); ADD_BUILTIN("max", max, NULL); ADD_BUILTIN("min", min, NULL); ADD_BUILTIN("mix", mix, NULL); ADD_BUILTIN("radians", f1arg, _radians_); ADD_BUILTIN("sign", 
f1arg, _sign_); ADD_BUILTIN("smoothstep", smoothstep, NULL); ADD_BUILTIN("step", step, NULL); // Geometric Functions ADD_BUILTIN("cross", cross, NULL); ADD_BUILTIN("dot", dot, NULL); ADD_BUILTIN("distance", distance, NULL); ADD_BUILTIN("length", length, NULL); ADD_BUILTIN("normalize", normalize, NULL); ADD_BUILTIN("fast_distance", distance, NULL); ADD_BUILTIN("fast_length", length, NULL); ADD_BUILTIN("fast_normalize", normalize, NULL); // Image Functions ADD_BUILTIN("get_image_array_size", get_image_array_size, NULL); ADD_BUILTIN("get_image_channel_data_type", get_image_channel_data_type, NULL); ADD_BUILTIN("get_image_channel_order", get_image_channel_order, NULL); ADD_BUILTIN("get_image_dim", get_image_dim, NULL); ADD_BUILTIN("get_image_depth", get_image_depth, NULL); ADD_BUILTIN("get_image_height", get_image_height, NULL); ADD_BUILTIN("get_image_width", get_image_width, NULL); ADD_BUILTIN("read_imagef", read_imagef, NULL); ADD_BUILTIN("read_imagei", read_imagei, NULL); ADD_BUILTIN("read_imageui", read_imageui, NULL); ADD_BUILTIN("write_imagef", write_imagef, NULL); ADD_BUILTIN("write_imagei", write_imagei, NULL); ADD_BUILTIN("write_imageui", write_imageui, NULL); ADD_BUILTIN("__translate_sampler_initializer", translate_sampler_initializer, NULL); // Integer Functions ADD_BUILTIN("abs", abs_builtin, NULL); ADD_BUILTIN("abs_diff", abs_diff, NULL); ADD_BUILTIN("add_sat", add_sat, NULL); ADD_BUILTIN("clz", clz, NULL); ADD_BUILTIN("hadd", hadd, NULL); ADD_BUILTIN("mad24", u3arg, _mad_); ADD_BUILTIN("mad_hi", mad_hi, NULL); ADD_BUILTIN("mad_sat", mad_sat, NULL); ADD_BUILTIN("mul24", u2arg, _mul_); ADD_BUILTIN("mul_hi", mul_hi, NULL); ADD_BUILTIN("popcount", u1arg, _popcount_); ADD_BUILTIN("rhadd", rhadd, NULL); ADD_BUILTIN("rotate", rotate, NULL); ADD_BUILTIN("sub_sat", sub_sat, NULL); ADD_BUILTIN("upsample", upsample, NULL); // Math Functions ADD_BUILTIN("acos", f1arg, F1ARG(acos)); ADD_BUILTIN("acosh", f1arg, F1ARG(acosh)); ADD_BUILTIN("acospi", f1arg, _acospi_); ADD_BUILTIN("asin", f1arg, F1ARG(asin)); ADD_BUILTIN("asinh", f1arg, F1ARG(asinh)); ADD_BUILTIN("asinpi", f1arg, _asinpi_); ADD_BUILTIN("atan", f1arg, F1ARG(atan)); ADD_BUILTIN("atan2", f2arg, F2ARG(atan2)); ADD_BUILTIN("atanh", f1arg, F1ARG(atanh)); ADD_BUILTIN("atanpi", f1arg, _atanpi_); ADD_BUILTIN("atan2pi", f2arg, _atan2pi_); ADD_BUILTIN("cbrt", f1arg, F1ARG(cbrt)); ADD_BUILTIN("ceil", f1arg, F1ARG(ceil)); ADD_BUILTIN("copysign", f2arg, F2ARG(copysign)); ADD_BUILTIN("cos", f1arg, F1ARG(cos)); ADD_BUILTIN("cosh", f1arg, F1ARG(cosh)); ADD_BUILTIN("cospi", f1arg, _cospi_); ADD_BUILTIN("erfc", f1arg, F1ARG(erfc)); ADD_BUILTIN("erf", f1arg, F1ARG(erf)); ADD_BUILTIN("exp", f1arg, F1ARG(exp)); ADD_BUILTIN("exp2", f1arg, F1ARG(exp2)); ADD_BUILTIN("exp10", f1arg, _exp10_); ADD_BUILTIN("expm1", f1arg, F1ARG(expm1)); ADD_BUILTIN("fabs", f1arg, F1ARG(fabs)); ADD_BUILTIN("fdim", f2arg, F2ARG(fdim)); ADD_BUILTIN("floor", f1arg, F1ARG(floor)); ADD_BUILTIN("fma", fma_builtin, NULL); ADD_BUILTIN("fmax", fmax_builtin, NULL); ADD_BUILTIN("fmin", fmin_builtin, NULL); ADD_BUILTIN("fmod", f2arg, F2ARG(fmod)); ADD_BUILTIN("fract", fract, NULL); ADD_BUILTIN("frexp", frexp_builtin, NULL); ADD_BUILTIN("hypot", f2arg, F2ARG(hypot)); ADD_BUILTIN("ilogb", ilogb_builtin, NULL); ADD_BUILTIN("ldexp", ldexp_builtin, NULL); ADD_BUILTIN("lgamma", f1arg, F1ARG(lgamma)); ADD_BUILTIN("lgamma_r", lgamma_r, NULL); ADD_BUILTIN("log", f1arg, F1ARG(log)); ADD_BUILTIN("log2", f1arg, F1ARG(log2)); ADD_BUILTIN("log10", f1arg, F1ARG(log10)); ADD_BUILTIN("log1p", f1arg, 
F1ARG(log1p)); ADD_BUILTIN("logb", f1arg, F1ARG(logb)); ADD_BUILTIN("mad", fma_builtin, NULL); ADD_BUILTIN("maxmag", f2arg, _maxmag_); ADD_BUILTIN("minmag", f2arg, _minmag_); ADD_BUILTIN("modf", modf_builtin, NULL); ADD_BUILTIN("nan", nan_builtin, NULL); ADD_BUILTIN("nanf", nan_builtin, NULL); ADD_BUILTIN("nextafter", nextafter_builtin, NULL); ADD_BUILTIN("pow", f2arg, F2ARG(pow)); ADD_BUILTIN("pown", pown, NULL); ADD_BUILTIN("powr", powr, NULL); ADD_BUILTIN("remainder", f2arg, F2ARG(remainder)); ADD_BUILTIN("remquo", remquo_builtin, NULL); ADD_BUILTIN("rint", f1arg, F1ARG(rint)); ADD_BUILTIN("rootn", rootn, NULL); ADD_BUILTIN("round", f1arg, F1ARG(round)); ADD_BUILTIN("rsqrt", f1arg, _rsqrt_); ADD_BUILTIN("sin", f1arg, F1ARG(sin)); ADD_BUILTIN("sinh", f1arg, F1ARG(sinh)); ADD_BUILTIN("sinpi", f1arg, _sinpi_); ADD_BUILTIN("sincos", sincos, NULL); ADD_BUILTIN("sqrt", f1arg, F1ARG(sqrt)); ADD_BUILTIN("tan", f1arg, F1ARG(tan)); ADD_BUILTIN("tanh", f1arg, F1ARG(tanh)); ADD_BUILTIN("tanpi", f1arg, _tanpi_); ADD_BUILTIN("tgamma", f1arg, F1ARG(tgamma)); ADD_BUILTIN("trunc", f1arg, F1ARG(trunc)); // Native Math Functions ADD_BUILTIN("half_cos", f1arg, F1ARG(cos)); ADD_BUILTIN("native_cos", f1arg, F1ARG(cos)); ADD_BUILTIN("half_divide", f2arg, _fdivide_); ADD_BUILTIN("native_divide", f2arg, _fdivide_); ADD_BUILTIN("half_exp", f1arg, F1ARG(exp)); ADD_BUILTIN("native_exp", f1arg, F1ARG(exp)); ADD_BUILTIN("half_exp2", f1arg, F1ARG(exp2)); ADD_BUILTIN("native_exp2", f1arg, F1ARG(exp2)); ADD_BUILTIN("half_exp10", f1arg, _exp10_); ADD_BUILTIN("native_exp10", f1arg, _exp10_); ADD_BUILTIN("half_log", f1arg, F1ARG(log)); ADD_BUILTIN("native_log", f1arg, F1ARG(log)); ADD_BUILTIN("half_log2", f1arg, F1ARG(log2)); ADD_BUILTIN("native_log2", f1arg, F1ARG(log2)); ADD_BUILTIN("half_log10", f1arg, F1ARG(log10)); ADD_BUILTIN("native_log10", f1arg, F1ARG(log10)); ADD_BUILTIN("half_powr", powr, NULL); ADD_BUILTIN("native_powr", powr, NULL); ADD_BUILTIN("half_recip", f1arg, _frecip_); ADD_BUILTIN("native_recip", f1arg, _frecip_); ADD_BUILTIN("half_rsqrt", f1arg, _rsqrt_); ADD_BUILTIN("native_rsqrt", f1arg, _rsqrt_); ADD_BUILTIN("half_sin", f1arg, F1ARG(sin)); ADD_BUILTIN("native_sin", f1arg, F1ARG(sin)); ADD_BUILTIN("half_sqrt", f1arg, F1ARG(sqrt)); ADD_BUILTIN("native_sqrt", f1arg, F1ARG(sqrt)); ADD_BUILTIN("half_tan", f1arg, F1ARG(tan)); ADD_BUILTIN("native_tan", f1arg, F1ARG(tan)); // Misc. 
Vector Functions ADD_BUILTIN("shuffle", shuffle_builtin, NULL); ADD_BUILTIN("shuffle2", shuffle2_builtin, NULL); // Relational Functional ADD_BUILTIN("all", all, NULL); ADD_BUILTIN("any", any, NULL); ADD_BUILTIN("bitselect", bitselect, NULL); ADD_BUILTIN("isequal", rel2arg, _iseq_); ADD_BUILTIN("isnotequal", rel2arg, _isneq_); ADD_BUILTIN("isgreater", rel2arg, _isgt_); ADD_BUILTIN("isgreaterequal", rel2arg, _isge_); ADD_BUILTIN("isless", rel2arg, _islt_); ADD_BUILTIN("islessequal", rel2arg, _isle_); ADD_BUILTIN("islessgreater", rel2arg, _islg_); ADD_BUILTIN("isfinite", rel1arg, _isfin_); ADD_BUILTIN("isinf", rel1arg, _isinf_); ADD_BUILTIN("isnan", rel1arg, _isnan_); ADD_BUILTIN("isnormal", rel1arg, _isnorm_); ADD_BUILTIN("isordered", rel2arg, _isord_); ADD_BUILTIN("isunordered", rel2arg, _isuord_); ADD_BUILTIN("select", select_builtin, NULL); ADD_BUILTIN("signbit", rel1arg, _signbit_); // Synchronization Functions ADD_BUILTIN("barrier", work_group_barrier, NULL); ADD_BUILTIN("work_group_barrier", work_group_barrier, NULL); ADD_BUILTIN("mem_fence", mem_fence, NULL); ADD_BUILTIN("read_mem_fence", mem_fence, NULL); ADD_BUILTIN("write_mem_fence", mem_fence, NULL); // Vector Data Load and Store Functions ADD_PREFIX_BUILTIN("vload_half", vload_half, NULL); ADD_PREFIX_BUILTIN("vloada_half", vload_half, NULL); ADD_PREFIX_BUILTIN("vstore_half", vstore_half, NULL); ADD_PREFIX_BUILTIN("vstorea_half", vstore_half, NULL); ADD_PREFIX_BUILTIN("vload", vload, NULL); ADD_PREFIX_BUILTIN("vstore", vstore, NULL); // Work-Item Functions ADD_BUILTIN("get_global_id", get_global_id, NULL); ADD_BUILTIN("get_global_size", get_global_size, NULL); ADD_BUILTIN("get_global_offset", get_global_offset, NULL); ADD_BUILTIN("get_group_id", get_group_id, NULL); ADD_BUILTIN("get_local_id", get_local_id, NULL); ADD_BUILTIN("get_local_size", get_local_size, NULL); ADD_BUILTIN("get_num_groups", get_num_groups, NULL); ADD_BUILTIN("get_work_dim", get_work_dim, NULL); ADD_BUILTIN("get_global_linear_id", get_global_linear_id, NULL); ADD_BUILTIN("get_local_linear_id", get_local_linear_id, NULL); ADD_BUILTIN("get_enqueued_local_size", get_enqueued_local_size, NULL); // Other Functions ADD_PREFIX_BUILTIN("as_", astype, NULL); ADD_PREFIX_BUILTIN("convert_half", convert_half, NULL); ADD_PREFIX_BUILTIN("convert_float", convert_float, NULL); ADD_PREFIX_BUILTIN("convert_double", convert_float, NULL); ADD_PREFIX_BUILTIN("convert_u", convert_uint, NULL); ADD_PREFIX_BUILTIN("convert_", convert_sint, NULL); ADD_BUILTIN("printf", printf_builtin, NULL); // LLVM Intrinsics ADD_PREFIX_BUILTIN("llvm.bswap.", llvm_bswap, NULL); ADD_BUILTIN("llvm.dbg.declare", llvm_dbg_declare, NULL); ADD_BUILTIN("llvm.dbg.value", llvm_dbg_value, NULL); ADD_PREFIX_BUILTIN("llvm.fabs.f", f1arg, F1ARG(fabs)); ADD_PREFIX_BUILTIN("llvm.lifetime.start", llvm_lifetime_start, NULL); ADD_PREFIX_BUILTIN("llvm.lifetime.end", llvm_lifetime_end, NULL); ADD_PREFIX_BUILTIN("llvm.memcpy", llvm_memcpy, NULL); ADD_PREFIX_BUILTIN("llvm.memmove", llvm_memcpy, NULL); ADD_PREFIX_BUILTIN("llvm.memset", llvm_memset, NULL); ADD_PREFIX_BUILTIN("llvm.fmuladd", fma_builtin, NULL); ADD_PREFIX_BUILTIN("llvm.smax", s2arg, _max_); ADD_PREFIX_BUILTIN("llvm.smin", s2arg, _min_); ADD_PREFIX_BUILTIN("llvm.umax", u2arg, _max_); ADD_PREFIX_BUILTIN("llvm.umin", u2arg, _min_); ADD_BUILTIN("llvm.trap", llvm_trap, NULL); return builtins; } } // namespace oclgrind Oclgrind-21.10/src/core/common.cpp000066400000000000000000000547351413315665100170140ustar00rootroot00000000000000// common.cpp (Oclgrind) // 
Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #include "config.h" #if defined(_WIN32) && !defined(__MINGW32__) #include #else #include #endif #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/Support/raw_os_ostream.h" using namespace oclgrind; using namespace std; namespace oclgrind { Size3::Size3() { x = y = z = 0; } Size3::Size3(size_t _x, size_t _y, size_t _z) { x = _x; y = _y; z = _z; } Size3::Size3(size_t linear, Size3 dimensions) { x = linear % dimensions.x; y = (linear / dimensions.x) % dimensions.y; z = (linear / (dimensions.x * dimensions.y)); } size_t& Size3::operator[](unsigned i) { switch (i) { case 0: return x; case 1: return y; case 2: return z; default: assert(false && "Size3 index out of range"); abort(); } } const size_t& Size3::operator[](unsigned i) const { switch (i) { case 0: return x; case 1: return y; case 2: return z; default: assert(false && "Size3 index out of range"); abort(); } } bool Size3::operator==(const Size3& rhs) const { return x == rhs.x && y == rhs.y && z == rhs.z; } bool Size3::operator!=(const Size3& rhs) const { return x != rhs.x || y != rhs.y || z != rhs.z; } ostream& operator<<(ostream& stream, const Size3& size) { stream << dec << "(" << size.x << "," << size.y << "," << size.z << ")"; return stream; } double TypedValue::getFloat(unsigned index) const { switch (size) { case 4: return ((float*)data)[index]; case 8: return ((double*)data)[index]; default: FATAL_ERROR("Unsupported float size: %u bytes", size); } } size_t TypedValue::getPointer(unsigned index) const { if (size != sizeof(size_t)) { FATAL_ERROR("Unsupported pointer size: %u bytes", size); } return ((size_t*)data)[index]; } int64_t TypedValue::getSInt(unsigned index) const { switch (size) { case 1: return ((int8_t*)data)[index]; case 2: return ((int16_t*)data)[index]; case 4: return ((int32_t*)data)[index]; case 8: return ((int64_t*)data)[index]; default: FATAL_ERROR("Unsupported signed int size: %u bytes", size); } } uint64_t TypedValue::getUInt(unsigned index) const { switch (size) { case 1: return ((uint8_t*)data)[index]; case 2: return ((uint16_t*)data)[index]; case 4: return ((uint32_t*)data)[index]; case 8: return ((uint64_t*)data)[index]; default: FATAL_ERROR("Unsupported unsigned int size: %u bytes", size); } } void TypedValue::setFloat(double value, unsigned index) { switch (size) { case 4: ((float*)data)[index] = value; break; case 8: ((double*)data)[index] = value; break; default: FATAL_ERROR("Unsupported float size: %u bytes", size); } } void TypedValue::setPointer(size_t value, unsigned index) { if (size != sizeof(size_t)) { FATAL_ERROR("Unsupported pointer size: %u bytes", size); } ((size_t*)data)[index] = value; } void TypedValue::setSInt(int64_t value, unsigned index) { switch (size) { case 1: ((int8_t*)data)[index] = value; break; case 2: ((int16_t*)data)[index] = value; break; case 4: ((int32_t*)data)[index] = value; break; case 8: ((int64_t*)data)[index] = value; break; default: FATAL_ERROR("Unsupported signed int size: %u bytes", size); } } void TypedValue::setUInt(uint64_t value, unsigned index) { switch (size) { case 1: ((uint8_t*)data)[index] = value; break; case 2: ((uint16_t*)data)[index] = value; break; case 4: ((uint32_t*)data)[index] = value; break; case 8: 
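// Illustrative sketch, kept out of the build with #if 0: the
// Size3(linear, dimensions) constructor above is the inverse of the usual
// x-fastest linearisation linear = (z * dims.y + y) * dims.x + x, as the
// round trip below demonstrates.
#if 0
#include <cassert>
#include <cstddef>

int main()
{
  const size_t dims[3] = {4, 3, 2};
  for (size_t linear = 0; linear < 4 * 3 * 2; linear++)
  {
    size_t x = linear % dims[0];
    size_t y = (linear / dims[0]) % dims[1];
    size_t z = linear / (dims[0] * dims[1]);
    assert((z * dims[1] + y) * dims[0] + x == linear);
  }
  return 0;
}
#endif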
((uint64_t*)data)[index] = value; break; default: FATAL_ERROR("Unsupported unsigned int size: %u bytes", size); } } ostream& operator<<(ostream& stream, const TypedValue& tv) { if (tv.data) { if (tv.num > 1) { stream << "("; } for (unsigned n = 0; n < tv.num; ++n) { for (int i = tv.size - 1; i >= 0; --i) { stream << hex << uppercase << setw(2) << setfill('0') << (int)*(tv.data + tv.size * n + i); } if (n != tv.num - 1) { stream << ","; } } if (tv.num > 1) { stream << ")"; } } else { stream << "NULL"; } return stream; } bool TypedValue::operator==(const TypedValue& rhs) const { return (size == rhs.size) && (num == rhs.num) && (memcmp(data, rhs.data, size * num) == 0); } bool TypedValue::operator!=(const TypedValue& rhs) const { return (size != rhs.size) || (num != rhs.num) || (memcmp(data, rhs.data, size * num) != 0); } TypedValue TypedValue::clone() const { TypedValue result; result.size = size; result.num = num; if (data) { result.data = new unsigned char[size * num]; memcpy(result.data, data, size * num); } else { result.data = NULL; } return result; } bool checkEnv(const char* var) { const char* value = getenv(var); return (value && !strcmp(value, "1")); } unsigned getEnvInt(const char* var, int def, bool allowZero) { const char* value = getenv(var); if (!value) return def; char* next; uint64_t result = strtoul(value, &next, 10); if (strlen(next) || result == ULONG_MAX || (!allowZero && !result)) { cerr << endl << "Oclgrind: Invalid value for " << var << endl; abort(); } return result; } void dumpInstruction(ostream& out, const llvm::Instruction* instruction) { llvm::raw_os_ostream stream(out); instruction->print(stream); } const char* getAddressSpaceName(unsigned addrSpace) { switch (addrSpace) { case AddrSpacePrivate: return "private"; case AddrSpaceGlobal: return "global"; case AddrSpaceConstant: return "constant"; case AddrSpaceLocal: return "local"; default: return "(unknown)"; } } void getConstantData(unsigned char* data, const llvm::Constant* constant) { if (constant->getValueID() == llvm::Value::UndefValueVal) { return; } const llvm::Type* type = constant->getType(); unsigned size = getTypeSize(type); if (auto* undef = llvm::dyn_cast(constant)) { memset(data, 0, size); return; } switch (type->getTypeID()) { case llvm::Type::IntegerTyID: { uint64_t ui = ((llvm::ConstantInt*)constant)->getZExtValue(); switch (size) { case 1: *((uint8_t*)data) = ui; break; case 2: *((uint16_t*)data) = ui; break; case 4: *((uint32_t*)data) = ui; break; case 8: *((uint64_t*)data) = ui; break; default: FATAL_ERROR("Unsupported constant int size: %u bytes", size); } break; } case llvm::Type::FloatTyID: { *(float*)data = ((llvm::ConstantFP*)constant)->getValueAPF().convertToFloat(); break; } case llvm::Type::DoubleTyID: { *(double*)data = ((llvm::ConstantFP*)constant)->getValueAPF().convertToDouble(); break; } case llvm::Type::FixedVectorTyID: { auto vecType = llvm::cast(type); unsigned num = vecType->getNumElements(); const llvm::Type* elemType = vecType->getElementType(); unsigned elemSize = getTypeSize(elemType); for (unsigned i = 0; i < num; i++) { getConstantData(data + i * elemSize, constant->getAggregateElement(i)); } break; } case llvm::Type::ArrayTyID: { unsigned num = type->getArrayNumElements(); const llvm::Type* elemType = type->getArrayElementType(); unsigned elemSize = getTypeSize(elemType); for (unsigned i = 0; i < num; i++) { getConstantData(data + i * elemSize, constant->getAggregateElement(i)); } break; } case llvm::Type::PointerTyID: { if (constant->getValueID() != 
llvm::Value::ConstantPointerNullVal) { FATAL_ERROR("Unsupported constant pointer value: %d", constant->getValueID()); } *(size_t*)data = 0; break; } case llvm::Type::StructTyID: { unsigned num = type->getStructNumElements(); for (unsigned i = 0; i < num; i++) { unsigned offset = getStructMemberOffset((const llvm::StructType*)type, i); getConstantData(data + offset, constant->getAggregateElement(i)); } break; } default: FATAL_ERROR("Unsupported constant type: %d", type->getTypeID()); } } llvm::Instruction* getConstExprAsInstruction(const llvm::ConstantExpr* expr) { // Get operands vector valueOperands(expr->op_begin(), expr->op_end()); llvm::ArrayRef operands(valueOperands); // Create instruction unsigned opcode = expr->getOpcode(); switch (opcode) { case llvm::Instruction::Trunc: case llvm::Instruction::ZExt: case llvm::Instruction::SExt: case llvm::Instruction::FPTrunc: case llvm::Instruction::FPExt: case llvm::Instruction::UIToFP: case llvm::Instruction::SIToFP: case llvm::Instruction::FPToUI: case llvm::Instruction::FPToSI: case llvm::Instruction::PtrToInt: case llvm::Instruction::IntToPtr: case llvm::Instruction::BitCast: return llvm::CastInst::Create((llvm::Instruction::CastOps)opcode, operands[0], expr->getType()); case llvm::Instruction::Select: return llvm::SelectInst::Create(operands[0], operands[1], operands[2]); case llvm::Instruction::InsertElement: return llvm::InsertElementInst::Create(operands[0], operands[1], operands[2]); case llvm::Instruction::ExtractElement: return llvm::ExtractElementInst::Create(operands[0], operands[1]); case llvm::Instruction::InsertValue: return llvm::InsertValueInst::Create(operands[0], operands[1], expr->getIndices()); case llvm::Instruction::ExtractValue: return llvm::ExtractValueInst::Create(operands[0], expr->getIndices()); case llvm::Instruction::ShuffleVector: return new llvm::ShuffleVectorInst(operands[0], operands[1], operands[2]); case llvm::Instruction::GetElementPtr: if (((const llvm::GEPOperator*)expr)->isInBounds()) { return llvm::GetElementPtrInst::CreateInBounds( operands[0]->getType()->getPointerElementType(), operands[0], operands.slice(1)); } else { return llvm::GetElementPtrInst::Create( operands[0]->getType()->getPointerElementType(), operands[0], operands.slice(1)); } case llvm::Instruction::ICmp: case llvm::Instruction::FCmp: return llvm::CmpInst::Create((llvm::Instruction::OtherOps)opcode, (llvm::CmpInst::Predicate)expr->getPredicate(), operands[0], operands[1]); case llvm::Instruction::AddrSpaceCast: FATAL_ERROR("Unsupported constant expression: addrspacecast"); default: assert(expr->getNumOperands() == 2 && "Must be binary operator?"); llvm::BinaryOperator* binaryOp = llvm::BinaryOperator::Create( (llvm::Instruction::BinaryOps)opcode, operands[0], operands[1]); // Check for overflowing operator if (opcode == llvm::Instruction::Add || opcode == llvm::Instruction::Mul || opcode == llvm::Instruction::Shl || opcode == llvm::Instruction::Sub) { binaryOp->setHasNoUnsignedWrap( expr->getRawSubclassOptionalData() & llvm::OverflowingBinaryOperator::NoUnsignedWrap); binaryOp->setHasNoSignedWrap( expr->getRawSubclassOptionalData() & llvm::OverflowingBinaryOperator::NoSignedWrap); } // Check for possibly exact operator if (opcode == llvm::Instruction::AShr || opcode == llvm::Instruction::LShr || opcode == llvm::Instruction::SDiv || opcode == llvm::Instruction::UDiv) { binaryOp->setIsExact(expr->getRawSubclassOptionalData() & llvm::PossiblyExactOperator::IsExact); } return binaryOp; } } const llvm::ConstantInt* getMDAsConstInt(const 
llvm::Metadata* md) { const llvm::ConstantAsMetadata* cam = llvm::dyn_cast(md); if (!cam) return NULL; return llvm::dyn_cast(cam->getValue()); } unsigned getStructMemberOffset(const llvm::StructType* type, unsigned index) { bool packed = ((llvm::StructType*)type)->isPacked(); unsigned offset = 0; for (unsigned i = 0; i <= index; i++) { // Get member size and alignment const llvm::Type* elemType = type->getStructElementType(i); unsigned size = getTypeSize(elemType); unsigned align = getTypeAlignment(elemType); // Add padding if necessary if (!packed && offset % align) { offset += (align - (offset % align)); } if (i == index) { return offset; } offset += size; } // Unreachable abort(); } unsigned getTypeSize(const llvm::Type* type) { if (type->isArrayTy()) { unsigned num = type->getArrayNumElements(); unsigned sz = getTypeSize(type->getArrayElementType()); return num * sz; } else if (type->isStructTy()) { bool packed = ((llvm::StructType*)type)->isPacked(); unsigned size = 0; unsigned alignment = 1; for (unsigned i = 0; i < type->getStructNumElements(); i++) { // Get member size and alignment const llvm::Type* elemType = type->getStructElementType(i); unsigned sz = getTypeSize(elemType); unsigned align = getTypeAlignment(elemType); // Add padding if necessary if (!packed && size % align) { size += (align - (size % align)); } size += sz; alignment = max(alignment, align); } // Alignment of struct should match member with largest alignment if (!packed && size % alignment) { size += (alignment - (size % alignment)); } return size; } else if (type->isVectorTy()) { auto vecType = llvm::cast(type); unsigned num = vecType->getNumElements(); unsigned sz = getTypeSize(vecType->getElementType()); if (num == 3) num = 4; // Hack for 3-element vectors return num * sz; } else if (type->isPointerTy()) { return sizeof(size_t); } else { // Round up for types that have a bit size not multiple of 8 // like "bool". 
return (type->getScalarSizeInBits() + 7) >> 3; } } /// Returns the byte alignment of this type unsigned getTypeAlignment(const llvm::Type* type) { using namespace llvm; // Array types are aligned to their element type if (const ArrayType* psAT = dyn_cast(type)) { return getTypeAlignment(psAT->getElementType()); } // Struct alignment is the size of its largest contained type if (const StructType* structT = dyn_cast(type)) { if (structT->isPacked()) return 1; StructType* nonConstTy = const_cast(structT); unsigned uAlign = 0, uMaxAlign = 1; unsigned uCount = structT->getNumElements(); for (unsigned i = 0; i < uCount; i++) { const Type* psElemType = nonConstTy->getTypeAtIndex(i); uAlign = getTypeAlignment(psElemType); if (uAlign > uMaxAlign) uMaxAlign = uAlign; } return uMaxAlign; } return getTypeSize(type); } pair getValueSize(const llvm::Value* value) { unsigned bits, numElements; const llvm::Type* type = value->getType(); if (type->isVectorTy()) { auto vecType = llvm::cast(type); bits = vecType->getElementType()->getPrimitiveSizeInBits(); numElements = vecType->getNumElements(); } else if (type->isAggregateType()) { bits = getTypeSize(type) << 3; numElements = 1; } else { bits = type->getPrimitiveSizeInBits(); numElements = 1; } unsigned elemSize = (bits + 7) >> 3; // Special case for pointer types if (type->isPointerTy()) { elemSize = sizeof(size_t); } // Special case for boolean results if (bits == 1) { elemSize = sizeof(bool); } return pair(elemSize, numElements); } bool isConstantOperand(const llvm::Value* operand) { unsigned id = operand->getValueID(); return (id >= llvm::Value::ConstantFirstVal && id <= llvm::Value::ConstantLastVal); } bool isVector3(const llvm::Value* value) { auto vecType = llvm::dyn_cast(value->getType()); return (vecType && vecType->getNumElements() == 3); } double now() { #if defined(_WIN32) && !defined(__MINGW32__) return time(NULL) * 1e9; #else struct timeval tv; gettimeofday(&tv, NULL); return tv.tv_usec * 1e3 + tv.tv_sec * 1e9; #endif } void printTypedData(const llvm::Type* type, const unsigned char* data) { // TODO: Interpret other types (array, struct) unsigned size = getTypeSize(type); switch (type->getTypeID()) { case llvm::Type::FloatTyID: cout << *(float*)data; break; case llvm::Type::DoubleTyID: cout << *(double*)data; break; case llvm::Type::IntegerTyID: cout << dec; switch (size) { case 1: cout << (int)*(char*)data; break; case 2: cout << *(short*)data; break; case 4: cout << *(int*)data; break; case 8: cout << *(long*)data; break; default: cout << "(invalid integer size)"; break; } break; case llvm::Type::FixedVectorTyID: { auto vecType = llvm::cast(type); const llvm::Type* elemType = vecType->getElementType(); cout << "("; for (unsigned i = 0; i < vecType->getNumElements(); i++) { if (i > 0) { cout << ","; } printTypedData(elemType, data + i * getTypeSize(elemType)); } cout << ")"; break; } case llvm::Type::PointerTyID: cout << "0x" << hex << *(size_t*)data; break; case llvm::Type::ArrayTyID: { const llvm::Type* elemType = type->getArrayElementType(); unsigned elemSize = getTypeSize(elemType); cout << "{"; for (unsigned i = 0; i < type->getArrayNumElements(); i++) { if (i > 0) cout << ","; printTypedData(elemType, data + i * elemSize); } cout << "}"; break; } default: cout << "(raw) 0x" << hex << uppercase << setfill('0'); for (unsigned i = 0; i < size; i++) { cout << setw(2) << (int)data[i]; } } } size_t resolveConstantPointer(const llvm::Value* ptr, TypedValueMap& values) { if (values.count(ptr)) { // In the value map - just return the pointer 
return values.at(ptr).getPointer(); } else if (auto gep = llvm::dyn_cast(ptr)) { // Get base address size_t base = resolveConstantPointer(gep->getPointerOperand(), values); const llvm::Type* ptrType = gep->getPointerOperandType(); // Get indices std::vector offsets; llvm::User::const_op_iterator opItr; for (opItr = gep->idx_begin(); opItr != gep->idx_end(); opItr++) { auto idx = (llvm::ConstantInt*)(opItr->get()); offsets.push_back(idx->getSExtValue()); } return resolveGEP(base, ptrType, offsets); } else if (auto bc = llvm::dyn_cast(ptr)) { // bitcast - no change to the source pointer return resolveConstantPointer(bc->getOperand(0), values); } else if (ptr->getValueID() == llvm::Value::ConstantPointerNullVal) { return 0; } else { FATAL_ERROR("Unsupported constant pointer type: %d", ptr->getValueID()); } return 0; } size_t resolveGEP(size_t base, const llvm::Type* ptrType, std::vector& offsets) { size_t address = base; // Iterate over indices for (int i = 0; i < offsets.size(); i++) { int64_t offset = offsets[i]; if (ptrType->isPointerTy()) { // Get pointer element size const llvm::Type* elemType = ptrType->getPointerElementType(); address += offset * getTypeSize(elemType); ptrType = elemType; } else if (ptrType->isArrayTy()) { // Get array element size const llvm::Type* elemType = ptrType->getArrayElementType(); address += offset * getTypeSize(elemType); ptrType = elemType; } else if (ptrType->isVectorTy()) { // Get vector element size auto vecType = llvm::cast(ptrType); const llvm::Type* elemType = vecType->getElementType(); address += offset * getTypeSize(elemType); ptrType = elemType; } else if (ptrType->isStructTy()) { address += getStructMemberOffset((const llvm::StructType*)ptrType, offset); ptrType = ptrType->getStructElementType(offset); } else { FATAL_ERROR("Unsupported GEP base type: %d", ptrType->getTypeID()); } } return address; } FatalError::FatalError(const string& msg, const string& file, size_t line) : std::runtime_error(msg) { m_file = file; m_line = line; } FatalError::~FatalError() throw() {} const string& FatalError::getFile() const { return m_file; } size_t FatalError::getLine() const { return m_line; } const char* FatalError::what() const throw() { return runtime_error::what(); } MemoryPool::MemoryPool(size_t blockSize) : m_blockSize(blockSize) { // Force first allocation to create new block m_offset = m_blockSize; } MemoryPool::~MemoryPool() { for (auto itr = m_blocks.begin(); itr != m_blocks.end(); itr++) { delete[] * itr; } } uint8_t* MemoryPool::alloc(size_t size) { if (size == 0) return NULL; // Check if requested size larger than block size if (size > m_blockSize) { // Oversized buffers allocated separately from main pool unsigned char* buffer = new unsigned char[size]; m_blocks.push_back(buffer); return buffer; } // Round up size to nearest power of two for alignment // Taken from here: // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 unsigned align = size; align--; align |= align >> 1; align |= align >> 2; align |= align >> 4; align |= align >> 8; align |= align >> 16; align++; // Align offset to size of requested allocation if (m_offset & (align - 1)) m_offset += (align - (m_offset & (align - 1))); // Check if enough space in current block if (m_offset + size > m_blockSize) { // Allocate new block m_blocks.push_front(new unsigned char[m_blockSize]); m_offset = 0; } uint8_t* buffer = m_blocks.front() + m_offset; m_offset += size; return buffer; } TypedValue MemoryPool::clone(const TypedValue& source) { TypedValue dest; dest.size = 
source.size; dest.num = source.num; dest.data = alloc(dest.size * dest.num); memcpy(dest.data, source.data, dest.size * dest.num); return dest; } } // namespace oclgrind Oclgrind-21.10/src/core/common.h000066400000000000000000000175031413315665100164510ustar00rootroot00000000000000// common.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #ifndef __common_h_ #define __common_h_ #define CL_TARGET_OPENCL_VERSION 300 #include "CL/cl.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) #define snprintf _snprintf #undef ERROR #endif #ifdef __APPLE__ // TODO: Remove this when thread_local fixed on OS X #define THREAD_LOCAL __thread #elif defined(_WIN32) && !defined(__MINGW32__) // TODO: Remove this when thread_local fixed on Windows #define THREAD_LOCAL __declspec(thread) #else #define THREAD_LOCAL thread_local #endif #define CLK_NORMALIZED_COORDS_TRUE 0x0001 #define CLK_ADDRESS_NONE 0x0000 #define CLK_ADDRESS_CLAMP_TO_EDGE 0x0002 #define CLK_ADDRESS_CLAMP 0x0004 #define CLK_ADDRESS_REPEAT 0x0006 #define CLK_ADDRESS_MIRRORED_REPEAT 0x0008 #define CLK_ADDRESS_MASK 0x000E #define CLK_FILTER_NEAREST 0x0010 #define CLK_FILTER_LINEAR 0x0020 namespace llvm { class Constant; class ConstantExpr; class ConstantInt; class Instruction; class Metadata; class StructType; class Type; class Value; } // namespace llvm namespace oclgrind { class Kernel; // Enumeration for address spaces enum AddressSpace { AddrSpacePrivate = 0, AddrSpaceGlobal = 1, AddrSpaceConstant = 2, AddrSpaceLocal = 3, }; enum AtomicOp { AtomicAdd, AtomicAnd, AtomicCmpXchg, AtomicDec, AtomicInc, AtomicMax, AtomicMin, AtomicOr, AtomicSub, AtomicXchg, AtomicXor, }; // Enumeration for different log message types enum MessageType { DEBUG, INFO, WARNING, ERROR, }; // 3-dimensional size struct Size3 { size_t x, y, z; Size3(); Size3(size_t x, size_t y, size_t z); Size3(size_t linear, Size3 dimensions); size_t& operator[](unsigned i); const size_t& operator[](unsigned i) const; bool operator==(const Size3& rhs) const; bool operator!=(const Size3& rhs) const; friend std::ostream& operator<<(std::ostream& stream, const Size3& sz); }; // Structure for a value with a size/type struct TypedValue { unsigned size; unsigned num; unsigned char* data; bool operator==(const TypedValue& rhs) const; bool operator!=(const TypedValue& rhs) const; friend std::ostream& operator<<(std::ostream& stream, const TypedValue& tv); struct TypedValue clone() const; double getFloat(unsigned index = 0) const; size_t getPointer(unsigned index = 0) const; int64_t getSInt(unsigned index = 0) const; uint64_t getUInt(unsigned index = 0) const; void setFloat(double value, unsigned index = 0); void setPointer(size_t value, unsigned index = 0); void setSInt(int64_t value, unsigned index = 0); void setUInt(uint64_t value, unsigned index = 0); }; // Private memory map type typedef std::map TypedValueMap; // Image object struct Image { size_t address; cl_image_format format; cl_image_desc desc; }; // Check if an environment variable is set to 1 bool checkEnv(const char* var); // Get an environment variable as an integer unsigned getEnvInt(const char* var, int def = 0, bool allowZero = true); // Output an 
instruction in human-readable format void dumpInstruction(std::ostream& out, const llvm::Instruction* instruction); // Get the human readable name of an address space const char* getAddressSpaceName(unsigned addrSpace); // Retrieve the raw data for a constant void getConstantData(unsigned char* data, const llvm::Constant* constant); // Creates an instruction from a constant expression llvm::Instruction* getConstExprAsInstruction(const llvm::ConstantExpr* expr); // Get the ConstantInt object for a Metadata node const llvm::ConstantInt* getMDAsConstInt(const llvm::Metadata* md); // Get the byte offset of a struct member unsigned getStructMemberOffset(const llvm::StructType* type, unsigned index); // Returns the size of a type unsigned getTypeSize(const llvm::Type* type); /// Returns the alignment requirements of this type unsigned getTypeAlignment(const llvm::Type* type); // Returns the size of a value std::pair getValueSize(const llvm::Value* value); // Returns true if the operand is a constant value bool isConstantOperand(const llvm::Value* operand); // Returns true if the value is a 3-element vector bool isVector3(const llvm::Value* value); // Return the current time in nanoseconds since the epoch double now(); // Print data in a human readable format (according to its type) void printTypedData(const llvm::Type* type, const unsigned char* data); // Resolve a constant pointer, using a set of known constant values size_t resolveConstantPointer(const llvm::Value* ptr, TypedValueMap& values); // Resolve a GEP from a base address and list of offsets size_t resolveGEP(size_t base, const llvm::Type* ptrType, std::vector& offsets); // Exception class for raising fatal errors class FatalError : std::runtime_error { public: FatalError(const std::string& msg, const std::string& file, size_t line); ~FatalError() throw(); virtual const std::string& getFile() const; virtual size_t getLine() const; virtual const char* what() const throw(); protected: std::string m_file; size_t m_line; }; // Utility macro for raising an exception with a sprintf-based message #define FATAL_ERROR(format, ...) \ { \ int sz = snprintf(NULL, 0, format, ##__VA_ARGS__); \ char* str = new char[sz + 1]; \ sprintf(str, format, ##__VA_ARGS__); \ string msg = str; \ delete[] str; \ throw FatalError(msg, __FILE__, __LINE__); \ } class MemoryPool { public: MemoryPool(size_t blockSize = 1024); ~MemoryPool(); uint8_t* alloc(size_t size); TypedValue clone(const TypedValue& source); private: size_t m_blockSize; size_t m_offset; std::list m_blocks; }; // Pool allocator class for STL containers template class PoolAllocator { template friend class PoolAllocator; public: typedef T value_type; typedef T* pointer; typedef T& reference; typedef const T* const_pointer; typedef const T& const_reference; typedef size_t size_type; typedef ptrdiff_t difference_type; template struct rebind { typedef PoolAllocator other; }; PoolAllocator() { pool.reset(new MemoryPool(BLOCKSIZE)); } PoolAllocator(const PoolAllocator& p) { this->pool = p.pool; } template PoolAllocator(const PoolAllocator& p) { this->pool = p.pool; } pointer allocate(size_type n, const_pointer hint = 0) { return (pointer)(pool->alloc(n * sizeof(value_type))); } void deallocate(pointer p, size_type n) {} template void construct(U* p, Args&&... 
args) { new ((void*)p) U(std::forward(args)...); } template void destroy(U* p) { p->~U(); } bool operator==(const PoolAllocator& p) const { return this->pool == p.pool; } bool operator!=(const PoolAllocator& p) const { return this->pool != p.pool; } private: std::shared_ptr pool; }; } // namespace oclgrind #endif // __common_h_ Oclgrind-21.10/src/core/gen_opencl-c.h.cmake000066400000000000000000000014271413315665100205670ustar00rootroot00000000000000set(OUTPUT src/core/opencl-c.h.cpp) get_filename_component(SOURCE_FILE_DIR ${SOURCE_FILE} DIRECTORY) # Load opencl-c.h file(READ ${SOURCE_FILE} OPENCL_C_H) if (EXISTS "${SOURCE_FILE_DIR}/opencl-c-base.h") file(READ ${SOURCE_FILE_DIR}/opencl-c-base.h OPENCL_C_BASE_H) endif() string(REPLACE "#include \"opencl-c-base.h\"" "${OPENCL_C_BASE_H}" CONTENT "${OPENCL_C_H}") # Replace each character with a C character literal, escaping as necessary string(REGEX REPLACE "(.)" "'\\1', " CONTENT "${CONTENT}") string(REGEX REPLACE "\n'" "\\\\n'\n" CONTENT "${CONTENT}") string(REGEX REPLACE "\\\\'" "\\\\\\\\'" CONTENT "${CONTENT}") # Write character array file(WRITE ${OUTPUT} "extern const char OPENCL_C_H_DATA[] = {\n") file(APPEND ${OUTPUT} "${CONTENT}") file(APPEND ${OUTPUT} "'\\0'};\n") Oclgrind-21.10/src/install/000077500000000000000000000000001413315665100155205ustar00rootroot00000000000000Oclgrind-21.10/src/install/INSTALL.darwin000066400000000000000000000010201413315665100200250ustar00rootroot00000000000000To install Oclgrind, simply copy the bin, lib and include directories to (for example) /usr/local/: sudo cp -r {bin,lib,include} /usr/local Alternatively, Oclgrind can be used from a non-system directory. To do so, add $OCLGRIND_ROOT/bin to your PATH environment variable, and $OCLGRIND_ROOT/lib to your DYLD_LIBRARY_PATH environment variable (where $OCLGRIND_ROOT is the directory containing this file). Information about using Oclgrind can be found on the GitHub wiki page: http://github.com/jrprice/Oclgrind/wiki Oclgrind-21.10/src/install/INSTALL.linux000066400000000000000000000011611413315665100177060ustar00rootroot00000000000000To install Oclgrind, simply copy the bin, lib and include directories to (for example) /usr/local/: sudo cp -r {bin,lib,include} /usr/local Alternatively, Oclgrind can be used from a non-system directory. To do so, add $OCLGRIND_ROOT/bin to your PATH environment variable, and $OCLGRIND_ROOT/lib to your LD_LIBRARY_PATH environment variable (where $OCLGRIND_ROOT is the directory containing this file). To use Oclgrind with the OpenCL ICD loader (optional), copy oclgrind.icd to /etc/OpenCL/vendors/. Information about using Oclgrind can be found on the GitHub wiki page: http://github.com/jrprice/Oclgrind/wiki Oclgrind-21.10/src/install/INSTALL.windows000066400000000000000000000016571413315665100202530ustar00rootroot00000000000000To install Oclgrind, run 'install.bat' as an Administrator. This will install Oclgrind to 'C:\Program Files\Oclgrind' and create a registry entry for the OpenCL ICD loader. Oclgrind can be uninstalled by running 'uninstall.bat' as an Administrator. Alternatively, Oclgrind can be run from any other directory. You will need to manually create OpenCL ICD loading points by editing the registry (see oclgrind-icd.reg), and/or add $OCLGRIND_ROOT/bin to your PATH environment variable to make use of the oclgrind.exe command. You may be warned about a missing MSVCP140.dll during the installation process, which can cause Oclgrind to fail to run properly. 
This can be fixed by installing the Microsoft Visual C++ Redistributable from here: https://www.microsoft.com/en-us/download/details.aspx?id=48145 Information about using Oclgrind can be found on the GitHub wiki page: http://github.com/jrprice/Oclgrind/wiki Oclgrind-21.10/src/install/cpack-description000066400000000000000000000011721413315665100210460ustar00rootroot00000000000000Oclgrind is an extensible OpenCL device simulator that provides a plugin interface to facilitate the creation of tools to aid analysis and development of OpenCL programs. Among the tools that Oclgrind provides are various debugging aids, such as out-of-bounds memory access checking, data-race detection, and an interactive debugger. Oclgrind implements the OpenCL runtime API, which makes simulating an existing OpenCL program very straightforward - simply prefix your usual application command-line with 'oclgrind'. There is also a simple interface for simulating individual kernels in isolation via the 'oclgrind-kernel' command. Oclgrind-21.10/src/install/install.bat000066400000000000000000000015621413315665100176620ustar00rootroot00000000000000@ECHO OFF cd %~dp0 set "ROOT=%programfiles%\Oclgrind" mkdir "%ROOT%" || goto :error xcopy include "%ROOT%\include" /S /Y /I || goto :error xcopy x86 "%ROOT%\x86" /S /Y /I || goto :error xcopy x64 "%ROOT%\x64" /S /Y /I || goto :error xcopy uninstall.bat "%ROOT%\" /Y || goto :error regedit /S oclgrind-icd.reg || goto :error echo. echo Installation completed. echo. if not exist C:\Windows\system32\msvcp140.dll ( echo WARNING: MSVCP140.dll not found - Oclgrind may fail to work correctly echo Download the Microsoft Visual C++ Redistributable from here: echo. echo https://www.microsoft.com/en-us/download/details.aspx?id=48145 echo. pause ) goto :EOF :error echo INSTALLATION FAILED echo Did you run as Administrator? pause Oclgrind-21.10/src/install/oclgrind-icd.reg000066400000000000000000000020221413315665100205510ustar00rootroot00000000000000ÿþWindows Registry Editor Version 5.00 [HKEY_LOCAL_MACHINE\SOFTWARE\Khronos] [HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL] [HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors] "C:\\Program Files\\Oclgrind\\x64\\lib\\oclgrind-rt-icd.dll"=dword:00000000 [HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Khronos] [HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Khronos\OpenCL] [HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Khronos\OpenCL\Vendors] "C:\\Program Files\\Oclgrind\\x86\\lib\\oclgrind-rt-icd.dll"=dword:00000000 Oclgrind-21.10/src/install/uninstall.bat000066400000000000000000000000721413315665100202200ustar00rootroot00000000000000start /B "" cmd /C rmdir "%programfiles%\Oclgrind" /S /Q Oclgrind-21.10/src/kernel/000077500000000000000000000000001413315665100153325ustar00rootroot00000000000000Oclgrind-21.10/src/kernel/Simulation.cpp000066400000000000000000000475021413315665100201720ustar00rootroot00000000000000// Simulation.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
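// NOTE (illustrative, not part of the original source): Simulation::load()
// below parses a plain-text "simfile" describing a single kernel launch.
// A minimal sketch of that format, assuming a hypothetical vecadd kernel
// with three float* buffer arguments (all file names and values invented):
//
//   # program file, kernel name, NDRange, work-group size
//   vecadd.cl
//   vecadd
//   16 1 1
//   4 1 1
//
//   # one <...> descriptor per kernel argument; the header tokens are
//   # handled in parseArgument() (type names, size=, fill=, range=, dump,
//   # hex, noinit, null, ro, wo), with literal data values following the
//   # header when no initializer token is given
//   <size=64 fill=1.0>
//   <size=64 range=0:1:15>
//   <size=64 noinit dump>
//
// '#' starts a comment; comments are stripped in Simulation::get().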
#include #include #include #include #include "core/Context.h" #include "core/Kernel.h" #include "core/KernelInvocation.h" #include "core/Memory.h" #include "core/Program.h" #include "kernel/Simulation.h" using namespace oclgrind; using namespace std; #define PARSING(parsing) m_parsing = parsing; // Convert an integer to char/uchar, checking if the value is valid #define INT_TO_CHAR(intval, result) \ result = intval; \ if (result != intval) \ { \ throw "Invalid char value"; \ } // Utility to read a typed value from a stream template T readValue(istream& stream); Simulation::Simulation() { m_context = new Context(); m_kernel = NULL; m_program = NULL; } Simulation::~Simulation() { delete m_kernel; delete m_program; delete m_context; } template void Simulation::dumpArgument(DumpArg& arg) { size_t num = arg.size / sizeof(T); T* data = new T[num]; m_context->getGlobalMemory()->load((uint8_t*)data, arg.address, arg.size); for (size_t i = 0; i < num; i++) { cout << " " << arg.name << "[" << i << "] = "; if (arg.hex) cout << "0x" << setfill('0') << setw(sizeof(T) * 2) << hex; if (sizeof(T) == 1) cout << (int)data[i]; else cout << data[i]; cout << dec; cout << endl; } cout << endl; delete[] data; } template void Simulation::get(T& result) { do { // Check if line buffer has content streampos pos = m_lineBuffer.tellg(); string token; m_lineBuffer >> token; if (!m_lineBuffer.fail()) { // Line has content, rewind line buffer m_lineBuffer.clear(); m_lineBuffer.seekg(pos); // Read value from line buffer m_lineBuffer >> result; if (m_lineBuffer.fail()) { throw ifstream::failbit; } return; } // Get next line string line; getline(m_simfile, line); m_lineNumber++; // Remove comments size_t comment = line.find_first_of('#'); if (comment != string::npos) { line = line.substr(0, comment); } // Update line buffer m_lineBuffer.clear(); m_lineBuffer.str(line); } while (m_simfile.good()); // Couldn't read data from file, throw exception throw m_simfile.eof() ? ifstream::eofbit : ifstream::failbit; } bool Simulation::load(const char* filename) { // Open simulator file m_lineNumber = 0; m_lineBuffer.setstate(ios_base::eofbit); m_simfile.open(filename); if (m_simfile.fail()) { cerr << "Unable to open simulator file." 
<< endl; return false; } try { // Read simulation parameters string progFileName; string kernelName; PARSING("program file"); get(progFileName); PARSING("kernel"); get(kernelName); PARSING("NDRange"); get(m_ndrange.x); get(m_ndrange.y); get(m_ndrange.z); PARSING("work-group size"); get(m_wgsize.x); get(m_wgsize.y); get(m_wgsize.z); // Open program file ifstream progFile; progFile.open(progFileName.c_str(), ios_base::in | ios_base::binary); if (!progFile.good()) { cerr << "Unable to open " << progFileName << endl; return false; } // Check for LLVM bitcode magic numbers char magic[2] = {0, 0}; progFile.read(magic, 2); if (magic[0] == 0x42 && magic[1] == 0x43) { // Load bitcode progFile.close(); m_program = Program::createFromBitcodeFile(m_context, progFileName); if (!m_program) { cerr << "Failed to load bitcode from " << progFileName << endl; return false; } } else { // Get size of file progFile.seekg(0, ios_base::end); size_t sz = progFile.tellg(); progFile.seekg(0, ios_base::beg); // Load source char* data = new char[sz + 1]; progFile.read(data, sz + 1); progFile.close(); data[sz] = '\0'; m_program = new Program(m_context, data); delete[] data; // Build program if (!m_program->build(Program::BUILD, "")) { cerr << "Build failure:" << endl << m_program->getBuildLog() << endl; return false; } } // Get kernel m_kernel = m_program->createKernel(kernelName); if (!m_kernel) { cerr << "Failed to create kernel " << kernelName << endl; return false; } // Ensure work-group size exactly divides NDRange if necessary if (m_kernel->requiresUniformWorkGroups() && (m_ndrange.x % m_wgsize.x || m_ndrange.y % m_wgsize.y || m_ndrange.z % m_wgsize.z)) { cerr << "Work group size must divide NDRange exactly." << endl; return false; } // Parse kernel arguments m_dumpArguments.clear(); for (unsigned index = 0; index < m_kernel->getNumArguments(); index++) { parseArgument(index); } // Make sure there is no more input string next; m_simfile >> next; if (m_simfile.good() || !m_simfile.eof()) { cerr << "Unexpected token '" << next << "' (expected EOF)" << endl; return false; } } catch (const char* err) { cerr << "Line " << m_lineNumber << ": " << err << " (" << m_parsing << ")" << endl; return false; } catch (ifstream::iostate e) { if (e == ifstream::eofbit) { cerr << "Unexpected EOF when parsing " << m_parsing << endl; return false; } else if (e == ifstream::failbit) { cerr << "Line " << m_lineNumber << ": Failed to parse " << m_parsing << endl; return false; } else { throw e; } } return true; } void Simulation::parseArgument(size_t index) { // Argument parsing parameters size_t size = -1; cl_mem_flags flags = 0; ArgDataType type = TYPE_NONE; size_t typeSize = 0; bool null = false; bool dump = false; bool hex = false; bool noinit = false; string fill = ""; string range = ""; string name = m_kernel->getArgumentName(index).str(); // Set meaningful parsing status for error messages ostringstream stringstream; stringstream << "argument " << index << ": " << name; string formatted = stringstream.str(); PARSING(formatted.c_str()); // Get argument info size_t argSize = m_kernel->getArgumentSize(index); unsigned int addrSpace = m_kernel->getArgumentAddressQualifier(index); const llvm::StringRef argType = m_kernel->getArgumentTypeName(index); // Ensure we have an argument header char c; get(c); if (c != '<') { throw "Expected argument header <...>"; } // Get header streampos startpos = m_lineBuffer.tellg(); string headerStr; getline(m_lineBuffer, headerStr); size_t end = headerStr.find_last_of('>'); if (end == string::npos) { 
throw "Missing '>' at end of argument header"; } headerStr = headerStr.substr(0, end); // Move line buffer to end of header m_lineBuffer.clear(); m_lineBuffer.seekg((int)startpos + headerStr.size() + 1); // Save format flags ios_base::fmtflags previousFormat = m_lineBuffer.flags(); // Parse header istringstream header(headerStr); while (!header.eof()) { // Get next header token string token; header >> token; if (header.fail()) { break; } #define MATCH_TYPE(str, value, sz) \ else if (token == str) \ { \ if (type != TYPE_NONE) \ { \ throw "Argument type defined multiple times"; \ } \ type = value; \ typeSize = sz; \ } // Parse token if (false) ; MATCH_TYPE("char", TYPE_CHAR, 1) MATCH_TYPE("uchar", TYPE_UCHAR, 1) MATCH_TYPE("short", TYPE_SHORT, 2) MATCH_TYPE("ushort", TYPE_USHORT, 2) MATCH_TYPE("int", TYPE_INT, 4) MATCH_TYPE("uint", TYPE_UINT, 4) MATCH_TYPE("long", TYPE_LONG, 8) MATCH_TYPE("ulong", TYPE_ULONG, 8) MATCH_TYPE("float", TYPE_FLOAT, 4) MATCH_TYPE("double", TYPE_DOUBLE, 8) else if (token.compare(0, 4, "dump") == 0) { dump = true; } else if (token.compare(0, 4, "fill") == 0) { if (token.size() < 6 || token[4] != '=') { throw "Expected =VALUE after 'fill"; } fill = token.substr(5); } else if (token == "hex") { hex = true; m_lineBuffer.setf(ios_base::hex); m_lineBuffer.unsetf(ios_base::dec | ios_base::oct); } else if (token == "noinit") { if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL && addrSpace != CL_KERNEL_ARG_ADDRESS_CONSTANT) { throw "'noinit' only valid for buffer arguments"; } noinit = true; } else if (token == "null") { if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL && addrSpace != CL_KERNEL_ARG_ADDRESS_CONSTANT) { throw "'null' only valid for buffer arguments"; } null = true; } else if (token.compare(0, 5, "range") == 0) { if (token.size() < 7 || token[5] != '=') { throw "Expected =START:INC:END after 'range"; } range = token.substr(6); } else if (token == "ro") { if (flags & CL_MEM_WRITE_ONLY) { throw "'ro' and 'wo' are mutually exclusive"; } if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL) { throw "'ro' only valid for global memory buffers"; } flags |= CL_MEM_READ_ONLY; } else if (token.compare(0, 4, "size") == 0) { istringstream value(token.substr(4)); char equals = 0; value >> equals; if (equals != '=') { throw "Expected = after 'size'"; } value >> dec >> size; if (value.fail() || !value.eof()) { throw "Invalid value for 'size'"; } } else if (token == "wo") { if (flags & CL_MEM_READ_ONLY) { throw "'ro' and 'wo' are mutually exclusive"; } if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL) { throw "'wo' only valid for global memory buffers"; } flags |= CL_MEM_WRITE_ONLY; } else { string err = "Unrecognised header token '"; err += token; err += "'"; throw err.c_str(); } } // Ensure size given if (null) { if (size != -1 || !fill.empty() || !range.empty() || noinit || dump) { throw "'null' not valid with other argument descriptors"; } size = 0; } else if (size == -1) { throw "size required"; } if (type == TYPE_NONE) { #define MATCH_TYPE_PREFIX(str, value, sz) \ else if (argType.startswith(str)) \ { \ type = value; \ typeSize = sz; \ } // Set default type using kernel introspection if (false) ; MATCH_TYPE_PREFIX("char", TYPE_CHAR, 1) MATCH_TYPE_PREFIX("uchar", TYPE_UCHAR, 1) MATCH_TYPE_PREFIX("short", TYPE_SHORT, 2) MATCH_TYPE_PREFIX("ushort", TYPE_USHORT, 2) MATCH_TYPE_PREFIX("int", TYPE_INT, 4) MATCH_TYPE_PREFIX("uint", TYPE_UINT, 4) MATCH_TYPE_PREFIX("long", TYPE_LONG, 8) MATCH_TYPE_PREFIX("ulong", TYPE_ULONG, 8) MATCH_TYPE_PREFIX("float", TYPE_FLOAT, 4) MATCH_TYPE_PREFIX("double", 
TYPE_DOUBLE, 8) MATCH_TYPE_PREFIX("void*", TYPE_UCHAR, 1) else { throw "Invalid default kernel argument type"; } } // Ensure argument data size is a multiple of format type size if (size % typeSize) { throw "Initialiser type must exactly divide argument size"; } // Ensure 'dump' only used with non-null buffers if (dump) { if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL && addrSpace != CL_KERNEL_ARG_ADDRESS_CONSTANT) { throw "'dump' only valid for memory objects"; } } // Ensure only one initializer given unsigned numInitializers = 0; if (noinit) numInitializers++; if (!fill.empty()) numInitializers++; if (!range.empty()) numInitializers++; if (numInitializers > 1) { throw "Multiple initializers present"; } // Generate argument data TypedValue value; value.size = argSize; value.num = 1; if (addrSpace == CL_KERNEL_ARG_ADDRESS_LOCAL) { value.size = size; value.data = NULL; } else if (null) { value.data = new unsigned char[value.size]; memset(value.data, 0, value.size); } else { // Parse argument data unsigned char* data = new unsigned char[size]; if (noinit) { } else if (!fill.empty()) { istringstream fillStream(fill); fillStream.copyfmt(m_lineBuffer); #define FILL_TYPE(type, T) \ case type: \ parseFill(data, size, fillStream); \ break; switch (type) { FILL_TYPE(TYPE_CHAR, int8_t); FILL_TYPE(TYPE_UCHAR, uint8_t); FILL_TYPE(TYPE_SHORT, int16_t); FILL_TYPE(TYPE_USHORT, uint16_t); FILL_TYPE(TYPE_INT, int32_t); FILL_TYPE(TYPE_UINT, uint32_t); FILL_TYPE(TYPE_LONG, int64_t); FILL_TYPE(TYPE_ULONG, uint64_t); FILL_TYPE(TYPE_FLOAT, float); FILL_TYPE(TYPE_DOUBLE, double); default: throw "Invalid argument data type"; } } else if (!range.empty()) { istringstream rangeStream(range); rangeStream.copyfmt(m_lineBuffer); #define RANGE_TYPE(type, T) \ case type: \ parseRange(data, size, rangeStream); \ break; switch (type) { RANGE_TYPE(TYPE_CHAR, int8_t); RANGE_TYPE(TYPE_UCHAR, uint8_t); RANGE_TYPE(TYPE_SHORT, int16_t); RANGE_TYPE(TYPE_USHORT, uint16_t); RANGE_TYPE(TYPE_INT, int32_t); RANGE_TYPE(TYPE_UINT, uint32_t); RANGE_TYPE(TYPE_LONG, int64_t); RANGE_TYPE(TYPE_ULONG, uint64_t); RANGE_TYPE(TYPE_FLOAT, float); RANGE_TYPE(TYPE_DOUBLE, double); default: throw "Invalid argument data type"; } } else if (addrSpace != CL_KERNEL_ARG_ADDRESS_LOCAL) { #define PARSE_TYPE(type, T) \ case type: \ parseArgumentData(data, size); \ break; switch (type) { PARSE_TYPE(TYPE_CHAR, int8_t); PARSE_TYPE(TYPE_UCHAR, uint8_t); PARSE_TYPE(TYPE_SHORT, int16_t); PARSE_TYPE(TYPE_USHORT, uint16_t); PARSE_TYPE(TYPE_INT, int32_t); PARSE_TYPE(TYPE_UINT, uint32_t); PARSE_TYPE(TYPE_LONG, int64_t); PARSE_TYPE(TYPE_ULONG, uint64_t); PARSE_TYPE(TYPE_FLOAT, float); PARSE_TYPE(TYPE_DOUBLE, double); default: throw "Invalid argument data type"; } } if (addrSpace == CL_KERNEL_ARG_ADDRESS_PRIVATE) { value.data = data; } else { // Allocate buffer and store content Memory* globalMemory = m_context->getGlobalMemory(); size_t address = globalMemory->allocateBuffer(size, flags); if (!address) throw "Failed to allocate global memory"; if (!noinit) globalMemory->store((unsigned char*)&data[0], address, size); value.data = new unsigned char[value.size]; value.setPointer(address); delete[] data; if (dump) { DumpArg dump = {address, size, type, name, hex}; m_dumpArguments.push_back(dump); } } } // Set argument value m_kernel->setArgument(index, value); if (value.data) { delete[] value.data; } // Reset parsing format m_lineBuffer.flags(previousFormat); } template void Simulation::parseArgumentData(unsigned char* result, size_t size) { vector data; for (int i = 0; i 
< size / sizeof(T); i++) { T value; if (sizeof(T) == 1) { int intval; get(intval); INT_TO_CHAR(intval, value); } else { get(value); } data.push_back(value); } memcpy(result, &data[0], size); } template void Simulation::parseFill(unsigned char* result, size_t size, istringstream& fill) { T value = readValue(fill); for (int i = 0; i < size / sizeof(T); i++) { ((T*)result)[i] = value; } if (fill.fail() || !fill.eof()) { throw "Invalid fill value"; } } template void Simulation::parseRange(unsigned char* result, size_t size, istringstream& range) { // Parse range format T values[3]; for (int i = 0; i < 3; i++) { values[i] = readValue(range); if (i < 2) { char colon = 0; range >> colon; if (range.fail() || colon != ':') { throw "Invalid range format"; } } } if (range.fail() || !range.eof()) { throw "Invalid range format"; } // Ensure range is value double num = (values[2] - values[0] + values[1]) / (double)values[1]; if (ceil(num) != num || num * sizeof(T) != size) { throw "Range doesn't produce correct buffer size"; } // Produce range values T value = values[0]; for (size_t i = 0; i < num; i++) { ((T*)result)[i] = value; value += values[1]; } } void Simulation::run(bool dumpGlobalMemory) { assert(m_kernel && m_program); assert(m_kernel->allArgumentsSet()); Size3 offset(0, 0, 0); KernelInvocation::run(m_context, m_kernel, 3, offset, m_ndrange, m_wgsize); // Dump individual arguments cout << dec; list::iterator itr; for (itr = m_dumpArguments.begin(); itr != m_dumpArguments.end(); itr++) { cout << endl << "Argument '" << itr->name << "': " << itr->size << " bytes" << endl; #define DUMP_TYPE(type, T) \ case type: \ dumpArgument(*itr); \ break; switch (itr->type) { DUMP_TYPE(TYPE_CHAR, int8_t); DUMP_TYPE(TYPE_UCHAR, uint8_t); DUMP_TYPE(TYPE_SHORT, int16_t); DUMP_TYPE(TYPE_USHORT, uint16_t); DUMP_TYPE(TYPE_INT, int32_t); DUMP_TYPE(TYPE_UINT, uint32_t); DUMP_TYPE(TYPE_LONG, int64_t); DUMP_TYPE(TYPE_ULONG, uint64_t); DUMP_TYPE(TYPE_FLOAT, float); DUMP_TYPE(TYPE_DOUBLE, double); default: throw "Invalid argument data type"; } } // Dump global memory if required if (dumpGlobalMemory) { cout << endl << "Global Memory:" << endl; m_context->getGlobalMemory()->dump(); } } template T readValue(istream& stream) { T value; if (sizeof(T) == 1) { int intval; stream >> intval; INT_TO_CHAR(intval, value); } else { stream >> value; } return value; } Oclgrind-21.10/src/kernel/Simulation.h000066400000000000000000000032751413315665100176360ustar00rootroot00000000000000// Simulation.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
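// Usage sketch (illustrative; mirrors main() in oclgrind-kernel.cpp, the
// simfile name here is hypothetical):
//
//   Simulation sim;
//   if (!sim.load("vecadd.sim"))
//     return 1;
//   sim.run(/*dumpGlobalMemory=*/true);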
#include "core/common.h" #include #include #include #include namespace oclgrind { class Context; class Kernel; class Program; }; // namespace oclgrind class Simulation { enum ArgDataType { TYPE_NONE, TYPE_CHAR, TYPE_UCHAR, TYPE_SHORT, TYPE_USHORT, TYPE_INT, TYPE_UINT, TYPE_LONG, TYPE_ULONG, TYPE_FLOAT, TYPE_DOUBLE, }; public: Simulation(); virtual ~Simulation(); bool load(const char* filename); void run(bool dumpGlobalMemory = false); private: oclgrind::Context* m_context; oclgrind::Kernel* m_kernel; oclgrind::Program* m_program; oclgrind::Size3 m_ndrange; oclgrind::Size3 m_wgsize; std::ifstream m_simfile; std::string m_parsing; size_t m_lineNumber; std::istringstream m_lineBuffer; struct DumpArg { size_t address; size_t size; ArgDataType type; std::string name; bool hex; }; std::list m_dumpArguments; template void dumpArgument(DumpArg& arg); template void get(T& result); void parseArgument(size_t index); template void parseArgumentData(unsigned char* result, size_t size); template void parseFill(unsigned char* result, size_t size, std::istringstream& fill); template void parseRange(unsigned char* result, size_t size, std::istringstream& range); }; Oclgrind-21.10/src/kernel/oclgrind-kernel.cpp000066400000000000000000000207101413315665100211150ustar00rootroot00000000000000// main.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "config.h" #include #include #include #include #include "kernel/Simulation.h" using namespace oclgrind; using namespace std; static bool outputGlobalMemory = false; static const char* simfile = NULL; static bool parseArguments(int argc, char* argv[]); static void printUsage(); static void setEnvironment(const char* name, const char* value); int main(int argc, char* argv[]) { // Parse arguments if (!parseArguments(argc, argv)) { return 1; } // Initialise simulation Simulation simulation; if (!simulation.load(simfile)) { return 1; } // Run simulation simulation.run(outputGlobalMemory); } static bool parseArguments(int argc, char* argv[]) { for (int i = 1; i < argc; i++) { if (!strcmp(argv[i], "--build-options")) { if (++i >= argc) { cerr << "Missing argument to --build-options" << endl; return false; } setEnvironment("OCLGRIND_BUILD_OPTIONS", argv[i]); } else if (!strcmp(argv[i], "--compute-units")) { if (++i >= argc) { cerr << "Missing argument to --compute-units" << endl; return false; } setEnvironment("OCLGRIND_COMPUTE_UNITS", argv[i]); } else if (!strcmp(argv[i], "--constant-mem-size")) { if (++i >= argc) { cerr << "Missing argument to --constant-mem-size" << endl; return false; } setEnvironment("OCLGRIND_CONSTANT_MEM_SIZE", argv[i]); } else if (!strcmp(argv[i], "--data-races")) { setEnvironment("OCLGRIND_DATA_RACES", "1"); } else if (!strcmp(argv[i], "--disable-pch")) { setEnvironment("OCLGRIND_DISABLE_PCH", "1"); } else if (!strcmp(argv[i], "--dump-spir")) { setEnvironment("OCLGRIND_DUMP_SPIR", "1"); } else if (!strcmp(argv[i], "-g") || !strcmp(argv[i], "--global-mem")) { outputGlobalMemory = true; } else if (!strcmp(argv[i], "--global-mem-size")) { if (++i >= argc) { cerr << "Missing argument to --global-mem-size" << endl; return false; } setEnvironment("OCLGRIND_GLOBAL_MEM_SIZE", argv[i]); } else if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help")) { printUsage(); exit(0); } else if (!strcmp(argv[i], 
"--inst-counts")) { setEnvironment("OCLGRIND_INST_COUNTS", "1"); } else if (!strcmp(argv[i], "-i") || !strcmp(argv[i], "--interactive")) { setEnvironment("OCLGRIND_INTERACTIVE", "1"); } else if (!strcmp(argv[i], "--local-mem-size")) { if (++i >= argc) { cerr << "Missing argument to --local-mem-size" << endl; return false; } setEnvironment("OCLGRIND_LOCAL_MEM_SIZE", argv[i]); } else if (!strcmp(argv[i], "--log")) { if (++i >= argc) { cerr << "Missing argument to --log" << endl; return false; } setEnvironment("OCLGRIND_LOG", argv[i]); } else if (!strcmp(argv[i], "--max-errors")) { if (++i >= argc) { cerr << "Missing argument to --max-errors" << endl; return false; } setEnvironment("OCLGRIND_MAX_ERRORS", argv[i]); } else if (!strcmp(argv[i], "--max-wgsize")) { if (++i >= argc) { cerr << "Missing argument to --max-wgsize" << endl; return false; } setEnvironment("OCLGRIND_MAX_WGSIZE", argv[i]); } else if (!strcmp(argv[i], "--num-threads")) { if (++i >= argc) { cerr << "Missing argument to --num-threads" << endl; return false; } setEnvironment("OCLGRIND_NUM_THREADS", argv[i]); } else if (!strcmp(argv[i], "--pch-dir")) { if (++i >= argc) { cerr << "Missing argument to --pch-dir" << endl; return false; } setEnvironment("OCLGRIND_PCH_DIR", argv[i]); } else if (!strcmp(argv[i], "--plugins")) { if (++i >= argc) { cerr << "Missing argument to --plugins" << endl; return false; } setEnvironment("OCLGRIND_PLUGINS", argv[i]); } else if (!strcmp(argv[i], "-q") || !strcmp(argv[i], "--quick")) { setEnvironment("OCLGRIND_QUICK", "1"); } else if (!strcmp(argv[i], "--uniform-writes")) { setEnvironment("OCLGRIND_UNIFORM_WRITES", "1"); } else if (!strcmp(argv[i], "--uninitialized")) { setEnvironment("OCLGRIND_UNINITIALIZED", "1"); } else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) { cout << endl; cout << "Oclgrind " PACKAGE_VERSION << endl; cout << endl; cout << "Copyright (c) 2013-2019" << endl; cout << "James Price and Simon McIntosh-Smith, University of Bristol" << endl; cout << "https://github.com/jrprice/Oclgrind" << endl; cout << endl; exit(0); } else if (argv[i][0] == '-') { cerr << "Unrecognised option '" << argv[i] << "'" << endl; return false; } else { if (simfile == NULL) { simfile = argv[i]; } else { cerr << "Unexpected positional argument '" << argv[i] << "'" << endl; return false; } } } if (simfile == NULL) { printUsage(); return false; } return true; } static void printUsage() { cout << "Usage: oclgrind-kernel [OPTIONS] simfile" << endl << " oclgrind-kernel [--help | --version]" << endl << endl << "Options:" << endl << " --build-options OPTIONS " "Additional options to pass to the OpenCL compiler" << endl << " --compute-units UNITS " "Change the number of compute units reported" << endl << " --constant-mem-size BYTES " "Change the constant memory size of the device" << endl << " --data-races " "Enable data-race detection" << endl << " --disable-pch " "Don't use precompiled headers" << endl << " --dump-spir " "Dump SPIR to /tmp/oclgrind_*.{ll,bc}" << endl << " --global-mem [-g] " "Output global memory at exit" << endl << " --global-mem-size BYTES " "Change the global memory size of the device" << endl << " --help [-h] " "Display usage information" << endl << " --inst-counts " "Output histograms of instructions executed" << endl << " --interactive [-i] " "Enable interactive mode" << endl << " --local-mem-size BYTES " "Change the local memory size of the device" << endl << " --log LOGFILE " "Redirect log/error messages to a file" << endl << " --max-errors NUM " "Limit the number of 
error/warning messages" << endl << " --max-wgsize WGSIZE " "Change the maximum work-group size of the device" << endl << " --num-threads NUM " "Set the number of worker threads to use" << endl << " --pch-dir DIR " "Override directory containing precompiled headers" << endl << " --plugins PLUGINS " "Load colon separated list of plugin libraries" << endl << " --quick [-q] " "Only run first and last work-group" << endl << " --uniform-writes " "Don't suppress uniform write-write data-races" << endl << " --uninitialized " "Report usage of uninitialized values" << endl << " --version [-q] " "Display version information" << endl << endl << "For more information, please visit the Oclgrind wiki page:" << endl << "-> https://github.com/jrprice/Oclgrind/wiki" << endl << endl; } static void setEnvironment(const char* name, const char* value) { #if defined(_WIN32) && !defined(__MINGW32__) _putenv_s(name, value); #else setenv(name, value, 1); #endif } Oclgrind-21.10/src/plugins/000077500000000000000000000000001413315665100155335ustar00rootroot00000000000000Oclgrind-21.10/src/plugins/InstructionCounter.cpp000066400000000000000000000147621413315665100221320ustar00rootroot00000000000000// InstructionCounter.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "core/common.h" #include #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "InstructionCounter.h" #include "core/Kernel.h" #include "core/KernelInvocation.h" using namespace oclgrind; using namespace std; #define COUNTED_LOAD_BASE (llvm::Instruction::OtherOpsEnd + 4) #define COUNTED_STORE_BASE (COUNTED_LOAD_BASE + 8) #define COUNTED_CALL_BASE (COUNTED_STORE_BASE + 8) THREAD_LOCAL InstructionCounter::WorkerState InstructionCounter::m_state = { NULL}; static bool compareNamedCount(pair a, pair b) { if (a.second > b.second) return true; else if (a.second < b.second) return false; else return a.first < b.first; } string InstructionCounter::getOpcodeName(unsigned opcode) const { if (opcode >= COUNTED_CALL_BASE) { // Get function name unsigned index = opcode - COUNTED_CALL_BASE; assert(index < m_functions.size()); return "call " + m_functions[index]->getName().str() + "()"; } else if (opcode >= COUNTED_LOAD_BASE) { // Create stream using default locale ostringstream name; locale defaultLocale(""); name.imbue(defaultLocale); // Get number of bytes size_t bytes = m_memopBytes[opcode - COUNTED_LOAD_BASE]; // Get name of operation if (opcode >= COUNTED_STORE_BASE) { opcode -= COUNTED_STORE_BASE; name << "store"; } else { opcode -= COUNTED_LOAD_BASE; name << "load"; } // Add address space to name name << " " << getAddressSpaceName(opcode); // Add number of bytes to name name << " (" << bytes << " bytes)"; return name.str(); } return llvm::Instruction::getOpcodeName(opcode); } void InstructionCounter::instructionExecuted( const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) { unsigned opcode = instruction->getOpcode(); // Check for loads and stores if (opcode == llvm::Instruction::Load || opcode == llvm::Instruction::Store) { // Track operations in separate address spaces bool load = (opcode == llvm::Instruction::Load); const llvm::Type* type = instruction->getOperand(load ? 
0 : 1)->getType(); unsigned addrSpace = type->getPointerAddressSpace(); opcode = (load ? COUNTED_LOAD_BASE : COUNTED_STORE_BASE) + addrSpace; // Count total number of bytes loaded/stored unsigned bytes = getTypeSize(type->getPointerElementType()); (*m_state.memopBytes)[opcode - COUNTED_LOAD_BASE] += bytes; } else if (opcode == llvm::Instruction::Call) { // Track distinct function calls const llvm::CallInst* callInst = (const llvm::CallInst*)instruction; const llvm::Function* function = callInst->getCalledFunction(); if (function) { vector::iterator itr = find(m_state.functions->begin(), m_state.functions->end(), function); if (itr == m_state.functions->end()) { opcode = COUNTED_CALL_BASE + m_state.functions->size(); m_state.functions->push_back(function); } else { opcode = COUNTED_CALL_BASE + (itr - m_state.functions->begin()); } } } if (opcode >= m_state.instCounts->size()) { m_state.instCounts->resize(opcode + 1); } (*m_state.instCounts)[opcode]++; } void InstructionCounter::kernelBegin(const KernelInvocation* kernelInvocation) { m_instructionCounts.clear(); m_memopBytes.clear(); m_memopBytes.resize(16); m_functions.clear(); } void InstructionCounter::kernelEnd(const KernelInvocation* kernelInvocation) { // Load default locale locale previousLocale = cout.getloc(); locale defaultLocale(""); cout.imbue(defaultLocale); cout << "Instructions executed for kernel '" << kernelInvocation->getKernel()->getName() << "':"; cout << endl; // Generate list named instructions and their counts vector> namedCounts; for (unsigned i = 0; i < m_instructionCounts.size(); i++) { if (m_instructionCounts[i] == 0) { continue; } string name = getOpcodeName(i); if (name.compare(0, 14, "call llvm.dbg.") == 0) { continue; } namedCounts.push_back(make_pair(name, m_instructionCounts[i])); } // Sort named counts sort(namedCounts.begin(), namedCounts.end(), compareNamedCount); // Output sorted instruction counts for (unsigned i = 0; i < namedCounts.size(); i++) { cout << setw(16) << dec << namedCounts[i].second << " - " << namedCounts[i].first << endl; } cout << endl; // Restore locale cout.imbue(previousLocale); } void InstructionCounter::workGroupBegin(const WorkGroup* workGroup) { // Create worker state if haven't already if (!m_state.instCounts) { m_state.instCounts = new vector; m_state.memopBytes = new vector; m_state.functions = new vector; } m_state.instCounts->clear(); m_state.instCounts->resize(COUNTED_CALL_BASE); m_state.memopBytes->clear(); m_state.memopBytes->resize(16); m_state.functions->clear(); } void InstructionCounter::workGroupComplete(const WorkGroup* workGroup) { lock_guard lock(m_mtx); if (m_state.instCounts->size() > m_instructionCounts.size()) m_instructionCounts.resize(m_state.instCounts->size()); // Merge instruction counts into global list for (unsigned i = 0; i < m_state.instCounts->size(); i++) { if (m_state.instCounts->at(i) == 0) continue; // Merge functions into global list unsigned opcode = i; if (i >= COUNTED_CALL_BASE) { const llvm::Function* func = m_state.functions->at(i - COUNTED_CALL_BASE); vector::iterator itr = find(m_functions.begin(), m_functions.end(), func); if (itr == m_functions.end()) { opcode = COUNTED_CALL_BASE + m_functions.size(); m_functions.push_back(func); } else { opcode = COUNTED_CALL_BASE + (itr - m_functions.begin()); } } m_instructionCounts[opcode] += m_state.instCounts->at(i); } // Merge memory transfer sizes into global list for (unsigned i = 0; i < m_state.memopBytes->size(); i++) m_memopBytes[i] += m_state.memopBytes->at(i); } 
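// Example of the per-kernel report printed by kernelEnd() above (kernel
// name and figures are purely illustrative; exact spacing and digit
// grouping depend on the user's locale):
//
//   Instructions executed for kernel 'vecadd':
//              512 - load global (2048 bytes)
//              256 - store global (1024 bytes)
//              256 - fadd
//
// Entries are ordered by compareNamedCount(): descending count first, then
// alphabetically by name for equal counts.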
Oclgrind-21.10/src/plugins/InstructionCounter.h000066400000000000000000000026701413315665100215720ustar00rootroot00000000000000// InstructionCounter.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "core/Plugin.h" #include namespace llvm { class Function; } namespace oclgrind { class InstructionCounter : public Plugin { public: InstructionCounter(const Context* context) : Plugin(context){}; virtual void instructionExecuted(const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) override; virtual void kernelBegin(const KernelInvocation* kernelInvocation) override; virtual void kernelEnd(const KernelInvocation* kernelInvocation) override; virtual void workGroupBegin(const WorkGroup* workGroup) override; virtual void workGroupComplete(const WorkGroup* workGroup) override; private: std::vector m_instructionCounts; std::vector m_memopBytes; std::vector m_functions; struct WorkerState { std::vector* instCounts; std::vector* memopBytes; std::vector* functions; }; static THREAD_LOCAL WorkerState m_state; std::mutex m_mtx; std::string getOpcodeName(unsigned opcode) const; }; } // namespace oclgrind Oclgrind-21.10/src/plugins/InteractiveDebugger.cpp000066400000000000000000000541531413315665100221710ustar00rootroot00000000000000// InteractiveDebugger.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
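// Illustrative session at the "(oclgrind) " prompt (commands map to the
// ADD_CMD table below; the line number and variable name are hypothetical):
//
//   break 12        -- set a breakpoint at source line 12
//   continue        -- resume until a breakpoint (or reported error) is hit
//   backtrace       -- show the current function call stack
//   print result    -- print the value of the variable 'result'
//   workitem 3 0 0  -- switch focus to the work-item with global ID (3,0,0)
//   step            -- advance a single source line
//   quit            -- leave the interactive debugger
//
// Each command also has the short alias listed in help() (b, c, bt, p, wi,
// s, q).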
#include "config.h" #include "core/common.h" #include #include #if !defined(_WIN32) || defined(__MINGW32__) #include #include #else #include #define isatty _isatty #define STDIN_FILENO _fileno(stdin) #endif #if HAVE_READLINE #include #include #endif #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "InteractiveDebugger.h" #include "core/Context.h" #include "core/Kernel.h" #include "core/KernelInvocation.h" #include "core/Memory.h" #include "core/Program.h" #include "core/WorkGroup.h" #include "core/WorkItem.h" using namespace oclgrind; using namespace std; #define LIST_LENGTH 10 static bool sigintBreak = false; #if !defined(_WIN32) || defined(__MINGW32__) static struct sigaction m_oldSignalHandler; void handleSignal(int s) { if (s == SIGINT) sigintBreak = true; } #endif InteractiveDebugger::InteractiveDebugger(const Context* context) : Plugin(context) { m_running = true; m_forceBreak = false; m_nextBreakpoint = 1; m_program = NULL; m_kernelInvocation = NULL; // Set-up commands #define ADD_CMD(name, sname, func) \ m_commands[name] = &InteractiveDebugger::func; \ m_commands[sname] = &InteractiveDebugger::func; ADD_CMD("backtrace", "bt", backtrace); ADD_CMD("break", "b", brk); ADD_CMD("continue", "c", cont); ADD_CMD("delete", "d", del); ADD_CMD("gmem", "gm", mem); ADD_CMD("help", "h", help); ADD_CMD("info", "i", info); ADD_CMD("list", "l", list); ADD_CMD("lmem", "lm", mem); ADD_CMD("next", "n", next); ADD_CMD("pmem", "pm", mem); ADD_CMD("print", "p", print); ADD_CMD("quit", "q", quit); ADD_CMD("step", "s", step); ADD_CMD("workitem", "wi", workitem); } void InteractiveDebugger::instructionExecuted( const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) { if (!shouldShowPrompt(workItem)) return; #if !defined(_WIN32) || defined(__MINGW32__) // Restore old signal handler sigaction(SIGINT, &m_oldSignalHandler, NULL); #endif m_forceBreak = false; sigintBreak = false; // Print function if changed if (m_previousDepth != workItem->getCallStack().size() && workItem->getState() != WorkItem::FINISHED) { cout << "In function "; printFunction(workItem->getCurrentInstruction()); } printCurrentLine(); m_listPosition = 0; m_continue = false; m_next = false; bool interactive = isatty(STDIN_FILENO); while (true) { // Prompt for command bool eof = false; string cmd; #if HAVE_READLINE if (interactive) { char* line = readline("(oclgrind) "); if (line) { cmd = line; free(line); } else { eof = true; } } else #endif { if (interactive) cout << "(oclgrind) " << flush; getline(cin, cmd); eof = cin.eof(); } // Quit on EOF if (eof) { if (interactive) cout << "(quit)" << endl; quit(vector()); return; } // Split command into tokens vector tokens; istringstream iss(cmd); copy(istream_iterator(iss), istream_iterator(), back_inserter>(tokens)); // Skip empty lines if (tokens.size() == 0) { continue; } #if HAVE_READLINE if (interactive) add_history(cmd.c_str()); #endif // Find command in map and execute map::iterator itr = m_commands.find(tokens[0]); if (itr != m_commands.end()) { if ((this->*itr->second)(tokens)) break; } else { cout << "Unrecognized command '" << tokens[0] << "'" << endl; } } } bool InteractiveDebugger::isThreadSafe() const { return false; } void InteractiveDebugger::kernelBegin(const KernelInvocation* kernelInvocation) { m_continue = false; m_lastBreakLine = 0; m_listPosition = 0; m_next = false; m_previousDepth = 0; m_previousLine = 0; m_kernelInvocation = kernelInvocation; m_program = 
kernelInvocation->getKernel()->getProgram(); } void InteractiveDebugger::kernelEnd(const KernelInvocation* kernelInvocation) { m_kernelInvocation = NULL; #if !defined(_WIN32) || defined(__MINGW32__) // Restore old signal handler sigaction(SIGINT, &m_oldSignalHandler, NULL); #endif } void InteractiveDebugger::log(MessageType type, const char* message) { if (type == ERROR) m_forceBreak = true; } /////////////////////////// //// Utility Functions //// /////////////////////////// size_t InteractiveDebugger::getCurrentLineNumber() const { const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem || workItem->getState() == WorkItem::FINISHED) { return 0; } return getLineNumber(workItem->getCurrentInstruction()); } size_t InteractiveDebugger::getLineNumber(const llvm::Instruction* instruction) const { llvm::MDNode* md = instruction->getMetadata("dbg"); if (md) { return ((llvm::DILocation*)md)->getLine(); } return 0; } bool InteractiveDebugger::hasHitBreakpoint() { if (m_breakpoints.empty()) return false; // Check if we have passed over the previous breakpoint if (m_lastBreakLine) { if (getCurrentLineNumber() != m_lastBreakLine) m_lastBreakLine = 0; else return false; ; } // Check if we're at a breakpoint size_t line = getCurrentLineNumber(); map::iterator itr; for (itr = m_breakpoints[m_program].begin(); itr != m_breakpoints[m_program].end(); itr++) { if (itr->second == line) { cout << "Breakpoint " << itr->first << " hit at line " << itr->second << " by work-item " << m_kernelInvocation->getCurrentWorkItem()->getGlobalID() << endl; m_lastBreakLine = line; m_listPosition = 0; return true; } } return false; } void InteractiveDebugger::printCurrentLine() const { const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem || workItem->getState() == WorkItem::FINISHED) { return; } size_t lineNum = getCurrentLineNumber(); if (m_program->getNumSourceLines() && lineNum > 0) { printSourceLine(lineNum); } else { cout << "Source line not available." 
<< endl; dumpInstruction(cout, workItem->getCurrentInstruction()); cout << endl; } } void InteractiveDebugger::printFunction( const llvm::Instruction* instruction) const { // Get function const llvm::Function* function = instruction->getParent()->getParent(); cout << function->getName().str() << "("; // Print arguments llvm::Function::const_arg_iterator argItr; for (argItr = function->arg_begin(); argItr != function->arg_end(); argItr++) { if (argItr != function->arg_begin()) { cout << ", "; } cout << argItr->getName().str() << "="; m_kernelInvocation->getCurrentWorkItem()->printValue(&*argItr); } cout << ") at line " << dec << getLineNumber(instruction) << endl; } void InteractiveDebugger::printSourceLine(size_t lineNum) const { const char* line = m_program->getSourceLine(lineNum); if (line) { cout << dec << lineNum << "\t" << line << endl; } else { cout << "Invalid line number: " << lineNum << endl; } } bool InteractiveDebugger::shouldShowPrompt(const WorkItem* workItem) { if (!m_running) return false; if (m_forceBreak || sigintBreak) return true; if (hasHitBreakpoint()) return true; if (m_continue) return false; if (workItem->getState() == WorkItem::BARRIER) return true; if (workItem->getState() == WorkItem::FINISHED) return true; if (!m_program->getNumSourceLines()) return true; size_t line = getCurrentLineNumber(); if (m_next && workItem->getCallStack().size() > m_previousDepth) return false; if (!line || line == m_previousLine) return false; return true; } ////////////////////////////// //// Interactive Commands //// ////////////////////////////// bool InteractiveDebugger::backtrace(vector args) { const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem || workItem->getState() == WorkItem::FINISHED) { return false; } stack callStack = workItem->getCallStack(); // Print current instruction cout << "#" << callStack.size() << " "; printFunction(workItem->getCurrentInstruction()); // Print call stack while (!callStack.empty()) { cout << "#" << (callStack.size() - 1) << " "; printFunction(callStack.top()); callStack.pop(); } return false; } bool InteractiveDebugger::brk(vector args) { if (!m_program->getNumSourceLines()) { cout << "Breakpoints only valid when source is available." << endl; return false; } size_t lineNum = getCurrentLineNumber(); if (args.size() > 1) { // Parse argument as a target line number istringstream ss(args[1]); ss >> lineNum; if (!ss.eof() || !lineNum || lineNum > m_program->getNumSourceLines() + 1) { cout << "Invalid line number." << endl; return false; } } if (lineNum) { m_breakpoints[m_program][m_nextBreakpoint++] = lineNum; } else { cout << "Not currently on a line." << endl; } return false; } bool InteractiveDebugger::cont(vector args) { #if !defined(_WIN32) || defined(__MINGW32__) // Register a signal handler to catch interrupts struct sigaction sigHandler; sigHandler.sa_handler = handleSignal; sigemptyset(&sigHandler.sa_mask); sigHandler.sa_flags = 0; sigaction(SIGINT, &sigHandler, &m_oldSignalHandler); #endif m_continue = true; return true; } bool InteractiveDebugger::del(vector args) { if (args.size() > 1) { // Parse argument as a target breakpoint size_t bpNum = 0; istringstream ss(args[1]); ss >> bpNum; if (!ss.eof()) { cout << "Invalid breakpoint number." << endl; return false; } // Ensure breakpoint exists if (!m_breakpoints[m_program].count(bpNum)) { cout << "Breakpoint not found." 
<< endl; return false; } m_breakpoints[m_program].erase(bpNum); } else { // Prompt for confimation string confirm; cout << "Delete all breakpoints? (y/n) " << flush; cin >> confirm; cin.ignore(); if (confirm == "y") { m_breakpoints.clear(); } } return false; } bool InteractiveDebugger::help(vector args) { if (args.size() < 2) { cout << "Command list:" << endl; cout << " backtrace (bt)" << endl; cout << " break (b)" << endl; cout << " continue (c)" << endl; cout << " delete (d)" << endl; cout << " gmem (gm)" << endl; cout << " help (h)" << endl; cout << " info (i)" << endl; cout << " list (l)" << endl; cout << " next (n)" << endl; cout << " lmem (lm)" << endl; cout << " pmem (pm)" << endl; cout << " print (p)" << endl; cout << " quit (q)" << endl; cout << " step (s)" << endl; cout << " workitem (wi)" << endl; cout << "(type 'help command' for more information)" << endl; return false; } if (args[1] == "backtrace" || args[1] == "bt") { cout << "Print function call stack." << endl; } else if (args[1] == "break" || args[1] == "b") { cout << "Set a breakpoint" << " (only functional when source is available)." << endl << "With no arguments, sets a breakpoint at the current line." << endl << "Use a numeric argument to set a breakpoint at a specific line." << endl; } else if (args[1] == "continue" || args[1] == "c") { cout << "Continue kernel execution until next breakpoint." << endl; } else if (args[1] == "delete" || args[1] == "d") { cout << "Delete a breakpoint." << endl << "With no arguments, deletes all breakpoints." << endl; } else if (args[1] == "help" || args[1] == "h") { cout << "Display usage information for a command." << endl; } else if (args[1] == "info" || args[1] == "i") { cout << "Display information about current debugging context." << endl << "With no arguments, displays general information." << endl << "'info break' lists breakpoints." << endl; } else if (args[1] == "list" || args[1] == "l") { cout << "List source lines." << endl << "With no argument, lists " << LIST_LENGTH << " lines after previous listing." << endl << "Use - to list " << LIST_LENGTH << " lines before the previous listing" << endl << "Use a numeric argument to list around a specific line number." << endl; } else if (args[1] == "gmem" || args[1] == "lmem" || args[1] == "pmem" || args[1] == "gm" || args[1] == "lm" || args[1] == "pm") { cout << "Examine contents of "; if (args[1] == "gmem") cout << "global"; if (args[1] == "lmem") cout << "local"; if (args[1] == "pmem") cout << "private"; cout << " memory." << endl << "With no arguments, dumps entire contents of memory." << endl << "'" << args[1] << " address [size]'" << endl << "address is hexadecimal and 4-byte aligned." << endl; } else if (args[1] == "next" || args[1] == "n") { cout << "Step forward," << " treating function calls as single instruction." << endl; } else if (args[1] == "print" || args[1] == "p") { cout << "Print the values of one or more variables." << endl << "'print x y' prints the values of x and y" << endl << "'print foo[i]' prints a value at a constant array index" << endl; } else if (args[1] == "quit" || args[1] == "q") { cout << "Quit interactive debugger." << endl; } else if (args[1] == "step" || args[1] == "s") { cout << "Step forward a single source line," << " or an instruction if no source available." << endl; } else if (args[1] == "workitem" || args[1] == "wi") { cout << "Switch to a different work-item." << endl << "Up to three (space separated) arguments allowed," << " specifying the global ID of the work-item." 
<< endl; } else { cout << "Unrecognized command '" << args[1] << "'" << endl; } return false; } bool InteractiveDebugger::info(vector args) { if (args.size() > 1) { if (args[1] == "break") { // List breakpoints map::iterator itr; for (itr = m_breakpoints[m_program].begin(); itr != m_breakpoints[m_program].end(); itr++) { cout << "Breakpoint " << itr->first << ": Line " << itr->second << endl; } } else { cout << "Invalid info command: " << args[1] << endl; } return false; } // Kernel invocation information cout << dec << "Running kernel '" << m_kernelInvocation->getKernel()->getName() << "'" << endl << "-> Global work size: " << m_kernelInvocation->getGlobalSize() << endl << "-> Global work offset: " << m_kernelInvocation->getGlobalOffset() << endl << "-> Local work size: " << m_kernelInvocation->getLocalSize() << endl; // Current work-item const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); if (workItem) { cout << endl << "Current work-item: " << workItem->getGlobalID() << endl; if (workItem->getState() == WorkItem::FINISHED) { cout << "Work-item has finished." << endl; } else { cout << "In function "; printFunction(workItem->getCurrentInstruction()); printCurrentLine(); } } else { cout << "All work-items finished." << endl; } return false; } bool InteractiveDebugger::list(vector args) { const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem) { cout << "All work-items finished." << endl; return false; } if (!m_program->getNumSourceLines()) { cout << "No source code available." << endl; return false; } // Check for an argument size_t start = 0; bool forwards = true; if (args.size() > 1) { if (args[1] == "-") { forwards = false; } else { // Parse argument as a target line number istringstream ss(args[1]); ss >> start; if (!ss.eof()) { cout << "Invalid line number." << endl; return false; } start = start > LIST_LENGTH / 2 ? start - LIST_LENGTH / 2 : 1; } } if (!start) { if (forwards) { // Starting position is the previous list position + LIST_LENGTH start = m_listPosition ? m_listPosition + LIST_LENGTH : getCurrentLineNumber() + 1; if (start >= m_program->getNumSourceLines() + 1) { m_listPosition = m_program->getNumSourceLines() + 1; return false; } } else { // Starting position is the previous list position - LIST_LENGTH start = m_listPosition ? m_listPosition : getCurrentLineNumber(); start = start > LIST_LENGTH ? start - LIST_LENGTH : 1; } } // Display lines for (int i = 0; i < LIST_LENGTH; i++) { if (start + i >= m_program->getNumSourceLines() + 1) { break; } printSourceLine(start + i); } m_listPosition = start; return false; } bool InteractiveDebugger::mem(vector args) { // Get target memory object Memory* memory = NULL; if (args[0][0] == 'g') { memory = m_context->getGlobalMemory(); } else if (args[0][0] == 'l') { memory = m_kernelInvocation->getCurrentWorkGroup()->getLocalMemory(); } else if (args[0][0] == 'p') { memory = m_kernelInvocation->getCurrentWorkItem()->getPrivateMemory(); } // If no arguments, dump memory if (args.size() == 1) { memory->dump(); return false; } else if (args.size() > 3) { cout << "Invalid number of arguments." << endl; return false; } // Get target address size_t address; stringstream ss(args[1]); ss >> hex >> address; if (!ss.eof() || address % 4 != 0) { cout << "Invalid address." 
<< endl; return false; } // Get optional size size_t size = 8; if (args.size() == 3) { stringstream ss(args[2]); ss >> dec >> size; if (!ss.eof() || !size) { cout << "Invalid size" << endl; return false; } } // Check address is valid if (!memory->isAddressValid(address, size)) { cout << "Invalid memory address." << endl; return false; } // Output data unsigned char* data = (unsigned char*)memory->getPointer(address); for (unsigned i = 0; i < size; i++) { if (i % 4 == 0) { cout << endl << hex << uppercase << setw(16) << setfill(' ') << right << (address + i) << ":"; } cout << " " << hex << uppercase << setw(2) << setfill('0') << (int)data[i]; } cout << endl << endl; return false; } bool InteractiveDebugger::next(vector args) { const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem) { cout << "All work-items finished." << endl; return false; } if (workItem->getState() == WorkItem::FINISHED) { cout << "Work-item has finished." << endl; return false; } else if (workItem->getState() == WorkItem::BARRIER) { cout << "Work-item is at barrier." << endl; return false; } // Step until we return to the same depth m_previousDepth = workItem->getCallStack().size(); m_previousLine = getCurrentLineNumber(); m_next = true; return true; } bool InteractiveDebugger::print(vector args) { if (args.size() < 2) { cout << "Variable name(s) required." << endl; return false; } const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); for (unsigned i = 1; i < args.size(); i++) { cout << args[i] << " = "; try { workItem->printExpression(args[i]); } catch (FatalError err) { cout << "fatal error: " << err.what(); } cout << endl; } return false; } bool InteractiveDebugger::quit(vector args) { #if !defined(_WIN32) || defined(__MINGW32__) // Restore old signal handler sigaction(SIGINT, &m_oldSignalHandler, NULL); #endif m_running = false; return true; } bool InteractiveDebugger::step(vector args) { const WorkItem* workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem) { cout << "All work-items finished." << endl; return false; } if (workItem->getState() == WorkItem::FINISHED) { cout << "Work-item has finished." << endl; return false; } else if (workItem->getState() == WorkItem::BARRIER) { cout << "Work-item is at barrier." << endl; return false; } // Save current position m_previousDepth = workItem->getCallStack().size(); m_previousLine = getCurrentLineNumber(); return true; } bool InteractiveDebugger::workitem(vector args) { // TODO: Take offsets into account? Size3 gid(0, 0, 0); for (unsigned i = 1; i < args.size(); i++) { // Parse argument as a target line number istringstream ss(args[i]); ss >> gid[i - 1]; if (!ss.eof() || gid[i - 1] >= m_kernelInvocation->getGlobalSize()[i - 1]) { cout << "Invalid global ID." << endl; return false; } } // Ugly const_cast since this operation actually changes something about // the simulation. This goes against the idea that plugins are entirely // passive. if (!const_cast(m_kernelInvocation)->switchWorkItem(gid)) { cout << "Work-item has already finished, unable to load state." << endl; return false; } // Print new WI id cout << "Switched to work-item: (" << gid[0] << "," << gid[1] << "," << gid[2] << ")" << endl; if (m_kernelInvocation->getCurrentWorkItem()->getState() == WorkItem::FINISHED) { cout << "Work-item has finished execution." 
<< endl; } else { printCurrentLine(); } return false; } Oclgrind-21.10/src/plugins/InteractiveDebugger.h000066400000000000000000000040031413315665100216230ustar00rootroot00000000000000// InteractiveDebugger.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "core/Plugin.h" namespace oclgrind { class Program; class InteractiveDebugger : public Plugin { public: InteractiveDebugger(const Context* context); virtual void instructionExecuted(const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) override; virtual void kernelBegin(const KernelInvocation* kernelInvocation) override; virtual void kernelEnd(const KernelInvocation* kernelInvocation) override; virtual void log(MessageType type, const char* message) override; virtual bool isThreadSafe() const override; private: bool m_continue; bool m_running; bool m_forceBreak; size_t m_listPosition; bool m_next; size_t m_lastBreakLine; size_t m_nextBreakpoint; size_t m_previousDepth; size_t m_previousLine; std::map> m_breakpoints; const Program* m_program; const KernelInvocation* m_kernelInvocation; size_t getCurrentLineNumber() const; size_t getLineNumber(const llvm::Instruction* instruction) const; bool hasHitBreakpoint(); void printCurrentLine() const; void printFunction(const llvm::Instruction* instruction) const; void printSourceLine(size_t lineNum) const; bool shouldShowPrompt(const WorkItem* workItem); // Interactive commands typedef bool (InteractiveDebugger::*Command)(std::vector); std::map m_commands; #define CMD(name) bool name(std::vector args); CMD(backtrace); CMD(brk); CMD(cont); CMD(del); CMD(help); CMD(info); CMD(list); CMD(mem); CMD(next); CMD(print); CMD(quit); CMD(step); CMD(workitem); #undef CMD }; } // namespace oclgrind Oclgrind-21.10/src/plugins/Logger.cpp000066400000000000000000000027401413315665100174610ustar00rootroot00000000000000// Logger.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
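//
// Note (added commentary, not in the original source): this plugin is
// configured through environment variables read in the constructor below.
// A minimal usage sketch, assuming the standard `oclgrind` launcher and a
// hypothetical application binary `./my_opencl_app`:
//
//   OCLGRIND_LOG=oclgrind.log OCLGRIND_MAX_ERRORS=50 oclgrind ./my_opencl_app
//
// This would send diagnostics to `oclgrind.log` instead of stderr and stop
// reporting after 50 errors/warnings rather than the default of 1000.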
#include "core/common.h" #include #include #include "Logger.h" using namespace oclgrind; using namespace std; #define DEFAULT_MAX_ERRORS 1000 unsigned Logger::m_numErrors = 0; static mutex logMutex; Logger::Logger(const Context* context) : Plugin(context) { m_log = &cerr; const char* logfile = getenv("OCLGRIND_LOG"); if (logfile) { m_log = new ofstream(logfile); if (!m_log->good()) { cerr << "Oclgrind: Unable to open log file '" << logfile << "'" << endl; m_log = &cerr; } } m_maxErrors = getEnvInt("OCLGRIND_MAX_ERRORS", DEFAULT_MAX_ERRORS); } Logger::~Logger() { if (m_log != &cerr) { ((ofstream*)m_log)->close(); delete m_log; } } void Logger::log(MessageType type, const char* message) { lock_guard lock(logMutex); // Limit number of errors/warning printed if (type == ERROR || type == WARNING) { if (m_numErrors == m_maxErrors) { *m_log << endl << "Oclgrind: " << m_numErrors << " errors generated - suppressing further errors" << endl << endl; } if (m_numErrors++ >= m_maxErrors) return; } *m_log << endl << message << endl; } Oclgrind-21.10/src/plugins/Logger.h000066400000000000000000000011541413315665100171240ustar00rootroot00000000000000// Logger.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "core/Plugin.h" namespace oclgrind { class Logger : public Plugin { public: Logger(const Context* context); virtual ~Logger(); virtual void log(MessageType type, const char* message) override; private: std::ostream* m_log; unsigned m_maxErrors; static unsigned m_numErrors; }; } // namespace oclgrind Oclgrind-21.10/src/plugins/MemCheck.cpp000066400000000000000000000146671413315665100177310ustar00rootroot00000000000000// MemCheck.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "core/common.h" #include "core/Context.h" #include "core/Memory.h" #include "core/WorkItem.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "MemCheck.h" using namespace oclgrind; using namespace std; MemCheck::MemCheck(const Context* context) : Plugin(context) {} void MemCheck::instructionExecuted(const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) { // Check static array bounds if load or store is executed const llvm::Value* PtrOp = nullptr; if (auto LI = llvm::dyn_cast(instruction)) { PtrOp = LI->getPointerOperand(); } else if (auto SI = llvm::dyn_cast(instruction)) { PtrOp = SI->getPointerOperand(); } else { return; } // Walk up chain of GEP instructions leading to this access while (auto GEPI = llvm::dyn_cast(PtrOp->stripPointerCasts())) { checkArrayAccess(workItem, GEPI); PtrOp = GEPI->getPointerOperand(); } } void MemCheck::memoryAtomicLoad(const Memory* memory, const WorkItem* workItem, AtomicOp op, size_t address, size_t size) { checkLoad(memory, address, size); } void MemCheck::memoryAtomicStore(const Memory* memory, const WorkItem* workItem, AtomicOp op, size_t address, size_t size) { checkStore(memory, address, size); } void MemCheck::memoryLoad(const Memory* memory, const WorkItem* workItem, size_t address, size_t size) { checkLoad(memory, address, size); } void MemCheck::memoryLoad(const Memory* memory, const WorkGroup* workGroup, size_t address, size_t size) { checkLoad(memory, address, size); } void MemCheck::memoryMap(const Memory* memory, size_t address, size_t offset, size_t size, cl_map_flags flags) { MapRegion map = {address, offset, size, memory->getPointer(address + offset), (flags == CL_MAP_READ ? MapRegion::READ : MapRegion::WRITE)}; m_mapRegions.push_back(map); } void MemCheck::memoryStore(const Memory* memory, const WorkItem* workItem, size_t address, size_t size, const uint8_t* storeData) { checkStore(memory, address, size); } void MemCheck::memoryStore(const Memory* memory, const WorkGroup* workGroup, size_t address, size_t size, const uint8_t* storeData) { checkStore(memory, address, size); } void MemCheck::memoryUnmap(const Memory* memory, size_t address, const void* ptr) { for (auto region = m_mapRegions.begin(); region != m_mapRegions.end(); region++) { if (region->ptr == ptr) { m_mapRegions.erase(region); return; } } } void MemCheck::checkArrayAccess(const WorkItem* workItem, const llvm::GetElementPtrInst* GEPI) const { // Iterate through GEPI indices const llvm::Type* ptrType = GEPI->getPointerOperandType(); for (auto opIndex = GEPI->idx_begin(); opIndex != GEPI->idx_end(); opIndex++) { int64_t index = workItem->getOperand(opIndex->get()).getSInt(); if (ptrType->isArrayTy()) { // Check index doesn't exceed size of array uint64_t size = ptrType->getArrayNumElements(); if ((uint64_t)index >= size) { ostringstream info; info << "Index (" << index << ") exceeds static array size (" << size << ")"; m_context->logError(info.str().c_str()); } ptrType = ptrType->getArrayElementType(); } else if (ptrType->isPointerTy()) { ptrType = ptrType->getPointerElementType(); } else if (ptrType->isVectorTy()) { ptrType = llvm::cast(ptrType)->getElementType(); } else if (ptrType->isStructTy()) { ptrType = ptrType->getStructElementType(index); } } } void MemCheck::checkLoad(const Memory* memory, size_t address, size_t size) const { if (!memory->isAddressValid(address, size)) { logInvalidAccess(true, memory->getAddressSpace(), address, size); return; } if (memory->getBuffer(address)->flags & CL_MEM_WRITE_ONLY) 
{ m_context->logError("Invalid read from write-only buffer"); } if (memory->getAddressSpace() == AddrSpaceLocal || memory->getAddressSpace() == AddrSpacePrivate) return; // Check if memory location is currently mapped for writing for (auto region = m_mapRegions.begin(); region != m_mapRegions.end(); region++) { if (region->type == MapRegion::WRITE && address < region->address + region->size && address + size >= region->address) { m_context->logError("Invalid read from buffer mapped for writing"); } } } void MemCheck::checkStore(const Memory* memory, size_t address, size_t size) const { if (!memory->isAddressValid(address, size)) { logInvalidAccess(false, memory->getAddressSpace(), address, size); return; } if (memory->getBuffer(address)->flags & CL_MEM_READ_ONLY) { m_context->logError("Invalid write to read-only buffer"); } if (memory->getAddressSpace() == AddrSpaceLocal || memory->getAddressSpace() == AddrSpacePrivate) return; // Check if memory location is currently mapped for (auto region = m_mapRegions.begin(); region != m_mapRegions.end(); region++) { if (address < region->address + region->size && address + size >= region->address) { m_context->logError("Invalid write to mapped buffer"); } } } void MemCheck::logInvalidAccess(bool read, unsigned addrSpace, size_t address, size_t size) const { Context::Message msg(ERROR, m_context); msg << "Invalid " << (read ? "read" : "write") << " of size " << size << " at " << getAddressSpaceName(addrSpace) << " memory address 0x" << hex << address << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Entity: " << msg.CURRENT_ENTITY << endl << msg.CURRENT_LOCATION << endl; msg.send(); } Oclgrind-21.10/src/plugins/MemCheck.h000066400000000000000000000050701413315665100173620ustar00rootroot00000000000000// MemCheck.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "core/Plugin.h" namespace llvm { class GetElementPtrInst; } namespace oclgrind { class MemCheck : public Plugin { public: MemCheck(const Context* context); virtual void instructionExecuted(const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) override; virtual void memoryAtomicLoad(const Memory* memory, const WorkItem* workItem, AtomicOp op, size_t address, size_t size) override; virtual void memoryAtomicStore(const Memory* memory, const WorkItem* workItem, AtomicOp op, size_t address, size_t size) override; virtual void memoryLoad(const Memory* memory, const WorkItem* workItem, size_t address, size_t size) override; virtual void memoryLoad(const Memory* memory, const WorkGroup* workGroup, size_t address, size_t size) override; virtual void memoryMap(const Memory* memory, size_t address, size_t offset, size_t size, cl_map_flags flags) override; virtual void memoryStore(const Memory* memory, const WorkItem* workItem, size_t address, size_t size, const uint8_t* storeData) override; virtual void memoryStore(const Memory* memory, const WorkGroup* workGroup, size_t address, size_t size, const uint8_t* storeData) override; virtual void memoryUnmap(const Memory* memory, size_t address, const void* ptr) override; private: void checkArrayAccess(const WorkItem* workItem, const llvm::GetElementPtrInst* GEPI) const; void checkLoad(const Memory* memory, size_t address, size_t size) const; void checkStore(const Memory* memory, size_t address, size_t size) const; void logInvalidAccess(bool read, unsigned addrSpace, size_t address, size_t size) const; struct MapRegion { size_t address; size_t offset; size_t size; const void* ptr; enum { READ, WRITE } type; }; std::list m_mapRegions; }; } // namespace oclgrind Oclgrind-21.10/src/plugins/RaceDetector.cpp000066400000000000000000000347251413315665100206160ustar00rootroot00000000000000// RaceDetector.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "core/common.h" #include "core/Context.h" #include "core/KernelInvocation.h" #include "core/Memory.h" #include "core/WorkGroup.h" #include "core/WorkItem.h" #include "RaceDetector.h" using namespace oclgrind; using namespace std; THREAD_LOCAL RaceDetector::WorkerState RaceDetector::m_state = {NULL}; #define STATE(workgroup) (m_state.groups->at(workgroup)) // Use a bank of mutexes to reduce unnecessary synchronisation #define NUM_GLOBAL_MUTEXES 4096 // Must be power of two #define GLOBAL_MUTEX(buffer, offset) \ m_globalMutexes[buffer][offset & (NUM_GLOBAL_MUTEXES - 1)] RaceDetector::RaceDetector(const Context* context) : Plugin(context) { m_kernelInvocation = NULL; m_allowUniformWrites = !checkEnv("OCLGRIND_UNIFORM_WRITES"); } void RaceDetector::kernelBegin(const KernelInvocation* kernelInvocation) { m_kernelInvocation = kernelInvocation; } void RaceDetector::kernelEnd(const KernelInvocation* kernelInvocation) { // Log races for (auto race : kernelRaces) logRace(race); kernelRaces.clear(); // Clear all global memory accesses for (auto& buffer : m_globalAccesses) { size_t sz = buffer.second.size(); buffer.second.clear(); buffer.second.resize(sz); } m_kernelInvocation = NULL; } void RaceDetector::memoryAllocated(const Memory* memory, size_t address, size_t size, cl_mem_flags flags, const uint8_t* initData) { size_t buffer = memory->extractBuffer(address); if (memory->getAddressSpace() == AddrSpaceGlobal) { m_globalAccesses[buffer].resize(size); m_globalMutexes[buffer] = new mutex[NUM_GLOBAL_MUTEXES]; } } void RaceDetector::memoryAtomicLoad(const Memory* memory, const WorkItem* workItem, AtomicOp op, size_t address, size_t size) { registerAccess(memory, workItem->getWorkGroup(), workItem, address, size, true); } void RaceDetector::memoryAtomicStore(const Memory* memory, const WorkItem* workItem, AtomicOp op, size_t address, size_t size) { registerAccess(memory, workItem->getWorkGroup(), workItem, address, size, true, (const uint8_t*)memory->getPointer(address)); } void RaceDetector::memoryDeallocated(const Memory* memory, size_t address) { size_t buffer = memory->extractBuffer(address); if (memory->getAddressSpace() == AddrSpaceGlobal) { m_globalAccesses.erase(buffer); delete[] m_globalMutexes.at(buffer); m_globalMutexes.erase(buffer); } } void RaceDetector::memoryLoad(const Memory* memory, const WorkItem* workItem, size_t address, size_t size) { registerAccess(memory, workItem->getWorkGroup(), workItem, address, size, false, NULL); } void RaceDetector::memoryLoad(const Memory* memory, const WorkGroup* workGroup, size_t address, size_t size) { registerAccess(memory, workGroup, NULL, address, size, false); } void RaceDetector::memoryStore(const Memory* memory, const WorkItem* workItem, size_t address, size_t size, const uint8_t* storeData) { registerAccess(memory, workItem->getWorkGroup(), workItem, address, size, false, storeData); } void RaceDetector::memoryStore(const Memory* memory, const WorkGroup* workGroup, size_t address, size_t size, const uint8_t* storeData) { registerAccess(memory, workGroup, NULL, address, size, false, storeData); } void RaceDetector::workGroupBarrier(const WorkGroup* workGroup, uint32_t flags) { if (flags & CLK_LOCAL_MEM_FENCE) { syncWorkItems(workGroup->getLocalMemory(), STATE(workGroup), STATE(workGroup).wiLocal); } if (flags & CLK_GLOBAL_MEM_FENCE) { syncWorkItems(m_context->getGlobalMemory(), STATE(workGroup), STATE(workGroup).wiGlobal); } } void RaceDetector::workGroupBegin(const WorkGroup* workGroup) { // Create worker state if haven't already if 
(!m_state.groups) { m_state.groups = new unordered_map; } // Initialize work-group state WorkGroupState& state = (*m_state.groups)[workGroup]; Size3 wgsize = workGroup->getGroupSize(); state.numWorkItems = wgsize.x * wgsize.y * wgsize.z; // Re-use pool allocator for all access maps AccessMap tmp(0, AccessMap::hasher(), AccessMap::key_equal(), state.wgGlobal.get_allocator()); state.wiGlobal.resize(state.numWorkItems + 1, tmp); state.wiLocal.resize(state.numWorkItems + 1, tmp); } void RaceDetector::workGroupComplete(const WorkGroup* workGroup) { WorkGroupState& state = STATE(workGroup); syncWorkItems(workGroup->getLocalMemory(), state, state.wiLocal); syncWorkItems(m_context->getGlobalMemory(), state, state.wiGlobal); // Merge global accesses across kernel invocation size_t group = workGroup->getGroupIndex(); for (auto& record : state.wgGlobal) { size_t address = record.first; size_t buffer = m_context->getGlobalMemory()->extractBuffer(address); size_t offset = m_context->getGlobalMemory()->extractOffset(address); lock_guard lock(GLOBAL_MUTEX(buffer, offset)); AccessRecord& a = record.second; AccessRecord& b = m_globalAccesses.at(buffer)[offset]; // Check for races with previous accesses if (check(a.load, b.store) && getAccessWorkGroup(b.store) != group) insertKernelRace({AddrSpaceGlobal, address, a.load, b.store}); if (check(a.store, b.load) && getAccessWorkGroup(b.load) != group) insertKernelRace({AddrSpaceGlobal, address, a.store, b.load}); if (check(a.store, b.store) && getAccessWorkGroup(b.store) != group) insertKernelRace({AddrSpaceGlobal, address, a.store, b.store}); // Insert accesses if (a.load.isSet()) insert(b, a.load); if (a.store.isSet()) insert(b, a.store); } state.wgGlobal.clear(); // Clean-up work-group state m_state.groups->erase(workGroup); if (m_state.groups->empty()) { delete m_state.groups; m_state.groups = NULL; } } bool RaceDetector::check(const MemoryAccess& a, const MemoryAccess& b) const { // Ensure both accesses are valid if (!a.isSet() || !b.isSet()) return false; // No race if same work-item if (a.isWorkItem() && b.isWorkItem() && (a.getEntity() == b.getEntity())) return false; // No race if both operations are atomics if (a.isAtomic() && b.isAtomic()) return false; // Potential race if at least one store if (a.isStore() || b.isStore()) { // Read-write race if one is a load if (a.isLoad() || b.isLoad()) return true; // Write-write race if not uniform if (!m_allowUniformWrites || (a.getStoreData() != b.getStoreData())) return true; } return false; } size_t RaceDetector::getAccessWorkGroup(const MemoryAccess& access) const { if (access.isWorkItem()) { const Size3& wgsize = m_kernelInvocation->getLocalSize(); return access.getEntity() / (wgsize.x * wgsize.y * wgsize.z); } else return access.getEntity(); } void RaceDetector::insert(AccessRecord& record, const MemoryAccess& access) const { if (access.isLoad()) { if (!record.load.isSet() || record.load.isAtomic()) record.load = access; } else if (access.isStore()) { if (!record.store.isSet() || record.store.isAtomic()) record.store = access; } } void RaceDetector::insertKernelRace(const Race& race) { lock_guard lock(kernelRacesMutex); insertRace(kernelRaces, race); } void RaceDetector::insertRace(RaceList& races, const Race& race) const { // Check list for duplicates for (auto x = races.begin(); x != races.end(); x++) { // Check if races are equal modulo address if ((race.a == x->a && race.b == x->b) || (race.a == x->b && race.b == x->a)) { // If they match, keep the one with the lowest address if (race.address < 
x->address) { races.erase(x); races.push_back(race); return; } else return; } } races.push_back(race); } void RaceDetector::logRace(const Race& race) const { const char* raceType; if (race.a.isLoad() || race.b.isLoad()) raceType = "Read-write"; else raceType = "Write-write"; Context::Message msg(ERROR, m_context); msg << raceType << " data race at " << getAddressSpaceName(race.addrspace) << " memory address 0x" << hex << race.address << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << endl << "First entity: "; if (race.a.isWorkItem()) { Size3 wgsize = m_kernelInvocation->getLocalSize(); Size3 global(race.a.getEntity(), m_kernelInvocation->getGlobalSize()); Size3 local(global.x % wgsize.x, global.y % wgsize.y, global.z % wgsize.z); Size3 group(global.x / wgsize.x, global.y / wgsize.y, global.z / wgsize.z); msg << "Global" << global << " Local" << local << " Group" << group; } else { msg << "Group" << Size3(race.a.getEntity(), m_kernelInvocation->getLocalSize()); } msg << endl << race.a.getInstruction() << endl << endl << "Second entity: "; // Show details of other entity involved in race if (race.b.isWorkItem()) { Size3 wgsize = m_kernelInvocation->getLocalSize(); Size3 global(race.b.getEntity(), m_kernelInvocation->getGlobalSize()); Size3 local(global.x % wgsize.x, global.y % wgsize.y, global.z % wgsize.z); Size3 group(global.x / wgsize.x, global.y / wgsize.y, global.z / wgsize.z); msg << "Global" << global << " Local" << local << " Group" << group; } else { msg << "Group" << Size3(race.b.getEntity(), m_kernelInvocation->getLocalSize()); } msg << endl << race.b.getInstruction() << endl; msg.send(); } void RaceDetector::registerAccess(const Memory* memory, const WorkGroup* workGroup, const WorkItem* workItem, size_t address, size_t size, bool atomic, const uint8_t* storeData) { unsigned addrSpace = memory->getAddressSpace(); if (addrSpace == AddrSpacePrivate || addrSpace == AddrSpaceConstant) return; if (!memory->isAddressValid(address, size)) return; // Construct access MemoryAccess access(workGroup, workItem, storeData != NULL, atomic); size_t index; if (workItem) { Size3 wgsize = workGroup->getGroupSize(); Size3 lid = workItem->getLocalID(); index = lid.x + (lid.y + lid.z * wgsize.y) * wgsize.x; } else { index = STATE(workGroup).wiLocal.size() - 1; } AccessMap& accesses = (addrSpace == AddrSpaceGlobal) ? 
STATE(workGroup).wiGlobal[index] : STATE(workGroup).wiLocal[index]; for (size_t i = 0; i < size; i++) { if (storeData) access.setStoreData(storeData[i]); insert(accesses[address + i], access); } } void RaceDetector::syncWorkItems(const Memory* memory, WorkGroupState& state, vector& accesses) { AccessMap wgAccesses(0, AccessMap::hasher(), AccessMap::key_equal(), state.wgGlobal.get_allocator()); for (size_t i = 0; i < state.numWorkItems + 1; i++) { RaceList races; for (auto& record : accesses[i]) { size_t address = record.first; AccessRecord& a = record.second; AccessRecord& b = wgAccesses[address]; if (check(a.load, b.store)) insertRace(races, {memory->getAddressSpace(), address, a.load, b.store}); if (check(a.store, b.load)) insertRace(races, {memory->getAddressSpace(), address, a.store, b.load}); if (check(a.store, b.store)) insertRace(races, {memory->getAddressSpace(), address, a.store, b.store}); if (a.load.isSet()) { insert(b, a.load); if (memory->getAddressSpace() == AddrSpaceGlobal) insert(state.wgGlobal[address], a.load); } if (a.store.isSet()) { insert(b, a.store); if (memory->getAddressSpace() == AddrSpaceGlobal) insert(state.wgGlobal[address], a.store); } } accesses[i].clear(); // Log races for (auto race : races) logRace(race); } } RaceDetector::MemoryAccess::MemoryAccess() { this->info = 0; this->instruction = NULL; } RaceDetector::MemoryAccess::MemoryAccess(const WorkGroup* workGroup, const WorkItem* workItem, bool store, bool atomic) { this->info = 0; this->info |= 1 << SET_BIT; this->info |= store << STORE_BIT; this->info |= atomic << ATOMIC_BIT; if (workItem) { this->entity = workItem->getGlobalIndex(); this->instruction = workItem->getCurrentInstruction(); } else { this->info |= (1 << WG_BIT); this->entity = workGroup->getGroupIndex(); this->instruction = NULL; // TODO? } } void RaceDetector::MemoryAccess::clear() { this->info = 0; this->instruction = NULL; } bool RaceDetector::MemoryAccess::isSet() const { return this->info & (1 << SET_BIT); } bool RaceDetector::MemoryAccess::isAtomic() const { return this->info & (1 << ATOMIC_BIT); } bool RaceDetector::MemoryAccess::isLoad() const { return !isStore(); } bool RaceDetector::MemoryAccess::isStore() const { return this->info & (1 << STORE_BIT); } bool RaceDetector::MemoryAccess::isWorkGroup() const { return this->info & (1 << WG_BIT); } bool RaceDetector::MemoryAccess::isWorkItem() const { return !isWorkGroup(); } size_t RaceDetector::MemoryAccess::getEntity() const { return this->entity; } const llvm::Instruction* RaceDetector::MemoryAccess::getInstruction() const { return this->instruction; } uint8_t RaceDetector::MemoryAccess::getStoreData() const { return this->storeData; } void RaceDetector::MemoryAccess::setStoreData(uint8_t data) { this->storeData = data; } bool RaceDetector::MemoryAccess::operator==( const RaceDetector::MemoryAccess& other) const { return this->entity == other.entity && this->instruction == other.instruction && this->info == other.info; } Oclgrind-21.10/src/plugins/RaceDetector.h000066400000000000000000000112161413315665100202510ustar00rootroot00000000000000// RaceDetector.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
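//
// Note (added commentary, not in the original source): accesses are tracked
// in three layers. Each work-item has its own AccessMap for local and global
// memory; at a barrier, syncWorkItems() in RaceDetector.cpp merges and
// cross-checks them; when a work-group completes, its surviving global-memory
// accesses are folded into the per-buffer m_globalAccesses table under a bank
// of NUM_GLOBAL_MUTEXES mutexes so that groups executed by different worker
// threads can still be checked against each other.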
#include "core/Plugin.h" #include namespace oclgrind { class RaceDetector : public Plugin { public: RaceDetector(const Context* context); virtual void kernelBegin(const KernelInvocation* kernelInvocation) override; virtual void kernelEnd(const KernelInvocation* kernelInvocation) override; virtual void memoryAllocated(const Memory* memory, size_t address, size_t size, cl_mem_flags flags, const uint8_t* initData) override; virtual void memoryAtomicLoad(const Memory* memory, const WorkItem* workItem, AtomicOp op, size_t address, size_t size) override; virtual void memoryAtomicStore(const Memory* memory, const WorkItem* workItem, AtomicOp op, size_t address, size_t size) override; virtual void memoryDeallocated(const Memory* memory, size_t address) override; virtual void memoryLoad(const Memory* memory, const WorkItem* workItem, size_t address, size_t size) override; virtual void memoryLoad(const Memory* memory, const WorkGroup* workGroup, size_t address, size_t size) override; virtual void memoryStore(const Memory* memory, const WorkItem* workItem, size_t address, size_t size, const uint8_t* storeData) override; virtual void memoryStore(const Memory* memory, const WorkGroup* workGroup, size_t address, size_t size, const uint8_t* storeData) override; virtual void workGroupBarrier(const WorkGroup* workGroup, uint32_t flags) override; virtual void workGroupBegin(const WorkGroup* workGroup) override; virtual void workGroupComplete(const WorkGroup* workGroup) override; private: struct MemoryAccess { private: size_t entity; const llvm::Instruction* instruction; uint8_t info; static const unsigned SET_BIT = 0; static const unsigned STORE_BIT = 1; static const unsigned ATOMIC_BIT = 2; static const unsigned WG_BIT = 3; uint8_t storeData; public: void clear(); bool isSet() const; bool isAtomic() const; bool isLoad() const; bool isStore() const; bool isWorkGroup() const; bool isWorkItem() const; size_t getEntity() const; const llvm::Instruction* getInstruction() const; uint8_t getStoreData() const; void setStoreData(uint8_t); MemoryAccess(); MemoryAccess(const WorkGroup* workGroup, const WorkItem* workItem, bool store, bool atomic); bool operator==(const MemoryAccess& other) const; }; struct AccessRecord { MemoryAccess load; MemoryAccess store; }; typedef std::vector AccessList; typedef std::unordered_map< size_t, AccessRecord, std::hash, std::equal_to, PoolAllocator, 8192>> AccessMap; std::unordered_map> m_globalAccesses; std::map m_globalMutexes; struct WorkGroupState { size_t numWorkItems; std::vector wiLocal; std::vector wiGlobal; AccessMap wgGlobal; }; struct WorkerState { std::unordered_map* groups; }; static THREAD_LOCAL WorkerState m_state; struct Race { unsigned addrspace; size_t address; MemoryAccess a, b; }; typedef std::list RaceList; bool m_allowUniformWrites; const KernelInvocation* m_kernelInvocation; std::mutex kernelRacesMutex; RaceList kernelRaces; size_t getAccessWorkGroup(const MemoryAccess& access) const; bool check(const MemoryAccess& a, const MemoryAccess& b) const; void insert(AccessRecord& record, const MemoryAccess& access) const; void insertKernelRace(const Race& race); void insertRace(RaceList& races, const Race& race) const; void logRace(const Race& race) const; void registerAccess(const Memory* memory, const WorkGroup* workGroup, const WorkItem* workItem, size_t address, size_t size, bool atomic, const uint8_t* storeData = NULL); void syncWorkItems(const Memory* memory, WorkGroupState& state, std::vector& accesses); }; } // namespace oclgrind 
Oclgrind-21.10/src/plugins/Uninitialized.cpp000066400000000000000000002441411413315665100210550ustar00rootroot00000000000000// Uninitialized.h (Oclgrind) // Copyright (c) 2015, Moritz Pflanzer // Imperial College London. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "config.h" #include "core/common.h" #include "core/Context.h" #include "core/Kernel.h" #include "core/KernelInvocation.h" #include "core/Memory.h" #include "core/WorkGroup.h" #include "core/WorkItem.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "Uninitialized.h" #include using namespace oclgrind; using namespace std; // void Uninitialized::memoryAllocated(const Memory *memory, size_t address, // size_t size, cl_mem_flags flags, // const uint8_t *initData) //{ // cout << "Memory: " << memory << ", address: " << hex << address << dec << // ", size: " << size << endl; //} // Multiple mutexes to mitigate risk of unnecessary synchronisation in atomics #define NUM_ATOMIC_MUTEXES 64 // Must be power of two static std::mutex atomicShadowMutex[NUM_ATOMIC_MUTEXES]; #define ATOMIC_MUTEX(offset) \ atomicShadowMutex[(((offset) >> 2) & (NUM_ATOMIC_MUTEXES - 1))] THREAD_LOCAL ShadowContext::WorkSpace ShadowContext::m_workSpace = {NULL, NULL, NULL, 0}; Uninitialized::Uninitialized(const Context* context) : Plugin(context), shadowContext(sizeof(size_t) == 8 ? 32 : 16) { shadowContext.createMemoryPool(); } Uninitialized::~Uninitialized() { shadowContext.destroyMemoryPool(); } void Uninitialized::allocAndStoreShadowMemory(unsigned addrSpace, size_t address, TypedValue SM, const WorkItem* workItem, const WorkGroup* workGroup, bool unchecked) { if (addrSpace == AddrSpaceConstant) { // TODO: Eventually store value return; } ShadowMemory* memory = getShadowMemory(addrSpace, workItem, workGroup); memory->allocate(address, SM.size * SM.num); storeShadowMemory(addrSpace, address, SM, workItem, workGroup, unchecked); } bool Uninitialized::checkAllOperandsDefined(const WorkItem* workItem, const llvm::Instruction* I) { for (llvm::Instruction::const_op_iterator OI = I->op_begin(); OI != I->op_end(); ++OI) { if (!ShadowContext::isCleanValue( shadowContext.getValue(workItem, OI->get()))) { #ifdef DUMP_SHADOW OI->get()->dump(); cout << "Shadow value: " << shadowContext.getValue(workItem, OI->get()) << endl; #endif logUninitializedCF(); #ifdef DUMP_SHADOW shadowContext.dump(workItem); #endif return false; } } return true; } void Uninitialized::checkStructMemcpy(const WorkItem* workItem, const llvm::Value* src) { const llvm::PointerType* srcPtrTy = llvm::dyn_cast(src->getType()); const llvm::StructType* structTy = llvm::dyn_cast(srcPtrTy->getElementType()); size_t srcAddr = workItem->getOperand(src).getPointer(); unsigned srcAddrSpace = srcPtrTy->getPointerAddressSpace(); ShadowMemory* shadowMemory; switch (srcAddrSpace) { case AddrSpacePrivate: { shadowMemory = shadowContext.getShadowWorkItem(workItem)->getPrivateMemory(); break; } case AddrSpaceLocal: { shadowMemory = shadowContext.getShadowWorkGroup(workItem->getWorkGroup()) ->getLocalMemory(); break; } case AddrSpaceConstant: // TODO: Constants should always be clean?! 
return; case AddrSpaceGlobal: shadowMemory = shadowContext.getGlobalMemory(); break; default: FATAL_ERROR("Unsupported addressspace %d", srcAddrSpace); } if (!ShadowContext::isCleanStruct(shadowMemory, srcAddr, structTy)) { logUninitializedWrite(srcAddrSpace, srcAddr); } } void Uninitialized::copyShadowMemory(unsigned dstAddrSpace, size_t dst, unsigned srcAddrSpace, size_t src, unsigned size, const WorkItem* workItem, const WorkGroup* workGroup, bool unchecked) { copyShadowMemoryStrided(dstAddrSpace, dst, srcAddrSpace, src, 1, 1, size, workItem, workGroup, unchecked); } void Uninitialized::copyShadowMemoryStrided( unsigned dstAddrSpace, size_t dst, unsigned srcAddrSpace, size_t src, size_t num, size_t stride, unsigned size, const WorkItem* workItem, const WorkGroup* workGroup, bool unchecked) { TypedValue v = {size, 1, new unsigned char[size]}; for (unsigned i = 0; i < num; i++) { loadShadowMemory(srcAddrSpace, src, v, workItem, workGroup); storeShadowMemory(dstAddrSpace, dst, v, workItem, workGroup, unchecked); src += stride * size; dst += stride * size; } delete[] v.data; } std::string Uninitialized::extractUnmangledName(const std::string fullname) { // Extract unmangled name if (fullname.compare(0, 2, "_Z") == 0) { int len = atoi(fullname.c_str() + 2); int start = fullname.find_first_not_of("0123456789", 2); return fullname.substr(start, len); } else { return fullname; } } ShadowMemory* Uninitialized::getShadowMemory(unsigned addrSpace, const WorkItem* workItem, const WorkGroup* workGroup) const { switch (addrSpace) { case AddrSpacePrivate: { if (!workItem) { FATAL_ERROR("Work item needed to access private memory!"); } return shadowContext.getShadowWorkItem(workItem)->getPrivateMemory(); } case AddrSpaceLocal: { if (!workGroup) { if (!workItem) { FATAL_ERROR("Work item or work group needed to access local memory!"); } workGroup = workItem->getWorkGroup(); } return shadowContext.getShadowWorkGroup(workGroup)->getLocalMemory(); } // case AddrSpaceConstant: // break; case AddrSpaceGlobal: return shadowContext.getGlobalMemory(); default: FATAL_ERROR("Unsupported addressspace %d", addrSpace); } } bool Uninitialized::handleBuiltinFunction(const WorkItem* workItem, string name, const llvm::CallInst* CI, const TypedValue result) { name = extractUnmangledName(name); ShadowValues* shadowValues = shadowContext.getShadowWorkItem(workItem)->getValues(); if (name == "async_work_group_copy" || name == "async_work_group_strided_copy") { int arg = 0; // Get src/dest addresses const llvm::Value* dstOp = CI->getArgOperand(arg++); const llvm::Value* srcOp = CI->getArgOperand(arg++); size_t dst = workItem->getOperand(dstOp).getPointer(); size_t src = workItem->getOperand(srcOp).getPointer(); // Get size of copy unsigned elemSize = getTypeSize(dstOp->getType()->getPointerElementType()); const llvm::Value* numOp = CI->getArgOperand(arg++); uint64_t num = workItem->getOperand(numOp).getUInt(); TypedValue numShadow = shadowContext.getValue(workItem, numOp); if (!ShadowContext::isCleanValue(numShadow)) { logUninitializedIndex(); } // Get stride size_t stride = 1; if (name == "async_work_group_strided_copy") { const llvm::Value* strideOp = CI->getArgOperand(arg++); stride = workItem->getOperand(strideOp).getUInt(); TypedValue strideShadow = shadowContext.getValue(workItem, strideOp); if (!ShadowContext::isCleanValue(strideShadow)) { logUninitializedIndex(); } } const llvm::Value* eventOp = CI->getArgOperand(arg++); TypedValue eventShadow = shadowContext.getValue(workItem, eventOp); // Get type of copy 
AddressSpace dstAddrSpace = AddrSpaceLocal; AddressSpace srcAddrSpace = AddrSpaceLocal; if (dstOp->getType()->getPointerAddressSpace() == AddrSpaceLocal) { srcAddrSpace = AddrSpaceGlobal; } else { dstAddrSpace = AddrSpaceGlobal; } copyShadowMemoryStrided(dstAddrSpace, dst, srcAddrSpace, src, num, stride, elemSize, workItem); shadowValues->setValue(CI, eventShadow); // Check shadow of src address TypedValue srcShadow = shadowContext.getValue(workItem, srcOp); if (!ShadowContext::isCleanValue(srcShadow)) { logUninitializedAddress(srcAddrSpace, src, false); } // Check shadow of dst address TypedValue dstShadow = shadowContext.getValue(workItem, dstOp); if (!ShadowContext::isCleanValue(dstShadow)) { logUninitializedAddress(dstAddrSpace, dst); } return true; } else if (name == "wait_group_events") { const llvm::Value* Addr = CI->getArgOperand(1); const llvm::Value* Num = CI->getArgOperand(0); uint64_t num = workItem->getOperand(Num).getUInt(); size_t address = workItem->getOperand(Addr).getPointer(); TypedValue numShadow = shadowContext.getValue(workItem, Num); TypedValue eventShadow = {sizeof(size_t), 1, new unsigned char[sizeof(size_t)]}; // Check shadow for the number of events if (!ShadowContext::isCleanValue(numShadow)) { logUninitializedCF(); } for (unsigned i = 0; i < num; ++i) { loadShadowMemory(AddrSpacePrivate, address, eventShadow, workItem); if (!ShadowContext::isCleanValue(eventShadow)) { logUninitializedCF(); delete[] eventShadow.data; return true; } address += sizeof(size_t); } delete[] eventShadow.data; // Check shadow of address TypedValue addrShadow = shadowContext.getValue(workItem, Addr); if (!ShadowContext::isCleanValue(addrShadow)) { logUninitializedAddress(AddrSpacePrivate, address, false); } return true; } else if (name.compare(0, 6, "atomic") == 0) { if (name.compare(6, string::npos, "cmpxchg") == 0) { const llvm::Value* Addr = CI->getArgOperand(0); unsigned addrSpace = Addr->getType()->getPointerAddressSpace(); size_t address = workItem->getOperand(Addr).getPointer(); uint32_t cmp = workItem->getOperand(CI->getArgOperand(1)).getUInt(); uint32_t old = workItem->getOperand(CI).getUInt(); TypedValue argShadow = shadowContext.getValue(workItem, CI->getArgOperand(2)); TypedValue cmpShadow = shadowContext.getValue(workItem, CI->getArgOperand(1)); TypedValue oldShadow = {4, 1, shadowContext.getMemoryPool()->alloc(4)}; // Check shadow of the condition if (!ShadowContext::isCleanValue(cmpShadow)) { logUninitializedCF(); } // Perform cmpxchg if (addrSpace == AddrSpaceGlobal) { shadowContext.getGlobalMemory()->lock(address); } loadShadowMemory(addrSpace, address, oldShadow, workItem); if (old == cmp) { storeShadowMemory(addrSpace, address, argShadow, workItem); } if (addrSpace == AddrSpaceGlobal) { shadowContext.getGlobalMemory()->unlock(address); } shadowValues->setValue(CI, oldShadow); // Check shadow of address TypedValue addrShadow = shadowContext.getValue(workItem, Addr); if (!ShadowContext::isCleanValue(addrShadow)) { logUninitializedAddress(addrSpace, address); } return true; } else { SimpleOrAtomic(workItem, CI); return true; } } else if (name == "fract" || name == "modf" || name == "sincos") { const llvm::Value* Addr = CI->getArgOperand(1); unsigned addrSpace = Addr->getType()->getPointerAddressSpace(); size_t iptr = workItem->getOperand(Addr).getPointer(); TypedValue argShadow = shadowContext.getValue(workItem, CI->getArgOperand(0)); TypedValue newElemShadow; TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); for (unsigned i = 0; i < result.num; ++i) { 
if (!ShadowContext::isCleanValue(argShadow, i)) { newElemShadow = ShadowContext::getPoisonedValue(result.size); } else { newElemShadow = ShadowContext::getCleanValue(result.size); } memcpy(newShadow.data, newElemShadow.data, result.size); } storeShadowMemory(addrSpace, iptr, newShadow); shadowValues->setValue(CI, newShadow); // Check shadow of address TypedValue addrShadow = shadowContext.getValue(workItem, Addr); if (!ShadowContext::isCleanValue(addrShadow)) { logUninitializedAddress(addrSpace, iptr); } return true; } else if (name == "frexp" || name == "lgamma_r") { const llvm::Value* Addr = CI->getArgOperand(1); unsigned addrSpace = Addr->getType()->getPointerAddressSpace(); size_t iptr = workItem->getOperand(Addr).getPointer(); TypedValue argShadow = shadowContext.getValue(workItem, CI->getArgOperand(0)); TypedValue newElemShadow; TypedValue newElemIntShadow; TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); TypedValue newIntShadow = {newShadow.size, newShadow.num, shadowContext.getMemoryPool()->alloc(4)}; for (unsigned i = 0; i < result.num; ++i) { if (!ShadowContext::isCleanValue(argShadow, i)) { newElemShadow = ShadowContext::getPoisonedValue(result.size); newElemIntShadow = ShadowContext::getPoisonedValue(4); } else { newElemShadow = ShadowContext::getCleanValue(result.size); newElemIntShadow = ShadowContext::getCleanValue(4); } memcpy(newIntShadow.data, newElemIntShadow.data, 4); memcpy(newShadow.data, newElemShadow.data, result.size); } storeShadowMemory(addrSpace, iptr, newIntShadow); shadowValues->setValue(CI, newShadow); // Check shadow of address TypedValue addrShadow = shadowContext.getValue(workItem, Addr); if (!ShadowContext::isCleanValue(addrShadow)) { logUninitializedAddress(addrSpace, iptr); } return true; } else if (name == "remquo") { const llvm::Value* Addr = CI->getArgOperand(2); unsigned addrSpace = Addr->getType()->getPointerAddressSpace(); size_t iptr = workItem->getOperand(Addr).getPointer(); TypedValue arg0Shadow = shadowContext.getValue(workItem, CI->getArgOperand(0)); TypedValue arg1Shadow = shadowContext.getValue(workItem, CI->getArgOperand(1)); TypedValue newElemShadow; TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); for (unsigned i = 0; i < result.num; ++i) { if (!ShadowContext::isCleanValue(arg0Shadow, i) || !ShadowContext::isCleanValue(arg1Shadow, i)) { newElemShadow = ShadowContext::getPoisonedValue(result.size); } else { newElemShadow = ShadowContext::getCleanValue(result.size); } storeShadowMemory(addrSpace, iptr + i * 4, newElemShadow); memcpy(newShadow.data, newElemShadow.data, result.size); } shadowValues->setValue(CI, newShadow); // Check shadow of address TypedValue addrShadow = shadowContext.getValue(workItem, Addr); if (!ShadowContext::isCleanValue(addrShadow)) { logUninitializedAddress(addrSpace, iptr); } return true; } else if (name == "shuffle") { TypedValue mask = workItem->getOperand(CI->getArgOperand(1)); TypedValue maskShadow = shadowContext.getValue(workItem, CI->getArgOperand(1)); TypedValue shadow = shadowContext.getValue(workItem, CI->getArgOperand(0)); TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); for (unsigned i = 0; i < newShadow.num; ++i) { if (!ShadowContext::isCleanValue(maskShadow, i)) { TypedValue v = ShadowContext::getPoisonedValue(newShadow.size); memcpy(newShadow.data + i * newShadow.size, v.data, newShadow.size); } else { size_t srcOffset = (mask.getUInt(i) % shadow.size) * shadow.size; memcpy(newShadow.data + i * newShadow.size, shadow.data + srcOffset, 
newShadow.size); } } shadowValues->setValue(CI, newShadow); return true; } else if (name == "shuffle2") { TypedValue mask = workItem->getOperand(CI->getArgOperand(2)); TypedValue maskShadow = shadowContext.getValue(workItem, CI->getArgOperand(2)); TypedValue shadow[] = { shadowContext.getValue(workItem, CI->getArgOperand(0)), shadowContext.getValue(workItem, CI->getArgOperand(1))}; TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); for (unsigned i = 0; i < newShadow.num; ++i) { uint64_t m = 1; const llvm::Type* arg0Type = CI->getArgOperand(0)->getType(); if (arg0Type->isVectorTy()) { auto vecType = llvm::cast(arg0Type); m = vecType->getNumElements(); } uint64_t src = 0; uint64_t index = mask.getUInt(i) % (2 * m); if (index >= m) { index -= m; src = 1; } if (!ShadowContext::isCleanValue(maskShadow, i)) { TypedValue v = ShadowContext::getPoisonedValue(newShadow.size); memcpy(newShadow.data + i * newShadow.size, v.data, newShadow.size); } else { size_t srcOffset = index * shadow[src].size; memcpy(newShadow.data + i * newShadow.size, shadow[src].data + srcOffset, newShadow.size); } } shadowValues->setValue(CI, newShadow); return true; } else if (name == "any") { const llvm::Value* argOp = CI->getArgOperand(0); const llvm::Type* argType = argOp->getType(); TypedValue shadow = shadowContext.getValue(workItem, argOp); unsigned num = 1; if (argType->isVectorTy()) { num = llvm::cast(argType)->getNumElements(); } for (unsigned i = 0; i < num; ++i) { if (ShadowContext::isCleanValue(shadow, i)) { shadowValues->setValue(CI, ShadowContext::getCleanValue(result.size)); return true; } } shadowValues->setValue(CI, ShadowContext::getPoisonedValue(result.size)); return true; } else if (name == "select") { TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); TypedValue shadow[] = { shadowContext.getValue(workItem, CI->getArgOperand(0)), shadowContext.getValue(workItem, CI->getArgOperand(1))}; TypedValue selectShadow = shadowContext.getValue(workItem, CI->getArgOperand(2)); for (unsigned i = 0; i < newShadow.num; ++i) { int64_t c = workItem->getOperand(CI->getArgOperand(2)).getSInt(i); uint64_t src = ((newShadow.num > 1) ? c & INT64_MIN : c) ? 
1 : 0; if (!ShadowContext::isCleanValue(selectShadow, i)) { TypedValue v = ShadowContext::getPoisonedValue(newShadow.size); memcpy(newShadow.data + i * newShadow.size, v.data, newShadow.size); } else { size_t srcOffset = i * shadow[src].size; memcpy(newShadow.data + i * newShadow.size, shadow[src].data + srcOffset, newShadow.size); } } shadowValues->setValue(CI, newShadow); return true; } else if (name.compare(0, 10, "vload_half") == 0 || name.compare(0, 11, "vloada_half") == 0) { const llvm::Value* BaseOp = CI->getArgOperand(1); const llvm::Value* OffsetOp = CI->getArgOperand(0); size_t base = workItem->getOperand(BaseOp).getPointer(); unsigned int addressSpace = BaseOp->getType()->getPointerAddressSpace(); uint64_t offset = workItem->getOperand(OffsetOp).getUInt(); size_t address; if (name.compare(0, 6, "vloada") == 0 && result.num == 3) { address = base + offset * sizeof(cl_half) * 4; } else { address = base + offset * sizeof(cl_half) * result.num; } TypedValue halfShadow = { sizeof(cl_half), result.num, shadowContext.getMemoryPool()->alloc(2 * result.num)}; TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); loadShadowMemory(addressSpace, address, halfShadow, workItem); TypedValue pv = ShadowContext::getPoisonedValue(newShadow.size); TypedValue cv = ShadowContext::getCleanValue(newShadow.size); // Convert to float shadows for (unsigned i = 0; i < newShadow.num; ++i) { if (!ShadowContext::isCleanValue(halfShadow, i)) { memcpy(newShadow.data + i * newShadow.size, pv.data, newShadow.size); } else { memcpy(newShadow.data + i * newShadow.size, cv.data, newShadow.size); } } shadowValues->setValue(CI, newShadow); // Check shadow of address TypedValue baseShadow = shadowContext.getValue(workItem, BaseOp); TypedValue offsetShadow = shadowContext.getValue(workItem, OffsetOp); if (!ShadowContext::isCleanValue(baseShadow) || !ShadowContext::isCleanValue(offsetShadow)) { logUninitializedAddress(addressSpace, address, false); } return true; } else if (name.compare(0, 11, "vstore_half") == 0 || name.compare(0, 12, "vstorea_half") == 0) { const llvm::Value* value = CI->getArgOperand(0); unsigned size = getTypeSize(value->getType()); if (isVector3(value)) { // 3-element vectors are same size as 4-element vectors, // but vstore address offset shouldn't use this. 
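// Scale the padded 4-element size back to three elements so that num
// (size / sizeof(float)) and the half-precision shadow written below cover
// only the three real elements of the vector.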
size = (size / 4) * 3; } const llvm::Value* BaseOp = CI->getArgOperand(2); const llvm::Value* OffsetOp = CI->getArgOperand(1); size_t base = workItem->getOperand(BaseOp).getPointer(); unsigned int addressSpace = BaseOp->getType()->getPointerAddressSpace(); uint64_t offset = workItem->getOperand(OffsetOp).getUInt(); // Convert to halfs TypedValue shadow = shadowContext.getValue(workItem, value); unsigned num = size / sizeof(float); size = num * sizeof(cl_half); TypedValue halfShadow = {sizeof(cl_half), num, shadowContext.getMemoryPool()->alloc(2 * num)}; TypedValue pv = ShadowContext::getPoisonedValue(halfShadow.size); TypedValue cv = ShadowContext::getCleanValue(halfShadow.size); for (unsigned i = 0; i < num; i++) { if (!ShadowContext::isCleanValue(shadow, i)) { memcpy(halfShadow.data + i * halfShadow.size, pv.data, halfShadow.size); } else { memcpy(halfShadow.data + i * halfShadow.size, cv.data, halfShadow.size); } } size_t address; if (name.compare(0, 7, "vstorea") == 0 && num == 3) { address = base + offset * sizeof(cl_half) * 4; } else { address = base + offset * sizeof(cl_half) * num; } storeShadowMemory(addressSpace, address, halfShadow, workItem); // Check shadow of address TypedValue baseShadow = shadowContext.getValue(workItem, BaseOp); TypedValue offsetShadow = shadowContext.getValue(workItem, OffsetOp); if (!ShadowContext::isCleanValue(baseShadow) || !ShadowContext::isCleanValue(offsetShadow)) { logUninitializedAddress(addressSpace, address); } return true; } else if (name.compare(0, 5, "vload") == 0) { TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); const llvm::Value* BaseOp = CI->getArgOperand(1); const llvm::Value* OffsetOp = CI->getArgOperand(0); unsigned int addressSpace = BaseOp->getType()->getPointerAddressSpace(); size_t base = workItem->getOperand(BaseOp).getPointer(); uint64_t offset = workItem->getOperand(OffsetOp).getUInt(); size_t size = newShadow.size * newShadow.num; size_t address = base + offset * size; loadShadowMemory(addressSpace, address, newShadow, workItem); shadowValues->setValue(CI, newShadow); // Check shadow of address TypedValue baseShadow = shadowContext.getValue(workItem, BaseOp); TypedValue offsetShadow = shadowContext.getValue(workItem, OffsetOp); if (!ShadowContext::isCleanValue(baseShadow) || !ShadowContext::isCleanValue(offsetShadow)) { logUninitializedAddress(addressSpace, address, false); } return true; } else if (name.compare(0, 6, "vstore") == 0) { const llvm::Value* value = CI->getArgOperand(0); unsigned size = getTypeSize(value->getType()); if (isVector3(value)) { // 3-element vectors are same size as 4-element vectors, // but vstore address offset shouldn't use this. 
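// As in vstore_half above: drop the padding element so that the byte
// offset (offset * size) matches the tightly packed layout that vstore3
// uses for consecutive offsets.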
size = (size / 4) * 3; } const llvm::Value* BaseOp = CI->getArgOperand(2); const llvm::Value* OffsetOp = CI->getArgOperand(1); unsigned int addressSpace = BaseOp->getType()->getPointerAddressSpace(); size_t base = workItem->getOperand(BaseOp).getPointer(); uint64_t offset = workItem->getOperand(OffsetOp).getUInt(); size_t address = base + offset * size; TypedValue shadow = shadowContext.getValue(workItem, value); storeShadowMemory(addressSpace, address, shadow, workItem); // Check shadow of address TypedValue baseShadow = shadowContext.getValue(workItem, BaseOp); TypedValue offsetShadow = shadowContext.getValue(workItem, OffsetOp); if (!ShadowContext::isCleanValue(baseShadow) || !ShadowContext::isCleanValue(offsetShadow)) { logUninitializedAddress(addressSpace, address); } return true; } else if (name == "read_imagef" || name == "read_imagei" || name == "read_imageui") { Image* image = *(Image**)(workItem->getOperand(CI->getArgOperand(0)).data); TypedValue shadowImage = shadowContext.getValue(workItem, CI->getArgOperand(0)); TypedValue newShadow; // FIXME: The new shadow should be loaded from memory // and not generated based on the image description // However, this currently requires to duplicate all functionality // in WorkItemBuiltins.cpp for the image function // Has to be changed in combination with the write functions size_t address = image->address; if (!ShadowContext::isCleanImage(shadowImage)) { newShadow = ShadowContext::getPoisonedValue(result); } else { newShadow = ShadowContext::getCleanValue(result); } shadowValues->setValue(CI, newShadow); // Check image if (!ShadowContext::isCleanImageAddress(shadowImage)) { logUninitializedAddress(AddrSpaceGlobal, address, false); } return true; } else if (name == "write_imagef" || name == "write_imagei" || name == "write_imageui") { Image* image = *(Image**)(workItem->getOperand(CI->getArgOperand(0)).data); TypedValue shadowImage = shadowContext.getValue(workItem, CI->getArgOperand(0)); // FIXME: The actual shadow of the image should be stored to memory // However, this currently requires to duplicate all functionality // in WorkItemBuiltins.cpp for the image function // Has to be changed in combination with the read functions size_t address = image->address; // Check image if (!ShadowContext::isCleanImageAddress(shadowImage)) { logUninitializedAddress(AddrSpaceGlobal, address); } return true; } else if (name.compare(0, 10, "get_image_") == 0) { TypedValue shadowImage = shadowContext.getValue(workItem, CI->getArgOperand(0)); TypedValue newShadow = { result.size, result.num, shadowContext.getMemoryPool()->alloc(result.size * result.num)}; if (name == "get_image_array_size") { newShadow.setUInt(((Image*)shadowImage.data)->desc.image_array_size); } else if (name == "get_image_dim") { newShadow.setUInt(((Image*)shadowImage.data)->desc.image_width, 0); newShadow.setUInt(((Image*)shadowImage.data)->desc.image_height, 1); if (newShadow.num > 2) { newShadow.setUInt(((Image*)shadowImage.data)->desc.image_depth, 2); newShadow.setUInt(0, 3); } } else if (name == "get_image_depth") { newShadow.setUInt(((Image*)shadowImage.data)->desc.image_depth); } else if (name == "get_image_height") { newShadow.setUInt(((Image*)shadowImage.data)->desc.image_height); } else if (name == "get_image_width") { newShadow.setUInt(((Image*)shadowImage.data)->desc.image_width); } else if (name == "get_image_channel_order") { newShadow.setUInt(((Image*)shadowImage.data)->format.image_channel_order); } else if (name == "get_image_channel_data_type") { newShadow.setUInt( 
((Image*)shadowImage.data)->format.image_channel_data_type); } shadowValues->setValue(CI, newShadow); return true; } return false; } void Uninitialized::handleIntrinsicInstruction(const WorkItem* workItem, const llvm::IntrinsicInst* I) { switch (I->getIntrinsicID()) { case llvm::Intrinsic::fmuladd: #if LLVM_VERSION >= 120 case llvm::Intrinsic::smax: case llvm::Intrinsic::smin: case llvm::Intrinsic::umax: case llvm::Intrinsic::umin: #endif { SimpleOr(workItem, I); break; } case llvm::Intrinsic::memcpy: { const llvm::MemCpyInst* memcpyInst = (const llvm::MemCpyInst*)I; const llvm::Value* dstOp = memcpyInst->getDest(); const llvm::Value* srcOp = memcpyInst->getSource(); size_t dst = workItem->getOperand(dstOp).getPointer(); size_t src = workItem->getOperand(srcOp).getPointer(); size_t size = workItem->getOperand(memcpyInst->getLength()).getUInt(); unsigned dstAddrSpace = memcpyInst->getDestAddressSpace(); unsigned srcAddrSpace = memcpyInst->getSourceAddressSpace(); const llvm::PointerType* srcPtrTy = llvm::dyn_cast(memcpyInst->getSource()->getType()); if (dstAddrSpace != AddrSpacePrivate && srcPtrTy->getElementType()->isStructTy()) { checkStructMemcpy(workItem, memcpyInst->getSource()); } copyShadowMemory(dstAddrSpace, dst, srcAddrSpace, src, size, workItem, NULL, true); // Check shadow of src address TypedValue srcShadow = shadowContext.getValue(workItem, srcOp); if (!ShadowContext::isCleanValue(srcShadow)) { logUninitializedAddress(srcAddrSpace, src, false); } // Check shadow of dst address TypedValue dstShadow = shadowContext.getValue(workItem, dstOp); if (!ShadowContext::isCleanValue(dstShadow)) { logUninitializedAddress(dstAddrSpace, dst); } break; } case llvm::Intrinsic::bswap: { // TODO: byte-level accuracy SimpleOr(workItem, I); break; } case llvm::Intrinsic::memset: { const llvm::MemSetInst* memsetInst = (const llvm::MemSetInst*)I; const llvm::Value* Addr = memsetInst->getDest(); size_t dst = workItem->getOperand(Addr).getPointer(); unsigned size = workItem->getOperand(memsetInst->getLength()).getUInt(); unsigned addrSpace = memsetInst->getDestAddressSpace(); TypedValue shadowValue = {size, 1, new unsigned char[size]}; memset( shadowValue.data, shadowContext.getValue(workItem, memsetInst->getArgOperand(1)).getUInt(), size); storeShadowMemory(addrSpace, dst, shadowValue, workItem, NULL, true); delete[] shadowValue.data; // Check shadow of address TypedValue addrShadow = shadowContext.getValue(workItem, Addr); if (!ShadowContext::isCleanValue(addrShadow)) { logUninitializedAddress(addrSpace, dst); } break; } case llvm::Intrinsic::dbg_declare: // Do nothing break; case llvm::Intrinsic::dbg_value: // Do nothing break; case llvm::Intrinsic::lifetime_end: // Do nothing break; case llvm::Intrinsic::lifetime_start: // Do nothing break; default: FATAL_ERROR("Unsupported intrinsic %s", llvm::Intrinsic::getName(I->getIntrinsicID()).data()); } } void Uninitialized::hostMemoryStore(const Memory* memory, size_t address, size_t size, const uint8_t* storeData) { if (memory->getAddressSpace() == AddrSpaceGlobal) { TypedValue v = ShadowContext::getCleanValue(size); allocAndStoreShadowMemory(AddrSpaceGlobal, address, v); } } void Uninitialized::instructionExecuted(const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) { #ifdef DUMP_SHADOW cout << "++++++++++++++++++++++++++++++++++++++++++++" << endl; instruction->dump(); #endif ShadowWorkItem* shadowWorkItem = shadowContext.getShadowWorkItem(workItem); ShadowValues* shadowValues = shadowWorkItem->getValues(); switch 
(instruction->getOpcode()) { case llvm::Instruction::Add: { VectorOr(workItem, instruction); break; } case llvm::Instruction::Alloca: { const llvm::AllocaInst* allocaInst = ((const llvm::AllocaInst*)instruction); size_t address = result.getPointer(); shadowValues->setValue(instruction, ShadowContext::getCleanValue(instruction)); TypedValue v = ShadowContext::getPoisonedValue(allocaInst->getAllocatedType()); allocAndStoreShadowMemory(AddrSpacePrivate, address, v, workItem); break; } case llvm::Instruction::And: { VectorOr(workItem, instruction); break; } case llvm::Instruction::AShr: { TypedValue S0 = shadowContext.getValue(workItem, instruction->getOperand(0)); TypedValue S1 = shadowContext.getValue(workItem, instruction->getOperand(1)); if (!ShadowContext::isCleanValue(S1)) { shadowValues->setValue(instruction, ShadowContext::getPoisonedValue(instruction)); } else { TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); TypedValue Shift = workItem->getOperand(instruction->getOperand(1)); uint64_t shiftMask = (S0.num > 1 ? S0.size : max((size_t)S0.size, sizeof(uint32_t))) * 8 - 1; for (unsigned i = 0; i < S0.num; i++) { newShadow.setUInt(S0.getSInt(i) >> (Shift.getUInt(i) & shiftMask), i); } shadowValues->setValue(instruction, newShadow); } break; } case llvm::Instruction::BitCast: { TypedValue shadow = shadowContext.getValue(workItem, instruction->getOperand(0)); TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); memcpy(newShadow.data, shadow.data, newShadow.size * newShadow.num); shadowValues->setValue(instruction, newShadow); break; } case llvm::Instruction::Br: { checkAllOperandsDefined(workItem, instruction); #ifdef DUMP_SHADOW // Insert pseudo value to keep numbering shadowValues->setValue(instruction, ShadowContext::getCleanValue(3)); #endif break; } case llvm::Instruction::Call: { const llvm::CallInst* callInst = ((const llvm::CallInst*)instruction); const llvm::Function* function = callInst->getCalledFunction(); // Check for indirect function calls if (!function) { // Resolve indirect function pointer const llvm::Value* func = callInst->getCalledOperand(); const llvm::Value* funcPtr = ((const llvm::User*)func)->getOperand(0); function = (const llvm::Function*)funcPtr; } // For inline asm, do the usual thing: check argument shadow and mark all // outputs as clean. Note that any side effects of the inline asm that are // not immediately visible in its constraints are not handled. 
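// In particular, memory written through a pointer operand of the asm keeps
// whatever shadow it had before; only the asm's result value is marked
// clean below.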
if (callInst->isInlineAsm()) { checkAllOperandsDefined(workItem, instruction); shadowValues->setValue(instruction, ShadowContext::getCleanValue(instruction)); break; } if (const llvm::IntrinsicInst* II = llvm::dyn_cast(instruction)) { handleIntrinsicInstruction(workItem, II); break; } if (function->isDeclaration()) { if (!handleBuiltinFunction(workItem, function->getName().str(), callInst, result)) { // Handle external function calls checkAllOperandsDefined(workItem, instruction); if (callInst->getType()->isSized()) { // Set return value only if function is non-void shadowValues->setValue(instruction, ShadowContext::getCleanValue(instruction)); } } break; } assert(!function->isVarArg() && "Variadic functions are not supported!"); assert(!llvm::isa(instruction) && "intrinsics are handled elsewhere"); // Fresh values for function ShadowFrame* values = shadowValues->createCleanShadowFrame(); llvm::Function::const_arg_iterator argItr; for (argItr = function->arg_begin(); argItr != function->arg_end(); argItr++) { const llvm::Value* Val = callInst->getArgOperand(argItr->getArgNo()); if (!Val->getType()->isSized()) { continue; } if (argItr->hasByValAttr()) { assert(Val->getType()->isPointerTy() && "ByVal argument is not a pointer!"); // Make new copy of shadow in private memory size_t origShadowAddress = workItem->getOperand(Val).getPointer(); size_t newShadowAddress = workItem->getOperand(&*argItr).getPointer(); ShadowMemory* mem = shadowWorkItem->getPrivateMemory(); unsigned char* origShadowData = (unsigned char*)mem->getPointer(origShadowAddress); size_t size = getTypeSize(argItr->getType()->getPointerElementType()); // Set new shadow memory TypedValue v = ShadowContext::getCleanValue(size); memcpy(v.data, origShadowData, size); allocAndStoreShadowMemory(AddrSpacePrivate, newShadowAddress, v, workItem); values->setValue(&*argItr, ShadowContext::getCleanValue(&*argItr)); } else { TypedValue newShadow = shadowContext.getMemoryPool()->clone( shadowContext.getValue(workItem, Val)); values->setValue(&*argItr, newShadow); } } // Now, get the shadow for the RetVal. 
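// Remember the call site on the callee's fresh shadow frame; when the
// callee's Ret executes, the frame is popped and the return value's shadow
// is copied back to this CallInst (see the Ret case below).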
if (callInst->getType()->isSized()) { values->setCall(callInst); } shadowValues->pushFrame(values); break; } case llvm::Instruction::ExtractElement: { const llvm::ExtractElementInst* extractInst = ((const llvm::ExtractElementInst*)instruction); TypedValue indexShadow = shadowContext.getValue(workItem, extractInst->getIndexOperand()); if (!ShadowContext::isCleanValue(indexShadow)) { logUninitializedIndex(); } TypedValue vectorShadow = shadowContext.getValue(workItem, extractInst->getVectorOperand()); TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); unsigned index = workItem->getOperand(extractInst->getIndexOperand()).getUInt(); memcpy(newShadow.data, vectorShadow.data + newShadow.size * index, newShadow.size); shadowValues->setValue(instruction, newShadow); break; } case llvm::Instruction::ExtractValue: { const llvm::ExtractValueInst* extractInst = ((const llvm::ExtractValueInst*)instruction); const llvm::Value* Agg = extractInst->getAggregateOperand(); TypedValue ResShadow = shadowContext.getMemoryPool()->clone(result); llvm::ArrayRef indices = extractInst->getIndices(); // Compute offset for target value int offset = 0; const llvm::Type* type = Agg->getType(); for (unsigned i = 0; i < indices.size(); i++) { if (type->isArrayTy()) { type = type->getArrayElementType(); offset += getTypeSize(type) * indices[i]; } else if (type->isStructTy()) { offset += getStructMemberOffset((const llvm::StructType*)type, indices[i]); type = type->getStructElementType(indices[i]); } else { FATAL_ERROR("Unsupported aggregate type: %d", type->getTypeID()) } } // Copy target value to result memcpy(ResShadow.data, shadowContext.getValue(workItem, Agg).data + offset, getTypeSize(type)); shadowValues->setValue(instruction, ResShadow); break; } case llvm::Instruction::FAdd: { VectorOr(workItem, instruction); break; } case llvm::Instruction::FCmp: { VectorOr(workItem, instruction); break; } case llvm::Instruction::FDiv: { VectorOr(workItem, instruction); break; } case llvm::Instruction::FMul: { VectorOr(workItem, instruction); break; } case llvm::Instruction::FNeg: { VectorOr(workItem, instruction); break; } case llvm::Instruction::FPExt: { SimpleOr(workItem, instruction); break; } case llvm::Instruction::FPToSI: { VectorOr(workItem, instruction); break; } case llvm::Instruction::FPToUI: { VectorOr(workItem, instruction); break; } case llvm::Instruction::FPTrunc: { SimpleOr(workItem, instruction); break; } case llvm::Instruction::FRem: { VectorOr(workItem, instruction); break; } case llvm::Instruction::FSub: { VectorOr(workItem, instruction); break; } case llvm::Instruction::GetElementPtr: { SimpleOr(workItem, instruction); break; } case llvm::Instruction::ICmp: { VectorOr(workItem, instruction); break; } case llvm::Instruction::InsertElement: { TypedValue indexShadow = shadowContext.getValue(workItem, instruction->getOperand(2)); if (!ShadowContext::isCleanValue(indexShadow)) { logUninitializedIndex(); } TypedValue vectorShadow = shadowContext.getValue(workItem, instruction->getOperand(0)); TypedValue elementShadow = shadowContext.getValue(workItem, instruction->getOperand(1)); TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); unsigned index = workItem->getOperand(instruction->getOperand(2)).getUInt(); memcpy(newShadow.data, vectorShadow.data, newShadow.size * newShadow.num); memcpy(newShadow.data + index * newShadow.size, elementShadow.data, newShadow.size); shadowValues->setValue(instruction, newShadow); break; } case llvm::Instruction::InsertValue: { const 
llvm::InsertValueInst* insertInst = (const llvm::InsertValueInst*)instruction; TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); // Load original aggregate data const llvm::Value* agg = insertInst->getAggregateOperand(); memcpy(newShadow.data, shadowContext.getValue(workItem, agg).data, newShadow.size * newShadow.num); // Compute offset for inserted value int offset = 0; llvm::ArrayRef indices = insertInst->getIndices(); const llvm::Type* type = agg->getType(); for (unsigned i = 0; i < indices.size(); i++) { if (type->isArrayTy()) { type = type->getArrayElementType(); offset += getTypeSize(type) * indices[i]; } else if (type->isStructTy()) { offset += getStructMemberOffset((const llvm::StructType*)type, indices[i]); type = type->getStructElementType(indices[i]); } else { FATAL_ERROR("Unsupported aggregate type: %d", type->getTypeID()) } } // Copy inserted value into result const llvm::Value* value = insertInst->getInsertedValueOperand(); memcpy(newShadow.data + offset, shadowContext.getValue(workItem, value).data, getTypeSize(value->getType())); shadowValues->setValue(instruction, newShadow); break; } case llvm::Instruction::IntToPtr: { TypedValue shadow = shadowContext.getValue(workItem, instruction->getOperand(0)); TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); for (unsigned i = 0; i < newShadow.num; i++) { newShadow.setPointer(shadow.getUInt(i), i); } shadowValues->setValue(instruction, newShadow); break; } case llvm::Instruction::Load: { assert(instruction->getType()->isSized() && "Load type must have size"); const llvm::LoadInst* loadInst = ((const llvm::LoadInst*)instruction); const llvm::Value* Addr = loadInst->getPointerOperand(); size_t address = workItem->getOperand(Addr).getPointer(); unsigned addrSpace = loadInst->getPointerAddressSpace(); TypedValue v = shadowContext.getMemoryPool()->clone(result); loadShadowMemory(addrSpace, address, v, workItem); shadowValues->setValue(instruction, v); // Check shadow of address TypedValue addrShadow = shadowContext.getValue(workItem, Addr); if (!ShadowContext::isCleanValue(addrShadow)) { logUninitializedAddress(addrSpace, address, false); } // if (I.isAtomic()) // I.setOrdering(addAcquireOrdering(I.getOrdering())); break; } case llvm::Instruction::LShr: { TypedValue S0 = shadowContext.getValue(workItem, instruction->getOperand(0)); TypedValue S1 = shadowContext.getValue(workItem, instruction->getOperand(1)); if (!ShadowContext::isCleanValue(S1)) { shadowValues->setValue(instruction, ShadowContext::getPoisonedValue(instruction)); } else { TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); TypedValue Shift = workItem->getOperand(instruction->getOperand(1)); uint64_t shiftMask = (S0.num > 1 ? 
S0.size : max((size_t)S0.size, sizeof(uint32_t))) * 8 - 1; for (unsigned i = 0; i < S0.num; i++) { newShadow.setUInt(S0.getUInt(i) >> (Shift.getUInt(i) & shiftMask), i); } shadowValues->setValue(instruction, newShadow); } break; } case llvm::Instruction::Mul: { VectorOr(workItem, instruction); break; } case llvm::Instruction::Or: { VectorOr(workItem, instruction); break; } case llvm::Instruction::PHI: { const llvm::PHINode* phiNode = (const llvm::PHINode*)instruction; const llvm::Value* value = phiNode->getIncomingValueForBlock(workItem->getPreviousBlock()); TypedValue shadowValue = shadowContext.getValue(workItem, value); shadowValues->setValue(instruction, shadowValue); break; } case llvm::Instruction::PtrToInt: { TypedValue shadow = shadowContext.getValue(workItem, instruction->getOperand(0)); TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); for (unsigned i = 0; i < newShadow.num; i++) { newShadow.setUInt(shadow.getPointer(i), i); } shadowValues->setValue(instruction, newShadow); break; } case llvm::Instruction::Ret: { const llvm::ReturnInst* retInst = ((const llvm::ReturnInst*)instruction); const llvm::Value* RetVal = retInst->getReturnValue(); if (RetVal) { // Value *ShadowPtr = getValuePtrForRetval(RetVal, IRB); // if (CheckReturnValue) { // insertShadowCheck(RetVal, &I); // Value *Shadow = getCleanValue(RetVal); // IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment); //} else { TypedValue retValShadow = shadowContext.getMemoryPool()->clone( shadowContext.getValue(workItem, RetVal)); const llvm::CallInst* callInst = shadowValues->getCall(); shadowValues->popFrame(); shadowValues->setValue(callInst, retValShadow); //} } else { #ifdef DUMP_SHADOW // Insert pseudo value to keep numbering shadowValues->setValue(instruction, ShadowContext::getCleanValue(3)); #endif shadowValues->popFrame(); } break; } case llvm::Instruction::SDiv: { VectorOr(workItem, instruction); break; } case llvm::Instruction::Select: { const llvm::SelectInst* selectInst = (const llvm::SelectInst*)instruction; TypedValue opCondition = workItem->getOperand(selectInst->getCondition()); TypedValue conditionShadow = shadowContext.getValue(workItem, selectInst->getCondition()); TypedValue newShadow; if (!ShadowContext::isCleanValue(conditionShadow)) { newShadow = ShadowContext::getPoisonedValue(instruction); } else { newShadow = shadowContext.getMemoryPool()->clone(result); for (unsigned i = 0; i < result.num; i++) { const bool cond = selectInst->getCondition()->getType()->isVectorTy() ? opCondition.getUInt(i) : opCondition.getUInt(); const llvm::Value* op = cond ? selectInst->getTrueValue() : selectInst->getFalseValue(); memcpy(newShadow.data + i * newShadow.size, shadowContext.getValue(workItem, op).data + i * newShadow.size, newShadow.size); } } shadowValues->setValue(instruction, newShadow); break; } case llvm::Instruction::SExt: { const llvm::Value* operand = instruction->getOperand(0); TypedValue shadow = shadowContext.getValue(workItem, operand); TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); for (unsigned i = 0; i < newShadow.num; i++) { int64_t val = shadow.getSInt(i); if (operand->getType()->getPrimitiveSizeInBits() == 1) { val = val ? 
-1 : 0; } newShadow.setSInt(val, i); } shadowValues->setValue(instruction, newShadow); break; } case llvm::Instruction::Shl: { TypedValue S0 = shadowContext.getValue(workItem, instruction->getOperand(0)); TypedValue S1 = shadowContext.getValue(workItem, instruction->getOperand(1)); if (!ShadowContext::isCleanValue(S1)) { shadowValues->setValue(instruction, ShadowContext::getPoisonedValue(instruction)); } else { TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); TypedValue Shift = workItem->getOperand(instruction->getOperand(1)); uint64_t shiftMask = (S0.num > 1 ? S0.size : max((size_t)S0.size, sizeof(uint32_t))) * 8 - 1; for (unsigned i = 0; i < S0.num; i++) { newShadow.setUInt(S0.getUInt(i) << (Shift.getUInt(i) & shiftMask), i); } shadowValues->setValue(instruction, newShadow); } break; } case llvm::Instruction::ShuffleVector: { const llvm::ShuffleVectorInst* shuffleInst = (const llvm::ShuffleVectorInst*)instruction; const llvm::Value* v1 = shuffleInst->getOperand(0); const llvm::Value* v2 = shuffleInst->getOperand(1); TypedValue newShadow = ShadowContext::getCleanValue(result); TypedValue pv = ShadowContext::getPoisonedValue(newShadow.size); unsigned num = llvm::cast(v1->getType())->getNumElements(); for (unsigned i = 0; i < newShadow.num; i++) { int index = shuffleInst->getMaskValue(i); if (index == llvm::Value::UndefValueVal) { // Undef value are poisoned memcpy(newShadow.data + i * newShadow.size, pv.data, newShadow.size); continue; } const llvm::Value* src = v1; if (index >= num) { index -= num; src = v2; } TypedValue v = shadowContext.getValue(workItem, src); size_t srcOffset = index * newShadow.size; memcpy(newShadow.data + i * newShadow.size, v.data + srcOffset, newShadow.size); } shadowValues->setValue(instruction, newShadow); break; } case llvm::Instruction::SIToFP: { VectorOr(workItem, instruction); break; } case llvm::Instruction::SRem: { VectorOr(workItem, instruction); break; } case llvm::Instruction::Store: { PARANOID_CHECK(workItem, instruction); const llvm::StoreInst* storeInst = ((const llvm::StoreInst*)instruction); const llvm::Value* Val = storeInst->getValueOperand(); const llvm::Value* Addr = storeInst->getPointerOperand(); size_t address = workItem->getOperand(Addr).getPointer(); unsigned addrSpace = storeInst->getPointerAddressSpace(); TypedValue shadowVal = storeInst->isAtomic() ? 
ShadowContext::getCleanValue(Val) : shadowContext.getValue(workItem, Val); storeShadowMemory(addrSpace, address, shadowVal, workItem); // Check shadow of address TypedValue addrShadow = shadowContext.getValue(workItem, Addr); if (!ShadowContext::isCleanValue(addrShadow)) { logUninitializedAddress(addrSpace, address); } break; } case llvm::Instruction::Sub: { VectorOr(workItem, instruction); break; } case llvm::Instruction::Switch: { checkAllOperandsDefined(workItem, instruction); #ifdef DUMP_SHADOW // Insert pseudo value to keep numbering shadowValues->setValue(instruction, ShadowContext::getCleanValue(3)); #endif break; } case llvm::Instruction::Trunc: { TypedValue shadow = shadowContext.getValue(workItem, instruction->getOperand(0)); TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); for (unsigned i = 0; i < newShadow.num; i++) { memcpy(newShadow.data + i * newShadow.size, shadow.data + i * shadow.size, newShadow.size); } shadowValues->setValue(instruction, newShadow); break; } case llvm::Instruction::UDiv: { VectorOr(workItem, instruction); break; } case llvm::Instruction::UIToFP: { VectorOr(workItem, instruction); break; } case llvm::Instruction::URem: { VectorOr(workItem, instruction); break; } case llvm::Instruction::Unreachable: FATAL_ERROR("Encountered unreachable instruction"); case llvm::Instruction::Xor: { VectorOr(workItem, instruction); break; } case llvm::Instruction::ZExt: { TypedValue shadow = shadowContext.getValue(workItem, instruction->getOperand(0)); TypedValue newShadow = shadowContext.getMemoryPool()->clone(result); for (unsigned i = 0; i < newShadow.num; i++) { newShadow.setUInt(shadow.getUInt(i), i); } shadowValues->setValue(instruction, newShadow); break; } default: FATAL_ERROR("Unsupported instruction: %s", instruction->getOpcodeName()); } #ifdef DUMP_SHADOW if (shadowContext.hasValue(workItem, instruction)) { cout << shadowContext.getValue(workItem, instruction) << endl; } #endif } void Uninitialized::kernelBegin(const KernelInvocation* kernelInvocation) { const Kernel* kernel = kernelInvocation->getKernel(); // Initialise kernel arguments and global variables for (auto value = kernel->values_begin(); value != kernel->values_end(); value++) { const llvm::Type* type = value->first->getType(); if (!type->isSized()) { continue; } if (type->isPointerTy()) { switch (type->getPointerAddressSpace()) { case AddrSpaceConstant: { // Constants // value->second.data == ptr // value->second.size == ptr size TypedValue cleanValue = m_pool.clone(ShadowContext::getCleanValue(value->first)); shadowContext.setGlobalValue(value->first, cleanValue); const llvm::Type* elementTy = type->getPointerElementType(); allocAndStoreShadowMemory(AddrSpaceConstant, value->second.getPointer(), ShadowContext::getCleanValue(elementTy)); break; } case AddrSpaceGlobal: { // Global pointer kernel arguments // value->second.data == ptr // value->second.size == ptr size size_t address = value->second.getPointer(); if (m_context->getGlobalMemory()->isAddressValid(address) && !shadowContext.getGlobalMemory()->isAddressValid(address)) { // Allocate poisoned global memory if there was no host store size_t size = m_context->getGlobalMemory()->getBuffer(address)->size; allocAndStoreShadowMemory(AddrSpaceGlobal, address, ShadowContext::getPoisonedValue(size), NULL, NULL, true); } m_deferredInit.push_back(*value); break; } case AddrSpaceLocal: { // Local pointer kernel arguments and local data variables // value->second.data == NULL // value->second.size == val size if 
(llvm::isa(value->first)) { // Arguments have a private pointer m_deferredInit.push_back(*value); } else { // Variables have a global pointer TypedValue cleanValue = m_pool.clone(ShadowContext::getCleanValue(value->first)); shadowContext.setGlobalValue(value->first, cleanValue); } m_deferredInitGroup.push_back(*value); break; } case AddrSpacePrivate: { const llvm::Argument* A = llvm::dyn_cast(value->first); if (A && A->hasByValAttr()) { // ByVal kernel argument // value->second.data == val // value->second.size == val size m_deferredInit.push_back(*value); } else { // Private struct/Union definitions with global type // value->second.data == val // value->second.size == val size m_deferredInit.push_back(*value); TypedValue cleanValue = m_pool.clone(ShadowContext::getCleanValue(value->first)); // TODO: Structs can have posioned padding bytes. Is this important? shadowContext.setGlobalValue(value->first, cleanValue); } break; } default: FATAL_ERROR("Unsupported addressspace %d", type->getPointerAddressSpace()); } } else { // Non pointer type kernel arguments // value->second.data == val // value->second.size == val size m_deferredInit.push_back(*value); } } } void Uninitialized::kernelEnd(const KernelInvocation* kernelInvocation) { m_deferredInit.clear(); m_deferredInitGroup.clear(); shadowContext.clearGlobalValues(); } void Uninitialized::loadShadowMemory(unsigned addrSpace, size_t address, TypedValue& SM, const WorkItem* workItem, const WorkGroup* workGroup) { if (addrSpace == AddrSpaceConstant) { // TODO: Eventually load value memset(SM.data, 0, SM.size * SM.num); return; } ShadowMemory* memory = getShadowMemory(addrSpace, workItem, workGroup); memory->load(SM.data, address, SM.size * SM.num); #ifdef DUMP_SHADOW cout << "Loaded " << hex << SM << " from space " << dec << addrSpace << " at address " << hex << address << endl; #endif } void Uninitialized::logUninitializedAddress(unsigned int addrSpace, size_t address, bool write) const { Context::Message msg(WARNING, m_context); msg << "Uninitialized address used to " << (write ? 
"write to " : "read from ") << getAddressSpaceName(addrSpace) << " memory address 0x" << hex << address << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Entity: " << msg.CURRENT_ENTITY << endl << msg.CURRENT_LOCATION << endl; msg.send(); } void Uninitialized::logUninitializedCF() const { Context::Message msg(WARNING, m_context); msg << "Controlflow depends on uninitialized value" << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Entity: " << msg.CURRENT_ENTITY << endl << msg.CURRENT_LOCATION << endl; msg.send(); } void Uninitialized::logUninitializedIndex() const { Context::Message msg(WARNING, m_context); msg << "Instruction depends on an uninitialized index value" << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Entity: " << msg.CURRENT_ENTITY << endl << msg.CURRENT_LOCATION << endl; msg.send(); } void Uninitialized::logUninitializedWrite(unsigned int addrSpace, size_t address) const { Context::Message msg(WARNING, m_context); msg << "Uninitialized value written to " << getAddressSpaceName(addrSpace) << " memory address 0x" << hex << address << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Entity: " << msg.CURRENT_ENTITY << endl << msg.CURRENT_LOCATION << endl; msg.send(); } void Uninitialized::memoryMap(const Memory* memory, size_t address, size_t offset, size_t size, cl_map_flags flags) { if (!(flags & CL_MAP_READ)) { allocAndStoreShadowMemory(memory->getAddressSpace(), address + offset, ShadowContext::getCleanValue(size)); } } void Uninitialized::VectorOr(const WorkItem* workItem, const llvm::Instruction* I) { PARANOID_CHECK(workItem, I); ShadowValues* shadowValues = shadowContext.getShadowWorkItem(workItem)->getValues(); TypedValue newShadow = ShadowContext::getCleanValue(I); for (llvm::Instruction::const_op_iterator OI = I->op_begin(); OI != I->op_end(); ++OI) { ShadowContext::shadowOr(newShadow, shadowContext.getValue(workItem, OI->get())); } shadowValues->setValue(I, newShadow); } void Uninitialized::SimpleOr(const WorkItem* workItem, const llvm::Instruction* I) { PARANOID_CHECK(workItem, I); ShadowValues* shadowValues = shadowContext.getShadowWorkItem(workItem)->getValues(); for (llvm::Instruction::const_op_iterator OI = I->op_begin(); OI != I->op_end(); ++OI) { if (!ShadowContext::isCleanValue( shadowContext.getValue(workItem, OI->get()))) { shadowValues->setValue(I, ShadowContext::getPoisonedValue(I)); return; } } shadowValues->setValue(I, ShadowContext::getCleanValue(I)); } void Uninitialized::SimpleOrAtomic(const WorkItem* workItem, const llvm::CallInst* CI) { const llvm::Value* Addr = CI->getArgOperand(0); unsigned addrSpace = Addr->getType()->getPointerAddressSpace(); size_t address = workItem->getOperand(Addr).getPointer(); TypedValue oldShadow = {4, 1, shadowContext.getMemoryPool()->alloc(4)}; TypedValue newShadow = ShadowContext::getCleanValue(4); if (addrSpace == AddrSpaceGlobal) { shadowContext.getGlobalMemory()->lock(address); } loadShadowMemory(addrSpace, address, oldShadow, workItem); if (!ShadowContext::isCleanValue(oldShadow)) { newShadow = ShadowContext::getPoisonedValue(4); } if (CI->getNumArgOperands() > 1) { TypedValue argShadow = shadowContext.getValue(workItem, CI->getArgOperand(1)); if (!ShadowContext::isCleanValue(argShadow)) { newShadow = ShadowContext::getPoisonedValue(4); } } storeShadowMemory(addrSpace, address, newShadow, workItem); if (addrSpace == AddrSpaceGlobal) { shadowContext.getGlobalMemory()->unlock(address); } ShadowValues* shadowValues = 
shadowContext.getShadowWorkItem(workItem)->getValues(); shadowValues->setValue(CI, oldShadow); // Check shadow of address TypedValue addrShadow = shadowContext.getValue(workItem, Addr); if (!ShadowContext::isCleanValue(addrShadow)) { logUninitializedAddress(addrSpace, address); } } void Uninitialized::storeShadowMemory(unsigned addrSpace, size_t address, TypedValue SM, const WorkItem* workItem, const WorkGroup* workGroup, bool unchecked) { #ifdef DUMP_SHADOW cout << "Store " << hex << SM << " to space " << dec << addrSpace << " at address " << hex << address << endl; #endif if (!unchecked && addrSpace != AddrSpacePrivate && !ShadowContext::isCleanValue(SM)) { #ifdef DUMP_SHADOW shadowContext.dump(workItem); #endif logUninitializedWrite(addrSpace, address); } if (addrSpace == AddrSpaceConstant) { // TODO: Eventually store value return; } ShadowMemory* memory = getShadowMemory(addrSpace, workItem, workGroup); memory->store(SM.data, address, SM.size * SM.num); } void Uninitialized::workItemBegin(const WorkItem* workItem) { shadowContext.createMemoryPool(); shadowContext.allocateWorkItems(); ShadowWorkItem* shadowWI = shadowContext.createShadowWorkItem(workItem); ShadowValues* shadowValues = shadowWI->getValues(); for (auto value : m_deferredInit) { const llvm::Type* type = value.first->getType(); if (type->isPointerTy()) { switch (type->getPointerAddressSpace()) { case AddrSpaceGlobal: { // Global pointer kernel arguments // value.second.data == ptr // value.second.size == ptr size shadowValues->setValue(value.first, ShadowContext::getCleanValue(type)); break; } case AddrSpaceLocal: { // Local pointer kernel arguments // value.second.data == NULL // value.second.size == val size shadowValues->setValue(value.first, ShadowContext::getCleanValue(value.first)); break; } case AddrSpacePrivate: { const llvm::Argument* A = llvm::dyn_cast(value.first); if (A && A->hasByValAttr()) { // ByVal kernel argument // value.second.data == val // value.second.size == val size size_t address = workItem->getOperand(value.first).getPointer(); TypedValue cleanValue = ShadowContext::getCleanValue(value.second.size); allocAndStoreShadowMemory(AddrSpacePrivate, address, cleanValue, workItem); shadowValues->setValue(value.first, ShadowContext::getCleanValue(value.first)); } else { // Private struct/Union definitions with global type // value.second.data == NULL // value.second.size == val size size_t address = workItem->getOperand(value.first).getPointer(); TypedValue cleanValue = ShadowContext::getCleanValue(value.second.size); allocAndStoreShadowMemory(AddrSpacePrivate, address, cleanValue, workItem); } break; } } } else { // Non pointer type kernel arguments // value->second.data == val // value->second.size == val size shadowValues->setValue(value.first, ShadowContext::getCleanValue(value.first)); } } } void Uninitialized::workItemComplete(const WorkItem* workItem) { shadowContext.destroyShadowWorkItem(workItem); shadowContext.freeWorkItems(); shadowContext.destroyMemoryPool(); } void Uninitialized::workGroupBegin(const WorkGroup* workGroup) { shadowContext.createMemoryPool(); shadowContext.allocateWorkGroups(); shadowContext.createShadowWorkGroup(workGroup); for (auto value : m_deferredInitGroup) { // Local data variables // value->second.data == NULL // value->second.size == val size size_t address = workGroup->getLocalMemoryAddress(value.first); TypedValue v; if (llvm::isa(value.first)) { // TODO: Local memory clean or poisoned? May need to differentiate // between kernel argument (?) 
and variable (poisoned) v = ShadowContext::getPoisonedValue(value.second.size); } else { v = ShadowContext::getPoisonedValue(value.second.size); } allocAndStoreShadowMemory(AddrSpaceLocal, address, v, NULL, workGroup, true); } } void Uninitialized::workGroupComplete(const WorkGroup* workGroup) { shadowContext.destroyShadowWorkGroup(workGroup); shadowContext.freeWorkGroups(); shadowContext.destroyMemoryPool(); } ShadowFrame::ShadowFrame() : m_values(new UnorderedTypedValueMap()) { #ifdef DUMP_SHADOW m_valuesList = new ValuesList(); #endif } ShadowFrame::~ShadowFrame() { delete m_values; #ifdef DUMP_SHADOW delete m_valuesList; #endif } void ShadowFrame::dump() const { cout << "==== ShadowMap (private) =======" << endl; #ifdef DUMP_SHADOW ValuesList::const_iterator itr; unsigned num = 1; for (itr = m_valuesList->begin(); itr != m_valuesList->end(); ++itr) { if ((*itr)->hasName()) { cout << "%" << (*itr)->getName().str() << ": " << m_values->at(*itr) << endl; } else { cout << "%" << dec << num++ << ": " << m_values->at(*itr) << endl; } } #else cout << endl << "Dump not activated!" << endl; #endif cout << "=======================" << endl; } TypedValue ShadowFrame::getValue(const llvm::Value* V) const { if (llvm::isa(V)) { // For instructions the shadow is already stored in the map. assert(m_values->count(V) && "No shadow for instruction value"); return m_values->at(V); } else if (llvm::isa(V)) { return ShadowContext::getPoisonedValue(V); } else if (llvm::isa(V)) { // For arguments the shadow is already stored in the map. assert(m_values->count(V) && "No shadow for argument value"); return m_values->at(V); } else if (const llvm::ConstantVector* VC = llvm::dyn_cast(V)) { TypedValue vecShadow = ShadowContext::getCleanValue(V); TypedValue elemShadow; for (unsigned i = 0; i < vecShadow.num; ++i) { elemShadow = getValue(VC->getAggregateElement(i)); size_t offset = i * vecShadow.size; memcpy(vecShadow.data + offset, elemShadow.data, vecShadow.size); } return vecShadow; } else { // For everything else the shadow is zero. return ShadowContext::getCleanValue(V); } } void ShadowFrame::setValue(const llvm::Value* V, TypedValue SV) { #ifdef DUMP_SHADOW if (!m_values->count(V)) { m_valuesList->push_back(V); } else { cout << "Shadow for value " << V->getName().str() << " reset!" << endl; } #endif (*m_values)[V] = SV; } ShadowValues::ShadowValues() : m_stack(new ShadowValuesStack()) { pushFrame(createCleanShadowFrame()); } ShadowValues::~ShadowValues() { while (!m_stack->empty()) { popFrame(); } delete m_stack; } ShadowFrame* ShadowValues::createCleanShadowFrame() { return new ShadowFrame(); } ShadowWorkItem::ShadowWorkItem(unsigned bufferBits) : m_memory(new ShadowMemory(AddrSpacePrivate, bufferBits)), m_values(new ShadowValues()) { } ShadowWorkItem::~ShadowWorkItem() { delete m_memory; delete m_values; } ShadowWorkGroup::ShadowWorkGroup(unsigned bufferBits) : // FIXME: Hard coded values m_memory(new ShadowMemory(AddrSpaceLocal, sizeof(size_t) == 8 ? 
16 : 8)) { } ShadowWorkGroup::~ShadowWorkGroup() { delete m_memory; } ShadowMemory::ShadowMemory(AddressSpace addrSpace, unsigned bufferBits) : m_addrSpace(addrSpace), m_map(), m_numBitsAddress((sizeof(size_t) << 3) - bufferBits), m_numBitsBuffer(bufferBits) { } ShadowMemory::~ShadowMemory() { clear(); } void ShadowMemory::allocate(size_t address, size_t size) { size_t index = extractBuffer(address); if (m_map.count(index)) { deallocate(address); } Buffer* buffer = new Buffer(); buffer->size = size; buffer->flags = 0; buffer->data = new unsigned char[size]; m_map[index] = buffer; } void ShadowMemory::clear() { MemoryMap::iterator mItr; for (mItr = m_map.begin(); mItr != m_map.end(); ++mItr) { delete[] mItr->second->data; delete mItr->second; } } void ShadowMemory::deallocate(size_t address) { size_t index = extractBuffer(address); assert(m_map.count(index) && "Cannot deallocate non existing memory!"); delete[] m_map.at(index)->data; delete m_map.at(index); m_map.at(index) = NULL; } void ShadowMemory::dump() const { cout << "====== ShadowMem (" << getAddressSpaceName(m_addrSpace) << ") ======"; for (unsigned b = 0, o = 1; b < m_map.size(); o++) { if (!m_map.count(b + o)) { continue; } for (unsigned i = 0; i < m_map.at(b + o)->size; i++) { if (i % 4 == 0) { cout << endl << hex << uppercase << setw(16) << setfill(' ') << right << ((((size_t)b + o) << m_numBitsAddress) | i) << ":"; } cout << " " << hex << uppercase << setw(2) << setfill('0') << (int)m_map.at(b + o)->data[i]; } ++b; o = 0; } cout << endl; cout << "=======================" << endl; } size_t ShadowMemory::extractBuffer(size_t address) const { return (address >> m_numBitsAddress); } size_t ShadowMemory::extractOffset(size_t address) const { return (address & (((size_t)-1) >> m_numBitsBuffer)); } void* ShadowMemory::getPointer(size_t address) const { size_t index = extractBuffer(address); size_t offset = extractOffset(address); assert(m_map.count(index) && "No shadow memory found!"); return m_map.at(index)->data + offset; } bool ShadowMemory::isAddressValid(size_t address, size_t size) const { size_t index = extractBuffer(address); size_t offset = extractOffset(address); return m_map.count(index) && (offset + size <= m_map.at(index)->size); } void ShadowMemory::load(unsigned char* dst, size_t address, size_t size) const { size_t index = extractBuffer(address); size_t offset = extractOffset(address); if (isAddressValid(address, size)) { assert(m_map.count(index) && "No shadow memory found!"); memcpy(dst, m_map.at(index)->data + offset, size); } else { TypedValue v = ShadowContext::getPoisonedValue(size); memcpy(dst, v.data, size); } } void ShadowMemory::lock(size_t address) const { size_t offset = extractOffset(address); ATOMIC_MUTEX(offset).lock(); } void ShadowMemory::store(const unsigned char* src, size_t address, size_t size) { size_t index = extractBuffer(address); size_t offset = extractOffset(address); if (isAddressValid(address, size)) { assert(m_map.count(index) && "Cannot store to unallocated memory!"); memcpy(m_map.at(index)->data + offset, src, size); } } void ShadowMemory::unlock(size_t address) const { size_t offset = extractOffset(address); ATOMIC_MUTEX(offset).unlock(); } ShadowContext::ShadowContext(unsigned bufferBits) : m_globalMemory(new ShadowMemory(AddrSpaceGlobal, bufferBits)), m_globalValues(), m_numBitsBuffer(bufferBits) { } ShadowContext::~ShadowContext() { delete m_globalMemory; } void ShadowContext::allocateWorkItems() { if (!m_workSpace.workItems) { m_workSpace.workItems = new ShadowItemMap(); } } void 
ShadowContext::allocateWorkGroups() { if (!m_workSpace.workGroups) { m_workSpace.workGroups = new ShadowGroupMap(); } } void ShadowContext::clearGlobalValues() { m_globalValues.clear(); } void ShadowContext::createMemoryPool() { if (m_workSpace.poolUsers == 0) { m_workSpace.memoryPool = new MemoryPool(); } ++m_workSpace.poolUsers; } ShadowWorkItem* ShadowContext::createShadowWorkItem(const WorkItem* workItem) { assert(!m_workSpace.workItems->count(workItem) && "Workitems may only have one shadow"); ShadowWorkItem* sWI = new ShadowWorkItem(m_numBitsBuffer); (*m_workSpace.workItems)[workItem] = sWI; return sWI; } ShadowWorkGroup* ShadowContext::createShadowWorkGroup(const WorkGroup* workGroup) { assert(!m_workSpace.workGroups->count(workGroup) && "Workgroups may only have one shadow"); ShadowWorkGroup* sWG = new ShadowWorkGroup(m_numBitsBuffer); (*m_workSpace.workGroups)[workGroup] = sWG; return sWG; } void ShadowContext::destroyMemoryPool() { --m_workSpace.poolUsers; if (m_workSpace.poolUsers == 0) { delete m_workSpace.memoryPool; } } void ShadowContext::destroyShadowWorkItem(const WorkItem* workItem) { assert(m_workSpace.workItems->count(workItem) && "No shadow for workitem found!"); delete (*m_workSpace.workItems)[workItem]; m_workSpace.workItems->erase(workItem); } void ShadowContext::destroyShadowWorkGroup(const WorkGroup* workGroup) { assert(m_workSpace.workGroups->count(workGroup) && "No shadow for workgroup found!"); delete (*m_workSpace.workGroups)[workGroup]; m_workSpace.workGroups->erase(workGroup); } void ShadowContext::dump(const WorkItem* workItem) const { dumpGlobalValues(); m_globalMemory->dump(); if (m_workSpace.workGroups && m_workSpace.workGroups->size()) { m_workSpace.workGroups->begin()->second->dump(); } if (m_workSpace.workItems && m_workSpace.workItems->size()) { if (workItem) { cout << "Item " << workItem->getGlobalID() << endl; getShadowWorkItem(workItem)->dump(); } else { ShadowItemMap::const_iterator itr; for (itr = m_workSpace.workItems->begin(); itr != m_workSpace.workItems->end(); ++itr) { cout << "Item " << itr->first->getGlobalID() << endl; itr->second->dump(); } } } } void ShadowContext::dumpGlobalValues() const { cout << "==== ShadowMap (global) =======" << endl; UnorderedTypedValueMap::const_iterator itr; unsigned num = 1; for (itr = m_globalValues.begin(); itr != m_globalValues.end(); ++itr) { if (itr->first->hasName()) { cout << "%" << itr->first->getName().str() << ": " << itr->second << endl; } else { cout << "%" << dec << num++ << ": " << itr->second << endl; } } cout << "=======================" << endl; } void ShadowContext::freeWorkItems() { if (m_workSpace.workItems && !m_workSpace.workItems->size()) { delete m_workSpace.workItems; m_workSpace.workItems = NULL; } } void ShadowContext::freeWorkGroups() { if (m_workSpace.workGroups && !m_workSpace.workGroups->size()) { delete m_workSpace.workGroups; m_workSpace.workGroups = NULL; } } TypedValue ShadowContext::getCleanValue(unsigned size) { TypedValue v = {size, 1, m_workSpace.memoryPool->alloc(size)}; memset(v.data, 0, size); return v; } TypedValue ShadowContext::getCleanValue(TypedValue v) { TypedValue c = {v.size, v.num, m_workSpace.memoryPool->alloc(v.size * v.num)}; memset(c.data, 0, v.size * v.num); return c; } TypedValue ShadowContext::getCleanValue(const llvm::Value* V) { pair size = getValueSize(V); TypedValue v = {size.first, size.second, m_workSpace.memoryPool->alloc(size.first * size.second)}; memset(v.data, 0, v.size * v.num); return v; } TypedValue ShadowContext::getCleanValue(const 
llvm::Type* Ty) { unsigned size = getTypeSize(Ty); TypedValue v = {size, 1, m_workSpace.memoryPool->alloc(size)}; memset(v.data, 0, v.size); return v; } TypedValue ShadowContext::getPoisonedValue(unsigned size) { TypedValue v = {size, 1, m_workSpace.memoryPool->alloc(size)}; memset(v.data, -1, size); return v; } TypedValue ShadowContext::getPoisonedValue(TypedValue v) { TypedValue p = {v.size, v.num, m_workSpace.memoryPool->alloc(v.size * v.num)}; memset(p.data, -1, v.size * v.num); return p; } TypedValue ShadowContext::getPoisonedValue(const llvm::Value* V) { pair size = getValueSize(V); TypedValue v = {size.first, size.second, m_workSpace.memoryPool->alloc(size.first * size.second)}; memset(v.data, -1, v.size * v.num); return v; } TypedValue ShadowContext::getPoisonedValue(const llvm::Type* Ty) { unsigned size = getTypeSize(Ty); TypedValue v = {size, 1, m_workSpace.memoryPool->alloc(size)}; memset(v.data, -1, v.size); return v; } TypedValue ShadowContext::getValue(const WorkItem* workItem, const llvm::Value* V) const { if (m_globalValues.count(V)) { return m_globalValues.at(V); } else { ShadowValues* shadowValues = getShadowWorkItem(workItem)->getValues(); return shadowValues->getValue(V); } } bool ShadowContext::isCleanImage(const TypedValue shadowImage) { return (isCleanImageAddress(shadowImage) && isCleanImageDescription(shadowImage) && isCleanImageFormat(shadowImage)); } bool ShadowContext::isCleanImageAddress(const TypedValue shadowImage) { Image* image = (Image*)shadowImage.data; return ShadowContext::isCleanValue(image->address); } bool ShadowContext::isCleanImageDescription(const TypedValue shadowImage) { Image* image = (Image*)shadowImage.data; // TODO: image->desc.buffer is currently not checked return (ShadowContext::isCleanValue(image->desc.image_type) && ShadowContext::isCleanValue(image->desc.image_width) && ShadowContext::isCleanValue(image->desc.image_height) && ShadowContext::isCleanValue(image->desc.image_depth) && ShadowContext::isCleanValue(image->desc.image_array_size) && ShadowContext::isCleanValue(image->desc.image_row_pitch) && ShadowContext::isCleanValue(image->desc.image_slice_pitch) && ShadowContext::isCleanValue(image->desc.num_mip_levels) && ShadowContext::isCleanValue(image->desc.num_samples)); } bool ShadowContext::isCleanImageFormat(const TypedValue shadowImage) { Image* image = (Image*)shadowImage.data; return (ShadowContext::isCleanValue(image->format.image_channel_order) && ShadowContext::isCleanValue(image->format.image_channel_data_type)); } bool ShadowContext::isCleanStruct(ShadowMemory* shadowMemory, size_t address, const llvm::StructType* structTy) { if (structTy->isPacked()) { unsigned size = getTypeSize(structTy); TypedValue v = {size, 1, m_workSpace.memoryPool->alloc(size)}; shadowMemory->load(v.data, address, size); return isCleanValue(v); } else { for (unsigned i = 0; i < structTy->getStructNumElements(); ++i) { size_t offset = getStructMemberOffset(structTy, i); unsigned size = getTypeSize(structTy->getElementType(i)); if (const llvm::StructType* elemTy = llvm::dyn_cast(structTy->getElementType(i))) { if (!isCleanStruct(shadowMemory, address + offset, elemTy)) { return false; } } else { TypedValue v = {size, 1, m_workSpace.memoryPool->alloc(size)}; shadowMemory->load(v.data, address + offset, size); if (!isCleanValue(v)) { return false; } } } return true; } } bool ShadowContext::isCleanValue(unsigned long v) { return v == 0UL; } bool ShadowContext::isCleanValue(TypedValue v) { return (ShadowContext::getCleanValue(v) == v); } bool 
ShadowContext::isCleanValue(TypedValue v, unsigned offset) { TypedValue c = ShadowContext::getCleanValue(v.size); return !memcmp(v.data + offset * v.size, c.data, v.size); } void ShadowContext::setGlobalValue(const llvm::Value* V, TypedValue SV) { assert(!m_globalValues.count(V) && "Values may only have one shadow"); m_globalValues[V] = SV; } void ShadowContext::shadowOr(TypedValue v1, TypedValue v2) { assert(v1.num == v2.num && "Cannot create shadow for vectors of different lengths!"); for (unsigned int i = 0; i < v1.num; ++i) { if (!ShadowContext::isCleanValue(v2, i)) { memset(v1.data + i * v1.size, 0xff, v1.size); } } } Oclgrind-21.10/src/plugins/Uninitialized.h000066400000000000000000000253201413315665100205160ustar00rootroot00000000000000// Uninitialized.h (Oclgrind) // Copyright (c) 2015, Moritz Pflanzer // Imperial College London. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "core/Plugin.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" //#define DUMP_SHADOW //#define PARANOID_CHECK(W, I) assert(checkAllOperandsDefined(W, I) && "Not all // operands defined") #define PARANOID_CHECK(W, I) checkAllOperandsDefined(W, I) #define PARANOID_CHECK(W, I) (void*)0 namespace oclgrind { typedef std::unordered_map UnorderedTypedValueMap; class ShadowFrame { public: ShadowFrame(); virtual ~ShadowFrame(); void dump() const; inline const llvm::CallInst* getCall() const { return m_call; } TypedValue getValue(const llvm::Value* V) const; inline bool hasValue(const llvm::Value* V) const { return llvm::isa(V) || m_values->count(V); } inline void setCall(const llvm::CallInst* CI) { m_call = CI; } void setValue(const llvm::Value* V, TypedValue SV); private: typedef std::list ValuesList; const llvm::CallInst* m_call; UnorderedTypedValueMap* m_values; #ifdef DUMP_SHADOW ValuesList* m_valuesList; #endif }; class ShadowValues { public: ShadowValues(); virtual ~ShadowValues(); ShadowFrame* createCleanShadowFrame(); inline void dump() const { m_stack->top()->dump(); } inline const llvm::CallInst* getCall() const { return m_stack->top()->getCall(); } inline TypedValue getValue(const llvm::Value* V) const { return m_stack->top()->getValue(V); } inline bool hasValue(const llvm::Value* V) const { return llvm::isa(V) || m_stack->top()->hasValue(V); } inline void popFrame() { ShadowFrame* frame = m_stack->top(); m_stack->pop(); delete frame; } inline void pushFrame(ShadowFrame* frame) { m_stack->push(frame); } inline void setCall(const llvm::CallInst* CI) { m_stack->top()->setCall(CI); } inline void setValue(const llvm::Value* V, TypedValue SV) { m_stack->top()->setValue(V, SV); } private: typedef std::stack ShadowValuesStack; ShadowValuesStack* m_stack; }; class ShadowMemory { public: struct Buffer { size_t size; cl_mem_flags flags; unsigned char* data; }; ShadowMemory(AddressSpace addrSpace, unsigned bufferBits); virtual ~ShadowMemory(); void allocate(size_t address, size_t size); void dump() const; void* getPointer(size_t address) const; bool isAddressValid(size_t address, size_t size = 1) const; void load(unsigned char* dst, size_t address, size_t size = 1) const; void lock(size_t address) const; void store(const unsigned char* src, size_t address, size_t size = 1); void unlock(size_t address) const; private: typedef std::unordered_map MemoryMap; AddressSpace m_addrSpace; MemoryMap m_map; unsigned m_numBitsAddress; unsigned m_numBitsBuffer; void clear(); 
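// Shadow buffers are keyed by the top m_numBitsBuffer bits of an address;
// extractBuffer()/extractOffset() below split an address into that buffer
// index and the byte offset within the buffer.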
void deallocate(size_t address); size_t extractBuffer(size_t address) const; size_t extractOffset(size_t address) const; }; class ShadowWorkItem { public: ShadowWorkItem(unsigned bufferBits); virtual ~ShadowWorkItem(); inline void dump() const { m_values->dump(); m_memory->dump(); } inline ShadowMemory* getPrivateMemory() { return m_memory; } inline ShadowValues* getValues() const { return m_values; } private: ShadowMemory* m_memory; ShadowValues* m_values; }; class ShadowWorkGroup { public: ShadowWorkGroup(unsigned bufferBits); virtual ~ShadowWorkGroup(); inline void dump() const { m_memory->dump(); } inline ShadowMemory* getLocalMemory() { return m_memory; } private: ShadowMemory* m_memory; }; class ShadowContext { public: ShadowContext(unsigned bufferBits); virtual ~ShadowContext(); void allocateWorkItems(); void allocateWorkGroups(); void clearGlobalValues(); void createMemoryPool(); ShadowWorkItem* createShadowWorkItem(const WorkItem* workItem); ShadowWorkGroup* createShadowWorkGroup(const WorkGroup* workGroup); void destroyMemoryPool(); void destroyShadowWorkItem(const WorkItem* workItem); void destroyShadowWorkGroup(const WorkGroup* workGroup); void dump(const WorkItem* workItem) const; void dumpGlobalValues() const; void freeWorkItems(); void freeWorkGroups(); static TypedValue getCleanValue(unsigned size); static TypedValue getCleanValue(TypedValue v); static TypedValue getCleanValue(const llvm::Type* Ty); static TypedValue getCleanValue(const llvm::Value* V); inline ShadowMemory* getGlobalMemory() const { return m_globalMemory; } TypedValue getGlobalValue(const llvm::Value* V) const; MemoryPool* getMemoryPool() const { return m_workSpace.memoryPool; } static TypedValue getPoisonedValue(unsigned size); static TypedValue getPoisonedValue(TypedValue v); static TypedValue getPoisonedValue(const llvm::Type* Ty); static TypedValue getPoisonedValue(const llvm::Value* V); inline ShadowWorkItem* getShadowWorkItem(const WorkItem* workItem) const { return m_workSpace.workItems->at(workItem); } inline ShadowWorkGroup* getShadowWorkGroup(const WorkGroup* workGroup) const { return m_workSpace.workGroups->at(workGroup); } TypedValue getValue(const WorkItem* workItem, const llvm::Value* V) const; inline bool hasValue(const WorkItem* workItem, const llvm::Value* V) const { return llvm::isa(V) || m_globalValues.count(V) || m_workSpace.workItems->at(workItem)->getValues()->hasValue(V); } static bool isCleanImage(const TypedValue shadowImage); static bool isCleanImageAddress(const TypedValue shadowImage); static bool isCleanImageDescription(const TypedValue shadowImage); static bool isCleanImageFormat(const TypedValue shadowImage); static bool isCleanStruct(ShadowMemory* shadowMemory, size_t address, const llvm::StructType* structTy); static bool isCleanValue(unsigned long v); static bool isCleanValue(TypedValue v); static bool isCleanValue(TypedValue v, unsigned offset); void setGlobalValue(const llvm::Value* V, TypedValue SV); static void shadowOr(TypedValue v1, TypedValue v2); private: ShadowMemory* m_globalMemory; UnorderedTypedValueMap m_globalValues; unsigned m_numBitsBuffer; typedef std::map ShadowItemMap; typedef std::map ShadowGroupMap; struct WorkSpace { ShadowItemMap* workItems; ShadowGroupMap* workGroups; MemoryPool* memoryPool; unsigned poolUsers; }; static THREAD_LOCAL WorkSpace m_workSpace; }; class Uninitialized : public Plugin { public: Uninitialized(const Context* context); virtual ~Uninitialized(); virtual void hostMemoryStore(const Memory* memory, size_t address, size_t size, 
const uint8_t* storeData) override; virtual void instructionExecuted(const WorkItem* workItem, const llvm::Instruction* instruction, const TypedValue& result) override; virtual void kernelBegin(const KernelInvocation* kernelInvocation) override; virtual void kernelEnd(const KernelInvocation* kernelInvocation) override; virtual void memoryMap(const Memory* memory, size_t address, size_t offset, size_t size, cl_map_flags flags) override; virtual void workItemBegin(const WorkItem* workItem) override; virtual void workItemComplete(const WorkItem* workItem) override; virtual void workGroupBegin(const WorkGroup* workGroup) override; virtual void workGroupComplete(const WorkGroup* workGroup) override; // virtual void memoryAllocated(const Memory *memory, size_t address, // size_t size, cl_mem_flags flags, // const uint8_t *initData); private: std::list> m_deferredInit; std::list> m_deferredInitGroup; ShadowContext shadowContext; MemoryPool m_pool; void allocAndStoreShadowMemory(unsigned addrSpace, size_t address, TypedValue SM, const WorkItem* workItem = NULL, const WorkGroup* workGroup = NULL, bool unchecked = false); bool checkAllOperandsDefined(const WorkItem* workItem, const llvm::Instruction* I); void checkStructMemcpy(const WorkItem* workItem, const llvm::Value* src); void copyShadowMemory(unsigned dstAddrSpace, size_t dst, unsigned srcAddrSpace, size_t src, unsigned size, const WorkItem* workItem = NULL, const WorkGroup* workGroup = NULL, bool unchecked = false); void copyShadowMemoryStrided(unsigned dstAddrSpace, size_t dst, unsigned srcAddrSpace, size_t src, size_t num, size_t stride, unsigned size, const WorkItem* workItem = NULL, const WorkGroup* workGroup = NULL, bool unchecked = false); static std::string extractUnmangledName(const std::string fullname); ShadowMemory* getShadowMemory(unsigned addrSpace, const WorkItem* workItem = NULL, const WorkGroup* workGroup = NULL) const; bool handleBuiltinFunction(const WorkItem* workItem, std::string name, const llvm::CallInst* CI, const TypedValue result); void handleIntrinsicInstruction(const WorkItem* workItem, const llvm::IntrinsicInst* I); void loadShadowMemory(unsigned addrSpace, size_t address, TypedValue& SM, const WorkItem* workItem = NULL, const WorkGroup* workGroup = NULL); void logUninitializedAddress(unsigned int addrSpace, size_t address, bool write = true) const; void logUninitializedCF() const; void logUninitializedIndex() const; void logUninitializedWrite(unsigned int addrSpace, size_t address) const; void SimpleOr(const WorkItem* workItem, const llvm::Instruction* I); void SimpleOrAtomic(const WorkItem* workItem, const llvm::CallInst* CI); void storeShadowMemory(unsigned addrSpace, size_t address, TypedValue SM, const WorkItem* workItem = NULL, const WorkGroup* workGroup = NULL, bool unchecked = false); void VectorOr(const WorkItem* workItem, const llvm::Instruction* I); }; } // namespace oclgrind Oclgrind-21.10/src/runtime/000077500000000000000000000000001413315665100155355ustar00rootroot00000000000000Oclgrind-21.10/src/runtime/async_queue.cpp000066400000000000000000000063471413315665100205740ustar00rootroot00000000000000// async_queue.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
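//
// This translation unit implements the bookkeeping behind Oclgrind's
// asynchronous command-queue wrappers. In outline (a description of the
// code below, not additional behaviour):
//  - asyncEnqueue() copies the caller's event wait list into the Command,
//    retains each wait-list event, submits the Command to the underlying
//    oclgrind::Queue, and wraps the resulting Event in a new _cl_event that
//    is recorded in eventMap (and returned to the caller if requested).
//  - asyncQueueRetain() retains cl_mem / cl_kernel objects referenced by a
//    Command so they outlive its execution; retaining a kernel also retains
//    every memory-object argument of that kernel.
//  - asyncQueueRelease() undoes those retains once the Command has run,
//    fires any event callbacks, releases the wait-list events, and finally
//    releases the command's own event. The runtime's releaseCommand()
//    helper (see runtime.cpp) calls this before deleting the Command.
//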
#include "async_queue.h" #include #include #include #include #include "core/Kernel.h" #include "core/Queue.h" using namespace oclgrind; using namespace std; // Maps to keep track of retained objects static map> memObjectMap; static map kernelMap; static map eventMap; static map> waitListMap; void asyncEnqueue(cl_command_queue queue, cl_command_type type, Command* cmd, cl_uint numEvents, const cl_event* waitList, cl_event* eventOut) { // Add event wait list to command for (unsigned i = 0; i < numEvents; i++) { cmd->waitList.push_back(waitList[i]->event); waitListMap[cmd].push_back(waitList[i]); clRetainEvent(waitList[i]); } // Enqueue command Event* event = queue->queue->enqueue(cmd); // Create event objects cl_event _event = new _cl_event; _event->dispatch = m_dispatchTable; _event->context = queue->context; _event->queue = queue; _event->type = type; _event->event = event; _event->refCount = 1; // Add event to map eventMap[cmd] = _event; // Pass event as output and retain (if required) if (eventOut) { clRetainEvent(_event); *eventOut = _event; } } void asyncQueueRetain(Command* cmd, cl_mem mem) { // Retain object and add to map clRetainMemObject(mem); memObjectMap[cmd].push_back(mem); } void asyncQueueRetain(Command* cmd, cl_kernel kernel) { assert(kernelMap.find(cmd) == kernelMap.end()); // Retain kernel and add to map clRetainKernel(kernel); kernelMap[cmd] = kernel; // Retain memory objects arguments map::const_iterator itr; for (itr = kernel->memArgs.begin(); itr != kernel->memArgs.end(); itr++) { asyncQueueRetain(cmd, itr->second); } } void asyncQueueRelease(Command* cmd) { // Release memory objects if (memObjectMap.find(cmd) != memObjectMap.end()) { list memObjects = memObjectMap[cmd]; while (!memObjects.empty()) { clReleaseMemObject(memObjects.front()); memObjects.pop_front(); } memObjectMap.erase(cmd); } // Release kernel if (cmd->type == Command::KERNEL) { assert(kernelMap.find(cmd) != kernelMap.end()); clReleaseKernel(kernelMap[cmd]); kernelMap.erase(cmd); delete ((KernelCommand*)cmd)->kernel; } // Remove event from map cl_event event = eventMap[cmd]; eventMap.erase(cmd); // Perform callbacks list>::iterator callItr; for (callItr = event->callbacks.begin(); callItr != event->callbacks.end(); callItr++) { callItr->first(event, event->event->state, callItr->second); } // Release events list::iterator waitItr; for (waitItr = waitListMap[cmd].begin(); waitItr != waitListMap[cmd].end(); waitItr++) { clReleaseEvent(*waitItr); } waitListMap.erase(cmd); clReleaseEvent(event); } Oclgrind-21.10/src/runtime/async_queue.h000066400000000000000000000013431413315665100202300ustar00rootroot00000000000000// async_queue.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "icd.h" #include "core/Queue.h" extern void asyncEnqueue(cl_command_queue queue, cl_command_type type, oclgrind::Command* cmd, cl_uint numEvents, const cl_event* waitList, cl_event* eventOut); extern void asyncQueueRetain(oclgrind::Command* cmd, cl_mem mem); extern void asyncQueueRetain(oclgrind::Command* cmd, cl_kernel); extern void asyncQueueRelease(oclgrind::Command* cmd); Oclgrind-21.10/src/runtime/icd.def000066400000000000000000000001521413315665100167520ustar00rootroot00000000000000EXPORTS ; Make ICD initialisation functions visible clGetExtensionFunctionAddress clIcdGetPlatformIDsKHR Oclgrind-21.10/src/runtime/icd.h000066400000000000000000000177611413315665100164610ustar00rootroot00000000000000// icd.h (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #ifndef _ICD_H_ #define _ICD_H_ // Rename OpenCL API functions to avoid clashes with ICD library #ifdef OCLGRIND_ICD #define clGetPlatformIDs _clGetPlatformIDs #define clGetPlatformInfo _clGetPlatformInfo #define clGetDeviceIDs _clGetDeviceIDs #define clGetDeviceInfo _clGetDeviceInfo #define clCreateSubDevices _clCreateSubDevices #define clRetainDevice _clRetainDevice #define clReleaseDevice _clReleaseDevice #define clCreateContext _clCreateContext #define clCreateContextFromType _clCreateContextFromType #define clRetainContext _clRetainContext #define clReleaseContext _clReleaseContext #define clGetContextInfo _clGetContextInfo #define clCreateCommandQueue _clCreateCommandQueue #define clSetCommandQueueProperty _clSetCommandQueueProperty #define clRetainCommandQueue _clRetainCommandQueue #define clReleaseCommandQueue _clReleaseCommandQueue #define clGetCommandQueueInfo _clGetCommandQueueInfo #define clCreateBuffer _clCreateBuffer #define clCreateSubBuffer _clCreateSubBuffer #define clCreateImage _clCreateImage #define clCreateImage2D _clCreateImage2D #define clCreateImage3D _clCreateImage3D #define clRetainMemObject _clRetainMemObject #define clReleaseMemObject _clReleaseMemObject #define clGetSupportedImageFormats _clGetSupportedImageFormats #define clGetMemObjectInfo _clGetMemObjectInfo #define clGetImageInfo _clGetImageInfo #define clSetMemObjectDestructorCallback _clSetMemObjectDestructorCallback #define clCreateSampler _clCreateSampler #define clRetainSampler _clRetainSampler #define clReleaseSampler _clReleaseSampler #define clGetSamplerInfo _clGetSamplerInfo #define clCreateProgramWithSource _clCreateProgramWithSource #define clCreateProgramWithBinary _clCreateProgramWithBinary #define clCreateProgramWithBuiltInKernels _clCreateProgramWithBuiltInKernels #define clRetainProgram _clRetainProgram #define clReleaseProgram _clReleaseProgram #define clBuildProgram _clBuildProgram #define clUnloadCompiler _clUnloadCompiler #define clCompileProgram _clCompileProgram #define clLinkProgram _clLinkProgram #define clUnloadPlatformCompiler _clUnloadPlatformCompiler #define clGetProgramInfo _clGetProgramInfo #define clGetProgramBuildInfo _clGetProgramBuildInfo #define clCreateKernel _clCreateKernel #define clCreateKernelsInProgram _clCreateKernelsInProgram #define clRetainKernel _clRetainKernel #define clReleaseKernel _clReleaseKernel #define clSetKernelArg _clSetKernelArg #define clGetKernelInfo _clGetKernelInfo #define clGetKernelArgInfo _clGetKernelArgInfo #define clGetKernelWorkGroupInfo 
_clGetKernelWorkGroupInfo #define clWaitForEvents _clWaitForEvents #define clGetEventInfo _clGetEventInfo #define clCreateUserEvent _clCreateUserEvent #define clRetainEvent _clRetainEvent #define clReleaseEvent _clReleaseEvent #define clSetUserEventStatus _clSetUserEventStatus #define clSetEventCallback _clSetEventCallback #define clGetEventProfilingInfo _clGetEventProfilingInfo #define clFlush _clFlush #define clFinish _clFinish #define clEnqueueReadBuffer _clEnqueueReadBuffer #define clEnqueueReadBufferRect _clEnqueueReadBufferRect #define clEnqueueWriteBuffer _clEnqueueWriteBuffer #define clEnqueueWriteBufferRect _clEnqueueWriteBufferRect #define clEnqueueCopyBuffer _clEnqueueCopyBuffer #define clEnqueueCopyBufferRect _clEnqueueCopyBufferRect #define clEnqueueFillBuffer _clEnqueueFillBuffer #define clEnqueueFillImage _clEnqueueFillImage #define clEnqueueReadImage _clEnqueueReadImage #define clEnqueueWriteImage _clEnqueueWriteImage #define clEnqueueCopyImage _clEnqueueCopyImage #define clEnqueueCopyImageToBuffer _clEnqueueCopyImageToBuffer #define clEnqueueCopyBufferToImage _clEnqueueCopyBufferToImage #define clEnqueueMapBuffer _clEnqueueMapBuffer #define clEnqueueMapImage _clEnqueueMapImage #define clEnqueueUnmapMemObject _clEnqueueUnmapMemObject #define clEnqueueMigrateMemObjects _clEnqueueMigrateMemObjects #define clEnqueueNDRangeKernel _clEnqueueNDRangeKernel #define clEnqueueTask _clEnqueueTask #define clEnqueueNativeKernel _clEnqueueNativeKernel #define clGetExtensionFunctionAddressForPlatform \ _clGetExtensionFunctionAddressForPlatform #define clEnqueueMarkerWithWaitList _clEnqueueMarkerWithWaitList #define clEnqueueBarrierWithWaitList _clEnqueueBarrierWithWaitList #define clSetPrintfCallback _clSetPrintfCallback #define clEnqueueMarker _clEnqueueMarker #define clEnqueueWaitForEvents _clEnqueueWaitForEvents #define clEnqueueBarrier _clEnqueueBarrier #define clCreateFromGLBuffer _clCreateFromGLBuffer #define clCreateFromGLTexture _clCreateFromGLTexture #define clCreateFromGLTexture2D _clCreateFromGLTexture2D #define clCreateFromGLTexture3D _clCreateFromGLTexture3D #define clCreateFromGLRenderbuffer _clCreateFromGLRenderbuffer #define clGetGLObjectInfo _clGetGLObjectInfo #define clGetGLTextureInfo _clGetGLTextureInfo #define clEnqueueAcquireGLObjects _clEnqueueAcquireGLObjects #define clEnqueueReleaseGLObjects _clEnqueueReleaseGLObjects #define clGetGLContextInfoKHR _clGetGLContextInfoKHR #define clCreateEventFromGLsyncKHR _clCreateEventFromGLsyncKHR #endif // OCLGRIND_ICD #include #include #include #include #include #define CL_USE_DEPRECATED_OPENCL_1_0_APIS #define CL_USE_DEPRECATED_OPENCL_1_1_APIS #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #define CL_USE_DEPRECATED_OPENCL_2_0_APIS #define CL_USE_DEPRECATED_OPENCL_2_1_APIS #define CL_USE_DEPRECATED_OPENCL_2_2_APIS #define CL_TARGET_OPENCL_VERSION 300 #include "CL/cl.h" #include "CL/cl_ext.h" #include "CL/cl_gl.h" #include "CL/cl_gl_ext.h" #if defined(_WIN32) && !defined(__MINGW32__) // clang-format off #include "CL/cl_d3d11.h" #include "CL/cl_d3d10.h" #include "CL/cl_dx9_media_sharing.h" // clang-format on #endif namespace oclgrind { class Context; class Kernel; class Program; class Queue; struct Command; struct Event; struct Image; } // namespace oclgrind struct _cl_platform_id { void* dispatch; }; struct _cl_device_id { void** dispatch; size_t globalMemSize; size_t constantMemSize; size_t localMemSize; size_t maxWGSize; }; struct _cl_context { void* dispatch; oclgrind::Context* context; void(CL_CALLBACK* notify)(const char*, const 
void*, size_t, void*); void* data; cl_context_properties* properties; size_t szProperties; std::stack> callbacks; unsigned int refCount; }; struct _cl_command_queue { void* dispatch; cl_command_queue_properties properties; cl_context context; std::vector properties_array; oclgrind::Queue* queue; unsigned int refCount; }; struct _cl_mem { void* dispatch; cl_context context; cl_mem parent; size_t address; size_t size; size_t offset; cl_mem_flags flags; bool isImage; void* hostPtr; std::stack> callbacks; std::vector properties; unsigned int refCount; }; struct cl_image : _cl_mem { cl_image_format format; cl_image_desc desc; }; struct _cl_program { void* dispatch; oclgrind::Program* program; cl_context context; unsigned int refCount; }; struct _cl_kernel { void* dispatch; oclgrind::Kernel* kernel; cl_program program; std::map memArgs; std::vector imageArgs; unsigned int refCount; }; struct _cl_event { void* dispatch; cl_context context; cl_command_queue queue; cl_command_type type; oclgrind::Event* event; std::list> callbacks; unsigned int refCount; }; struct _cl_sampler { void* dispatch; cl_context context; cl_bool normCoords; cl_addressing_mode addressMode; cl_filter_mode filterMode; std::vector properties; uint32_t sampler; unsigned int refCount; }; extern void* m_dispatchTable[256]; #endif // _ICD_H_ Oclgrind-21.10/src/runtime/oclgrind.cpp000066400000000000000000000361611413315665100200510ustar00rootroot00000000000000// oclgrind.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "config.h" #include #include #include #include #if defined(_WIN32) && !defined(__MINGW32__) #include #else #include #include #ifdef __APPLE__ #include #endif #endif using namespace std; #if defined(_WIN32) && !defined(__MINGW32__) static string appCmd; static void checkWow64(HANDLE parent, HANDLE child); static void die(const char* op); #else // not Windows static char** appArgs = NULL; #ifdef __APPLE__ #define LIB_EXTENSION "dylib" #define LD_LIBRARY_PATH_ENV "DYLD_LIBRARY_PATH" #define LD_PRELOAD_ENV "DYLD_INSERT_LIBRARIES" #else #define LIB_EXTENSION "so" #define LD_LIBRARY_PATH_ENV "LD_LIBRARY_PATH" #define LD_PRELOAD_ENV "LD_PRELOAD" #endif #endif static string getLibDirPath(); static bool parseArguments(int argc, char* argv[]); static void printUsage(); static void setEnvironment(const char* name, const char* value); int main(int argc, char* argv[]) { // Parse arguments if (!parseArguments(argc, argv)) { return 1; } #if defined(_WIN32) && !defined(__MINGW32__) // Get full path to oclgrind-rt.dll string dllpath = getLibDirPath(); dllpath += "\\oclgrind-rt.dll"; PROCESS_INFORMATION pinfo = {0}; STARTUPINFOA sinfo = {0}; sinfo.cb = sizeof(sinfo); // Create child process in suspended state if (!CreateProcessA(NULL, (LPSTR)appCmd.c_str(), NULL, NULL, FALSE, CREATE_SUSPENDED, NULL, NULL, &sinfo, &pinfo)) die("creating child process"); // Check that we are running as 64-bit if and only if we need to be checkWow64(GetCurrentProcess(), pinfo.hProcess); // Allocate memory for DLL path void* childPath = VirtualAllocEx(pinfo.hProcess, NULL, dllpath.size() + 1, MEM_COMMIT, PAGE_READWRITE); if (!childPath) die("allocating child memory"); // Write DLL path to child if (!WriteProcessMemory(pinfo.hProcess, childPath, (void*)dllpath.c_str(), dllpath.size() + 1, NULL)) die("writing child 
memory"); // Create thread to load DLL in child process HANDLE childThread = CreateRemoteThread(pinfo.hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)GetProcAddress( GetModuleHandleA("kernel32.dll"), "LoadLibraryA"), childPath, 0, NULL); if (!childThread) die("loading DLL in child thread"); // Wait for child thread to complete if (WaitForSingleObject(childThread, INFINITE) != WAIT_OBJECT_0) die("waiting for load thread"); CloseHandle(childThread); VirtualFreeEx(pinfo.hProcess, childPath, dllpath.size() + 1, MEM_RELEASE); // Load DLL in this process as well to get function pointers HMODULE dll = LoadLibraryA(dllpath.c_str()); if (!dll) die("loading DLL"); // Get handle to initOclgrind function in DLL HANDLE initFunction = GetProcAddress(dll, "initOclgrind"); if (!initFunction) die("getting init function address"); // Launch init function in child process childThread = CreateRemoteThread(pinfo.hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)initFunction, NULL, 0, NULL); if (!childThread) die("launching init in child thread"); // Wait for init to finish if (WaitForSingleObject(childThread, INFINITE) != WAIT_OBJECT_0) die("waiting for init thread"); // Check return value DWORD retval = 0; if (!GetExitCodeThread(childThread, &retval)) die("getting init exit code"); if (!retval) { cerr << "[Oclgrind] initialization failed: " << retval << endl; exit(retval); } CloseHandle(childThread); // Resume child process if (ResumeThread(pinfo.hThread) == -1) die("resuming thread"); // Wait for child process to finish if (WaitForSingleObject(pinfo.hProcess, INFINITE) != WAIT_OBJECT_0) die("waiting for child process failed"); // Get return code and forward it if (!GetExitCodeProcess(pinfo.hProcess, &retval)) die("getting child process exit code"); return retval; #else // not Windows // Get path to Oclgrind library directory string libdir = getLibDirPath(); // Construct new LD_LIBRARY_PATH string ldLibraryPath = libdir; const char* oldLdLibraryPath = getenv(LD_LIBRARY_PATH_ENV); if (oldLdLibraryPath) { ldLibraryPath += ":"; ldLibraryPath += oldLdLibraryPath; } // Add oclgrind-rt library to LD_PRELOAD string ldPreload = libdir; ldPreload += "/liboclgrind-rt."; ldPreload += LIB_EXTENSION; const char* oldLdPreload = getenv(LD_PRELOAD_ENV); if (oldLdPreload) { ldPreload += ":"; ldPreload += oldLdPreload; } setEnvironment(LD_LIBRARY_PATH_ENV, ldLibraryPath.c_str()); setEnvironment(LD_PRELOAD_ENV, ldPreload.c_str()); #ifdef __APPLE__ setEnvironment("DYLD_FORCE_FLAT_NAMESPACE", "1"); #endif // Launch target application if (execvp(appArgs[0], appArgs) == -1) { cerr << "[Oclgrind] Failed to launch target application" << endl; exit(1); } #endif } static bool parseArguments(int argc, char* argv[]) { for (int i = 1; i < argc; i++) { if (!strcmp(argv[i], "--build-options")) { if (++i >= argc) { cerr << "Missing argument to --build-options" << endl; return false; } setEnvironment("OCLGRIND_BUILD_OPTIONS", argv[i]); } else if (!strcmp(argv[i], "--check-api")) { setEnvironment("OCLGRIND_CHECK_API", "1"); } else if (!strcmp(argv[i], "--compute-units")) { if (++i >= argc) { cerr << "Missing argument to --compute-units" << endl; return false; } setEnvironment("OCLGRIND_COMPUTE_UNITS", argv[i]); } else if (!strcmp(argv[i], "--constant-mem-size")) { if (++i >= argc) { cerr << "Missing argument to --constant-mem-size" << endl; return false; } setEnvironment("OCLGRIND_CONSTANT_MEM_SIZE", argv[i]); } else if (!strcmp(argv[i], "--data-races")) { setEnvironment("OCLGRIND_DATA_RACES", "1"); } else if (!strcmp(argv[i], "--disable-pch")) { 
setEnvironment("OCLGRIND_DISABLE_PCH", "1"); } else if (!strcmp(argv[i], "--dump-spir")) { setEnvironment("OCLGRIND_DUMP_SPIR", "1"); } else if (!strcmp(argv[i], "--global-mem-size")) { if (++i >= argc) { cerr << "Missing argument to --global-mem-size" << endl; return false; } setEnvironment("OCLGRIND_GLOBAL_MEM_SIZE", argv[i]); } else if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help")) { printUsage(); exit(0); } else if (!strcmp(argv[i], "--inst-counts")) { setEnvironment("OCLGRIND_INST_COUNTS", "1"); } else if (!strcmp(argv[i], "-i") || !strcmp(argv[i], "--interactive")) { setEnvironment("OCLGRIND_INTERACTIVE", "1"); } else if (!strcmp(argv[i], "--local-mem-size")) { if (++i >= argc) { cerr << "Missing argument to --local-mem-size" << endl; return false; } setEnvironment("OCLGRIND_LOCAL_MEM_SIZE", argv[i]); } else if (!strcmp(argv[i], "--log")) { if (++i >= argc) { cerr << "Missing argument to --log" << endl; return false; } setEnvironment("OCLGRIND_LOG", argv[i]); } else if (!strcmp(argv[i], "--max-errors")) { if (++i >= argc) { cerr << "Missing argument to --max-errors" << endl; return false; } setEnvironment("OCLGRIND_MAX_ERRORS", argv[i]); } else if (!strcmp(argv[i], "--max-wgsize")) { if (++i >= argc) { cerr << "Missing argument to --max-wgsize" << endl; return false; } setEnvironment("OCLGRIND_MAX_WGSIZE", argv[i]); } else if (!strcmp(argv[i], "--num-threads")) { if (++i >= argc) { cerr << "Missing argument to --num-threads" << endl; return false; } setEnvironment("OCLGRIND_NUM_THREADS", argv[i]); } else if (!strcmp(argv[i], "--pch-dir")) { if (++i >= argc) { cerr << "Missing argument to --pch-dir" << endl; return false; } setEnvironment("OCLGRIND_PCH_DIR", argv[i]); } else if (!strcmp(argv[i], "--plugins")) { if (++i >= argc) { cerr << "Missing argument to --plugins" << endl; return false; } setEnvironment("OCLGRIND_PLUGINS", argv[i]); } else if (!strcmp(argv[i], "-q") || !strcmp(argv[i], "--quick")) { setEnvironment("OCLGRIND_QUICK", "1"); } else if (!strcmp(argv[i], "--uniform-writes")) { setEnvironment("OCLGRIND_UNIFORM_WRITES", "1"); } else if (!strcmp(argv[i], "--uninitialized")) { setEnvironment("OCLGRIND_UNINITIALIZED", "1"); } else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) { cout << endl; cout << "Oclgrind " PACKAGE_VERSION << endl; cout << endl; cout << "Copyright (c) 2013-2019" << endl; cout << "James Price and Simon McIntosh-Smith, University of Bristol" << endl; cout << "https://github.com/jrprice/Oclgrind" << endl; cout << endl; exit(0); } else if (argv[i][0] == '-') { cerr << "Unrecognised option '" << argv[i] << "'" << endl; return false; } else { #if defined(_WIN32) && !defined(__MINGW32__) // Build command-line for target application for (; i < argc; i++) { appCmd += argv[i]; appCmd += " "; } #else // not Windows appArgs = (char**)malloc((argc - i + 1) * sizeof(char*)); int offset = i; for (; i < argc; i++) { appArgs[i - offset] = argv[i]; } appArgs[argc - offset] = NULL; #endif break; } } #if defined(_WIN32) && !defined(__MINGW32__) if (appCmd.size() == 0) #else if (!appArgs) #endif { printUsage(); return false; } return true; } static void stripLastComponent(string& path) { size_t slash; #if defined(_WIN32) && !defined(__MINGW32__) if ((slash = path.find_last_of('\\')) == string::npos) #else if ((slash = path.find_last_of('/')) == string::npos) #endif { cerr << "[Oclgrind] Failed to get path to library directory" << endl; exit(1); } path.resize(slash); } static string getLibDirPath() { string libdir; // Get full path to executable #if 
defined(_WIN32) && !defined(__MINGW32__) char path[MAX_PATH]; GetModuleFileNameA(GetModuleHandle(NULL), path, MAX_PATH); if (GetLastError() != ERROR_SUCCESS) die("getting path to Oclgrind installation"); libdir = path; #else char exepath[PATH_MAX]; char path[PATH_MAX]; // Get path to executable #if defined(__APPLE__) uint32_t sz = PATH_MAX; if (_NSGetExecutablePath(exepath, &sz)) #else // not apple if (readlink("/proc/self/exe", exepath, PATH_MAX) == -1) #endif { cerr << "[Oclgrind] Unable to get path to Oclgrind installation" << endl; exit(1); } // Resolve symbolic links and normalise path realpath(exepath, path); libdir = path; #endif // Remove executable filename stripLastComponent(libdir); const char* testing = getenv("OCLGRIND_TESTING"); if (!testing) { // Remove containing directory and append library directory stripLastComponent(libdir); libdir += "/lib" LIBDIR_SUFFIX; } return libdir; } static void printUsage() { cout << "Usage: oclgrind [OPTIONS] COMMAND" << endl << " oclgrind [--help | --version]" << endl << endl << "Options:" << endl << " --build-options OPTIONS " "Additional options to pass to the OpenCL compiler" << endl << " --check-api " "Report errors on API calls" << endl << " --compute-units UNITS " "Change the number of compute units reported" << endl << " --constant-mem-size BYTES " "Change the constant memory size of the device" << endl << " --data-races " "Enable data-race detection" << endl << " --disable-pch " "Don't use precompiled headers" << endl << " --dump-spir " "Dump SPIR to /tmp/oclgrind_*.{ll,bc}" << endl << " --global-mem-size BYTES " "Change the global memory size of the device" << endl << " --help [-h] " "Display usage information" << endl << " --inst-counts " "Output histograms of instructions executed" << endl << " --interactive [-i] " "Enable interactive mode" << endl << " --local-mem-size BYTES " "Change the local memory size of the device" << endl << " --log LOGFILE " "Redirect log/error messages to a file" << endl << " --max-errors NUM " "Limit the number of error/warning messages" << endl << " --max-wgsize WGSIZE " "Change the maximum work-group size of the device" << endl << " --num-threads NUM " "Set the number of worker threads to use" << endl << " --pch-dir DIR " "Override directory containing precompiled headers" << endl << " --plugins PLUGINS " "Load colon separated list of plugin libraries" << endl << " --quick [-q] " "Only run first and last work-group" << endl << " --uniform-writes " "Don't suppress uniform write-write data-races" << endl << " --uninitialized " "Report usage of uninitialized values" << endl << " --version [-v] " "Display version information" << endl << endl << "For more information, please visit the Oclgrind wiki page:" << endl << "-> https://github.com/jrprice/Oclgrind/wiki" << endl << endl; } static void setEnvironment(const char* name, const char* value) { #if defined(_WIN32) && !defined(__MINGW32__) _putenv_s(name, value); #else setenv(name, value, 1); #endif } #if defined(_WIN32) && !defined(__MINGW32__) void checkWow64(HANDLE parent, HANDLE child) { BOOL parentWow64, childWow64; IsWow64Process(parent, &parentWow64); IsWow64Process(child, &childWow64); if (parentWow64 != childWow64) { const char* bits = childWow64 ? 
"32" : "64"; cerr << "[Oclgrind] target application is " << bits << "-bit" << endl << "Use the " << bits << "-bit version of oclgrind.exe" << endl; exit(1); } } void die(const char* op) { DWORD err = GetLastError(); char buffer[1024]; FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buffer, 1024, NULL); cerr << "[Oclgrind] Error while '" << op << "':" << endl << buffer << endl; exit(1); } #endif Oclgrind-21.10/src/runtime/runtime.cpp000066400000000000000000005357341413315665100177450ustar00rootroot00000000000000// runtime.cpp (Oclgrind) // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "config.h" #include #include #include #include #include #include "async_queue.h" #include "icd.h" #include "CL/cl_half.h" #include "core/Context.h" #include "core/Kernel.h" #include "core/Memory.h" #include "core/Program.h" #include "core/Queue.h" using namespace std; #define DEFAULT_GLOBAL_MEM_SIZE (128 * 1048576) #define DEFAULT_CONSTANT_MEM_SIZE (65536) #define DEFAULT_LOCAL_MEM_SIZE (32768) #define DEFAULT_MAX_WGSIZE (1024) #define PLATFORM_NAME "Oclgrind" #define PLATFORM_VENDOR "Oclgrind" #ifdef ENABLE_OPENCL_3 #define PLATFORM_VERSION "OpenCL 3.0 (Oclgrind " PACKAGE_VERSION ")" #else #define PLATFORM_VERSION "OpenCL 1.2 (Oclgrind " PACKAGE_VERSION ")" #endif #define PLATFORM_PROFILE "FULL_PROFILE" #define PLATFORM_SUFFIX "oclg" #define DEVICE_NAME "Oclgrind Simulator" #define DEVICE_VENDOR "Oclgrind" #define DEVICE_VENDOR_ID 0x0042 #ifdef ENABLE_OPENCL_3 #define DEVICE_VERSION "OpenCL 3.0 (Oclgrind " PACKAGE_VERSION ")" #else #define DEVICE_VERSION "OpenCL 1.2 (Oclgrind " PACKAGE_VERSION ")" #endif #define DEVICE_LANG_VERSION "OpenCL C 1.2 (Oclgrind " PACKAGE_VERSION ")" #define DRIVER_VERSION "Oclgrind " PACKAGE_VERSION #define DEVICE_PROFILE "FULL_PROFILE" #define DEVICE_CTS_VERSION "v0000-01-01-00" #define DEVICE_SPIR_VERSIONS "1.2" #define DEVICE_TYPE \ (CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR | \ CL_DEVICE_TYPE_DEFAULT) namespace { #define CASE(X) \ case X: \ return #X; const char* CLErrorToString(cl_int err) { switch (err) { CASE(CL_SUCCESS) CASE(CL_DEVICE_NOT_FOUND) CASE(CL_DEVICE_NOT_AVAILABLE) CASE(CL_COMPILER_NOT_AVAILABLE) CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE) CASE(CL_OUT_OF_RESOURCES) CASE(CL_OUT_OF_HOST_MEMORY) CASE(CL_PROFILING_INFO_NOT_AVAILABLE) CASE(CL_MEM_COPY_OVERLAP) CASE(CL_IMAGE_FORMAT_MISMATCH) CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED) CASE(CL_BUILD_PROGRAM_FAILURE) CASE(CL_MAP_FAILURE) CASE(CL_MISALIGNED_SUB_BUFFER_OFFSET) CASE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) CASE(CL_COMPILE_PROGRAM_FAILURE) CASE(CL_LINKER_NOT_AVAILABLE) CASE(CL_LINK_PROGRAM_FAILURE) CASE(CL_DEVICE_PARTITION_FAILED) CASE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE) CASE(CL_INVALID_VALUE) CASE(CL_INVALID_DEVICE_TYPE) CASE(CL_INVALID_PLATFORM) CASE(CL_INVALID_DEVICE) CASE(CL_INVALID_CONTEXT) CASE(CL_INVALID_QUEUE_PROPERTIES) CASE(CL_INVALID_COMMAND_QUEUE) CASE(CL_INVALID_HOST_PTR) CASE(CL_INVALID_MEM_OBJECT) CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) CASE(CL_INVALID_IMAGE_SIZE) CASE(CL_INVALID_SAMPLER) CASE(CL_INVALID_BINARY) CASE(CL_INVALID_BUILD_OPTIONS) CASE(CL_INVALID_PROGRAM) CASE(CL_INVALID_PROGRAM_EXECUTABLE) CASE(CL_INVALID_KERNEL_NAME) CASE(CL_INVALID_KERNEL_DEFINITION) CASE(CL_INVALID_KERNEL) 
CASE(CL_INVALID_ARG_INDEX) CASE(CL_INVALID_ARG_VALUE) CASE(CL_INVALID_ARG_SIZE) CASE(CL_INVALID_KERNEL_ARGS) CASE(CL_INVALID_WORK_DIMENSION) CASE(CL_INVALID_WORK_GROUP_SIZE) CASE(CL_INVALID_WORK_ITEM_SIZE) CASE(CL_INVALID_GLOBAL_OFFSET) CASE(CL_INVALID_EVENT_WAIT_LIST) CASE(CL_INVALID_EVENT) CASE(CL_INVALID_OPERATION) CASE(CL_INVALID_GL_OBJECT) CASE(CL_INVALID_BUFFER_SIZE) CASE(CL_INVALID_MIP_LEVEL) CASE(CL_INVALID_GLOBAL_WORK_SIZE) CASE(CL_INVALID_PROPERTY) CASE(CL_INVALID_IMAGE_DESCRIPTOR) CASE(CL_INVALID_COMPILER_OPTIONS) CASE(CL_INVALID_LINKER_OPTIONS) CASE(CL_INVALID_DEVICE_PARTITION_COUNT) } return "Unknown"; } #undef CASE void notifyAPIError(cl_context context, cl_int err, const char* function, string info = "") { // Remove leading underscore from function name if necessary if (!strncmp(function, "_cl", 3)) { function++; } // Build error message ostringstream oss; oss << endl << "Oclgrind - OpenCL runtime error detected" << endl << "\tFunction: " << function << endl << "\tError: " << CLErrorToString(err) << endl; if (!info.empty()) { oss << "\t" << info << endl; } string error = oss.str(); // Output message to stderr if required if (oclgrind::checkEnv("OCLGRIND_CHECK_API")) { cerr << error << endl; } // Fire context callback if set if (context && context->notify) { context->notify(error.c_str(), context->data, 0, NULL); } } void releaseCommand(oclgrind::Command* command) { if (command) { asyncQueueRelease(command); // Release dependent commands while (!command->execBefore.empty()) { oclgrind::Command* cmd = command->execBefore.front(); command->execBefore.pop_front(); releaseCommand(cmd); } delete command; } } } // namespace namespace { // Name of the API function currently being executed thread_local static std::stack g_apiCallStack; class APICallEntry { public: APICallEntry(const char* name) { g_apiCallStack.push(name); } ~APICallEntry() { g_apiCallStack.pop(); } }; #define REGISTER_API APICallEntry apiCallEntry(__func__) } // namespace #define ReturnErrorInfo(context, err, info) \ { \ ostringstream oss; \ oss << info; \ notifyAPIError(context, err, g_apiCallStack.top(), oss.str()); \ return err; \ } #define ReturnErrorArg(context, err, arg) \ ReturnErrorInfo(context, err, "For argument '" #arg "'") #define ReturnError(context, err) ReturnErrorInfo(context, err, "") #define SetErrorInfo(context, err, info) \ if (err != CL_SUCCESS) \ { \ ostringstream oss; \ oss << info; \ notifyAPIError(context, err, g_apiCallStack.top(), oss.str()); \ } \ if (errcode_ret) \ { \ *errcode_ret = err; \ } #define SetErrorArg(context, err, arg) \ SetErrorInfo(context, err, "For argument '" #arg "'") #define SetError(context, err) SetErrorInfo(context, err, "") #define ParamValueSizeTooSmall \ "param_value_size is " << param_value_size << ", but result requires " \ << result_size << " bytes" static struct _cl_platform_id* m_platform = NULL; static struct _cl_device_id* m_device = NULL; CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR( cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms) { REGISTER_API; if (platforms && num_entries < 1) { ReturnError(NULL, CL_INVALID_VALUE); } if (!m_platform) { m_platform = new _cl_platform_id; m_platform->dispatch = m_dispatchTable; m_device = new _cl_device_id; m_device->dispatch = m_dispatchTable; m_device->globalMemSize = oclgrind::getEnvInt( "OCLGRIND_GLOBAL_MEM_SIZE", DEFAULT_GLOBAL_MEM_SIZE, false); m_device->constantMemSize = oclgrind::getEnvInt( "OCLGRIND_CONSTANT_MEM_SIZE", DEFAULT_CONSTANT_MEM_SIZE, false); m_device->localMemSize = 
oclgrind::getEnvInt("OCLGRIND_LOCAL_MEM_SIZE", DEFAULT_LOCAL_MEM_SIZE, false); m_device->maxWGSize = oclgrind::getEnvInt("OCLGRIND_MAX_WGSIZE", DEFAULT_MAX_WGSIZE, false); } if (platforms) { platforms[0] = m_platform; } if (num_platforms) { *num_platforms = 1; } return CL_SUCCESS; } //////////////////////////////////// // OpenCL Runtime API Definitions // //////////////////////////////////// CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddress(const char* funcname) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; if (strcmp(funcname, "clIcdGetPlatformIDsKHR") == 0) { return (void*)clIcdGetPlatformIDsKHR; } else if (strcmp(funcname, "clGetPlatformInfo") == 0) { return (void*)clGetPlatformInfo; } else { return NULL; } } CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; return clIcdGetPlatformIDsKHR(num_entries, platforms, num_platforms); } CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo( cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // All possible return types union { cl_ulong clulong; } result_data; size_t result_size = 0; const void* data = NULL; static constexpr char extensions[] = "cl_khr_icd"; static constexpr cl_version numeric_version = CL_MAKE_VERSION(3, 0, 0); static constexpr cl_name_version extension_versions[] = { {CL_MAKE_VERSION(1, 0, 0), "cl_khr_icd"}, }; // Select platform info switch (param_name) { case CL_PLATFORM_PROFILE: data = PLATFORM_PROFILE; result_size = strlen(static_cast(data)) + 1; break; case CL_PLATFORM_VERSION: data = PLATFORM_VERSION; result_size = strlen(static_cast(data)) + 1; break; case CL_PLATFORM_NAME: data = PLATFORM_NAME; result_size = strlen(static_cast(data)) + 1; break; case CL_PLATFORM_VENDOR: data = PLATFORM_VENDOR; result_size = strlen(static_cast(data)) + 1; break; case CL_PLATFORM_EXTENSIONS: data = extensions; result_size = strlen(static_cast(data)) + 1; break; case CL_PLATFORM_ICD_SUFFIX_KHR: data = PLATFORM_SUFFIX; result_size = strlen(static_cast(data)) + 1; break; case CL_PLATFORM_NUMERIC_VERSION: result_size = sizeof(numeric_version); data = &numeric_version; break; case CL_PLATFORM_EXTENSIONS_WITH_VERSION: result_size = sizeof(extension_versions); data = extension_versions; break; case CL_PLATFORM_HOST_TIMER_RESOLUTION: result_size = sizeof(cl_ulong); result_data.clulong = 0; break; default: ReturnErrorArg(NULL, CL_INVALID_VALUE, param_name); } // Compute size of result if (param_value_size_ret) { *param_value_size_ret = result_size; } // Return result if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(NULL, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { if (data) memcpy(param_value, data, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDs( cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id* devices, cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (devices && num_entries < 1) { ReturnError(NULL, CL_INVALID_VALUE); } if (!(device_type & DEVICE_TYPE)) { ReturnError(NULL, CL_DEVICE_NOT_FOUND); } if (devices) { *devices = m_device; } if (num_devices) { *num_devices = 1; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo( cl_device_id device, cl_device_info 
param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check device is valid if (device != m_device) { ReturnErrorArg(NULL, CL_INVALID_DEVICE, device); } size_t dummy; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; // All possible return types union { cl_uint cluint; size_t sizet; size_t sizet3[3]; cl_ulong clulong; cl_bool clbool; cl_device_id cldeviceid; cl_device_type cldevicetype; cl_device_fp_config devicefpconfig; cl_device_mem_cache_type devicememcachetype; cl_device_local_mem_type devicelocalmemtype; cl_device_exec_capabilities cldevexeccap; cl_command_queue_properties clcmdqprop; cl_platform_id clplatid; cl_version clversion; cl_device_partition_property cldevpartprop; cl_device_affinity_domain cldevaffdom; cl_device_svm_capabilities svm; cl_device_atomic_capabilities atomiccaps; cl_device_device_enqueue_capabilities devenqcaps; } result_data; // The result is data in memory that needs copying const void* data = 0; static constexpr char extensions[] = " cl_khr_spir" " cl_khr_3d_image_writes" " cl_khr_global_int32_base_atomics" " cl_khr_global_int32_extended_atomics" " cl_khr_local_int32_base_atomics" " cl_khr_local_int32_extended_atomics" " cl_khr_int64_base_atomics" " cl_khr_int64_extended_atomics" " cl_khr_byte_addressable_store" " cl_khr_fp64"; static constexpr cl_name_version extension_versions[] = { {CL_MAKE_VERSION(1, 0, 0), "cl_khr_spir"}, {CL_MAKE_VERSION(1, 0, 0), "cl_khr_3d_image_writes"}, {CL_MAKE_VERSION(1, 0, 0), "cl_khr_global_int32_base_atomics"}, {CL_MAKE_VERSION(1, 0, 0), "cl_khr_global_int32_extended_atomics"}, {CL_MAKE_VERSION(1, 0, 0), "cl_khr_local_int32_base_atomics"}, {CL_MAKE_VERSION(1, 0, 0), "cl_khr_local_int32_extended_atomics"}, {CL_MAKE_VERSION(1, 0, 0), "cl_khr_int64_base_atomics"}, {CL_MAKE_VERSION(1, 0, 0), "cl_khr_int64_extended_atomics"}, {CL_MAKE_VERSION(1, 0, 0), "cl_khr_byte_addressable_store"}, {CL_MAKE_VERSION(1, 0, 0), "cl_khr_fp64"}, }; static constexpr cl_name_version opencl_c_all_versions[] = { {CL_MAKE_VERSION(1, 0, 0), "OpenCL C"}, {CL_MAKE_VERSION(1, 1, 0), "OpenCL C"}, {CL_MAKE_VERSION(1, 2, 0), "OpenCL C"}, {CL_MAKE_VERSION(3, 0, 0), "OpenCL C"}, }; // TODO: Populate this // static constexpr cl_name_version il_versions[] = {}; // TODO: Populate this // static constexpr cl_name_version built_in_kernel_versions[] = {}; // TODO: Populate this // static constexpr cl_name_version opencl_c_features[] = {}; switch (param_name) { case CL_DEVICE_TYPE: result_size = sizeof(cl_device_type); result_data.cldevicetype = DEVICE_TYPE; break; case CL_DEVICE_VENDOR_ID: result_size = sizeof(cl_uint); result_data.cluint = DEVICE_VENDOR_ID; break; case CL_DEVICE_MAX_COMPUTE_UNITS: result_size = sizeof(cl_uint); result_data.cluint = oclgrind::getEnvInt("OCLGRIND_COMPUTE_UNITS", 1, false); break; case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: result_size = sizeof(cl_uint); result_data.cluint = 3; break; case CL_DEVICE_MAX_WORK_GROUP_SIZE: result_size = sizeof(size_t); result_data.sizet = m_device->maxWGSize; break; case CL_DEVICE_MAX_WORK_ITEM_SIZES: result_size = 3 * sizeof(size_t); result_data.sizet3[0] = m_device->maxWGSize; result_data.sizet3[1] = m_device->maxWGSize; result_data.sizet3[2] = m_device->maxWGSize; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: case 
CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_DEVICE_MAX_CLOCK_FREQUENCY: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_DEVICE_ADDRESS_BITS: result_size = sizeof(cl_uint); result_data.cluint = sizeof(size_t) << 3; break; case CL_DEVICE_MAX_READ_IMAGE_ARGS: result_size = sizeof(cl_uint); result_data.cluint = 128; break; case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: result_size = sizeof(cl_uint); result_data.cluint = 64; break; case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: result_size = sizeof(cl_uint); result_data.cluint = 64; break; case CL_DEVICE_MAX_MEM_ALLOC_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = m_device->globalMemSize; break; case CL_DEVICE_IMAGE2D_MAX_WIDTH: case CL_DEVICE_IMAGE2D_MAX_HEIGHT: result_size = sizeof(size_t); result_data.sizet = 8192; break; case CL_DEVICE_IMAGE3D_MAX_WIDTH: case CL_DEVICE_IMAGE3D_MAX_DEPTH: case CL_DEVICE_IMAGE3D_MAX_HEIGHT: result_size = sizeof(size_t); result_data.sizet = 2048; break; case CL_DEVICE_IMAGE_SUPPORT: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_MAX_PARAMETER_SIZE: result_size = sizeof(size_t); result_data.sizet = 1024; break; case CL_DEVICE_MAX_SAMPLERS: result_size = sizeof(cl_uint); result_data.cluint = 16; break; case CL_DEVICE_IMAGE_PITCH_ALIGNMENT: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_MAX_PIPE_ARGS: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_PIPE_MAX_PACKET_SIZE: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_MEM_BASE_ADDR_ALIGN: result_size = sizeof(cl_uint); result_data.cluint = sizeof(cl_long16) << 3; break; case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_DEVICE_SINGLE_FP_CONFIG: result_size = sizeof(cl_device_fp_config); result_data.devicefpconfig = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN | CL_FP_DENORM; break; case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: result_size = sizeof(cl_device_mem_cache_type); result_data.devicememcachetype = CL_NONE; break; case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = 0; break; case CL_DEVICE_GLOBAL_MEM_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = device->globalMemSize; break; case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = device->constantMemSize; break; case CL_DEVICE_MAX_CONSTANT_ARGS: result_size = sizeof(cl_uint); result_data.cluint = 1024; break; case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE: result_size = sizeof(size_t); result_data.sizet = 64 * 1024; break; case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: result_size = sizeof(size_t); result_data.sizet = device->globalMemSize; break; case CL_DEVICE_LOCAL_MEM_TYPE: result_size = sizeof(cl_device_local_mem_type); result_data.devicelocalmemtype = CL_LOCAL; break; case CL_DEVICE_LOCAL_MEM_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = device->localMemSize; break; case CL_DEVICE_ERROR_CORRECTION_SUPPORT: result_size = sizeof(cl_bool); result_data.clbool = CL_FALSE; break; case CL_DEVICE_PROFILING_TIMER_RESOLUTION: result_size = 
sizeof(size_t); result_data.sizet = 1000; break; case CL_DEVICE_ENDIAN_LITTLE: result_size = sizeof(cl_bool); #if IS_BIG_ENDIAN result_data.clbool = CL_FALSE; #else result_data.clbool = CL_TRUE; #endif break; case CL_DEVICE_AVAILABLE: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_COMPILER_AVAILABLE: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_EXECUTION_CAPABILITIES: result_size = sizeof(cl_device_exec_capabilities); result_data.cldevexeccap = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL; break; case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: result_size = sizeof(cl_command_queue_properties); result_data.clcmdqprop = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE; break; case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES: result_size = sizeof(cl_command_queue_properties); result_data.clcmdqprop = 0; break; case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_MAX_ON_DEVICE_QUEUES: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_MAX_ON_DEVICE_EVENTS: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_NAME: result_size = sizeof(DEVICE_NAME); data = DEVICE_NAME; break; case CL_DEVICE_VENDOR: result_size = sizeof(DEVICE_VENDOR); data = DEVICE_VENDOR; break; case CL_DRIVER_VERSION: result_size = sizeof(DRIVER_VERSION); data = DRIVER_VERSION; break; case CL_DEVICE_PROFILE: result_size = sizeof(DEVICE_PROFILE); data = DEVICE_PROFILE; break; case CL_DEVICE_VERSION: result_size = sizeof(DEVICE_VERSION); data = DEVICE_VERSION; break; case CL_DEVICE_EXTENSIONS: result_size = sizeof(extensions); data = extensions; break; case CL_DEVICE_PLATFORM: result_size = sizeof(cl_platform_id); result_data.clplatid = m_platform; break; case CL_DEVICE_DOUBLE_FP_CONFIG: result_size = sizeof(cl_device_fp_config); result_data.devicefpconfig = CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_HOST_UNIFIED_MEMORY: result_size = sizeof(cl_bool); result_data.clbool = CL_FALSE; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_OPENCL_C_VERSION: result_size = sizeof(DEVICE_LANG_VERSION); data = DEVICE_LANG_VERSION; break; case CL_DEVICE_LINKER_AVAILABLE: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_BUILT_IN_KERNELS: result_size = 1; data = ""; break; case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: result_size = sizeof(size_t); result_data.sizet = 65536; break; case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: result_size = sizeof(size_t); result_data.sizet = 2048; break; case CL_DEVICE_PARENT_DEVICE: result_size = sizeof(cl_device_id); result_data.cldeviceid = NULL; break; case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_PARTITION_PROPERTIES: case CL_DEVICE_PARTITION_TYPE: 
result_size = sizeof(cl_device_partition_property); result_data.cldevpartprop = 0; break; case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: result_size = sizeof(cl_device_affinity_domain); result_data.cldevaffdom = 0; break; case CL_DEVICE_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_PRINTF_BUFFER_SIZE: result_size = sizeof(size_t); result_data.sizet = 1024; break; case CL_DEVICE_SVM_CAPABILITIES: result_size = sizeof(cl_device_svm_capabilities); result_data.svm = 0; break; case CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_MAX_NUM_SUB_GROUPS: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: result_size = sizeof(cl_bool); result_data.clbool = CL_FALSE; break; case CL_DEVICE_SPIR_VERSIONS: result_size = sizeof(DEVICE_SPIR_VERSIONS); data = DEVICE_SPIR_VERSIONS; break; case CL_DEVICE_NUMERIC_VERSION: result_size = sizeof(cl_version); result_data.cluint = CL_MAKE_VERSION(3, 0, 0); break; case CL_DEVICE_EXTENSIONS_WITH_VERSION: result_size = sizeof(extension_versions); data = extension_versions; break; case CL_DEVICE_IL_VERSION: result_size = 1; data = ""; break; case CL_DEVICE_ILS_WITH_VERSION: // TODO: Enable when supported. // result_size = sizeof(il_versions); // data = il_versions; result_size = 0; break; case CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION: // TODO: Enable when supported. // result_size = sizeof(built_in_kernel_versions); // data = built_in_kernel_versions; result_size = 0; break; case CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES: result_size = sizeof(cl_device_atomic_capabilities); result_data.atomiccaps = CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP; break; case CL_DEVICE_ATOMIC_FENCE_CAPABILITIES: result_size = sizeof(cl_device_atomic_capabilities); result_data.atomiccaps = CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP; break; case CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_OPENCL_C_ALL_VERSIONS: result_size = sizeof(opencl_c_all_versions); data = opencl_c_all_versions; break; case CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: result_size = sizeof(size_t); result_data.sizet = 1; break; case CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT: result_size = sizeof(cl_bool); result_data.clbool = CL_FALSE; break; case CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT: result_size = sizeof(cl_bool); result_data.clbool = CL_FALSE; break; case CL_DEVICE_OPENCL_C_FEATURES: // TODO: Enable when supported. 
// result_size = sizeof(opencl_c_features); // data = opencl_c_features; result_size = 0; break; case CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES: result_size = sizeof(cl_device_device_enqueue_capabilities); result_data.devenqcaps = 0; break; case CL_DEVICE_PIPE_SUPPORT: result_size = sizeof(cl_bool); result_data.clbool = CL_FALSE; break; case CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED: result_size = sizeof(DEVICE_CTS_VERSION); data = DEVICE_CTS_VERSION; break; default: ReturnErrorArg(NULL, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(NULL, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { if (data) memcpy(param_value, data, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevices( cl_device_id in_device, const cl_device_partition_property* properties, cl_uint num_entries, cl_device_id* out_devices, cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_VALUE, "Not yet implemented"); } CL_API_ENTRY cl_int CL_API_CALL clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; return CL_SUCCESS; } CL_API_ENTRY cl_context CL_API_CALL clCreateContext( const cl_context_properties* properties, cl_uint num_devices, const cl_device_id* devices, void(CL_CALLBACK* pfn_notify)(const char*, const void*, size_t, void*), void* user_data, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (num_devices != 1) { SetErrorArg(NULL, CL_INVALID_VALUE, num_devices); return NULL; } if (!devices) { SetErrorArg(NULL, CL_INVALID_VALUE, devices); return NULL; } if (devices[0] != m_device) { SetError(NULL, CL_INVALID_DEVICE); return NULL; } if (!pfn_notify && user_data) { SetErrorInfo(NULL, CL_INVALID_VALUE, "pfn_notify NULL but user_data non-NULL"); return NULL; } // Create context object cl_context context = new _cl_context; context->dispatch = m_dispatchTable; context->context = new oclgrind::Context(); context->notify = pfn_notify; context->data = user_data; context->properties = NULL; context->szProperties = 0; context->refCount = 1; if (properties) { int num = 1; while (properties[num]) { num++; } size_t sz = (num + 1) * sizeof(cl_context_properties); context->szProperties = sz; context->properties = (cl_context_properties*)malloc(sz); memcpy(context->properties, properties, sz); } SetError(NULL, CL_SUCCESS); return context; } CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType( const cl_context_properties* properties, cl_device_type device_type, void(CL_CALLBACK* pfn_notify)(const char*, const void*, size_t, void*), void* user_data, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!pfn_notify && user_data) { SetErrorInfo(NULL, CL_INVALID_VALUE, "pfn_notify NULL but user_data non-NULL"); return NULL; } if (!(device_type & DEVICE_TYPE)) { SetErrorArg(NULL, CL_DEVICE_NOT_FOUND, device_type); return NULL; } // Create context object cl_context context = new _cl_context; context->dispatch = m_dispatchTable; context->context = new oclgrind::Context(); context->notify = pfn_notify; context->data = user_data; context->properties = NULL; context->szProperties = 0; context->refCount = 1; if (properties) { int num = 0; while (properties[num]) { num++; } size_t sz = (num + 
1) * sizeof(cl_context_properties); context->szProperties = sz; context->properties = (cl_context_properties*)malloc(sz); memcpy(context->properties, properties, sz); } SetError(NULL, CL_SUCCESS); return context; } CL_API_ENTRY cl_int CL_API_CALL clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!context) { ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context); } context->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!context) { ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context); } if (--context->refCount == 0) { if (context->properties) { free(context->properties); } while (!context->callbacks.empty()) { pair<void(CL_CALLBACK*)(cl_context, void*), void*> callback = context->callbacks.top(); callback.first(context, callback.second); context->callbacks.pop(); } delete context->context; delete context; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo( cl_context context, cl_context_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check context is valid if (!context) { ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context); } size_t dummy = 0; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; union { cl_uint cluint; cl_device_id cldevid; } result_data; cl_context_properties* properties = NULL; switch (param_name) { case CL_CONTEXT_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = context->refCount; break; case CL_CONTEXT_NUM_DEVICES: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_CONTEXT_DEVICES: result_size = sizeof(cl_device_id); result_data.cldevid = m_device; break; case CL_CONTEXT_PROPERTIES: result_size = context->szProperties; properties = context->properties; break; default: ReturnErrorArg(context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { if (properties) memcpy(param_value, properties, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (device != m_device) { SetErrorArg(context, CL_INVALID_DEVICE, device); return NULL; } // Create command-queue object bool out_of_order = properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; cl_command_queue queue; queue = new _cl_command_queue; queue->queue = new oclgrind::Queue(context->context, out_of_order); queue->dispatch = m_dispatchTable; queue->properties = properties; queue->context = context; queue->refCount = 1; clRetainContext(context); SetError(context, CL_SUCCESS); return queue; } CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueueProperty( cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties* old_properties) { REGISTER_API; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } command_queue->refCount++; return CL_SUCCESS;
} CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (--command_queue->refCount == 0) { // TODO: Retain/release queue from async thread // TODO: Spec states that this function performs an implicit flush, // so maybe we are OK to delete queue here? clFinish(command_queue); delete command_queue->queue; clReleaseContext(command_queue->context); delete command_queue; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfo( cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check queue is valid if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } size_t dummy = 0; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; union { cl_uint cluint; cl_context context; cl_device_id cldevid; cl_command_queue_properties properties; cl_command_queue queue; } result_data; const void* data = nullptr; switch (param_name) { case CL_QUEUE_CONTEXT: result_size = sizeof(cl_context); result_data.context = command_queue->context; break; case CL_QUEUE_DEVICE: result_size = sizeof(cl_device_id); result_data.cldevid = m_device; break; case CL_QUEUE_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = command_queue->refCount; break; case CL_QUEUE_PROPERTIES: result_size = sizeof(cl_command_queue_properties); result_data.properties = command_queue->properties; break; case CL_QUEUE_PROPERTIES_ARRAY: result_size = command_queue->properties_array.size() * sizeof(cl_queue_properties); data = command_queue->properties_array.data(); break; case CL_QUEUE_SIZE: ReturnErrorArg(command_queue->context, CL_INVALID_COMMAND_QUEUE, param_name); case CL_QUEUE_DEVICE_DEFAULT: result_size = sizeof(cl_command_queue); result_data.queue = nullptr; break; default: ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { if (data) memcpy(param_value, data, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } namespace { cl_mem createBuffer(cl_context context, cl_mem_flags flags, size_t size, void* host_ptr, cl_int* errcode_ret) { // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (size == 0) { SetErrorArg(context, CL_INVALID_BUFFER_SIZE, size); return NULL; } if ((host_ptr == NULL) == ((flags & CL_MEM_COPY_HOST_PTR) || flags & CL_MEM_USE_HOST_PTR)) { SetErrorInfo(context, CL_INVALID_HOST_PTR, "host_ptr NULL but CL_MEM_{COPY,USE}_HOST_PTR used"); return NULL; } if ((flags & CL_MEM_USE_HOST_PTR) && (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) { SetErrorInfo(context, CL_INVALID_VALUE, "CL_MEM_USE_HOST_PTR cannot be used with " "CL_MEM_{COPY,ALLOC}_HOST_PTR"); return NULL; } // Create memory object oclgrind::Memory* globalMemory = context->context->getGlobalMemory(); cl_mem mem = new _cl_mem; mem->dispatch = m_dispatchTable; mem->context = context; mem->parent = NULL; mem->size = size; mem->offset = 0; mem->flags = flags; mem->isImage = false; mem->refCount = 1; if (flags & CL_MEM_USE_HOST_PTR) { mem->address = globalMemory->createHostBuffer(size, host_ptr, 
flags); mem->hostPtr = host_ptr; } else { mem->address = globalMemory->allocateBuffer(size, flags); mem->hostPtr = NULL; } if (!mem->address) { SetError(context, CL_MEM_OBJECT_ALLOCATION_FAILURE); delete mem; return NULL; } clRetainContext(context); if (flags & CL_MEM_COPY_HOST_PTR) { context->context->getGlobalMemory()->store((const unsigned char*)host_ptr, mem->address, size); } SetError(context, CL_SUCCESS); return mem; } } // namespace CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void* host_ptr, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; return createBuffer(context, flags, size, host_ptr, errcode_ret); } CL_API_ENTRY cl_mem CL_API_CALL clCreateBufferWithProperties( cl_context context, const cl_mem_properties* properties, cl_mem_flags flags, size_t size, void* host_ptr, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_3_0 { REGISTER_API; // Check properties (none are supported) if (properties && properties[0] != 0) { SetErrorInfo(context, CL_INVALID_PROPERTY, "Unsupported property"); } cl_mem buffer = createBuffer(context, flags, size, host_ptr, errcode_ret); if (buffer && properties) { buffer->properties.assign(properties, properties + 1); } return buffer; } CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBuffer( cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type buffer_create_type, const void* buffer_create_info, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1 { REGISTER_API; // Check parameters if (!buffer) { SetErrorArg(NULL, CL_INVALID_MEM_OBJECT, buffer); return NULL; } if (buffer->parent) { SetErrorInfo(buffer->context, CL_INVALID_MEM_OBJECT, "Parent buffer cannot be a sub-buffer"); return NULL; } if (buffer_create_type != CL_BUFFER_CREATE_TYPE_REGION) { SetErrorArg(buffer->context, CL_INVALID_VALUE, buffer_create_type); return NULL; } if (!buffer_create_info) { SetErrorArg(buffer->context, CL_INVALID_VALUE, buffer_create_info); return NULL; } _cl_buffer_region region = *(_cl_buffer_region*)buffer_create_info; if (region.origin + region.size > buffer->size) { SetErrorInfo(buffer->context, CL_INVALID_VALUE, "Region doesn't fit inside parent buffer"); return NULL; } if (region.size == 0) { SetErrorInfo(buffer->context, CL_INVALID_VALUE, "Region size cannot be 0"); return NULL; } // Inherit flags from parent where appropriate cl_mem_flags memFlags = 0; cl_mem_flags rwFlags = CL_MEM_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY; cl_mem_flags hostAccess = CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY; cl_mem_flags hostPtr = CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR; if ((flags & rwFlags) == 0) { memFlags |= buffer->flags & rwFlags; } else { memFlags |= flags & rwFlags; } if ((flags & hostAccess) == 0) { memFlags |= buffer->flags & hostAccess; } else { memFlags |= flags & hostAccess; } memFlags |= buffer->flags & hostPtr; // Create memory object cl_mem mem = new _cl_mem; mem->dispatch = m_dispatchTable; mem->context = buffer->context; mem->parent = buffer; mem->size = region.size; mem->offset = region.origin; mem->isImage = false; mem->flags = memFlags; mem->hostPtr = (unsigned char*)buffer->hostPtr + region.origin; mem->refCount = 1; mem->address = buffer->address + region.origin; clRetainMemObject(buffer); SetError(buffer->context, CL_SUCCESS); return mem; } namespace { // Utility function for getting number of dimensions in image size_t getNumDimensions(cl_mem_object_type type) { switch (type) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: 
case CL_MEM_OBJECT_IMAGE1D_BUFFER: return 1; case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: return 2; case CL_MEM_OBJECT_IMAGE3D: return 3; default: return 0; } } // Utility function for getting number of channels in an image size_t getNumChannels(const cl_image_format* format) { switch (format->image_channel_order) { case CL_R: case CL_Rx: case CL_A: case CL_INTENSITY: case CL_LUMINANCE: return 1; case CL_RG: case CL_RGx: case CL_RA: return 2; case CL_RGB: case CL_RGBx: return 3; case CL_RGBA: case CL_ARGB: case CL_BGRA: return 4; default: return 0; } } // Utility function for computing an image format's pixel size (in bytes) size_t getPixelSize(const cl_image_format* format) { // Get number of channels size_t numChannels = getNumChannels(format); // Get size of each pixel (in bytes) switch (format->image_channel_data_type) { case CL_SNORM_INT8: case CL_UNORM_INT8: case CL_SIGNED_INT8: case CL_UNSIGNED_INT8: return numChannels; case CL_SNORM_INT16: case CL_UNORM_INT16: case CL_SIGNED_INT16: case CL_UNSIGNED_INT16: case CL_HALF_FLOAT: return 2 * numChannels; case CL_SIGNED_INT32: case CL_UNSIGNED_INT32: case CL_FLOAT: return 4 * numChannels; case CL_UNORM_SHORT_565: case CL_UNORM_SHORT_555: return 2; case CL_UNORM_INT_101010: return 4; default: return 0; } } bool isImageArray(cl_mem_object_type type) { if (type == CL_MEM_OBJECT_IMAGE1D_ARRAY || type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { return true; } return false; } cl_mem createImage(cl_context context, cl_mem_flags flags, const cl_image_format* image_format, const cl_image_desc* image_desc, void* host_ptr, cl_int* errcode_ret) { // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (!image_format) { SetErrorArg(context, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, image_format); return NULL; } if (!image_desc) { SetErrorArg(context, CL_INVALID_IMAGE_DESCRIPTOR, image_desc); return NULL; } // Get size of each pixel (in bytes) size_t pixelSize = getPixelSize(image_format); if (!pixelSize) { SetErrorArg(context, CL_INVALID_VALUE, image_format); return NULL; } // Get image dimensions size_t dims = getNumDimensions(image_desc->image_type); size_t width = image_desc->image_width; size_t height = 1, depth = 1; size_t arraySize = 1; if (dims > 1) { height = image_desc->image_height; } if (dims > 2) { depth = image_desc->image_depth; } if (isImageArray(image_desc->image_type)) { arraySize = image_desc->image_array_size; } // Calculate total size of image size_t size = width * height * depth * arraySize * pixelSize; cl_mem mem; if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { // Use existing buffer if (!image_desc->buffer) { SetErrorInfo(context, CL_INVALID_VALUE, "image_desc->buffer cannot be NULL " "when using CL_MEM_OBJECT_IMAGE1D_BUFFER"); return NULL; } mem = image_desc->buffer; clRetainMemObject(image_desc->buffer); } else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE2D && image_desc->mem_object) { SetErrorInfo(context, CL_INVALID_OPERATION, "Creating 2D images from buffers is not supported"); return nullptr; } else { // Create buffer // TODO: Use pitches mem = createBuffer(context, flags, size, host_ptr, errcode_ret); if (!mem) { return NULL; } } // Create image object wrapper cl_image* image = new cl_image; *(cl_mem)image = *mem; image->isImage = true; image->format = *image_format; image->desc = *image_desc; image->desc.image_width = width; image->desc.image_height = height; image->desc.image_depth = depth; image->desc.image_array_size = arraySize; image->refCount = 1; if 
(image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER) { delete mem; } SetError(context, CL_SUCCESS); return image; } } // namespace CL_API_ENTRY cl_mem CL_API_CALL clCreateImage( cl_context context, cl_mem_flags flags, const cl_image_format* image_format, const cl_image_desc* image_desc, void* host_ptr, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; return createImage(context, flags, image_format, image_desc, host_ptr, errcode_ret); } CL_API_ENTRY cl_mem CL_API_CALL clCreateImageWithProperties( cl_context context, const cl_mem_properties* properties, cl_mem_flags flags, const cl_image_format* image_format, const cl_image_desc* image_desc, void* host_ptr, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_3_0 { REGISTER_API; // Check properties (none are supported) if (properties && properties[0] != 0) { SetErrorInfo(context, CL_INVALID_PROPERTY, "Unsupported property"); } cl_mem image = createImage(context, flags, image_format, image_desc, host_ptr, errcode_ret); if (image && properties) { image->properties.assign(properties, properties + 1); } return image; } CL_API_ENTRY cl_mem CL_API_CALL clCreateImage2D( cl_context context, cl_mem_flags flags, const cl_image_format* image_format, size_t image_width, size_t image_height, size_t image_row_pitch, void* host_ptr, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; cl_image_desc desc = {CL_MEM_OBJECT_IMAGE2D, image_width, image_height, 1, 1, image_row_pitch, 0, 0, 0, {NULL}}; return createImage(context, flags, image_format, &desc, host_ptr, errcode_ret); } CL_API_ENTRY cl_mem CL_API_CALL clCreateImage3D( cl_context context, cl_mem_flags flags, const cl_image_format* image_format, size_t image_width, size_t image_height, size_t image_depth, size_t image_row_pitch, size_t image_slice_pitch, void* host_ptr, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; cl_image_desc desc = {CL_MEM_OBJECT_IMAGE3D, image_width, image_height, image_depth, 1, image_row_pitch, image_slice_pitch, 0, 0, {NULL}}; return createImage(context, flags, image_format, &desc, host_ptr, errcode_ret); } CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!memobj) { ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj); } memobj->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!memobj) { ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj); } if (--memobj->refCount == 0) { if (memobj->isImage && ((cl_image*)memobj)->desc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { clReleaseMemObject(((cl_image*)memobj)->desc.buffer); } else { if (memobj->parent) { clReleaseMemObject(memobj->parent); } else { memobj->context->context->getGlobalMemory()->deallocateBuffer( memobj->address); clReleaseContext(memobj->context); } while (!memobj->callbacks.empty()) { pair<void(CL_CALLBACK*)(cl_mem, void*), void*> callback = memobj->callbacks.top(); callback.first(memobj, callback.second); memobj->callbacks.pop(); } } delete memobj; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormats( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format* image_formats, cl_uint* num_image_formats) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!context) { ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context); } if (num_entries == 0 && image_formats) { ReturnErrorInfo(context, CL_INVALID_VALUE, "num_entries should be >0 if image_formats non-NULL"); } // TODO: Add
support for packed image types // Channel orders const cl_channel_order ordersAll[] = { CL_R, CL_Rx, CL_A, CL_RG, CL_RGx, CL_RA, CL_RGBA, }; const cl_channel_order ordersNormalized[] = {CL_INTENSITY, CL_LUMINANCE}; const cl_channel_order ordersByte[] = {CL_ARGB, CL_BGRA}; const cl_channel_order ordersPacked[] = {CL_RGB, CL_RGBx}; const cl_channel_order* orders[] = { ordersAll, ordersNormalized, ordersByte //, ordersPacked }; const size_t numOrders[] = { sizeof(ordersAll) / sizeof(cl_channel_order), sizeof(ordersNormalized) / sizeof(cl_channel_order), sizeof(ordersByte) / sizeof(cl_channel_order), sizeof(ordersPacked) / sizeof(cl_channel_order), }; // Channel types const cl_channel_type typesAll[] = { CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_FLOAT, CL_HALF_FLOAT, }; const cl_channel_type typesNormalized[] = { CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_FLOAT, CL_HALF_FLOAT, }; const cl_channel_type typesByte[] = { CL_SNORM_INT8, CL_UNORM_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT8, }; const cl_channel_type typesPacked[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010}; const cl_channel_type* types[] = { typesAll, typesNormalized, typesByte //, typesPacked, }; const size_t numTypes[] = { sizeof(typesAll) / sizeof(cl_channel_order), sizeof(typesNormalized) / sizeof(cl_channel_order), sizeof(typesByte) / sizeof(cl_channel_order), sizeof(typesPacked) / sizeof(cl_channel_order), }; // Calculate total number of formats size_t numCatagories = sizeof(orders) / sizeof(cl_channel_order*); size_t numFormats = 0; for (size_t c = 0; c < numCatagories; c++) { numFormats += numOrders[c] * numTypes[c]; } if (num_image_formats) { *num_image_formats = numFormats; } // Generate list of all valid order/type combinations if (image_formats) { unsigned i = 0; for (size_t c = 0; c < numCatagories; c++) { for (size_t o = 0; o < numOrders[c]; o++) { for (size_t t = 0; t < numTypes[c]; t++) { if (i >= num_entries) { return CL_SUCCESS; } cl_image_format format = {orders[c][o], types[c][t]}; image_formats[i++] = format; } } } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfo( cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check mem object is valid if (!memobj) { ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj); } size_t dummy = 0; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; union { cl_mem_object_type clmemobjty; cl_mem_flags clmemflags; cl_context context; cl_mem clmem; size_t sizet; cl_uint cluint; cl_bool clbool; void* ptr; } result_data; const void* data = nullptr; switch (param_name) { case CL_MEM_TYPE: result_size = sizeof(cl_mem_object_type); result_data.clmemobjty = memobj->isImage ? 
((cl_image*)memobj)->desc.image_type : CL_MEM_OBJECT_BUFFER; break; case CL_MEM_FLAGS: result_size = sizeof(cl_mem_flags); result_data.clmemflags = memobj->flags; break; case CL_MEM_SIZE: result_size = sizeof(size_t); result_data.sizet = memobj->size; break; case CL_MEM_HOST_PTR: result_size = sizeof(void*); result_data.ptr = memobj->hostPtr; break; case CL_MEM_MAP_COUNT: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_MEM_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = memobj->refCount; break; case CL_MEM_CONTEXT: result_size = sizeof(cl_context); result_data.context = memobj->context; break; case CL_MEM_ASSOCIATED_MEMOBJECT: result_size = sizeof(cl_mem); if (memobj->isImage) { result_data.clmem = static_cast<cl_image*>(memobj)->desc.mem_object; } else { result_data.clmem = memobj->parent; } break; case CL_MEM_OFFSET: result_size = sizeof(size_t); result_data.sizet = memobj->offset; break; case CL_MEM_USES_SVM_POINTER: result_size = sizeof(cl_bool); result_data.clbool = CL_FALSE; break; case CL_MEM_PROPERTIES: result_size = memobj->properties.size() * sizeof(cl_mem_properties); data = memobj->properties.data(); break; default: ReturnErrorArg(memobj->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(memobj->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { if (data) memcpy(param_value, data, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetImageInfo( cl_mem image, cl_image_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check mem object is valid if (!image) { ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, image); } cl_image* img = (cl_image*)image; size_t dummy = 0; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; union { cl_image_format climgfmt; size_t sizet; cl_mem clmem; cl_uint cluint; } result_data; switch (param_name) { case CL_IMAGE_FORMAT: result_size = sizeof(cl_image_format); result_data.climgfmt = img->format; break; case CL_IMAGE_ELEMENT_SIZE: result_size = sizeof(size_t); result_data.sizet = getPixelSize(&img->format); break; case CL_IMAGE_ROW_PITCH: result_size = sizeof(size_t); result_data.sizet = img->desc.image_row_pitch; break; case CL_IMAGE_SLICE_PITCH: result_size = sizeof(size_t); result_data.sizet = img->desc.image_slice_pitch; break; case CL_IMAGE_WIDTH: result_size = sizeof(size_t); result_data.sizet = img->desc.image_width; break; case CL_IMAGE_HEIGHT: result_size = sizeof(size_t); result_data.sizet = getNumDimensions(img->desc.image_type) > 1 ? img->desc.image_height : 0; break; case CL_IMAGE_DEPTH: result_size = sizeof(size_t); result_data.sizet = getNumDimensions(img->desc.image_type) > 2 ? img->desc.image_depth : 0; break; case CL_IMAGE_ARRAY_SIZE: result_size = sizeof(size_t); result_data.sizet = isImageArray(img->desc.image_type) ?
img->desc.image_array_size : 0; break; case CL_IMAGE_BUFFER: result_size = sizeof(cl_mem); result_data.clmem = img->desc.buffer; break; case CL_IMAGE_NUM_MIP_LEVELS: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_IMAGE_NUM_SAMPLES: result_size = sizeof(cl_uint); result_data.cluint = 0; break; default: ReturnErrorArg(image->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(image->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorCallback( cl_mem memobj, void(CL_CALLBACK* pfn_notify)(cl_mem, void*), void* user_data) CL_API_SUFFIX__VERSION_1_1 { REGISTER_API; // Check parameters if (!memobj) { ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj); } if (!pfn_notify) { ReturnErrorArg(memobj->context, CL_INVALID_VALUE, pfn_notify); } memobj->callbacks.push(make_pair(pfn_notify, user_data)); return CL_SUCCESS; } CL_API_ENTRY cl_sampler CL_API_CALL clCreateSampler(cl_context context, cl_bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } // Create sampler bitfield uint32_t bitfield = 0; if (normalized_coords) { bitfield |= CLK_NORMALIZED_COORDS_TRUE; } switch (addressing_mode) { case CL_ADDRESS_NONE: break; case CL_ADDRESS_CLAMP_TO_EDGE: bitfield |= CLK_ADDRESS_CLAMP_TO_EDGE; break; case CL_ADDRESS_CLAMP: bitfield |= CLK_ADDRESS_CLAMP; break; case CL_ADDRESS_REPEAT: bitfield |= CLK_ADDRESS_REPEAT; break; case CL_ADDRESS_MIRRORED_REPEAT: bitfield |= CLK_ADDRESS_MIRRORED_REPEAT; break; default: SetErrorArg(context, CL_INVALID_VALUE, addressing_mode); return NULL; } switch (filter_mode) { case CL_FILTER_NEAREST: bitfield |= CLK_FILTER_NEAREST; break; case CL_FILTER_LINEAR: bitfield |= CLK_FILTER_LINEAR; break; default: SetErrorArg(context, CL_INVALID_VALUE, filter_mode); return NULL; } // Create sampler cl_sampler sampler = new _cl_sampler; sampler->dispatch = m_dispatchTable; sampler->context = context; sampler->normCoords = normalized_coords; sampler->addressMode = addressing_mode; sampler->filterMode = filter_mode; sampler->sampler = bitfield; sampler->refCount = 1; SetError(context, CL_SUCCESS); return sampler; } CL_API_ENTRY cl_int CL_API_CALL clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!sampler) { ReturnErrorArg(NULL, CL_INVALID_SAMPLER, sampler); } sampler->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!sampler) { ReturnErrorArg(NULL, CL_INVALID_SAMPLER, sampler); } if (--sampler->refCount == 0) { delete sampler; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfo( cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check sampler is valid if (!sampler) { ReturnErrorArg(NULL, CL_INVALID_SAMPLER, sampler); } size_t dummy = 0; size_t& result_size = param_value_size_ret ? 
*param_value_size_ret : dummy; union { cl_uint cluint; cl_context clcontext; cl_bool clbool; cl_addressing_mode claddrmode; cl_filter_mode clfiltmode; } result_data; const void* data = nullptr; switch (param_name) { case CL_SAMPLER_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = sampler->refCount; break; case CL_SAMPLER_CONTEXT: result_size = sizeof(cl_context); result_data.clcontext = sampler->context; break; case CL_SAMPLER_NORMALIZED_COORDS: result_size = sizeof(cl_bool); result_data.clbool = sampler->normCoords; break; case CL_SAMPLER_ADDRESSING_MODE: result_size = sizeof(cl_addressing_mode); result_data.claddrmode = sampler->addressMode; break; case CL_SAMPLER_FILTER_MODE: result_size = sizeof(cl_filter_mode); result_data.clfiltmode = sampler->filterMode; break; case CL_SAMPLER_PROPERTIES: result_size = sampler->properties.size() * sizeof(cl_sampler_properties); data = sampler->properties.data(); break; default: ReturnErrorArg(sampler->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(sampler->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { if (data) memcpy(param_value, data, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSource( cl_context context, cl_uint count, const char** strings, const size_t* lengths, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (count == 0) { SetErrorArg(context, CL_INVALID_VALUE, count); return NULL; } if (!strings || !strings[0]) { SetErrorArg(context, CL_INVALID_VALUE, strings); return NULL; } // Concatenate sources into a single string std::string source; for (unsigned i = 0; i < count; i++) { size_t length = (lengths && lengths[i]) ? 
lengths[i] : strlen(strings[i]); source.append(strings[i], length); } // Create program object cl_program prog = new _cl_program; prog->dispatch = m_dispatchTable; prog->program = new oclgrind::Program(context->context, source); prog->context = context; prog->refCount = 1; if (!prog->program) { SetError(context, CL_OUT_OF_HOST_MEMORY); delete prog; return NULL; } clRetainContext(context); SetError(context, CL_SUCCESS); return prog; } CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary( cl_context context, cl_uint num_devices, const cl_device_id* device_list, const size_t* lengths, const unsigned char** binaries, cl_int* binary_status, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (num_devices != 1 || !device_list) { SetErrorInfo(context, CL_INVALID_VALUE, "Invalid device list"); return NULL; } if (!lengths) { SetErrorArg(context, CL_INVALID_VALUE, lengths); return NULL; } if (!binaries) { SetErrorArg(context, CL_INVALID_VALUE, binaries); return NULL; } if (device_list[0] != m_device) { SetErrorArg(context, CL_INVALID_DEVICE, device_list); return NULL; } // Create program object cl_program prog = new _cl_program; prog->dispatch = m_dispatchTable; prog->program = oclgrind::Program::createFromBitcode(context->context, binaries[0], lengths[0]); prog->context = context; prog->refCount = 1; if (!prog->program) { SetError(context, CL_INVALID_BINARY); if (binary_status) { binary_status[0] = CL_INVALID_BINARY; } delete prog; return NULL; } if (binary_status) { binary_status[0] = CL_SUCCESS; } clRetainContext(context); SetError(context, CL_SUCCESS); return prog; } CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernels( cl_context context, cl_uint num_devices, const cl_device_id* device_list, const char* kernel_names, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; if (!context) { SetError(NULL, CL_INVALID_CONTEXT); return NULL; } SetErrorInfo(context, CL_INVALID_VALUE, "No built-in kernels available"); return NULL; } CL_API_ENTRY cl_int CL_API_CALL clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!program) { ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program); } program->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!program) { ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program); } if (--program->refCount == 0) { delete program->program; clReleaseContext(program->context); delete program; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clBuildProgram( cl_program program, cl_uint num_devices, const cl_device_id* device_list, const char* options, void(CL_CALLBACK* pfn_notify)(cl_program, void*), void* user_data) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!program || !program->program) { ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program); } if (num_devices > 0 && !device_list) { ReturnErrorInfo(program->context, CL_INVALID_VALUE, "num_devices >0 but device_list is NULL"); } if (num_devices == 0 && device_list) { ReturnErrorInfo(program->context, CL_INVALID_VALUE, "num_devices == 0 but device_list non-NULL"); } if (!pfn_notify && user_data) { ReturnErrorInfo(program->context, CL_INVALID_VALUE, "pfn_notify NULL but user_data non-NULL"); } if (device_list && !device_list[0]) { ReturnErrorArg(program->context, CL_INVALID_DEVICE, device); } // Build program bool success = 
program->program->build(oclgrind::Program::BUILD, options); // Fire callback if (pfn_notify) { pfn_notify(program, user_data); } if (!success) { ReturnError(program->context, CL_BUILD_PROGRAM_FAILURE); } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clCompileProgram( cl_program program, cl_uint num_devices, const cl_device_id* device_list, const char* options, cl_uint num_input_headers, const cl_program* input_headers, const char** header_include_names, void(CL_CALLBACK* pfn_notify)(cl_program, void*), void* user_data) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; // Check parameters if (!program) { ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program); } if (num_devices > 0 && !device_list) { ReturnErrorInfo(program->context, CL_INVALID_VALUE, "num_devices >0 but device_list is NULL"); } if (num_devices == 0 && device_list) { ReturnErrorInfo(program->context, CL_INVALID_VALUE, "num_devices == 0 but device_list non-NULL"); } if (!pfn_notify && user_data) { ReturnErrorInfo(program->context, CL_INVALID_VALUE, "pfn_notify NULL but user_data non-NULL"); } if (device_list && !device_list[0]) { ReturnErrorArg(program->context, CL_INVALID_DEVICE, device); } // Prepare headers list<pair<string, const oclgrind::Program*>> headers; for (unsigned i = 0; i < num_input_headers; i++) { headers.push_back( make_pair(header_include_names[i], input_headers[i]->program)); } // Build program if (!program->program->build(oclgrind::Program::COMPILE, options, headers)) { ReturnError(program->context, CL_BUILD_PROGRAM_FAILURE); } // Fire callback if (pfn_notify) { pfn_notify(program, user_data); } return CL_SUCCESS; } CL_API_ENTRY cl_program CL_API_CALL clLinkProgram(cl_context context, cl_uint num_devices, const cl_device_id* device_list, const char* options, cl_uint num_input_programs, const cl_program* input_programs, void(CL_CALLBACK* pfn_notify)(cl_program, void*), void* user_data, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (num_devices > 0 && !device_list) { SetErrorInfo(context, CL_INVALID_VALUE, "num_devices >0 but device_list is NULL"); return NULL; } if (num_devices == 0 && device_list) { SetErrorInfo(context, CL_INVALID_VALUE, "num_devices == 0 but device_list non-NULL"); return NULL; } if (!pfn_notify && user_data) { SetErrorInfo(context, CL_INVALID_VALUE, "pfn_notify NULL but user_data non-NULL"); return NULL; } if (device_list && !device_list[0]) { SetErrorArg(context, CL_INVALID_DEVICE, device_list); return NULL; } // Prepare programs list<const oclgrind::Program*> programs; for (unsigned i = 0; i < num_input_programs; i++) { programs.push_back(input_programs[i]->program); } // Create program object cl_program prog = new _cl_program; prog->dispatch = m_dispatchTable; prog->program = oclgrind::Program::createFromPrograms(context->context, programs, options); prog->context = context; prog->refCount = 1; if (!prog->program) { SetError(context, CL_INVALID_BINARY); delete prog; return NULL; } // Fire callback if (pfn_notify) { pfn_notify(prog, user_data); } clRetainContext(context); SetError(context, CL_SUCCESS); return prog; } CL_API_ENTRY cl_int CL_API_CALL clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2 { return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfo( cl_program program, cl_program_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret)
CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check program is valid if (!program) { ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program); } if ((param_name == CL_PROGRAM_NUM_KERNELS || param_name == CL_PROGRAM_KERNEL_NAMES) && program->program->getBuildStatus() != CL_BUILD_SUCCESS) { ReturnErrorInfo(program->context, CL_INVALID_PROGRAM_EXECUTABLE, "Program not successfully built"); } size_t dummy; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; union { cl_uint cluint; cl_device_id device; cl_context context; size_t sizet; cl_bool clbool; } result_data; const char* str = 0; string kernelNames; switch (param_name) { case CL_PROGRAM_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = program->refCount; break; case CL_PROGRAM_CONTEXT: result_size = sizeof(cl_context); result_data.context = program->context; break; case CL_PROGRAM_NUM_DEVICES: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_PROGRAM_DEVICES: result_size = sizeof(cl_device_id); result_data.device = m_device; break; case CL_PROGRAM_SOURCE: str = program->program->getSource().c_str(); result_size = strlen(str) + 1; break; case CL_PROGRAM_IL: result_size = 0; break; case CL_PROGRAM_BINARY_SIZES: result_size = sizeof(size_t); result_data.sizet = program->program->getBinarySize(); break; case CL_PROGRAM_BINARIES: result_size = sizeof(unsigned char*); break; case CL_PROGRAM_NUM_KERNELS: result_size = sizeof(size_t); result_data.sizet = program->program->getNumKernels(); break; case CL_PROGRAM_KERNEL_NAMES: { list<string> names = program->program->getKernelNames(); for (list<string>::iterator itr = names.begin(); itr != names.end(); itr++) { kernelNames += *itr; kernelNames += ";"; } if (!kernelNames.empty()) { kernelNames.erase(kernelNames.length() - 1); } str = kernelNames.c_str(); result_size = strlen(str) + 1; break; } case CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: case CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: result_size = sizeof(cl_bool); result_data.clbool = CL_FALSE; break; default: ReturnErrorArg(program->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(NULL, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else if (param_name == CL_PROGRAM_BINARIES) { program->program->getBinary(((unsigned char**)param_value)[0]); } else { if (str) memcpy(param_value, str, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfo( cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check program is valid if (!program) { ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program); } size_t dummy; size_t& result_size = param_value_size_ret ?
*param_value_size_ret : dummy; union { cl_build_status status; cl_program_binary_type type; size_t sizet; } result_data; const char* str = 0; switch (param_name) { case CL_PROGRAM_BUILD_STATUS: result_size = sizeof(cl_build_status); result_data.status = program->program->getBuildStatus(); break; case CL_PROGRAM_BUILD_OPTIONS: str = program->program->getBuildOptions().c_str(); result_size = strlen(str) + 1; break; case CL_PROGRAM_BUILD_LOG: str = program->program->getBuildLog().c_str(); result_size = strlen(str) + 1; break; case CL_PROGRAM_BINARY_TYPE: result_size = sizeof(cl_program_binary_type); result_data.type = program->program->getBinaryType(); break; case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: result_size = sizeof(size_t); result_data.sizet = program->program->getTotalProgramScopeVarSize(); break; default: ReturnErrorArg(program->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(program->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { if (str) memcpy(param_value, str, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program program, const char* kernel_name, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (program->dispatch != m_dispatchTable) { SetError(NULL, CL_INVALID_PROGRAM); return NULL; } if (!kernel_name) { SetErrorArg(program->context, CL_INVALID_VALUE, kernel_name); return NULL; } // Create kernel object cl_kernel kernel = new _cl_kernel; kernel->dispatch = m_dispatchTable; kernel->kernel = program->program->createKernel(kernel_name); kernel->program = program; kernel->refCount = 1; if (!kernel->kernel) { SetErrorInfo(program->context, CL_INVALID_KERNEL_NAME, "Kernel '" << kernel_name << "' not found"); delete kernel; return NULL; } clRetainProgram(program); SetError(program->context, CL_SUCCESS); return kernel; } CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgram( cl_program program, cl_uint num_kernels, cl_kernel* kernels, cl_uint* num_kernels_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!program) { ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program); } if (program->program->getBuildStatus() != CL_BUILD_SUCCESS) { ReturnErrorInfo(program->context, CL_INVALID_PROGRAM_EXECUTABLE, "Program not built"); } unsigned int num = program->program->getNumKernels(); if (kernels && num_kernels < num) { ReturnErrorInfo(program->context, CL_INVALID_VALUE, "num_kernels is " << num_kernels << ", but " << num << " kernels found"); } if (kernels) { int i = 0; list<string> names = program->program->getKernelNames(); for (list<string>::iterator itr = names.begin(); itr != names.end(); itr++) { cl_kernel kernel = new _cl_kernel; kernel->dispatch = m_dispatchTable; kernel->kernel = program->program->createKernel(*itr); kernel->program = program; kernel->refCount = 1; kernels[i++] = kernel; clRetainProgram(program); } } if (num_kernels_ret) { *num_kernels_ret = num; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clRetainKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!kernel) { ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel); } kernel->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!kernel) { ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel); } if (--kernel->refCount == 0) { // Release memory allocated
for image arguments for (auto* img : kernel->imageArgs) { delete img; } delete kernel->kernel; clReleaseProgram(kernel->program); delete kernel; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void* arg_value) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters are valid if (!kernel) { ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel); } if (arg_index >= kernel->kernel->getNumArguments()) { ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_INDEX, "arg_index is " << arg_index << ", but kernel has " << kernel->kernel->getNumArguments() << " arguments"); } unsigned int addr = kernel->kernel->getArgumentAddressQualifier(arg_index); bool isSampler = kernel->kernel->getArgumentTypeName(arg_index) == "sampler_t"; if (kernel->kernel->getArgumentSize(arg_index) != arg_size && !isSampler && addr != CL_KERNEL_ARG_ADDRESS_LOCAL) { ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_SIZE, "arg_size is " << arg_size << ", but argument should be " << kernel->kernel->getArgumentSize(arg_index) << " bytes"); } // Prepare argument value oclgrind::TypedValue value; value.data = new unsigned char[arg_size]; value.size = arg_size; value.num = 1; switch (addr) { case CL_KERNEL_ARG_ADDRESS_PRIVATE: if (isSampler) { memcpy(value.data, &(*(cl_sampler*)arg_value)->sampler, 4); } else { memcpy(value.data, arg_value, arg_size); } break; case CL_KERNEL_ARG_ADDRESS_LOCAL: delete[] value.data; value.data = NULL; break; case CL_KERNEL_ARG_ADDRESS_GLOBAL: case CL_KERNEL_ARG_ADDRESS_CONSTANT: if (arg_value && *(cl_mem*)arg_value) { cl_mem mem = *(cl_mem*)arg_value; if (mem->isImage) { // Create Image struct oclgrind::Image* image = new oclgrind::Image; image->address = mem->address; image->format = ((cl_image*)mem)->format; image->desc = ((cl_image*)mem)->desc; *(oclgrind::Image**)value.data = image; // Keep a record of the image struct for releasing it later kernel->imageArgs.push_back(image); } else { memcpy(value.data, &mem->address, arg_size); } kernel->memArgs[arg_index] = mem; } else { value.setPointer(0); kernel->memArgs.erase(arg_index); } break; default: ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_VALUE, "Unsupported address space"); } // Set argument kernel->kernel->setArgument(arg_index, value); delete[] value.data; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfo( cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check kernel is valid if (!kernel) { ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel); } size_t dummy; size_t& result_size = param_value_size_ret ? 
*param_value_size_ret : dummy; union { cl_uint cluint; cl_context context; cl_program program; } result_data; const char* str = 0; switch (param_name) { case CL_KERNEL_FUNCTION_NAME: result_size = kernel->kernel->getName().size() + 1; str = kernel->kernel->getName().c_str(); break; case CL_KERNEL_NUM_ARGS: result_size = sizeof(cl_uint); result_data.cluint = kernel->kernel->getNumArguments(); break; case CL_KERNEL_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = kernel->refCount; break; case CL_KERNEL_CONTEXT: result_size = sizeof(cl_context); result_data.context = kernel->program->context; break; case CL_KERNEL_PROGRAM: result_size = sizeof(cl_program); result_data.program = kernel->program; break; case CL_KERNEL_ATTRIBUTES: result_size = kernel->kernel->getAttributes().size() + 1; str = kernel->kernel->getAttributes().c_str(); break; default: ReturnErrorArg(kernel->program->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { if (str) memcpy(param_value, str, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfo( cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; // Check parameters are valid if (!kernel) { ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel); } if (arg_indx >= kernel->kernel->getNumArguments()) { ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_INDEX, "arg_indx is " << arg_indx << ", but kernel has " << kernel->kernel->getNumArguments() << " arguments"); } size_t dummy = 0; size_t& result_size = param_value_size_ret ? 
*param_value_size_ret : dummy; union { cl_kernel_arg_address_qualifier addressQual; cl_kernel_arg_access_qualifier accessQual; cl_kernel_arg_type_qualifier typeQual; } result_data; std::string str_data; switch (param_name) { case CL_KERNEL_ARG_ADDRESS_QUALIFIER: result_size = sizeof(cl_kernel_arg_address_qualifier); result_data.addressQual = kernel->kernel->getArgumentAddressQualifier(arg_indx); break; case CL_KERNEL_ARG_ACCESS_QUALIFIER: result_size = sizeof(cl_kernel_arg_access_qualifier); result_data.accessQual = kernel->kernel->getArgumentAccessQualifier(arg_indx); break; case CL_KERNEL_ARG_TYPE_NAME: str_data = kernel->kernel->getArgumentTypeName(arg_indx).str(); result_size = str_data.size() + 1; break; case CL_KERNEL_ARG_TYPE_QUALIFIER: result_size = sizeof(cl_kernel_arg_type_qualifier); result_data.typeQual = kernel->kernel->getArgumentTypeQualifier(arg_indx); break; case CL_KERNEL_ARG_NAME: str_data = kernel->kernel->getArgumentName(arg_indx).str(); result_size = str_data.size() + 1; break; default: ReturnErrorArg(kernel->program->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } if (str_data.size()) memcpy(param_value, str_data.c_str(), result_size); else memcpy(param_value, &result_data, result_size); } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfo( cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters are valid if (!kernel) { ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel); } if (!device || device != m_device) { ReturnErrorArg(kernel->program->context, CL_INVALID_DEVICE, device); } size_t dummy; size_t& result_size = param_value_size_ret ? 
*param_value_size_ret : dummy; union { size_t sizet; size_t sizet3[3]; cl_ulong clulong; } result_data; switch (param_name) { case CL_KERNEL_GLOBAL_WORK_SIZE: ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE, "CL_KERNEL_GLOBAL_SIZE only valid on custom devices"); case CL_KERNEL_WORK_GROUP_SIZE: result_size = sizeof(size_t); result_data.sizet = m_device->maxWGSize; break; case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: result_size = sizeof(size_t[3]); kernel->kernel->getRequiredWorkGroupSize(result_data.sizet3); break; case CL_KERNEL_LOCAL_MEM_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = kernel->kernel->getLocalMemorySize(); break; case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: result_size = sizeof(size_t); result_data.sizet = 1; break; case CL_KERNEL_PRIVATE_MEM_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = 0; break; default: ReturnErrorArg(kernel->program->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } /* Event Object APIs */ namespace { // Utility to check if an event has completed (or terminated) inline bool isComplete(cl_event event) { return (event->event->state == CL_COMPLETE || event->event->state < 0); } } // namespace CL_API_ENTRY cl_int CL_API_CALL clWaitForEvents( cl_uint num_events, const cl_event* event_list) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!num_events) { ReturnErrorInfo(NULL, CL_INVALID_VALUE, "num_events cannot be 0"); } if (!event_list) { ReturnErrorInfo(NULL, CL_INVALID_VALUE, "event_list cannot be NULL"); } // Loop until all events complete bool complete = false; while (!complete) { complete = true; for (unsigned i = 0; i < num_events; i++) { // Skip event if already complete if (isComplete(event_list[i])) { continue; } // If it's not a user event, execute the associated command if (event_list[i]->queue) { oclgrind::Command* cmd = event_list[i]->event->command; event_list[i]->event->queue->execute(cmd, false); releaseCommand(cmd); // If it's still not complete, update flag if (!isComplete(event_list[i])) { complete = false; } } else { complete = false; } } } // Check if any command terminated unsuccessfully for (unsigned i = 0; i < num_events; i++) { if (event_list[i]->event->state < 0) { ReturnErrorInfo(event_list[i]->context, CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "Event " << i << " terminated with error " << event_list[i]->event->state); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetEventInfo( cl_event event, cl_event_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check event is valid if (!event) { ReturnErrorArg(NULL, CL_INVALID_EVENT, event); } size_t dummy; size_t& result_size = param_value_size_ret ? 
*param_value_size_ret : dummy; union { cl_command_queue queue; cl_context context; cl_command_type type; cl_int clint; cl_uint cluint; size_t sizet; size_t sizet3[3]; } result_data; switch (param_name) { case CL_EVENT_COMMAND_QUEUE: result_size = sizeof(cl_command_queue); result_data.queue = event->queue; break; case CL_EVENT_CONTEXT: result_size = sizeof(cl_context); result_data.context = event->context; break; case CL_EVENT_COMMAND_TYPE: result_size = sizeof(cl_command_type); result_data.type = event->type; break; case CL_EVENT_COMMAND_EXECUTION_STATUS: result_size = sizeof(cl_int); result_data.clint = event->event->state; break; case CL_EVENT_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = event->refCount; break; default: ReturnErrorArg(event->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(event->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent( cl_context context, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1 { REGISTER_API; // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } /// Create event object cl_event event = new _cl_event; event->dispatch = m_dispatchTable; event->context = context; event->queue = 0; event->type = CL_COMMAND_USER; event->event = new oclgrind::Event(); event->event->state = CL_SUBMITTED; event->event->command = NULL; event->event->queue = NULL; event->refCount = 1; SetError(context, CL_SUCCESS); return event; } CL_API_ENTRY cl_int CL_API_CALL clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!event) { ReturnErrorArg(NULL, CL_INVALID_EVENT, event); } event->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!event) { ReturnErrorArg(NULL, CL_INVALID_EVENT, event); } if (--event->refCount == 0) { if (event->event) { delete event->event; } delete event; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatus( cl_event event, cl_int execution_status) CL_API_SUFFIX__VERSION_1_1 { REGISTER_API; // Check parameters if (!event) { ReturnErrorArg(NULL, CL_INVALID_EVENT, event); } if (event->queue) { ReturnErrorInfo(event->context, CL_INVALID_EVENT, "Not a user event"); } if (execution_status != CL_COMPLETE && execution_status >= 0) { ReturnErrorArg(event->context, CL_INVALID_VALUE, execution_status); } if (event->event->state == CL_COMPLETE || event->event->state < 0) { ReturnErrorInfo(event->context, CL_INVALID_OPERATION, "Event status already set"); } event->event->state = execution_status; // Perform callbacks list<pair<void(CL_CALLBACK*)(cl_event, cl_int, void*), void*>>::iterator itr; for (itr = event->callbacks.begin(); itr != event->callbacks.end(); itr++) { itr->first(event, execution_status, itr->second); } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clSetEventCallback(cl_event event, cl_int command_exec_callback_type, void(CL_CALLBACK* pfn_notify)(cl_event, cl_int, void*), void* user_data) CL_API_SUFFIX__VERSION_1_1 { REGISTER_API; // Check parameters if (!event) { ReturnErrorArg(NULL, CL_INVALID_EVENT, event); } if (!pfn_notify) { ReturnErrorArg(event->context, CL_INVALID_VALUE, pfn_notify); } if (command_exec_callback_type != CL_COMPLETE && command_exec_callback_type != CL_SUBMITTED && command_exec_callback_type != CL_RUNNING) { ReturnErrorArg(event->context,
CL_INVALID_VALUE, command_exec_callback_type); } event->callbacks.push_back(make_pair(pfn_notify, user_data)); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfo( cl_event event, cl_profiling_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check event is valid if (!event) { ReturnErrorArg(NULL, CL_INVALID_EVENT, event); } if (!event->queue) { ReturnError(event->context, CL_PROFILING_INFO_NOT_AVAILABLE); } size_t dummy = 0; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; cl_ulong result; switch (param_name) { case CL_PROFILING_COMMAND_QUEUED: result_size = sizeof(cl_ulong); result = event->event->queueTime; break; case CL_PROFILING_COMMAND_SUBMIT: result_size = sizeof(cl_ulong); result = event->event->startTime; break; case CL_PROFILING_COMMAND_START: result_size = sizeof(cl_ulong); result = event->event->startTime; break; case CL_PROFILING_COMMAND_END: result_size = sizeof(cl_ulong); result = event->event->endTime; break; default: ReturnErrorArg(event->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(event->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { *(cl_ulong*)param_value = result; } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } // TODO: Implement properly? clFinish(command_queue); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } // TODO: Move this finish to async thread? 
oclgrind::Command* cmd = command_queue->queue->finish(); releaseCommand(cmd); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t cb, void* ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj); } if (!ptr) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr); } if (offset + cb > buffer->size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "offset + cb (" << offset << " + " << cb << ") exceeds buffer size (" << buffer->size << " bytes)"); } if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY)) { ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Buffer flags specify host will not read data"); } // Enqueue command oclgrind::BufferCommand* cmd = new oclgrind::BufferCommand(oclgrind::Command::READ); cmd->ptr = (unsigned char*)ptr; cmd->address = buffer->address + offset; cmd->size = cb; asyncQueueRetain(cmd, buffer); asyncEnqueue(command_queue, CL_COMMAND_READ_BUFFER, cmd, num_events_in_wait_list, event_wait_list, event); if (blocking_read) { return clFinish(command_queue); } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRect( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, const size_t* buffer_origin, const size_t* host_origin, const size_t* region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void* ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_1 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj); } if (!ptr) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr); } if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY)) { ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Buffer flags specify host will not read data"); } // Compute pitches if neccessary if (buffer_row_pitch == 0) { buffer_row_pitch = region[0]; } if (buffer_slice_pitch == 0) { buffer_slice_pitch = region[1] * buffer_row_pitch; } if (host_row_pitch == 0) { host_row_pitch = region[0]; } if (host_slice_pitch == 0) { host_slice_pitch = region[1] * host_row_pitch; } // Compute origin offsets size_t buffer_offset = buffer_origin[2] * buffer_slice_pitch + buffer_origin[1] * buffer_row_pitch + buffer_origin[0]; size_t host_offset = host_origin[2] * host_slice_pitch + host_origin[1] * host_row_pitch + host_origin[0]; // Ensure buffer region valid size_t end = buffer_offset + region[0] + (region[1] - 1) * buffer_row_pitch + (region[2] - 1) * buffer_slice_pitch; if (end > buffer->size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "Region exceeds buffer size (" << buffer->size << " bytes)"); } // Enqueue command oclgrind::BufferRectCommand* cmd = new oclgrind::BufferRectCommand(oclgrind::Command::READ_RECT); cmd->ptr = (unsigned char*)ptr; cmd->address = buffer->address; cmd->buffer_offset[0] = buffer_offset; cmd->buffer_offset[1] = buffer_row_pitch; cmd->buffer_offset[2] = buffer_slice_pitch; cmd->host_offset[0] = host_offset; 
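// Worked example (illustrative only) of the rect addressing computed above.
// Assuming buffer_origin = {4, 2, 0}, region = {8, 4, 1},
// buffer_row_pitch = 16 and buffer_slice_pitch = 256:
//
//   buffer_offset = 0*256 + 2*16 + 4              = 36
//   end           = 36 + 8 + (4-1)*16 + (1-1)*256 = 92
//
// The command is rejected with CL_INVALID_VALUE if `end` exceeds the buffer
// size; host_offset is derived in the same way from host_origin and the host
// pitches.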
cmd->host_offset[1] = host_row_pitch; cmd->host_offset[2] = host_slice_pitch; memcpy(cmd->region, region, 3 * sizeof(size_t)); asyncQueueRetain(cmd, buffer); asyncEnqueue(command_queue, CL_COMMAND_READ_BUFFER_RECT, cmd, num_events_in_wait_list, event_wait_list, event); if (blocking_read) { return clFinish(command_queue); } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t cb, const void* ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj); } if (!ptr) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr); } if (offset + cb > buffer->size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "offset + cb (" << offset << " + " << cb << ") exceeds buffer size (" << buffer->size << " bytes)"); } if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY)) { ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Buffer flags specify host will not write data"); } // Enqueue command oclgrind::BufferCommand* cmd = new oclgrind::BufferCommand(oclgrind::Command::WRITE); cmd->ptr = (unsigned char*)ptr; cmd->address = buffer->address + offset; cmd->size = cb; asyncQueueRetain(cmd, buffer); asyncEnqueue(command_queue, CL_COMMAND_WRITE_BUFFER, cmd, num_events_in_wait_list, event_wait_list, event); if (blocking_write) { return clFinish(command_queue); } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRect( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, const size_t* buffer_origin, const size_t* host_origin, const size_t* region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void* ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_1 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj); } if (!ptr) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr); } if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY)) { ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Buffer flags specify host will not write data"); } // Compute pitches if necessary if (buffer_row_pitch == 0) { buffer_row_pitch = region[0]; } if (buffer_slice_pitch == 0) { buffer_slice_pitch = region[1] * buffer_row_pitch; } if (host_row_pitch == 0) { host_row_pitch = region[0]; } if (host_slice_pitch == 0) { host_slice_pitch = region[1] * host_row_pitch; } // Compute origin offsets size_t buffer_offset = buffer_origin[2] * buffer_slice_pitch + buffer_origin[1] * buffer_row_pitch + buffer_origin[0]; size_t host_offset = host_origin[2] * host_slice_pitch + host_origin[1] * host_row_pitch + host_origin[0]; // Ensure buffer region valid size_t end = buffer_offset + region[0] + (region[1] - 1) * buffer_row_pitch + (region[2] - 1) * buffer_slice_pitch; if (end > buffer->size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "Region exceeds buffer size (" << buffer->size << " bytes)"); } // Enqueue command oclgrind::BufferRectCommand* cmd = new 
oclgrind::BufferRectCommand(oclgrind::Command::WRITE_RECT); cmd->ptr = (unsigned char*)ptr; cmd->address = buffer->address; cmd->buffer_offset[0] = buffer_offset; cmd->buffer_offset[1] = buffer_row_pitch; cmd->buffer_offset[2] = buffer_slice_pitch; cmd->host_offset[0] = host_offset; cmd->host_offset[1] = host_row_pitch; cmd->host_offset[2] = host_slice_pitch; memcpy(cmd->region, region, 3 * sizeof(size_t)); asyncQueueRetain(cmd, buffer); asyncEnqueue(command_queue, CL_COMMAND_WRITE_BUFFER_RECT, cmd, num_events_in_wait_list, event_wait_list, event); if (blocking_write) { return clFinish(command_queue); } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBuffer( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!src_buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_buffer); } if (!dst_buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_buffer); } if (dst_offset + cb > dst_buffer->size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "dst_offset + cb (" << dst_offset << " + " << cb << ") exceeds buffer size (" << dst_buffer->size << " bytes)"); } if (src_offset + cb > src_buffer->size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "src_offset + cb (" << src_offset << " + " << cb << ") exceeds buffer size (" << src_buffer->size << " bytes)"); } // If src and dst buffers are the same and if src_offset comes before // dst_offset and src buffer size goes beyond dst_offset then there is an // overlap if ((src_buffer == dst_buffer) && (src_offset <= dst_offset) && ((src_offset + cb) > dst_offset)) { ReturnErrorInfo(command_queue->context, CL_MEM_COPY_OVERLAP, "src_buffer == dst_buffer and " "src_offset + cb (" << src_offset << " + " << cb << ") overlaps dst_offset (" << dst_offset << ")"); } // If src and dst buffers are the same and if dst_offset comes before // src_offset and dst buffer size goes beyond src_offset then there is an // overlap if ((src_buffer == dst_buffer) && (dst_offset <= src_offset) && ((dst_offset + cb) > src_offset)) { ReturnErrorInfo(command_queue->context, CL_MEM_COPY_OVERLAP, "src_buffer == dst_buffer and " "dst_offset + cb (" << dst_offset << " + " << cb << ") overlaps src_offset (" << src_offset << ")"); } // Enqueue command oclgrind::CopyCommand* cmd = new oclgrind::CopyCommand(); cmd->dst = dst_buffer->address + dst_offset; cmd->src = src_buffer->address + src_offset; cmd->size = cb; asyncQueueRetain(cmd, src_buffer); asyncQueueRetain(cmd, dst_buffer); asyncEnqueue(command_queue, CL_COMMAND_COPY_BUFFER, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferRect( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, const size_t* src_origin, const size_t* dst_origin, const size_t* region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_1 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!src_buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, 
src_buffer); } if (!dst_buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_buffer); } if (!region || region[0] == 0 || region[1] == 0 || region[2] == 0) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, region); } // Compute pitches if necessary if (src_row_pitch == 0) { src_row_pitch = region[0]; } if (src_slice_pitch == 0) { src_slice_pitch = region[1] * src_row_pitch; } if (dst_row_pitch == 0) { dst_row_pitch = region[0]; } if (dst_slice_pitch == 0) { dst_slice_pitch = region[1] * dst_row_pitch; } // Compute origin offsets size_t src_offset = src_origin[2] * src_slice_pitch + src_origin[1] * src_row_pitch + src_origin[0]; size_t dst_offset = dst_origin[2] * dst_slice_pitch + dst_origin[1] * dst_row_pitch + dst_origin[0]; // Ensure buffer region valid size_t src_end = src_offset + region[0] + (region[1] - 1) * src_row_pitch + (region[2] - 1) * src_slice_pitch; size_t dst_end = dst_offset + region[0] + (region[1] - 1) * dst_row_pitch + (region[2] - 1) * dst_slice_pitch; if (src_end > src_buffer->size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "Region exceeds source buffer size (" << src_buffer->size << " bytes)"); } if (dst_end > dst_buffer->size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "Region exceeds destination buffer size (" << dst_buffer->size << " bytes)"); } // Enqueue command oclgrind::CopyRectCommand* cmd = new oclgrind::CopyRectCommand(); cmd->src = src_buffer->address; cmd->dst = dst_buffer->address; cmd->src_offset[0] = src_offset; cmd->src_offset[1] = src_row_pitch; cmd->src_offset[2] = src_slice_pitch; cmd->dst_offset[0] = dst_offset; cmd->dst_offset[1] = dst_row_pitch; cmd->dst_offset[2] = dst_slice_pitch; memcpy(cmd->region, region, 3 * sizeof(size_t)); asyncQueueRetain(cmd, src_buffer); asyncQueueRetain(cmd, dst_buffer); asyncEnqueue(command_queue, CL_COMMAND_COPY_BUFFER_RECT, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillBuffer( cl_command_queue command_queue, cl_mem buffer, const void* pattern, size_t pattern_size, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, buffer); } if (offset + cb > buffer->size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "offset + cb (" << offset << " + " << cb << ") exceeds buffer size (" << buffer->size << " bytes)"); } if (!pattern) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, pattern); } if (pattern_size == 0) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, pattern_size); } if (offset % pattern_size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "offset (" << offset << ")" << " not a multiple of pattern_size (" << pattern_size << ")"); } if (cb % pattern_size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "cb (" << cb << ")" << " not a multiple of pattern_size (" << pattern_size << ")"); } // Enqueue command oclgrind::FillBufferCommand* cmd = new oclgrind::FillBufferCommand( (const unsigned char*)pattern, pattern_size); cmd->address = buffer->address + offset; cmd->size = cb; asyncQueueRetain(cmd, buffer); asyncEnqueue(command_queue, CL_COMMAND_FILL_BUFFER, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY 
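// Illustrative usage sketch for the fill-buffer path above (placeholder names
// `queue`, `buf` and `num_floats`): both `offset` and `cb` must be multiples
// of `pattern_size`, so zero-filling a float buffer looks like:
//
//   cl_float zero = 0.0f;
//   clEnqueueFillBuffer(queue, buf, &zero, sizeof(zero),
//                       0, num_floats * sizeof(cl_float), 0, NULL, NULL);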
cl_int CL_API_CALL clEnqueueFillImage( cl_command_queue command_queue, cl_mem image, const void* fill_color, const size_t* origin, const size_t* region, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image); } if (!fill_color) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, fill_color); } if (!region[0] || !region[1] || !region[2]) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "Values in region cannot be 0"); } // Get image dimensions cl_image* img = (cl_image*)image; size_t width = img->desc.image_width; size_t height = img->desc.image_height; size_t depth = img->desc.image_depth; size_t arraySize = img->desc.image_array_size; size_t pixelSize = getPixelSize(&img->format); size_t row_pitch = width * pixelSize; size_t slice_pitch = height * row_pitch; if (img->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) height = arraySize; if (img->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) depth = arraySize; // Ensure region is within image bounds if (origin[0] + region[0] > width) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[0] + region[0] > width (" << origin[0] << " + " << region[0] << " > " << width << " )"); } if (origin[1] + region[1] > height) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[1] + region[1] > height (" << origin[1] << " + " << region[1] << " > " << height << " )"); } if (origin[2] + region[2] > depth) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[2] + region[2] > depth (" << origin[2] << " + " << region[2] << " > " << depth << " )"); } // Generate color data with correct order and data type unsigned char* color = new unsigned char[pixelSize]; for (unsigned output = 0; output < getNumChannels(&img->format); output++) { // Get input channel index int input = output; switch (img->format.image_channel_order) { case CL_R: case CL_Rx: case CL_RG: case CL_RGx: case CL_RGB: case CL_RGBx: case CL_RGBA: break; case CL_BGRA: if (output == 0) input = 2; if (output == 2) input = 0; break; case CL_ARGB: if (output == 0) input = 3; if (output == 1) input = 0; if (output == 2) input = 1; if (output == 3) input = 2; break; case CL_A: if (output == 0) input = 3; break; case CL_RA: if (output == 1) input = 3; break; case CL_INTENSITY: case CL_LUMINANCE: input = 0; break; default: ReturnError(command_queue->context, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); } // Interpret data switch (img->format.image_channel_data_type) { case CL_SNORM_INT8: ((int8_t*)color)[output] = rint(min(max(((float*)fill_color)[input] * 127.f, -127.f), 128.f)); break; case CL_UNORM_INT8: ((uint8_t*)color)[output] = rint(min(max(((float*)fill_color)[input] * 255.f, 0.f), 255.f)); break; case CL_SNORM_INT16: ((int16_t*)color)[output] = rint( min(max(((float*)fill_color)[input] * 32767.f, -32768.f), 32767.f)); break; case CL_UNORM_INT16: ((uint16_t*)color)[output] = rint(min(max(((float*)fill_color)[input] * 65535.f, 0.f), 65535.f)); break; case CL_FLOAT: ((float*)color)[output] = ((float*)fill_color)[input]; break; case CL_HALF_FLOAT: ((uint16_t*)color)[output] = cl_half_from_float(((float*)fill_color)[input], CL_HALF_RTE); break; case CL_SIGNED_INT8: ((int8_t*)color)[output] = ((int32_t*)fill_color)[input]; break; case CL_SIGNED_INT16: ((int16_t*)color)[output] = 
((int32_t*)fill_color)[input]; break; case CL_SIGNED_INT32: ((int32_t*)color)[output] = ((int32_t*)fill_color)[input]; break; case CL_UNSIGNED_INT8: ((uint8_t*)color)[output] = ((uint32_t*)fill_color)[input]; break; case CL_UNSIGNED_INT16: ((uint16_t*)color)[output] = ((uint32_t*)fill_color)[input]; break; case CL_UNSIGNED_INT32: ((uint32_t*)color)[output] = ((uint32_t*)fill_color)[input]; break; default: ReturnError(command_queue->context, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); } } // Enqueue command oclgrind::FillImageCommand* cmd = new oclgrind::FillImageCommand( image->address, origin, region, row_pitch, slice_pitch, pixelSize, color); asyncQueueRetain(cmd, image); asyncEnqueue(command_queue, CL_COMMAND_FILL_IMAGE, cmd, num_events_in_wait_list, event_wait_list, event); delete[] color; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImage( cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, const size_t* origin, const size_t* region, size_t row_pitch, size_t slice_pitch, void* ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image); } cl_image* img = (cl_image*)image; size_t pixelSize = getPixelSize(&img->format); size_t buffer_origin[3] = {origin[0] * pixelSize, origin[1], origin[2]}; size_t pixel_region[3] = {region[0] * pixelSize, region[1], region[2]}; size_t host_origin[3] = {0, 0, 0}; size_t img_row_pitch = img->desc.image_width * pixelSize; size_t img_slice_pitch = img->desc.image_height * img_row_pitch; if (row_pitch == 0) { row_pitch = pixel_region[0]; } if (slice_pitch == 0) { slice_pitch = pixel_region[1] * row_pitch; } // Enqueue read cl_int ret = clEnqueueReadBufferRect( command_queue, image, blocking_read, buffer_origin, host_origin, pixel_region, img_row_pitch, img_slice_pitch, row_pitch, slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event); if (event && ret == CL_SUCCESS) { (*event)->type = CL_COMMAND_READ_IMAGE; } return ret; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImage( cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, const size_t* origin, const size_t* region, size_t input_row_pitch, size_t input_slice_pitch, const void* ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image); } cl_image* img = (cl_image*)image; size_t pixelSize = getPixelSize(&img->format); size_t buffer_origin[3] = {origin[0] * pixelSize, origin[1], origin[2]}; size_t pixel_region[3] = {region[0] * pixelSize, region[1], region[2]}; size_t host_origin[3] = {0, 0, 0}; size_t img_row_pitch = img->desc.image_width * pixelSize; size_t img_slice_pitch = img->desc.image_height * img_row_pitch; if (input_row_pitch == 0) { input_row_pitch = pixel_region[0]; } if (input_slice_pitch == 0) { input_slice_pitch = pixel_region[1] * input_row_pitch; } // Enqueue write cl_int ret = clEnqueueWriteBufferRect( command_queue, image, blocking_write, buffer_origin, host_origin, pixel_region, img_row_pitch, img_slice_pitch, input_row_pitch, input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event); if (event && 
ret == CL_SUCCESS) { (*event)->type = CL_COMMAND_WRITE_IMAGE; } return ret; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImage( cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, const size_t* src_origin, const size_t* dst_origin, const size_t* region, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!src_image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_image); } if (!dst_image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_image); } cl_image* src = (cl_image*)src_image; cl_image* dst = (cl_image*)dst_image; if (src->format.image_channel_order != dst->format.image_channel_order) { ReturnErrorInfo(command_queue->context, CL_IMAGE_FORMAT_MISMATCH, "Channel orders do not match"); } if (src->format.image_channel_data_type != dst->format.image_channel_data_type) { ReturnErrorInfo(command_queue->context, CL_IMAGE_FORMAT_MISMATCH, "Channel data types do no match"); } size_t srcPixelSize = getPixelSize(&src->format); size_t dstPixelSize = getPixelSize(&dst->format); size_t src_pixel_origin[3] = {src_origin[0] * srcPixelSize, src_origin[1], src_origin[2]}; size_t dst_pixel_origin[3] = {dst_origin[0] * dstPixelSize, dst_origin[1], dst_origin[2]}; size_t pixel_region[3] = {region[0] * srcPixelSize, region[1], region[2]}; size_t src_row_pitch = src->desc.image_width * srcPixelSize; size_t src_slice_pitch = src->desc.image_height * src_row_pitch; size_t dst_row_pitch = dst->desc.image_width * dstPixelSize; size_t dst_slice_pitch = dst->desc.image_height * dst_row_pitch; // Enqueue copy cl_int ret = clEnqueueCopyBufferRect( command_queue, src_image, dst_image, src_pixel_origin, dst_pixel_origin, pixel_region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch, num_events_in_wait_list, event_wait_list, event); if (event && ret == CL_SUCCESS) { (*event)->type = CL_COMMAND_COPY_IMAGE; } return ret; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBuffer( cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, const size_t* src_origin, const size_t* region, size_t dst_offset, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!src_image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_image); } if (!dst_buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_buffer); } cl_image* src = (cl_image*)src_image; size_t pixel_size = getPixelSize(&src->format); size_t src_pixel_origin[3] = {src_origin[0] * pixel_size, src_origin[1], src_origin[2]}; size_t src_row_pitch = src->desc.image_width * pixel_size; size_t src_slice_pitch = src->desc.image_height * src_row_pitch; size_t pixel_region[3] = {region[0] * pixel_size, region[1], region[2]}; size_t dst_origin[3] = {dst_offset, 0, 0}; // Enqueue copy cl_int ret = clEnqueueCopyBufferRect( command_queue, src_image, dst_buffer, src_pixel_origin, dst_origin, pixel_region, src_row_pitch, src_slice_pitch, 0, 0, num_events_in_wait_list, event_wait_list, event); if (event && ret == CL_SUCCESS) { (*event)->type = CL_COMMAND_COPY_IMAGE_TO_BUFFER; } return ret; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferToImage( cl_command_queue command_queue, cl_mem src_buffer, 
cl_mem dst_image, size_t src_offset, const size_t* dst_origin, const size_t* region, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!src_buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_buffer); } if (!dst_image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_image); } cl_image* dst = (cl_image*)dst_image; size_t pixel_size = getPixelSize(&dst->format); size_t dst_pixel_origin[3] = {dst_origin[0] * pixel_size, dst_origin[1], dst_origin[2]}; size_t dst_row_pitch = dst->desc.image_width * pixel_size; size_t dst_slice_pitch = dst->desc.image_height * dst_row_pitch; size_t pixel_region[3] = {region[0] * pixel_size, region[1], region[2]}; size_t src_origin[3] = {src_offset, 0, 0}; // Enqueue copy cl_int ret = clEnqueueCopyBufferRect( command_queue, src_buffer, dst_image, src_origin, dst_pixel_origin, pixel_region, 0, 0, dst_row_pitch, dst_slice_pitch, num_events_in_wait_list, event_wait_list, event); if (event && ret == CL_SUCCESS) { (*event)->type = CL_COMMAND_COPY_BUFFER_TO_IMAGE; } return ret; } CL_API_ENTRY void* CL_API_CALL clEnqueueMapBuffer( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { SetErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); return NULL; } if (!buffer) { SetErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, buffer); return NULL; } if (map_flags & CL_MAP_WRITE && buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY)) { SetErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Buffer flags specify host will not write data"); return NULL; } if (map_flags & CL_MAP_READ && buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY)) { SetErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Buffer flags specify host will not read data"); return NULL; } // Check map region if (offset + cb > buffer->size) { SetErrorInfo(command_queue->context, CL_INVALID_VALUE, "offset + cb (" << offset << " + " << cb << ") exceeds buffer size (" << buffer->size << " bytes)"); return NULL; } // Map buffer void* ptr = buffer->context->context->getGlobalMemory()->mapBuffer( buffer->address, offset, cb); if (ptr == NULL) { SetError(command_queue->context, CL_INVALID_VALUE); return NULL; } // Enqueue command oclgrind::MapCommand* cmd = new oclgrind::MapCommand(); cmd->address = buffer->address; cmd->offset = offset; cmd->size = cb; cmd->flags = map_flags; asyncQueueRetain(cmd, buffer); asyncEnqueue(command_queue, CL_COMMAND_MAP_BUFFER, cmd, num_events_in_wait_list, event_wait_list, event); SetError(command_queue->context, CL_SUCCESS); if (blocking_map) { SetError(command_queue->context, clFinish(command_queue)); } return ptr; } CL_API_ENTRY void* CL_API_CALL clEnqueueMapImage( cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, cl_map_flags map_flags, const size_t* origin, const size_t* region, size_t* image_row_pitch, size_t* image_slice_pitch, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { SetErrorArg(NULL, 
CL_INVALID_COMMAND_QUEUE, command_queue); return NULL; } if (!image) { SetErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image); return NULL; } if (!image_row_pitch) { SetErrorArg(command_queue->context, CL_INVALID_VALUE, image_row_pitch); return NULL; } if (map_flags & CL_MAP_WRITE && image->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY)) { SetErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Image flags specify host will not write data"); return NULL; } if (map_flags & CL_MAP_READ && image->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY)) { SetErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Image flags specify host will not read data"); return NULL; } if (!region[0] || !region[1] || !region[2]) { SetErrorInfo(command_queue->context, CL_INVALID_VALUE, "Values in region cannot be 0"); } // Get image dimensions cl_image* img = (cl_image*)image; size_t width = img->desc.image_width; size_t height = img->desc.image_height; size_t depth = img->desc.image_depth; size_t arraySize = img->desc.image_array_size; size_t pixelSize = getPixelSize(&img->format); size_t row_pitch = width * pixelSize; size_t slice_pitch = height * row_pitch; if (img->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) height = arraySize; if (img->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) depth = arraySize; // Ensure region is within image bounds if (origin[0] + region[0] > width) { SetErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[0] + region[0] > width (" << origin[0] << " + " << region[0] << " > " << width << " )"); } if (origin[1] + region[1] > height) { SetErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[1] + region[1] > height (" << origin[1] << " + " << region[1] << " > " << height << " )"); } if (origin[2] + region[2] > depth) { SetErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[2] + region[2] > depth (" << origin[2] << " + " << region[2] << " > " << depth << " )"); } // Compute byte offset and size size_t offset = origin[0] * pixelSize + origin[1] * row_pitch + origin[2] * slice_pitch; size_t size = region[0] * pixelSize + (region[1] - 1) * row_pitch + (region[2] - 1) * slice_pitch; // Map image void* ptr = image->context->context->getGlobalMemory()->mapBuffer( image->address, offset, size); if (ptr == NULL) { SetError(command_queue->context, CL_INVALID_VALUE); return NULL; } *image_row_pitch = row_pitch; if (image_slice_pitch) { *image_slice_pitch = slice_pitch; } // Enqueue command oclgrind::MapCommand* cmd = new oclgrind::MapCommand(); cmd->address = image->address; cmd->offset = offset; cmd->size = size; cmd->flags = map_flags; asyncQueueRetain(cmd, image); asyncEnqueue(command_queue, CL_COMMAND_MAP_IMAGE, cmd, num_events_in_wait_list, event_wait_list, event); SetError(command_queue->context, CL_SUCCESS); if (blocking_map) { SetError(command_queue->context, clFinish(command_queue)); } return ptr; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObject( cl_command_queue command_queue, cl_mem memobj, void* mapped_ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!memobj) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj); } if (!mapped_ptr) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, mapped_ptr); } // Enqueue command oclgrind::UnmapCommand* cmd = new oclgrind::UnmapCommand(); cmd->address = 
memobj->address; cmd->ptr = mapped_ptr; asyncQueueRetain(cmd, memobj); asyncEnqueue(command_queue, CL_COMMAND_UNMAP_MEM_OBJECT, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjects( cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem* mem_objects, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } // Enqueue command oclgrind::Command* cmd = new oclgrind::Command(); asyncEnqueue(command_queue, CL_COMMAND_MIGRATE_MEM_OBJECTS, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel( cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t* global_work_offset, const size_t* global_work_size, const size_t* local_work_size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (work_dim < 1 || work_dim > 3) { ReturnErrorInfo( command_queue->context, CL_INVALID_WORK_DIMENSION, "Kernels must be 1, 2 or 3 dimensional (work_dim = " << work_dim << ")"); } if (!global_work_size) { ReturnErrorInfo(command_queue->context, CL_INVALID_GLOBAL_WORK_SIZE, "global_work_size cannot be NULL"); } // Check global and local sizes are valid size_t reqdWorkGroupSize[3]; size_t totalWGSize = 1; kernel->kernel->getRequiredWorkGroupSize(reqdWorkGroupSize); for (unsigned i = 0; i < work_dim; i++) { if (kernel->kernel->requiresUniformWorkGroups() && local_work_size && global_work_size[i] % local_work_size[i]) { ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_GROUP_SIZE, "local_work_size[" << i << "]=" << local_work_size[i] << " does not divide global_work_size[" << i << "]=" << global_work_size[i]); } if (local_work_size) { if (local_work_size[i] > m_device->maxWGSize) { ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_ITEM_SIZE, "local_work_size[" << i << "]=" << local_work_size[i] << " exceeds device maximum of " << m_device->maxWGSize); } totalWGSize *= local_work_size[i]; } if (local_work_size && reqdWorkGroupSize[i] && local_work_size[i] != reqdWorkGroupSize[i]) { ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_GROUP_SIZE, "local_work_size[" << i << "]=" << local_work_size[i] << " does not match reqd_work_group_size[" << i << "]=" << reqdWorkGroupSize[i]) } } if (totalWGSize > m_device->maxWGSize) { ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_GROUP_SIZE, "total work-group size (" << totalWGSize << ")" " exceeds device maximum of " << m_device->maxWGSize); } // Ensure all arguments have been set if (!kernel->kernel->allArgumentsSet()) { ReturnErrorInfo(command_queue->context, CL_INVALID_KERNEL_ARGS, "Not all kernel arguments set"); } // Check that local memory requirement is within device maximum size_t totalLocal = kernel->kernel->getLocalMemorySize(); if (totalLocal > m_device->localMemSize) { ReturnErrorInfo(command_queue->context, CL_OUT_OF_RESOURCES, "total local memory size (" << totalLocal << ")" " exceeds device maximum of " << m_device->localMemSize); } // Check that constant memory requirement is within device maximum size_t totalConstant = 0; std::map<cl_uint, cl_mem>::iterator arg; for (arg = 
kernel->memArgs.begin(); arg != kernel->memArgs.end(); arg++) { if (kernel->kernel->getArgumentAddressQualifier(arg->first) == CL_KERNEL_ARG_ADDRESS_CONSTANT) totalConstant += arg->second->size; } if (totalConstant > m_device->constantMemSize) { ReturnErrorInfo(command_queue->context, CL_OUT_OF_RESOURCES, "total constant memory size (" << totalConstant << ")" " exceeds device maximum of " << m_device->constantMemSize); } // Set-up offsets and sizes oclgrind::KernelCommand* cmd = new oclgrind::KernelCommand(); cmd->kernel = new oclgrind::Kernel(*kernel->kernel); cmd->work_dim = work_dim; cmd->globalSize = oclgrind::Size3(1, 1, 1); cmd->globalOffset = oclgrind::Size3(0, 0, 0); cmd->localSize = oclgrind::Size3(1, 1, 1); memcpy(&cmd->globalSize, global_work_size, work_dim * sizeof(size_t)); if (global_work_offset) { memcpy(&cmd->globalOffset, global_work_offset, work_dim * sizeof(size_t)); } if (local_work_size) { memcpy(&cmd->localSize, local_work_size, work_dim * sizeof(size_t)); } // Enqueue command asyncQueueRetain(cmd, kernel); asyncEnqueue(command_queue, CL_COMMAND_NDRANGE_KERNEL, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; size_t work = 1; return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &work, &work, num_events_in_wait_list, event_wait_list, event); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernel( cl_command_queue command_queue, void(CL_CALLBACK* user_func)(void*), void* args, size_t cb_args, cl_uint num_mem_objects, const cl_mem* mem_list, const void** args_mem_loc, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!user_func) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, user_func); } if (!args && (cb_args > 0 || num_mem_objects > 0)) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "args is NULL but cb_args|num_mem_objects >0"); } if (args && cb_args == 0) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "args is non-NULL but cb_args is 0"); } if (num_mem_objects > 0 && (!mem_list || !args_mem_loc)) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "num_mem_objects >0 but mem_list|args_mem_loc is NULL"); } if (num_mem_objects == 0 && (mem_list || args_mem_loc)) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "num_mem_objects is 0 but mem_list|args_mem_loc not NULL"); } // Replace mem objects with real pointers oclgrind::Memory* memory = command_queue->context->context->getGlobalMemory(); for (unsigned i = 0; i < num_mem_objects; i++) { if (!mem_list[i]) { ReturnErrorInfo(command_queue->context, CL_INVALID_MEM_OBJECT, "Memory object " << i << " is NULL"); } void* addr = memory->getPointer(mem_list[i]->address); if (addr == NULL) { ReturnErrorInfo(command_queue->context, CL_INVALID_MEM_OBJECT, "Memory object " << i << " not valid"); } memcpy((void*)args_mem_loc[i], &addr, sizeof(void*)); } // Create command oclgrind::NativeKernelCommand* cmd = new oclgrind::NativeKernelCommand(user_func, args, cb_args); // Retain memory objects for (unsigned i = 0; i < num_mem_objects; i++) { asyncQueueRetain(cmd, mem_list[i]); } // Enqueue commands asyncEnqueue(command_queue, 
CL_COMMAND_NATIVE_KERNEL, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddressForPlatform( cl_platform_id platform, const char* func_name) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; return NULL; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitList( cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } // Enqueue command oclgrind::Command* cmd = new oclgrind::Command(); asyncEnqueue(command_queue, CL_COMMAND_MARKER, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitList( cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } // Enqueue command oclgrind::Command* cmd = new oclgrind::Command(); asyncEnqueue(command_queue, CL_COMMAND_BARRIER, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clSetPrintfCallback( cl_context context, void(CL_CALLBACK* pfn_notify)(cl_context, cl_uint, char*, void*), void* user_data) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; ReturnError(NULL, CL_INVALID_OPERATION); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarker( cl_command_queue command_queue, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; return clEnqueueMarkerWithWaitList(command_queue, 0, NULL, event); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitForEvents(cl_command_queue command_queue, cl_uint num_events, const cl_event* event_list) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } // Enqueue command oclgrind::Command* cmd = new oclgrind::Command(); asyncEnqueue(command_queue, CL_COMMAND_BARRIER, cmd, num_events, event_list, NULL); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrier(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; return clEnqueueBarrierWithWaitList(command_queue, 0, NULL, NULL); } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLBuffer( cl_context context, cl_mem_flags flags, cl_GLuint bufret_mem, int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture2D( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture3D( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not 
implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLRenderbuffer( cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return NULL; } CL_API_ENTRY cl_int CL_API_CALL clGetGLObjectInfo(cl_mem memobj, cl_gl_object_type* gl_object_type, cl_GLuint* gl_object_name) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_MEM_OBJECT, "CL/GL interop not implements"); } CL_API_ENTRY cl_int CL_API_CALL clGetGLTextureInfo( cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_MEM_OBJECT, "CL/GL interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireGLObjects( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseGLObjects( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clGetGLContextInfoKHR( const cl_context_properties* properties, cl_gl_context_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/GL interop not implemented"); } CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromGLsyncKHR(cl_context context, cl_GLsync cl_GLsync, cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return NULL; } #if defined(_WIN32) && !defined(__MINGW32__) // DX extension functions CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void* d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id* devices, cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10BufferKHR( cl_context context, cl_mem_flags flags, ID3D10Buffer* resource, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( cl_context context, cl_mem_flags flags, ID3D10Texture2D* resource, UINT subresource, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( cl_context context, cl_mem_flags flags, ID3D10Texture3D* resource, UINT subresource, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( cl_command_queue command_queue, cl_uint 
num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void* d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id* devices, cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11BufferKHR( cl_context context, cl_mem_flags flags, ID3D11Buffer* resource, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( cl_context context, cl_mem_flags flags, ID3D11Texture2D* resource, UINT subresource, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( cl_context context, cl_mem_flags flags, ID3D11Texture3D* resource, UINT subresource, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr* media_adapter_type, void* media_adapters, cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, cl_device_id* devices, cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, void* surface_info, cl_uint plane, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* 
event) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_1_2 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } #endif // DX extension functions ///////////////////// // OpenCL 2.0 APIs // ///////////////////// CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties( cl_context context, cl_device_id device, const cl_queue_properties* properties, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (device != m_device) { SetErrorArg(context, CL_INVALID_DEVICE, device); return NULL; } // Parse properties cl_command_queue_properties props = 0; bool out_of_order = false; unsigned i = 0; while (properties && properties[i]) { switch (properties[i++]) { case CL_QUEUE_PROPERTIES: if (properties[i] & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { out_of_order = true; } if (properties[i] & (CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT)) { SetErrorInfo(context, CL_INVALID_QUEUE_PROPERTIES, "On device queues not implemented"); return NULL; } props = properties[i]; break; case CL_QUEUE_SIZE: SetErrorInfo(context, CL_INVALID_VALUE, "CL_QUEUE_SIZE not implemented"); return NULL; default: SetErrorInfo(context, CL_INVALID_VALUE, properties); return NULL; } i++; } unsigned numProperties = i + 1; // Create command-queue object cl_command_queue queue; queue = new _cl_command_queue; queue->queue = new oclgrind::Queue(context->context, out_of_order); queue->dispatch = m_dispatchTable; queue->properties = props; queue->context = context; queue->refCount = 1; if (properties) { queue->properties_array.assign(properties, properties + numProperties); } clRetainContext(context); SetError(context, CL_SUCCESS); return queue; } CL_API_ENTRY cl_mem CL_API_CALL clCreatePipe(cl_context context, cl_mem_flags flags, cl_uint pipe_packet_size, cl_uint pipe_max_packets, const cl_pipe_properties* properties, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; SetErrorInfo(context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.0 API"); return NULL; } CL_API_ENTRY cl_int CL_API_CALL clGetPipeInfo( cl_mem pipe, cl_pipe_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; ReturnErrorInfo(NULL, CL_INVALID_MEM_OBJECT, "Pipes are not supported"); } CL_API_ENTRY void* CL_API_CALL clSVMAlloc(cl_context context, cl_svm_mem_flags flags, size_t size, cl_uint alignment) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; notifyAPIError(context, CL_INVALID_OPERATION, __func__, "Unimplemented OpenCL 2.0 API"); return NULL; } CL_API_ENTRY void CL_API_CALL clSVMFree(cl_context context, void* svm_pointer) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; notifyAPIError(context, CL_INVALID_OPERATION, __func__, "Unimplemented OpenCL 2.0 API"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFree( cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[], void(CL_CALLBACK* pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, 
cl_event* event) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.0 API"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpy( cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.0 API"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFill( cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.0 API"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMap( cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags flags, void* svm_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.0 API"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMUnmap( cl_command_queue command_queue, void* svm_ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.0 API"); } CL_API_ENTRY cl_sampler CL_API_CALL clCreateSamplerWithProperties( cl_context context, const cl_sampler_properties* sampler_properties, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } cl_bool normalized_coords = CL_TRUE; cl_addressing_mode addressing_mode = CL_ADDRESS_CLAMP; cl_filter_mode filter_mode = CL_FILTER_NEAREST; // Parse properties unsigned i = 0; while (sampler_properties && sampler_properties[i]) { switch (sampler_properties[i++]) { case CL_SAMPLER_NORMALIZED_COORDS: normalized_coords = sampler_properties[i]; break; case CL_SAMPLER_ADDRESSING_MODE: addressing_mode = sampler_properties[i]; break; case CL_SAMPLER_FILTER_MODE: filter_mode = sampler_properties[i]; break; default: SetErrorInfo(context, CL_INVALID_VALUE, sampler_properties); return NULL; } i++; } unsigned numProperties = i + 1; // Create sampler bitfield uint32_t bitfield = 0; if (normalized_coords) { bitfield |= CLK_NORMALIZED_COORDS_TRUE; } switch (addressing_mode) { case CL_ADDRESS_NONE: break; case CL_ADDRESS_CLAMP_TO_EDGE: bitfield |= CLK_ADDRESS_CLAMP_TO_EDGE; break; case CL_ADDRESS_CLAMP: bitfield |= CLK_ADDRESS_CLAMP; break; case CL_ADDRESS_REPEAT: bitfield |= CLK_ADDRESS_REPEAT; break; case CL_ADDRESS_MIRRORED_REPEAT: bitfield |= CLK_ADDRESS_MIRRORED_REPEAT; break; default: SetErrorArg(context, CL_INVALID_VALUE, sampler_properties); return NULL; } switch (filter_mode) { case CL_FILTER_NEAREST: bitfield |= CLK_FILTER_NEAREST; break; case CL_FILTER_LINEAR: bitfield |= CLK_FILTER_LINEAR; break; default: SetErrorArg(context, CL_INVALID_VALUE, sampler_properties); return NULL; } // Create sampler cl_sampler sampler = new _cl_sampler; sampler->dispatch = m_dispatchTable; sampler->context = context; sampler->normCoords = normalized_coords; sampler->addressMode = addressing_mode; sampler->filterMode = filter_mode; 
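// Illustrative host-side sketch (placeholder names `ctx`, `dev` and `err`):
// both clCreateCommandQueueWithProperties and clCreateSamplerWithProperties
// parse their input as a zero-terminated list of key/value pairs, exactly as
// the loops above walk it.
//
//   cl_queue_properties qprops[] = {CL_QUEUE_PROPERTIES,
//                                   CL_QUEUE_PROFILING_ENABLE, 0};
//   cl_command_queue q =
//     clCreateCommandQueueWithProperties(ctx, dev, qprops, &err);
//
//   cl_sampler_properties sprops[] = {
//     CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
//     CL_SAMPLER_ADDRESSING_MODE,   CL_ADDRESS_CLAMP_TO_EDGE,
//     CL_SAMPLER_FILTER_MODE,       CL_FILTER_LINEAR,
//     0};
//   cl_sampler s = clCreateSamplerWithProperties(ctx, sprops, &err);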
sampler->sampler = bitfield; if (sampler_properties) { sampler->properties.assign(sampler_properties, sampler_properties + numProperties); } SetError(context, CL_SUCCESS); return sampler; } CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint arg_index, const void* arg_value) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; ReturnErrorInfo(kernel->program->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.0 API"); } CL_API_ENTRY cl_int CL_API_CALL clSetKernelExecInfo( cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value) CL_API_SUFFIX__VERSION_2_0 { REGISTER_API; ReturnErrorInfo(kernel->program->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.0 API"); } CL_API_ENTRY cl_kernel CL_API_CALL clCloneKernel( cl_kernel source_kernel, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1 { REGISTER_API; if (!source_kernel) { SetErrorArg(nullptr, CL_INVALID_KERNEL, source_kernel); return nullptr; } // Create kernel object cl_kernel kernel = new _cl_kernel; kernel->dispatch = m_dispatchTable; kernel->kernel = new oclgrind::Kernel(*source_kernel->kernel); kernel->program = source_kernel->program; kernel->memArgs = source_kernel->memArgs; for (auto src_img : source_kernel->imageArgs) { oclgrind::Image* image = new oclgrind::Image; image->address = src_img->address; image->format = src_img->format; image->desc = src_img->desc; kernel->imageArgs.push_back(image); } kernel->refCount = 1; clRetainProgram(kernel->program); SetError(nullptr, CL_SUCCESS); return kernel; } CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithIL(cl_context context, const void* il, size_t length, cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1 { REGISTER_API; SetErrorInfo(context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.1 API"); return nullptr; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMigrateMem( cl_command_queue command_queue, cl_uint num_svm_pointers, const void** svm_pointers, const size_t* sizes, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) CL_API_SUFFIX__VERSION_2_1 { REGISTER_API; ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.1 API"); } CL_API_ENTRY cl_int CL_API_CALL clGetDeviceAndHostTimer(cl_device_id device, cl_ulong* device_timestamp, cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1 { REGISTER_API; ReturnErrorInfo(nullptr, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.1 API"); } CL_API_ENTRY cl_int CL_API_CALL clGetHostTimer( cl_device_id device, cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1 { REGISTER_API; ReturnErrorInfo(nullptr, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.1 API"); } CL_API_ENTRY cl_int CL_API_CALL clGetKernelSubGroupInfo( cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, size_t input_value_size, const void* input_value, size_t param_value_size, void* param_value, size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_1 { REGISTER_API; ReturnErrorInfo(kernel->program->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.1 API"); } CL_API_ENTRY cl_int CL_API_CALL clSetDefaultDeviceCommandQueue( cl_context context, cl_device_id device, cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1 { REGISTER_API; ReturnErrorInfo(context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.1 API"); } CL_API_ENTRY CL_EXT_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL clSetProgramReleaseCallback( cl_program program, void(CL_CALLBACK* pfn_notify)(cl_program program, void* 
user_data), void* user_data) CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED { REGISTER_API; ReturnErrorInfo(program->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.2 API"); } CL_API_ENTRY cl_int CL_API_CALL clSetProgramSpecializationConstant( cl_program program, cl_uint spec_id, size_t spec_size, const void* spec_value) CL_API_SUFFIX__VERSION_2_2 { REGISTER_API; ReturnErrorInfo(program->context, CL_INVALID_OPERATION, "Unimplemented OpenCL 2.2 API"); } CL_API_ENTRY cl_int CL_API_CALL clSetContextDestructorCallback( cl_context context, void(CL_CALLBACK* pfn_notify)(cl_context context, void* user_data), void* user_data) CL_API_SUFFIX__VERSION_3_0 { REGISTER_API; // Check parameters if (!context) { ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context); } if (!pfn_notify) { ReturnErrorArg(context, CL_INVALID_VALUE, pfn_notify); } context->callbacks.push(make_pair(pfn_notify, user_data)); return CL_SUCCESS; } //////////////////// // Dispatch Table // //////////////////// #define _NULL_ NULL #define DISPATCH_TABLE_ENTRY(FUNCTION) (void*)(FUNCTION) void* m_dispatchTable[] = { DISPATCH_TABLE_ENTRY(clGetPlatformIDs), DISPATCH_TABLE_ENTRY(clGetPlatformInfo), DISPATCH_TABLE_ENTRY(clGetDeviceIDs), DISPATCH_TABLE_ENTRY(clGetDeviceInfo), DISPATCH_TABLE_ENTRY(clCreateContext), DISPATCH_TABLE_ENTRY(clCreateContextFromType), DISPATCH_TABLE_ENTRY(clRetainContext), DISPATCH_TABLE_ENTRY(clReleaseContext), DISPATCH_TABLE_ENTRY(clGetContextInfo), DISPATCH_TABLE_ENTRY(clCreateCommandQueue), DISPATCH_TABLE_ENTRY(clRetainCommandQueue), DISPATCH_TABLE_ENTRY(clReleaseCommandQueue), DISPATCH_TABLE_ENTRY(clGetCommandQueueInfo), DISPATCH_TABLE_ENTRY(clSetCommandQueueProperty), DISPATCH_TABLE_ENTRY(clCreateBuffer), DISPATCH_TABLE_ENTRY(clCreateImage2D), DISPATCH_TABLE_ENTRY(clCreateImage3D), DISPATCH_TABLE_ENTRY(clRetainMemObject), DISPATCH_TABLE_ENTRY(clReleaseMemObject), DISPATCH_TABLE_ENTRY(clGetSupportedImageFormats), DISPATCH_TABLE_ENTRY(clGetMemObjectInfo), DISPATCH_TABLE_ENTRY(clGetImageInfo), DISPATCH_TABLE_ENTRY(clCreateSampler), DISPATCH_TABLE_ENTRY(clRetainSampler), DISPATCH_TABLE_ENTRY(clReleaseSampler), DISPATCH_TABLE_ENTRY(clGetSamplerInfo), DISPATCH_TABLE_ENTRY(clCreateProgramWithSource), DISPATCH_TABLE_ENTRY(clCreateProgramWithBinary), DISPATCH_TABLE_ENTRY(clRetainProgram), DISPATCH_TABLE_ENTRY(clReleaseProgram), DISPATCH_TABLE_ENTRY(clBuildProgram), DISPATCH_TABLE_ENTRY(clUnloadCompiler), DISPATCH_TABLE_ENTRY(clGetProgramInfo), DISPATCH_TABLE_ENTRY(clGetProgramBuildInfo), DISPATCH_TABLE_ENTRY(clCreateKernel), DISPATCH_TABLE_ENTRY(clCreateKernelsInProgram), DISPATCH_TABLE_ENTRY(clRetainKernel), DISPATCH_TABLE_ENTRY(clReleaseKernel), DISPATCH_TABLE_ENTRY(clSetKernelArg), DISPATCH_TABLE_ENTRY(clGetKernelInfo), DISPATCH_TABLE_ENTRY(clGetKernelWorkGroupInfo), DISPATCH_TABLE_ENTRY(clWaitForEvents), DISPATCH_TABLE_ENTRY(clGetEventInfo), DISPATCH_TABLE_ENTRY(clRetainEvent), DISPATCH_TABLE_ENTRY(clReleaseEvent), DISPATCH_TABLE_ENTRY(clGetEventProfilingInfo), DISPATCH_TABLE_ENTRY(clFlush), DISPATCH_TABLE_ENTRY(clFinish), DISPATCH_TABLE_ENTRY(clEnqueueReadBuffer), DISPATCH_TABLE_ENTRY(clEnqueueWriteBuffer), DISPATCH_TABLE_ENTRY(clEnqueueCopyBuffer), DISPATCH_TABLE_ENTRY(clEnqueueReadImage), DISPATCH_TABLE_ENTRY(clEnqueueWriteImage), DISPATCH_TABLE_ENTRY(clEnqueueCopyImage), DISPATCH_TABLE_ENTRY(clEnqueueCopyImageToBuffer), DISPATCH_TABLE_ENTRY(clEnqueueCopyBufferToImage), DISPATCH_TABLE_ENTRY(clEnqueueMapBuffer), DISPATCH_TABLE_ENTRY(clEnqueueMapImage), DISPATCH_TABLE_ENTRY(clEnqueueUnmapMemObject), 
DISPATCH_TABLE_ENTRY(clEnqueueNDRangeKernel), DISPATCH_TABLE_ENTRY(clEnqueueTask), DISPATCH_TABLE_ENTRY(clEnqueueNativeKernel), DISPATCH_TABLE_ENTRY(clEnqueueMarker), DISPATCH_TABLE_ENTRY(clEnqueueWaitForEvents), DISPATCH_TABLE_ENTRY(clEnqueueBarrier), DISPATCH_TABLE_ENTRY(clGetExtensionFunctionAddress), DISPATCH_TABLE_ENTRY(clCreateFromGLBuffer), DISPATCH_TABLE_ENTRY(clCreateFromGLTexture2D), DISPATCH_TABLE_ENTRY(clCreateFromGLTexture3D), DISPATCH_TABLE_ENTRY(clCreateFromGLRenderbuffer), DISPATCH_TABLE_ENTRY(clGetGLObjectInfo), DISPATCH_TABLE_ENTRY(clGetGLTextureInfo), DISPATCH_TABLE_ENTRY(clEnqueueAcquireGLObjects), DISPATCH_TABLE_ENTRY(clEnqueueReleaseGLObjects), DISPATCH_TABLE_ENTRY(clGetGLContextInfoKHR), #if defined(_WIN32) DISPATCH_TABLE_ENTRY(clGetDeviceIDsFromD3D10KHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D10BufferKHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D10Texture2DKHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D10Texture3DKHR), DISPATCH_TABLE_ENTRY(clEnqueueAcquireD3D10ObjectsKHR), DISPATCH_TABLE_ENTRY(clEnqueueReleaseD3D10ObjectsKHR), #else DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), #endif // OpenCL 1.1 DISPATCH_TABLE_ENTRY(clSetEventCallback), DISPATCH_TABLE_ENTRY(clCreateSubBuffer), DISPATCH_TABLE_ENTRY(clSetMemObjectDestructorCallback), DISPATCH_TABLE_ENTRY(clCreateUserEvent), DISPATCH_TABLE_ENTRY(clSetUserEventStatus), DISPATCH_TABLE_ENTRY(clEnqueueReadBufferRect), DISPATCH_TABLE_ENTRY(clEnqueueWriteBufferRect), DISPATCH_TABLE_ENTRY(clEnqueueCopyBufferRect), DISPATCH_TABLE_ENTRY(NULL), // clCreateSubDevicesEXT DISPATCH_TABLE_ENTRY(NULL), // clRetainDeviceEXT DISPATCH_TABLE_ENTRY(NULL), // clReleaseDeviceEXT DISPATCH_TABLE_ENTRY(clCreateEventFromGLsyncKHR), // OpenCL 1.2 DISPATCH_TABLE_ENTRY(clCreateSubDevices), DISPATCH_TABLE_ENTRY(clRetainDevice), DISPATCH_TABLE_ENTRY(clReleaseDevice), DISPATCH_TABLE_ENTRY(clCreateImage), DISPATCH_TABLE_ENTRY(clCreateProgramWithBuiltInKernels), DISPATCH_TABLE_ENTRY(clCompileProgram), DISPATCH_TABLE_ENTRY(clLinkProgram), DISPATCH_TABLE_ENTRY(clUnloadPlatformCompiler), DISPATCH_TABLE_ENTRY(clGetKernelArgInfo), DISPATCH_TABLE_ENTRY(clEnqueueFillBuffer), DISPATCH_TABLE_ENTRY(clEnqueueFillImage), DISPATCH_TABLE_ENTRY(clEnqueueMigrateMemObjects), DISPATCH_TABLE_ENTRY(clEnqueueMarkerWithWaitList), DISPATCH_TABLE_ENTRY(clEnqueueBarrierWithWaitList), DISPATCH_TABLE_ENTRY(clGetExtensionFunctionAddressForPlatform), DISPATCH_TABLE_ENTRY(clCreateFromGLTexture), #if defined(_WIN32) DISPATCH_TABLE_ENTRY(clGetDeviceIDsFromD3D11KHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D11BufferKHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D11Texture2DKHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D11Texture3DKHR), DISPATCH_TABLE_ENTRY(clCreateFromDX9MediaSurfaceKHR), DISPATCH_TABLE_ENTRY(clEnqueueAcquireD3D11ObjectsKHR), DISPATCH_TABLE_ENTRY(clEnqueueReleaseD3D11ObjectsKHR), DISPATCH_TABLE_ENTRY(clGetDeviceIDsFromDX9MediaAdapterKHR), DISPATCH_TABLE_ENTRY(clEnqueueAcquireDX9MediaSurfacesKHR), DISPATCH_TABLE_ENTRY(clEnqueueReleaseDX9MediaSurfacesKHR), #else DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), #endif // cl_khr_egl_image DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), // cl_khr_egl_event 
DISPATCH_TABLE_ENTRY(NULL), // OpenCL 2.0 DISPATCH_TABLE_ENTRY(clCreateCommandQueueWithProperties), DISPATCH_TABLE_ENTRY(clCreatePipe), DISPATCH_TABLE_ENTRY(clGetPipeInfo), DISPATCH_TABLE_ENTRY(clSVMAlloc), DISPATCH_TABLE_ENTRY(clSVMFree), DISPATCH_TABLE_ENTRY(clEnqueueSVMFree), DISPATCH_TABLE_ENTRY(clEnqueueSVMMemcpy), DISPATCH_TABLE_ENTRY(clEnqueueSVMMemFill), DISPATCH_TABLE_ENTRY(clEnqueueSVMMap), DISPATCH_TABLE_ENTRY(clEnqueueSVMUnmap), DISPATCH_TABLE_ENTRY(clCreateSamplerWithProperties), DISPATCH_TABLE_ENTRY(clSetKernelArgSVMPointer), DISPATCH_TABLE_ENTRY(clSetKernelExecInfo), // cl_khr_sub_groups DISPATCH_TABLE_ENTRY(NULL), // OpenCL 2.1 DISPATCH_TABLE_ENTRY(clCloneKernel), DISPATCH_TABLE_ENTRY(clCreateProgramWithIL), DISPATCH_TABLE_ENTRY(clEnqueueSVMMigrateMem), DISPATCH_TABLE_ENTRY(clGetDeviceAndHostTimer), DISPATCH_TABLE_ENTRY(clGetHostTimer), DISPATCH_TABLE_ENTRY(clGetKernelSubGroupInfo), DISPATCH_TABLE_ENTRY(clSetDefaultDeviceCommandQueue), // OpenCL 2.2 DISPATCH_TABLE_ENTRY(clSetProgramReleaseCallback), DISPATCH_TABLE_ENTRY(clSetProgramSpecializationConstant), // OpenCL 3.0 DISPATCH_TABLE_ENTRY(clCreateBufferWithProperties), DISPATCH_TABLE_ENTRY(clCreateImageWithProperties), DISPATCH_TABLE_ENTRY(clSetContextDestructorCallback), }; #if defined(_WIN32) && !defined(OCLGRIND_ICD) #include // Function to replace calls to clGetPlatformIDs with // the Oclgrind implementation. // // This is invoked by oclgrind.exe after this DLL is // injected into the child process. // // Returns true on success, false on failure. bool initOclgrind() { // Get base address of process char* base = (char*)GetModuleHandle(NULL); // Get pointer to NT headers PIMAGE_DOS_HEADER dosHeader = (PIMAGE_DOS_HEADER)(base); PIMAGE_NT_HEADERS ntHeaders = (PIMAGE_NT_HEADERS)(base + dosHeader->e_lfanew); if (ntHeaders->Signature != IMAGE_NT_SIGNATURE) { std::cerr << "[Oclgrind] Invalid NT signature: " << ntHeaders->Signature << std::endl; return false; } // Get pointer to import directory DWORD importOffset = ntHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT] .VirtualAddress; PIMAGE_IMPORT_DESCRIPTOR importDesc = (PIMAGE_IMPORT_DESCRIPTOR)(base + importOffset); // Loop over directory entries while (importDesc->Name) { // Look for OpenCL.dll const char* modname = (const char*)(base + importDesc->Name); if (!stricmp(modname, "opencl.dll")) { // We use the OriginalFirstThunk to match the name, // and then replace the function pointer in FirstThunk PIMAGE_THUNK_DATA origThunk = (PIMAGE_THUNK_DATA)(base + importDesc->OriginalFirstThunk); PIMAGE_THUNK_DATA firstThunk = (PIMAGE_THUNK_DATA)(base + importDesc->FirstThunk); // Loop over functions while (origThunk->u1.AddressOfData) { // Skip unnamed functions if (!(origThunk->u1.Ordinal & IMAGE_ORDINAL_FLAG)) { // Get function name and check for clGetPlatformIDs PIMAGE_IMPORT_BY_NAME import = (PIMAGE_IMPORT_BY_NAME)(base + origThunk->u1.AddressOfData); if (!stricmp((char*)import->Name, "clGetPlatformIDs")) { // Make page writable temporarily MEMORY_BASIC_INFORMATION mbinfo; VirtualQuery(firstThunk, &mbinfo, sizeof(mbinfo)); if (!VirtualProtect(mbinfo.BaseAddress, mbinfo.RegionSize, PAGE_EXECUTE_READWRITE, &mbinfo.Protect)) { std::cerr << "[Oclgrind] Failed to make page writeable: " << GetLastError() << std::endl; return false; } // Replace function pointer with our implementation firstThunk->u1.Function = (ULONG64)clGetPlatformIDs; // Restore page protection DWORD zero = 0; if (!VirtualProtect(mbinfo.BaseAddress, mbinfo.RegionSize, mbinfo.Protect, &zero)) { 
std::cerr << "[Oclgrind] Failed to restore page protection: " << GetLastError() << std::endl; return false; } return true; } } origThunk++; firstThunk++; } } importDesc++; } // We didn't find the function, so just warn user std::cerr << "[Oclgrind] Warning: unable to patch clGetPlatformIDs" << std::endl; return true; } #endif Oclgrind-21.10/src/runtime/runtime.def000066400000000000000000000050101413315665100176740ustar00rootroot00000000000000EXPORTS ; Make runtime functions visible clGetPlatformIDs clGetPlatformInfo clGetDeviceIDs clGetDeviceInfo clCreateContext clCreateContextFromType clRetainContext clReleaseContext clGetContextInfo clCreateCommandQueue clRetainCommandQueue clReleaseCommandQueue clGetCommandQueueInfo clSetCommandQueueProperty clCreateBuffer clCreateImage2D clCreateImage3D clRetainMemObject clReleaseMemObject clGetSupportedImageFormats clGetMemObjectInfo clGetImageInfo clCreateSampler clRetainSampler clReleaseSampler clGetSamplerInfo clCreateProgramWithSource clCreateProgramWithBinary clRetainProgram clReleaseProgram clBuildProgram clUnloadCompiler clGetProgramInfo clGetProgramBuildInfo clCreateKernel clCreateKernelsInProgram clRetainKernel clReleaseKernel clSetKernelArg clGetKernelInfo clGetKernelWorkGroupInfo clWaitForEvents clGetEventInfo clRetainEvent clReleaseEvent clGetEventProfilingInfo clFlush clFinish clEnqueueReadBuffer clEnqueueWriteBuffer clEnqueueCopyBuffer clEnqueueReadImage clEnqueueWriteImage clEnqueueCopyImage clEnqueueCopyImageToBuffer clEnqueueCopyBufferToImage clEnqueueMapBuffer clEnqueueMapImage clEnqueueUnmapMemObject clEnqueueNDRangeKernel clEnqueueTask clEnqueueNativeKernel clEnqueueMarker clEnqueueWaitForEvents clEnqueueBarrier clGetExtensionFunctionAddress clCreateFromGLBuffer clCreateFromGLTexture2D clCreateFromGLTexture3D clCreateFromGLRenderbuffer clGetGLObjectInfo clGetGLTextureInfo clEnqueueAcquireGLObjects clEnqueueReleaseGLObjects clGetGLContextInfoKHR clGetDeviceIDsFromD3D10KHR clCreateFromD3D10BufferKHR clCreateFromD3D10Texture2DKHR clCreateFromD3D10Texture3DKHR clEnqueueAcquireD3D10ObjectsKHR clEnqueueReleaseD3D10ObjectsKHR clSetEventCallback clCreateSubBuffer clSetMemObjectDestructorCallback clCreateUserEvent clSetUserEventStatus clEnqueueReadBufferRect clEnqueueWriteBufferRect clEnqueueCopyBufferRect clCreateEventFromGLsyncKHR clCreateSubDevices clRetainDevice clReleaseDevice clCreateImage clCreateProgramWithBuiltInKernels clCompileProgram clLinkProgram clUnloadPlatformCompiler clGetKernelArgInfo clEnqueueFillBuffer clEnqueueFillImage clEnqueueMigrateMemObjects clEnqueueMarkerWithWaitList clEnqueueBarrierWithWaitList clGetExtensionFunctionAddressForPlatform clCreateFromGLTexture clGetDeviceIDsFromD3D11KHR clCreateFromD3D11BufferKHR clCreateFromD3D11Texture2DKHR clCreateFromD3D11Texture3DKHR clCreateFromDX9MediaSurfaceKHR clEnqueueAcquireD3D11ObjectsKHR clEnqueueReleaseD3D11ObjectsKHR clGetDeviceIDsFromDX9MediaAdapterKHR clEnqueueAcquireDX9MediaSurfacesKHR clEnqueueReleaseDX9MediaSurfacesKHR initOclgrindOclgrind-21.10/tests/000077500000000000000000000000001413315665100144255ustar00rootroot00000000000000Oclgrind-21.10/tests/apps/000077500000000000000000000000001413315665100153705ustar00rootroot00000000000000Oclgrind-21.10/tests/apps/CMakeLists.txt000066400000000000000000000025411413315665100201320ustar00rootroot00000000000000# CMakeLists.txt (Oclgrind) # Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, # University of Bristol. All rights reserved. # # This program is provided under a three-clause BSD license. 
For full # license terms please see the LICENSE file distributed with this # source code. set(COMMON_SOURCES ../common/common.c ../common/common.h) include_directories(../common) # Add app tests foreach(test image vecadd) add_executable(${test} ${test}/${test}.c ${COMMON_SOURCES}) target_link_libraries(${test} oclgrind-rt) # Generate test binaries in same dir as Oclgrind libraries on Windows if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set_target_properties(${test} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") else() set_target_properties(${test} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${test}") set_target_properties(${test} PROPERTIES LINKER_LANGUAGE CXX) endif() add_test( NAME app_${test} COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/run_test.py $ $) set_tests_properties(app_${test} PROPERTIES DEPENDS ${test}) # Set PCH directory set(ENV "OCLGRIND_TESTING=1") list(APPEND ENV "OCLGRIND_PCH_DIR=${CMAKE_BINARY_DIR}/include/oclgrind") set_tests_properties(app_${test} PROPERTIES ENVIRONMENT "${ENV}") endforeach(${test}) Oclgrind-21.10/tests/apps/image/000077500000000000000000000000001413315665100164525ustar00rootroot00000000000000Oclgrind-21.10/tests/apps/image/image.c000066400000000000000000000077161413315665100177130ustar00rootroot00000000000000#include "common.h" #include #include #include #include #define IMG_SIZE 100 #define TOL 1e-8 #define MAX_ERRORS 8 const char* KERNEL_SOURCE = "__kernel void image_copy(__read_only image2d_array_t src, \n" " __write_only image2d_array_t dst) \n" "{ \n" " size_t size = get_image_array_size(src); \n" " const int x = get_global_id(0); \n" " const int y = get_global_id(1); \n" " const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | \n" " CLK_ADDRESS_CLAMP | \n" " CLK_FILTER_NEAREST; \n" " float4 pixel = read_imagef(src, sampler, (int4)(x, y, 0, 0)); \n" " write_imagef(dst, (int4)(x, y, 0, 0), pixel); \n" "} \n"; int main() { cl_int err; cl_kernel kernel; cl_image_format img_fmt; cl_image_desc img_desc; cl_mem src, dst; float *input, *output; size_t width, height; width = height = 10; size_t origin[] = {0, 0, 0}; size_t region[] = {width, height, 1}; size_t GWSize[] = {width, height, 1}; input = (float*)malloc(IMG_SIZE * 3 * sizeof(float)); output = (float*)malloc(IMG_SIZE * 3 * sizeof(float)); // Create Input data for (int i = 0; i < 3; ++i) { for (int j = 0; j < IMG_SIZE; ++j) { input[i * IMG_SIZE + j] = j + 1.0; } } Context cl = createContext(KERNEL_SOURCE, ""); kernel = clCreateKernel(cl.program, "image_copy", &err); checkError(err, "creating kernel"); img_fmt.image_channel_order = CL_RGB; img_fmt.image_channel_data_type = CL_FLOAT; img_desc.image_type = CL_MEM_OBJECT_IMAGE2D; img_desc.image_width = width; img_desc.image_height = height; img_desc.image_depth = 0; img_desc.image_array_size = 0; img_desc.image_row_pitch = 0; img_desc.image_slice_pitch = 0; img_desc.num_mip_levels = 0; img_desc.num_samples = 0; img_desc.buffer = NULL; src = clCreateImage(cl.context, CL_MEM_READ_ONLY, &img_fmt, &img_desc, NULL, &err); checkError(err, "creating source image"); dst = clCreateImage(cl.context, CL_MEM_READ_WRITE, &img_fmt, &img_desc, NULL, &err); checkError(err, "creating destination image"); err = clEnqueueWriteImage(cl.queue, src, CL_TRUE, origin, region, 0, 0, input, 0, NULL, NULL); checkError(err, "enqueuing write image"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &dst); checkError(err, "setting kernel args"); err = clEnqueueNDRangeKernel(cl.queue, kernel, 2, NULL, 
GWSize, NULL, 0, NULL, NULL); checkError(err, "enqueuing kernel"); err = clFinish(cl.queue); checkError(err, "running kernel"); err = clEnqueueReadImage(cl.queue, dst, CL_TRUE, origin, region, 0, 0, output, 0, NULL, NULL); checkError(err, "reading image data"); // Check results unsigned errors = 0; for (int i = 0; i < 3; ++i) { for (int j = 0; j < IMG_SIZE; ++j) { float ref = input[i * IMG_SIZE + j]; float val = output[i * IMG_SIZE + j]; if (fabs(ref - val) > TOL) { if (errors < MAX_ERRORS) { fprintf(stderr, "%4d: %.4f != %.4f\n", i, val, ref); } errors++; } } } free(input); free(output); clReleaseMemObject(src); clReleaseMemObject(dst); clReleaseKernel(kernel); releaseContext(cl); return (errors != 0); } // cl_mem image3; // // image3 = clCreateImage2D(context, CL_MEM_READ_WRITE, &img_fmt, width, height, // 0, 0, &err); // //// copy Image1 to Image3 // err = clEnqueueCopyImage(command_queue, image1, image3, origin, origin, // region, 1, event, &event[3]); err_check(err, "clEnqueueCopyImage"); // clReleaseMemObject(image3); Oclgrind-21.10/tests/apps/vecadd/000077500000000000000000000000001413315665100166165ustar00rootroot00000000000000Oclgrind-21.10/tests/apps/vecadd/vecadd.c000066400000000000000000000065201413315665100202130ustar00rootroot00000000000000#include "common.h" #include #include #include #include #define TOL 1e-8 #define MAX_ERRORS 8 #define MAX_PLATFORMS 8 const char* KERNEL_SOURCE = "kernel void vecadd(global float *a, \n" " global float *b, \n" " global float *c) \n" "{ \n" " int i = get_global_id(0); \n" " c[i] = a[i] + b[i]; \n" "} \n"; int main(int argc, char* argv[]) { cl_int err; cl_kernel kernel; cl_mem d_a, d_b, d_c; float *h_a, *h_b, *h_c; size_t N = 1024; if (argc > 1) { N = atoi(argv[1]); } size_t global = N; if (argc > 2) { global = atoi(argv[2]); } if (!N || !global) { printf("Usage: ./vecadd N [GLOBAL_SIZE]\n"); exit(1); } Context cl = createContext(KERNEL_SOURCE, ""); kernel = clCreateKernel(cl.program, "vecadd", &err); checkError(err, "creating kernel"); size_t dataSize = N * sizeof(cl_float); // Initialise host data srand(0); h_a = malloc(dataSize); h_b = malloc(dataSize); h_c = malloc(dataSize); for (unsigned i = 0; i < N; i++) { h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX; h_c[i] = 0; } d_a = clCreateBuffer(cl.context, CL_MEM_READ_ONLY, dataSize, NULL, &err); checkError(err, "creating d_a buffer"); d_b = clCreateBuffer(cl.context, CL_MEM_READ_ONLY, dataSize, NULL, &err); checkError(err, "creating d_b buffer"); d_c = clCreateBuffer(cl.context, CL_MEM_WRITE_ONLY, dataSize, NULL, &err); checkError(err, "creating d_c buffer"); err = clEnqueueWriteBuffer(cl.queue, d_a, CL_FALSE, 0, dataSize, h_a, 0, NULL, NULL); checkError(err, "writing d_a data"); err = clEnqueueWriteBuffer(cl.queue, d_b, CL_FALSE, 0, dataSize, h_b, 0, NULL, NULL); checkError(err, "writing d_b data"); err = clEnqueueWriteBuffer(cl.queue, d_c, CL_FALSE, 0, dataSize, h_c, 0, NULL, NULL); checkError(err, "writing d_c data"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c); checkError(err, "setting kernel args"); err = clEnqueueNDRangeKernel(cl.queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); checkError(err, "enqueuing kernel"); err = clFinish(cl.queue); checkError(err, "running kernel"); err = clEnqueueReadBuffer(cl.queue, d_c, CL_TRUE, 0, dataSize, h_c, 0, NULL, NULL); checkError(err, "reading d_c data"); // Check results unsigned errors = 0; for 
(unsigned i = 0; i < N; i++) { float ref = h_a[i] + h_b[i]; if (fabs(ref - h_c[i]) > TOL) { if (errors < MAX_ERRORS) { fprintf(stderr, "%4d: %.4f != %.4f\n", i, h_c[i], ref); } errors++; } } if (errors) printf("%d errors detected\n", errors); free(h_a); free(h_b); free(h_c); clReleaseMemObject(d_a); clReleaseMemObject(d_b); clReleaseMemObject(d_c); clReleaseKernel(kernel); releaseContext(cl); return (errors != 0); } Oclgrind-21.10/tests/common/000077500000000000000000000000001413315665100157155ustar00rootroot00000000000000Oclgrind-21.10/tests/common/common.c000066400000000000000000000035531413315665100173570ustar00rootroot00000000000000#include "common.h" #include #include void checkError(cl_int err, const char* operation) { if (err != CL_SUCCESS) { fprintf(stderr, "Error during operation '%s': %d\n", operation, err); exit(1); } } // Check platform is Oclgrind void checkOclgrindPlatform(cl_platform_id platform) { char name[256]; cl_int err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, name, NULL); checkError(err, "getting platform name"); if (strcmp(name, "Oclgrind")) { fprintf(stderr, "Unable to find Oclgrind platform\n"); exit(1); } } Context createContext(const char* source, const char* options) { Context cl; cl_int err; err = clGetPlatformIDs(1, &cl.platform, NULL); checkError(err, "getting platform"); checkOclgrindPlatform(cl.platform); err = clGetDeviceIDs(cl.platform, CL_DEVICE_TYPE_ALL, 1, &cl.device, NULL); checkError(err, "getting device"); cl.context = clCreateContext(NULL, 1, &cl.device, NULL, NULL, &err); checkError(err, "creating context"); cl.queue = clCreateCommandQueue(cl.context, cl.device, 0, &err); checkError(err, "creating command queue"); cl.program = clCreateProgramWithSource(cl.context, 1, &source, NULL, &err); checkError(err, "creating program"); err = clBuildProgram(cl.program, 1, &cl.device, options, NULL, NULL); if (err == CL_BUILD_PROGRAM_FAILURE) { size_t sz; clGetProgramBuildInfo(cl.program, cl.device, CL_PROGRAM_BUILD_LOG, sizeof(size_t), NULL, &sz); char* buildLog = malloc(++sz); clGetProgramBuildInfo(cl.program, cl.device, CL_PROGRAM_BUILD_LOG, sz, buildLog, NULL); fprintf(stderr, "%s\n", buildLog); } checkError(err, "building program"); return cl; } void releaseContext(Context cl) { clReleaseProgram(cl.program); clReleaseCommandQueue(cl.queue); clReleaseContext(cl.context); } Oclgrind-21.10/tests/common/common.h000066400000000000000000000007031413315665100173560ustar00rootroot00000000000000#define CL_TARGET_OPENCL_VERSION 300 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #include typedef struct { cl_platform_id platform; cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; } Context; void checkError(cl_int err, const char* operation); void checkOclgrindPlatform(cl_platform_id platform); Context createContext(const char* source, const char* options); void releaseContext(Context cl); Oclgrind-21.10/tests/kernels/000077500000000000000000000000001413315665100160705ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/CMakeLists.txt000066400000000000000000000015301413315665100206270ustar00rootroot00000000000000# CMakeLists.txt (Oclgrind) # Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, # University of Bristol. All rights reserved. # # This program is provided under a three-clause BSD license. For full # license terms please see the LICENSE file distributed with this # source code. 
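# Each line of the TESTS file names one kernel test; the matching .sim file provides the simulator input for that test.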
# Add kernel tests file(READ TESTS KERNEL_TESTS) string(REPLACE "\n" ";" KERNEL_TESTS ${KERNEL_TESTS}) foreach(test ${KERNEL_TESTS}) add_test( NAME ${test} COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/run_test.py $ ${CMAKE_SOURCE_DIR}/tests/kernels/${test}.sim) endforeach(${test}) # Set PCH directory set_tests_properties(${KERNEL_TESTS} PROPERTIES ENVIRONMENT "OCLGRIND_PCH_DIR=${CMAKE_BINARY_DIR}/include/oclgrind") # Expected failures set_tests_properties(${XFAIL} PROPERTIES WILL_FAIL TRUE) Oclgrind-21.10/tests/kernels/TESTS000066400000000000000000000052121413315665100167150ustar00rootroot00000000000000alignment/packed alignment/unaligned async_copy/async_copy async_copy/async_copy_divergent async_copy/async_copy_global_race async_copy/async_copy_local_race async_copy/async_copy_loop async_copy/async_copy_loop_divergent async_copy/async_copy_single_wi async_copy/async_copy_unwaited atomics/atomic_cmpxchg_false_race atomics/atomic_cmpxchg_read_race atomics/atomic_cmpxchg_write_race atomics/atomic_global_fence atomics/atomic_global_fence_race atomics/atomic_increment atomics/atomic_intergroup_race atomics/atomic_local_fence atomics/atomic_minmax_signed atomics/atomic_race_after atomics/atomic_race_before atomics/atomic_same_workitem atomics/atom_add barrier/barrier_different_instructions barrier/barrier_divergence bugs/byval_function_argument bugs/const_gep_expr_pointee_type bugs/false_warning_vector_argument bugs/gvn_arbitrary_integers bugs/kernel_struct_argument bugs/llvm_bswap bugs/llvm_memcpyopt_bug bugs/many_alloca bugs/multidim_array_in_struct bugs/null_argument bugs/rhadd_overflow bugs/sroa_addrspace_cast bugs/write_vector_write_only_fp data-race/broadcast data-race/global_fence data-race/global_only_fence data-race/global_read_write_race data-race/global_write_write_race data-race/increment data-race/intergroup_hidden_race data-race/intragroup_hidden_race data-race/intergroup_race data-race/local_only_fence data-race/local_read_write_race data-race/local_write_write_race data-race/uniform_write_race interactive/struct_member memcheck/async_copy_out_of_bounds memcheck/atomic_out_of_bounds memcheck/casted_static_array memcheck/dereference_null memcheck/fake_out_of_bounds memcheck/read_out_of_bounds memcheck/read_write_only_memory memcheck/static_array memcheck/static_array_padded_struct memcheck/write_out_of_bounds memcheck/write_read_only_memory misc/array misc/global_variables misc/lvalue_loads misc/non_uniform_work_groups misc/printf misc/program_scope_constant_array misc/reduce misc/switch_case misc/vecadd misc/vector_argument uninitialized/padded_nested_struct_memcpy uninitialized/padded_struct_alloca_fp uninitialized/padded_struct_memcpy_fp uninitialized/partially_uninitialized_fract uninitialized/private_array_initializer_list uninitialized/uninitialized_global_buffer uninitialized/uninitialized_address uninitialized/uninitialized_local_array uninitialized/uninitialized_local_ptr uninitialized/uninitialized_local_variable uninitialized/uninitialized_packed_struct_memcpy uninitialized/uninitialized_padded_struct_memcpy uninitialized/uninitialized_padded_nested_struct_memcpy uninitialized/uninitialized_private_array wait_event/wait_event_chained wait_event/wait_event_divergent wait_event/wait_event_duplicates wait_event/wait_event_invalid 
Oclgrind-21.10/tests/kernels/alignment/000077500000000000000000000000001413315665100200465ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/alignment/packed.cl000066400000000000000000000002001413315665100216050ustar00rootroot00000000000000struct __attribute__((packed)) Foo { char a; int b; }; kernel void packed(struct Foo x, global int *out) { *out = x.b; } Oclgrind-21.10/tests/kernels/alignment/packed.ref000066400000000000000000000000701413315665100217700ustar00rootroot00000000000000EXACT Argument 'out': 4 bytes EXACT out[0] = 33554434 Oclgrind-21.10/tests/kernels/alignment/packed.sim000066400000000000000000000001371413315665100220100ustar00rootroot00000000000000packed.cl packed 1 1 1 1 1 1 0x01 0x02 0x00 0x00 0x02 Oclgrind-21.10/tests/kernels/alignment/unaligned.cl000066400000000000000000000002571413315665100223400ustar00rootroot00000000000000kernel void unaligned(global int *in, global int *out) { global char *char_ptr = (global char*)in + 2; global int *address = (global int*)char_ptr; *out = *address; } Oclgrind-21.10/tests/kernels/alignment/unaligned.ref000066400000000000000000000002061413315665100225100ustar00rootroot00000000000000ERROR Invalid memory load - source pointer is not aligned to the pointed type EXACT Argument 'out': 4 bytes EXACT out[0] = 2752512 Oclgrind-21.10/tests/kernels/alignment/unaligned.sim000066400000000000000000000001121413315665100225200ustar00rootroot00000000000000unaligned.cl unaligned 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/async_copy/000077500000000000000000000000001413315665100202375ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/async_copy/async_copy.cl000066400000000000000000000003671413315665100227340ustar00rootroot00000000000000kernel void async_copy(global int *data, local int *scratch) { event_t event = async_work_group_copy(scratch, data, get_local_size(0), 0); wait_group_events(1, &event); int i = get_local_id(0); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-21.10/tests/kernels/async_copy/async_copy.ref000066400000000000000000000001601413315665100231010ustar00rootroot00000000000000EXACT Argument 'data': 16 bytes EXACT data[0] = 3 EXACT data[1] = 2 EXACT data[2] = 1 EXACT data[3] = 0 Oclgrind-21.10/tests/kernels/async_copy/async_copy.sim000066400000000000000000000001131413315665100231130ustar00rootroot00000000000000async_copy.cl async_copy 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/async_copy/async_copy_divergent.cl000066400000000000000000000005011413315665100247710ustar00rootroot00000000000000kernel void async_copy_divergent(global int *data, local int *scratch) { int i = get_local_id(0); size_t size = get_local_size(0); if (i == size-1) { size = 1; } event_t event = async_work_group_copy(scratch, data, size, 0); wait_group_events(1, &event); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-21.10/tests/kernels/async_copy/async_copy_divergent.ref000066400000000000000000000002431413315665100251520ustar00rootroot00000000000000ERROR Work-group divergence detected (async copy) EXACT Argument 'data': 16 bytes EXACT data[0] = 3 EXACT data[1] = 2 EXACT data[2] = 1 EXACT data[3] = 0 Oclgrind-21.10/tests/kernels/async_copy/async_copy_divergent.sim000066400000000000000000000001371413315665100251700ustar00rootroot00000000000000async_copy_divergent.cl async_copy_divergent 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/async_copy/async_copy_global_race.cl000066400000000000000000000004311413315665100252360ustar00rootroot00000000000000kernel void async_copy_global_race(global int *data, local int *scratch) { int i = get_local_id(0); 
scratch[i] = i; barrier(CLK_LOCAL_MEM_FENCE); data[i] = 0; event_t event = async_work_group_copy(data, scratch, get_local_size(0), 0); wait_group_events(1, &event); } Oclgrind-21.10/tests/kernels/async_copy/async_copy_global_race.ref000066400000000000000000000003701413315665100254160ustar00rootroot00000000000000ERROR Write-write data race at global memory ERROR Write-write data race at global memory ERROR Write-write data race at global memory EXACT Argument 'data': 16 bytes EXACT data[0] = 0 EXACT data[1] = 1 EXACT data[2] = 2 EXACT data[3] = 3 Oclgrind-21.10/tests/kernels/async_copy/async_copy_global_race.sim000066400000000000000000000001431413315665100254300ustar00rootroot00000000000000async_copy_global_race.cl async_copy_global_race 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/async_copy/async_copy_local_race.cl000066400000000000000000000004251413315665100250730ustar00rootroot00000000000000kernel void async_copy_local_race(global int *data, local int *scratch) { int i = get_local_id(0); scratch[i] = 0; event_t event = async_work_group_copy(scratch, data, get_local_size(0), 0); wait_group_events(1, &event); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-21.10/tests/kernels/async_copy/async_copy_local_race.ref000066400000000000000000000003651413315665100252540ustar00rootroot00000000000000ERROR Write-write data race at local memory ERROR Write-write data race at local memory ERROR Write-write data race at local memory EXACT Argument 'data': 16 bytes EXACT data[0] = 3 EXACT data[1] = 2 EXACT data[2] = 1 EXACT data[3] = 0 Oclgrind-21.10/tests/kernels/async_copy/async_copy_local_race.sim000066400000000000000000000001411413315665100252600ustar00rootroot00000000000000async_copy_local_race.cl async_copy_local_race 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/async_copy/async_copy_loop.cl000066400000000000000000000005301413315665100237550ustar00rootroot00000000000000kernel void async_copy_loop(global int *data, local int *scratch) { int i = get_local_id(0); event_t event = 0; for (int j = 0; j < get_local_size(0); j++) { int offset = j; event = async_work_group_copy(scratch+offset, data+offset, 1, event); } wait_group_events(1, &event); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-21.10/tests/kernels/async_copy/async_copy_loop.ref000066400000000000000000000001601413315665100241320ustar00rootroot00000000000000EXACT Argument 'data': 16 bytes EXACT data[0] = 3 EXACT data[1] = 2 EXACT data[2] = 1 EXACT data[3] = 0 Oclgrind-21.10/tests/kernels/async_copy/async_copy_loop.sim000066400000000000000000000001251413315665100241470ustar00rootroot00000000000000async_copy_loop.cl async_copy_loop 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/async_copy/async_copy_loop_divergent.cl000066400000000000000000000006331413315665100260300ustar00rootroot00000000000000kernel void async_copy_loop_divergent(global int *data, local int *scratch) { int i = get_local_id(0); event_t event = 0; for (int j = 0; j < get_local_size(0); j++) { int offset = j; if (i == 2 && j == 2) { offset = 0; } event = async_work_group_copy(scratch+offset, data+offset, 1, event); } wait_group_events(1, &event); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-21.10/tests/kernels/async_copy/async_copy_loop_divergent.ref000066400000000000000000000002431413315665100262030ustar00rootroot00000000000000ERROR Work-group divergence detected (async copy) EXACT Argument 'data': 16 bytes EXACT data[0] = 3 EXACT data[1] = 2 EXACT data[2] = 1 EXACT data[3] = 0 
Oclgrind-21.10/tests/kernels/async_copy/async_copy_loop_divergent.sim000066400000000000000000000001511413315665100262150ustar00rootroot00000000000000async_copy_loop_divergent.cl async_copy_loop_divergent 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/async_copy/async_copy_single_wi.cl000066400000000000000000000006261413315665100247720ustar00rootroot00000000000000kernel void async_copy_single_wi(global int *data, local int *scratch) { int i = get_local_id(0); event_t event = async_work_group_copy(scratch, data, get_local_size(0), 0); if (i == 0) { // An extra copy that will only be registered by one work-item event = async_work_group_copy(scratch, data, 1, event); } wait_group_events(1, &event); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-21.10/tests/kernels/async_copy/async_copy_single_wi.ref000066400000000000000000000002431413315665100251430ustar00rootroot00000000000000ERROR Work-group divergence detected (async copy) EXACT Argument 'data': 16 bytes EXACT data[0] = 3 EXACT data[1] = 2 EXACT data[2] = 1 EXACT data[3] = 0 Oclgrind-21.10/tests/kernels/async_copy/async_copy_single_wi.sim000066400000000000000000000001371413315665100251610ustar00rootroot00000000000000async_copy_single_wi.cl async_copy_single_wi 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/async_copy/async_copy_unwaited.cl000066400000000000000000000003271413315665100246300ustar00rootroot00000000000000kernel void async_copy_unwaited(global int *data, local int *scratch) { event_t event = async_work_group_copy(scratch, data, get_local_size(0), 0); int i = get_local_id(0); data[get_local_size(0)-i-1] = i; } Oclgrind-21.10/tests/kernels/async_copy/async_copy_unwaited.ref000066400000000000000000000002451413315665100250050ustar00rootroot00000000000000ERROR Work-item finished without waiting for events EXACT Argument 'data': 16 bytes EXACT data[0] = 3 EXACT data[1] = 2 EXACT data[2] = 1 EXACT data[3] = 0 Oclgrind-21.10/tests/kernels/async_copy/async_copy_unwaited.sim000066400000000000000000000001351413315665100250170ustar00rootroot00000000000000async_copy_unwaited.cl async_copy_unwaited 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/atomics/000077500000000000000000000000001413315665100175275ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/atomics/atom_add.cl000066400000000000000000000002161413315665100216160ustar00rootroot00000000000000#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable kernel void _atom_add(global ulong *data) { atom_add(data, (ulong)UINT_MAX); } Oclgrind-21.10/tests/kernels/atomics/atom_add.ref000066400000000000000000000000751413315665100217770ustar00rootroot00000000000000EXACT Argument 'data': 8 bytes EXACT data[0] = 17179869180 Oclgrind-21.10/tests/kernels/atomics/atom_add.sim000066400000000000000000000000701413315665100220060ustar00rootroot00000000000000atom_add.cl _atom_add 4 1 1 1 1 1 Oclgrind-21.10/tests/kernels/atomics/atomic_cmpxchg_false_race.cl000066400000000000000000000012201413315665100251730ustar00rootroot00000000000000kernel void atomic_cmpxchg_false_race(global int *data, local int *scratch) { int l = get_local_id(0); if (l == 0) { scratch[0] = 0; } barrier(CLK_LOCAL_MEM_FENCE); bool done = false; int before, old; int result; for (int i = 0; i < get_local_size(0); i++) { barrier(CLK_LOCAL_MEM_FENCE); before = scratch[0]; barrier(CLK_LOCAL_MEM_FENCE); if (!done) { old = atomic_cmpxchg(scratch, before, before+1); if (old == before) { done = true; result = scratch[0]; } } } barrier(CLK_LOCAL_MEM_FENCE); if (l == 0) { *data = *scratch; } data[l+1] = result; } 
Oclgrind-21.10/tests/kernels/atomics/atomic_cmpxchg_false_race.ref000066400000000000000000000002041413315665100253520ustar00rootroot00000000000000EXACT Argument 'data': 20 bytes EXACT data[0] = 4 EXACT data[1] = 1 EXACT data[2] = 2 EXACT data[3] = 3 EXACT data[4] = 4 Oclgrind-21.10/tests/kernels/atomics/atomic_cmpxchg_false_race.sim000066400000000000000000000001431413315665100253700ustar00rootroot00000000000000atomic_cmpxchg_false_race.cl atomic_cmpxchg_false_race 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/atomics/atomic_cmpxchg_read_race.cl000066400000000000000000000002641413315665100250230ustar00rootroot00000000000000kernel void atomic_cmpxchg_read_race(global int *data) { int i = get_global_id(0); if (i == 0) { *data = 0; } else { atomic_cmpxchg(data, 0, 0x01000001); } } Oclgrind-21.10/tests/kernels/atomics/atomic_cmpxchg_read_race.ref000066400000000000000000000002131413315665100251730ustar00rootroot00000000000000ERROR Read-write data race at global memory ERROR Write-write data race at global memory EXACT Argument 'data': 4 bytes MATCH data[0] = Oclgrind-21.10/tests/kernels/atomics/atomic_cmpxchg_read_race.sim000066400000000000000000000001231413315665100252070ustar00rootroot00000000000000atomic_cmpxchg_read_race.cl atomic_cmpxchg_read_race 2 1 1 2 1 1 -1 Oclgrind-21.10/tests/kernels/atomics/atomic_cmpxchg_write_race.cl000066400000000000000000000002341413315665100252370ustar00rootroot00000000000000kernel void atomic_cmpxchg_write_race(global int *data) { int i = get_global_id(0); if (i == 0) { *data = 0; } atomic_cmpxchg(data, i, 42); } Oclgrind-21.10/tests/kernels/atomics/atomic_cmpxchg_write_race.ref000066400000000000000000000001411413315665100254120ustar00rootroot00000000000000ERROR Read-write data race at global memory EXACT Argument 'data': 4 bytes EXACT data[0] = 42 Oclgrind-21.10/tests/kernels/atomics/atomic_cmpxchg_write_race.sim000066400000000000000000000001251413315665100254300ustar00rootroot00000000000000atomic_cmpxchg_write_race.cl atomic_cmpxchg_write_race 2 1 1 2 1 1 -1 Oclgrind-21.10/tests/kernels/atomics/atomic_global_fence.cl000066400000000000000000000005251413315665100240050ustar00rootroot00000000000000kernel void atomic_global_fence(global int *data, global int *scratch) { int i = get_global_id(0); int l = get_local_id(0); int g = get_group_id(0); if (l == 0) { scratch[g] = 0; } barrier(CLK_GLOBAL_MEM_FENCE); atomic_add(scratch+g, i); barrier(CLK_GLOBAL_MEM_FENCE); if (l == 0) { data[g] = scratch[g]; } } Oclgrind-21.10/tests/kernels/atomics/atomic_global_fence.ref000066400000000000000000000001101413315665100241510ustar00rootroot00000000000000EXACT Argument 'data': 8 bytes EXACT data[0] = 6 EXACT data[1] = 22 Oclgrind-21.10/tests/kernels/atomics/atomic_global_fence.sim000066400000000000000000000001361413315665100241750ustar00rootroot00000000000000atomic_global_fence.cl atomic_global_fence 8 1 1 4 1 1 Oclgrind-21.10/tests/kernels/atomics/atomic_global_fence_race.cl000066400000000000000000000004131413315665100247730ustar00rootroot00000000000000kernel void atomic_global_fence_race(global int *data, global int *scratch) { int i = get_global_id(0); int l = get_local_id(0); int g = get_group_id(0); atomic_add(scratch, i); barrier(CLK_GLOBAL_MEM_FENCE); if (l == 0) { data[g] = *scratch; } } Oclgrind-21.10/tests/kernels/atomics/atomic_global_fence_race.ref000066400000000000000000000002341413315665100251520ustar00rootroot00000000000000ERROR Read-write data race at global memory ERROR Read-write data race at global memory EXACT Argument 'data': 8 bytes MATCH data[0] = MATCH 
data[1] = Oclgrind-21.10/tests/kernels/atomics/atomic_global_fence_race.sim000066400000000000000000000001471413315665100251710ustar00rootroot00000000000000atomic_global_fence_race.cl atomic_global_fence_race 8 1 1 4 1 1 Oclgrind-21.10/tests/kernels/atomics/atomic_increment.cl000066400000000000000000000001071413315665100233650ustar00rootroot00000000000000kernel void atomic_increment(global int *data) { atomic_inc(data); } Oclgrind-21.10/tests/kernels/atomics/atomic_increment.ref000066400000000000000000000000631413315665100235440ustar00rootroot00000000000000EXACT Argument 'data': 4 bytes EXACT data[0] = 4 Oclgrind-21.10/tests/kernels/atomics/atomic_increment.sim000066400000000000000000000001071413315665100235570ustar00rootroot00000000000000atomic_increment.cl atomic_increment 4 1 1 1 1 1 Oclgrind-21.10/tests/kernels/atomics/atomic_intergroup_race.cl000066400000000000000000000002571413315665100245770ustar00rootroot00000000000000kernel void atomic_intergroup_race(global int *data) { int i = get_global_id(0); if (i == 0) { *data = 0; } barrier(CLK_GLOBAL_MEM_FENCE); atomic_inc(data); } Oclgrind-21.10/tests/kernels/atomics/atomic_intergroup_race.ref000066400000000000000000000002131413315665100247450ustar00rootroot00000000000000ERROR Read-write data race at global memory ERROR Write-write data race at global memory EXACT Argument 'data': 4 bytes MATCH data[0] = Oclgrind-21.10/tests/kernels/atomics/atomic_intergroup_race.sim000066400000000000000000000001241413315665100247620ustar00rootroot00000000000000atomic_intergroup_race.cl atomic_intergroup_race 8 1 1 4 1 1 Oclgrind-21.10/tests/kernels/atomics/atomic_local_fence.cl000066400000000000000000000005131413315665100236340ustar00rootroot00000000000000kernel void atomic_local_fence(global int *data, local int *scratch) { int i = get_global_id(0); int l = get_local_id(0); int g = get_group_id(0); if (l == 0) { *scratch = 0; } barrier(CLK_LOCAL_MEM_FENCE); atomic_add(scratch, i); barrier(CLK_LOCAL_MEM_FENCE); if (l == 0) { data[g] = *scratch; } } Oclgrind-21.10/tests/kernels/atomics/atomic_local_fence.ref000066400000000000000000000001101413315665100240030ustar00rootroot00000000000000EXACT Argument 'data': 8 bytes EXACT data[0] = 6 EXACT data[1] = 22 Oclgrind-21.10/tests/kernels/atomics/atomic_local_fence.sim000066400000000000000000000001241413315665100240240ustar00rootroot00000000000000atomic_local_fence.cl atomic_local_fence 8 1 1 4 1 1 Oclgrind-21.10/tests/kernels/atomics/atomic_minmax_signed.cl000066400000000000000000000005541413315665100242310ustar00rootroot00000000000000kernel void atomic_minmax_signed(global int *data) { atomic_min(data+0, -8); atomic_min(data+1, -6); atomic_min(data+2, 3); atomic_min(data+3, -3); atomic_min(data+4, 6); atomic_min(data+5, 8); atomic_max(data+6, -8); atomic_max(data+7, -6); atomic_max(data+8, 3); atomic_max(data+9, -3); atomic_max(data+10, 6); atomic_max(data+11, 8); } Oclgrind-21.10/tests/kernels/atomics/atomic_minmax_signed.ref000066400000000000000000000004301413315665100244000ustar00rootroot00000000000000EXACT Argument 'data': 48 bytes EXACT data[0] = -8 EXACT data[1] = -7 EXACT data[2] = -7 EXACT data[3] = -3 EXACT data[4] = 6 EXACT data[5] = 7 EXACT data[6] = -7 EXACT data[7] = -6 EXACT data[8] = 3 EXACT data[9] = 7 EXACT data[10] = 7 EXACT data[11] = 8 Oclgrind-21.10/tests/kernels/atomics/atomic_minmax_signed.sim000066400000000000000000000001531413315665100244160ustar00rootroot00000000000000atomic_minmax_signed.cl atomic_minmax_signed 1 1 1 1 1 1 -7 -7 -7 7 7 7 -7 -7 -7 7 7 7 
Oclgrind-21.10/tests/kernels/atomics/atomic_race_after.cl000066400000000000000000000002511413315665100234740ustar00rootroot00000000000000kernel void atomic_race_after(global int *data, global int *output) { atomic_inc(data); if (get_global_id(0) == get_global_size(0)-1) { *output = *data; } } Oclgrind-21.10/tests/kernels/atomics/atomic_race_after.ref000066400000000000000000000001421413315665100236510ustar00rootroot00000000000000ERROR Read-write data race at global memory EXACT Argument 'output': 4 bytes MATCH output[0] = Oclgrind-21.10/tests/kernels/atomics/atomic_race_after.sim000066400000000000000000000001311413315665100236630ustar00rootroot00000000000000atomic_race_after.cl atomic_race_after 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/atomics/atomic_race_before.cl000066400000000000000000000001751413315665100236420ustar00rootroot00000000000000kernel void atomic_race_before(global int *data) { if (get_global_id(0) == 0) { *data = 0; } atomic_dec(data); } Oclgrind-21.10/tests/kernels/atomics/atomic_race_before.ref000066400000000000000000000005601413315665100240160ustar00rootroot00000000000000ERROR Read-write data race at global memory address ERROR Write-write data race at global memory address ERROR Read-write data race at global memory address ERROR Write-write data race at global memory address ERROR Read-write data race at global memory address ERROR Write-write data race at global memory address EXACT Argument 'data': 4 bytes EXACT data[0] = -4 Oclgrind-21.10/tests/kernels/atomics/atomic_race_before.sim000066400000000000000000000001131413315665100240240ustar00rootroot00000000000000atomic_race_before.cl atomic_race_before 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/atomics/atomic_same_workitem.cl000066400000000000000000000003321413315665100242470ustar00rootroot00000000000000kernel void atomic_same_workitem(global int *data) { int i = get_global_id(0); if ((i % 2) == 0) { data[i] = 0; atomic_inc(data+i); } else { atomic_inc(data+i); data[i] = data[i] + 1; } } Oclgrind-21.10/tests/kernels/atomics/atomic_same_workitem.ref000066400000000000000000000001601413315665100244240ustar00rootroot00000000000000EXACT Argument 'data': 16 bytes EXACT data[0] = 1 EXACT data[1] = 2 EXACT data[2] = 1 EXACT data[3] = 2 Oclgrind-21.10/tests/kernels/atomics/atomic_same_workitem.sim000066400000000000000000000001201413315665100244340ustar00rootroot00000000000000atomic_same_workitem.cl atomic_same_workitem 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/barrier/000077500000000000000000000000001413315665100175165ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/barrier/barrier_different_instructions.cl000066400000000000000000000003651413315665100263420ustar00rootroot00000000000000kernel void barrier_different_instructions(global int *data) { int i = get_global_id(0); if (i == 0) { data[0] = 42; barrier(CLK_GLOBAL_MEM_FENCE); } else { barrier(CLK_GLOBAL_MEM_FENCE); data[i] = i + data[0]; } } Oclgrind-21.10/tests/kernels/barrier/barrier_different_instructions.ref000066400000000000000000000004021413315665100265100ustar00rootroot00000000000000ERROR Work-group divergence detected (barrier) ERROR Work-group divergence detected (barrier) ERROR Work-group divergence detected (barrier) EXACT Argument 'data': 16 bytes EXACT data[0] = 42 EXACT data[1] = 43 EXACT data[2] = 44 EXACT data[3] = 45 Oclgrind-21.10/tests/kernels/barrier/barrier_different_instructions.sim000066400000000000000000000001441413315665100265270ustar00rootroot00000000000000barrier_different_instructions.cl barrier_different_instructions 4 1 1 4 1 1 
Oclgrind-21.10/tests/kernels/barrier/barrier_divergence.cl000066400000000000000000000002311413315665100236530ustar00rootroot00000000000000kernel void barrier_divergence(global int *data) { int i = get_global_id(0); if (i != 0) { barrier(CLK_GLOBAL_MEM_FENCE); } data[i] = i; } Oclgrind-21.10/tests/kernels/barrier/barrier_divergence.ref000066400000000000000000000002401413315665100240310ustar00rootroot00000000000000ERROR Work-group divergence detected (barrier) EXACT Argument 'data': 16 bytes EXACT data[0] = 0 EXACT data[1] = 1 EXACT data[2] = 2 EXACT data[3] = 3 Oclgrind-21.10/tests/kernels/barrier/barrier_divergence.sim000066400000000000000000000001141413315665100240450ustar00rootroot00000000000000barrier_divergence.cl barrier_divergence 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/bugs/000077500000000000000000000000001413315665100170305ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/bugs/byval_function_argument.cl000066400000000000000000000003601413315665100242730ustar00rootroot00000000000000union U { uint a; uint b; }; uint func(union U value) { uint ret = value.a; value.b = 777; return ret; } kernel void byval_function_argument(global uint *output) { union U u = {42}; output[0] = func(u); output[1] = u.b; } Oclgrind-21.10/tests/kernels/bugs/byval_function_argument.ref000066400000000000000000000001171413315665100244510ustar00rootroot00000000000000EXACT Argument 'output': 8 bytes EXACT output[0] = 42 EXACT output[1] = 42 Oclgrind-21.10/tests/kernels/bugs/byval_function_argument.sim000066400000000000000000000001251413315665100244640ustar00rootroot00000000000000byval_function_argument.cl byval_function_argument 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/bugs/const_gep_expr_pointee_type.cl000066400000000000000000000002511413315665100251510ustar00rootroot00000000000000#pragma clang diagnostic ignored "-Wunused-value" struct S0 { int d; long b; } fn1() { struct S0 a = {3}; a.d; return a; } __kernel void entry() { fn1(); } Oclgrind-21.10/tests/kernels/bugs/const_gep_expr_pointee_type.sim000066400000000000000000000000611413315665100253420ustar00rootroot00000000000000const_gep_expr_pointee_type.cl entry 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/bugs/false_warning_vector_argument.cl000066400000000000000000000002611413315665100254520ustar00rootroot00000000000000kernel void false_warning_vector_argument(int16 arg, global int8 *res) { int8 v = (int8)(1,2,3,4,5,6,7,8); int16 add = arg + v.s0011223344556677; *res = add.lo; } Oclgrind-21.10/tests/kernels/bugs/false_warning_vector_argument.ref000066400000000000000000000002671413315665100256360ustar00rootroot00000000000000EXACT Argument 'res': 32 bytes EXACT res[0] = 1 EXACT res[1] = 1 EXACT res[2] = 2 EXACT res[3] = 2 EXACT res[4] = 3 EXACT res[5] = 3 EXACT res[6] = 4 EXACT res[7] = 4 Oclgrind-21.10/tests/kernels/bugs/false_warning_vector_argument.sim000066400000000000000000000001631413315665100256450ustar00rootroot00000000000000false_warning_vector_argument.cl false_warning_vector_argument 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/bugs/gvn_arbitrary_integers.cl000066400000000000000000000003261413315665100241220ustar00rootroot00000000000000__kernel void gvn_arbitrary_integers(__global int *source, __global int *dest) { size_t i = get_global_id(0); int3 tmp = 0; tmp.S2 = source[i]; vstore3(tmp, 0, dest); } Oclgrind-21.10/tests/kernels/bugs/gvn_arbitrary_integers.ref000066400000000000000000000001351413315665100242760ustar00rootroot00000000000000EXACT Argument 'dest': 12 bytes EXACT dest[0] = 0 EXACT dest[1] = 0 EXACT dest[2] = 42 
Oclgrind-21.10/tests/kernels/bugs/gvn_arbitrary_integers.sim000066400000000000000000000001451413315665100243130ustar00rootroot00000000000000gvn_arbitrary_integers.cl gvn_arbitrary_integers 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/bugs/kernel_struct_argument.cl000066400000000000000000000002411413315665100241330ustar00rootroot00000000000000typedef struct { float a; float b; float c; } Structure; kernel void kernel_struct_argument(Structure x, global float *out) { *out = x.a * x.b + x.c; } Oclgrind-21.10/tests/kernels/bugs/kernel_struct_argument.ref000066400000000000000000000000631413315665100243130ustar00rootroot00000000000000EXACT Argument 'out': 4 bytes EXACT out[0] = 144 Oclgrind-21.10/tests/kernels/bugs/kernel_struct_argument.sim000066400000000000000000000001541413315665100243300ustar00rootroot00000000000000kernel_struct_argument.cl kernel_struct_argument 1 1 1 1 1 1 42 3 18 Oclgrind-21.10/tests/kernels/bugs/llvm_bswap.cl000066400000000000000000000004001413315665100215100ustar00rootroot00000000000000kernel void test(global uint *input, global uint *output) { for (unsigned int i = 0; i < 4; i++) { uint word = input[i]; output[i] = ((word & 0xff) << 24) | ((word & 0xff00) << 8) | ((word & 0xff0000) >> 8) | ((word & 0xff000000) >> 24); } } Oclgrind-21.10/tests/kernels/bugs/llvm_bswap.ref000066400000000000000000000002401413315665100216700ustar00rootroot00000000000000 EXACT Argument 'output': 16 bytes EXACT output[0] = 0x01000000 EXACT output[1] = 0x00000010 EXACT output[2] = 0x78563412 EXACT output[3] = 0x45342312 Oclgrind-21.10/tests/kernels/bugs/llvm_bswap.sim000066400000000000000000000001651413315665100217120ustar00rootroot00000000000000llvm_bswap.cl test 1 1 1 1 1 1 0x00000001 0x10000000 0x12345678 0x12233445 Oclgrind-21.10/tests/kernels/bugs/llvm_memcpyopt_bug.cl000066400000000000000000000002441413315665100232540ustar00rootroot00000000000000typedef struct { int a; int b; int c; } S; S foo(S a) { return a; } kernel void llvm_memcpyopt_bug(global S *out) { S a = {7,7,7}; out[0] = foo(a); } Oclgrind-21.10/tests/kernels/bugs/llvm_memcpyopt_bug.ref000066400000000000000000000001311413315665100234250ustar00rootroot00000000000000 EXACT Argument 'out': 12 bytes EXACT out[0] = 7 EXACT out[1] = 7 EXACT out[2] = 7 Oclgrind-21.10/tests/kernels/bugs/llvm_memcpyopt_bug.sim000066400000000000000000000001201413315665100234370ustar00rootroot00000000000000llvm_memcpyopt_bug.cl llvm_memcpyopt_bug 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/bugs/many_alloca.cl000066400000000000000000000003521413315665100216270ustar00rootroot00000000000000void bar(int *x) { *x += 1; } int foo() { int x = 0; bar(&x); return x; } kernel void many_alloca(global int *data, int n) { int x = 0; for (int i = 0; i < n; i++) { x += foo(); } data[get_global_id(0)] = x; } Oclgrind-21.10/tests/kernels/bugs/many_alloca.ref000066400000000000000000000000701413315665100220020ustar00rootroot00000000000000EXACT Argument 'data': 4 bytes EXACT data[0] = 100000 Oclgrind-21.10/tests/kernels/bugs/many_alloca.sim000066400000000000000000000001161413315665100220170ustar00rootroot00000000000000many_alloca.cl many_alloca 1 1 1 1 1 1 100000 Oclgrind-21.10/tests/kernels/bugs/multidim_array_in_struct.cl000066400000000000000000000011021413315665100244560ustar00rootroot00000000000000// // Issue #64 on GitHub: // https://github.com/jrprice/Oclgrind/issues/64 // // Required alignment for multi-dimensional arrays was incorrect. 
// struct S0 { uchar a; ulong b[2][3][1]; }; kernel void multidim_array_in_struct(global ulong *output) { struct S0 s = { 1UL, { { {1L}, {1L}, {1L} }, { {1L}, {1L}, {1L} } }, }; ulong c = 0UL; for (int i = 0; i < 2; i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 1; k++) c += s.b[i][j][k]; *output = c; } Oclgrind-21.10/tests/kernels/bugs/multidim_array_in_struct.ref000066400000000000000000000000671413315665100246450ustar00rootroot00000000000000EXACT Argument 'output': 8 bytes EXACT output[0] = 6 Oclgrind-21.10/tests/kernels/bugs/multidim_array_in_struct.sim000066400000000000000000000003461413315665100246610ustar00rootroot00000000000000# # Issue #64 on GitHub: # https://github.com/jrprice/Oclgrind/issues/64 # # Required alignment for multi-dimensional arrays was incorrect. # multidim_array_in_struct.cl multidim_array_in_struct 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/bugs/null_argument.cl000066400000000000000000000001751413315665100222270ustar00rootroot00000000000000ulong func_1(ulong * p_1) { return 1; } kernel void null_argument(global ulong *output) { *output = func_1((void*)0); } Oclgrind-21.10/tests/kernels/bugs/null_argument.ref000066400000000000000000000000671413315665100224050ustar00rootroot00000000000000EXACT Argument 'output': 8 bytes EXACT output[0] = 1 Oclgrind-21.10/tests/kernels/bugs/null_argument.sim000066400000000000000000000001011413315665100224060ustar00rootroot00000000000000null_argument.cl null_argument 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/bugs/rhadd_overflow.cl000066400000000000000000000001451413315665100223550ustar00rootroot00000000000000kernel void rhadd_overflow(global ulong *output) { output[0] = rhadd(0UL, 0xFFFFFFFFFFFFFFFFUL); } Oclgrind-21.10/tests/kernels/bugs/rhadd_overflow.ref000066400000000000000000000001111413315665100225240ustar00rootroot00000000000000 EXACT Argument 'output': 8 bytes EXACT output[0] = 0x8000000000000000 Oclgrind-21.10/tests/kernels/bugs/rhadd_overflow.sim000066400000000000000000000001071413315665100225450ustar00rootroot00000000000000rhadd_overflow.cl rhadd_overflow 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/bugs/sroa_addrspace_cast.cl000066400000000000000000000003641413315665100233370ustar00rootroot00000000000000typedef struct { float x; } DataStruct; __kernel void sroa_addrspace_cast(__global DataStruct *input, __global float *output) { size_t i = get_global_id(0); DataStruct s = input[i]; output[i] = s.x; } Oclgrind-21.10/tests/kernels/bugs/sroa_addrspace_cast.ref000066400000000000000000000000731413315665100235120ustar00rootroot00000000000000EXACT Argument 'output': 4 bytes EXACT output[0] = 42.24 Oclgrind-21.10/tests/kernels/bugs/sroa_addrspace_cast.sim000066400000000000000000000001471413315665100235300ustar00rootroot00000000000000sroa_addrspace_cast.cl sroa_addrspace_cast 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/bugs/write_vector_write_only_fp.cl000066400000000000000000000001601413315665100250210ustar00rootroot00000000000000kernel void write_vector_write_only_fp(global int4 *output) { int i = get_global_id(0); output[i].x = 42; } Oclgrind-21.10/tests/kernels/bugs/write_vector_write_only_fp.ref000066400000000000000000000006141413315665100252030ustar00rootroot00000000000000EXACT Argument 'output': 64 bytes EXACT output[0] = 42 EXACT output[1] = 7 EXACT output[2] = 7 EXACT output[3] = 7 EXACT output[4] = 42 EXACT output[5] = 7 EXACT output[6] = 7 EXACT output[7] = 7 EXACT output[8] = 42 EXACT output[9] = 7 EXACT output[10] = 7 EXACT output[11] = 7 EXACT output[12] = 42 EXACT output[13] = 7 EXACT output[14] = 7 EXACT output[15] = 7 
Oclgrind-21.10/tests/kernels/bugs/write_vector_write_only_fp.sim000066400000000000000000000001371413315665100252170ustar00rootroot00000000000000write_vector_write_only_fp.cl write_vector_write_only_fp 4 1 1 1 1 1 Oclgrind-21.10/tests/kernels/data-race/000077500000000000000000000000001413315665100177115ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/data-race/broadcast.cl000066400000000000000000000001651413315665100221750ustar00rootroot00000000000000kernel void broadcast(global int *value, global int *output) { int i = get_global_id(0); output[i] = value[0]; } Oclgrind-21.10/tests/kernels/data-race/broadcast.ref000066400000000000000000000001761413315665100223550ustar00rootroot00000000000000EXACT Argument 'output': 16 bytes EXACT output[0] = 42 EXACT output[1] = 42 EXACT output[2] = 42 EXACT output[3] = 42 Oclgrind-21.10/tests/kernels/data-race/broadcast.sim000066400000000000000000000001071413315665100223630ustar00rootroot00000000000000broadcast.cl broadcast 4 1 1 1 1 1 42 Oclgrind-21.10/tests/kernels/data-race/global_fence.cl000066400000000000000000000005371413315665100226360ustar00rootroot00000000000000kernel void global_fence(global int *scratch, global int *output) { int i = get_global_id(0); int g = get_group_id(0); scratch[i] = i; barrier(CLK_GLOBAL_MEM_FENCE); if (get_local_id(0) == 0) { int x = 0; for (int l = 0; l < get_local_size(0); l++) { x += scratch[get_local_size(0)*g + l]; } output[g] = x; } } Oclgrind-21.10/tests/kernels/data-race/global_fence.ref000066400000000000000000000001751413315665100230120ustar00rootroot00000000000000EXACT Argument 'output': 16 bytes EXACT output[0] = 6 EXACT output[1] = 22 EXACT output[2] = 38 EXACT output[3] = 54 Oclgrind-21.10/tests/kernels/data-race/global_fence.sim000066400000000000000000000001221413315665100230160ustar00rootroot00000000000000global_fence.cl global_fence 16 1 1 4 1 1 Oclgrind-21.10/tests/kernels/data-race/global_only_fence.cl000066400000000000000000000005141413315665100236720ustar00rootroot00000000000000kernel void global_only_fence(local int *scratch, global int *output) { int l = get_local_id(0); int g = get_group_id(0); scratch[l] = l; barrier(CLK_GLOBAL_MEM_FENCE); if (get_local_id(0) == 0) { int x = 0; for (int i = 0; i < get_local_size(0); i++) { x += scratch[i]; } output[g] = x; } } Oclgrind-21.10/tests/kernels/data-race/global_only_fence.ref000066400000000000000000000003741413315665100240540ustar00rootroot00000000000000ERROR Read-write data race at local memory ERROR Read-write data race at local memory ERROR Read-write data race at local memory EXACT Argument 'output': 16 bytes EXACT output[0] = 6 EXACT output[1] = 0 EXACT output[2] = 0 EXACT output[3] = 0 Oclgrind-21.10/tests/kernels/data-race/global_only_fence.sim000066400000000000000000000001241413315665100240610ustar00rootroot00000000000000global_only_fence.cl global_only_fence 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/data-race/global_read_write_race.cl000066400000000000000000000002031413315665100246630ustar00rootroot00000000000000kernel void global_read_write_race(global int *data) { int i = get_global_id(0); if (i > 0) { data[i] = data[i-1]; } } Oclgrind-21.10/tests/kernels/data-race/global_read_write_race.ref000066400000000000000000000003011413315665100250400ustar00rootroot00000000000000ERROR Read-write data race at global memory ERROR Read-write data race at global memory EXACT Argument 'data': 16 bytes MATCH data[0] = MATCH data[1] = MATCH data[2] = MATCH data[3] = 
Oclgrind-21.10/tests/kernels/data-race/global_read_write_race.sim000066400000000000000000000001311413315665100250550ustar00rootroot00000000000000global_read_write_race.cl global_read_write_race 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/data-race/global_write_write_race.cl000066400000000000000000000001301413315665100251010ustar00rootroot00000000000000kernel void global_write_write_race(global int *data) { data[0] = get_global_id(0); } Oclgrind-21.10/tests/kernels/data-race/global_write_write_race.ref000066400000000000000000000002711413315665100252650ustar00rootroot00000000000000ERROR Write-write data race at global memory ERROR Write-write data race at global memory ERROR Write-write data race at global memory EXACT Argument 'data': 4 bytes MATCH data[0] = Oclgrind-21.10/tests/kernels/data-race/global_write_write_race.sim000066400000000000000000000001251413315665100252770ustar00rootroot00000000000000global_write_write_race.cl global_write_write_race 4 1 1 1 1 1 Oclgrind-21.10/tests/kernels/data-race/increment.cl000066400000000000000000000001421413315665100222120ustar00rootroot00000000000000kernel void increment(global int *data) { int i = get_global_id(0); data[i] = data[i] + 1; } Oclgrind-21.10/tests/kernels/data-race/increment.ref000066400000000000000000000001601413315665100223700ustar00rootroot00000000000000EXACT Argument 'data': 16 bytes EXACT data[0] = 1 EXACT data[1] = 2 EXACT data[2] = 3 EXACT data[3] = 4 Oclgrind-21.10/tests/kernels/data-race/increment.sim000066400000000000000000000000771413315665100224130ustar00rootroot00000000000000increment.cl increment 4 1 1 1 1 1 Oclgrind-21.10/tests/kernels/data-race/intergroup_hidden_race.cl000066400000000000000000000002661413315665100247400ustar00rootroot00000000000000kernel void intergroup_hidden_race(global int *data, global int *output) { int group = get_group_id(0); output[group] = data[0]; if (group == 1) { data[0] = group; } } Oclgrind-21.10/tests/kernels/data-race/intergroup_hidden_race.ref000066400000000000000000000001661413315665100251150ustar00rootroot00000000000000ERROR Read-write data race at global memory EXACT Argument 'output': 8 bytes MATCH output[0] = MATCH output[1] = Oclgrind-21.10/tests/kernels/data-race/intergroup_hidden_race.sim000066400000000000000000000001431413315665100251240ustar00rootroot00000000000000intergroup_hidden_race.cl intergroup_hidden_race 2 1 1 1 1 1 Oclgrind-21.10/tests/kernels/data-race/intergroup_race.cl000066400000000000000000000005311413315665100234200ustar00rootroot00000000000000kernel void intergroup_race(global int *data) { int g = get_group_id(0); if (get_local_id(0) == 0) { data[g] = g; } barrier(CLK_GLOBAL_MEM_FENCE); if (get_global_id(0) == 0) { int x = 0; for (int i = 0; i < get_num_groups(0); i++) { x += data[i]; } data[0] = x; } barrier(CLK_GLOBAL_MEM_FENCE); } Oclgrind-21.10/tests/kernels/data-race/intergroup_race.ref000066400000000000000000000001601413315665100235740ustar00rootroot00000000000000ERROR Read-write data race at global memory EXACT Argument 'data': 8 bytes MATCH data[0] = MATCH data[1] = Oclgrind-21.10/tests/kernels/data-race/intergroup_race.sim000066400000000000000000000001051413315665100236070ustar00rootroot00000000000000intergroup_race.cl intergroup_race 8 1 1 4 1 1 Oclgrind-21.10/tests/kernels/data-race/intragroup_hidden_race.cl000066400000000000000000000003121413315665100247240ustar00rootroot00000000000000kernel void intragroup_hidden_race(global int *data, global int *output) { int id = get_local_id(0); output[id] = data[0]; barrier(CLK_LOCAL_MEM_FENCE); if (id == 0) { 
data[0] = -1; } } Oclgrind-21.10/tests/kernels/data-race/intragroup_hidden_race.ref000066400000000000000000000001741413315665100251100ustar00rootroot00000000000000ERROR Read-write data race at global memory EXACT Argument 'output': 8 bytes EXACT output[0] = 42 EXACT output[1] = 42 Oclgrind-21.10/tests/kernels/data-race/intragroup_hidden_race.sim000066400000000000000000000001441413315665100251210ustar00rootroot00000000000000intragroup_hidden_race.cl intragroup_hidden_race 2 1 1 2 1 1 Oclgrind-21.10/tests/kernels/data-race/local_only_fence.cl000066400000000000000000000005421413315665100235250ustar00rootroot00000000000000kernel void local_only_fence(global int *scratch, global int *output) { int i = get_global_id(0); int g = get_group_id(0); scratch[i] = i; barrier(CLK_LOCAL_MEM_FENCE); if (get_local_id(0) == 0) { int x = 0; for (int l = 0; l < get_local_size(0); l++) { x += scratch[get_local_size(0)*g + l]; } output[g] = x; } } Oclgrind-21.10/tests/kernels/data-race/local_only_fence.ref000066400000000000000000000013561413315665100237070ustar00rootroot00000000000000ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address ERROR Read-write data race at global memory address EXACT Argument 'output': 16 bytes EXACT output[0] = 6 EXACT output[1] = 22 EXACT output[2] = 38 EXACT output[3] = 54 Oclgrind-21.10/tests/kernels/data-race/local_only_fence.sim000066400000000000000000000001321413315665100237120ustar00rootroot00000000000000local_only_fence.cl local_only_fence 16 1 1 4 1 1 Oclgrind-21.10/tests/kernels/data-race/local_read_write_race.cl000066400000000000000000000004631413315665100245250ustar00rootroot00000000000000kernel void local_read_write_race(global int *data, local int *scratch) { int l = get_local_id(0); scratch[l] = 0; barrier(CLK_LOCAL_MEM_FENCE); scratch[l] = l; if (l == 0) { int x = 0; for (int i = 0; i < get_local_size(0); i++) { x += scratch[i]; } *data = x; } } Oclgrind-21.10/tests/kernels/data-race/local_read_write_race.ref000066400000000000000000000002651413315665100247030ustar00rootroot00000000000000ERROR Read-write data race at local memory ERROR Read-write data race at local memory ERROR Read-write data race at local memory EXACT Argument 'data': 4 bytes EXACT data[0] = 0 Oclgrind-21.10/tests/kernels/data-race/local_read_write_race.sim000066400000000000000000000001331413315665100247110ustar00rootroot00000000000000local_read_write_race.cl local_read_write_race 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/data-race/local_write_write_race.cl000066400000000000000000000002571413315665100247450ustar00rootroot00000000000000kernel void local_write_write_race(global int *data, local int *scratch) { int i = get_global_id(0); *scratch = i; barrier(CLK_LOCAL_MEM_FENCE); data[i] = *scratch; } Oclgrind-21.10/tests/kernels/data-race/local_write_write_race.ref000066400000000000000000000003651413315665100251230ustar00rootroot00000000000000ERROR Write-write data race at local memory ERROR Write-write data race at local memory ERROR Write-write data race at local memory EXACT Argument 'data': 
16 bytes EXACT data[0] = 3 EXACT data[1] = 3 EXACT data[2] = 3 EXACT data[3] = 3 Oclgrind-21.10/tests/kernels/data-race/local_write_write_race.sim000066400000000000000000000001351413315665100251320ustar00rootroot00000000000000local_write_write_race.cl local_write_write_race 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/data-race/uniform_write_race.cl000066400000000000000000000001021413315665100241050ustar00rootroot00000000000000kernel void uniform_write_race(global int *data) { *data = 0; } Oclgrind-21.10/tests/kernels/data-race/uniform_write_race.ref000066400000000000000000000000631413315665100242710ustar00rootroot00000000000000EXACT Argument 'data': 4 bytes EXACT data[0] = 0 Oclgrind-21.10/tests/kernels/data-race/uniform_write_race.sim000066400000000000000000000001141413315665100243020ustar00rootroot00000000000000uniform_write_race.cl uniform_write_race 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/interactive/000077500000000000000000000000001413315665100204055ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/interactive/struct_member.cl000066400000000000000000000004021413315665100235740ustar00rootroot00000000000000struct S { int a; float b; }; kernel void struct_member(global int *i, global float *f, global struct S *out) { struct S s; local struct S t; s.a = *i; s.b = *f; t = s; t.a += 1; t.b += 0.1f; *out = t; out->a += 2; out->b += 0.2f; } Oclgrind-21.10/tests/kernels/interactive/struct_member.inp000066400000000000000000000002551413315665100237720ustar00rootroot00000000000000step step step step print s print s.a print s.b print s.c print s. step step print t.a print t.b step step step step print out[0].a print out[0].b print out->a print out->b Oclgrind-21.10/tests/kernels/interactive/struct_member.ref000066400000000000000000000013451413315665100237610ustar00rootroot00000000000000MATCH 7 kernel void struct_member(global int *i, global float *f, global struct S *out) MATCH 9 struct S s; MATCH 11 s.a = *i; MATCH 12 s.b = *f; MATCH 13 t = s; MATCH s = (raw) 0x0700000000002842 MATCH s.a = 7 MATCH s.b = 42 MATCH s.c = no member named 'c' found MATCH s. 
= no member named '' found MATCH 14 t.a += 1; MATCH 15 t.b += 0.1f; MATCH t.a = 8 MATCH t.b = 42 MATCH 16 *out = t; MATCH 17 out->a += 2; MATCH 18 out->b += 0.2f; MATCH out[0].a = 10 MATCH out[0].b = 42.3 MATCH out->a = 10 MATCH out->b = 42.3 MATCH Argument 'out': 8 bytes MATCH out[0] = 0x0A MATCH out[1] = 0x00 MATCH out[2] = 0x00 MATCH out[3] = 0x00 MATCH out[4] = 0x33 MATCH out[5] = 0x33 MATCH out[6] = 0x29 MATCH out[7] = 0x42 Oclgrind-21.10/tests/kernels/interactive/struct_member.sim000066400000000000000000000002001413315665100237620ustar00rootroot00000000000000# ARGS: -i struct_member.cl struct_member 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/memcheck/000077500000000000000000000000001413315665100176445ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/memcheck/async_copy_out_of_bounds.cl000066400000000000000000000003771413315665100252670ustar00rootroot00000000000000kernel void async_copy_out_of_bounds(local int *src, global int *dst) { int l = get_local_id(0); src[l] = l; barrier(CLK_LOCAL_MEM_FENCE); event_t event = async_work_group_copy(dst+1, src, get_local_size(0), 0); wait_group_events(1, &event); } Oclgrind-21.10/tests/kernels/memcheck/async_copy_out_of_bounds.ref000066400000000000000000000002331413315665100254340ustar00rootroot00000000000000ERROR Invalid write of size 4 at global memory EXACT Argument 'dst': 16 bytes EXACT dst[0] = 0 EXACT dst[1] = 0 EXACT dst[2] = 1 EXACT dst[3] = 2 Oclgrind-21.10/tests/kernels/memcheck/async_copy_out_of_bounds.sim000066400000000000000000000001421413315665100254470ustar00rootroot00000000000000async_copy_out_of_bounds.cl async_copy_out_of_bounds 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/memcheck/atomic_out_of_bounds.cl000066400000000000000000000001611413315665100243630ustar00rootroot00000000000000kernel void atomic_out_of_bounds(global int *counters) { int i = get_global_id(0); atomic_inc(counters+i); } Oclgrind-21.10/tests/kernels/memcheck/atomic_out_of_bounds.ref000066400000000000000000000003741413315665100245470ustar00rootroot00000000000000ERROR Invalid read of size 4 at global memory ERROR Invalid write of size 4 at global memory ERROR Uninitialized value EXACT Argument 'counters': 16 bytes EXACT counters[0] = 1 EXACT counters[1] = 1 EXACT counters[2] = 1 EXACT counters[3] = 1 Oclgrind-21.10/tests/kernels/memcheck/atomic_out_of_bounds.sim000066400000000000000000000001201413315665100245500ustar00rootroot00000000000000atomic_out_of_bounds.cl atomic_out_of_bounds 5 1 1 1 1 1 Oclgrind-21.10/tests/kernels/memcheck/casted_static_array.cl000066400000000000000000000013651413315665100242010ustar00rootroot00000000000000void transparent_crc_no_string(ulong *p1, ulong p2) { *p1 += p2; } int get_linear_global_id() { return (get_global_id(2) * get_global_size(1) + get_global_id(1)) * get_global_size(0) + get_global_id(0); } union U5 { short f0; int f3; }; struct S6 { union U5 g_75[5][7][2]; union U5 **g_91[78]; }; __kernel void casted_static_array(__global ulong *p1) { int i, j, k; struct S6 c_864; struct S6 *p_863 = &c_864; union U5 *p_863_6; struct S6 c_865 = {{{{{0xD54EL}}}}, {&p_863_6}}; c_864 = c_865; ulong crc64_context = i = 0; for (; i < 9; i++) { j = 0; { k = 0; { transparent_crc_no_string(&crc64_context, p_863->g_75[i][j][k].f0); } } } p1[get_linear_global_id()] = crc64_context; } Oclgrind-21.10/tests/kernels/memcheck/casted_static_array.ref000066400000000000000000000002561413315665100243550ustar00rootroot00000000000000ERROR exceeds static array size ERROR exceeds static array size ERROR exceeds static array size ERROR exceeds static array 
size EXACT Argument 'p1': 8 bytes MATCH p1[0] = Oclgrind-21.10/tests/kernels/memcheck/casted_static_array.sim000066400000000000000000000001151413315665100243630ustar00rootroot00000000000000casted_static_array.cl casted_static_array 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/memcheck/dereference_null.cl000066400000000000000000000001411413315665100234610ustar00rootroot00000000000000kernel void dereference_null(global int *input, global int *output) { output[0] *= input[0]; } Oclgrind-21.10/tests/kernels/memcheck/dereference_null.ref000066400000000000000000000002141413315665100236400ustar00rootroot00000000000000ERROR Invalid read of size 4 at global memory address 0x0 ERROR Uninitialized value EXACT Argument 'output': 4 bytes EXACT output[0] = 0 Oclgrind-21.10/tests/kernels/memcheck/dereference_null.sim000066400000000000000000000001161413315665100236550ustar00rootroot00000000000000dereference_null.cl dereference_null 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/memcheck/fake_out_of_bounds.cl000066400000000000000000000003101413315665100240110ustar00rootroot00000000000000struct S0 { uchar f[1]; ulong g[4]; }; __kernel void entry(__global ulong *result) { struct S0 s = {{1}, {2,3,4,5}}; struct S0 t = s; volatile int i = 0; *result = t.g[i]; } Oclgrind-21.10/tests/kernels/memcheck/fake_out_of_bounds.ref000066400000000000000000000000671413315665100242000ustar00rootroot00000000000000EXACT Argument 'result': 8 bytes EXACT result[0] = 2 Oclgrind-21.10/tests/kernels/memcheck/fake_out_of_bounds.sim000066400000000000000000000000761413315665100242140ustar00rootroot00000000000000fake_out_of_bounds.cl entry 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/memcheck/read_out_of_bounds.cl000066400000000000000000000003111413315665100240170ustar00rootroot00000000000000kernel void read_out_of_bounds(global int *a, global int *b, global int *c) { int i = get_global_id(0); if (i < 4) { c[i] = a[i] + b[i]; } else { c[i] = a[0] * (a[i] + b[i]); } } Oclgrind-21.10/tests/kernels/memcheck/read_out_of_bounds.ref000066400000000000000000000003511413315665100242010ustar00rootroot00000000000000ERROR Invalid read of size 4 at global memory ERROR Invalid read of size 4 at global memory ERROR Uninitialized value EXACT Argument 'c': 20 bytes EXACT c[0] = 0 EXACT c[1] = 2 EXACT c[2] = 4 EXACT c[3] = 6 EXACT c[4] = 0 Oclgrind-21.10/tests/kernels/memcheck/read_out_of_bounds.sim000066400000000000000000000001701413315665100242140ustar00rootroot00000000000000read_out_of_bounds.cl read_out_of_bounds 5 1 1 5 1 1 Oclgrind-21.10/tests/kernels/memcheck/read_write_only_memory.cl000066400000000000000000000002031413315665100247350ustar00rootroot00000000000000kernel void read_write_only_memory(global int *input, global int *output) { int i = get_global_id(0); output[i] += input[i]; } Oclgrind-21.10/tests/kernels/memcheck/read_write_only_memory.ref000066400000000000000000000004431413315665100251210ustar00rootroot00000000000000ERROR Invalid read from write-only buffer ERROR Invalid read from write-only buffer ERROR Invalid read from write-only buffer ERROR Invalid read from write-only buffer EXACT Argument 'output': 16 bytes EXACT output[0] = 0 EXACT output[1] = 1 EXACT output[2] = 2 EXACT output[3] = 3 Oclgrind-21.10/tests/kernels/memcheck/read_write_only_memory.sim000066400000000000000000000001601413315665100251310ustar00rootroot00000000000000read_write_only_memory.cl read_write_only_memory 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/memcheck/static_array.cl000066400000000000000000000002761413315665100226560ustar00rootroot00000000000000struct S { int a; char 
b[2]; }; kernel void static_array(global char *output) { volatile struct S s = {-1, {42, 7}}; int i = get_global_id(0); s.b[i] = i; output[i] = s.b[i]; } Oclgrind-21.10/tests/kernels/memcheck/static_array.ref000066400000000000000000000003661413315665100230340ustar00rootroot00000000000000ERROR exceeds static array size ERROR exceeds static array size ERROR exceeds static array size ERROR exceeds static array size EXACT Argument 'output': 4 bytes EXACT output[0] = 0 EXACT output[1] = 1 MATCH output[2] = MATCH output[3] = Oclgrind-21.10/tests/kernels/memcheck/static_array.sim000066400000000000000000000000771413315665100230470ustar00rootroot00000000000000static_array.cl static_array 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/memcheck/static_array_padded_struct.cl000066400000000000000000000002651413315665100255610ustar00rootroot00000000000000struct S { int a; char b[2]; }; kernel void static_array_padded_struct(global char *output) { struct S s = {-1, {42, 7}}; int i = get_global_id(0); output[i] = s.b[i]; } Oclgrind-21.10/tests/kernels/memcheck/static_array_padded_struct.ref000066400000000000000000000002671413315665100257410ustar00rootroot00000000000000ERROR exceeds static array size ERROR exceeds static array size EXACT Argument 'output': 4 bytes EXACT output[0] = 42 EXACT output[1] = 7 MATCH output[2] = MATCH output[3] = Oclgrind-21.10/tests/kernels/memcheck/static_array_padded_struct.sim000066400000000000000000000001331413315665100257450ustar00rootroot00000000000000static_array_padded_struct.cl static_array_padded_struct 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/memcheck/write_out_of_bounds.cl000066400000000000000000000002031413315665100242360ustar00rootroot00000000000000kernel void write_out_of_bounds(global int *a, global int *b, global int *c) { int i = get_global_id(0); c[i] = a[i] + b[i]; } Oclgrind-21.10/tests/kernels/memcheck/write_out_of_bounds.ref000066400000000000000000000002311413315665100244150ustar00rootroot00000000000000ERROR Invalid write of size 4 at global memory address EXACT Argument 'c': 16 bytes EXACT c[0] = 0 EXACT c[1] = 2 EXACT c[2] = 4 EXACT c[3] = 6 Oclgrind-21.10/tests/kernels/memcheck/write_out_of_bounds.sim000066400000000000000000000001721413315665100244350ustar00rootroot00000000000000write_out_of_bounds.cl write_out_of_bounds 5 1 1 5 1 1 Oclgrind-21.10/tests/kernels/memcheck/write_read_only_memory.cl000066400000000000000000000002041413315665100247360ustar00rootroot00000000000000kernel void write_read_only_memory(global int *input, global int *output) { int i = get_global_id(0); output[i] = input[i]++; } Oclgrind-21.10/tests/kernels/memcheck/write_read_only_memory.ref000066400000000000000000000004331413315665100251200ustar00rootroot00000000000000ERROR Invalid write to read-only buffer ERROR Invalid write to read-only buffer ERROR Invalid write to read-only buffer ERROR Invalid write to read-only buffer EXACT Argument 'output': 16 bytes EXACT output[0] = 0 EXACT output[1] = 1 EXACT output[2] = 2 EXACT output[3] = 3 Oclgrind-21.10/tests/kernels/memcheck/write_read_only_memory.sim000066400000000000000000000001551413315665100251350ustar00rootroot00000000000000write_read_only_memory.cl write_read_only_memory 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/misc/000077500000000000000000000000001413315665100170235ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/misc/array.cl000066400000000000000000000002621413315665100204610ustar00rootroot00000000000000kernel void array(global long16 *output) { long16 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; int i = 
get_global_id(0); long16 *foo = data; output[i] = foo[i]; } Oclgrind-21.10/tests/kernels/misc/array.ref000066400000000000000000000056661413315665100206540ustar00rootroot00000000000000EXACT Argument 'output': 1024 bytes EXACT output[0] = 0 EXACT output[1] = 0 EXACT output[2] = 0 EXACT output[3] = 0 EXACT output[4] = 0 EXACT output[5] = 0 EXACT output[6] = 0 EXACT output[7] = 0 EXACT output[8] = 0 EXACT output[9] = 0 EXACT output[10] = 0 EXACT output[11] = 0 EXACT output[12] = 0 EXACT output[13] = 0 EXACT output[14] = 0 EXACT output[15] = 0 EXACT output[16] = 1 EXACT output[17] = 1 EXACT output[18] = 1 EXACT output[19] = 1 EXACT output[20] = 1 EXACT output[21] = 1 EXACT output[22] = 1 EXACT output[23] = 1 EXACT output[24] = 1 EXACT output[25] = 1 EXACT output[26] = 1 EXACT output[27] = 1 EXACT output[28] = 1 EXACT output[29] = 1 EXACT output[30] = 1 EXACT output[31] = 1 EXACT output[32] = 2 EXACT output[33] = 2 EXACT output[34] = 2 EXACT output[35] = 2 EXACT output[36] = 2 EXACT output[37] = 2 EXACT output[38] = 2 EXACT output[39] = 2 EXACT output[40] = 2 EXACT output[41] = 2 EXACT output[42] = 2 EXACT output[43] = 2 EXACT output[44] = 2 EXACT output[45] = 2 EXACT output[46] = 2 EXACT output[47] = 2 EXACT output[48] = 3 EXACT output[49] = 3 EXACT output[50] = 3 EXACT output[51] = 3 EXACT output[52] = 3 EXACT output[53] = 3 EXACT output[54] = 3 EXACT output[55] = 3 EXACT output[56] = 3 EXACT output[57] = 3 EXACT output[58] = 3 EXACT output[59] = 3 EXACT output[60] = 3 EXACT output[61] = 3 EXACT output[62] = 3 EXACT output[63] = 3 EXACT output[64] = 4 EXACT output[65] = 4 EXACT output[66] = 4 EXACT output[67] = 4 EXACT output[68] = 4 EXACT output[69] = 4 EXACT output[70] = 4 EXACT output[71] = 4 EXACT output[72] = 4 EXACT output[73] = 4 EXACT output[74] = 4 EXACT output[75] = 4 EXACT output[76] = 4 EXACT output[77] = 4 EXACT output[78] = 4 EXACT output[79] = 4 EXACT output[80] = 5 EXACT output[81] = 5 EXACT output[82] = 5 EXACT output[83] = 5 EXACT output[84] = 5 EXACT output[85] = 5 EXACT output[86] = 5 EXACT output[87] = 5 EXACT output[88] = 5 EXACT output[89] = 5 EXACT output[90] = 5 EXACT output[91] = 5 EXACT output[92] = 5 EXACT output[93] = 5 EXACT output[94] = 5 EXACT output[95] = 5 EXACT output[96] = 6 EXACT output[97] = 6 EXACT output[98] = 6 EXACT output[99] = 6 EXACT output[100] = 6 EXACT output[101] = 6 EXACT output[102] = 6 EXACT output[103] = 6 EXACT output[104] = 6 EXACT output[105] = 6 EXACT output[106] = 6 EXACT output[107] = 6 EXACT output[108] = 6 EXACT output[109] = 6 EXACT output[110] = 6 EXACT output[111] = 6 EXACT output[112] = 7 EXACT output[113] = 7 EXACT output[114] = 7 EXACT output[115] = 7 EXACT output[116] = 7 EXACT output[117] = 7 EXACT output[118] = 7 EXACT output[119] = 7 EXACT output[120] = 7 EXACT output[121] = 7 EXACT output[122] = 7 EXACT output[123] = 7 EXACT output[124] = 7 EXACT output[125] = 7 EXACT output[126] = 7 EXACT output[127] = 7 Oclgrind-21.10/tests/kernels/misc/array.sim000066400000000000000000000000641413315665100206530ustar00rootroot00000000000000array.cl array 8 1 1 1 1 1 Oclgrind-21.10/tests/kernels/misc/global_variables.cl000066400000000000000000000005111413315665100226300ustar00rootroot00000000000000global int g_arr[] = {7, 42}; constant int c_arr[] = {-3, 56}; global int *p_g_int = &g_arr[1]; constant int *p_c_int = &c_arr[1]; kernel void global_variables(global int *output) { output[0] = g_arr[0]; output[1] = g_arr[1]; output[2] = c_arr[0]; output[3] = c_arr[1]; output[4] = *p_g_int; output[5] = *p_c_int; } 
Oclgrind-21.10/tests/kernels/misc/global_variables.ref000066400000000000000000000002531413315665100230110ustar00rootroot00000000000000EXACT Argument 'output': 24 bytes EXACT output[0] = 7 EXACT output[1] = 42 EXACT output[2] = -3 EXACT output[3] = 56 EXACT output[4] = 42 EXACT output[5] = 56 Oclgrind-21.10/tests/kernels/misc/global_variables.sim000066400000000000000000000001631413315665100230250ustar00rootroot00000000000000# ARGS: --build-options -cl-std=CL2.0 global_variables.cl global_variables 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/misc/lvalue_loads.cl000066400000000000000000000037261413315665100220250ustar00rootroot00000000000000typedef struct { char a; int b; int c; char d; } S; void va(global float4 *input, global float4 *output) { int i = get_global_id(0); output[i].z = 42.f; } void vb(global float4 *input, global float4 *output) { int i = get_global_id(0); output[i].z = 7.f; output[i].y = 42.f; } void vc(global float4 *input, global float4 *output) { int i = get_global_id(0); output[i].zy = (float2)(7.f,42.f); } void vd(global float4 *input, global float4 *output) { int i = get_global_id(0); output[i].y = output[i].z; } void ve(global float4 *input, global float4 *output) { int i = get_global_id(0); output[i].wzyx = output[i]; } void vf(global float4 *input, global float4 *output) { int i = get_global_id(0); output[i].zy = output[i].yz; } void vg(global float4 *input, global float4 *output) { int i = get_global_id(0); output[i].wzyx = input[i]; } void vh(global float4 *input, global float4 *output) { int i = get_global_id(0); output[i].zy = input[i].yz; } void vi(global float4 *input, global float4 *output) { int i = get_global_id(0); float4 x = output[i]; x.z = 42.f; output[i] = x; output[i+1] = x; } void sa(global S *input, global S *output) { int i = get_global_id(0); output[i].c = 42; } void sb(global S *input, global S *output) { int i = get_global_id(0); output[i].c = output[i].b; } void sc(global S *input, global S *output) { int i = get_global_id(0); output[i].c = input[i].b; } kernel void lvalue_loads( global float4 *vIn, global float4 *vA, global float4 *vB, global float4 *vC, global float4 *vD, global float4 *vE, global float4 *vF, global float4 *vG, global float4 *vH, global float4 *vI, global S *sIn, global S *sA, global S *sB, global S *sC, global float *nop ) { va(vIn, vA); vb(vIn, vB); vc(vIn, vC); vd(vIn, vD); ve(vIn, vE); vf(vIn, vF); vg(vIn, vG); vh(vIn, vH); vi(vIn, vI); sa(sIn, sA); sb(sIn, sB); sc(sIn, sC); } Oclgrind-21.10/tests/kernels/misc/lvalue_loads.ref000066400000000000000000000024501413315665100221740ustar00rootroot00000000000000EXACT Argument 'vA': 16 bytes EXACT vA[0] = 1 EXACT vA[1] = 2 EXACT vA[2] = 42 EXACT vA[3] = 4 EXACT Argument 'vB': 16 bytes EXACT vB[0] = 1 EXACT vB[1] = 42 EXACT vB[2] = 7 EXACT vB[3] = 4 EXACT Argument 'vC': 16 bytes EXACT vC[0] = 1 EXACT vC[1] = 42 EXACT vC[2] = 7 EXACT vC[3] = 4 EXACT Argument 'vD': 16 bytes EXACT vD[0] = 1 EXACT vD[1] = 3 EXACT vD[2] = 3 EXACT vD[3] = 4 EXACT Argument 'vE': 16 bytes EXACT vE[0] = 4 EXACT vE[1] = 3 EXACT vE[2] = 2 EXACT vE[3] = 1 EXACT Argument 'vF': 16 bytes EXACT vF[0] = 1 EXACT vF[1] = 3 EXACT vF[2] = 2 EXACT vF[3] = 4 EXACT Argument 'vG': 16 bytes EXACT vG[0] = 19 EXACT vG[1] = 18 EXACT vG[2] = 17 EXACT vG[3] = 16 EXACT Argument 'vH': 16 bytes EXACT vH[0] = 1 EXACT vH[1] = 18 EXACT vH[2] = 17 EXACT vH[3] = 4 EXACT Argument 'vI': 32 bytes EXACT vI[0] = 1 EXACT vI[1] = 2 EXACT vI[2] = 42 EXACT vI[3] = 4 EXACT vI[4] = 1 EXACT vI[5] = 2 EXACT vI[6] = 42 EXACT vI[7] = 4 EXACT Argument 
'sA': 16 bytes EXACT sA[0] = 1 EXACT sA[1] = 2 EXACT sA[2] = 42 EXACT sA[3] = 4 EXACT Argument 'sB': 16 bytes EXACT sB[0] = 1 EXACT sB[1] = 2 EXACT sB[2] = 2 EXACT sB[3] = 4 EXACT Argument 'sC': 16 bytes EXACT sC[0] = 1 EXACT sC[1] = 2 EXACT sC[2] = 17 EXACT sC[3] = 4 Oclgrind-21.10/tests/kernels/misc/lvalue_loads.sim000066400000000000000000000010431413315665100222050ustar00rootroot00000000000000lvalue_loads.cl lvalue_loads 1 1 1 1 1 1 # Vector input # Vector outputs # Structure input # Structure outputs # NOP Oclgrind-21.10/tests/kernels/misc/non_uniform_work_groups.cl000066400000000000000000000007551413315665100243440ustar00rootroot00000000000000kernel void non_uniform_work_groups(global int *output) { int i = get_global_linear_id(); output[i] = get_local_linear_id(); int end = get_global_size(0) * get_global_size(1) * get_global_size(2); if (i == end-1) { output[end] = get_local_size(0); output[end+1] = get_local_size(1); output[end+2] = get_local_size(2); output[end+3] = get_enqueued_local_size(0); output[end+4] = get_enqueued_local_size(1); output[end+5] = get_enqueued_local_size(2); } } Oclgrind-21.10/tests/kernels/misc/non_uniform_work_groups.ref000066400000000000000000000014201413315665100245100ustar00rootroot00000000000000EXACT Argument 'output': 132 bytes EXACT output[0] = 0 EXACT output[1] = 1 EXACT output[2] = 0 EXACT output[3] = 2 EXACT output[4] = 3 EXACT output[5] = 1 EXACT output[6] = 0 EXACT output[7] = 1 EXACT output[8] = 0 EXACT output[9] = 4 EXACT output[10] = 5 EXACT output[11] = 2 EXACT output[12] = 6 EXACT output[13] = 7 EXACT output[14] = 3 EXACT output[15] = 2 EXACT output[16] = 3 EXACT output[17] = 1 EXACT output[18] = 0 EXACT output[19] = 1 EXACT output[20] = 0 EXACT output[21] = 2 EXACT output[22] = 3 EXACT output[23] = 1 EXACT output[24] = 0 EXACT output[25] = 1 EXACT output[26] = 0 EXACT output[27] = 1 EXACT output[28] = 1 EXACT output[29] = 1 EXACT output[30] = 2 EXACT output[31] = 2 EXACT output[32] = 2 Oclgrind-21.10/tests/kernels/misc/non_uniform_work_groups.sim000066400000000000000000000001751413315665100245320ustar00rootroot00000000000000# ARGS: --build-options -cl-std=CL2.0 non_uniform_work_groups.cl non_uniform_work_groups 3 3 3 2 2 2 Oclgrind-21.10/tests/kernels/misc/printf.cl000066400000000000000000000003621413315665100206460ustar00rootroot00000000000000#pragma clang diagnostic ignored "-Wformat-invalid-specifier" kernel void printf_kernel( const int i, const float f, const float4 f4v) { printf("int = %d\n", i); printf("float = %.1f\n", f); printf("float4 = %.2v4hlf\n", f4v); } Oclgrind-21.10/tests/kernels/misc/printf.ref000066400000000000000000000001141413315665100210170ustar00rootroot00000000000000MATCH int = 7 MATCH float = 42.0 MATCH float4 = 42.12,-3.70,100001.01,-0.00 Oclgrind-21.10/tests/kernels/misc/printf.sim000066400000000000000000000001531413315665100210360ustar00rootroot00000000000000printf.cl printf_kernel 1 1 1 1 1 1 42.123 -3.7 100001.01 -0.0 Oclgrind-21.10/tests/kernels/misc/program_scope_constant_array.cl000066400000000000000000000002341413315665100253110ustar00rootroot00000000000000constant int data[4] = {7, 42, 0, -1}; kernel void program_scope_constant_array(global int *output) { int i = get_global_id(0); output[i] = data[i]; } Oclgrind-21.10/tests/kernels/misc/program_scope_constant_array.ref000066400000000000000000000001741413315665100254720ustar00rootroot00000000000000EXACT Argument 'output': 16 bytes EXACT output[0] = 7 EXACT output[1] = 42 EXACT output[2] = 0 EXACT output[3] = -1 
Oclgrind-21.10/tests/kernels/misc/program_scope_constant_array.sim000066400000000000000000000001401413315665100254770ustar00rootroot00000000000000program_scope_constant_array.cl program_scope_constant_array 4 1 1 1 1 1 Oclgrind-21.10/tests/kernels/misc/reduce.cl000066400000000000000000000012101413315665100206040ustar00rootroot00000000000000kernel void reduce(uint n, global uint *data, global uint *result, local uint *localData) { uint gid = get_global_id(0); uint lid = get_local_id(0); uint gsz = get_global_size(0); uint lsz = get_local_size(0); uint grp = get_group_id(0); uint sum = 0; for (uint i = gid; i < n; i+=gsz) { sum += data[i]; } localData[lid] = sum; for (uint offset = lsz/2; offset > 0; offset/=2) { barrier(CLK_LOCAL_MEM_FENCE); if (lid < offset) { localData[lid] += localData[lid + offset]; } } if (lid == 0) { result[grp] = localData[lid]; } } Oclgrind-21.10/tests/kernels/misc/reduce.ref000066400000000000000000000006701413315665100207730ustar00rootroot00000000000000EXACT Argument 'result': 64 bytes EXACT result[0] = 1560 EXACT result[1] = 1624 EXACT result[2] = 1688 EXACT result[3] = 1752 EXACT result[4] = 1816 EXACT result[5] = 1880 EXACT result[6] = 1944 EXACT result[7] = 2008 EXACT result[8] = 2072 EXACT result[9] = 2136 EXACT result[10] = 2200 EXACT result[11] = 2264 EXACT result[12] = 2328 EXACT result[13] = 2392 EXACT result[14] = 2456 EXACT result[15] = 2520 Oclgrind-21.10/tests/kernels/misc/reduce.sim000066400000000000000000000001471413315665100210060ustar00rootroot00000000000000reduce.cl reduce 64 1 1 4 1 1 256 Oclgrind-21.10/tests/kernels/misc/switch_case.cl000066400000000000000000000005151413315665100216400ustar00rootroot00000000000000kernel void switch_case(global int *input, global int *output) { int i = get_global_id(0); int in = input[i]; int out; switch (in) { case 0: out = -7; break; case 1: out = i; break; case 2: case 3: case 4: out = in + i; break; default: out = 42; break; } output[i] = out; } Oclgrind-21.10/tests/kernels/misc/switch_case.ref000066400000000000000000000003261413315665100220160ustar00rootroot00000000000000EXACT Argument 'output': 32 bytes EXACT output[0] = 2 EXACT output[1] = 4 EXACT output[2] = 4 EXACT output[3] = -7 EXACT output[4] = 4 EXACT output[5] = 42 EXACT output[6] = 42 EXACT output[7] = 11 Oclgrind-21.10/tests/kernels/misc/switch_case.sim000066400000000000000000000001321413315665100220250ustar00rootroot00000000000000switch_case.cl switch_case 8 1 1 1 1 1 2 3 2 0 1 5 -1 4 Oclgrind-21.10/tests/kernels/misc/vecadd.cl000066400000000000000000000001771413315665100205760ustar00rootroot00000000000000kernel void vecadd(global float *a, global float *b, global float *c) { size_t i = get_global_id(0); c[i] = a[i] + b[i]; } Oclgrind-21.10/tests/kernels/misc/vecadd.ref000066400000000000000000000526361413315665100207630ustar00rootroot00000000000000EXACT Argument 'c': 4096 bytes EXACT c[0] = 0 EXACT c[1] = 2 EXACT c[2] = 4 EXACT c[3] = 6 EXACT c[4] = 8 EXACT c[5] = 10 EXACT c[6] = 12 EXACT c[7] = 14 EXACT c[8] = 16 EXACT c[9] = 18 EXACT c[10] = 20 EXACT c[11] = 22 EXACT c[12] = 24 EXACT c[13] = 26 EXACT c[14] = 28 EXACT c[15] = 30 EXACT c[16] = 32 EXACT c[17] = 34 EXACT c[18] = 36 EXACT c[19] = 38 EXACT c[20] = 40 EXACT c[21] = 42 EXACT c[22] = 44 EXACT c[23] = 46 EXACT c[24] = 48 EXACT c[25] = 50 EXACT c[26] = 52 EXACT c[27] = 54 EXACT c[28] = 56 EXACT c[29] = 58 EXACT c[30] = 60 EXACT c[31] = 62 EXACT c[32] = 64 EXACT c[33] = 66 EXACT c[34] = 68 EXACT c[35] = 70 EXACT c[36] = 72 EXACT c[37] = 74 EXACT c[38] = 76 EXACT c[39] = 78 EXACT c[40] = 80 
EXACT c[41] = 82 EXACT c[42] = 84 EXACT c[43] = 86 EXACT c[44] = 88 EXACT c[45] = 90 EXACT c[46] = 92 EXACT c[47] = 94 EXACT c[48] = 96 EXACT c[49] = 98 EXACT c[50] = 100 EXACT c[51] = 102 EXACT c[52] = 104 EXACT c[53] = 106 EXACT c[54] = 108 EXACT c[55] = 110 EXACT c[56] = 112 EXACT c[57] = 114 EXACT c[58] = 116 EXACT c[59] = 118 EXACT c[60] = 120 EXACT c[61] = 122 EXACT c[62] = 124 EXACT c[63] = 126 EXACT c[64] = 128 EXACT c[65] = 130 EXACT c[66] = 132 EXACT c[67] = 134 EXACT c[68] = 136 EXACT c[69] = 138 EXACT c[70] = 140 EXACT c[71] = 142 EXACT c[72] = 144 EXACT c[73] = 146 EXACT c[74] = 148 EXACT c[75] = 150 EXACT c[76] = 152 EXACT c[77] = 154 EXACT c[78] = 156 EXACT c[79] = 158 EXACT c[80] = 160 EXACT c[81] = 162 EXACT c[82] = 164 EXACT c[83] = 166 EXACT c[84] = 168 EXACT c[85] = 170 EXACT c[86] = 172 EXACT c[87] = 174 EXACT c[88] = 176 EXACT c[89] = 178 EXACT c[90] = 180 EXACT c[91] = 182 EXACT c[92] = 184 EXACT c[93] = 186 EXACT c[94] = 188 EXACT c[95] = 190 EXACT c[96] = 192 EXACT c[97] = 194 EXACT c[98] = 196 EXACT c[99] = 198 EXACT c[100] = 200 EXACT c[101] = 202 EXACT c[102] = 204 EXACT c[103] = 206 EXACT c[104] = 208 EXACT c[105] = 210 EXACT c[106] = 212 EXACT c[107] = 214 EXACT c[108] = 216 EXACT c[109] = 218 EXACT c[110] = 220 EXACT c[111] = 222 EXACT c[112] = 224 EXACT c[113] = 226 EXACT c[114] = 228 EXACT c[115] = 230 EXACT c[116] = 232 EXACT c[117] = 234 EXACT c[118] = 236 EXACT c[119] = 238 EXACT c[120] = 240 EXACT c[121] = 242 EXACT c[122] = 244 EXACT c[123] = 246 EXACT c[124] = 248 EXACT c[125] = 250 EXACT c[126] = 252 EXACT c[127] = 254 EXACT c[128] = 256 EXACT c[129] = 258 EXACT c[130] = 260 EXACT c[131] = 262 EXACT c[132] = 264 EXACT c[133] = 266 EXACT c[134] = 268 EXACT c[135] = 270 EXACT c[136] = 272 EXACT c[137] = 274 EXACT c[138] = 276 EXACT c[139] = 278 EXACT c[140] = 280 EXACT c[141] = 282 EXACT c[142] = 284 EXACT c[143] = 286 EXACT c[144] = 288 EXACT c[145] = 290 EXACT c[146] = 292 EXACT c[147] = 294 EXACT c[148] = 296 EXACT c[149] = 298 EXACT c[150] = 300 EXACT c[151] = 302 EXACT c[152] = 304 EXACT c[153] = 306 EXACT c[154] = 308 EXACT c[155] = 310 EXACT c[156] = 312 EXACT c[157] = 314 EXACT c[158] = 316 EXACT c[159] = 318 EXACT c[160] = 320 EXACT c[161] = 322 EXACT c[162] = 324 EXACT c[163] = 326 EXACT c[164] = 328 EXACT c[165] = 330 EXACT c[166] = 332 EXACT c[167] = 334 EXACT c[168] = 336 EXACT c[169] = 338 EXACT c[170] = 340 EXACT c[171] = 342 EXACT c[172] = 344 EXACT c[173] = 346 EXACT c[174] = 348 EXACT c[175] = 350 EXACT c[176] = 352 EXACT c[177] = 354 EXACT c[178] = 356 EXACT c[179] = 358 EXACT c[180] = 360 EXACT c[181] = 362 EXACT c[182] = 364 EXACT c[183] = 366 EXACT c[184] = 368 EXACT c[185] = 370 EXACT c[186] = 372 EXACT c[187] = 374 EXACT c[188] = 376 EXACT c[189] = 378 EXACT c[190] = 380 EXACT c[191] = 382 EXACT c[192] = 384 EXACT c[193] = 386 EXACT c[194] = 388 EXACT c[195] = 390 EXACT c[196] = 392 EXACT c[197] = 394 EXACT c[198] = 396 EXACT c[199] = 398 EXACT c[200] = 400 EXACT c[201] = 402 EXACT c[202] = 404 EXACT c[203] = 406 EXACT c[204] = 408 EXACT c[205] = 410 EXACT c[206] = 412 EXACT c[207] = 414 EXACT c[208] = 416 EXACT c[209] = 418 EXACT c[210] = 420 EXACT c[211] = 422 EXACT c[212] = 424 EXACT c[213] = 426 EXACT c[214] = 428 EXACT c[215] = 430 EXACT c[216] = 432 EXACT c[217] = 434 EXACT c[218] = 436 EXACT c[219] = 438 EXACT c[220] = 440 EXACT c[221] = 442 EXACT c[222] = 444 EXACT c[223] = 446 EXACT c[224] = 448 EXACT c[225] = 450 EXACT c[226] = 452 EXACT c[227] = 454 EXACT c[228] = 456 EXACT c[229] = 458 EXACT c[230] = 460 EXACT c[231] 
= 462 EXACT c[232] = 464 EXACT c[233] = 466 EXACT c[234] = 468 EXACT c[235] = 470 EXACT c[236] = 472 EXACT c[237] = 474 EXACT c[238] = 476 EXACT c[239] = 478 EXACT c[240] = 480 EXACT c[241] = 482 EXACT c[242] = 484 EXACT c[243] = 486 EXACT c[244] = 488 EXACT c[245] = 490 EXACT c[246] = 492 EXACT c[247] = 494 EXACT c[248] = 496 EXACT c[249] = 498 EXACT c[250] = 500 EXACT c[251] = 502 EXACT c[252] = 504 EXACT c[253] = 506 EXACT c[254] = 508 EXACT c[255] = 510 EXACT c[256] = 512 EXACT c[257] = 514 EXACT c[258] = 516 EXACT c[259] = 518 EXACT c[260] = 520 EXACT c[261] = 522 EXACT c[262] = 524 EXACT c[263] = 526 EXACT c[264] = 528 EXACT c[265] = 530 EXACT c[266] = 532 EXACT c[267] = 534 EXACT c[268] = 536 EXACT c[269] = 538 EXACT c[270] = 540 EXACT c[271] = 542 EXACT c[272] = 544 EXACT c[273] = 546 EXACT c[274] = 548 EXACT c[275] = 550 EXACT c[276] = 552 EXACT c[277] = 554 EXACT c[278] = 556 EXACT c[279] = 558 EXACT c[280] = 560 EXACT c[281] = 562 EXACT c[282] = 564 EXACT c[283] = 566 EXACT c[284] = 568 EXACT c[285] = 570 EXACT c[286] = 572 EXACT c[287] = 574 EXACT c[288] = 576 EXACT c[289] = 578 EXACT c[290] = 580 EXACT c[291] = 582 EXACT c[292] = 584 EXACT c[293] = 586 EXACT c[294] = 588 EXACT c[295] = 590 EXACT c[296] = 592 EXACT c[297] = 594 EXACT c[298] = 596 EXACT c[299] = 598 EXACT c[300] = 600 EXACT c[301] = 602 EXACT c[302] = 604 EXACT c[303] = 606 EXACT c[304] = 608 EXACT c[305] = 610 EXACT c[306] = 612 EXACT c[307] = 614 EXACT c[308] = 616 EXACT c[309] = 618 EXACT c[310] = 620 EXACT c[311] = 622 EXACT c[312] = 624 EXACT c[313] = 626 EXACT c[314] = 628 EXACT c[315] = 630 EXACT c[316] = 632 EXACT c[317] = 634 EXACT c[318] = 636 EXACT c[319] = 638 EXACT c[320] = 640 EXACT c[321] = 642 EXACT c[322] = 644 EXACT c[323] = 646 EXACT c[324] = 648 EXACT c[325] = 650 EXACT c[326] = 652 EXACT c[327] = 654 EXACT c[328] = 656 EXACT c[329] = 658 EXACT c[330] = 660 EXACT c[331] = 662 EXACT c[332] = 664 EXACT c[333] = 666 EXACT c[334] = 668 EXACT c[335] = 670 EXACT c[336] = 672 EXACT c[337] = 674 EXACT c[338] = 676 EXACT c[339] = 678 EXACT c[340] = 680 EXACT c[341] = 682 EXACT c[342] = 684 EXACT c[343] = 686 EXACT c[344] = 688 EXACT c[345] = 690 EXACT c[346] = 692 EXACT c[347] = 694 EXACT c[348] = 696 EXACT c[349] = 698 EXACT c[350] = 700 EXACT c[351] = 702 EXACT c[352] = 704 EXACT c[353] = 706 EXACT c[354] = 708 EXACT c[355] = 710 EXACT c[356] = 712 EXACT c[357] = 714 EXACT c[358] = 716 EXACT c[359] = 718 EXACT c[360] = 720 EXACT c[361] = 722 EXACT c[362] = 724 EXACT c[363] = 726 EXACT c[364] = 728 EXACT c[365] = 730 EXACT c[366] = 732 EXACT c[367] = 734 EXACT c[368] = 736 EXACT c[369] = 738 EXACT c[370] = 740 EXACT c[371] = 742 EXACT c[372] = 744 EXACT c[373] = 746 EXACT c[374] = 748 EXACT c[375] = 750 EXACT c[376] = 752 EXACT c[377] = 754 EXACT c[378] = 756 EXACT c[379] = 758 EXACT c[380] = 760 EXACT c[381] = 762 EXACT c[382] = 764 EXACT c[383] = 766 EXACT c[384] = 768 EXACT c[385] = 770 EXACT c[386] = 772 EXACT c[387] = 774 EXACT c[388] = 776 EXACT c[389] = 778 EXACT c[390] = 780 EXACT c[391] = 782 EXACT c[392] = 784 EXACT c[393] = 786 EXACT c[394] = 788 EXACT c[395] = 790 EXACT c[396] = 792 EXACT c[397] = 794 EXACT c[398] = 796 EXACT c[399] = 798 EXACT c[400] = 800 EXACT c[401] = 802 EXACT c[402] = 804 EXACT c[403] = 806 EXACT c[404] = 808 EXACT c[405] = 810 EXACT c[406] = 812 EXACT c[407] = 814 EXACT c[408] = 816 EXACT c[409] = 818 EXACT c[410] = 820 EXACT c[411] = 822 EXACT c[412] = 824 EXACT c[413] = 826 EXACT c[414] = 828 EXACT c[415] = 830 EXACT c[416] = 832 EXACT c[417] = 834 EXACT c[418] = 
836 EXACT c[419] = 838 EXACT c[420] = 840 EXACT c[421] = 842 EXACT c[422] = 844 EXACT c[423] = 846 EXACT c[424] = 848 EXACT c[425] = 850 EXACT c[426] = 852 EXACT c[427] = 854 EXACT c[428] = 856 EXACT c[429] = 858 EXACT c[430] = 860 EXACT c[431] = 862 EXACT c[432] = 864 EXACT c[433] = 866 EXACT c[434] = 868 EXACT c[435] = 870 EXACT c[436] = 872 EXACT c[437] = 874 EXACT c[438] = 876 EXACT c[439] = 878 EXACT c[440] = 880 EXACT c[441] = 882 EXACT c[442] = 884 EXACT c[443] = 886 EXACT c[444] = 888 EXACT c[445] = 890 EXACT c[446] = 892 EXACT c[447] = 894 EXACT c[448] = 896 EXACT c[449] = 898 EXACT c[450] = 900 EXACT c[451] = 902 EXACT c[452] = 904 EXACT c[453] = 906 EXACT c[454] = 908 EXACT c[455] = 910 EXACT c[456] = 912 EXACT c[457] = 914 EXACT c[458] = 916 EXACT c[459] = 918 EXACT c[460] = 920 EXACT c[461] = 922 EXACT c[462] = 924 EXACT c[463] = 926 EXACT c[464] = 928 EXACT c[465] = 930 EXACT c[466] = 932 EXACT c[467] = 934 EXACT c[468] = 936 EXACT c[469] = 938 EXACT c[470] = 940 EXACT c[471] = 942 EXACT c[472] = 944 EXACT c[473] = 946 EXACT c[474] = 948 EXACT c[475] = 950 EXACT c[476] = 952 EXACT c[477] = 954 EXACT c[478] = 956 EXACT c[479] = 958 EXACT c[480] = 960 EXACT c[481] = 962 EXACT c[482] = 964 EXACT c[483] = 966 EXACT c[484] = 968 EXACT c[485] = 970 EXACT c[486] = 972 EXACT c[487] = 974 EXACT c[488] = 976 EXACT c[489] = 978 EXACT c[490] = 980 EXACT c[491] = 982 EXACT c[492] = 984 EXACT c[493] = 986 EXACT c[494] = 988 EXACT c[495] = 990 EXACT c[496] = 992 EXACT c[497] = 994 EXACT c[498] = 996 EXACT c[499] = 998 EXACT c[500] = 1000 EXACT c[501] = 1002 EXACT c[502] = 1004 EXACT c[503] = 1006 EXACT c[504] = 1008 EXACT c[505] = 1010 EXACT c[506] = 1012 EXACT c[507] = 1014 EXACT c[508] = 1016 EXACT c[509] = 1018 EXACT c[510] = 1020 EXACT c[511] = 1022 EXACT c[512] = 1024 EXACT c[513] = 1026 EXACT c[514] = 1028 EXACT c[515] = 1030 EXACT c[516] = 1032 EXACT c[517] = 1034 EXACT c[518] = 1036 EXACT c[519] = 1038 EXACT c[520] = 1040 EXACT c[521] = 1042 EXACT c[522] = 1044 EXACT c[523] = 1046 EXACT c[524] = 1048 EXACT c[525] = 1050 EXACT c[526] = 1052 EXACT c[527] = 1054 EXACT c[528] = 1056 EXACT c[529] = 1058 EXACT c[530] = 1060 EXACT c[531] = 1062 EXACT c[532] = 1064 EXACT c[533] = 1066 EXACT c[534] = 1068 EXACT c[535] = 1070 EXACT c[536] = 1072 EXACT c[537] = 1074 EXACT c[538] = 1076 EXACT c[539] = 1078 EXACT c[540] = 1080 EXACT c[541] = 1082 EXACT c[542] = 1084 EXACT c[543] = 1086 EXACT c[544] = 1088 EXACT c[545] = 1090 EXACT c[546] = 1092 EXACT c[547] = 1094 EXACT c[548] = 1096 EXACT c[549] = 1098 EXACT c[550] = 1100 EXACT c[551] = 1102 EXACT c[552] = 1104 EXACT c[553] = 1106 EXACT c[554] = 1108 EXACT c[555] = 1110 EXACT c[556] = 1112 EXACT c[557] = 1114 EXACT c[558] = 1116 EXACT c[559] = 1118 EXACT c[560] = 1120 EXACT c[561] = 1122 EXACT c[562] = 1124 EXACT c[563] = 1126 EXACT c[564] = 1128 EXACT c[565] = 1130 EXACT c[566] = 1132 EXACT c[567] = 1134 EXACT c[568] = 1136 EXACT c[569] = 1138 EXACT c[570] = 1140 EXACT c[571] = 1142 EXACT c[572] = 1144 EXACT c[573] = 1146 EXACT c[574] = 1148 EXACT c[575] = 1150 EXACT c[576] = 1152 EXACT c[577] = 1154 EXACT c[578] = 1156 EXACT c[579] = 1158 EXACT c[580] = 1160 EXACT c[581] = 1162 EXACT c[582] = 1164 EXACT c[583] = 1166 EXACT c[584] = 1168 EXACT c[585] = 1170 EXACT c[586] = 1172 EXACT c[587] = 1174 EXACT c[588] = 1176 EXACT c[589] = 1178 EXACT c[590] = 1180 EXACT c[591] = 1182 EXACT c[592] = 1184 EXACT c[593] = 1186 EXACT c[594] = 1188 EXACT c[595] = 1190 EXACT c[596] = 1192 EXACT c[597] = 1194 EXACT c[598] = 1196 EXACT c[599] = 1198 EXACT 
c[600] = 1200 EXACT c[601] = 1202 EXACT c[602] = 1204 EXACT c[603] = 1206 EXACT c[604] = 1208 EXACT c[605] = 1210 EXACT c[606] = 1212 EXACT c[607] = 1214 EXACT c[608] = 1216 EXACT c[609] = 1218 EXACT c[610] = 1220 EXACT c[611] = 1222 EXACT c[612] = 1224 EXACT c[613] = 1226 EXACT c[614] = 1228 EXACT c[615] = 1230 EXACT c[616] = 1232 EXACT c[617] = 1234 EXACT c[618] = 1236 EXACT c[619] = 1238 EXACT c[620] = 1240 EXACT c[621] = 1242 EXACT c[622] = 1244 EXACT c[623] = 1246 EXACT c[624] = 1248 EXACT c[625] = 1250 EXACT c[626] = 1252 EXACT c[627] = 1254 EXACT c[628] = 1256 EXACT c[629] = 1258 EXACT c[630] = 1260 EXACT c[631] = 1262 EXACT c[632] = 1264 EXACT c[633] = 1266 EXACT c[634] = 1268 EXACT c[635] = 1270 EXACT c[636] = 1272 EXACT c[637] = 1274 EXACT c[638] = 1276 EXACT c[639] = 1278 EXACT c[640] = 1280 EXACT c[641] = 1282 EXACT c[642] = 1284 EXACT c[643] = 1286 EXACT c[644] = 1288 EXACT c[645] = 1290 EXACT c[646] = 1292 EXACT c[647] = 1294 EXACT c[648] = 1296 EXACT c[649] = 1298 EXACT c[650] = 1300 EXACT c[651] = 1302 EXACT c[652] = 1304 EXACT c[653] = 1306 EXACT c[654] = 1308 EXACT c[655] = 1310 EXACT c[656] = 1312 EXACT c[657] = 1314 EXACT c[658] = 1316 EXACT c[659] = 1318 EXACT c[660] = 1320 EXACT c[661] = 1322 EXACT c[662] = 1324 EXACT c[663] = 1326 EXACT c[664] = 1328 EXACT c[665] = 1330 EXACT c[666] = 1332 EXACT c[667] = 1334 EXACT c[668] = 1336 EXACT c[669] = 1338 EXACT c[670] = 1340 EXACT c[671] = 1342 EXACT c[672] = 1344 EXACT c[673] = 1346 EXACT c[674] = 1348 EXACT c[675] = 1350 EXACT c[676] = 1352 EXACT c[677] = 1354 EXACT c[678] = 1356 EXACT c[679] = 1358 EXACT c[680] = 1360 EXACT c[681] = 1362 EXACT c[682] = 1364 EXACT c[683] = 1366 EXACT c[684] = 1368 EXACT c[685] = 1370 EXACT c[686] = 1372 EXACT c[687] = 1374 EXACT c[688] = 1376 EXACT c[689] = 1378 EXACT c[690] = 1380 EXACT c[691] = 1382 EXACT c[692] = 1384 EXACT c[693] = 1386 EXACT c[694] = 1388 EXACT c[695] = 1390 EXACT c[696] = 1392 EXACT c[697] = 1394 EXACT c[698] = 1396 EXACT c[699] = 1398 EXACT c[700] = 1400 EXACT c[701] = 1402 EXACT c[702] = 1404 EXACT c[703] = 1406 EXACT c[704] = 1408 EXACT c[705] = 1410 EXACT c[706] = 1412 EXACT c[707] = 1414 EXACT c[708] = 1416 EXACT c[709] = 1418 EXACT c[710] = 1420 EXACT c[711] = 1422 EXACT c[712] = 1424 EXACT c[713] = 1426 EXACT c[714] = 1428 EXACT c[715] = 1430 EXACT c[716] = 1432 EXACT c[717] = 1434 EXACT c[718] = 1436 EXACT c[719] = 1438 EXACT c[720] = 1440 EXACT c[721] = 1442 EXACT c[722] = 1444 EXACT c[723] = 1446 EXACT c[724] = 1448 EXACT c[725] = 1450 EXACT c[726] = 1452 EXACT c[727] = 1454 EXACT c[728] = 1456 EXACT c[729] = 1458 EXACT c[730] = 1460 EXACT c[731] = 1462 EXACT c[732] = 1464 EXACT c[733] = 1466 EXACT c[734] = 1468 EXACT c[735] = 1470 EXACT c[736] = 1472 EXACT c[737] = 1474 EXACT c[738] = 1476 EXACT c[739] = 1478 EXACT c[740] = 1480 EXACT c[741] = 1482 EXACT c[742] = 1484 EXACT c[743] = 1486 EXACT c[744] = 1488 EXACT c[745] = 1490 EXACT c[746] = 1492 EXACT c[747] = 1494 EXACT c[748] = 1496 EXACT c[749] = 1498 EXACT c[750] = 1500 EXACT c[751] = 1502 EXACT c[752] = 1504 EXACT c[753] = 1506 EXACT c[754] = 1508 EXACT c[755] = 1510 EXACT c[756] = 1512 EXACT c[757] = 1514 EXACT c[758] = 1516 EXACT c[759] = 1518 EXACT c[760] = 1520 EXACT c[761] = 1522 EXACT c[762] = 1524 EXACT c[763] = 1526 EXACT c[764] = 1528 EXACT c[765] = 1530 EXACT c[766] = 1532 EXACT c[767] = 1534 EXACT c[768] = 1536 EXACT c[769] = 1538 EXACT c[770] = 1540 EXACT c[771] = 1542 EXACT c[772] = 1544 EXACT c[773] = 1546 EXACT c[774] = 1548 EXACT c[775] = 1550 EXACT c[776] = 1552 EXACT c[777] = 1554 
EXACT c[778] = 1556 EXACT c[779] = 1558 EXACT c[780] = 1560 EXACT c[781] = 1562 EXACT c[782] = 1564 EXACT c[783] = 1566 EXACT c[784] = 1568 EXACT c[785] = 1570 EXACT c[786] = 1572 EXACT c[787] = 1574 EXACT c[788] = 1576 EXACT c[789] = 1578 EXACT c[790] = 1580 EXACT c[791] = 1582 EXACT c[792] = 1584 EXACT c[793] = 1586 EXACT c[794] = 1588 EXACT c[795] = 1590 EXACT c[796] = 1592 EXACT c[797] = 1594 EXACT c[798] = 1596 EXACT c[799] = 1598 EXACT c[800] = 1600 EXACT c[801] = 1602 EXACT c[802] = 1604 EXACT c[803] = 1606 EXACT c[804] = 1608 EXACT c[805] = 1610 EXACT c[806] = 1612 EXACT c[807] = 1614 EXACT c[808] = 1616 EXACT c[809] = 1618 EXACT c[810] = 1620 EXACT c[811] = 1622 EXACT c[812] = 1624 EXACT c[813] = 1626 EXACT c[814] = 1628 EXACT c[815] = 1630 EXACT c[816] = 1632 EXACT c[817] = 1634 EXACT c[818] = 1636 EXACT c[819] = 1638 EXACT c[820] = 1640 EXACT c[821] = 1642 EXACT c[822] = 1644 EXACT c[823] = 1646 EXACT c[824] = 1648 EXACT c[825] = 1650 EXACT c[826] = 1652 EXACT c[827] = 1654 EXACT c[828] = 1656 EXACT c[829] = 1658 EXACT c[830] = 1660 EXACT c[831] = 1662 EXACT c[832] = 1664 EXACT c[833] = 1666 EXACT c[834] = 1668 EXACT c[835] = 1670 EXACT c[836] = 1672 EXACT c[837] = 1674 EXACT c[838] = 1676 EXACT c[839] = 1678 EXACT c[840] = 1680 EXACT c[841] = 1682 EXACT c[842] = 1684 EXACT c[843] = 1686 EXACT c[844] = 1688 EXACT c[845] = 1690 EXACT c[846] = 1692 EXACT c[847] = 1694 EXACT c[848] = 1696 EXACT c[849] = 1698 EXACT c[850] = 1700 EXACT c[851] = 1702 EXACT c[852] = 1704 EXACT c[853] = 1706 EXACT c[854] = 1708 EXACT c[855] = 1710 EXACT c[856] = 1712 EXACT c[857] = 1714 EXACT c[858] = 1716 EXACT c[859] = 1718 EXACT c[860] = 1720 EXACT c[861] = 1722 EXACT c[862] = 1724 EXACT c[863] = 1726 EXACT c[864] = 1728 EXACT c[865] = 1730 EXACT c[866] = 1732 EXACT c[867] = 1734 EXACT c[868] = 1736 EXACT c[869] = 1738 EXACT c[870] = 1740 EXACT c[871] = 1742 EXACT c[872] = 1744 EXACT c[873] = 1746 EXACT c[874] = 1748 EXACT c[875] = 1750 EXACT c[876] = 1752 EXACT c[877] = 1754 EXACT c[878] = 1756 EXACT c[879] = 1758 EXACT c[880] = 1760 EXACT c[881] = 1762 EXACT c[882] = 1764 EXACT c[883] = 1766 EXACT c[884] = 1768 EXACT c[885] = 1770 EXACT c[886] = 1772 EXACT c[887] = 1774 EXACT c[888] = 1776 EXACT c[889] = 1778 EXACT c[890] = 1780 EXACT c[891] = 1782 EXACT c[892] = 1784 EXACT c[893] = 1786 EXACT c[894] = 1788 EXACT c[895] = 1790 EXACT c[896] = 1792 EXACT c[897] = 1794 EXACT c[898] = 1796 EXACT c[899] = 1798 EXACT c[900] = 1800 EXACT c[901] = 1802 EXACT c[902] = 1804 EXACT c[903] = 1806 EXACT c[904] = 1808 EXACT c[905] = 1810 EXACT c[906] = 1812 EXACT c[907] = 1814 EXACT c[908] = 1816 EXACT c[909] = 1818 EXACT c[910] = 1820 EXACT c[911] = 1822 EXACT c[912] = 1824 EXACT c[913] = 1826 EXACT c[914] = 1828 EXACT c[915] = 1830 EXACT c[916] = 1832 EXACT c[917] = 1834 EXACT c[918] = 1836 EXACT c[919] = 1838 EXACT c[920] = 1840 EXACT c[921] = 1842 EXACT c[922] = 1844 EXACT c[923] = 1846 EXACT c[924] = 1848 EXACT c[925] = 1850 EXACT c[926] = 1852 EXACT c[927] = 1854 EXACT c[928] = 1856 EXACT c[929] = 1858 EXACT c[930] = 1860 EXACT c[931] = 1862 EXACT c[932] = 1864 EXACT c[933] = 1866 EXACT c[934] = 1868 EXACT c[935] = 1870 EXACT c[936] = 1872 EXACT c[937] = 1874 EXACT c[938] = 1876 EXACT c[939] = 1878 EXACT c[940] = 1880 EXACT c[941] = 1882 EXACT c[942] = 1884 EXACT c[943] = 1886 EXACT c[944] = 1888 EXACT c[945] = 1890 EXACT c[946] = 1892 EXACT c[947] = 1894 EXACT c[948] = 1896 EXACT c[949] = 1898 EXACT c[950] = 1900 EXACT c[951] = 1902 EXACT c[952] = 1904 EXACT c[953] = 1906 EXACT c[954] = 1908 EXACT c[955] = 
1910 EXACT c[956] = 1912 EXACT c[957] = 1914 EXACT c[958] = 1916 EXACT c[959] = 1918 EXACT c[960] = 1920 EXACT c[961] = 1922 EXACT c[962] = 1924 EXACT c[963] = 1926 EXACT c[964] = 1928 EXACT c[965] = 1930 EXACT c[966] = 1932 EXACT c[967] = 1934 EXACT c[968] = 1936 EXACT c[969] = 1938 EXACT c[970] = 1940 EXACT c[971] = 1942 EXACT c[972] = 1944 EXACT c[973] = 1946 EXACT c[974] = 1948 EXACT c[975] = 1950 EXACT c[976] = 1952 EXACT c[977] = 1954 EXACT c[978] = 1956 EXACT c[979] = 1958 EXACT c[980] = 1960 EXACT c[981] = 1962 EXACT c[982] = 1964 EXACT c[983] = 1966 EXACT c[984] = 1968 EXACT c[985] = 1970 EXACT c[986] = 1972 EXACT c[987] = 1974 EXACT c[988] = 1976 EXACT c[989] = 1978 EXACT c[990] = 1980 EXACT c[991] = 1982 EXACT c[992] = 1984 EXACT c[993] = 1986 EXACT c[994] = 1988 EXACT c[995] = 1990 EXACT c[996] = 1992 EXACT c[997] = 1994 EXACT c[998] = 1996 EXACT c[999] = 1998 EXACT c[1000] = 2000 EXACT c[1001] = 2002 EXACT c[1002] = 2004 EXACT c[1003] = 2006 EXACT c[1004] = 2008 EXACT c[1005] = 2010 EXACT c[1006] = 2012 EXACT c[1007] = 2014 EXACT c[1008] = 2016 EXACT c[1009] = 2018 EXACT c[1010] = 2020 EXACT c[1011] = 2022 EXACT c[1012] = 2024 EXACT c[1013] = 2026 EXACT c[1014] = 2028 EXACT c[1015] = 2030 EXACT c[1016] = 2032 EXACT c[1017] = 2034 EXACT c[1018] = 2036 EXACT c[1019] = 2038 EXACT c[1020] = 2040 EXACT c[1021] = 2042 EXACT c[1022] = 2044 EXACT c[1023] = 2046 Oclgrind-21.10/tests/kernels/misc/vecadd.sim000066400000000000000000000001601413315665100207600ustar00rootroot00000000000000vecadd.cl vecadd 1024 1 1 16 1 1 Oclgrind-21.10/tests/kernels/misc/vector_argument.cl000066400000000000000000000001331413315665100225440ustar00rootroot00000000000000kernel void vector_argument(int4 vector, global int4 *output) { *output = vector + 42; } Oclgrind-21.10/tests/kernels/misc/vector_argument.ref000066400000000000000000000001761413315665100227310ustar00rootroot00000000000000EXACT Argument 'output': 16 bytes EXACT output[0] = 49 EXACT output[1] = 84 EXACT output[2] = 42 EXACT output[3] = 41 Oclgrind-21.10/tests/kernels/misc/vector_argument.sim000066400000000000000000000001331413315665100227360ustar00rootroot00000000000000vector_argument.cl vector_argument 4 1 1 1 1 1 7 42 0 -1 Oclgrind-21.10/tests/kernels/uninitialized/000077500000000000000000000000001413315665100207405ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/uninitialized/padded_nested_struct_memcpy.cl000066400000000000000000000004451413315665100270240ustar00rootroot00000000000000struct T { char a; int b; char c; }; struct S { char a; int b; char c; struct T d; }; kernel void padded_nested_struct_memcpy(global struct S *output) { struct S s; s.a = 1; s.b = 0x02000002; s.c = 3; s.d.a = 4; s.d.b = 0x05000005; s.d.c = 6; *output = s; } Oclgrind-21.10/tests/kernels/uninitialized/padded_nested_struct_memcpy.ref000066400000000000000000000010641413315665100272000ustar00rootroot00000000000000EXACT Argument 'output': 24 bytes EXACT output[0] = 1 MATCH output[1] = MATCH output[2] = MATCH output[3] = EXACT output[4] = 2 EXACT output[5] = 0 EXACT output[6] = 0 EXACT output[7] = 2 EXACT output[8] = 3 MATCH output[9] = MATCH output[10] = MATCH output[11] = EXACT output[12] = 4 MATCH output[13] = MATCH output[14] = MATCH output[15] = EXACT output[16] = 5 EXACT output[17] = 0 EXACT output[18] = 0 EXACT output[19] = 5 EXACT output[20] = 6 MATCH output[21] = MATCH output[22] = MATCH output[23] = 
Oclgrind-21.10/tests/kernels/uninitialized/padded_nested_struct_memcpy.sim000066400000000000000000000001431413315665100272110ustar00rootroot00000000000000padded_nested_struct_memcpy.cl padded_nested_struct_memcpy 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/uninitialized/padded_struct_alloca_fp.cl000066400000000000000000000002711413315665100261050ustar00rootroot00000000000000struct S { char a; int b; char c; }; kernel void padded_struct_alloca_fp(global struct S *output) { struct S s; s.a = 42; s.b = 0xF9FFFFF9; s.c = 127; *output = s; } Oclgrind-21.10/tests/kernels/uninitialized/padded_struct_alloca_fp.ref000066400000000000000000000004631413315665100262660ustar00rootroot00000000000000EXACT Argument 'output': 12 bytes EXACT output[0] = 42 EXACT output[1] = 0 EXACT output[2] = 0 EXACT output[3] = 0 EXACT output[4] = -7 EXACT output[5] = -1 EXACT output[6] = -1 EXACT output[7] = -7 EXACT output[8] = 127 EXACT output[9] = 0 EXACT output[10] = 0 EXACT output[11] = 0 Oclgrind-21.10/tests/kernels/uninitialized/padded_struct_alloca_fp.sim000066400000000000000000000001331413315665100262740ustar00rootroot00000000000000padded_struct_alloca_fp.cl padded_struct_alloca_fp 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/uninitialized/padded_struct_memcpy_fp.cl000066400000000000000000000005771413315665100261550ustar00rootroot00000000000000struct S { char a; int b; char c; }; kernel void padded_struct_memcpy_fp(local struct S *scratch, global struct S *output) { int lid = get_local_id(0); struct S s; s.a = 42; s.b = 0xF9FFFFF9; s.c = 127; if (lid == 0) { *scratch = s; } barrier(CLK_LOCAL_MEM_FENCE); if (lid == 1) { *output = *scratch; } } Oclgrind-21.10/tests/kernels/uninitialized/padded_struct_memcpy_fp.ref000066400000000000000000000004631413315665100263250ustar00rootroot00000000000000EXACT Argument 'output': 12 bytes EXACT output[0] = 42 EXACT output[1] = 0 EXACT output[2] = 0 EXACT output[3] = 0 EXACT output[4] = -7 EXACT output[5] = -1 EXACT output[6] = -1 EXACT output[7] = -7 EXACT output[8] = 127 EXACT output[9] = 0 EXACT output[10] = 0 EXACT output[11] = 0 Oclgrind-21.10/tests/kernels/uninitialized/padded_struct_memcpy_fp.sim000066400000000000000000000001531413315665100263350ustar00rootroot00000000000000padded_struct_memcpy_fp.cl padded_struct_memcpy_fp 2 1 1 2 1 1 Oclgrind-21.10/tests/kernels/uninitialized/partially_uninitialized_fract.cl000066400000000000000000000002161413315665100273670ustar00rootroot00000000000000__kernel void partially_uninitialized_fract(__global float4 *output) { float4 f; f.xzw = 4.2; *(output + 1) = fract(f, output); } Oclgrind-21.10/tests/kernels/uninitialized/partially_uninitialized_fract.ref000066400000000000000000000004131413315665100275440ustar00rootroot00000000000000ERROR Uninitialized value ERROR Uninitialized value EXACT Argument 'output': 32 bytes EXACT output[0] = 4 MATCH output[1] = EXACT output[2] = 4 EXACT output[3] = 4 EXACT output[4] = 0.2 MATCH output[5] = EXACT output[6] = 0.2 EXACT output[7] = 0.2 Oclgrind-21.10/tests/kernels/uninitialized/partially_uninitialized_fract.sim000066400000000000000000000001421413315665100275570ustar00rootroot00000000000000partially_uninitialized_fract.cl partially_uninitialized_fract 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/uninitialized/private_array_initializer_list.cl000066400000000000000000000002651413315665100275710ustar00rootroot00000000000000kernel void private_array_initializer_list(global float *output) { float scratch[4] = {7.f, 42.f, -1.f, 0.f}; for (int i = 0; i < 4; i++) { output[i] = scratch[i]; } } 
Oclgrind-21.10/tests/kernels/uninitialized/private_array_initializer_list.ref000066400000000000000000000001741413315665100277460ustar00rootroot00000000000000EXACT Argument 'output': 16 bytes EXACT output[0] = 7 EXACT output[1] = 42 EXACT output[2] = -1 EXACT output[3] = 0 Oclgrind-21.10/tests/kernels/uninitialized/private_array_initializer_list.sim000066400000000000000000000001441413315665100277570ustar00rootroot00000000000000private_array_initializer_list.cl private_array_initializer_list 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_address.cl000066400000000000000000000002161413315665100256340ustar00rootroot00000000000000__kernel void uninitialized_address(__global ulong *output) { int a[] = {1, 2, 3}; volatile int i, j; a[i] = 4; output[0] = a[j]; } Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_address.ref000066400000000000000000000001571413315665100260160ustar00rootroot00000000000000ERROR Uninitialized address ERROR Uninitialized address EXACT Argument 'output': 8 bytes MATCH output[0] = Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_address.sim000066400000000000000000000001211413315665100260210ustar00rootroot00000000000000uninitialized_address.cl uninitialized_address 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_global_buffer.cl000066400000000000000000000002441413315665100270010ustar00rootroot00000000000000kernel void uninitialized_global_buffer(global float *input, global float *output) { output[get_global_id(0)] = *input; } Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_global_buffer.ref000066400000000000000000000001221413315665100271520ustar00rootroot00000000000000ERROR Uninitialized value EXACT Argument 'output': 4 bytes EXACT output[0] = 0 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_global_buffer.sim000066400000000000000000000001561413315665100271750ustar00rootroot00000000000000uninitialized_global_buffer.cl uninitialized_global_buffer 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_local_array.cl000066400000000000000000000003151413315665100264770ustar00rootroot00000000000000kernel void uninitialized_local_array(global float *output) { local float scratch[16]; int i = get_local_id(0); if (i != get_local_size(0)/2) { scratch[i] = i; } output[i] = scratch[i]; } Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_local_array.ref000066400000000000000000000006511413315665100266600ustar00rootroot00000000000000ERROR Uninitialized value EXACT Argument 'output': 64 bytes EXACT output[0] = 0 EXACT output[1] = 1 EXACT output[2] = 2 EXACT output[3] = 3 EXACT output[4] = 4 EXACT output[5] = 5 EXACT output[6] = 6 EXACT output[7] = 7 EXACT output[8] = 0 EXACT output[9] = 9 EXACT output[10] = 10 EXACT output[11] = 11 EXACT output[12] = 12 EXACT output[13] = 13 EXACT output[14] = 14 EXACT output[15] = 15 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_local_array.sim000066400000000000000000000001341413315665100266700ustar00rootroot00000000000000uninitialized_local_array.cl uninitialized_local_array 16 1 1 16 1 1 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_local_ptr.cl000066400000000000000000000003051413315665100261650ustar00rootroot00000000000000kernel void uninitialized_local_ptr(local float *scratch, global float *output) { int i = get_local_id(0); if (i != get_local_size(0)/2) { scratch[i] = i; } output[i] = scratch[i]; } 
Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_local_ptr.ref000066400000000000000000000006511413315665100263470ustar00rootroot00000000000000ERROR Uninitialized value EXACT Argument 'output': 64 bytes EXACT output[0] = 0 EXACT output[1] = 1 EXACT output[2] = 2 EXACT output[3] = 3 EXACT output[4] = 4 EXACT output[5] = 5 EXACT output[6] = 6 EXACT output[7] = 7 EXACT output[8] = 0 EXACT output[9] = 9 EXACT output[10] = 10 EXACT output[11] = 11 EXACT output[12] = 12 EXACT output[13] = 13 EXACT output[14] = 14 EXACT output[15] = 15 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_local_ptr.sim000066400000000000000000000001431413315665100263570ustar00rootroot00000000000000uninitialized_local_ptr.cl uninitialized_local_ptr 16 1 1 16 1 1 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_local_variable.cl000066400000000000000000000002031413315665100271420ustar00rootroot00000000000000kernel void uninitialized_local_variable(global int *output) { local int x; if (*output > 0) x = *output; *output = x; } Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_local_variable.ref000066400000000000000000000001221413315665100273200ustar00rootroot00000000000000ERROR Uninitialized value EXACT Argument 'output': 4 bytes EXACT output[0] = 0 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_local_variable.sim000066400000000000000000000001371413315665100273420ustar00rootroot00000000000000uninitialized_local_variable.cl uninitialized_local_variable 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.cl000066400000000000000000000003571413315665100304220ustar00rootroot00000000000000struct __attribute__ ((packed)) S { char a; int b __attribute__ ((packed)); char c; }; kernel void uninitialized_packed_struct_memcpy(local int *scratch, global struct S *output) { struct S s = {1, *scratch, 2}; *output = s; } Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.ref000066400000000000000000000002741413315665100305760ustar00rootroot00000000000000ERROR Uninitialized value EXACT Argument 'output': 6 bytes EXACT output[0] = 1 MATCH output[1] = MATCH output[2] = MATCH output[3] = MATCH output[4] = EXACT output[5] = 2 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.sim000066400000000000000000000001721413315665100306070ustar00rootroot00000000000000uninitialized_packed_struct_memcpy.cl uninitialized_packed_struct_memcpy 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.cl000066400000000000000000000004231413315665100317500ustar00rootroot00000000000000struct T { char a; int b; char c; }; struct S { char a; int b; char c; struct T d; }; kernel void uninitialized_padded_nested_struct_memcpy(local int *scratch, global struct S *output) { struct S s = {1, 0x02000002, 3, {4, *scratch, 5}}; *output = s; } Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.ref000066400000000000000000000011131413315665100321230ustar00rootroot00000000000000ERROR Uninitialized value EXACT Argument 'output': 24 bytes EXACT output[0] = 1 MATCH output[1] = MATCH output[2] = MATCH output[3] = EXACT output[4] = 2 EXACT output[5] = 0 EXACT output[6] = 0 EXACT output[7] = 2 EXACT output[8] = 3 MATCH output[9] = MATCH output[10] = MATCH output[11] = EXACT output[12] = 4 MATCH output[13] = MATCH output[14] = MATCH output[15] = MATCH output[16] = MATCH output[17] = MATCH output[18] = MATCH output[19] = EXACT output[20] = 5 MATCH output[21] = MATCH output[22] = 
MATCH output[23] = Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.sim000066400000000000000000000002111413315665100321350ustar00rootroot00000000000000uninitialized_padded_nested_struct_memcpy.cl uninitialized_padded_nested_struct_memcpy 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.cl000066400000000000000000000002751413315665100304130ustar00rootroot00000000000000struct S { char a; int b; char c; }; kernel void uninitialized_padded_struct_memcpy(local int *scratch, global struct S *output) { struct S s = {1, *scratch, 2}; *output = s; } Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.ref000066400000000000000000000004771413315665100305750ustar00rootroot00000000000000ERROR Uninitialized value EXACT Argument 'output': 12 bytes EXACT output[0] = 1 EXACT output[1] = 0 EXACT output[2] = 0 EXACT output[3] = 0 MATCH output[4] = MATCH output[5] = MATCH output[6] = MATCH output[7] = EXACT output[8] = 2 EXACT output[9] = 0 EXACT output[10] = 0 EXACT output[11] = 0 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.sim000066400000000000000000000001731413315665100306020ustar00rootroot00000000000000uninitialized_padded_struct_memcpy.cl uninitialized_padded_struct_memcpy 1 1 1 1 1 1 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_private_array.cl000066400000000000000000000005311413315665100270570ustar00rootroot00000000000000kernel void uninitialized_private_array(global uint *indices, global float *input, global float *output) { float scratch[4]; for (int i = 0; i < 4; i++) { scratch[indices[i]] = i; } for (int i = 0; i < 4; i++) { output[i] = scratch[i]; } } Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_private_array.ref000066400000000000000000000002251413315665100272350ustar00rootroot00000000000000ERROR Uninitialized value EXACT Argument 'output': 16 bytes EXACT output[0] = 0 EXACT output[1] = 2 EXACT output[2] = 0 EXACT output[3] = 3 Oclgrind-21.10/tests/kernels/uninitialized/uninitialized_private_array.sim000066400000000000000000000002101413315665100272430ustar00rootroot00000000000000uninitialized_private_array.cl uninitialized_private_array 1 1 1 1 1 1 0 1 1 3 Oclgrind-21.10/tests/kernels/wait_event/000077500000000000000000000000001413315665100202355ustar00rootroot00000000000000Oclgrind-21.10/tests/kernels/wait_event/wait_event_chained.cl000066400000000000000000000005261413315665100244000ustar00rootroot00000000000000kernel void wait_event_chained(global int *data, local int *scratch) { event_t event; event = async_work_group_copy(scratch, data, 1, 0); for (int i = 1; i < 4; i++) { async_work_group_copy(scratch+i, data+i, 1, event); } wait_group_events(1, &event); int i = get_local_id(0); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-21.10/tests/kernels/wait_event/wait_event_chained.ref000066400000000000000000000001601413315665100245500ustar00rootroot00000000000000EXACT Argument 'data': 16 bytes EXACT data[0] = 3 EXACT data[1] = 2 EXACT data[2] = 1 EXACT data[3] = 0 Oclgrind-21.10/tests/kernels/wait_event/wait_event_chained.sim000066400000000000000000000001331413315665100245640ustar00rootroot00000000000000wait_event_chained.cl wait_event_chained 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/wait_event/wait_event_divergent.cl000066400000000000000000000005661413315665100250000ustar00rootroot00000000000000kernel void wait_event_divergent(global int *data, local int *scratch) { int i = get_local_id(0); scratch[i] = 0; barrier(CLK_LOCAL_MEM_FENCE); 
event_t events[2]; events[0] = async_work_group_copy(scratch, data, 1, 0); events[1] = async_work_group_copy(scratch+1, data+1, 1, 0); wait_group_events(1, events+i); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-21.10/tests/kernels/wait_event/wait_event_divergent.ref000066400000000000000000000002531413315665100251470ustar00rootroot00000000000000ERROR Work-group divergence detected (barrier) ERROR Work-item finished without waiting for events EXACT Argument 'data': 8 bytes EXACT data[0] = 0 EXACT data[1] = 0 Oclgrind-21.10/tests/kernels/wait_event/wait_event_divergent.sim000066400000000000000000000001351413315665100251620ustar00rootroot00000000000000wait_event_divergent.cl wait_event_divergent 2 1 1 2 1 1 Oclgrind-21.10/tests/kernels/wait_event/wait_event_duplicates.cl000066400000000000000000000005641413315665100251440ustar00rootroot00000000000000kernel void wait_event_duplicates(global int *data, local int *scratch) { event_t events[4]; events[0] = async_work_group_copy(scratch, data, 1, 0); events[1] = events[0]; events[2] = async_work_group_copy(scratch+1, data+1, 3, 0); events[3] = events[0]; wait_group_events(4, events); int i = get_local_id(0); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-21.10/tests/kernels/wait_event/wait_event_duplicates.ref000066400000000000000000000001601413315665100253120ustar00rootroot00000000000000EXACT Argument 'data': 16 bytes EXACT data[0] = 3 EXACT data[1] = 2 EXACT data[2] = 1 EXACT data[3] = 0 Oclgrind-21.10/tests/kernels/wait_event/wait_event_duplicates.sim000066400000000000000000000001411413315665100253250ustar00rootroot00000000000000wait_event_duplicates.cl wait_event_duplicates 4 1 1 4 1 1 Oclgrind-21.10/tests/kernels/wait_event/wait_event_invalid.cl000066400000000000000000000001521413315665100244260ustar00rootroot00000000000000kernel void wait_event_invalid(global int *data) { event_t event = 0; wait_group_events(1, &event); } Oclgrind-21.10/tests/kernels/wait_event/wait_event_invalid.ref000066400000000000000000000002121413315665100246010ustar00rootroot00000000000000ERROR Invalid wait event EXACT Argument 'data': 16 bytes EXACT data[0] = 0 EXACT data[1] = 1 EXACT data[2] = 2 EXACT data[3] = 3 Oclgrind-21.10/tests/kernels/wait_event/wait_event_invalid.sim000066400000000000000000000001211413315665100246140ustar00rootroot00000000000000wait_event_invalid.cl wait_event_invalid 4 1 1 4 1 1 Oclgrind-21.10/tests/run_test.py000066400000000000000000000112311413315665100166400ustar00rootroot00000000000000# run_test.py (Oclgrind) # Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, # University of Bristol. All rights reserved. # # This program is provided under a three-clause BSD license. For full # license terms please see the LICENSE file distributed with this # source code. 
import errno import os import re import subprocess import sys # Check arguments if len(sys.argv) != 3: print('Usage: python run_test.py OCLGRIND-EXE TEST_EXE|TEST.sim') sys.exit(1) if not os.path.isfile(sys.argv[2]): print('Test file not found') sys.exit(1) # Construct paths to test inputs/outputs oclgrind_exe = sys.argv[1] test_full_path = sys.argv[2] test_dir = os.path.dirname(os.path.realpath(test_full_path)) test_file = os.path.basename(test_full_path) test_name = os.path.splitext(test_file)[0] current_dir = os.getcwd() if test_file.endswith('.sim'): test_inp = test_full_path[:-4] + '.inp' test_ref = test_full_path[:-4] + '.ref' else: if test_full_path[0] == '/': rel_path = test_full_path[test_full_path.find('/tests/') + 7:] else: rel_path = test_full_path test_inp = os.path.dirname(os.path.abspath(__file__)) + os.path.sep \ + rel_path + '.inp' test_ref = os.path.dirname(os.path.abspath(__file__)) + os.path.sep \ + rel_path + '.ref' # Enable race detection and uninitialized memory plugins os.environ["OCLGRIND_CHECK_API"] = "1" os.environ["OCLGRIND_DATA_RACES"] = "1" os.environ["OCLGRIND_UNINITIALIZED"] = "1" def fail(ret=1): print('FAILED') sys.exit(ret) def run(output_suffix): # Get filename for test output if test_file.endswith('.sim'): test_out = test_dir.split(os.path.sep)[-1] + os.path.sep + \ test_name + output_suffix + '.out' else: test_out = test_dir + os.path.sep + \ test_name + output_suffix + '.out' output_dir = os.path.dirname(test_out) try: os.makedirs(output_dir) except OSError as exc: if exc.errno == errno.EEXIST and os.path.isdir(output_dir): pass else: raise out = open(test_out, 'w') try: inp = open(test_inp, 'r') except: inp = None # Run test if test_file.endswith('.sim'): os.chdir(test_dir) cmd = [oclgrind_exe] # Add any additional arguments specified in the test file first_line = open(test_file).readline()[:-1] if first_line[:7] == '# ARGS:': cmd.extend(first_line[8:].split(' ')) cmd.append(test_file) retval = subprocess.call(cmd, stdout=out, stderr=out, stdin=inp) os.chdir(current_dir) else: retval = subprocess.call([oclgrind_exe,test_full_path], stdout=out, stderr=out, stdin=inp) out.close() if retval != 0: print('Test returned non-zero value (' + str(retval) + ')') fail(retval) # Compare output to reference file (if provided) if os.path.isfile(test_ref): # Open output and reference files out = open(test_out).read().splitlines() ref = open(test_ref).read().splitlines() # Check output matches references oi = 0 for line in ref: if len(line) == 0: continue type = line.split()[0] text = line[6:] # Find next non-blank line in output file while True: if oi >= len(out): print('Unexpected end of output when matching ' + line) fail() if len(out[oi]): break oi += 1 if type == 'ERROR': # Check first line of error contains reference message if not text in out[oi]: print('Expected ' + line) print('Found "' + out[oi] + '"') fail() # Skip remaining lines of error while oi < len(out) and len(out[oi]): oi += 1 elif type == 'EXACT': # Check line of output matches reference exactly if not text == out[oi]: print('Expected ' + line) print('Found "' + out[oi] + '"') fail() oi += 1 elif type == 'MATCH': # Check line of output contains reference text if not text in out[oi]: print('Expected ' + line) print('Found "' + out[oi] + '"') fail() oi += 1 else: print('Invalid match type in reference file') fail() # Check there are no more lines in output while oi < len(out): if len(out[oi]) > 0: print('Unexpected output after all matches completed (line %d):' % oi) print(out[oi]) fail() oi += 1 
print('Running test with optimisations') run('') print('PASSED') print('') print('Running test without optimisations') os.environ["OCLGRIND_BUILD_OPTIONS"] = "-cl-opt-disable" run('_noopt') print('PASSED') # Test passed sys.exit(0) Oclgrind-21.10/tests/runtime/000077500000000000000000000000001413315665100161105ustar00rootroot00000000000000Oclgrind-21.10/tests/runtime/CMakeLists.txt000066400000000000000000000027031413315665100206520ustar00rootroot00000000000000# CMakeLists.txt (Oclgrind) # Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith, # University of Bristol. All rights reserved. # # This program is provided under a three-clause BSD license. For full # license terms please see the LICENSE file distributed with this # source code. set(COMMON_SOURCES ../common/common.c ../common/common.h) include_directories(../common) # Add runtime tests foreach(test build_program kernel_scope_local_mem_usage map_buffer multqueues sampler) add_executable(${test} ${test}.c ${COMMON_SOURCES}) target_compile_definitions(${test} PRIVATE "-DROOT_DIR=\"${CMAKE_CURRENT_SOURCE_DIR}\"") target_link_libraries(${test} oclgrind-rt) # Generate test binaries in same dir as Oclgrind libraries on Windows if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set_target_properties(${test} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") else() set_target_properties(${test} PROPERTIES LINKER_LANGUAGE CXX) endif() add_test( NAME rt_${test} COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/run_test.py $ $) set_tests_properties(rt_${test} PROPERTIES DEPENDS ${test}) # Set PCH directory set(ENV "OCLGRIND_TESTING=1") list(APPEND ENV "OCLGRIND_PCH_DIR=${CMAKE_BINARY_DIR}/include/oclgrind") set_tests_properties(rt_${test} PROPERTIES ENVIRONMENT "${ENV}") endforeach(${test}) Oclgrind-21.10/tests/runtime/build_program.c000066400000000000000000000044021413315665100211020ustar00rootroot00000000000000#include "common.h" #include #include #define TOL 1e-8 #define MAX_ERRORS 8 #define N 16 const char* SOURCE_INCLUDE = "#include \"header.h\" \n" "kernel void test_kernel(global int *out) \n" "{ \n" " *out = VALUE; \n" "} \n"; const char* SOURCE_MACRO = "#define _STR(ARG) #ARG \n" "#define STR(ARG) _STR(ARG) \n" "kernel void test_kernel(global int *out) \n" "{ \n" " printf(\"MSG = %s\\n\", STR(MSG)); \n" "} \n"; void run(const char* source, const char* options) { cl_int err; cl_kernel kernel; cl_mem d_out; Context cl = createContext(source, options); kernel = clCreateKernel(cl.program, "test_kernel", &err); checkError(err, "creating kernel"); d_out = clCreateBuffer(cl.context, CL_MEM_WRITE_ONLY, 4, NULL, &err); checkError(err, "creating d_out"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_out); checkError(err, "setting kernel argument"); size_t global[1] = {1}; err = clEnqueueNDRangeKernel(cl.queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); checkError(err, "enqueuing kernel"); err = clFinish(cl.queue); checkError(err, "running kernel"); int* h_out = clEnqueueMapBuffer(cl.queue, d_out, CL_TRUE, CL_MAP_READ, 0, 4, 0, NULL, NULL, &err); checkError(err, "mapping buffer for reading"); printf("out = %d\n", *h_out); err = clEnqueueUnmapMemObject(cl.queue, d_out, h_out, 0, NULL, NULL); checkError(err, "unmapping buffer"); clReleaseMemObject(d_out); clReleaseKernel(kernel); releaseContext(cl); } int main(int argc, char* argv[]) { run(SOURCE_INCLUDE, "-I " ROOT_DIR "/inc/nospace"); run(SOURCE_INCLUDE, "-I \"" ROOT_DIR "/inc/with space\""); run(SOURCE_MACRO, "-D MSG=hello"); run(SOURCE_MACRO, "-D MSG=foo\\ and\\ 
bar"); return 0; } Oclgrind-21.10/tests/runtime/build_program.ref000066400000000000000000000001401413315665100214270ustar00rootroot00000000000000EXACT out = 42 EXACT out = -7 EXACT MSG = hello MATCH out = EXACT MSG = foo and bar MATCH out = Oclgrind-21.10/tests/runtime/inc/000077500000000000000000000000001413315665100166615ustar00rootroot00000000000000Oclgrind-21.10/tests/runtime/inc/nospace/000077500000000000000000000000001413315665100203115ustar00rootroot00000000000000Oclgrind-21.10/tests/runtime/inc/nospace/header.h000066400000000000000000000000211413315665100217030ustar00rootroot00000000000000#define VALUE 42 Oclgrind-21.10/tests/runtime/inc/with space/000077500000000000000000000000001413315665100207105ustar00rootroot00000000000000Oclgrind-21.10/tests/runtime/inc/with space/header.h000066400000000000000000000000211413315665100223020ustar00rootroot00000000000000#define VALUE -7 Oclgrind-21.10/tests/runtime/kernel_scope_local_mem_usage.c000066400000000000000000000056521413315665100241310ustar00rootroot00000000000000#include "common.h" #include const char* KERNEL_SOURCE = "kernel void kernel1(global int *data) \n" "{ \n" " local int scratch[10]; \n" " size_t lid = get_local_id(0); \n" " scratch[lid] = data[lid]; \n" " barrier(CLK_LOCAL_MEM_FENCE); \n" " int sum = 0; \n" " for (int i = 0; i < 10; i++) \n" " sum += scratch[i]; \n" " data[0] = sum; \n" "} \n" " \n" "kernel void kernel2(global int *data) \n" "{ \n" " local int scratch[20]; \n" " size_t lid = get_local_id(0); \n" " scratch[lid] = data[lid]; \n" " barrier(CLK_LOCAL_MEM_FENCE); \n" " int sum = 0; \n" " for (int i = 0; i < 20; i++) \n" " sum += scratch[i]; \n" " data[0] = sum; \n" "} \n" " \n"; int main(int argc, char* argv[]) { cl_int err; cl_kernel kernel1, kernel2; cl_ulong localSize; Context cl = createContext(KERNEL_SOURCE, ""); kernel1 = clCreateKernel(cl.program, "kernel1", &err); checkError(err, "creating kernel1"); kernel2 = clCreateKernel(cl.program, "kernel2", &err); checkError(err, "creating kernel2"); err = clGetKernelWorkGroupInfo(kernel1, cl.device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localSize, NULL); checkError(err, "getting kernel1 local mem size"); if (localSize != 40) { fprintf(stderr, "Incorrect kernel1 local memory size %llu (expected 40)\n", localSize); return 1; } err = clGetKernelWorkGroupInfo(kernel2, cl.device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localSize, NULL); checkError(err, "getting kernel2 local mem size"); if (localSize != 80) { fprintf(stderr, "Incorrect kernel2 local memory size %llu (expected 80)\n", localSize); return 1; } clReleaseKernel(kernel1); clReleaseKernel(kernel2); releaseContext(cl); return 0; } Oclgrind-21.10/tests/runtime/map_buffer.c000066400000000000000000000243651413315665100203740ustar00rootroot00000000000000#include "common.h" #include #include #include #include #define TOL 1e-8 #define MAX_ERRORS 8 const char* KERNEL_SOURCE = "kernel void vecadd(global float *a, \n" " global float *b, \n" " global float *c) \n" "{ \n" " int i = get_global_id(0); \n" " c[i] = a[i] + b[i]; \n" "} \n"; unsigned checkResults(size_t N, float* a, float* b, float* results); // Run everything as normal unsigned run1(Context cl, cl_kernel kernel, cl_mem d_a, cl_mem d_b, cl_mem d_c, size_t N) { cl_int err; float *h_a, *h_b, *h_c; size_t dataSize = N * sizeof(cl_float); // Initialise data srand(0); h_a = clEnqueueMapBuffer(cl.queue, d_a, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_a buffer"); h_b = 
clEnqueueMapBuffer(cl.queue, d_b, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_b buffer"); h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_c buffer"); for (unsigned i = 0; i < N; i++) { h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX; h_c[i] = 0; } err = clEnqueueUnmapMemObject(cl.queue, d_a, h_a, 0, NULL, NULL); checkError(err, "unmapping d_a"); err = clEnqueueUnmapMemObject(cl.queue, d_b, h_b, 0, NULL, NULL); checkError(err, "unmapping d_b"); err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL); checkError(err, "unmapping d_c"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c); checkError(err, "setting kernel args"); err = clEnqueueNDRangeKernel(cl.queue, kernel, 1, NULL, &N, NULL, 0, NULL, NULL); checkError(err, "enqueuing kernel"); h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_c buffer"); err = clFinish(cl.queue); checkError(err, "running kernel"); unsigned errors = checkResults(N, h_a, h_b, h_c); err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL); checkError(err, "unmapping d_c"); return errors; } // Don't unmap input buffers before running kernel // Should result in "Invalid read from buffer mapped for writing" error unsigned run2(Context cl, cl_kernel kernel, cl_mem d_a, cl_mem d_b, cl_mem d_c, size_t N) { cl_int err; float *h_a, *h_b, *h_c; size_t dataSize = N * sizeof(cl_float); // Initialise data srand(0); h_a = clEnqueueMapBuffer(cl.queue, d_a, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_a buffer"); h_b = clEnqueueMapBuffer(cl.queue, d_b, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_b buffer"); h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_c buffer"); for (unsigned i = 0; i < N; i++) { h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX; h_c[i] = 0; } err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL); checkError(err, "unmapping d_c"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c); checkError(err, "setting kernel args"); err = clEnqueueNDRangeKernel(cl.queue, kernel, 1, NULL, &N, NULL, 0, NULL, NULL); checkError(err, "enqueuing kernel"); h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_c buffer"); err = clFinish(cl.queue); checkError(err, "running kernel"); unsigned errors = checkResults(N, h_a, h_b, h_c); err = clEnqueueUnmapMemObject(cl.queue, d_a, h_a, 0, NULL, NULL); checkError(err, "unmapping d_a"); err = clEnqueueUnmapMemObject(cl.queue, d_b, h_b, 0, NULL, NULL); checkError(err, "unmapping d_b"); err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL); checkError(err, "unmapping d_c"); return errors; } // Don't unmap output buffer before running kernel // Should result in "Invalid write to mapped buffer" error unsigned run3(Context cl, cl_kernel kernel, cl_mem d_a, cl_mem d_b, cl_mem d_c, size_t N) { cl_int err; float *h_a, *h_b, *h_c; size_t dataSize = N * sizeof(cl_float); // 
Initialise data srand(0); h_a = clEnqueueMapBuffer(cl.queue, d_a, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_a buffer"); h_b = clEnqueueMapBuffer(cl.queue, d_b, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_b buffer"); h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_c buffer"); for (unsigned i = 0; i < N; i++) { h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX; h_c[i] = 0; } err = clEnqueueUnmapMemObject(cl.queue, d_a, h_a, 0, NULL, NULL); checkError(err, "unmapping d_a"); err = clEnqueueUnmapMemObject(cl.queue, d_b, h_b, 0, NULL, NULL); checkError(err, "unmapping d_b"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c); checkError(err, "setting kernel args"); err = clEnqueueNDRangeKernel(cl.queue, kernel, 1, NULL, &N, NULL, 0, NULL, NULL); checkError(err, "enqueuing kernel"); err = clFinish(cl.queue); checkError(err, "running kernel"); unsigned errors = checkResults(N, h_a, h_b, h_c); err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL); checkError(err, "unmapping d_c"); return errors; } // Re-map input buffers for reading // Should not result in any error unsigned run4(Context cl, cl_kernel kernel, cl_mem d_a, cl_mem d_b, cl_mem d_c, size_t N) { cl_int err; float *h_a, *h_b, *h_c; size_t dataSize = N * sizeof(cl_float); // Initialise data srand(0); h_a = clEnqueueMapBuffer(cl.queue, d_a, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_a buffer"); h_b = clEnqueueMapBuffer(cl.queue, d_b, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_b buffer"); h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_c buffer"); for (unsigned i = 0; i < N; i++) { h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX; h_c[i] = 0; } h_a = clEnqueueMapBuffer(cl.queue, d_a, CL_TRUE, CL_MAP_READ, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_a buffer"); h_b = clEnqueueMapBuffer(cl.queue, d_b, CL_TRUE, CL_MAP_READ, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_b buffer"); err = clEnqueueUnmapMemObject(cl.queue, d_a, h_a, 0, NULL, NULL); checkError(err, "unmapping d_a"); err = clEnqueueUnmapMemObject(cl.queue, d_b, h_b, 0, NULL, NULL); checkError(err, "unmapping d_b"); err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL); checkError(err, "unmapping d_c"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c); checkError(err, "setting kernel args"); err = clEnqueueNDRangeKernel(cl.queue, kernel, 1, NULL, &N, NULL, 0, NULL, NULL); checkError(err, "enqueuing kernel"); h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize, 0, NULL, NULL, &err); checkError(err, "mapping d_c buffer"); err = clFinish(cl.queue); checkError(err, "running kernel"); unsigned errors = checkResults(N, h_a, h_b, h_c); err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL); checkError(err, "unmapping d_c"); return errors; } int main(int argc, char* argv[]) { cl_int err; cl_kernel kernel; cl_mem d_a, d_b, d_c; size_t N = 1; if (argc > 
1) { N = atoi(argv[1]); } Context cl = createContext(KERNEL_SOURCE, ""); kernel = clCreateKernel(cl.program, "vecadd", &err); checkError(err, "creating kernel"); size_t dataSize = N * sizeof(cl_float); d_a = clCreateBuffer(cl.context, CL_MEM_READ_ONLY, dataSize, NULL, &err); checkError(err, "creating d_a buffer"); d_b = clCreateBuffer(cl.context, CL_MEM_READ_ONLY, dataSize, NULL, &err); checkError(err, "creating d_b buffer"); d_c = clCreateBuffer(cl.context, CL_MEM_WRITE_ONLY, dataSize, NULL, &err); checkError(err, "creating d_c buffer"); unsigned errors = 0; errors += run1(cl, kernel, d_a, d_b, d_c, N); errors += run2(cl, kernel, d_a, d_b, d_c, N); errors += run3(cl, kernel, d_a, d_b, d_c, N); errors += run4(cl, kernel, d_a, d_b, d_c, N); clReleaseMemObject(d_a); clReleaseMemObject(d_b); clReleaseMemObject(d_c); clReleaseKernel(kernel); releaseContext(cl); return (errors != 0); } unsigned checkResults(size_t N, float* a, float* b, float* results) { // Check results unsigned errors = 0; for (unsigned i = 0; i < N; i++) { float ref = a[i] + b[i]; if (fabs(ref - results[i]) > TOL) { if (errors < MAX_ERRORS) { fprintf(stderr, "%4d: %.4f != %.4f\n", i, results[i], ref); } errors++; } } if (errors) printf("%d errors detected\n", errors); return errors; } Oclgrind-21.10/tests/runtime/map_buffer.ref000066400000000000000000000002121413315665100207070ustar00rootroot00000000000000ERROR Invalid read from buffer mapped for writing ERROR Invalid read from buffer mapped for writing ERROR Invalid write to mapped buffer Oclgrind-21.10/tests/runtime/multqueues.c000066400000000000000000000134031413315665100204660ustar00rootroot00000000000000#include "common.h" #include #include #define MQ_BUFSIZE 128 typedef enum { MQ_WAIT_FOR_EVENTS, MQ_FINISH } WaitType; void write_read_test(cl_context ctx, cl_device_id dev, cl_command_queue cq1, cl_command_queue cq2, WaitType wait_type, char* test_name) { // Variables cl_int err; cl_float* buf_host1A = (cl_float*)malloc(MQ_BUFSIZE * sizeof(cl_float)); cl_float* buf_host1B = (cl_float*)calloc(MQ_BUFSIZE, sizeof(cl_float)); cl_mem buf_dev1 = NULL; cl_int* buf_host2A = (cl_int*)malloc(MQ_BUFSIZE * sizeof(cl_int)); cl_int* buf_host2B = (cl_int*)calloc(MQ_BUFSIZE, sizeof(cl_int)); cl_mem buf_dev2 = NULL; cl_event ew[2] = {NULL, NULL}, er[2] = {NULL, NULL}; cl_uint i; // Fill host buffers with random data for (i = 0; i < MQ_BUFSIZE; i++) { buf_host1A[i] = rand() / (cl_float)RAND_MAX; buf_host2A[i] = (cl_int)rand(); } // Create device buffers buf_dev1 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, MQ_BUFSIZE * sizeof(cl_float), NULL, &err); checkError(err, "creating device buffer 1"); buf_dev2 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, MQ_BUFSIZE * sizeof(cl_int), NULL, &err); checkError(err, "creating device buffer 2"); // Write something to device buffer 1 using command queue 1, // generate event ew[0] err = clEnqueueWriteBuffer(cq1, buf_dev1, CL_FALSE, 0, MQ_BUFSIZE * sizeof(cl_float), buf_host1A, 0, NULL, &ew[0]); checkError(err, "writing to buffer 1"); // Write something to device buffer 2 using command queue 2, // generate event ew[1] err = clEnqueueWriteBuffer(cq2, buf_dev2, CL_FALSE, 0, MQ_BUFSIZE * sizeof(cl_int), buf_host2A, 0, NULL, &ew[1]); checkError(err, "writing to buffer 2"); // Read from device buffer 1 using command queue 2, make it depend // on event ew[0] and generate event er[0] err = clEnqueueReadBuffer(cq2, buf_dev1, CL_FALSE, 0, MQ_BUFSIZE * sizeof(cl_float), buf_host1B, 1, &ew[0], &er[0]); checkError(err, "reading from buffer 1"); // Read from device buffer 2 
using command queue 1, make it depend // on event ew[1] and generate event er[1] err = clEnqueueReadBuffer(cq1, buf_dev2, CL_FALSE, 0, MQ_BUFSIZE * sizeof(cl_int), buf_host2B, 1, &ew[1], &er[1]); checkError(err, "reading from buffer 1"); // Wait on host thread for work to finish switch (wait_type) { case MQ_WAIT_FOR_EVENTS: // Wait on host thread for read events clWaitForEvents(2, er); break; case MQ_FINISH: // Wait on host thread for queues to be processed clFinish(cq1); // If they're not the same queue, must also wait in the second queue if (cq1 != cq2) { clFinish(cq2); } break; default: fprintf(stderr, "Unknown wait type\n"); exit(1); } // Check results for (i = 0; i < MQ_BUFSIZE; i++) { if (buf_host1A[i] != buf_host1B[i] || buf_host2A[i] != buf_host2B[i]) { fprintf(stderr, "Incorrect results in test %s\n", test_name); exit(1); } } // If we get here everything is OK printf("OK\n"); // Release stuff for (i = 0; i < 2; i++) { clReleaseEvent(ew[i]); clReleaseEvent(er[i]); } clReleaseMemObject(buf_dev1); clReleaseMemObject(buf_dev2); free(buf_host1A); free(buf_host1B); free(buf_host2A); free(buf_host2B); } int main(int argc, char* argv[]) { /////////// // Setup // /////////// // Variables cl_platform_id platf; cl_context ctx = NULL; cl_device_id dev; cl_int err; cl_command_queue cq1 = NULL, cq2 = NULL, oocq = NULL; // Initialize PRNG srand(0); // Get platform err = clGetPlatformIDs(1, &platf, NULL); checkError(err, "getting platform"); // Check its Oclgrind checkOclgrindPlatform(platf); // Get first device err = clGetDeviceIDs(platf, CL_DEVICE_TYPE_ALL, 1, &dev, NULL); checkError(err, "getting device"); // Create context ctx = clCreateContext(NULL, 1, &dev, NULL, NULL, &err); checkError(err, "creating context"); //////////////////////////////// // Test 1: Two command queues // //////////////////////////////// // Create first command queue cq1 = clCreateCommandQueue(ctx, dev, 0, &err); checkError(err, "creating first command queue"); // Create second command queue cq2 = clCreateCommandQueue(ctx, dev, 0, &err); checkError(err, "creating second command queue"); // Test 1.1: Perform test with two different command queues and explicitly // waiting for events write_read_test(ctx, dev, cq1, cq2, MQ_WAIT_FOR_EVENTS, "test 1.1"); // Test 1.2: Perform test with two different command queues and wait for // the second queue to finish write_read_test(ctx, dev, cq1, cq2, MQ_FINISH, "test 1.2"); // Release command queues clReleaseCommandQueue(cq2); clReleaseCommandQueue(cq1); //////////////////////////////////////////// // Test 2: One out-of-order command queue // //////////////////////////////////////////// // Create an out-of-order command queue oocq = clCreateCommandQueue(ctx, dev, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); checkError(err, "creating out-of-order command queue"); // Test 2.1: Perform test with out-of-order command queue and explicitly // waiting for events write_read_test(ctx, dev, oocq, oocq, MQ_WAIT_FOR_EVENTS, "test 2.1"); // Test 2.2: Perform test with out-of-order command queue and wait for // queue to finish write_read_test(ctx, dev, oocq, oocq, MQ_FINISH, "test 2.2"); // Release command queue clReleaseCommandQueue(oocq); ////////////// // Clean-up // ////////////// // Release context clReleaseContext(ctx); return 0; } Oclgrind-21.10/tests/runtime/multqueues.ref000066400000000000000000000000431413315665100210140ustar00rootroot00000000000000EXACT OK EXACT OK EXACT OK EXACT 
OKOclgrind-21.10/tests/runtime/sampler.c000066400000000000000000000074761413315665100177350ustar00rootroot00000000000000#include "common.h" #include #include #include #include #include #define TOL 1e-8 #define MAX_ERRORS 8 #define N 16 const char* KERNEL_SOURCE = "kernel void test_sampler(read_only image2d_t input, \n" " write_only image2d_t output, \n" " sampler_t sampler) \n" "{ \n" " int x = get_global_id(0); \n" " int y = get_global_id(1); \n" " float4 pixel = read_imagef(input, sampler, (int2)(x,y)); \n" " float4 left = read_imagef(input, sampler, (int2)(x-1,y)); \n" " write_imagef(output, (int2)(x,y), pixel+left); \n" "} \n"; unsigned checkResults(uint8_t* input, uint8_t* output); int main(int argc, char* argv[]) { cl_int err; cl_kernel kernel; cl_mem d_input, d_output; cl_sampler sampler; Context cl = createContext(KERNEL_SOURCE, ""); kernel = clCreateKernel(cl.program, "test_sampler", &err); checkError(err, "creating kernel"); cl_image_format format; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNORM_INT8; cl_image_desc desc = {0}; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = N; desc.image_height = N; // Create images d_input = clCreateImage(cl.context, CL_MEM_READ_ONLY, &format, &desc, NULL, &err); checkError(err, "creating d_input image"); d_output = clCreateImage(cl.context, CL_MEM_WRITE_ONLY, &format, &desc, NULL, &err); checkError(err, "creating d_output image"); size_t dataSize = N * N * 4; // Initialise data uint8_t* h_input = malloc(dataSize); uint8_t* h_output = malloc(dataSize); srand(0); for (unsigned i = 0; i < dataSize; i++) { h_input[i] = rand() % 256 / 2; h_output[i] = 0; } size_t origin[] = {0, 0, 0}; size_t region[] = {N, N, 1}; err = clEnqueueWriteImage(cl.queue, d_input, CL_TRUE, origin, region, 0, 0, h_input, 0, NULL, NULL); checkError(err, "writing image data"); // Create sampler sampler = clCreateSampler(cl.context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); checkError(err, "creating sampler"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_input); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_output); err |= clSetKernelArg(kernel, 2, sizeof(cl_sampler), &sampler); checkError(err, "setting kernel args"); size_t global[2] = {N, N}; err = clEnqueueNDRangeKernel(cl.queue, kernel, 2, NULL, global, NULL, 0, NULL, NULL); checkError(err, "enqueuing kernel"); err = clFinish(cl.queue); checkError(err, "running kernel"); err = clEnqueueReadImage(cl.queue, d_output, CL_TRUE, origin, region, 0, 0, h_output, 0, NULL, NULL); checkError(err, "writing image data"); unsigned errors = checkResults(h_input, h_output); clReleaseMemObject(d_input); clReleaseMemObject(d_output); clReleaseKernel(kernel); releaseContext(cl); return (errors != 0); } unsigned checkResults(uint8_t* input, uint8_t* output) { // Check results unsigned errors = 0; for (int y = 0; y < N; y++) { for (int x = 0; x < N; x++) { int xleft = x ? x - 1 : 0; for (int c = 0; c < 4; c++) { int i = (x + y * N) * 4 + c; int ref = input[i] + input[(xleft + y * N) * 4 + c]; if (output[i] != ref) { if (errors < MAX_ERRORS) { fprintf(stderr, "%2d,%2d,%2d: %d != %d\n", x, y, c, output[i], ref); } errors++; } } } } printf("%d errors detected\n", errors); return errors; } Oclgrind-21.10/tests/runtime/sampler.ref000066400000000000000000000000301413315665100202420ustar00rootroot00000000000000EXACT 0 errors detected