pax_global_header00006660000000000000000000000064125244167100014514gustar00rootroot0000000000000052 comment=595cf308804927d5e5bbe98820a594379a578d33 Oclgrind-15.5/000077500000000000000000000000001252441671000132075ustar00rootroot00000000000000Oclgrind-15.5/.gitignore000066400000000000000000000011561252441671000152020ustar00rootroot00000000000000# Autotools generated files aclocal.m4 autom4te.cache compile config.guess config.h config.h.in config.log config.status config.sub configure depcomp .deps *.dirstamp install-sh ltmain.sh m4/libtool.m4 m4/ltoptions.m4 m4/ltsugar.m4 m4/ltversion.m4 m4/lt~obsolete.m4 Makefile Makefile.in missing stamp-h1 test-driver # Compiler output *.o *.lo .libs liboclgrind.la liboclgrind-rt.la liboclgrind-rt-icd.la libtool /oclgrind oclgrind.icd oclgrind-kernel src/core/clc_h.cpp # Test output test-suite.log *.trs *.log *.out *.diff tests/apps/vecadd/vecadd # Misc oclgrind-*.tar.gz oclgrind-*.zip .DS_Store *.kdev4 *.sublime-* Oclgrind-15.5/CMakeLists.txt000066400000000000000000000207031252441671000157510ustar00rootroot00000000000000# CMakeLists.txt (Oclgrind) # Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, # University of Bristol. All rights reserved. # # This program is provided under a three-clause BSD license. For full # license terms please see the LICENSE file distributed with this # source code. 
cmake_minimum_required(VERSION 2.8.12) project(Oclgrind) set(Oclgrind_VERSION_MAJOR 15) set(Oclgrind_VERSION_MINOR 5) include(CheckIncludeFiles) include(CheckLibraryExists) # Enable C99 for GCC (required for tests) if (CMAKE_COMPILER_IS_GNUCC) set(CMAKE_C_FLAGS "-std=c99") endif() # Enable rpath on OS X set(CMAKE_MACOSX_RPATH 1) # Enable C++11 for Clang/GCC if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set(CMAKE_CXX_FLAGS "-std=c++11") endif() # Disable min/max macros on Windows if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") add_definitions(-DNOMINMAX) endif() # Suppress warnings from OpenCL runtime API headers if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ignored-attributes -Wno-gcc-compat -Wno-availability") endif() # Find LLVM find_package(LLVM REQUIRED CONFIG NO_CMAKE_BUILDS_PATH) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") # Check LLVM version if (${LLVM_PACKAGE_VERSION} VERSION_LESS "3.6") message(FATAL_ERROR "LLVM version must be >= 3.6") endif() set(LLVM_VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}) # Add flags for LLVM add_definitions(${LLVM_DEFINITIONS}) include_directories(${LLVM_INCLUDE_DIRS}) link_directories(${LLVM_LIBRARY_DIRS}) set(CLANG ${LLVM_TOOLS_BINARY_DIR}/clang) # Get LLVM libraries for linking llvm_map_components_to_libnames(LLVM_LIBS bitreader bitwriter core instrumentation ipo irreader linker mcparser objcarcopts option) # Check for GNU readline library if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set(READLINE_DIR "" CACHE PATH "Location of GNU readline library") set(CMAKE_REQUIRED_INCLUDES ${READLINE_DIR}/include) include_directories(${READLINE_DIR}/include) link_directories(${READLINE_DIR}/lib) message(STATUS ${CMAKE_REQUIRED_LIBRARIES}) check_include_files("stdio.h;readline/readline.h" HAVE_READLINE_H) check_include_files("stdio.h;readline/history.h" HAVE_HISTORY_H) check_library_exists(readline readline 
"${READLINE_DIR}/lib" HAVE_READLINE_LIB) check_library_exists(readline add_history "${READLINE_DIR}/lib" HAVE_HISTORY_LIB) if (HAVE_READLINE_H AND HAVE_HISTORY_H AND HAVE_READLINE_LIB AND HAVE_HISTORY_LIB) set(HAVE_READLINE 1) list(APPEND CORE_EXTRA_LIBS readline) else() set(HAVE_READLINE 0) message(WARNING "GNU readline library not found (set READLINE_DIR)\n" "The interactive debugger will not have a command history.") endif() else() set(HAVE_READLINE 0) endif() # Generate stringified clc.h add_custom_command( OUTPUT src/core/clc_h.cpp COMMAND ${CMAKE_COMMAND} -DSOURCE_FILE=${CMAKE_SOURCE_DIR}/src/core/clc.h -P ${CMAKE_SOURCE_DIR}/src/core/gen_clc_h.cmake DEPENDS src/core/clc.h src/core/gen_clc_h.cmake ) include_directories("src/" "${PROJECT_BINARY_DIR}") if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set(CORE_LIB_TYPE "SHARED") endif() set(CORE_HEADERS src/core/common.h src/core/Context.h src/core/half.h src/core/Kernel.h src/core/KernelInvocation.h src/core/Memory.h src/core/Plugin.h src/core/Program.h src/core/Queue.h src/core/WorkItem.h src/core/WorkGroup.h) add_library(oclgrind ${CORE_LIB_TYPE} ${CORE_HEADERS} src/core/clc_h.cpp src/core/common.cpp src/core/Context.cpp src/core/Kernel.cpp src/core/KernelInvocation.cpp src/core/Memory.cpp src/core/Plugin.cpp src/core/Program.cpp src/core/Queue.cpp src/core/WorkItem.cpp src/core/WorkItemBuiltins.cpp src/core/WorkGroup.cpp src/plugins/InstructionCounter.h src/plugins/InstructionCounter.cpp src/plugins/InteractiveDebugger.h src/plugins/InteractiveDebugger.cpp src/plugins/Logger.h src/plugins/Logger.cpp src/plugins/MemCheck.h src/plugins/MemCheck.cpp src/plugins/RaceDetector.h src/plugins/RaceDetector.cpp) target_link_libraries(oclgrind ${CORE_EXTRA_LIBS} clangAnalysis clangAST clangBasic clangCodeGen clangDriver clangEdit clangFrontend clangLex clangParse clangSema clangSerialization ${LLVM_LIBS}) # Sources for OpenCL runtime API frontend set(RUNTIME_SOURCES src/runtime/async_queue.h 
src/runtime/async_queue.cpp src/runtime/icd.h src/runtime/runtime.cpp) # Add ICD exports on Windows if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") list(APPEND RUNTIME_SOURCES src/runtime/icd.def) endif() add_library(oclgrind-rt-icd SHARED ${RUNTIME_SOURCES}) set_target_properties(oclgrind-rt-icd PROPERTIES COMPILE_FLAGS -DOCLGRIND_ICD) target_link_libraries(oclgrind-rt-icd ${CMAKE_DL_LIBS} oclgrind) # Add runtime exports on Windows if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") list(APPEND RUNTIME_SOURCES src/runtime/runtime.def) endif() add_library(oclgrind-rt SHARED ${RUNTIME_SOURCES}) target_link_libraries(oclgrind-rt ${CMAKE_DL_LIBS} oclgrind) add_executable(oclgrind-kernel src/kernel/oclgrind-kernel.cpp src/kernel/Simulation.h src/kernel/Simulation.cpp) target_link_libraries(oclgrind-kernel oclgrind) set(CLC_HEADERS ${CMAKE_BINARY_DIR}/include/oclgrind/clc.h ${CMAKE_BINARY_DIR}/include/oclgrind/clc32.pch ${CMAKE_BINARY_DIR}/include/oclgrind/clc64.pch ) add_custom_target(CLC_HEADERS ALL DEPENDS ${CLC_HEADERS}) add_custom_command( OUTPUT include/oclgrind/clc.h POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/src/core/clc.h include/oclgrind/clc.h DEPENDS src/core/clc.h) # Generate precompiled headers for clc.h add_custom_command( OUTPUT include/oclgrind/clc32.pch POST_BUILD COMMAND ${CLANG} -cc1 -x cl -cl-std=CL1.2 -O0 -g -fno-builtin -emit-pch -triple spir-unknown-unknown -relocatable-pch -isysroot ${CMAKE_BINARY_DIR}/include/oclgrind/ ${CMAKE_BINARY_DIR}/include/oclgrind/clc.h -o include/oclgrind/clc32.pch DEPENDS include/oclgrind/clc.h ) add_custom_command( OUTPUT include/oclgrind/clc64.pch POST_BUILD COMMAND ${CLANG} -cc1 -x cl -cl-std=CL1.2 -O0 -g -fno-builtin -emit-pch -triple spir64-unknown-unknown -relocatable-pch -isysroot ${CMAKE_BINARY_DIR}/include/oclgrind/ ${CMAKE_BINARY_DIR}/include/oclgrind/clc.h -o include/oclgrind/clc64.pch DEPENDS include/oclgrind/clc.h ) # Generate config.h configure_file("cmake_config.h.in" "config.h") # Install 
oclgrind script if not on Windows if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") file(READ src/runtime/oclgrind OCLGRIND_SCRIPT) string(REGEX REPLACE "__VERSION__" "${Oclgrind_VERSION_MAJOR}.${Oclgrind_VERSION_MINOR}" OCLGRIND_SCRIPT "${OCLGRIND_SCRIPT}") file(WRITE ${CMAKE_BINARY_DIR}/oclgrind "${OCLGRIND_SCRIPT}") # Generate ICD loader get_property(OCLGRIND_RT_FILENAME TARGET oclgrind-rt-icd PROPERTY LOCATION) file(WRITE ${CMAKE_BINARY_DIR}/oclgrind.icd "${OCLGRIND_RT_FILENAME}\n") install(PROGRAMS ${CMAKE_BINARY_DIR}/oclgrind DESTINATION bin) endif() install(TARGETS oclgrind-kernel DESTINATION bin) install(TARGETS oclgrind oclgrind-rt oclgrind-rt-icd DESTINATION lib) install(FILES ${CORE_HEADERS} ${CMAKE_BINARY_DIR}/config.h ${CLC_HEADERS} LICENSE DESTINATION include/oclgrind) if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") install(FILES src/CL/cl.h src/CL/cl_d3d10.h src/CL/cl_d3d11.h src/CL/cl_dx9_media_sharing.h src/CL/cl_egl.h src/CL/cl_ext.h src/CL/cl_gl.h src/CL/cl_gl_ext.h src/CL/cl_platform.h src/CL/opencl.h DESTINATION include/CL) endif() # Tests enable_testing() # Check for Python find_package(PythonInterp) if (PYTHONINTERP_FOUND) # Add kernel tests file(READ tests/kernels/TESTS KERNEL_TESTS) string(REPLACE "\n" ";" KERNEL_TESTS ${KERNEL_TESTS}) foreach(test ${KERNEL_TESTS}) add_test( NAME ${test} COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/kernels/run_kernel_test.py $ ${CMAKE_SOURCE_DIR}/tests/kernels/${test}.sim) endforeach(${test}) # Set PCH directory set_tests_properties(${KERNEL_TESTS} PROPERTIES ENVIRONMENT "OCLGRIND_PCH_DIR=${CMAKE_BINARY_DIR}/include/oclgrind") # Expected failures set_tests_properties( atomics/atomic_intergroup_race data-race/intragroup_hidden_race PROPERTIES WILL_FAIL TRUE) else() message(WARNING "Kernel tests will not be run (Python required)") endif() # Add app tests add_subdirectory(tests/apps) Oclgrind-15.5/LICENSE000066400000000000000000000030001252441671000142050ustar00rootroot00000000000000Copyright (c) 
2013-2015, James Price and Simon McIntosh-Smith, University of Bristol. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Oclgrind-15.5/Makefile.am000066400000000000000000000132621252441671000152470ustar00rootroot00000000000000# Makefile.am (Oclgrind) # Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, # University of Bristol. All rights reserved. # # This program is provided under a three-clause BSD license. For full # license terms please see the LICENSE file distributed with this # source code. 
AUTOMAKE_OPTIONS = subdir-objects ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4 AM_CFLAGS = -std=c99 AM_CPPFLAGS = -I$(top_srcdir)/src/ -Wall # Suppress warnings from OpenCL runtime API headers if USING_CLANG AM_CPPFLAGS += -Wno-ignored-attributes -Wno-gcc-compat -Wno-availability endif USING_CLANG lib_LTLIBRARIES = liboclgrind.la liboclgrind-rt.la liboclgrind-rt-icd.la LLVM_LIBS = `$(llvm_config) --system-libs --libs bitreader bitwriter \ core instrumentation ipo irreader linker mcparser objcarcopts option` liboclgrind_la_SOURCES = src/core/common.h src/core/common.cpp \ src/core/Context.h src/core/Context.cpp src/core/half.h \ src/core/Kernel.h src/core/Kernel.cpp src/core/KernelInvocation.h \ src/core/KernelInvocation.cpp src/core/Memory.h src/core/Memory.cpp \ src/core/Plugin.h src/core/Plugin.cpp src/core/Program.h \ src/core/Program.cpp src/core/Queue.h src/core/Queue.cpp \ src/core/WorkItem.h src/core/WorkItem.cpp \ src/core/WorkItemBuiltins.cpp src/core/WorkGroup.h \ src/core/WorkGroup.cpp src/plugins/InstructionCounter.h \ src/plugins/InstructionCounter.cpp src/plugins/InteractiveDebugger.h \ src/plugins/InteractiveDebugger.cpp src/plugins/Logger.h \ src/plugins/Logger.cpp src/plugins/MemCheck.h \ src/plugins/MemCheck.cpp src/plugins/RaceDetector.h \ src/plugins/RaceDetector.cpp nodist_liboclgrind_la_SOURCES = src/core/clc_h.cpp config.h liboclgrind_la_LDFLAGS = -lclangFrontend -lclangDriver \ -lclangSerialization -lclangCodeGen -lclangParse -lclangSema \ -lclangAnalysis -lclangEdit -lclangAST -lclangLex -lclangBasic \ ${LLVM_LIBS} $(oclgrind_extra_libs) -shared oclgrind_includedir = $(includedir)/oclgrind oclgrind_include_HEADERS = src/core/common.h src/core/Context.h \ src/core/half.h src/core/Kernel.h src/core/KernelInvocation.h \ src/core/Memory.h src/core/Plugin.h src/core/Program.h \ src/core/Queue.h src/core/WorkItem.h src/core/WorkGroup.h config.h LICENSE src/core/clc_h.cpp: src/core/gen_clc_h.sh src/core/clc.h $(top_srcdir)/src/core/gen_clc_h.sh 
$(top_srcdir)/src/core/clc.h $@ install-data-hook: cp -p src/include/oclgrind/clc.h $(DESTDIR)$(includedir)/oclgrind/ cp -p src/include/oclgrind/clc32.pch $(DESTDIR)$(includedir)/oclgrind/ cp -p src/include/oclgrind/clc64.pch $(DESTDIR)$(includedir)/oclgrind/ uninstall-hook: rm -rf $(DESTDIR)$(includedir)/oclgrind/clc.h rm -rf $(DESTDIR)$(includedir)/oclgrind/clc32.pch rm -rf $(DESTDIR)$(includedir)/oclgrind/clc64.pch RUNTIME_SOURCES = src/runtime/async_queue.h \ src/runtime/async_queue.cpp src/runtime/icd.h src/runtime/runtime.cpp liboclgrind_rt_la_SOURCES = $(RUNTIME_SOURCES) liboclgrind_rt_la_LIBADD = liboclgrind.la liboclgrind_rt_la_LDFLAGS = -shared liboclgrind_rt_icd_la_CPPFLAGS = -DOCLGRIND_ICD $(AM_CPPFLAGS) liboclgrind_rt_icd_la_SOURCES = $(RUNTIME_SOURCES) liboclgrind_rt_icd_la_LIBADD = liboclgrind.la liboclgrind_rt_icd_la_LDFLAGS = -shared bin_PROGRAMS = oclgrind-kernel oclgrind_kernel_SOURCES = src/kernel/oclgrind-kernel.cpp \ src/kernel/Simulation.h src/kernel/Simulation.cpp oclgrind_kernel_LDADD = liboclgrind.la bin_SCRIPTS = oclgrind oclgrind: $(top_srcdir)/src/runtime/oclgrind cat $(top_srcdir)/src/runtime/oclgrind \ | $(SED) 's|__VERSION__|'$(VERSION)'|g' \ >$@ noinst_SCRIPTS = oclgrind.icd \ src/include/oclgrind/clc.h \ src/include/oclgrind/clc32.pch \ src/include/oclgrind/clc64.pch oclgrind.icd: liboclgrind-rt-icd.la printf $(libdir)/ >$@ $(GREP) dlname $< | $(AWK) -F "'" '{print $$2}' >>$@ src/include/oclgrind/clc.h: $(top_srcdir)/src/core/clc.h mkdir -p src/include/oclgrind cp $< $@ src/include/oclgrind/clc32.pch: src/include/oclgrind/clc.h $(clang) \ -cc1 -x cl -cl-std=CL1.2 -O0 -g -fno-builtin \ -emit-pch -triple spir-unknown-unknown \ -relocatable-pch \ -isysroot $(abs_builddir)/src/include/oclgrind \ $< -o $@ src/include/oclgrind/clc64.pch: src/include/oclgrind/clc.h $(clang) \ -cc1 -x cl -cl-std=CL1.2 -O0 -g -fno-builtin \ -emit-pch -triple spir64-unknown-unknown \ -relocatable-pch \ -isysroot $(abs_builddir)/src/include/oclgrind \ $< -o 
$@ check_PROGRAMS = tests/apps/vecadd/vecadd tests_apps_vecadd_vecadd_LDADD = liboclgrind-rt.la TESTS = $(check_PROGRAMS) TEST_EXTENSIONS = .sim SIM_LOG_COMPILER = $(PYTHON) \ $(top_srcdir)/tests/kernels/run_kernel_test.py \ ${abs_top_builddir}/oclgrind-kernel AM_TESTS_ENVIRONMENT = \ export AM_TESTS=1; \ export OCLGRIND_PCH_DIR=$(abs_builddir)/src/include/oclgrind; if HAVE_PYTHON TESTS += $(KERNEL_TESTS) XFAIL_TESTS = \ tests/kernels/atomics/atomic_intergroup_race.sim \ tests/kernels/data-race/intragroup_hidden_race.sim else check-local: @echo @echo "WARNING: Kernel tests skipped (Python required)." @echo endif EXTRA_DIST = NEWS src/core/gen_clc_h.sh src/core/clc.h \ src/runtime/oclgrind src/CL/cl.h src/CL/cl_gl.h src/CL/cl_platform.h \ src/CL/cl_ext.h src/CL/cl_gl_ext.h src/CL/cl_egl.h src/CL/cl_d3d10.h \ src/CL/cl_d3d11.h src/CL/cl_dx9_media_sharing.h src/CL/opencl.h \ CMakeLists.txt tests/apps/CMakeLists.txt cmake_config.h.in \ src/core/gen_clc_h.cmake src/runtime/icd.def src/runtime/runtime.def \ src/install/INSTALL.darwin src/install/INSTALL.linux \ src/install/INSTALL.windows src/install/install.bat \ src/install/uninstall.bat src/install/oclgrind-icd.reg \ tests/kernels/run_kernel_test.py tests/kernels/TESTS \ $(KERNEL_TEST_INPUTS) CLEANFILES = src/core/clc_h.cpp $(bin_SCRIPTS) $(noinst_SCRIPTS) \ $(KERNEL_TEST_OUTPUTS) Oclgrind-15.5/NEWS000066400000000000000000000040011252441671000137010ustar00rootroot00000000000000For more information, please visit the Oclgrind Wiki: https://github.com/jrprice/Oclgrind/wiki Oclgrind 15.5 ============= This release updates to LLVM 3.6, which improves the OpenCL C compiler and provides some additional performance enhancements. See README for revised instructions on how to build Oclgrind from source. 
- Fixed race conditions in atomic operations - Interactive debugger breaks on Ctrl+C - Various other minor bug fixes Oclgrind 15.2 ============= This release significantly improves simulation performance, and fixes several bugs impacting on usage and stability. - Added detection for violations of read-only/write-only attributes - Added --build-options argument to append additional compiler flags - Added hostMemoryLoad and hostMemoryStore callbacks - Added workGroupBegin and workItemBegin callbacks - Split atomic callbacks into separate load and store - Multi-threaded simulation to improve performance - Various other performance improvements - Several general bug fixes and stability improvements Oclgrind 14.12 ============== This release incorporates a new plugin system, to allow third-party developers to build tools that utilise Oclgrind. More information can be found on the Wiki: https://github.com/jrprice/Oclgrind/wiki/Creating-Plugins In addition, this release contains the following changes: - Interactive debugger now has a command history - Detection for unaligned memory accesses - Limit the number of error messages printed to avoid flooding output - Various other bug fixes and improvements Oclgrind 14.5 ============= Initial release (beta). Implements a SPIR 1.2 interpreter which can be targeted either via an OpenCL 1.2 runtime API implementation or using a standalone kernel interface. Provides the following utilities: - Memory access error detection - Work-group divergence detection (barriers, async-copies) - Data-race detection (--data-races) - Simple interactive debugger (--interactive) - Instruction histograms (--inst-counts) - OpenCL runtime API error reporting (--check-api) Oclgrind-15.5/README000066400000000000000000000121061252441671000140670ustar00rootroot00000000000000======== Oclgrind ======== About ----- This project implements a virtual OpenCL device simulator, including an OpenCL runtime with ICD support. 
The goal is to provide a platform for creating tools to aid OpenCL development. In particular, this project currently implements utilities for debugging memory access errors, detecting data-races and barrier divergence, collecting instruction histograms, and for interactive OpenCL kernel debugging. The simulator is built on an interpreter for LLVM IR. This project is being developed by James Price and Simon McIntosh-Smith at the University of Bristol. Binary releases can be found on the GitHub releases page: https://github.com/jrprice/Oclgrind/releases Building -------- To build this project, you will require the LLVM and Clang 3.6 development libraries and headers. With some modifications, it may also be possible to use other (recent) versions of LLVM. If building LLVM from source, it is recommended to enable optimizations to improve the performance of Oclgrind (configure with --enable-optimized, or set CMAKE_BUILD_TYPE to RelWithDebInfo). You will also need to use a compiler that supports C++11. Building on Linux and OS X -------------------------- If you are building directly from the GitHub repository, you need to run 'autoreconf -i' to generate the necessary build files. This is not required if you are using a released source package. Run ./configure to generate the Makefile, optionally using --prefix=PATH to specify the target installation directory. If you don't have the LLVM/Clang includes and libraries on your search path, you can specify the location of your LLVM installation using the --with-llvm=PATH option. For example: ./configure --prefix=$PWD/build/ --with-llvm=PATH/TO/LLVM/INSTALL This path should be the directory in which LLVM is installed (e.g. the path specified to --prefix or CMAKE_INSTALL_PATH when LLVM was built). Next, build and install with make: make make check make install If installing to a non-default location, you should add the bin/ directory to the PATH environment variable in order to make use of the oclgrind command. 
If you wish to use Oclgrind via the OpenCL ICD (optional), then you should create an ICD loading point by copying the oclgrind.icd file from the build directory to /etc/OpenCL/vendors/. Building on Windows ------------------- A CMake build system is provided for building Oclgrind on Windows. At present, this only works with Visual Studio 2013 (or newer), and Windows 7. When configuring the CMake build, you may be prompted to supply a value for the LLVM_DIR parameter. This should be set to the directory containing your LLVM installations's LLVMConfig.cmake file, (for example C:\Program Files\LLVM\share\llvm\cmake\). If you wish to use Oclgrind via the OpenCL ICD (optional), then you should also create an ICD loading point. To do this, you should add a REG_DWORD value to the Windows Registry under one or both of the registry keys below, with the name set to the absolute path of the oclgrind-rt-icd.dll library and the value set to 0. Key for 32-bit machines or 64-bit apps on a 64-bit machine: HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors Key for 32-bit apps on a 64-bit machine: HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Khronos\OpenCL\Vendors Usage ----- The recommended method of running an application with Oclgrind is to use the oclgrind command, for example: oclgrind ./application This command will make it such the only OpenCL platform and device available to your application is Oclgrind. If you need more control over platform selection then installing an ICD loading point for Oclgrind will cause it to appear when an application calls clGetPlatformIDs(), alongside any other OpenCL platforms installed on your system. 
If it encounters any invalid memory accesses, Oclgrind will report the details to stderr, for example: > Invalid write of size 4 at global memory address 0x1000000000040 > Kernel: vecadd > Entity: Global(16,0,0) Local(0,0,0) Group(16,0,0) > store i32 %tmp9, i32 addrspace(1)* %tmp15, align 4 > At line 4 of input.cl > c[i] = a[i] + b[i] Since it is interpreting an abstract intermediate representation and bounds-checking each memory access, Oclgrind will run quite slowly (typically a couple of orders of magnitude slower than a regular CPU implementation). Therefore, it is recommended to run your application with a small problem if possible. To enable an interactive, GDB-style debugging session, supply the -i flag to the oclgrind command, or export the environment variable OCLGRIND_INTERACTIVE=1. This will cause Oclgrind to automatically break at the beginning of each kernel invocation, and upon encountering an invalid memory access. Type 'help' for details of available commands. For more detailed information about using Oclgrind please visit the GitHub Wiki: https://github.com/jrprice/Oclgrind/wiki/ Contact ------- If you encounter any issues or have any questions, please use the GitHub issues page: https://github.com/jrprice/Oclgrind/issues You can also contact the primary developer via email: James Price Oclgrind-15.5/cmake_config.h.in000066400000000000000000000002301252441671000163650ustar00rootroot00000000000000#define PACKAGE_VERSION "@Oclgrind_VERSION_MAJOR@.@Oclgrind_VERSION_MINOR@" #define HAVE_READLINE @HAVE_READLINE@ #define LLVM_VERSION @LLVM_VERSION@ Oclgrind-15.5/configure.ac000066400000000000000000000100611252441671000154730ustar00rootroot00000000000000# configure.ac (Oclgrind) # Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, # University of Bristol. All rights reserved. # # This program is provided under a three-clause BSD license. For full # license terms please see the LICENSE file distributed with this # source code. 
AC_INIT([Oclgrind], [15.5], , [oclgrind], [https://github.com/jrprice/Oclgrind]) AC_PREREQ([2.63]) AC_CONFIG_SRCDIR([src/]) AM_INIT_AUTOMAKE([foreign 1.12]) AC_LANG(C++) AC_PROG_CXX AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_FILES([Makefile]) LT_INIT # Check if we're compiling with Clang AS_CASE([`$CC --version`], [*clang*], [using_clang=yes]) AM_CONDITIONAL([USING_CLANG], [test "$using_clang" == "yes"]) oclgrind_extra_libs= # Check for C++11 AX_CHECK_COMPILE_FLAG([-std=c++11], [], [AC_MSG_ERROR([C++11 support is required])]) CXXFLAGS="$CXXFLAGS -std=c++11" CPPFLAGS="$CPPFLAGS -std=c++11" # --with-llvm option to specify root of LLVM/Clang installation AC_ARG_WITH( llvm, [AS_HELP_STRING([--with-llvm], [directory containing LLVM/Clang installation])], [AC_SUBST(clang, $withval/bin/clang) AC_SUBST(llvm_config, $withval/bin/llvm-config)]) # Find LLVM/Clang binaries (assume on PATH if --with-llvm not used) AC_CHECK_PROG(clang, [clang], `which clang`) AC_CHECK_PROG(llvm_config, [llvm-config], `which llvm-config`) if test -z $llvm_config; then AC_MSG_ERROR([llvm-config not found (use --with-llvm=)]) fi # Check version of LLVM AC_MSG_CHECKING([llvm version]) llvm_full_version=`$llvm_config --version` llvm_version=`echo $llvm_full_version | cut -b 1,3` AC_MSG_RESULT($llvm_full_version) if test $llvm_version -lt 36; then AC_MSG_ERROR([LLVM version must be >= 3.6]) fi AC_DEFINE_UNQUOTED([LLVM_VERSION], [$llvm_version], [Version of LLVM we are building against]) # Add flags for LLVM CPPFLAGS="$CPPFLAGS `$llvm_config --cppflags`" LDFLAGS="$LDFLAGS `$llvm_config --ldflags`" # Check for LLVM/Clang headers/libraries AC_CHECK_HEADERS( [llvm/IR/Instruction.h clang/CodeGen/CodeGenAction.h], [:], [AC_MSG_ERROR([LLVM/Clang includes not found (use --with-llvm=)])]) AC_CHECK_LIB( [clangFrontend], [main], [:], [AC_MSG_ERROR([Clang library not found (use --with-llvm)])]) # GNU readline library (for interactive debugger) AC_ARG_WITH( [readline], 
AS_HELP_STRING([--with-readline], [location of GNU readline library]), [CPPFLAGS="$CPPFLAGS -I$withval/include"; LDFLAGS="$LDFLAGS -L$withval/lib"]) have_readline=true AC_CHECK_HEADER( [readline/readline.h], [:], [have_readline=false]) AC_CHECK_HEADER( [readline/history.h], [:], [have_readline=false]) AC_CHECK_LIB( [readline], [readline], [:], [have_readline=false]) AC_CHECK_LIB( [readline], [add_history], [:], [have_readline=false]) if test $have_readline = true; then AC_DEFINE([HAVE_READLINE], [1], [Define to 1 if GNU readline found]) oclgrind_extra_libs="$oclgrind_extra_libs -lreadline" else AC_MSG_WARN([GNU readline library not found (use --with-readline)]) fi AC_SUBST([oclgrind_extra_libs], [$oclgrind_extra_libs]) # Check if Python is available (required to run tests) AM_PATH_PYTHON(,,[:]) AM_CONDITIONAL([HAVE_PYTHON], [test "$PYTHON" != :]) # Kernel tests KERNEL_TESTS="" KERNEL_TEST_INPUTS="" KERNEL_TEST_OUTPUTS="" m4_foreach([name], m4_split(m4_include(tests/kernels/TESTS), m4_newline), [ KERNEL_TESTS="$KERNEL_TESTS tests/kernels/"name".sim" KERNEL_TEST_INPUTS="$KERNEL_TEST_INPUTS tests/kernels/"name".sim" KERNEL_TEST_INPUTS="$KERNEL_TEST_INPUTS tests/kernels/"name".cl" KERNEL_TEST_INPUTS="$KERNEL_TEST_INPUTS tests/kernels/"name".ref" KERNEL_TEST_OUTPUTS="$KERNEL_TEST_OUTPUTS tests/kernels/"name".out" ]) AC_SUBST(KERNEL_TESTS, $KERNEL_TESTS) AC_SUBST(KERNEL_TEST_INPUTS, $KERNEL_TEST_INPUTS) AC_SUBST(KERNEL_TEST_OUTPUTS, $KERNEL_TEST_OUTPUTS) AC_OUTPUT Oclgrind-15.5/m4/000077500000000000000000000000001252441671000135275ustar00rootroot00000000000000Oclgrind-15.5/m4/m4_ax_check_compile_flag.m4000066400000000000000000000064021252441671000206410ustar00rootroot00000000000000# =========================================================================== # http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html # =========================================================================== # # SYNOPSIS # # AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], 
[ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) # # DESCRIPTION # # Check whether the given FLAG works with the current language's compiler # or gives an error. (Warnings, however, are ignored) # # ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on # success/failure. # # If EXTRA-FLAGS is defined, it is added to the current language's default # flags (e.g. CFLAGS) when the check is done. The check is thus made with # the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to # force the compiler to issue an error when a bad flag is given. # # INPUT gives an alternative input source to AC_COMPILE_IFELSE. # # NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this # macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. # # LICENSE # # Copyright (c) 2008 Guido U. Draheim # Copyright (c) 2011 Maarten Bosmans # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. If not, see . # # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure # scripts that are the output of Autoconf when processing the Macro. You # need not follow the terms of the GNU General Public License when using # or distributing such scripts, even though portions of the text of the # Macro appear in them. The GNU General Public License (GPL) does govern # all other use of the material that constitutes the Autoconf Macro. 
# # This special exception to the GPL applies to versions of the Autoconf # Macro released by the Autoconf Archive. When you make and distribute a # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well. #serial 4 AC_DEFUN([AX_CHECK_COMPILE_FLAG], [AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], [AS_VAR_SET(CACHEVAR,[yes])], [AS_VAR_SET(CACHEVAR,[no])]) _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) AS_VAR_IF(CACHEVAR,yes, [m4_default([$2], :)], [m4_default([$3], :)]) AS_VAR_POPDEF([CACHEVAR])dnl ])dnl AX_CHECK_COMPILE_FLAGS Oclgrind-15.5/src/000077500000000000000000000000001252441671000137765ustar00rootroot00000000000000Oclgrind-15.5/src/CL/000077500000000000000000000000001252441671000142745ustar00rootroot00000000000000Oclgrind-15.5/src/CL/cl.h000066400000000000000000001701511252441671000150500ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008 - 2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. 
* * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. ******************************************************************************/ #ifndef __OPENCL_CL_H #define __OPENCL_CL_H #ifdef __APPLE__ #include #else #include #endif #ifdef __cplusplus extern "C" { #endif /******************************************************************************/ typedef struct _cl_platform_id * cl_platform_id; typedef struct _cl_device_id * cl_device_id; typedef struct _cl_context * cl_context; typedef struct _cl_command_queue * cl_command_queue; typedef struct _cl_mem * cl_mem; typedef struct _cl_program * cl_program; typedef struct _cl_kernel * cl_kernel; typedef struct _cl_event * cl_event; typedef struct _cl_sampler * cl_sampler; typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. 
*/ typedef cl_ulong cl_bitfield; typedef cl_bitfield cl_device_type; typedef cl_uint cl_platform_info; typedef cl_uint cl_device_info; typedef cl_bitfield cl_device_fp_config; typedef cl_uint cl_device_mem_cache_type; typedef cl_uint cl_device_local_mem_type; typedef cl_bitfield cl_device_exec_capabilities; typedef cl_bitfield cl_command_queue_properties; typedef intptr_t cl_device_partition_property; typedef cl_bitfield cl_device_affinity_domain; typedef intptr_t cl_context_properties; typedef cl_uint cl_context_info; typedef cl_uint cl_command_queue_info; typedef cl_uint cl_channel_order; typedef cl_uint cl_channel_type; typedef cl_bitfield cl_mem_flags; typedef cl_uint cl_mem_object_type; typedef cl_uint cl_mem_info; typedef cl_bitfield cl_mem_migration_flags; typedef cl_uint cl_image_info; typedef cl_uint cl_buffer_create_type; typedef cl_uint cl_addressing_mode; typedef cl_uint cl_filter_mode; typedef cl_uint cl_sampler_info; typedef cl_bitfield cl_map_flags; typedef cl_uint cl_program_info; typedef cl_uint cl_program_build_info; typedef cl_uint cl_program_binary_type; typedef cl_int cl_build_status; typedef cl_uint cl_kernel_info; typedef cl_uint cl_kernel_arg_info; typedef cl_uint cl_kernel_arg_address_qualifier; typedef cl_uint cl_kernel_arg_access_qualifier; typedef cl_bitfield cl_kernel_arg_type_qualifier; typedef cl_uint cl_kernel_work_group_info; typedef cl_uint cl_event_info; typedef cl_uint cl_command_type; typedef cl_uint cl_profiling_info; typedef struct _cl_image_format { cl_channel_order image_channel_order; cl_channel_type image_channel_data_type; } cl_image_format; typedef struct _cl_image_desc { cl_mem_object_type image_type; size_t image_width; size_t image_height; size_t image_depth; size_t image_array_size; size_t image_row_pitch; size_t image_slice_pitch; cl_uint num_mip_levels; cl_uint num_samples; cl_mem buffer; } cl_image_desc; typedef struct _cl_buffer_region { size_t origin; size_t size; } cl_buffer_region; 
/******************************************************************************/ /* Error Codes */ #define CL_SUCCESS 0 #define CL_DEVICE_NOT_FOUND -1 #define CL_DEVICE_NOT_AVAILABLE -2 #define CL_COMPILER_NOT_AVAILABLE -3 #define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 #define CL_OUT_OF_RESOURCES -5 #define CL_OUT_OF_HOST_MEMORY -6 #define CL_PROFILING_INFO_NOT_AVAILABLE -7 #define CL_MEM_COPY_OVERLAP -8 #define CL_IMAGE_FORMAT_MISMATCH -9 #define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 #define CL_BUILD_PROGRAM_FAILURE -11 #define CL_MAP_FAILURE -12 #define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 #define CL_COMPILE_PROGRAM_FAILURE -15 #define CL_LINKER_NOT_AVAILABLE -16 #define CL_LINK_PROGRAM_FAILURE -17 #define CL_DEVICE_PARTITION_FAILED -18 #define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 #define CL_INVALID_VALUE -30 #define CL_INVALID_DEVICE_TYPE -31 #define CL_INVALID_PLATFORM -32 #define CL_INVALID_DEVICE -33 #define CL_INVALID_CONTEXT -34 #define CL_INVALID_QUEUE_PROPERTIES -35 #define CL_INVALID_COMMAND_QUEUE -36 #define CL_INVALID_HOST_PTR -37 #define CL_INVALID_MEM_OBJECT -38 #define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 #define CL_INVALID_IMAGE_SIZE -40 #define CL_INVALID_SAMPLER -41 #define CL_INVALID_BINARY -42 #define CL_INVALID_BUILD_OPTIONS -43 #define CL_INVALID_PROGRAM -44 #define CL_INVALID_PROGRAM_EXECUTABLE -45 #define CL_INVALID_KERNEL_NAME -46 #define CL_INVALID_KERNEL_DEFINITION -47 #define CL_INVALID_KERNEL -48 #define CL_INVALID_ARG_INDEX -49 #define CL_INVALID_ARG_VALUE -50 #define CL_INVALID_ARG_SIZE -51 #define CL_INVALID_KERNEL_ARGS -52 #define CL_INVALID_WORK_DIMENSION -53 #define CL_INVALID_WORK_GROUP_SIZE -54 #define CL_INVALID_WORK_ITEM_SIZE -55 #define CL_INVALID_GLOBAL_OFFSET -56 #define CL_INVALID_EVENT_WAIT_LIST -57 #define CL_INVALID_EVENT -58 #define CL_INVALID_OPERATION -59 #define CL_INVALID_GL_OBJECT -60 #define CL_INVALID_BUFFER_SIZE -61 #define CL_INVALID_MIP_LEVEL -62 #define 
CL_INVALID_GLOBAL_WORK_SIZE -63 #define CL_INVALID_PROPERTY -64 #define CL_INVALID_IMAGE_DESCRIPTOR -65 #define CL_INVALID_COMPILER_OPTIONS -66 #define CL_INVALID_LINKER_OPTIONS -67 #define CL_INVALID_DEVICE_PARTITION_COUNT -68 /* OpenCL Version */ #define CL_VERSION_1_0 1 #define CL_VERSION_1_1 1 #define CL_VERSION_1_2 1 /* cl_bool */ #define CL_FALSE 0 #define CL_TRUE 1 #define CL_BLOCKING CL_TRUE #define CL_NON_BLOCKING CL_FALSE /* cl_platform_info */ #define CL_PLATFORM_PROFILE 0x0900 #define CL_PLATFORM_VERSION 0x0901 #define CL_PLATFORM_NAME 0x0902 #define CL_PLATFORM_VENDOR 0x0903 #define CL_PLATFORM_EXTENSIONS 0x0904 /* cl_device_type - bitfield */ #define CL_DEVICE_TYPE_DEFAULT (1 << 0) #define CL_DEVICE_TYPE_CPU (1 << 1) #define CL_DEVICE_TYPE_GPU (1 << 2) #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) #define CL_DEVICE_TYPE_CUSTOM (1 << 4) #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF /* cl_device_info */ #define CL_DEVICE_TYPE 0x1000 #define CL_DEVICE_VENDOR_ID 0x1001 #define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 #define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 #define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B #define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C #define CL_DEVICE_ADDRESS_BITS 0x100D #define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E #define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F #define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 #define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 #define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 #define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 #define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 #define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 #define CL_DEVICE_IMAGE_SUPPORT 0x1016 #define CL_DEVICE_MAX_PARAMETER_SIZE 
0x1017 #define CL_DEVICE_MAX_SAMPLERS 0x1018 #define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 #define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A #define CL_DEVICE_SINGLE_FP_CONFIG 0x101B #define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C #define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D #define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E #define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F #define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 #define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 #define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 #define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 #define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 #define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 #define CL_DEVICE_ENDIAN_LITTLE 0x1026 #define CL_DEVICE_AVAILABLE 0x1027 #define CL_DEVICE_COMPILER_AVAILABLE 0x1028 #define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 #define CL_DEVICE_QUEUE_PROPERTIES 0x102A #define CL_DEVICE_NAME 0x102B #define CL_DEVICE_VENDOR 0x102C #define CL_DRIVER_VERSION 0x102D #define CL_DEVICE_PROFILE 0x102E #define CL_DEVICE_VERSION 0x102F #define CL_DEVICE_EXTENSIONS 0x1030 #define CL_DEVICE_PLATFORM 0x1031 #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 /* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 #define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C #define CL_DEVICE_OPENCL_C_VERSION 0x103D #define CL_DEVICE_LINKER_AVAILABLE 0x103E #define CL_DEVICE_BUILT_IN_KERNELS 0x103F #define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 #define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 #define CL_DEVICE_PARENT_DEVICE 0x1042 #define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 #define CL_DEVICE_PARTITION_PROPERTIES 0x1044 #define 
CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 #define CL_DEVICE_PARTITION_TYPE 0x1046 #define CL_DEVICE_REFERENCE_COUNT 0x1047 #define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 #define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B /* cl_device_fp_config - bitfield */ #define CL_FP_DENORM (1 << 0) #define CL_FP_INF_NAN (1 << 1) #define CL_FP_ROUND_TO_NEAREST (1 << 2) #define CL_FP_ROUND_TO_ZERO (1 << 3) #define CL_FP_ROUND_TO_INF (1 << 4) #define CL_FP_FMA (1 << 5) #define CL_FP_SOFT_FLOAT (1 << 6) #define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) /* cl_device_mem_cache_type */ #define CL_NONE 0x0 #define CL_READ_ONLY_CACHE 0x1 #define CL_READ_WRITE_CACHE 0x2 /* cl_device_local_mem_type */ #define CL_LOCAL 0x1 #define CL_GLOBAL 0x2 /* cl_device_exec_capabilities - bitfield */ #define CL_EXEC_KERNEL (1 << 0) #define CL_EXEC_NATIVE_KERNEL (1 << 1) /* cl_command_queue_properties - bitfield */ #define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) #define CL_QUEUE_PROFILING_ENABLE (1 << 1) /* cl_context_info */ #define CL_CONTEXT_REFERENCE_COUNT 0x1080 #define CL_CONTEXT_DEVICES 0x1081 #define CL_CONTEXT_PROPERTIES 0x1082 #define CL_CONTEXT_NUM_DEVICES 0x1083 /* cl_context_properties */ #define CL_CONTEXT_PLATFORM 0x1084 #define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 /* cl_device_partition_property */ #define CL_DEVICE_PARTITION_EQUALLY 0x1086 #define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 #define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 /* cl_device_affinity_domain */ #define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) #define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) #define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) #define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) #define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) #define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) /* cl_command_queue_info */ #define 
CL_QUEUE_CONTEXT 0x1090 #define CL_QUEUE_DEVICE 0x1091 #define CL_QUEUE_REFERENCE_COUNT 0x1092 #define CL_QUEUE_PROPERTIES 0x1093 /* cl_mem_flags - bitfield */ #define CL_MEM_READ_WRITE (1 << 0) #define CL_MEM_WRITE_ONLY (1 << 1) #define CL_MEM_READ_ONLY (1 << 2) #define CL_MEM_USE_HOST_PTR (1 << 3) #define CL_MEM_ALLOC_HOST_PTR (1 << 4) #define CL_MEM_COPY_HOST_PTR (1 << 5) // reserved (1 << 6) #define CL_MEM_HOST_WRITE_ONLY (1 << 7) #define CL_MEM_HOST_READ_ONLY (1 << 8) #define CL_MEM_HOST_NO_ACCESS (1 << 9) /* cl_mem_migration_flags - bitfield */ #define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) #define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) /* cl_channel_order */ #define CL_R 0x10B0 #define CL_A 0x10B1 #define CL_RG 0x10B2 #define CL_RA 0x10B3 #define CL_RGB 0x10B4 #define CL_RGBA 0x10B5 #define CL_BGRA 0x10B6 #define CL_ARGB 0x10B7 #define CL_INTENSITY 0x10B8 #define CL_LUMINANCE 0x10B9 #define CL_Rx 0x10BA #define CL_RGx 0x10BB #define CL_RGBx 0x10BC #define CL_DEPTH 0x10BD #define CL_DEPTH_STENCIL 0x10BE /* cl_channel_type */ #define CL_SNORM_INT8 0x10D0 #define CL_SNORM_INT16 0x10D1 #define CL_UNORM_INT8 0x10D2 #define CL_UNORM_INT16 0x10D3 #define CL_UNORM_SHORT_565 0x10D4 #define CL_UNORM_SHORT_555 0x10D5 #define CL_UNORM_INT_101010 0x10D6 #define CL_SIGNED_INT8 0x10D7 #define CL_SIGNED_INT16 0x10D8 #define CL_SIGNED_INT32 0x10D9 #define CL_UNSIGNED_INT8 0x10DA #define CL_UNSIGNED_INT16 0x10DB #define CL_UNSIGNED_INT32 0x10DC #define CL_HALF_FLOAT 0x10DD #define CL_FLOAT 0x10DE #define CL_UNORM_INT24 0x10DF /* cl_mem_object_type */ #define CL_MEM_OBJECT_BUFFER 0x10F0 #define CL_MEM_OBJECT_IMAGE2D 0x10F1 #define CL_MEM_OBJECT_IMAGE3D 0x10F2 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 #define CL_MEM_OBJECT_IMAGE1D 0x10F4 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 /* cl_mem_info */ #define CL_MEM_TYPE 0x1100 #define CL_MEM_FLAGS 0x1101 #define CL_MEM_SIZE 0x1102 #define CL_MEM_HOST_PTR 0x1103 #define 
CL_MEM_MAP_COUNT 0x1104 #define CL_MEM_REFERENCE_COUNT 0x1105 #define CL_MEM_CONTEXT 0x1106 #define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 #define CL_MEM_OFFSET 0x1108 /* cl_image_info */ #define CL_IMAGE_FORMAT 0x1110 #define CL_IMAGE_ELEMENT_SIZE 0x1111 #define CL_IMAGE_ROW_PITCH 0x1112 #define CL_IMAGE_SLICE_PITCH 0x1113 #define CL_IMAGE_WIDTH 0x1114 #define CL_IMAGE_HEIGHT 0x1115 #define CL_IMAGE_DEPTH 0x1116 #define CL_IMAGE_ARRAY_SIZE 0x1117 #define CL_IMAGE_BUFFER 0x1118 #define CL_IMAGE_NUM_MIP_LEVELS 0x1119 #define CL_IMAGE_NUM_SAMPLES 0x111A /* cl_addressing_mode */ #define CL_ADDRESS_NONE 0x1130 #define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 #define CL_ADDRESS_CLAMP 0x1132 #define CL_ADDRESS_REPEAT 0x1133 #define CL_ADDRESS_MIRRORED_REPEAT 0x1134 /* cl_filter_mode */ #define CL_FILTER_NEAREST 0x1140 #define CL_FILTER_LINEAR 0x1141 /* cl_sampler_info */ #define CL_SAMPLER_REFERENCE_COUNT 0x1150 #define CL_SAMPLER_CONTEXT 0x1151 #define CL_SAMPLER_NORMALIZED_COORDS 0x1152 #define CL_SAMPLER_ADDRESSING_MODE 0x1153 #define CL_SAMPLER_FILTER_MODE 0x1154 /* cl_map_flags - bitfield */ #define CL_MAP_READ (1 << 0) #define CL_MAP_WRITE (1 << 1) #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) /* cl_program_info */ #define CL_PROGRAM_REFERENCE_COUNT 0x1160 #define CL_PROGRAM_CONTEXT 0x1161 #define CL_PROGRAM_NUM_DEVICES 0x1162 #define CL_PROGRAM_DEVICES 0x1163 #define CL_PROGRAM_SOURCE 0x1164 #define CL_PROGRAM_BINARY_SIZES 0x1165 #define CL_PROGRAM_BINARIES 0x1166 #define CL_PROGRAM_NUM_KERNELS 0x1167 #define CL_PROGRAM_KERNEL_NAMES 0x1168 /* cl_program_build_info */ #define CL_PROGRAM_BUILD_STATUS 0x1181 #define CL_PROGRAM_BUILD_OPTIONS 0x1182 #define CL_PROGRAM_BUILD_LOG 0x1183 #define CL_PROGRAM_BINARY_TYPE 0x1184 /* cl_program_binary_type */ #define CL_PROGRAM_BINARY_TYPE_NONE 0x0 #define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 #define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 #define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 /* cl_build_status */ #define CL_BUILD_SUCCESS 0 
#define CL_BUILD_NONE -1 #define CL_BUILD_ERROR -2 #define CL_BUILD_IN_PROGRESS -3 /* cl_kernel_info */ #define CL_KERNEL_FUNCTION_NAME 0x1190 #define CL_KERNEL_NUM_ARGS 0x1191 #define CL_KERNEL_REFERENCE_COUNT 0x1192 #define CL_KERNEL_CONTEXT 0x1193 #define CL_KERNEL_PROGRAM 0x1194 #define CL_KERNEL_ATTRIBUTES 0x1195 /* cl_kernel_arg_info */ #define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 #define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 #define CL_KERNEL_ARG_TYPE_NAME 0x1198 #define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 #define CL_KERNEL_ARG_NAME 0x119A /* cl_kernel_arg_address_qualifier */ #define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B #define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C #define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D #define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E /* cl_kernel_arg_access_qualifier */ #define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 #define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 #define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 #define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 /* cl_kernel_arg_type_qualifer */ #define CL_KERNEL_ARG_TYPE_NONE 0 #define CL_KERNEL_ARG_TYPE_CONST (1 << 0) #define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) #define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) /* cl_kernel_work_group_info */ #define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 #define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 #define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 #define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 #define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 /* cl_event_info */ #define CL_EVENT_COMMAND_QUEUE 0x11D0 #define CL_EVENT_COMMAND_TYPE 0x11D1 #define CL_EVENT_REFERENCE_COUNT 0x11D2 #define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 #define CL_EVENT_CONTEXT 0x11D4 /* cl_command_type */ #define CL_COMMAND_NDRANGE_KERNEL 0x11F0 #define CL_COMMAND_TASK 0x11F1 #define CL_COMMAND_NATIVE_KERNEL 0x11F2 #define CL_COMMAND_READ_BUFFER 0x11F3 #define CL_COMMAND_WRITE_BUFFER 0x11F4 #define CL_COMMAND_COPY_BUFFER 0x11F5 #define CL_COMMAND_READ_IMAGE 0x11F6 
#define CL_COMMAND_WRITE_IMAGE 0x11F7 #define CL_COMMAND_COPY_IMAGE 0x11F8 #define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 #define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA #define CL_COMMAND_MAP_BUFFER 0x11FB #define CL_COMMAND_MAP_IMAGE 0x11FC #define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD #define CL_COMMAND_MARKER 0x11FE #define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF #define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 #define CL_COMMAND_READ_BUFFER_RECT 0x1201 #define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 #define CL_COMMAND_COPY_BUFFER_RECT 0x1203 #define CL_COMMAND_USER 0x1204 #define CL_COMMAND_BARRIER 0x1205 #define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 #define CL_COMMAND_FILL_BUFFER 0x1207 #define CL_COMMAND_FILL_IMAGE 0x1208 /* command execution status */ #define CL_COMPLETE 0x0 #define CL_RUNNING 0x1 #define CL_SUBMITTED 0x2 #define CL_QUEUED 0x3 /* cl_buffer_create_type */ #define CL_BUFFER_CREATE_TYPE_REGION 0x1220 /* cl_profiling_info */ #define CL_PROFILING_COMMAND_QUEUED 0x1280 #define CL_PROFILING_COMMAND_SUBMIT 0x1281 #define CL_PROFILING_COMMAND_START 0x1282 #define CL_PROFILING_COMMAND_END 0x1283 /********************************************************************************************************/ /* Platform API */ extern CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo(cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Device APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDs(cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, cl_device_id * /* devices */, cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(cl_device_id /* device */, 
cl_device_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevices(cl_device_id /* in_device */, const cl_device_partition_property * /* properties */, cl_uint /* num_devices */, cl_device_id * /* out_devices */, cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; /* Context APIs */ extern CL_API_ENTRY cl_context CL_API_CALL clCreateContext(const cl_context_properties * /* properties */, cl_uint /* num_devices */, const cl_device_id * /* devices */, void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties * /* properties */, cl_device_type /* device_type */, void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Command Queue APIs */ extern CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */, cl_int * /* errcode_ret */) 
CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfo(cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Memory Object APIs */ extern CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBuffer(cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, const void * /* buffer_create_info */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateImage(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, const cl_image_desc * /* image_desc */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetSupportedImageFormats(cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, cl_uint /* num_entries */, cl_image_format * /* image_formats */, cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfo(cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) 
CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetImageInfo(cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorCallback( cl_mem /* memobj */, void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; /* Sampler APIs */ extern CL_API_ENTRY cl_sampler CL_API_CALL clCreateSampler(cl_context /* context */, cl_bool /* normalized_coords */, cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfo(cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Program Object APIs */ extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSource(cl_context /* context */, cl_uint /* count */, const char ** /* strings */, const size_t * /* lengths */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBuiltInKernels(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* kernel_names */, cl_int * /* 
errcode_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clCompileProgram(cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, cl_uint /* num_input_headers */, const cl_program * /* input_headers */, const char ** /* header_include_names */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_program CL_API_CALL clLinkProgram(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, cl_uint /* num_input_programs */, const cl_program * /* input_programs */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */, cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfo(cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetProgramBuildInfo(cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, size_t /* param_value_size */, void * /* 
param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Kernel Object APIs */ extern CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program /* program */, const char * /* kernel_name */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgram(cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */, cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel /* kernel */, cl_uint /* arg_index */, size_t /* arg_size */, const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfo(cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfo(cl_kernel /* kernel */, cl_uint /* arg_indx */, cl_kernel_arg_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Event Object APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clWaitForEvents(cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetEventInfo(cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t 
* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent(cl_context /* context */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatus(cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clSetEventCallback( cl_event /* event */, cl_int /* command_exec_callback_type */, void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; /* Profiling APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfo(cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Flush and Finish APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; /* Enqueued Commands APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, size_t /* offset */, size_t /* size */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRect(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* 
host_row_pitch */, size_t /* host_slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, size_t /* offset */, size_t /* size */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRect(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, size_t /* host_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, const void * /* pattern */, size_t /* pattern_size */, size_t /* offset */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBuffer(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, size_t /* src_offset */, size_t /* dst_offset */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferRect(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, const size_t * /* src_origin */, const size_t * /* dst_origin */, const size_t * /* 
region */, size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */, size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* row_pitch */, size_t /* slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* input_row_pitch */, size_t /* input_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImage(cl_command_queue /* command_queue */, cl_mem /* image */, const void * /* fill_color */, const size_t * /* origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImage(cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, const size_t * /* src_origin[3] */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */, const size_t * /* src_origin[3] 
*/, const size_t * /* region[3] */, size_t /* dst_offset */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, size_t /* src_offset */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY void * CL_API_CALL clEnqueueMapBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, size_t /* offset */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY void * CL_API_CALL clEnqueueMapImage(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t * /* image_row_pitch */, size_t * /* image_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObject(cl_command_queue /* command_queue */, cl_mem /* memobj */, void * /* mapped_ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */, const cl_mem * /* mem_objects */, cl_mem_migration_flags /* flags */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) 
CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, const size_t * /* global_work_offset */, const size_t * /* global_work_size */, const size_t * /* local_work_size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueTask(cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernel(cl_command_queue /* command_queue */, void (CL_CALLBACK * /*user_func*/)(void *), void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */, const cl_mem * /* mem_list */, const void ** /* args_mem_loc */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; /* Extension function access * * Returns the extension function address for the given function name, * or NULL if a valid function can not be found. The client must * check to make sure the address is not NULL, before using or * calling the returned function address. 
*/ extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */, const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2; // Deprecated OpenCL 1.1 APIs extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateImage2D(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_row_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateImage3D(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueMarker(cl_command_queue /* command_queue */, cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueWaitForEvents(cl_command_queue /* command_queue */, cl_uint /* num_events */, const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_H */ 
Oclgrind-15.5/src/CL/cl_d3d10.h000066400000000000000000000113731252441671000157430ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
**********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ #ifndef __OPENCL_CL_D3D10_H #define __OPENCL_CL_D3D10_H #include #include #include #ifdef __cplusplus extern "C" { #endif /****************************************************************************** * cl_khr_d3d10_sharing */ #define cl_khr_d3d10_sharing 1 typedef cl_uint cl_d3d10_device_source_khr; typedef cl_uint cl_d3d10_device_set_khr; /******************************************************************************/ // Error Codes #define CL_INVALID_D3D10_DEVICE_KHR -1002 #define CL_INVALID_D3D10_RESOURCE_KHR -1003 #define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004 #define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005 // cl_d3d10_device_source_nv #define CL_D3D10_DEVICE_KHR 0x4010 #define CL_D3D10_DXGI_ADAPTER_KHR 0x4011 // cl_d3d10_device_set_nv #define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012 #define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013 // cl_context_info #define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 #define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C // cl_mem_info #define CL_MEM_D3D10_RESOURCE_KHR 0x4015 // cl_image_info #define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016 // cl_command_type #define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017 #define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018 /******************************************************************************/ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void * d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL 
*clCreateFromD3D10Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; #ifdef __cplusplus } #endif #endif // __OPENCL_CL_D3D10_H Oclgrind-15.5/src/CL/cl_d3d11.h000066400000000000000000000113651252441671000157450ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. 
* * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ #ifndef __OPENCL_CL_D3D11_H #define __OPENCL_CL_D3D11_H #include #include #include #ifdef __cplusplus extern "C" { #endif /****************************************************************************** * cl_khr_d3d11_sharing */ #define cl_khr_d3d11_sharing 1 typedef cl_uint cl_d3d11_device_source_khr; typedef cl_uint cl_d3d11_device_set_khr; /******************************************************************************/ // Error Codes #define CL_INVALID_D3D11_DEVICE_KHR -1006 #define CL_INVALID_D3D11_RESOURCE_KHR -1007 #define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008 #define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009 // cl_d3d11_device_source #define CL_D3D11_DEVICE_KHR 0x4019 #define CL_D3D11_DXGI_ADAPTER_KHR 0x401A // cl_d3d11_device_set #define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B #define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C // cl_context_info #define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D #define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D // cl_mem_info #define CL_MEM_D3D11_RESOURCE_KHR 0x401E // cl_image_info #define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F // cl_command_type #define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020 #define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021 /******************************************************************************/ typedef CL_API_ENTRY cl_int (CL_API_CALL 
*clGetDeviceIDsFromD3D11KHR_fn)( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void * d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; #ifdef __cplusplus } #endif #endif // __OPENCL_CL_D3D11_H Oclgrind-15.5/src/CL/cl_dx9_media_sharing.h000066400000000000000000000120451252441671000205030ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2012 The Khronos Group Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
**********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ #ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H #define __OPENCL_CL_DX9_MEDIA_SHARING_H #include #include #ifdef __cplusplus extern "C" { #endif /****************************************************************************** /* cl_khr_dx9_media_sharing */ #define cl_khr_dx9_media_sharing 1 typedef cl_uint cl_dx9_media_adapter_type_khr; typedef cl_uint cl_dx9_media_adapter_set_khr; #if defined(_WIN32) #include typedef struct _cl_dx9_surface_info_khr { IDirect3DSurface9 *resource; HANDLE shared_handle; } cl_dx9_surface_info_khr; #endif /******************************************************************************/ // Error Codes #define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010 #define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011 #define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012 #define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013 // cl_media_adapter_type_khr #define CL_ADAPTER_D3D9_KHR 0x2020 #define CL_ADAPTER_D3D9EX_KHR 0x2021 #define CL_ADAPTER_DXVA_KHR 0x2022 // cl_media_adapter_set_khr #define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023 #define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024 // cl_context_info #define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025 #define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026 #define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027 // cl_mem_info #define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028 #define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029 // cl_image_info #define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A // cl_command_type #define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B #define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C /******************************************************************************/ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr * 
media_adapter_type, void * media_adapters, cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, void * surface_info, cl_uint plane, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; #ifdef __cplusplus } #endif #endif // __OPENCL_CL_DX9_MEDIA_SHARING_H Oclgrind-15.5/src/CL/cl_egl.h000066400000000000000000000116541252441671000157010ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2010 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. 
* * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. ******************************************************************************/ #ifndef __OPENCL_CL_EGL_H #define __OPENCL_CL_EGL_H #ifdef __APPLE__ #else #include #include #include #endif #ifdef __cplusplus extern "C" { #endif /* Command type for events created with clEnqueueAcquireEGLObjectsKHR */ #define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F #define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D #define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E /* Error type for clCreateFromEGLImageKHR */ #define CL_INVALID_EGL_OBJECT_KHR -1093 #define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092 /* CLeglImageKHR is an opaque handle to an EGLImage */ typedef void* CLeglImageKHR; /* CLeglDisplayKHR is an opaque handle to an EGLDisplay */ typedef void* CLeglDisplayKHR; /* properties passed to clCreateFromEGLImageKHR */ typedef intptr_t cl_egl_image_properties_khr; #define cl_khr_egl_image 1 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromEGLImageKHR(cl_context /* context */, CLeglDisplayKHR /* egldisplay */, CLeglImageKHR /* eglimage */, cl_mem_flags /* flags */, const cl_egl_image_properties_khr * /* properties */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( cl_context context, CLeglDisplayKHR egldisplay, CLeglImageKHR eglimage, cl_mem_flags flags, const cl_egl_image_properties_khr * properties, cl_int * errcode_ret); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireEGLObjectsKHR(cl_command_queue /* command_queue */, 
cl_uint /* num_objects */, const cl_mem * /* mem_objects */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseEGLObjectsKHR(cl_command_queue /* command_queue */, cl_uint /* num_objects */, const cl_mem * /* mem_objects */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event); #define cl_khr_egl_event 1 extern CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromEGLSyncKHR(cl_context /* context */, EGLSyncKHR /* sync */, EGLDisplay /* display */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( cl_context context, EGLSyncKHR sync, EGLDisplay display, cl_int * errcode_ret); #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_EGL_H */ Oclgrind-15.5/src/CL/cl_ext.h000066400000000000000000000327061252441671000157330ustar00rootroot00000000000000/******************************************************************************* * Copyright (c) 2008-2013 The Khronos Group Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. ******************************************************************************/ /* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */ /* cl_ext.h contains OpenCL extensions which don't have external */ /* (OpenGL, D3D) dependencies. */ #ifndef __CL_EXT_H #define __CL_EXT_H #ifdef __cplusplus extern "C" { #endif #ifdef __APPLE__ #include #include #else #include #endif /* cl_khr_fp16 extension - no extension #define since it has no functions */ #define CL_DEVICE_HALF_FP_CONFIG 0x1033 /* Memory object destruction * * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR * * Registers a user callback function that will be called when the memory object is deleted and its resources * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback * stack associated with memobj. 
The registered user callback functions are called in the reverse order in * which they were registered. The user callback functions are called and then the memory object is deleted * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be * notified when the memory referenced by host_ptr, specified when the memory object is created and used as * the storage bits for the memory object, can be reused or freed. * * The application may not call CL api's with the cl_mem object passed to the pfn_notify. * * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) * before using. */ #define cl_APPLE_SetMemObjectDestructor 1 cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */, void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /* Context Logging Functions * * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) * before using. 
* * clLogMessagesToSystemLogAPPLE forwards all log messages to the Apple System Logger */ #define cl_APPLE_ContextLoggingFunctions 1 extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */, const void * /* private_info */, size_t /* cb */, void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /* clLogMessagesToStdoutAPPLE sends all log messages to the file descriptor stdout */ extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */, const void * /* private_info */, size_t /* cb */, void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /* clLogMessagesToStderrAPPLE sends all log messages to the file descriptor stderr */ extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */, const void * /* private_info */, size_t /* cb */, void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; /************************ * cl_khr_icd extension * ************************/ #define cl_khr_icd 1 /* cl_platform_info */ #define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 /* Additional Error Codes */ #define CL_PLATFORM_NOT_FOUND_KHR -1001 extern CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */); typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */); /* Extension: cl_khr_image2D_buffer * * This extension allows a 2D image to be created from a cl_mem buffer without a copy. * The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t. * Both the sampler and sampler-less read_image built-in functions are supported for 2D images * and 2D images created from a buffer. Similarly, the write_image built-ins are also supported * for 2D images created from a buffer. * * When the 2D image from buffer is created, the client must specify the width, * height, image format (i.e. 
channel order and channel data type) and optionally the row pitch * * The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels. * The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels. */ /************************************** * cl_khr_initialize_memory extension * **************************************/ #define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E /************************************** * cl_khr_terminate_context extension * **************************************/ #define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F #define CL_CONTEXT_TERMINATE_KHR 0x2010 #define cl_khr_terminate_context 1 extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2; /* * Extension: cl_khr_spir * * This extension adds support to create an OpenCL program object from a * Standard Portable Intermediate Representation (SPIR) instance */ #define CL_DEVICE_SPIR_VERSIONS 0x40E0 #define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 /****************************************** * cl_nv_device_attribute_query extension * ******************************************/ /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 #define CL_DEVICE_WARP_SIZE_NV 0x4003 #define CL_DEVICE_GPU_OVERLAP_NV 0x4004 #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 /********************************* * cl_amd_device_attribute_query * *********************************/ #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 #ifdef CL_VERSION_1_1 /*********************************** * cl_ext_device_fission extension * ***********************************/ 
#define cl_ext_device_fission 1

extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;

/* Function-pointer type with the same signature as clReleaseDeviceEXT. */
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;

extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;

/* Function-pointer type with the same signature as clRetainDeviceEXT. */
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;

/* Element type of the property lists passed to clCreateSubDevicesEXT. */
typedef cl_ulong cl_device_partition_property_ext;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevicesEXT( cl_device_id /*in_device*/,
                       const cl_device_partition_property_ext * /* properties */,
                       cl_uint /*num_entries*/,
                       cl_device_id * /*out_devices*/,
                       cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;

/* Function-pointer type with the same signature as clCreateSubDevicesEXT. */
typedef CL_API_ENTRY cl_int
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
                                           const cl_device_partition_property_ext * /* properties */,
                                           cl_uint /*num_entries*/,
                                           cl_device_id * /*out_devices*/,
                                           cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;

/* cl_device_partition_property_ext */
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053

/* clGetDeviceInfo selectors */
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058

/* error codes */
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
#define CL_INVALID_PARTITION_COUNT_EXT -1058
#define CL_INVALID_PARTITION_NAME_EXT -1059

/* CL_AFFINITY_DOMAINs */
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100

/* cl_device_partition_property_ext list terminators */
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
/* The cast binds to the 0 first, so this is (cast)0 minus 1 in the property type. */
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)

/*********************************
* cl_qcom_ext_host_ptr extension
*********************************/

#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)

#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7

/* Selector type for the param_name argument of clGetDeviceImageInfoQCOM. */
typedef cl_uint cl_image_pitch_info_qcom;

extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceImageInfoQCOM(cl_device_id device,
                         size_t image_width,
                         size_t image_height,
                         const cl_image_format *image_format,
                         cl_image_pitch_info_qcom param_name,
                         size_t param_value_size,
                         void *param_value,
                         size_t *param_value_size_ret);

typedef struct _cl_mem_ext_host_ptr
{
    // Type of external memory allocation.
    // Legal values will be defined in layered extensions.
    cl_uint allocation_type;

    // Host cache policy for this external memory allocation.
    cl_uint host_cache_policy;

} cl_mem_ext_host_ptr;

/*********************************
* cl_qcom_ion_host_ptr extension
*********************************/

#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8

typedef struct _cl_mem_ion_host_ptr
{
    // Type of external memory allocation.
    // Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations.
cl_mem_ext_host_ptr ext_host_ptr; // ION file descriptor int ion_filedesc; // Host pointer to the ION allocated memory void* ion_hostptr; } cl_mem_ion_host_ptr; #endif /* CL_VERSION_1_1 */ #ifdef __cplusplus } #endif #endif /* __CL_EXT_H */ Oclgrind-15.5/src/CL/cl_gl.h000066400000000000000000000162571252441671000155400ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008 - 2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
**********************************************************************************/ #ifndef __OPENCL_CL_GL_H #define __OPENCL_CL_GL_H #ifdef __APPLE__ #include #else #include #endif #ifdef __cplusplus extern "C" { #endif typedef cl_uint cl_gl_object_type; typedef cl_uint cl_gl_texture_info; typedef cl_uint cl_gl_platform_info; typedef struct __GLsync *cl_GLsync; /* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ #define CL_GL_OBJECT_BUFFER 0x2000 #define CL_GL_OBJECT_TEXTURE2D 0x2001 #define CL_GL_OBJECT_TEXTURE3D 0x2002 #define CL_GL_OBJECT_RENDERBUFFER 0x2003 #define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E #define CL_GL_OBJECT_TEXTURE1D 0x200F #define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 #define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 /* cl_gl_texture_info */ #define CL_GL_TEXTURE_TARGET 0x2004 #define CL_GL_MIPMAP_LEVEL 0x2005 #define CL_GL_NUM_SAMPLES 0x2012 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLBuffer(cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* bufobj */, int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture(cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, cl_GLint /* miplevel */, cl_GLuint /* texture */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLRenderbuffer(cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* renderbuffer */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetGLObjectInfo(cl_mem /* memobj */, cl_gl_object_type * /* gl_object_type */, cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetGLTextureInfo(cl_mem /* memobj */, cl_gl_texture_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL 
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
                          cl_uint          /* num_objects */,
                          const cl_mem *   /* mem_objects */,
                          cl_uint          /* num_events_in_wait_list */,
                          const cl_event * /* event_wait_list */,
                          cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;

extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
                          cl_uint          /* num_objects */,
                          const cl_mem *   /* mem_objects */,
                          cl_uint          /* num_events_in_wait_list */,
                          const cl_event * /* event_wait_list */,
                          cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;


/* Deprecated OpenCL 1.1 APIs */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context   /* context */,
                        cl_mem_flags /* flags */,
                        cl_GLenum    /* target */,
                        cl_GLint     /* miplevel */,
                        cl_GLuint    /* texture */,
                        cl_int *     /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;

extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context   /* context */,
                        cl_mem_flags /* flags */,
                        cl_GLenum    /* target */,
                        cl_GLint     /* miplevel */,
                        cl_GLuint    /* texture */,
                        cl_int *     /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;

/* cl_khr_gl_sharing extension */

#define cl_khr_gl_sharing 1

typedef cl_uint cl_gl_context_info;

/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000

/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007

/* Additional cl_context_properties */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C

extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
                      cl_gl_context_info            /* param_name */,
                      size_t                        /* param_value_size */,
                      void *                        /* param_value */,
                      size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

/* Function-pointer type for clGetGLContextInfoKHR; the declarator continues below. */
typedef CL_API_ENTRY cl_int (CL_API_CALL
*clGetGLContextInfoKHR_fn)( const cl_context_properties * properties, cl_gl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret); #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_GL_H */ Oclgrind-15.5/src/CL/cl_gl_ext.h000066400000000000000000000051061252441671000164070ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ /* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */ /* OpenGL dependencies. 
*/ #ifndef __OPENCL_CL_GL_EXT_H #define __OPENCL_CL_GL_EXT_H #ifdef __cplusplus extern "C" { #endif #ifdef __APPLE__ #include #else #include #endif /* * For each extension, follow this template * cl_VEN_extname extension */ /* #define cl_VEN_extname 1 * ... define new types, if any * ... define new tokens, if any * ... define new APIs, if any * * If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header * This allows us to avoid having to decide whether to include GL headers or GLES here. */ /* * cl_khr_gl_event extension * See section 9.9 in the OpenCL 1.1 spec for more information */ #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D extern CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromGLsyncKHR(cl_context /* context */, cl_GLsync /* cl_GLsync */, cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1; #ifdef __cplusplus } #endif #endif /* __OPENCL_CL_GL_EXT_H */ Oclgrind-15.5/src/CL/cl_platform.h000066400000000000000000001205571252441671000167610ustar00rootroot00000000000000/********************************************************************************** * Copyright (c) 2008-2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the * "Materials"), to deal in the Materials without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Materials, and to * permit persons to whom the Materials are furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ /* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */ #ifndef __CL_PLATFORM_H #define __CL_PLATFORM_H #ifdef __APPLE__ /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ #include #endif #ifdef __cplusplus extern "C" { #endif #if defined(_WIN32) #define CL_API_ENTRY #define CL_API_CALL __stdcall #define CL_CALLBACK __stdcall #else #define CL_API_ENTRY #define CL_API_CALL #define CL_CALLBACK #endif #ifdef __APPLE__ #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER #define CL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER #define GCL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 #else #warning This path should 
never happen outside of internal operating system development. AvailabilityMacros do not function correctly here! #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER #endif #else #define CL_EXTENSION_WEAK_LINK #define CL_API_SUFFIX__VERSION_1_0 #define CL_EXT_SUFFIX__VERSION_1_0 #define CL_API_SUFFIX__VERSION_1_1 #define CL_EXT_SUFFIX__VERSION_1_1 #define CL_API_SUFFIX__VERSION_1_2 #define CL_EXT_SUFFIX__VERSION_1_2 #ifdef __GNUC__ #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED #else #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated)) #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED #else #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated)) #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED #endif #elif _WIN32 #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED #else #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated) #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED #else #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated) #endif #else #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED #define 
CL_EXT_PREFIX__VERSION_1_1_DEPRECATED #endif #endif #if (defined (_WIN32) && defined(_MSC_VER)) /* scalar types */ typedef signed __int8 cl_char; typedef unsigned __int8 cl_uchar; typedef signed __int16 cl_short; typedef unsigned __int16 cl_ushort; typedef signed __int32 cl_int; typedef unsigned __int32 cl_uint; typedef signed __int64 cl_long; typedef unsigned __int64 cl_ulong; typedef unsigned __int16 cl_half; typedef float cl_float; typedef double cl_double; /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 #define CL_SCHAR_MIN (-127-1) #define CL_CHAR_MAX CL_SCHAR_MAX #define CL_CHAR_MIN CL_SCHAR_MIN #define CL_UCHAR_MAX 255 #define CL_SHRT_MAX 32767 #define CL_SHRT_MIN (-32767-1) #define CL_USHRT_MAX 65535 #define CL_INT_MAX 2147483647 #define CL_INT_MIN (-2147483647-1) #define CL_UINT_MAX 0xffffffffU #define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) #define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) #define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) #define CL_FLT_DIG 6 #define CL_FLT_MANT_DIG 24 #define CL_FLT_MAX_10_EXP +38 #define CL_FLT_MAX_EXP +128 #define CL_FLT_MIN_10_EXP -37 #define CL_FLT_MIN_EXP -125 #define CL_FLT_RADIX 2 #define CL_FLT_MAX 340282346638528859811704183484516925440.0f #define CL_FLT_MIN 1.175494350822287507969e-38f #define CL_FLT_EPSILON 0x1.0p-23f #define CL_DBL_DIG 15 #define CL_DBL_MANT_DIG 53 #define CL_DBL_MAX_10_EXP +308 #define CL_DBL_MAX_EXP +1024 #define CL_DBL_MIN_10_EXP -307 #define CL_DBL_MIN_EXP -1021 #define CL_DBL_RADIX 2 #define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 #define CL_DBL_MIN 2.225073858507201383090e-308 #define CL_DBL_EPSILON 2.220446049250313080847e-16 
#define CL_M_E 2.718281828459045090796 #define CL_M_LOG2E 1.442695040888963387005 #define CL_M_LOG10E 0.434294481903251816668 #define CL_M_LN2 0.693147180559945286227 #define CL_M_LN10 2.302585092994045901094 #define CL_M_PI 3.141592653589793115998 #define CL_M_PI_2 1.570796326794896557999 #define CL_M_PI_4 0.785398163397448278999 #define CL_M_1_PI 0.318309886183790691216 #define CL_M_2_PI 0.636619772367581382433 #define CL_M_2_SQRTPI 1.128379167095512558561 #define CL_M_SQRT2 1.414213562373095145475 #define CL_M_SQRT1_2 0.707106781186547572737 #define CL_M_E_F 2.71828174591064f #define CL_M_LOG2E_F 1.44269502162933f #define CL_M_LOG10E_F 0.43429449200630f #define CL_M_LN2_F 0.69314718246460f #define CL_M_LN10_F 2.30258512496948f #define CL_M_PI_F 3.14159274101257f #define CL_M_PI_2_F 1.57079637050629f #define CL_M_PI_4_F 0.78539818525314f #define CL_M_1_PI_F 0.31830987334251f #define CL_M_2_PI_F 0.63661974668503f #define CL_M_2_SQRTPI_F 1.12837922573090f #define CL_M_SQRT2_F 1.41421353816986f #define CL_M_SQRT1_2_F 0.70710676908493f #define CL_NAN (CL_INFINITY - CL_INFINITY) #define CL_HUGE_VALF ((cl_float) 1e50) #define CL_HUGE_VAL ((cl_double) 1e500) #define CL_MAXFLOAT CL_FLT_MAX #define CL_INFINITY CL_HUGE_VALF #else #include /* scalar types */ typedef int8_t cl_char; typedef uint8_t cl_uchar; typedef int16_t cl_short __attribute__((aligned(2))); typedef uint16_t cl_ushort __attribute__((aligned(2))); typedef int32_t cl_int __attribute__((aligned(4))); typedef uint32_t cl_uint __attribute__((aligned(4))); typedef int64_t cl_long __attribute__((aligned(8))); typedef uint64_t cl_ulong __attribute__((aligned(8))); typedef uint16_t cl_half __attribute__((aligned(2))); typedef float cl_float __attribute__((aligned(4))); typedef double cl_double __attribute__((aligned(8))); /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 #define CL_SCHAR_MIN (-127-1) #define CL_CHAR_MAX CL_SCHAR_MAX #define CL_CHAR_MIN 
CL_SCHAR_MIN #define CL_UCHAR_MAX 255 #define CL_SHRT_MAX 32767 #define CL_SHRT_MIN (-32767-1) #define CL_USHRT_MAX 65535 #define CL_INT_MAX 2147483647 #define CL_INT_MIN (-2147483647-1) #define CL_UINT_MAX 0xffffffffU #define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) #define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) #define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) #define CL_FLT_DIG 6 #define CL_FLT_MANT_DIG 24 #define CL_FLT_MAX_10_EXP +38 #define CL_FLT_MAX_EXP +128 #define CL_FLT_MIN_10_EXP -37 #define CL_FLT_MIN_EXP -125 #define CL_FLT_RADIX 2 #define CL_FLT_MAX 0x1.fffffep127f #define CL_FLT_MIN 0x1.0p-126f #define CL_FLT_EPSILON 0x1.0p-23f #define CL_DBL_DIG 15 #define CL_DBL_MANT_DIG 53 #define CL_DBL_MAX_10_EXP +308 #define CL_DBL_MAX_EXP +1024 #define CL_DBL_MIN_10_EXP -307 #define CL_DBL_MIN_EXP -1021 #define CL_DBL_RADIX 2 #define CL_DBL_MAX 0x1.fffffffffffffp1023 #define CL_DBL_MIN 0x1.0p-1022 #define CL_DBL_EPSILON 0x1.0p-52 #define CL_M_E 2.718281828459045090796 #define CL_M_LOG2E 1.442695040888963387005 #define CL_M_LOG10E 0.434294481903251816668 #define CL_M_LN2 0.693147180559945286227 #define CL_M_LN10 2.302585092994045901094 #define CL_M_PI 3.141592653589793115998 #define CL_M_PI_2 1.570796326794896557999 #define CL_M_PI_4 0.785398163397448278999 #define CL_M_1_PI 0.318309886183790691216 #define CL_M_2_PI 0.636619772367581382433 #define CL_M_2_SQRTPI 1.128379167095512558561 #define CL_M_SQRT2 1.414213562373095145475 #define CL_M_SQRT1_2 0.707106781186547572737 #define CL_M_E_F 2.71828174591064f #define CL_M_LOG2E_F 1.44269502162933f #define CL_M_LOG10E_F 0.43429449200630f #define CL_M_LN2_F 0.69314718246460f #define CL_M_LN10_F 2.30258512496948f #define CL_M_PI_F 3.14159274101257f #define CL_M_PI_2_F 1.57079637050629f #define CL_M_PI_4_F 0.78539818525314f #define CL_M_1_PI_F 0.31830987334251f #define CL_M_2_PI_F 0.63661974668503f #define CL_M_2_SQRTPI_F 1.12837922573090f #define CL_M_SQRT2_F 1.41421353816986f #define 
CL_M_SQRT1_2_F 0.70710676908493f #if defined( __GNUC__ ) #define CL_HUGE_VALF __builtin_huge_valf() #define CL_HUGE_VAL __builtin_huge_val() #define CL_NAN __builtin_nanf( "" ) #else #define CL_HUGE_VALF ((cl_float) 1e50) #define CL_HUGE_VAL ((cl_double) 1e500) float nanf( const char * ); #define CL_NAN nanf( "" ) #endif #define CL_MAXFLOAT CL_FLT_MAX #define CL_INFINITY CL_HUGE_VALF #endif #include /* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */ typedef unsigned int cl_GLuint; typedef int cl_GLint; typedef unsigned int cl_GLenum; /* * Vector types * * Note: OpenCL requires that all types be naturally aligned. * This means that vector types must be naturally aligned. * For example, a vector of four floats must be aligned to * a 16 byte boundary (calculated as 4 * the natural 4-byte * alignment of the float). The alignment qualifiers here * will only function properly if your compiler supports them * and if you don't actively work to defeat them. For example, * in order for a cl_float4 to be 16 byte aligned in a struct, * the start of the struct must itself be 16-byte aligned. * * Maintaining proper alignment is the user's responsibility. */ /* Define basic vector types */ #if defined( __VEC__ ) #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. 
*/ typedef vector unsigned char __cl_uchar16; typedef vector signed char __cl_char16; typedef vector unsigned short __cl_ushort8; typedef vector signed short __cl_short8; typedef vector unsigned int __cl_uint4; typedef vector signed int __cl_int4; typedef vector float __cl_float4; #define __CL_UCHAR16__ 1 #define __CL_CHAR16__ 1 #define __CL_USHORT8__ 1 #define __CL_SHORT8__ 1 #define __CL_UINT4__ 1 #define __CL_INT4__ 1 #define __CL_FLOAT4__ 1 #endif #if defined( __SSE__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef float __cl_float4 __attribute__((vector_size(16))); #else typedef __m128 __cl_float4; #endif #define __CL_FLOAT4__ 1 #endif #if defined( __SSE2__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); typedef cl_char __cl_char16 __attribute__((vector_size(16))); typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); typedef cl_short __cl_short8 __attribute__((vector_size(16))); typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); typedef cl_int __cl_int4 __attribute__((vector_size(16))); typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); typedef cl_long __cl_long2 __attribute__((vector_size(16))); typedef cl_double __cl_double2 __attribute__((vector_size(16))); #else typedef __m128i __cl_uchar16; typedef __m128i __cl_char16; typedef __m128i __cl_ushort8; typedef __m128i __cl_short8; typedef __m128i __cl_uint4; typedef __m128i __cl_int4; typedef __m128i __cl_ulong2; typedef __m128i __cl_long2; typedef __m128d __cl_double2; #endif #define __CL_UCHAR16__ 1 #define __CL_CHAR16__ 1 #define __CL_USHORT8__ 1 #define __CL_SHORT8__ 1 #define __CL_INT4__ 1 #define __CL_UINT4__ 1 #define __CL_ULONG2__ 1 #define __CL_LONG2__ 1 #define __CL_DOUBLE2__ 1 #endif #if defined( __MMX__ ) #include #if defined( __GNUC__ ) typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); typedef cl_char 
__cl_char8 __attribute__((vector_size(8))); typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); typedef cl_short __cl_short4 __attribute__((vector_size(8))); typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); typedef cl_int __cl_int2 __attribute__((vector_size(8))); typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); typedef cl_long __cl_long1 __attribute__((vector_size(8))); typedef cl_float __cl_float2 __attribute__((vector_size(8))); #else typedef __m64 __cl_uchar8; typedef __m64 __cl_char8; typedef __m64 __cl_ushort4; typedef __m64 __cl_short4; typedef __m64 __cl_uint2; typedef __m64 __cl_int2; typedef __m64 __cl_ulong1; typedef __m64 __cl_long1; typedef __m64 __cl_float2; #endif #define __CL_UCHAR8__ 1 #define __CL_CHAR8__ 1 #define __CL_USHORT4__ 1 #define __CL_SHORT4__ 1 #define __CL_INT2__ 1 #define __CL_UINT2__ 1 #define __CL_ULONG1__ 1 #define __CL_LONG1__ 1 #define __CL_FLOAT2__ 1 #endif #if defined( __AVX__ ) #if defined( __MINGW64__ ) #include #else #include #endif #if defined( __GNUC__ ) typedef cl_float __cl_float8 __attribute__((vector_size(32))); typedef cl_double __cl_double4 __attribute__((vector_size(32))); #else typedef __m256 __cl_float8; typedef __m256d __cl_double4; #endif #define __CL_FLOAT8__ 1 #define __CL_DOUBLE4__ 1 #endif /* Define capabilities for anonymous struct members. */ #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) #define __CL_HAS_ANON_STRUCT__ 1 #define __CL_ANON_STRUCT__ __extension__ #elif defined( _WIN32) && (_MSC_VER >= 1500) /* Microsoft Developer Studio 2008 supports anonymous structs, but * complains by default. 
*/
#define __CL_HAS_ANON_STRUCT__ 1
#define __CL_ANON_STRUCT__
   /* Disable warning C4201: nonstandard extension used : nameless
    * struct/union */
#pragma warning( push )
#pragma warning( disable : 4201 )
#else
#define __CL_HAS_ANON_STRUCT__ 0
#define __CL_ANON_STRUCT__
#endif

/* Define alignment keys */
#if defined( __GNUC__ )
    #define CL_ALIGNED(_x) __attribute__ ((aligned(_x)))
#elif defined( _WIN32) && (_MSC_VER)
    /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */
    /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */
    /* #include <crtdefs.h> */
    /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */
    #define CL_ALIGNED(_x)
#else
   #warning Need to implement some method to align data here
   #define CL_ALIGNED(_x)
#endif

/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
#if __CL_HAS_ANON_STRUCT__
    /* .xyzw and .s0123...{f|F} are supported */
    #define CL_HAS_NAMED_VECTOR_FIELDS 1
    /* .hi and .lo are supported */
    #define CL_HAS_HI_LO_VECTOR_FIELDS 1
#endif

/* Define cl_vector types */

/* Layout shared by the vector unions that follow: `s` is the element array;
 * when anonymous structs are available the same storage is also reachable as
 * x/y/z/w, s0..sN and lo/hi; native-vector views (v2, v4, ...) are added only
 * when the matching __CL_<TYPE><N>__ macro is defined. */

/* ---- cl_charn ---- */
typedef union
{
    cl_char CL_ALIGNED(2) s[2];
#if __CL_HAS_ANON_STRUCT__
   __CL_ANON_STRUCT__ struct{ cl_char x, y; };
   __CL_ANON_STRUCT__ struct{ cl_char s0, s1; };
   __CL_ANON_STRUCT__ struct{ cl_char lo, hi; };
#endif
#if defined( __CL_CHAR2__)
    __cl_char2 v2;
#endif
}cl_char2;

typedef union
{
    cl_char CL_ALIGNED(4) s[4];
#if __CL_HAS_ANON_STRUCT__
   __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; };
   __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; };
   __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; };
#endif
#if defined( __CL_CHAR2__)
    __cl_char2 v2[2];
#endif
#if defined( __CL_CHAR4__)
    __cl_char4 v4;
#endif
}cl_char4;

/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5.
*/ typedef cl_char4 cl_char3; typedef union { cl_char CL_ALIGNED(8) s[8]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[4]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4[2]; #endif #if defined( __CL_CHAR8__ ) __cl_char8 v8; #endif }cl_char8; typedef union { cl_char CL_ALIGNED(16) s[16]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; #endif #if defined( __CL_CHAR2__) __cl_char2 v2[8]; #endif #if defined( __CL_CHAR4__) __cl_char4 v4[4]; #endif #if defined( __CL_CHAR8__ ) __cl_char8 v8[2]; #endif #if defined( __CL_CHAR16__ ) __cl_char16 v16; #endif }cl_char16; /* ---- cl_ucharn ---- */ typedef union { cl_uchar CL_ALIGNED(2) s[2]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; #endif #if defined( __cl_uchar2__) __cl_uchar2 v2; #endif }cl_uchar2; typedef union { cl_uchar CL_ALIGNED(4) s[4]; #if __CL_HAS_ANON_STRUCT__ __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; #endif #if defined( __CL_UCHAR2__) __cl_uchar2 v2[2]; #endif #if defined( __CL_UCHAR4__) __cl_uchar4 v4; #endif }cl_uchar4; /* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. 
*/
typedef cl_uchar4 cl_uchar3;

/* 8-component cl_uchar vector: element array `s`, optional named fields, and
 * native views when the matching __CL_UCHARn__ macro is defined. */
typedef union
{
    cl_uchar CL_ALIGNED(8) s[8];
#if __CL_HAS_ANON_STRUCT__
   __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; };
   __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; };
   __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; };
#endif
#if defined( __CL_UCHAR2__)
    __cl_uchar2 v2[4];
#endif
#if defined( __CL_UCHAR4__)
    __cl_uchar4 v4[2];
#endif
#if defined( __CL_UCHAR8__ )
    __cl_uchar8 v8;
#endif
}cl_uchar8;

/* 16-component cl_uchar vector; __spacerN members pad the x/y/z/w struct so
 * that sa..sf land on elements 10..15. */
typedef union
{
    cl_uchar CL_ALIGNED(16) s[16];
#if __CL_HAS_ANON_STRUCT__
   __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
   __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
   __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; };
#endif
#if defined( __CL_UCHAR2__)
    __cl_uchar2 v2[8];
#endif
#if defined( __CL_UCHAR4__)
    __cl_uchar4 v4[4];
#endif
#if defined( __CL_UCHAR8__ )
    __cl_uchar8 v8[2];
#endif
#if defined( __CL_UCHAR16__ )
    __cl_uchar16 v16;
#endif
}cl_uchar16;


/* ---- cl_shortn ---- */
/* 2-component cl_short vector. */
typedef union
{
    cl_short CL_ALIGNED(4) s[2];
#if __CL_HAS_ANON_STRUCT__
   __CL_ANON_STRUCT__ struct{ cl_short x, y; };
   __CL_ANON_STRUCT__ struct{ cl_short s0, s1; };
   __CL_ANON_STRUCT__ struct{ cl_short lo, hi; };
#endif
#if defined( __CL_SHORT2__)
    __cl_short2 v2;
#endif
}cl_short2;

/* 4-component cl_short vector; lo/hi are its two cl_short2 halves. */
typedef union
{
    cl_short CL_ALIGNED(8) s[4];
#if __CL_HAS_ANON_STRUCT__
   __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; };
   __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; };
   __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; };
#endif
#if defined( __CL_SHORT2__)
    __cl_short2 v2[2];
#endif
#if defined( __CL_SHORT4__)
    __cl_short4 v4;
#endif
}cl_short4;

/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5.
*/
typedef  cl_short4  cl_short3;

/*
 * In each union below `s` indexes the lanes as an array; when
 * __CL_HAS_ANON_STRUCT__ is non-zero, anonymous structs additionally name
 * the lanes (x/y/z/w, s0..sF) and the lo/hi halves; a `vN` member is added
 * only when the matching __CL_<TYPE>N__ macro is defined.
 */
typedef union
{
    cl_short   CL_ALIGNED(16) s[8];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7; };
    __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; };
#endif
#if defined( __CL_SHORT2__)
    __cl_short2     v2[4];
#endif
#if defined( __CL_SHORT4__)
    __cl_short4     v4[2];
#endif
#if defined( __CL_SHORT8__ )
    __cl_short8     v8;
#endif
}cl_short8;

typedef union
{
    cl_short  CL_ALIGNED(32) s[16];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
    __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
    __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; };
#endif
#if defined( __CL_SHORT2__)
    __cl_short2     v2[8];
#endif
#if defined( __CL_SHORT4__)
    __cl_short4     v4[4];
#endif
#if defined( __CL_SHORT8__ )
    __cl_short8     v8[2];
#endif
#if defined( __CL_SHORT16__ )
    __cl_short16    v16;
#endif
}cl_short16;


/* ---- cl_ushortn ---- */
typedef union
{
    cl_ushort  CL_ALIGNED(4) s[2];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_ushort  x, y; };
    __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1; };
    __CL_ANON_STRUCT__ struct{ cl_ushort  lo, hi; };
#endif
#if defined( __CL_USHORT2__)
    __cl_ushort2     v2;
#endif
}cl_ushort2;

typedef union
{
    cl_ushort  CL_ALIGNED(8) s[4];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3; };
    __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; };
#endif
#if defined( __CL_USHORT2__)
    __cl_ushort2     v2[2];
#endif
#if defined( __CL_USHORT4__)
    __cl_ushort4     v4;
#endif
}cl_ushort4;

/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5.
*/
typedef  cl_ushort4  cl_ushort3;

/*
 * In each union below `s` indexes the lanes as an array; when
 * __CL_HAS_ANON_STRUCT__ is non-zero, anonymous structs additionally name
 * the lanes (x/y/z/w, s0..sF) and the lo/hi halves; a `vN` member is added
 * only when the matching __CL_<TYPE>N__ macro is defined.
 */
typedef union
{
    cl_ushort   CL_ALIGNED(16) s[8];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7; };
    __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; };
#endif
#if defined( __CL_USHORT2__)
    __cl_ushort2     v2[4];
#endif
#if defined( __CL_USHORT4__)
    __cl_ushort4     v4[2];
#endif
#if defined( __CL_USHORT8__ )
    __cl_ushort8     v8;
#endif
}cl_ushort8;

typedef union
{
    cl_ushort  CL_ALIGNED(32) s[16];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
    __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
    __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; };
#endif
#if defined( __CL_USHORT2__)
    __cl_ushort2     v2[8];
#endif
#if defined( __CL_USHORT4__)
    __cl_ushort4     v4[4];
#endif
#if defined( __CL_USHORT8__ )
    __cl_ushort8     v8[2];
#endif
#if defined( __CL_USHORT16__ )
    __cl_ushort16    v16;
#endif
}cl_ushort16;

/* ---- cl_intn ---- */
typedef union
{
    cl_int  CL_ALIGNED(8) s[2];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_int  x, y; };
    __CL_ANON_STRUCT__ struct{ cl_int  s0, s1; };
    __CL_ANON_STRUCT__ struct{ cl_int  lo, hi; };
#endif
#if defined( __CL_INT2__)
    __cl_int2     v2;
#endif
}cl_int2;

typedef union
{
    cl_int  CL_ALIGNED(16) s[4];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3; };
    __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; };
#endif
#if defined( __CL_INT2__)
    __cl_int2     v2[2];
#endif
#if defined( __CL_INT4__)
    __cl_int4     v4;
#endif
}cl_int4;

/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5.
*/
typedef  cl_int4  cl_int3;

/*
 * In each union below `s` indexes the lanes as an array; when
 * __CL_HAS_ANON_STRUCT__ is non-zero, anonymous structs additionally name
 * the lanes (x/y/z/w, s0..sF) and the lo/hi halves; a `vN` member is added
 * only when the matching __CL_<TYPE>N__ macro is defined.
 */
typedef union
{
    cl_int   CL_ALIGNED(32) s[8];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7; };
    __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; };
#endif
#if defined( __CL_INT2__)
    __cl_int2     v2[4];
#endif
#if defined( __CL_INT4__)
    __cl_int4     v4[2];
#endif
#if defined( __CL_INT8__ )
    __cl_int8     v8;
#endif
}cl_int8;

typedef union
{
    cl_int  CL_ALIGNED(64) s[16];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
    __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
    __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; };
#endif
#if defined( __CL_INT2__)
    __cl_int2     v2[8];
#endif
#if defined( __CL_INT4__)
    __cl_int4     v4[4];
#endif
#if defined( __CL_INT8__ )
    __cl_int8     v8[2];
#endif
#if defined( __CL_INT16__ )
    __cl_int16    v16;
#endif
}cl_int16;


/* ---- cl_uintn ---- */
typedef union
{
    cl_uint  CL_ALIGNED(8) s[2];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_uint  x, y; };
    __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1; };
    __CL_ANON_STRUCT__ struct{ cl_uint  lo, hi; };
#endif
#if defined( __CL_UINT2__)
    __cl_uint2     v2;
#endif
}cl_uint2;

typedef union
{
    cl_uint  CL_ALIGNED(16) s[4];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3; };
    __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; };
#endif
#if defined( __CL_UINT2__)
    __cl_uint2     v2[2];
#endif
#if defined( __CL_UINT4__)
    __cl_uint4     v4;
#endif
}cl_uint4;

/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5.
*/
typedef  cl_uint4  cl_uint3;

/*
 * In each union below `s` indexes the lanes as an array; when
 * __CL_HAS_ANON_STRUCT__ is non-zero, anonymous structs additionally name
 * the lanes (x/y/z/w, s0..sF) and the lo/hi halves; a `vN` member is added
 * only when the matching __CL_<TYPE>N__ macro is defined.
 */
typedef union
{
    cl_uint   CL_ALIGNED(32) s[8];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7; };
    __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; };
#endif
#if defined( __CL_UINT2__)
    __cl_uint2     v2[4];
#endif
#if defined( __CL_UINT4__)
    __cl_uint4     v4[2];
#endif
#if defined( __CL_UINT8__ )
    __cl_uint8     v8;
#endif
}cl_uint8;

typedef union
{
    cl_uint  CL_ALIGNED(64) s[16];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
    __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
    __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; };
#endif
#if defined( __CL_UINT2__)
    __cl_uint2     v2[8];
#endif
#if defined( __CL_UINT4__)
    __cl_uint4     v4[4];
#endif
#if defined( __CL_UINT8__ )
    __cl_uint8     v8[2];
#endif
#if defined( __CL_UINT16__ )
    __cl_uint16    v16;
#endif
}cl_uint16;

/* ---- cl_longn ---- */
typedef union
{
    cl_long  CL_ALIGNED(16) s[2];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_long  x, y; };
    __CL_ANON_STRUCT__ struct{ cl_long  s0, s1; };
    __CL_ANON_STRUCT__ struct{ cl_long  lo, hi; };
#endif
#if defined( __CL_LONG2__)
    __cl_long2     v2;
#endif
}cl_long2;

typedef union
{
    cl_long  CL_ALIGNED(32) s[4];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3; };
    __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; };
#endif
#if defined( __CL_LONG2__)
    __cl_long2     v2[2];
#endif
#if defined( __CL_LONG4__)
    __cl_long4     v4;
#endif
}cl_long4;

/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5.
*/
typedef  cl_long4  cl_long3;

/*
 * In each union below `s` indexes the lanes as an array; when
 * __CL_HAS_ANON_STRUCT__ is non-zero, anonymous structs additionally name
 * the lanes (x/y/z/w, s0..sF) and the lo/hi halves; a `vN` member is added
 * only when the matching __CL_<TYPE>N__ macro is defined.
 */
typedef union
{
    cl_long   CL_ALIGNED(64) s[8];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7; };
    __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; };
#endif
#if defined( __CL_LONG2__)
    __cl_long2     v2[4];
#endif
#if defined( __CL_LONG4__)
    __cl_long4     v4[2];
#endif
#if defined( __CL_LONG8__ )
    __cl_long8     v8;
#endif
}cl_long8;

typedef union
{
    cl_long  CL_ALIGNED(128) s[16];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
    __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
    __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; };
#endif
#if defined( __CL_LONG2__)
    __cl_long2     v2[8];
#endif
#if defined( __CL_LONG4__)
    __cl_long4     v4[4];
#endif
#if defined( __CL_LONG8__ )
    __cl_long8     v8[2];
#endif
#if defined( __CL_LONG16__ )
    __cl_long16    v16;
#endif
}cl_long16;


/* ---- cl_ulongn ---- */
typedef union
{
    cl_ulong  CL_ALIGNED(16) s[2];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_ulong  x, y; };
    __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1; };
    __CL_ANON_STRUCT__ struct{ cl_ulong  lo, hi; };
#endif
#if defined( __CL_ULONG2__)
    __cl_ulong2     v2;
#endif
}cl_ulong2;

typedef union
{
    cl_ulong  CL_ALIGNED(32) s[4];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3; };
    __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; };
#endif
#if defined( __CL_ULONG2__)
    __cl_ulong2     v2[2];
#endif
#if defined( __CL_ULONG4__)
    __cl_ulong4     v4;
#endif
}cl_ulong4;

/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5.
*/
typedef  cl_ulong4  cl_ulong3;

/*
 * In each union below `s` indexes the lanes as an array; when
 * __CL_HAS_ANON_STRUCT__ is non-zero, anonymous structs additionally name
 * the lanes (x/y/z/w, s0..sF) and the lo/hi halves; a `vN` member is added
 * only when the matching __CL_<TYPE>N__ macro is defined.
 */
typedef union
{
    cl_ulong   CL_ALIGNED(64) s[8];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7; };
    __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; };
#endif
#if defined( __CL_ULONG2__)
    __cl_ulong2     v2[4];
#endif
#if defined( __CL_ULONG4__)
    __cl_ulong4     v4[2];
#endif
#if defined( __CL_ULONG8__ )
    __cl_ulong8     v8;
#endif
}cl_ulong8;

typedef union
{
    cl_ulong  CL_ALIGNED(128) s[16];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
    __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
    __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; };
#endif
#if defined( __CL_ULONG2__)
    __cl_ulong2     v2[8];
#endif
#if defined( __CL_ULONG4__)
    __cl_ulong4     v4[4];
#endif
#if defined( __CL_ULONG8__ )
    __cl_ulong8     v8[2];
#endif
#if defined( __CL_ULONG16__ )
    __cl_ulong16    v16;
#endif
}cl_ulong16;


/* --- cl_floatn ---- */

typedef union
{
    cl_float  CL_ALIGNED(8) s[2];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_float  x, y; };
    __CL_ANON_STRUCT__ struct{ cl_float  s0, s1; };
    __CL_ANON_STRUCT__ struct{ cl_float  lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
    __cl_float2     v2;
#endif
}cl_float2;

typedef union
{
    cl_float  CL_ALIGNED(16) s[4];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_float  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_float  s0, s1, s2, s3; };
    __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
    __cl_float2     v2[2];
#endif
#if defined( __CL_FLOAT4__)
    __cl_float4     v4;
#endif
}cl_float4;

/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5.
*/
typedef  cl_float4  cl_float3;

/*
 * In each union below `s` indexes the lanes as an array; when
 * __CL_HAS_ANON_STRUCT__ is non-zero, anonymous structs additionally name
 * the lanes (x/y/z/w, s0..sF) and the lo/hi halves; a `vN` member is added
 * only when the matching __CL_<TYPE>N__ macro is defined.
 */
typedef union
{
    cl_float   CL_ALIGNED(32) s[8];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_float  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_float  s0, s1, s2, s3, s4, s5, s6, s7; };
    __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
    __cl_float2     v2[4];
#endif
#if defined( __CL_FLOAT4__)
    __cl_float4     v4[2];
#endif
#if defined( __CL_FLOAT8__ )
    __cl_float8     v8;
#endif
}cl_float8;

typedef union
{
    cl_float  CL_ALIGNED(64) s[16];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_float  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
    __CL_ANON_STRUCT__ struct{ cl_float  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
    __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
    __cl_float2     v2[8];
#endif
#if defined( __CL_FLOAT4__)
    __cl_float4     v4[4];
#endif
#if defined( __CL_FLOAT8__ )
    __cl_float8     v8[2];
#endif
#if defined( __CL_FLOAT16__ )
    __cl_float16    v16;
#endif
}cl_float16;

/* --- cl_doublen ---- */

typedef union
{
    cl_double  CL_ALIGNED(16) s[2];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_double  x, y; };
    __CL_ANON_STRUCT__ struct{ cl_double s0, s1; };
    __CL_ANON_STRUCT__ struct{ cl_double lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
    __cl_double2     v2;
#endif
}cl_double2;

typedef union
{
    cl_double  CL_ALIGNED(32) s[4];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_double  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_double  s0, s1, s2, s3; };
    __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
    __cl_double2     v2[2];
#endif
#if defined( __CL_DOUBLE4__)
    __cl_double4     v4;
#endif
}cl_double4;

/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5.
*/
typedef  cl_double4  cl_double3;

/*
 * In each union below `s` indexes the lanes as an array; when
 * __CL_HAS_ANON_STRUCT__ is non-zero, anonymous structs additionally name
 * the lanes (x/y/z/w, s0..sF) and the lo/hi halves; a `vN` member is added
 * only when the matching __CL_<TYPE>N__ macro is defined.
 */
typedef union
{
    cl_double   CL_ALIGNED(64) s[8];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_double  x, y, z, w; };
    __CL_ANON_STRUCT__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7; };
    __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
    __cl_double2     v2[4];
#endif
#if defined( __CL_DOUBLE4__)
    __cl_double4     v4[2];
#endif
#if defined( __CL_DOUBLE8__ )
    __cl_double8     v8;
#endif
}cl_double8;

typedef union
{
    cl_double  CL_ALIGNED(128) s[16];
#if __CL_HAS_ANON_STRUCT__
    __CL_ANON_STRUCT__ struct{ cl_double  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
    __CL_ANON_STRUCT__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
    __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
    __cl_double2     v2[8];
#endif
#if defined( __CL_DOUBLE4__)
    __cl_double4     v4[4];
#endif
#if defined( __CL_DOUBLE8__ )
    __cl_double8     v8[2];
#endif
#if defined( __CL_DOUBLE16__ )
    __cl_double16    v16;
#endif
}cl_double16;

/* Macro to facilitate debugging
 * Usage:
 *   Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source.
 *   The first line ends with:   CL_PROGRAM_STRING_DEBUG_INFO \"
 *   Each line thereafter of OpenCL C source must end with: \n\
 *   The last line ends in ";
 *
 *   Example:
 *
 *   const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
 *   kernel void foo( int a, float * b )             \n\
 *   {                                               \n\
 *      // my comment                                \n\
 *      *b[ get_global_id(0)] = a;                   \n\
 *   }                                               \n\
 *   ";
 *
 * This should correctly set up the line, (column) and file information for your source
 * string so you can do source level debugging.
*/
/* __CL_STRINGIFY applies the preprocessor `#` operator directly to its
 * argument; _CL_STRINGIFY forwards through it so that a macro argument such
 * as __LINE__ is expanded before being turned into a string literal. */
#define  __CL_STRINGIFY( _x )               # _x
#define  _CL_STRINGIFY( _x )                __CL_STRINGIFY( _x )
/* String literal of the form `#line <current line> "<current file>" \n\n`,
 * built by adjacent-literal concatenation. */
#define  CL_PROGRAM_STRING_DEBUG_INFO       "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n"

#ifdef __cplusplus
}
#endif

/* Drop the helper macros used only for the vector union declarations. */
#undef __CL_HAS_ANON_STRUCT__
#undef __CL_ANON_STRUCT__
#if defined( _WIN32) && (_MSC_VER >= 1500)
    #pragma warning( pop )
#endif

#endif  /* __CL_PLATFORM_H */
Oclgrind-15.5/src/CL/opencl.h000066400000000000000000000033321252441671000157260ustar00rootroot00000000000000/*******************************************************************************
 * Copyright (c) 2008-2012 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/ /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ #ifndef __OPENCL_H #define __OPENCL_H #ifdef __cplusplus extern "C" { #endif #ifdef __APPLE__ #include #include #include #include #else #include #include #include #include #endif #ifdef __cplusplus } #endif #endif /* __OPENCL_H */ Oclgrind-15.5/src/core/000077500000000000000000000000001252441671000147265ustar00rootroot00000000000000Oclgrind-15.5/src/core/Context.cpp000066400000000000000000000316171252441671000170660ustar00rootroot00000000000000// Context.cpp (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #if defined(_WIN32) && !defined(__MINGW32__) #include #undef ERROR #else #include #endif #include #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Instruction.h" #include "Context.h" #include "Kernel.h" #include "KernelInvocation.h" #include "Memory.h" #include "Program.h" #include "WorkGroup.h" #include "WorkItem.h" #include "plugins/InstructionCounter.h" #include "plugins/InteractiveDebugger.h" #include "plugins/Logger.h" #include "plugins/MemCheck.h" #include "plugins/RaceDetector.h" using namespace oclgrind; using namespace std; Context::Context() { m_globalMemory = new Memory(AddrSpaceGlobal, this); m_kernelInvocation = NULL; loadPlugins(); } Context::~Context() { delete m_globalMemory; unloadPlugins(); } bool Context::isThreadSafe() const { for (const PluginEntry &p : m_plugins) { if (!p.first->isThreadSafe()) return false; } return true; } Memory* Context::getGlobalMemory() const { return m_globalMemory; } void Context::loadPlugins() { // Create core plugins m_plugins.push_back(make_pair(new Logger(this), true)); m_plugins.push_back(make_pair(new 
MemCheck(this), true)); if (checkEnv("OCLGRIND_INST_COUNTS")) m_plugins.push_back(make_pair(new InstructionCounter(this), true)); if (checkEnv("OCLGRIND_DATA_RACES")) m_plugins.push_back(make_pair(new RaceDetector(this), true)); if (checkEnv("OCLGRIND_INTERACTIVE")) m_plugins.push_back(make_pair(new InteractiveDebugger(this), true)); // Load dynamic plugins const char *dynamicPlugins = getenv("OCLGRIND_PLUGINS"); if (dynamicPlugins) { std::istringstream ss(dynamicPlugins); std::string libpath; while(std::getline(ss, libpath, ':')) { #if defined(_WIN32) && !defined(__MINGW32__) HMODULE library = LoadLibrary(libpath.c_str()); if (!library) { cerr << "Loading Oclgrind plugin failed (LoadLibrary): " << GetLastError() << endl; continue; } void *initialize = GetProcAddress(library, "initializePlugins"); if (!initialize) { cerr << "Loading Oclgrind plugin failed (GetProcAddress): " << GetLastError() << endl; continue; } #else void *library = dlopen(libpath.c_str(), RTLD_NOW); if (!library) { cerr << "Loading Oclgrind plugin failed (dlopen): " << dlerror() << endl; continue; } void *initialize = dlsym(library, "initializePlugins"); if (!initialize) { cerr << "Loading Oclgrind plugin failed (dlsym): " << dlerror() << endl; continue; } #endif ((void(*)(Context*))initialize)(this); m_pluginLibraries.push_back(library); } } } void Context::unloadPlugins() { // Release dynamic plugin libraries list::iterator plibItr; for (plibItr = m_pluginLibraries.begin(); plibItr != m_pluginLibraries.end(); plibItr++) { #if defined(_WIN32) && !defined(__MINGW32__) void *release = GetProcAddress((HMODULE)*plibItr, "releasePlugins"); if (release) { ((void(*)(Context*))release)(this); } FreeLibrary((HMODULE)*plibItr); #else void *release = dlsym(*plibItr, "releasePlugins"); if (release) { ((void(*)(Context*))release)(this); } dlclose(*plibItr); #endif } // Destroy internal plugins PluginList::iterator pItr; for (pItr = m_plugins.begin(); pItr != m_plugins.end(); pItr++) { if (pItr->second) 
delete pItr->first;
  }

  m_plugins.clear();
}

// Append a plugin with its flag set to false; unloadPlugins() only deletes
// entries whose flag is true, so the caller keeps ownership of `plugin`.
void Context::registerPlugin(Plugin *plugin)
{
  m_plugins.push_back(make_pair(plugin, false));
}

// Remove every (plugin, false) entry from the plugin list.  Entries stored
// with a true flag are not matched by this call.
void Context::unregisterPlugin(Plugin *plugin)
{
  m_plugins.remove(make_pair(plugin, false));
}

// Build an ERROR message consisting of the error text followed by the
// current kernel, entity and location, and send it to the plugins.
void Context::logError(const char* error) const
{
  Message msg(ERROR, this);
  msg << error << endl
      << msg.INDENT
      << "Kernel: " << msg.CURRENT_KERNEL << endl
      << "Entity: " << msg.CURRENT_ENTITY << endl
      << msg.CURRENT_LOCATION << endl;
  msg.send();
}

// Invoke `function(args...)` on every registered plugin, in list order.
#define NOTIFY(function, ...)                     \
{                                                 \
  PluginList::const_iterator pluginItr;           \
  for (pluginItr = m_plugins.begin();             \
       pluginItr != m_plugins.end(); pluginItr++) \
  {                                               \
    pluginItr->first->function(__VA_ARGS__);      \
  }                                               \
}

// Forward an executed instruction and its result to all plugins.
void Context::notifyInstructionExecuted(const WorkItem *workItem,
                                        const llvm::Instruction *instruction,
                                        const TypedValue& result) const
{
  NOTIFY(instructionExecuted, workItem, instruction, result);
}

// Record `kernelInvocation` as the running kernel (asserting that none was
// already recorded) and then notify all plugins.
void Context::notifyKernelBegin(const KernelInvocation *kernelInvocation) const
{
  assert(m_kernelInvocation == NULL);
  m_kernelInvocation = kernelInvocation;

  NOTIFY(kernelBegin, kernelInvocation);
}

// Notify all plugins, then clear the recorded kernel (asserting that it is
// the one being ended).
void Context::notifyKernelEnd(const KernelInvocation *kernelInvocation) const
{
  NOTIFY(kernelEnd, kernelInvocation);

  assert(m_kernelInvocation == kernelInvocation);
  m_kernelInvocation = NULL;
}

// Forward a memory allocation event to all plugins.
void Context::notifyMemoryAllocated(const Memory *memory, size_t address,
                                    size_t size, cl_mem_flags flags) const
{
  NOTIFY(memoryAllocated, memory, address, size, flags);
}

// Forward an atomic load to all plugins, but only while a kernel is running
// and it reports a current work-item; otherwise the event is dropped.
void Context::notifyMemoryAtomicLoad(const Memory *memory, AtomicOp op,
                                     size_t address, size_t size) const
{
  if (m_kernelInvocation && m_kernelInvocation->getCurrentWorkItem())
  {
    NOTIFY(memoryAtomicLoad, memory, m_kernelInvocation->getCurrentWorkItem(),
           op, address, size);
  }
}

// Forward an atomic store to all plugins, but only while a kernel is running
// and it reports a current work-item; otherwise the event is dropped.
void Context::notifyMemoryAtomicStore(const Memory *memory, AtomicOp op,
                                      size_t address, size_t size) const
{
  if (m_kernelInvocation && m_kernelInvocation->getCurrentWorkItem())
  {
    NOTIFY(memoryAtomicStore, memory, m_kernelInvocation->getCurrentWorkItem(), op,
address, size); } } void Context::notifyMemoryDeallocated(const Memory *memory, size_t address) const { NOTIFY(memoryDeallocated, memory, address); } void Context::notifyMemoryLoad(const Memory *memory, size_t address, size_t size) const { if (m_kernelInvocation) { if (m_kernelInvocation->getCurrentWorkItem()) { NOTIFY(memoryLoad, memory, m_kernelInvocation->getCurrentWorkItem(), address, size); } else if (m_kernelInvocation->getCurrentWorkGroup()) { NOTIFY(memoryLoad, memory, m_kernelInvocation->getCurrentWorkGroup(), address, size); } } else { NOTIFY(hostMemoryLoad, memory, address, size); } } void Context::notifyMemoryStore(const Memory *memory, size_t address, size_t size, const uint8_t *storeData) const { if (m_kernelInvocation) { if (m_kernelInvocation->getCurrentWorkItem()) { NOTIFY(memoryStore, memory, m_kernelInvocation->getCurrentWorkItem(), address, size, storeData); } else if (m_kernelInvocation->getCurrentWorkGroup()) { NOTIFY(memoryStore, memory, m_kernelInvocation->getCurrentWorkGroup(), address, size, storeData); } } else { NOTIFY(hostMemoryStore, memory, address, size, storeData); } } void Context::notifyMessage(MessageType type, const char *message) const { NOTIFY(log, type, message); } void Context::notifyWorkGroupBarrier(const WorkGroup *workGroup, uint32_t flags) const { NOTIFY(workGroupBarrier, workGroup, flags); } void Context::notifyWorkGroupBegin(const WorkGroup *workGroup) const { NOTIFY(workGroupBegin, workGroup); } void Context::notifyWorkGroupComplete(const WorkGroup *workGroup) const { NOTIFY(workGroupComplete, workGroup); } void Context::notifyWorkItemBegin(const WorkItem *workItem) const { NOTIFY(workItemBegin, workItem); } void Context::notifyWorkItemComplete(const WorkItem *workItem) const { NOTIFY(workItemComplete, workItem); } #undef NOTIFY Context::Message::Message(MessageType type, const Context *context) { m_type = type; m_context = context; m_kernelInvocation = context->m_kernelInvocation; } Context::Message& 
Context::Message::operator<<(const special& id) { switch (id) { case INDENT: m_indentModifiers.push_back( m_stream.tellp()); break; case UNINDENT: m_indentModifiers.push_back(-m_stream.tellp()); break; case CURRENT_KERNEL: *this << m_kernelInvocation->getKernel()->getName(); break; case CURRENT_WORK_ITEM_GLOBAL: { const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); if (workItem) { *this << workItem->getGlobalID(); } else { *this << "(none)"; } break; } case CURRENT_WORK_ITEM_LOCAL: { const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); if (workItem) { *this << workItem->getLocalID(); } else { *this << "(none)"; } break; } case CURRENT_WORK_GROUP: { const WorkGroup *workGroup = m_kernelInvocation->getCurrentWorkGroup(); if (workGroup) { *this << workGroup->getGroupID(); } else { *this << "(none)"; } break; } case CURRENT_ENTITY: { const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); const WorkGroup *workGroup = m_kernelInvocation->getCurrentWorkGroup(); if (workItem) { *this << "Global" << workItem->getGlobalID() << " Local" << workItem->getLocalID() << " "; } if (workGroup) { *this << "Group" << workGroup->getGroupID(); } if (!workItem && ! 
workGroup) { *this << "(unknown)"; } break; } case CURRENT_LOCATION: { const llvm::Instruction *instruction = NULL; const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); const WorkGroup *workGroup = m_kernelInvocation->getCurrentWorkGroup(); if (workItem) { instruction = workItem->getCurrentInstruction(); } else if (workGroup) { instruction = workGroup->getCurrentBarrier(); } *this << instruction; break; } } return *this; } Context::Message& Context::Message::operator<<( const llvm::Instruction *instruction) { // Use mutex as some part of LLVM used by dumpInstruction() is not thread-safe static std::mutex mtx; std::lock_guard lock(mtx); if (instruction) { // Output instruction dumpInstruction(m_stream, instruction); *this << endl; // Output debug information llvm::MDNode *md = instruction->getMetadata("dbg"); if (!md) { *this << "Debugging information not available." << endl; } else { #if LLVM_VERSION > 36 llvm::DILocation *loc = (llvm::DILocation*)md; unsigned lineNumber = loc->getLine(); llvm::StringRef filename = loc->getFilename(); #else llvm::DILocation loc((llvm::MDLocation*)md); unsigned lineNumber = loc.getLineNumber(); llvm::StringRef filename = loc.getFilename(); #endif *this << "At line " << dec << lineNumber << " of " << filename.str() << ":" << endl; // Get source line const Program *program = m_kernelInvocation->getKernel()->getProgram(); const char *line = program->getSourceLine(lineNumber); if (line) { while (isspace(line[0])) line++; *this << " " << line; } else *this << " (source not available)"; } } else { *this << "(location unknown)"; } return *this; } Context::Message& Context::Message::operator<<( std::ostream& (*t)(std::ostream&)) { m_stream << t; return *this; } Context::Message& Context::Message::operator<<( std::ios& (*t)(std::ios&)) { m_stream << t; return *this; } Context::Message& Context::Message::operator<<( std::ios_base& (*t)(std::ios_base&)) { m_stream << t; return *this; } void Context::Message::send() const { 
string msg; string line; int currentIndent = 0; list::const_iterator itr = m_indentModifiers.begin(); m_stream.clear(); m_stream.seekg(0); while (m_stream.good()) { getline(m_stream, line); // TODO: Wrap long lines msg += line; // Check for indentation modifiers long pos = m_stream.tellg(); if (itr != m_indentModifiers.end() && pos >= abs(*itr)) { if (*itr >= 0) currentIndent++; else currentIndent--; itr++; } if (!m_stream.eof()) { // Add newline and indentation msg += '\n'; for (int i = 0; i < currentIndent; i++) msg += '\t'; } } m_context->notifyMessage(m_type, msg.c_str()); } Oclgrind-15.5/src/core/Context.h000066400000000000000000000071721252441671000165320ustar00rootroot00000000000000// Context.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" namespace oclgrind { class KernelInvocation; class Memory; class Plugin; class WorkGroup; class WorkItem; typedef std::pair PluginEntry; typedef std::list PluginList; class Context { public: Context(); virtual ~Context(); Memory* getGlobalMemory() const; bool isThreadSafe() const; void logError(const char* error) const; // Simulation callbacks void notifyInstructionExecuted(const WorkItem *workItem, const llvm::Instruction *instruction, const TypedValue& result) const; void notifyKernelBegin(const KernelInvocation *kernelInvocation) const; void notifyKernelEnd(const KernelInvocation *kernelInvocation) const; void notifyMemoryAllocated(const Memory *memory, size_t address, size_t size, cl_mem_flags flags) const; void notifyMemoryAtomicLoad(const Memory *memory, AtomicOp op, size_t address, size_t size) const; void notifyMemoryAtomicStore(const Memory *memory, AtomicOp op, size_t address, size_t size) const; void notifyMemoryDeallocated(const Memory *memory, size_t address) 
const; void notifyMemoryLoad(const Memory *memory, size_t address, size_t size) const; void notifyMemoryStore(const Memory *memory, size_t address, size_t size, const uint8_t *storeData) const; void notifyMessage(MessageType type, const char *message) const; void notifyWorkGroupBarrier(const WorkGroup *workGroup, uint32_t flags) const; void notifyWorkGroupBegin(const WorkGroup *workGroup) const; void notifyWorkGroupComplete(const WorkGroup *workGroup) const; void notifyWorkItemBegin(const WorkItem *workItem) const; void notifyWorkItemComplete(const WorkItem *workItem) const; // Plugins void registerPlugin(Plugin *plugin); void unregisterPlugin(Plugin *plugin); private: mutable const KernelInvocation *m_kernelInvocation; Memory *m_globalMemory; PluginList m_plugins; std::list m_pluginLibraries; void loadPlugins(); void unloadPlugins(); public: class Message { public: enum special { INDENT, UNINDENT, CURRENT_KERNEL, CURRENT_WORK_ITEM_GLOBAL, CURRENT_WORK_ITEM_LOCAL, CURRENT_WORK_GROUP, CURRENT_ENTITY, CURRENT_LOCATION, }; Message(MessageType type, const Context *context); Message& operator<<(const special& id); Message& operator<<(const llvm::Instruction *instruction); template Message& operator<<(const T& t); Message& operator<<(std::ostream& (*t)(std::ostream&)); Message& operator<<(std::ios& (*t)(std::ios&)); Message& operator<<(std::ios_base& (*t)(std::ios_base&)); void send() const; private: MessageType m_type; const Context *m_context; const KernelInvocation *m_kernelInvocation; mutable std::stringstream m_stream; std::list m_indentModifiers; }; }; template Context::Message& Context::Message::operator<<(const T& t) { m_stream << t; return *this; } } Oclgrind-15.5/src/core/Kernel.cpp000066400000000000000000000321441252441671000166560ustar00rootroot00000000000000// Kernel.cpp (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. 
For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #include #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" #include "llvm/Support/raw_os_ostream.h" #include "Kernel.h" #include "Program.h" #include "Memory.h" using namespace oclgrind; using namespace std; Kernel::Kernel(const Program *program, const llvm::Function *function, const llvm::Module *module) : m_program(program), m_function(function), m_name(function->getName()) { m_localMemory = new Memory(AddrSpaceLocal, program->getContext()); m_privateMemory = new Memory(AddrSpacePrivate, program->getContext()); // Set-up global variables llvm::Module::const_global_iterator itr; for (itr = module->global_begin(); itr != module->global_end(); itr++) { llvm::PointerType *type = itr->getType(); switch (type->getPointerAddressSpace()) { case AddrSpacePrivate: { const llvm::Constant *init = itr->getInitializer(); // Allocate private memory for variable unsigned size = getTypeSize(init->getType()); size_t address = m_privateMemory->allocateBuffer(size); // Initialize variable void *ptr = m_privateMemory->getPointer(address); getConstantData((unsigned char*)ptr, init); TypedValue value = { sizeof(size_t), 1, new unsigned char[sizeof(size_t)] }; value.setPointer(address); m_arguments[itr] = value; break; } case AddrSpaceConstant: m_constants.push_back(itr); break; case AddrSpaceLocal: { // Allocate buffer unsigned size = getTypeSize(itr->getInitializer()->getType()); TypedValue v = { sizeof(size_t), 1, new unsigned char[sizeof(size_t)] }; v.setPointer(m_localMemory->allocateBuffer(size)); m_arguments[itr] = v; break; } default: FATAL_ERROR("Unsupported GlobalVariable address space: %d", type->getPointerAddressSpace()); } } // Get metadata node containing kernel arg info m_metadata = NULL; llvm::NamedMDNode *md = module->getNamedMetadata("opencl.kernels"); if (md) { for (unsigned i = 0; i < md->getNumOperands(); i++) { 
llvm::MDNode *node = md->getOperand(i); llvm::ConstantAsMetadata *cam = llvm::dyn_cast(node->getOperand(0).get()); if (!cam) continue; llvm::Function *function = ((llvm::Function*)cam->getValue()); if (function->getName() == m_name) { m_metadata = node; break; } } } } Kernel::Kernel(const Kernel& kernel) : m_program(kernel.m_program) { m_function = kernel.m_function; m_constants = kernel.m_constants; m_constantBuffers = kernel.m_constantBuffers; m_localMemory = kernel.m_localMemory->clone(); m_privateMemory = kernel.m_privateMemory->clone(); m_name = kernel.m_name; m_metadata = kernel.m_metadata; TypedValueMap::const_iterator itr; for (itr = kernel.m_arguments.begin(); itr != kernel.m_arguments.end(); itr++) { m_arguments[itr->first] = itr->second.clone(); } } Kernel::~Kernel() { delete m_localMemory; delete m_privateMemory; TypedValueMap::iterator itr; for (itr = m_arguments.begin(); itr != m_arguments.end(); itr++) { delete[] itr->second.data; } } bool Kernel::allArgumentsSet() const { llvm::Function::const_arg_iterator itr; for (itr = m_function->arg_begin(); itr != m_function->arg_end(); itr++) { if (!m_arguments.count(itr)) { return false; } } return true; } void Kernel::allocateConstants(Memory *memory) { list::const_iterator itr; for (itr = m_constants.begin(); itr != m_constants.end(); itr++) { const llvm::Constant *initializer = (*itr)->getInitializer(); const llvm::Type *type = initializer->getType(); // Allocate buffer unsigned size = getTypeSize(type); TypedValue v = { sizeof(size_t), 1, new unsigned char[sizeof(size_t)] }; size_t address = memory->allocateBuffer(size); v.setPointer(address); m_constantBuffers.push_back(address); m_arguments[*itr] = v; // Initialise buffer contents unsigned char *data = new unsigned char[size]; getConstantData(data, (const llvm::Constant*)initializer); memory->store(data, address, size); delete[] data; } } void Kernel::deallocateConstants(Memory *memory) { list::const_iterator itr; for (itr = m_constantBuffers.begin(); 
itr != m_constantBuffers.end(); itr++) { memory->deallocateBuffer(*itr); } m_constantBuffers.clear(); } const llvm::Argument* Kernel::getArgument(unsigned int index) const { assert(index < getNumArguments()); llvm::Function::const_arg_iterator argItr = m_function->arg_begin(); for (unsigned i = 0; i < index; i++) { argItr++; } return argItr; } unsigned int Kernel::getArgumentAccessQualifier(unsigned int index) const { assert(index < getNumArguments()); // Get metadata node const llvm::MDNode *node = getArgumentMetadata("kernel_arg_access_qual"); if (!node) { return -1; } // Get qualifier string llvm::MDString *str = llvm::dyn_cast(node->getOperand(index+1)); string access = str->getString(); if (access == "read_only") { return CL_KERNEL_ARG_ACCESS_READ_ONLY; } else if (access == "write_only") { return CL_KERNEL_ARG_ACCESS_WRITE_ONLY; } else if (access == "read_write") { return CL_KERNEL_ARG_ACCESS_READ_WRITE; } return CL_KERNEL_ARG_ACCESS_NONE; } unsigned int Kernel::getArgumentAddressQualifier(unsigned int index) const { assert(index < getNumArguments()); // Get metadata node const llvm::MDNode *node = getArgumentMetadata("kernel_arg_addr_space"); if (!node) { return -1; } // Get address space switch(getMDOpAsConstInt(node->getOperand(index+1))->getZExtValue()) { case AddrSpacePrivate: return CL_KERNEL_ARG_ADDRESS_PRIVATE; case AddrSpaceGlobal: return CL_KERNEL_ARG_ADDRESS_GLOBAL; case AddrSpaceConstant: return CL_KERNEL_ARG_ADDRESS_CONSTANT; case AddrSpaceLocal: return CL_KERNEL_ARG_ADDRESS_LOCAL; default: return -1; } } const llvm::MDNode* Kernel::getArgumentMetadata(string name) const { if (!m_metadata) { return NULL; } // Loop over all metadata nodes for this kernel for (unsigned i = 0; i < m_metadata->getNumOperands(); i++) { const llvm::MDOperand& op = m_metadata->getOperand(i); if (llvm::MDNode *node = llvm::dyn_cast(op.get())) { // Check if node matches target name if (node->getNumOperands() > 0 && 
((llvm::MDString*)(node->getOperand(0).get()))->getString() == name) { return node; } } } return NULL; } const llvm::StringRef Kernel::getArgumentName(unsigned int index) const { return getArgument(index)->getName(); } const llvm::StringRef Kernel::getArgumentTypeName(unsigned int index) const { assert(index < getNumArguments()); // Get metadata node const llvm::MDNode *node = getArgumentMetadata("kernel_arg_type"); if (!node) { return ""; } return llvm::dyn_cast(node->getOperand(index+1))->getString(); } unsigned int Kernel::getArgumentTypeQualifier(unsigned int index) const { assert(index < getNumArguments()); // Get metadata node const llvm::MDNode *node = getArgumentMetadata("kernel_arg_type_qual"); if (!node) { return -1; } // Get qualifiers llvm::MDString *str = llvm::dyn_cast(node->getOperand(index+1)); istringstream iss(str->getString().str()); unsigned int result = CL_KERNEL_ARG_TYPE_NONE; while (!iss.eof()) { string tok; iss >> tok; if (tok == "const") { result |= CL_KERNEL_ARG_TYPE_CONST; } else if (tok == "restrict") { result |= CL_KERNEL_ARG_TYPE_RESTRICT; } else if (tok == "volatile") { result |= CL_KERNEL_ARG_TYPE_VOLATILE; } } return result; } size_t Kernel::getArgumentSize(unsigned int index) const { const llvm::Argument *argument = getArgument(index); const llvm::Type *type = argument->getType(); // Check if pointer argument if (type->isPointerTy() && argument->hasByValAttr()) { return getTypeSize(type->getPointerElementType()); } return getTypeSize(type); } string Kernel::getAttributes() const { ostringstream attributes(""); for (unsigned i = 0; i < m_metadata->getNumOperands(); i++) { llvm::MDNode *op = llvm::dyn_cast(m_metadata->getOperand(i)); if (op) { llvm::MDNode *val = ((llvm::MDNode*)op); llvm::MDString *str = llvm::dyn_cast(val->getOperand(0).get()); string name = str->getString().str(); if (name == "reqd_work_group_size" || name == "work_group_size_hint") { attributes << name << "(" << 
getMDOpAsConstInt(val->getOperand(1))->getZExtValue() << "," << getMDOpAsConstInt(val->getOperand(2))->getZExtValue() << "," << getMDOpAsConstInt(val->getOperand(3))->getZExtValue() << ") "; } else if (name == "vec_type_hint") { // Get type hint size_t n = 1; llvm::Metadata *md = val->getOperand(1).get(); llvm::ValueAsMetadata *vam = llvm::dyn_cast(md); const llvm::Type *type = vam->getType(); if (type->isVectorTy()) { n = type->getVectorNumElements(); type = type->getVectorElementType(); } // Generate attribute string attributes << name << "(" << flush; llvm::raw_os_ostream out(attributes); type->print(out); out.flush(); attributes << n << ") "; } } } return attributes.str(); } const llvm::Function* Kernel::getFunction() const { return m_function; } const Memory* Kernel::getLocalMemory() const { return m_localMemory; } size_t Kernel::getLocalMemorySize() const { return m_localMemory->getTotalAllocated(); } const std::string& Kernel::getName() const { return m_name; } unsigned int Kernel::getNumArguments() const { return m_function->arg_size(); } const Memory* Kernel::getPrivateMemory() const { return m_privateMemory; } const Program* Kernel::getProgram() const { return m_program; } void Kernel::getRequiredWorkGroupSize(size_t reqdWorkGroupSize[3]) const { memset(reqdWorkGroupSize, 0, 3*sizeof(size_t)); for (unsigned i = 0; i < m_metadata->getNumOperands(); i++) { const llvm::MDOperand& op = m_metadata->getOperand(i); if (llvm::MDNode *val = llvm::dyn_cast(op.get())) { llvm::MDString *str = llvm::dyn_cast(val->getOperand(0).get()); if (str->getString() == "reqd_work_group_size") { for (int j = 0; j < 3; j++) { reqdWorkGroupSize[j] = getMDOpAsConstInt(val->getOperand(j+1))->getZExtValue(); } } } } } void Kernel::setArgument(unsigned int index, TypedValue value) { assert(index < m_function->arg_size()); const llvm::Value *argument = getArgument(index); unsigned int type = getArgumentAddressQualifier(index); if (type == CL_KERNEL_ARG_ADDRESS_LOCAL) { // Deallocate 
existing argument if (m_arguments.count(argument)) { m_localMemory->deallocateBuffer(m_arguments[argument].getPointer()); delete[] m_arguments[argument].data; } // Allocate local memory buffer TypedValue v = { sizeof(size_t), 1, new unsigned char[sizeof(size_t)] }; v.setPointer(m_localMemory->allocateBuffer(value.size)); m_arguments[argument] = v; } else { if (((const llvm::Argument*)argument)->hasByValAttr()) { // Deallocate existing argument if (m_arguments.count(argument)) { m_privateMemory->deallocateBuffer(m_arguments[argument].getPointer()); delete[] m_arguments[argument].data; } TypedValue address = { sizeof(size_t), 1, new unsigned char[sizeof(size_t)] }; size_t size = value.size*value.num; address.setPointer(m_privateMemory->allocateBuffer(size)); m_privateMemory->store(value.data, address.getPointer(), size); m_arguments[argument] = address; } else { // Deallocate existing argument if (m_arguments.count(argument)) { delete[] m_arguments[argument].data; } const llvm::Type *type = argument->getType(); if (type->isVectorTy()) { value.num = type->getVectorNumElements(); value.size = getTypeSize(type->getVectorElementType()); } m_arguments[argument] = value.clone(); } } } TypedValueMap::const_iterator Kernel::args_begin() const { return m_arguments.begin(); } TypedValueMap::const_iterator Kernel::args_end() const { return m_arguments.end(); } Oclgrind-15.5/src/core/Kernel.h000066400000000000000000000043651252441671000163270ustar00rootroot00000000000000// Kernel.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "common.h" #include "llvm/ADT/StringRef.h" namespace llvm { class Argument; class Constant; class Function; class GlobalVariable; class MDNode; class Module; } namespace oclgrind { class Memory; class Program; class Kernel { public: Kernel(const Program *program, const llvm::Function *function, const llvm::Module *module); Kernel(const Kernel& kernel); virtual ~Kernel(); TypedValueMap::const_iterator args_begin() const; TypedValueMap::const_iterator args_end() const; bool allArgumentsSet() const; void allocateConstants(Memory *memory); void deallocateConstants(Memory *memory); size_t getArgumentSize(unsigned int index) const; unsigned int getArgumentAccessQualifier(unsigned int index) const; unsigned int getArgumentAddressQualifier(unsigned int index) const; const llvm::StringRef getArgumentName(unsigned int index) const; const llvm::StringRef getArgumentTypeName(unsigned int index) const; unsigned int getArgumentTypeQualifier(unsigned int index) const; std::string getAttributes() const; const llvm::Function* getFunction() const; const Memory* getLocalMemory() const; size_t getLocalMemorySize() const; const std::string& getName() const; unsigned int getNumArguments() const; const Memory* getPrivateMemory() const; const Program* getProgram() const; void getRequiredWorkGroupSize(size_t reqdWorkGroupSize[3]) const; void setArgument(unsigned int index, TypedValue value); private: const Program *m_program; const llvm::Function *m_function; TypedValueMap m_arguments; std::list m_constants; std::list m_constantBuffers; Memory *m_localMemory; const llvm::MDNode *m_metadata; std::string m_name; Memory *m_privateMemory; const llvm::Argument* getArgument(unsigned int index) const; const llvm::MDNode* getArgumentMetadata(std::string name) const; }; } Oclgrind-15.5/src/core/KernelInvocation.cpp000066400000000000000000000211661252441671000207120ustar00rootroot00000000000000// KernelInvocation.cpp (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon 
McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #include #include #include #include "Context.h" #include "Kernel.h" #include "KernelInvocation.h" #include "Memory.h" #include "WorkGroup.h" #include "WorkItem.h" using namespace oclgrind; using namespace std; // TODO: Remove this when thread_local fixed on OS X #ifdef __APPLE__ #define THREAD_LOCAL __thread #elif defined(_WIN32) && !defined(__MINGW32__) #define THREAD_LOCAL __declspec(thread) #else #define THREAD_LOCAL thread_local #endif struct { WorkGroup *workGroup; WorkItem *workItem; } static THREAD_LOCAL workerState; static atomic nextGroupIndex; KernelInvocation::KernelInvocation(const Context *context, const Kernel *kernel, unsigned int workDim, Size3 globalOffset, Size3 globalSize, Size3 localSize) : m_context(context), m_kernel(kernel) { m_workDim = workDim; m_globalOffset = globalOffset; m_globalSize = globalSize; m_localSize = localSize; m_numGroups.x = m_globalSize.x/m_localSize.x; m_numGroups.y = m_globalSize.y/m_localSize.y; m_numGroups.z = m_globalSize.z/m_localSize.z; // Check for user overriding number of threads m_numWorkers = 0; const char *numThreads = getenv("OCLGRIND_NUM_THREADS"); if (numThreads) { char *next; m_numWorkers = strtoul(numThreads, &next, 10); if (strlen(next)) { cerr << "Oclgrind: Invalid value for OCLGRIND_NUM_THREADS" << endl; } } else { m_numWorkers = thread::hardware_concurrency(); } if (!m_numWorkers || !m_context->isThreadSafe()) m_numWorkers = 1; // Check for quick-mode environment variable if (checkEnv("OCLGRIND_QUICK")) { // Only run first and last work-groups in quick-mode Size3 firstGroup(0, 0, 0); Size3 lastGroup(m_numGroups.x-1, m_numGroups.y-1, m_numGroups.z-1); m_workGroups.push_back(firstGroup); m_workGroups.push_back(lastGroup); } else { for (size_t k = 0; k < m_numGroups.z; 
k++) { for (size_t j = 0; j < m_numGroups.y; j++) { for (size_t i = 0; i < m_numGroups.x; i++) { m_workGroups.push_back(Size3(i, j, k)); } } } } } KernelInvocation::~KernelInvocation() { // Destroy any remaining work-groups while (!m_runningGroups.empty()) { delete m_runningGroups.front(); m_runningGroups.pop_front(); } } const Context* KernelInvocation::getContext() const { return m_context; } const WorkGroup* KernelInvocation::getCurrentWorkGroup() const { return workerState.workGroup; } const WorkItem* KernelInvocation::getCurrentWorkItem() const { return workerState.workItem; } Size3 KernelInvocation::getGlobalOffset() const { return m_globalOffset; } Size3 KernelInvocation::getGlobalSize() const { return m_globalSize; } const Kernel* KernelInvocation::getKernel() const { return m_kernel; } Size3 KernelInvocation::getLocalSize() const { return m_localSize; } Size3 KernelInvocation::getNumGroups() const { return m_numGroups; } size_t KernelInvocation::getWorkDim() const { return m_workDim; } void KernelInvocation::run(const Context *context, Kernel *kernel, unsigned int workDim, Size3 globalOffset, Size3 globalSize, Size3 localSize) { try { // Allocate and initialise constant memory kernel->allocateConstants(context->getGlobalMemory()); } catch (FatalError& err) { ostringstream info; info << endl << "OCLGRIND FATAL ERROR " << "(" << err.getFile() << ":" << err.getLine() << ")" << endl << err.what() << endl << "When allocating kernel constants for '" << kernel->getName() << "'"; context->logError(info.str().c_str()); return; } // Create kernel invocation KernelInvocation *ki = new KernelInvocation(context, kernel, workDim, globalOffset, globalSize, localSize); // Run kernel context->notifyKernelBegin(ki); ki->run(); context->notifyKernelEnd(ki); delete ki; // Deallocate constant memory kernel->deallocateConstants(context->getGlobalMemory()); } void KernelInvocation::run() { nextGroupIndex = 0; // Create worker threads // TODO: Run in main thread if only 1 worker 
vector threads; for (unsigned i = 0; i < m_numWorkers; i++) { threads.push_back(thread(&KernelInvocation::runWorker, this)); } // Wait for workers to complete for (unsigned i = 0; i < m_numWorkers; i++) { threads[i].join(); } } void KernelInvocation::runWorker() { workerState.workGroup = NULL; workerState.workItem = NULL; try { while (true) { // Move to next work-group if (!m_runningGroups.empty()) { // Take next work-group from running pool workerState.workGroup = m_runningGroups.front(); m_runningGroups.pop_front(); } else { // Take next work-group from pending pool unsigned index = nextGroupIndex++; if (index >= m_workGroups.size()) // No more work to do break; workerState.workGroup = new WorkGroup(this, m_workGroups[index]); m_context->notifyWorkGroupBegin(workerState.workGroup); } // Execute work-group workerState.workItem = workerState.workGroup->getNextWorkItem(); while (workerState.workItem) { // Run work-item until complete or at barrier while (workerState.workItem->getState() == WorkItem::READY) { workerState.workItem->step(); } // Move to next work-item workerState.workItem = workerState.workGroup->getNextWorkItem(); if (workerState.workItem) continue; // No more work-items in READY state // Check if there are work-items at a barrier if (workerState.workGroup->hasBarrier()) { // Resume execution workerState.workGroup->clearBarrier(); workerState.workItem = workerState.workGroup->getNextWorkItem(); } } // Work-group has finished m_context->notifyWorkGroupComplete(workerState.workGroup); delete workerState.workGroup; workerState.workGroup = NULL; } } catch (FatalError& err) { ostringstream info; info << endl << "OCLGRIND FATAL ERROR " << "(" << err.getFile() << ":" << err.getLine() << ")" << endl << err.what(); m_context->logError(info.str().c_str()); if (workerState.workGroup) delete workerState.workGroup; } } bool KernelInvocation::switchWorkItem(const Size3 gid) { assert(m_numWorkers == 1); // Compute work-group ID Size3 group(gid.x/m_localSize.x, 
gid.y/m_localSize.y, gid.z/m_localSize.z); bool found = false; WorkGroup *previousWorkGroup = workerState.workGroup; // Check if we're already running the work-group if (group == previousWorkGroup->getGroupID()) { found = true; } // Check if work-group is in running pool if (!found) { std::list::iterator rItr; for (rItr = m_runningGroups.begin(); rItr != m_runningGroups.end(); rItr++) { if (group == (*rItr)->getGroupID()) { workerState.workGroup = *rItr; m_runningGroups.erase(rItr); found = true; break; } } } // Check if work-group is in pending pool if (!found) { std::vector::iterator pItr; for (pItr = m_workGroups.begin()+nextGroupIndex; pItr != m_workGroups.end(); pItr++) { if (group == *pItr) { workerState.workGroup = new WorkGroup(this, group); found = true; // Re-order list of groups accordingly // Safe since this is not in a multi-threaded context m_workGroups.erase(pItr); m_workGroups.insert(m_workGroups.begin()+nextGroupIndex, group); nextGroupIndex++; break; } } } if (!found) { return false; } if (previousWorkGroup != workerState.workGroup) { m_runningGroups.push_back(previousWorkGroup); } // Get work-item Size3 lid(gid.x%m_localSize.x, gid.y%m_localSize.y, gid.z%m_localSize.z); workerState.workItem = workerState.workGroup->getWorkItem(lid); return true; } Oclgrind-15.5/src/core/KernelInvocation.h000066400000000000000000000033301252441671000203500ustar00rootroot00000000000000// KernelInvocation.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "common.h" namespace oclgrind { class Context; class Kernel; class WorkGroup; class WorkItem; class KernelInvocation { public: static void run(const Context *context, Kernel *kernel, unsigned int workDim, Size3 globalOffset, Size3 globalSize, Size3 localSize); const Context* getContext() const; const WorkGroup* getCurrentWorkGroup() const; const WorkItem* getCurrentWorkItem() const; Size3 getGlobalOffset() const; Size3 getGlobalSize() const; Size3 getLocalSize() const; const Kernel* getKernel() const; Size3 getNumGroups() const; size_t getWorkDim() const; bool switchWorkItem(const Size3 gid); private: KernelInvocation(const Context *context, const Kernel *kernel, unsigned int workDim, Size3 globalOffset, Size3 globalSize, Size3 localSize); virtual ~KernelInvocation(); void run(); // Kernel launch parameters const Context *m_context; const Kernel *m_kernel; size_t m_workDim; Size3 m_globalOffset; Size3 m_globalSize; Size3 m_localSize; Size3 m_numGroups; // Current execution state std::vector m_workGroups; std::list m_runningGroups; // Worker threads void runWorker(); unsigned m_numWorkers; }; } Oclgrind-15.5/src/core/Memory.cpp000066400000000000000000000227201252441671000167050ustar00rootroot00000000000000// Memory.cpp (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "common.h" #include #include #include #include #include "Context.h" #include "Memory.h" #include "WorkGroup.h" #include "WorkItem.h" using namespace oclgrind; using namespace std; // Multiple mutexes to mitigate risk of unnecessary synchronisation in atomics #define NUM_ATOMIC_MUTEXES 64 // Must be power of two mutex atomicMutex[NUM_ATOMIC_MUTEXES]; #define ATOMIC_MUTEX(offset) \ atomicMutex[(((offset)>>2) & (NUM_ATOMIC_MUTEXES-1))] Memory::Memory(unsigned int addrSpace, const Context *context) { m_context = context; m_addressSpace = addrSpace; clear(); } Memory::~Memory() { clear(); } size_t Memory::allocateBuffer(size_t size, cl_mem_flags flags) { // Check requested size doesn't exceed maximum if (size > MAX_BUFFER_SIZE) { return 0; } // Find first unallocated buffer slot unsigned b = getNextBuffer(); if (b >= MAX_NUM_BUFFERS) { return 0; } // Create buffer Buffer *buffer = new Buffer; buffer->size = size; buffer->flags = flags; buffer->data = new unsigned char[size]; // Initialize contents to 0 memset(buffer->data, 0, size); if (b >= m_memory.size()) { m_memory.push_back(buffer); } else { m_memory[b] = buffer; } m_totalAllocated += size; size_t address = ((size_t)b) << NUM_ADDRESS_BITS; m_context->notifyMemoryAllocated(this, address, size, flags); return address; } uint32_t Memory::atomic(AtomicOp op, size_t address, uint32_t value) { m_context->notifyMemoryAtomicLoad(this, op, address, 4); m_context->notifyMemoryAtomicStore(this, op, address, 4); // Bounds check if (!isAddressValid(address, 4)) { return 0; } // Get buffer size_t offset = EXTRACT_OFFSET(address); Buffer *buffer = m_memory[EXTRACT_BUFFER(address)]; uint32_t *ptr = (uint32_t*)(buffer->data + offset); if (m_addressSpace == AddrSpaceGlobal) ATOMIC_MUTEX(offset).lock(); uint32_t old = *ptr; switch(op) { case AtomicAdd: *ptr = old + value; break; case AtomicAnd: *ptr = old & value; break; case AtomicCmpXchg: FATAL_ERROR("AtomicCmpXchg in generic atomic handler"); break; case AtomicDec: *ptr = 
old - 1; break; case AtomicInc: *ptr = old + 1; break; case AtomicMax: *ptr = old > value ? old : value; break; case AtomicMin: *ptr = old < value ? old : value; break; case AtomicOr: *ptr = old | value; break; case AtomicSub: *ptr = old - value; break; case AtomicXchg: *ptr = value; break; case AtomicXor: *ptr = old ^ value; break; } if (m_addressSpace == AddrSpaceGlobal) ATOMIC_MUTEX(offset).unlock(); return old; } uint32_t Memory::atomicCmpxchg(size_t address, uint32_t cmp, uint32_t value) { m_context->notifyMemoryAtomicLoad(this, AtomicCmpXchg, address, 4); // Bounds check if (!isAddressValid(address, 4)) { return 0; } // Get buffer size_t offset = EXTRACT_OFFSET(address); Buffer *buffer = m_memory[EXTRACT_BUFFER(address)]; uint32_t *ptr = (uint32_t*)(buffer->data + offset); if (m_addressSpace == AddrSpaceGlobal) ATOMIC_MUTEX(offset).lock(); // Perform cmpxchg uint32_t old = *ptr; if (old == cmp) { *ptr = value; m_context->notifyMemoryAtomicStore(this, AtomicCmpXchg, address, 4); } if (m_addressSpace == AddrSpaceGlobal) ATOMIC_MUTEX(offset).unlock(); return old; } void Memory::clear() { vector::iterator itr; for (itr = m_memory.begin(); itr != m_memory.end(); itr++) { if (*itr) { if (!((*itr)->flags & CL_MEM_USE_HOST_PTR)) { delete[] (*itr)->data; } delete *itr; size_t address = (itr-m_memory.begin())<notifyMemoryDeallocated(this, address); } } m_memory.resize(1); m_memory[0] = NULL; m_freeBuffers = queue(); m_totalAllocated = 0; } Memory* Memory::clone() const { Memory *mem = new Memory(m_addressSpace, m_context); // Clone buffers mem->m_memory.resize(m_memory.size()); mem->m_memory[0] = NULL; for (unsigned i = 1; i < m_memory.size(); i++) { Buffer *src = m_memory[i]; Buffer *dst = new Buffer; dst->size = src->size; dst->flags = src->flags, dst->data = (src->flags&CL_MEM_USE_HOST_PTR) ? 
src->data : new unsigned char[src->size], memcpy(dst->data, src->data, src->size); mem->m_memory[i] = dst; m_context->notifyMemoryAllocated(mem, ((size_t)i<size, src->flags); } // Clone state mem->m_freeBuffers = m_freeBuffers; mem->m_totalAllocated = m_totalAllocated; return mem; } size_t Memory::createHostBuffer(size_t size, void *ptr, cl_mem_flags flags) { // Check requested size doesn't exceed maximum if (size > MAX_BUFFER_SIZE) { return 0; } // Find first unallocated buffer slot unsigned b = getNextBuffer(); if (b >= MAX_NUM_BUFFERS) { return 0; } // Create buffer Buffer *buffer = new Buffer; buffer->size = size; buffer->flags = flags; buffer->data = (unsigned char*)ptr; if (b >= m_memory.size()) { m_memory.push_back(buffer); } else { m_memory[b] = buffer; } m_totalAllocated += size; size_t address = ((size_t)b) << NUM_ADDRESS_BITS; m_context->notifyMemoryAllocated(this, address, size, flags); return address; } bool Memory::copy(size_t dst, size_t src, size_t size) { m_context->notifyMemoryLoad(this, src, size); // Check source address if (!isAddressValid(src, size)) { return false; } size_t src_offset = EXTRACT_OFFSET(src); Buffer *src_buffer = m_memory.at(EXTRACT_BUFFER(src)); m_context->notifyMemoryStore(this, dst, size, src_buffer->data + src_offset); // Check destination address if (!isAddressValid(dst, size)) { return false; } size_t dst_offset = EXTRACT_OFFSET(dst); Buffer *dst_buffer = m_memory.at(EXTRACT_BUFFER(dst)); // Copy data memcpy(dst_buffer->data + dst_offset, src_buffer->data + src_offset, size); return true; } void Memory::deallocateBuffer(size_t address) { unsigned buffer = EXTRACT_BUFFER(address); assert(buffer < m_memory.size() && m_memory[buffer]); if (!(m_memory[buffer]->flags & CL_MEM_USE_HOST_PTR)) { delete[] m_memory[buffer]->data; } m_totalAllocated -= m_memory[buffer]->size; m_freeBuffers.push(buffer); delete m_memory[buffer]; m_memory[buffer] = NULL; m_context->notifyMemoryDeallocated(this, address); } void Memory::dump() const { 
for (unsigned b = 1; b < m_memory.size(); b++) { if (!m_memory[b]->data) { continue; } for (unsigned i = 0; i < m_memory[b]->size; i++) { if (i%4 == 0) { cout << endl << hex << uppercase << setw(16) << setfill(' ') << right << ((((size_t)b)<data[i]; } } cout << endl; } unsigned int Memory::getAddressSpace() const { return m_addressSpace; } const Memory::Buffer* Memory::getBuffer(size_t address) const { size_t buf = EXTRACT_BUFFER(address); if (buf == 0 || buf >= m_memory.size() || !m_memory[buf]->data) { return NULL; } return m_memory[buf]; } size_t Memory::getMaxAllocSize() { return MAX_BUFFER_SIZE; } unsigned Memory::getNextBuffer() { if (m_freeBuffers.empty()) { return m_memory.size(); } else { unsigned b = m_freeBuffers.front(); m_freeBuffers.pop(); return b; } } void* Memory::getPointer(size_t address) const { size_t buffer = EXTRACT_BUFFER(address); // Bounds check if (!isAddressValid(address)) { return NULL; } return m_memory[buffer]->data + EXTRACT_OFFSET(address); } size_t Memory::getTotalAllocated() const { return m_totalAllocated; } bool Memory::isAddressValid(size_t address, size_t size) const { size_t buffer = EXTRACT_BUFFER(address); size_t offset = EXTRACT_OFFSET(address); if (buffer == 0 || buffer >= m_memory.size() || !m_memory[buffer] || offset+size > m_memory[buffer]->size) { return false; } return true; } bool Memory::load(unsigned char *dest, size_t address, size_t size) const { m_context->notifyMemoryLoad(this, address, size); // Bounds check if (!isAddressValid(address, size)) { return false; } // Get buffer size_t offset = EXTRACT_OFFSET(address); Buffer *src = m_memory[EXTRACT_BUFFER(address)]; // Load data memcpy(dest, src->data + offset, size); return true; } unsigned char* Memory::mapBuffer(size_t address, size_t offset, size_t size) { size_t buffer = EXTRACT_BUFFER(address); // Bounds check if (!isAddressValid(address, size)) { return NULL; } return m_memory[buffer]->data + offset + EXTRACT_OFFSET(address); } bool Memory::store(const 
unsigned char *source, size_t address, size_t size) { m_context->notifyMemoryStore(this, address, size, source); // Bounds check if (!isAddressValid(address, size)) { return false; } // Get buffer size_t offset = EXTRACT_OFFSET(address); Buffer *dst = m_memory[EXTRACT_BUFFER(address)]; // Store data memcpy(dst->data + offset, source, size); return true; } Oclgrind-15.5/src/core/Memory.h000066400000000000000000000041221252441671000163460ustar00rootroot00000000000000// Memory.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #define NUM_BUFFER_BITS ( (sizeof(size_t)==4) ? 8 : 16) #define MAX_NUM_BUFFERS ((size_t)1 << NUM_BUFFER_BITS) #define NUM_ADDRESS_BITS ((sizeof(size_t)<<3) - NUM_BUFFER_BITS) #define MAX_BUFFER_SIZE ((size_t)1 << NUM_ADDRESS_BITS) #define EXTRACT_BUFFER(address) \ (address >> NUM_ADDRESS_BITS) #define EXTRACT_OFFSET(address) \ (address & (((size_t)-1) >> NUM_BUFFER_BITS)) namespace oclgrind { class Context; class Memory { public: typedef struct { size_t size; cl_mem_flags flags; unsigned char *data; } Buffer; public: Memory(unsigned int addrSpace, const Context *context); virtual ~Memory(); size_t allocateBuffer(size_t size, cl_mem_flags flags=0); uint32_t atomic(AtomicOp op, size_t address, uint32_t value = 0); uint32_t atomicCmpxchg(size_t address, uint32_t cmp, uint32_t value); void clear(); Memory *clone() const; size_t createHostBuffer(size_t size, void *ptr, cl_mem_flags flags=0); bool copy(size_t dest, size_t src, size_t size); void deallocateBuffer(size_t address); void dump() const; unsigned int getAddressSpace() const; const Buffer* getBuffer(size_t address) const; void* getPointer(size_t address) const; size_t getTotalAllocated() const; bool isAddressValid(size_t address, size_t size=1) 
const; bool load(unsigned char *dst, size_t address, size_t size=1) const; unsigned char* mapBuffer(size_t address, size_t offset, size_t size); bool store(const unsigned char *source, size_t address, size_t size=1); static size_t getMaxAllocSize(); private: const Context *m_context; std::queue m_freeBuffers; std::vector m_memory; unsigned int m_addressSpace; size_t m_totalAllocated; unsigned getNextBuffer(); }; } Oclgrind-15.5/src/core/Plugin.cpp000066400000000000000000000007471252441671000167000ustar00rootroot00000000000000// Plugin.cpp (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "Plugin.h" using namespace oclgrind; Plugin::Plugin(const Context *context) : m_context(context) { } Plugin::~Plugin() { } bool Plugin::isThreadSafe() const { return true; } Oclgrind-15.5/src/core/Plugin.h000066400000000000000000000055721252441671000163460ustar00rootroot00000000000000// Plugin.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#pragma once #include "common.h" namespace oclgrind { class Context; class Kernel; class KernelInvocation; class Memory; class WorkGroup; class WorkItem; class Plugin { public: Plugin(const Context *context); virtual ~Plugin(); virtual void hostMemoryLoad(const Memory *memory, size_t address, size_t size){} virtual void hostMemoryStore(const Memory *memory, size_t address, size_t size, const uint8_t *storeData){} virtual void instructionExecuted(const WorkItem *workItem, const llvm::Instruction *instruction, const TypedValue& result){} virtual void kernelBegin(const KernelInvocation *kernelInvocation){} virtual void kernelEnd(const KernelInvocation *kernelInvocation){} virtual void log(MessageType type, const char *message){} virtual void memoryAllocated(const Memory *memory, size_t address, size_t size, cl_mem_flags flags){} virtual void memoryAtomicLoad(const Memory *memory, const WorkItem *workItem, AtomicOp op, size_t address, size_t size){} virtual void memoryAtomicStore(const Memory *memory, const WorkItem *workItem, AtomicOp op, size_t address, size_t size){} virtual void memoryDeallocated(const Memory *memory, size_t address){} virtual void memoryLoad(const Memory *memory, const WorkItem *workItem, size_t address, size_t size){} virtual void memoryLoad(const Memory *memory, const WorkGroup *workGroup, size_t address, size_t size){} virtual void memoryStore(const Memory *memory, const WorkItem *workItem, size_t address, size_t size, const uint8_t *storeData){} virtual void memoryStore(const Memory *memory, const WorkGroup *workGroup, size_t address, size_t size, const uint8_t *storeData){} virtual void workGroupBarrier(const WorkGroup *workGroup, uint32_t flags){} virtual void workGroupBegin(const WorkGroup *workGroup){} virtual void workGroupComplete(const WorkGroup *workGroup){} virtual void workItemBegin(const WorkItem *workItem){} virtual void workItemComplete(const WorkItem *workItem){} virtual bool isThreadSafe() const; protected: const Context 
*m_context; }; } Oclgrind-15.5/src/core/Program.cpp000066400000000000000000000437041252441671000170510ustar00rootroot00000000000000// Program.cpp (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #include #if defined(_WIN32) && !defined(__MINGW32__) #include #else #include #endif #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Linker/Linker.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/Cloning.h" #include "clang/CodeGen/CodeGenAction.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/TextDiagnosticPrinter.h" #include "Kernel.h" #include "Program.h" #include "WorkItem.h" #define ENV_DUMP_SPIR "OCLGRIND_DUMP_SPIR" #define CL_DUMP_NAME "/tmp/oclgrind_%lX.cl" #define IR_DUMP_NAME "/tmp/oclgrind_%lX.s" #define BC_DUMP_NAME "/tmp/oclgrind_%lX.bc" #if defined(_WIN32) #define REMAP_DIR "Z:/remapped/" #else #define REMAP_DIR "/remapped/" #endif #define REMAP_INPUT "input.cl" #define CLC_H_PATH REMAP_DIR"clc.h" extern const char CLC_H_DATA[]; const char *EXTENSIONS[] = { "cl_khr_fp64", "cl_khr_3d_image_writes", "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics", "cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics", "cl_khr_byte_addressable_store", }; using namespace oclgrind; using namespace std; Program::Program(const Context *context, llvm::Module *module) : m_module(module), m_context(context) { m_buildLog = ""; m_buildOptions = ""; m_buildStatus = CL_BUILD_SUCCESS; m_uid = 
generateUID(); } Program::Program(const Context *context, const string& source) : m_context(context) { m_source = source; m_buildLog = ""; m_buildOptions = ""; m_buildStatus = CL_BUILD_NONE; m_uid = 0; // Split source into individual lines m_sourceLines.clear(); if (!source.empty()) { std::stringstream ss(source); std::string line; while(std::getline(ss, line, '\n')) { m_sourceLines.push_back(line); } } } Program::~Program() { clearInterpreterCache(); } bool Program::build(const char *options, list
headers) { m_buildStatus = CL_BUILD_IN_PROGRESS; m_buildOptions = options ? options : ""; // Create build log m_buildLog = ""; llvm::raw_string_ostream buildLog(m_buildLog); // Do nothing if program was created with binary if (m_source.empty() && m_module) { m_buildStatus = CL_BUILD_SUCCESS; return true; } if (m_module) { clearInterpreterCache(); m_module.reset(); } // Assign a new UID to this program m_uid = generateUID(); // Set compiler arguments vector args; args.push_back("-cl-std=CL1.2"); args.push_back("-cl-kernel-arg-info"); args.push_back("-fno-builtin"); args.push_back("-g"); args.push_back("-triple"); if (sizeof(size_t) == 4) args.push_back("spir-unknown-unknown"); else args.push_back("spir64-unknown-unknown"); // Define extensions for (unsigned i = 0; i < sizeof(EXTENSIONS)/sizeof(const char*); i++) { args.push_back("-D"); args.push_back(EXTENSIONS[i]); } // Disable Clang's optimizations. // We will manually run optimization passes and legalize the IR later. args.push_back("-O0"); bool optimize = true; bool cl12 = true; // Add OpenCL build options const char *mainOptions = options; const char *extraOptions = getenv("OCLGRIND_BUILD_OPTIONS"); if (!mainOptions) mainOptions = ""; if (!extraOptions) extraOptions = ""; char *tmpOptions = new char[strlen(mainOptions) + strlen(extraOptions) + 2]; sprintf(tmpOptions, "%s %s", mainOptions, extraOptions); for (char *opt = strtok(tmpOptions, " "); opt; opt = strtok(NULL, " ")) { // Ignore options that break PCH if (strcmp(opt, "-cl-fast-relaxed-math") != 0 && strcmp(opt, "-cl-finite-math-only") != 0 && strcmp(opt, "-cl-single-precision-constant") != 0) { // Check for optimization flags if (strcmp(opt, "-O0") == 0 || strcmp(opt, "-cl-opt-disable") == 0) { optimize = false; continue; } else if (strncmp(opt, "-O", 2) == 0) { optimize = true; continue; } // Check for -cl-std flag if (strncmp(opt, "-cl-std=", 8) == 0) { if (strcmp(opt+8, "CL1.2") != 0) { cl12 = false; args.push_back(opt); } continue; } 
args.push_back(opt); } } if (cl12) { args.push_back("-cl-std=CL1.2"); } // Pre-compiled header char *pchdir = NULL; char *pch = NULL; if (!checkEnv("OCLGRIND_DISABLE_PCH") && cl12) { const char *pchdirOverride = getenv("OCLGRIND_PCH_DIR"); if (pchdirOverride) { pchdir = strdup(pchdirOverride); } else { // Get directory containing library #if defined(_WIN32) && !defined(__MINGW32__) char libpath[4096]; HMODULE dll; if (GetModuleHandleEx( GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (LPCSTR)&Program::createFromBitcode, &dll) && GetModuleFileName(dll, libpath, sizeof(libpath))) { #else Dl_info dlinfo; if (dladdr((const void*)Program::createFromBitcode, &dlinfo)) { const char *libpath = dlinfo.dli_fname; #endif // Construct path to PCH directory const char *dirend; #if defined(_WIN32) && !defined(__MINGW32__) if ((dirend = strrchr(libpath, '\\'))) #else if ((dirend = strrchr(libpath, '/'))) #endif { const char *includes_relative = "/../include/oclgrind/"; size_t length = dirend - libpath; pchdir = new char[length + strlen(includes_relative) + 1]; strncpy(pchdir, libpath, length); strcpy(pchdir + length, includes_relative); } } } if (pchdir) { // Select precompiled header pch = new char[strlen(pchdir) + 20]; sprintf(pch, "%s/clc%d.pch", pchdir, (sizeof(size_t) == 4 ? 
32 : 64)); // Check if precompiled header exists ifstream pchfile(pch); if (!pchfile.good()) { buildLog << "WARNING: Unable to find precompiled header:\n" << pch << "\n"; delete[] pch; pch = NULL; } pchfile.close(); } else { buildLog << "WARNING: Unable to determine precompiled header path\n"; } } if (pch) { args.push_back("-isysroot"); args.push_back(pchdir); args.push_back("-include-pch"); args.push_back(pch); } else { // Fall back to embedded clc.h args.push_back("-include"); args.push_back(CLC_H_PATH); } // Append input file to arguments (remapped later) args.push_back(REMAP_INPUT); // Create diagnostics engine clang::DiagnosticOptions *diagOpts = new clang::DiagnosticOptions(); llvm::IntrusiveRefCntPtr diagID( new clang::DiagnosticIDs()); clang::TextDiagnosticPrinter *diagConsumer = new clang::TextDiagnosticPrinter(buildLog, diagOpts); clang::DiagnosticsEngine diags(diagID, diagOpts, diagConsumer); // Create compiler instance clang::CompilerInstance compiler; compiler.createDiagnostics(diagConsumer, false); // Create compiler invocation clang::CompilerInvocation *invocation = new clang::CompilerInvocation; clang::CompilerInvocation::CreateFromArgs(*invocation, &args[0], &args[0] + args.size(), compiler.getDiagnostics()); compiler.setInvocation(invocation); // Remap include files std::unique_ptr buffer; compiler.getHeaderSearchOpts().AddPath(REMAP_DIR, clang::frontend::Quoted, false, true); list
::iterator itr; for (itr = headers.begin(); itr != headers.end(); itr++) { buffer = llvm::MemoryBuffer::getMemBuffer(itr->second->m_source, "", false); compiler.getPreprocessorOpts().addRemappedFile(REMAP_DIR + itr->first, buffer.release()); } // Remap clc.h buffer = llvm::MemoryBuffer::getMemBuffer(CLC_H_DATA, "", false); compiler.getPreprocessorOpts().addRemappedFile(CLC_H_PATH, buffer.release()); // Remap input file buffer = llvm::MemoryBuffer::getMemBuffer(m_source, "", false); compiler.getPreprocessorOpts().addRemappedFile(REMAP_INPUT, buffer.release()); // Compile llvm::LLVMContext& context = llvm::getGlobalContext(); clang::EmitLLVMOnlyAction action(&context); if (compiler.ExecuteAction(action)) { // Retrieve module m_module = action.takeModule(); // Strip debug intrinsics if not in interactive mode if (!checkEnv("OCLGRIND_INTERACTIVE")) { stripDebugIntrinsics(); } // Initialize pass managers llvm::legacy::PassManager modulePasses; llvm::legacy::FunctionPassManager functionPasses(m_module.get()); #if LLVM_VERSION < 37 modulePasses.add(new llvm::DataLayoutPass()); functionPasses.add(new llvm::DataLayoutPass()); #endif // Run optimizations on module if (optimize) { // Populate pass managers with -Oz llvm::PassManagerBuilder builder; builder.OptLevel = 2; builder.SizeLevel = 2; builder.populateModulePassManager(modulePasses); builder.populateFunctionPassManager(functionPasses); } // Run passes functionPasses.doInitialization(); llvm::Module::iterator fItr; for (fItr = m_module->begin(); fItr != m_module->end(); fItr++) functionPasses.run(*fItr); functionPasses.doFinalization(); modulePasses.run(*m_module); m_buildStatus = CL_BUILD_SUCCESS; } else { m_buildStatus = CL_BUILD_ERROR; } // Dump temps if required if (checkEnv(ENV_DUMP_SPIR)) { // Temporary directory #if defined(_WIN32) const char *tmpdir = getenv("TEMP"); #else const char *tmpdir = "/tmp"; #endif // Construct unique output filenames size_t sz = snprintf(NULL, 0, "%s/oclgrind_%lX.XX", tmpdir, m_uid) + 
1; char *tempCL = new char[sz]; char *tempIR = new char[sz]; char *tempBC = new char[sz]; sprintf(tempCL, "%s/oclgrind_%lX.cl", tmpdir, m_uid); sprintf(tempIR, "%s/oclgrind_%lX.ll", tmpdir, m_uid); sprintf(tempBC, "%s/oclgrind_%lX.bc", tmpdir, m_uid); // Dump source ofstream cl; cl.open(tempCL); cl << m_source; cl.close(); if (m_buildStatus == CL_BUILD_SUCCESS) { // Dump IR std::error_code err; llvm::raw_fd_ostream ir(tempIR, err, llvm::sys::fs::F_None); llvm::AssemblyAnnotationWriter asmWriter; m_module->print(ir, &asmWriter); ir.close(); // Dump bitcode llvm::raw_fd_ostream bc(tempBC, err, llvm::sys::fs::F_None); llvm::WriteBitcodeToFile(m_module.get(), bc); bc.close(); } delete[] tempCL; delete[] tempIR; delete[] tempBC; } delete[] tmpOptions; delete[] pchdir; delete[] pch; return m_buildStatus == CL_BUILD_SUCCESS; } void Program::clearInterpreterCache() { InterpreterCacheMap::iterator itr; for (itr = m_interpreterCache.begin(); itr != m_interpreterCache.end(); itr++) { delete itr->second; } m_interpreterCache.clear(); } Program* Program::createFromBitcode(const Context *context, const unsigned char *bitcode, size_t length) { // Load bitcode from file llvm::StringRef data((const char*)bitcode, length); unique_ptr buffer = llvm::MemoryBuffer::getMemBuffer(data, "", false); if (!buffer) { return NULL; } // Parse bitcode into IR module llvm::ErrorOr module = parseBitcodeFile(buffer->getMemBufferRef(), llvm::getGlobalContext()); if (!module) { return NULL; } return new Program(context, module.get()); } Program* Program::createFromBitcodeFile(const Context *context, const string filename) { // Load bitcode from file llvm::ErrorOr> buffer = llvm::MemoryBuffer::getFile(filename); if (!buffer) { return NULL; } // Parse bitcode into IR module llvm::ErrorOr module = parseBitcodeFile(buffer->get()->getMemBufferRef(), llvm::getGlobalContext()); if (!module) { return NULL; } return new Program(context, module.get()); } Program* Program::createFromPrograms(const Context 
*context, list programs) { llvm::Module *module = new llvm::Module("oclgrind_linked", llvm::getGlobalContext()); llvm::Linker linker(module); // Link modules list::iterator itr; for (itr = programs.begin(); itr != programs.end(); itr++) { if (linker.linkInModule(CloneModule((*itr)->m_module.get()))) { return NULL; } } return new Program(context, linker.getModule()); } Kernel* Program::createKernel(const string name) { if (!m_module) return NULL; // Iterate over functions in module to find kernel llvm::Function *function = NULL; // Query the SPIR kernel list llvm::NamedMDNode* tuple = m_module->getNamedMetadata("opencl.kernels"); // No kernels in module if (!tuple) return NULL; for (unsigned i = 0; i < tuple->getNumOperands(); ++i) { llvm::MDNode* kernel = tuple->getOperand(i); llvm::ConstantAsMetadata *cam = llvm::dyn_cast(kernel->getOperand(0).get()); if (!cam) continue; llvm::Function *kernelFunction = llvm::dyn_cast(cam->getValue()); // Shouldn't really happen - this would mean an invalid Module as input if (!kernelFunction) continue; // Is this the kernel we want? 
if (kernelFunction->getName() == name) { function = kernelFunction; break; } } if (function == NULL) { return NULL; } try { // Create cache if none already InterpreterCacheMap::iterator itr = m_interpreterCache.find(function); if (itr == m_interpreterCache.end()) { m_interpreterCache[function] = new InterpreterCache(function); } return new Kernel(this, function, m_module.get()); } catch (FatalError& err) { cerr << endl << "OCLGRIND FATAL ERROR " << "(" << err.getFile() << ":" << err.getLine() << ")" << endl << err.what() << endl << "When creating kernel '" << name << "'" << endl; return NULL; } } unsigned char* Program::getBinary() const { if (!m_module) { return NULL; } std::string str; llvm::raw_string_ostream stream(str); llvm::WriteBitcodeToFile(m_module.get(), stream); stream.str(); unsigned char *bitcode = new unsigned char[str.length()]; memcpy(bitcode, str.c_str(), str.length()); return bitcode; } size_t Program::getBinarySize() const { if (!m_module) { return 0; } std::string str; llvm::raw_string_ostream stream(str); llvm::WriteBitcodeToFile(m_module.get(), stream); stream.str(); return str.length(); } const string& Program::getBuildLog() const { return m_buildLog; } const string& Program::getBuildOptions() const { return m_buildOptions; } unsigned int Program::getBuildStatus() const { return m_buildStatus; } const Context* Program::getContext() const { return m_context; } unsigned long Program::generateUID() const { srand(now()); return rand(); } const InterpreterCache* Program::getInterpreterCache( const llvm::Function *kernel) const { return m_interpreterCache[kernel]; } list Program::getKernelNames() const { list names; // Query the SPIR kernel list llvm::NamedMDNode* tuple = m_module->getNamedMetadata("opencl.kernels"); if (tuple) { for (unsigned i = 0; i < tuple->getNumOperands(); ++i) { llvm::MDNode* kernel = tuple->getOperand(i); llvm::ConstantAsMetadata *cam = llvm::dyn_cast(kernel->getOperand(0).get()); if (!cam) continue; llvm::Function 
*kernelFunction = llvm::dyn_cast(cam->getValue()); // Shouldn't really happen - this would mean an invalid Module as input if (!kernelFunction) continue; names.push_back(kernelFunction->getName()); } } return names; } unsigned int Program::getNumKernels() const { assert(m_module); // Extract kernels from metadata llvm::NamedMDNode* tuple = m_module->getNamedMetadata("opencl.kernels"); // No kernels in module if (!tuple) return 0; return tuple->getNumOperands(); } const string& Program::getSource() const { return m_source; } const char* Program::getSourceLine(size_t lineNumber) const { if (!lineNumber || (lineNumber-1) >= m_sourceLines.size()) return NULL; return m_sourceLines[lineNumber-1].c_str(); } size_t Program::getNumSourceLines() const { return m_sourceLines.size(); } unsigned long Program::getUID() const { return m_uid; } void Program::stripDebugIntrinsics() { // Get list of llvm.dbg intrinsics set intrinsics; for (llvm::Module::iterator F = m_module->begin(); F != m_module->end(); F++) { for (llvm::inst_iterator I = inst_begin(F), E = inst_end(F); I != E; I++) { if (I->getOpcode() == llvm::Instruction::Call) { llvm::CallInst *call = (llvm::CallInst*)&*I; llvm::Function *function = (llvm::Function*)call->getCalledValue()->stripPointerCasts(); if (function->getName().startswith("llvm.dbg")) { intrinsics.insert(&*I); } } } } // Remove instructions set::iterator itr; for (itr = intrinsics.begin(); itr != intrinsics.end(); itr++) { (*itr)->removeFromParent(); delete *itr; } } Oclgrind-15.5/src/core/Program.h000066400000000000000000000045771252441671000165230ustar00rootroot00000000000000// Program.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
// Guard against multiple inclusion: this header defines class Program, so
// including it twice in one translation unit would be a redefinition.
#pragma once

#include "common.h"

namespace llvm
{
  class Function;
  class Module;
}

namespace oclgrind
{
  class Context;
  class InterpreterCache;
  class Kernel;

  // An OpenCL program: either source text that can be built into an LLVM
  // module, or a module loaded/linked directly via the createFrom*()
  // factory functions.
  class Program
  {
  public:
    // (name, program) pair describing a header made available to build().
    typedef std::pair<std::string, const Program*> Header;

  public:
    Program(const Context *context, const std::string& source);
    virtual ~Program();

    // Factory functions; each returns NULL on failure.
    static Program* createFromBitcode(const Context *context,
                                      const unsigned char *bitcode,
                                      size_t length);
    static Program* createFromBitcodeFile(const Context *context,
                                          const std::string filename);
    static Program* createFromPrograms(const Context *context,
                                       std::list<const Program*>);

    // Build the program; returns true on success.
    bool build(const char *options,
               std::list<Header> headers = std::list<Header>());
    Kernel* createKernel(const std::string name);
    const std::string& getBuildLog() const;
    const std::string& getBuildOptions() const;
    unsigned char* getBinary() const;
    size_t getBinarySize() const;
    unsigned int getBuildStatus() const;
    const Context *getContext() const;
    const InterpreterCache* getInterpreterCache(
      const llvm::Function *kernel) const;
    std::list<std::string> getKernelNames() const;
    unsigned int getNumKernels() const;
    const std::string& getSource() const;
    // lineNumber is 1-based; returns NULL when out of range.
    const char* getSourceLine(size_t lineNumber) const;
    size_t getNumSourceLines() const;
    unsigned long getUID() const;

  private:
    // Takes ownership of 'module'.
    Program(const Context *context, llvm::Module *module);

    std::unique_ptr<llvm::Module> m_module;
    std::string m_source;
    std::string m_buildLog;
    std::string m_buildOptions;
    unsigned int m_buildStatus;
    const Context *m_context;
    std::vector<std::string> m_sourceLines;

    unsigned long m_uid;
    unsigned long generateUID() const;

    void stripDebugIntrinsics();

    // Cache entries are owned by the program and released by
    // clearInterpreterCache().
    typedef std::map<const llvm::Function*, InterpreterCache*>
      InterpreterCacheMap;
    mutable InterpreterCacheMap m_interpreterCache;
    void clearInterpreterCache();
  };
}
Oclgrind-15.5/src/core/Queue.cpp000066400000000000000000000134201252441671000165160ustar00rootroot00000000000000
// Queue.cpp (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#include "common.h"
#include <cassert>

#include "Context.h"
#include "KernelInvocation.h"
#include "Memory.h"
#include "Queue.h"

using namespace oclgrind;
using namespace std;

Queue::Queue(const Context *context)
  : m_context(context)
{
}

Queue::~Queue()
{
}

// A new event starts in the CL_QUEUED state, timestamped with now().
Event::Event()
{
  state = CL_QUEUED;
  queueTime = now();
  startTime = endTime = 0;
}

// Append a command to the queue and return the event created to track it.
Event* Queue::enqueue(Command *cmd)
{
  Event *event = new Event();
  cmd->event = event;
  m_queue.push(cmd);
  return event;
}

void Queue::executeCopyBuffer(CopyCommand *cmd)
{
  m_context->getGlobalMemory()->copy(cmd->dst, cmd->src, cmd->size);
}

// Copy a region row by row: region[0] bytes per row, region[1] rows,
// region[2] slices.
void Queue::executeCopyBufferRect(CopyRectCommand *cmd)
{
  // Perform copy
  Memory *memory = m_context->getGlobalMemory();
  // Counters are size_t to match region[]; a narrower counter would wrap
  // before reaching a bound larger than UINT_MAX.
  for (size_t z = 0; z < cmd->region[2]; z++)
  {
    for (size_t y = 0; y < cmd->region[1]; y++)
    {
      // Compute addresses
      size_t src =
        cmd->src + cmd->src_offset[0] +
        y * cmd->src_offset[1] +
        z * cmd->src_offset[2];
      size_t dst =
        cmd->dst + cmd->dst_offset[0] +
        y * cmd->dst_offset[1] +
        z * cmd->dst_offset[2];

      // Copy data
      memory->copy(dst, src, cmd->region[0]);
    }
  }
}

// Store the pattern repeatedly, size/pattern_size times.
void Queue::executeFillBuffer(FillBufferCommand *cmd)
{
  // An empty pattern fills nothing (and must not be used as a divisor)
  if (cmd->pattern_size == 0)
  {
    return;
  }

  Memory *memory = m_context->getGlobalMemory();
  const size_t count = cmd->size / cmd->pattern_size;
  for (size_t i = 0; i < count; i++)
  {
    memory->store(cmd->pattern,
                  cmd->address + i*cmd->pattern_size,
                  cmd->pattern_size);
  }
}

// Store the fill colour into every pixel of the region.
void Queue::executeFillImage(FillImageCommand *cmd)
{
  Memory *memory = m_context->getGlobalMemory();

  for (size_t z = 0; z < cmd->region[2]; z++)
  {
    for (size_t y = 0; y < cmd->region[1]; y++)
    {
      for (size_t x = 0; x < cmd->region[0]; x++)
      {
        size_t address = cmd->base
                       + (cmd->origin[0] + x) * cmd->pixelSize
                       + (cmd->origin[1] + y) * cmd->rowPitch
                       + (cmd->origin[2] + z) * cmd->slicePitch;
        memory->store(cmd->color, address, cmd->pixelSize);
      }
    }
  }
}

void Queue::executeKernel(KernelCommand *cmd)
{
  // Run kernel
  KernelInvocation::run(m_context,
                        cmd->kernel,
                        cmd->work_dim,
                        cmd->globalOffset,
                        cmd->globalSize,
                        cmd->localSize);
}

void Queue::executeNativeKernel(NativeKernelCommand *cmd)
{
  // Run kernel
  cmd->func(cmd->args);
}

void Queue::executeReadBuffer(BufferCommand *cmd)
{
  m_context->getGlobalMemory()->load(cmd->ptr, cmd->address, cmd->size);
}

// Load a region from global memory into host memory, one row at a time.
void Queue::executeReadBufferRect(BufferRectCommand *cmd)
{
  Memory *memory = m_context->getGlobalMemory();
  for (size_t z = 0; z < cmd->region[2]; z++)
  {
    for (size_t y = 0; y < cmd->region[1]; y++)
    {
      unsigned char *host =
        cmd->ptr +
        cmd->host_offset[0] +
        y * cmd->host_offset[1] +
        z * cmd->host_offset[2];
      size_t buff =
        cmd->address +
        cmd->buffer_offset[0] +
        y * cmd->buffer_offset[1] +
        z * cmd->buffer_offset[2];
      memory->load(host, buff, cmd->region[0]);
    }
  }
}

void Queue::executeWriteBuffer(BufferCommand *cmd)
{
  m_context->getGlobalMemory()->store(cmd->ptr, cmd->address, cmd->size);
}

// Store a region from host memory into global memory, one row at a time.
void Queue::executeWriteBufferRect(BufferRectCommand *cmd)
{
  // Perform write
  Memory *memory = m_context->getGlobalMemory();
  for (size_t z = 0; z < cmd->region[2]; z++)
  {
    for (size_t y = 0; y < cmd->region[1]; y++)
    {
      const unsigned char *host =
        cmd->ptr +
        cmd->host_offset[0] +
        y * cmd->host_offset[1] +
        z * cmd->host_offset[2];
      size_t buff =
        cmd->address +
        cmd->buffer_offset[0] +
        y * cmd->buffer_offset[1] +
        z * cmd->buffer_offset[2];
      memory->store(host, buff, cmd->region[0]);
    }
  }
}

bool Queue::isEmpty() const
{
  return m_queue.empty();
}

// Try to execute the command at the front of the queue.
//
// Returns NULL if the queue is empty or the command is still waiting on an
// event. Otherwise the command is removed from the queue and returned: either
// after running to completion, or unexecuted with its event set to the
// (negative) state of a failed event from its wait list.
Queue::Command* Queue::update()
{
  if (m_queue.empty())
  {
    return NULL;
  }

  // Get next command
  Command *cmd = m_queue.front();

  // Check if all events in wait list have completed
  while (!cmd->waitList.empty())
  {
    if (cmd->waitList.front()->state == CL_COMPLETE)
    {
      cmd->waitList.pop_front();
    }
    else if (cmd->waitList.front()->state < 0)
    {
      // A dependency failed: propagate its error state and drop the command
      cmd->event->state = cmd->waitList.front()->state;
      m_queue.pop();
      return cmd;
    }
    else
    {
      return NULL;
    }
  }

  cmd->event->startTime = now();
  cmd->event->state = CL_RUNNING;

  // Dispatch command
  switch (cmd->type)
  {
  case COPY:
    executeCopyBuffer((CopyCommand*)cmd);
    break;
  case COPY_RECT:
    executeCopyBufferRect((CopyRectCommand*)cmd);
    break;
  case EMPTY:
    break;
  case FILL_BUFFER:
    executeFillBuffer((FillBufferCommand*)cmd);
    break;
  case FILL_IMAGE:
    executeFillImage((FillImageCommand*)cmd);
    break;
  case READ:
    executeReadBuffer((BufferCommand*)cmd);
    break;
  case READ_RECT:
    executeReadBufferRect((BufferRectCommand*)cmd);
    break;
  case KERNEL:
    executeKernel((KernelCommand*)cmd);
    break;
  case NATIVE_KERNEL:
    executeNativeKernel((NativeKernelCommand*)cmd);
    break;
  case WRITE:
    executeWriteBuffer((BufferCommand*)cmd);
    break;
  case WRITE_RECT:
    executeWriteBufferRect((BufferRectCommand*)cmd);
    break;
  default:
    assert(false && "Unhandled command type in queue.");
  }

  cmd->event->endTime = now();
  cmd->event->state = CL_COMPLETE;

  // Remove command from queue. It is not deleted here; the command is
  // handed back to the caller.
  m_queue.pop();
  return cmd;
}
Oclgrind-15.5/src/core/Queue.h000066400000000000000000000101441252441671000161630ustar00rootroot00000000000000
// Queue.h (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#pragma once #include "common.h" namespace oclgrind { class Context; class Kernel; struct Event { int state; double queueTime, startTime, endTime; Event(); }; class Queue { public: enum CommandType {EMPTY, COPY, COPY_RECT, FILL_BUFFER, FILL_IMAGE, KERNEL, NATIVE_KERNEL, READ, READ_RECT, WRITE, WRITE_RECT}; struct Command { CommandType type; std::list waitList; Command() { type = EMPTY; } private: Event *event; friend class Queue; }; struct BufferCommand : Command { unsigned char *ptr; size_t address, size; BufferCommand(CommandType t) { type = t; } }; struct BufferRectCommand : Command { unsigned char *ptr; size_t address; size_t region[3]; size_t host_offset[3]; size_t buffer_offset[3]; BufferRectCommand(CommandType t) { type = t; } }; struct CopyCommand : Command { size_t src, dst, size; CopyCommand() { type = COPY; } }; struct CopyRectCommand : Command { size_t src, dst; size_t region[3]; size_t src_offset[3]; size_t dst_offset[3]; CopyRectCommand() { type = COPY_RECT; } }; struct FillBufferCommand : Command { size_t address, size; size_t pattern_size; unsigned char *pattern; FillBufferCommand(const unsigned char *p, size_t sz) { type = FILL_BUFFER; pattern = new unsigned char[sz]; pattern_size = sz; memcpy(pattern, p, sz); } ~FillBufferCommand() { delete[] pattern; } }; struct FillImageCommand : Command { size_t base; size_t origin[3], region[3]; size_t rowPitch, slicePitch; size_t pixelSize; unsigned char color[16]; FillImageCommand(size_t b, const size_t o[3], const size_t r[3], size_t rp, size_t sp, size_t ps, const unsigned char *col) { type = FILL_IMAGE; base = b; memcpy(origin, o, sizeof(size_t)*3); memcpy(region, r, sizeof(size_t)*3); rowPitch = rp; slicePitch = sp; pixelSize = ps; memcpy(color, col, 16); } }; struct KernelCommand : Command { Kernel *kernel; unsigned int work_dim; Size3 globalOffset; Size3 globalSize; Size3 localSize; KernelCommand() { type = KERNEL; } }; struct NativeKernelCommand : Command { void (CL_CALLBACK *func)(void *); void 
*args; NativeKernelCommand(void (CL_CALLBACK *f)(void *), void *a, size_t sz) { type = NATIVE_KERNEL; func = f; if (a) { args = malloc(sz); memcpy(args, a, sz); } else { args = NULL; } } ~NativeKernelCommand() { if (args) { free(args); } } }; public: Queue(const Context *context); virtual ~Queue(); Event* enqueue(Command *command); void executeCopyBuffer(CopyCommand *cmd); void executeCopyBufferRect(CopyRectCommand *cmd); void executeFillBuffer(FillBufferCommand *cmd); void executeFillImage(FillImageCommand *cmd); void executeKernel(KernelCommand *cmd); void executeNativeKernel(NativeKernelCommand *cmd); void executeReadBuffer(BufferCommand *cmd); void executeReadBufferRect(BufferRectCommand *cmd); void executeWriteBuffer(BufferCommand *cmd); void executeWriteBufferRect(BufferRectCommand *cmd); bool isEmpty() const; Command* update(); private: const Context *m_context; std::queue m_queue; }; } Oclgrind-15.5/src/core/WorkGroup.cpp000066400000000000000000000261401252441671000173740ustar00rootroot00000000000000// WorkGroup.cpp (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "common.h"

// NOTE(review): the name of this system header was lost from the text under
// review; <sstream> is assumed - confirm against the upstream file.
#include <sstream>

#include "llvm/IR/Module.h"

#include "Context.h"
#include "Kernel.h"
#include "KernelInvocation.h"
#include "Memory.h"
#include "WorkGroup.h"
#include "WorkItem.h"

using namespace oclgrind;
using namespace std;

// Create the work-group with ID `wgid`: compute its linear index, clone the
// kernel's local memory and construct one work-item per local ID. Every
// work-item starts in the running set and is announced to the context.
WorkGroup::WorkGroup(const KernelInvocation *kernelInvocation, Size3 wgid)
  : m_context(kernelInvocation->getContext())
{
  m_groupID   = wgid;
  m_groupSize = kernelInvocation->getLocalSize();

  // Linear index = x + (y + z*numGroups.y)*numGroups.x, the same layout
  // getWorkItem() uses for local IDs. The y term must be scaled by
  // numGroups.x as well, otherwise groups (1,0,0) and (0,1,0) collide.
  m_groupIndex =
    (m_groupID.x +
     (m_groupID.y + m_groupID.z*kernelInvocation->getNumGroups().y) *
     kernelInvocation->getNumGroups().x);

  // Allocate local memory
  m_localMemory = kernelInvocation->getKernel()->getLocalMemory()->clone();

  // Initialise work-items (x varies fastest, matching getWorkItem())
  for (size_t k = 0; k < m_groupSize.z; k++)
  {
    for (size_t j = 0; j < m_groupSize.y; j++)
    {
      for (size_t i = 0; i < m_groupSize.x; i++)
      {
        WorkItem *workItem = new WorkItem(kernelInvocation, this,
                                          Size3(i, j, k));
        m_workItems.push_back(workItem);
        m_running.insert(workItem);
        m_context->notifyWorkItemBegin(workItem);
      }
    }
  }

  m_nextEvent = 1;
  m_barrier = NULL;
}

// Destroy the work-items and the cloned local memory.
WorkGroup::~WorkGroup()
{
  // Delete work-items
  for (size_t i = 0; i < m_workItems.size(); i++)
  {
    delete m_workItems[i];
  }

  delete m_localMemory;
}

// Register an asynchronous copy requested by `workItem` and return the event
// it belongs to. The first pending copy that this work-item has not yet
// joined is treated as the same copy; a mismatch in its parameters is
// reported as work-group divergence. If no such copy exists a new one is
// recorded, under a fresh event when `event` is 0.
size_t WorkGroup::async_copy(
  const WorkItem *workItem,
  const llvm::Instruction *instruction,
  AsyncCopyType type,
  size_t dest,
  size_t src,
  size_t size,
  size_t num,
  size_t srcStride,
  size_t destStride,
  size_t event)
{
  AsyncCopy copy =
  {
    instruction,
    type,
    dest,
    src,
    size,
    num,
    srcStride,
    destStride,
    event
  };

  // Check if copy has already been registered by another work-item
  for (auto itr = m_asyncCopies.begin(); itr != m_asyncCopies.end(); itr++)
  {
    // Skip copies this work-item has already taken part in
    if (itr->second.count(workItem))
    {
      continue;
    }

    // Check for divergence
    if ((itr->first.instruction->getDebugLoc()
          != copy.instruction->getDebugLoc()) ||
        (itr->first.type != copy.type) ||
        (itr->first.dest != copy.dest) ||
        (itr->first.src != copy.src) ||
        (itr->first.size != copy.size) ||
        (itr->first.num != copy.num) ||
        (itr->first.srcStride != copy.srcStride) ||
        (itr->first.destStride != copy.destStride))
    {
      Context::Message msg(ERROR, m_context);
      msg << "Work-group divergence detected (async copy)" << endl
          << msg.INDENT
          << "Kernel: " << msg.CURRENT_KERNEL << endl
          << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
          << endl
          << "Work-item: " << msg.CURRENT_ENTITY << endl
          << msg.CURRENT_LOCATION << endl
          << "dest=0x" << hex << copy.dest << ", "
          << "src=0x" << hex << copy.src << endl
          << "elem_size=" << dec << copy.size << ", "
          << "num_elems=" << dec << copy.num << ", "
          << "src_stride=" << dec << copy.srcStride << ", "
          << "dest_stride=" << dec << copy.destStride << endl
          << endl
          << "Previous work-items executed:" << endl
          << itr->first.instruction << endl
          << "dest=0x" << hex << itr->first.dest << ", "
          << "src=0x" << hex << itr->first.src << endl
          << "elem_size=" << dec << itr->first.size << ", "
          << "num_elems=" << dec << itr->first.num << ", "
          << "src_stride=" << dec << itr->first.srcStride << ", "
          << "dest_stride=" << dec << itr->first.destStride << endl;
      msg.send();
    }

    itr->second.insert(workItem);
    return itr->first.event;
  }

  // Create new event if necessary
  if (copy.event == 0)
  {
    copy.event = m_nextEvent++;
  }

  // Register new copy, with this work-item as its first participant
  m_asyncCopies.emplace_back();
  m_asyncCopies.back().first = copy;
  m_asyncCopies.back().second.insert(workItem);

  // Attach the copy to its event. operator[] creates the (empty) list on
  // first use, keyed by the event actually assigned to the copy.
  m_events[copy.event].push_back(copy);

  return copy.event;
}

// Release the current barrier: report divergence if not every work-item
// reached it, move the waiting work-items back to the running set, perform
// the copies of every event the barrier waited on, then notify the context.
void WorkGroup::clearBarrier()
{
  assert(m_barrier);

  // Check for divergence
  if (m_barrier->workItems.size() != m_workItems.size())
  {
    Context::Message msg(ERROR, m_context);
    msg << "Work-group divergence detected (barrier)" << endl
        << msg.INDENT
        << "Kernel: " << msg.CURRENT_KERNEL << endl
        << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
        << "Only " << dec << m_barrier->workItems.size() << " out of "
        << m_workItems.size() << " work-items executed barrier" << endl
        << m_barrier->instruction << endl;
    msg.send();
  }

  // Move work-items to running state
  for (auto itr = m_barrier->workItems.begin();
       itr != m_barrier->workItems.end(); itr++)
  {
    (*itr)->clearBarrier();
    m_running.insert(*itr);
  }
  m_barrier->workItems.clear();

  // Deal with events
  while (!m_barrier->events.empty())
  {
    size_t event = m_barrier->events.front();

    // Perform copy (work on a copy of the list; the map entry is erased below)
    list<AsyncCopy> copies = m_events[event];
    for (auto copyItr = copies.begin(); copyItr != copies.end(); copyItr++)
    {
      Memory *destMem, *srcMem;
      if (copyItr->type == GLOBAL_TO_LOCAL)
      {
        destMem = m_localMemory;
        srcMem = m_context->getGlobalMemory();
      }
      else
      {
        destMem = m_context->getGlobalMemory();
        srcMem = m_localMemory;
      }

      // Transfer one element at a time; strides are expressed in elements
      size_t src = copyItr->src;
      size_t dest = copyItr->dest;
      unsigned char *buffer = new unsigned char[copyItr->size];
      for (size_t i = 0; i < copyItr->num; i++)
      {
        srcMem->load(buffer, src, copyItr->size);
        destMem->store(buffer, dest, copyItr->size);
        src += copyItr->srcStride * copyItr->size;
        dest += copyItr->destStride * copyItr->size;
      }
      delete[] buffer;
    }
    m_events.erase(event);

    // Remove copies from list for this event
    for (auto cItr = m_asyncCopies.begin(); cItr != m_asyncCopies.end();)
    {
      if (cItr->first.event == event)
      {
        // Check that all work-items registered the copy
        if (cItr->second.size() != m_workItems.size())
        {
          Context::Message msg(ERROR, m_context);
          msg << "Work-group divergence detected (async copy)" << endl
              << msg.INDENT
              << "Kernel: " << msg.CURRENT_KERNEL << endl
              << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
              << "Only " << dec << cItr->second.size() << " out of "
              << m_workItems.size() << " work-items executed copy" << endl
              << cItr->first.instruction << endl;
          msg.send();
        }

        cItr = m_asyncCopies.erase(cItr);
      }
      else
      {
        cItr++;
      }
    }

    // Drops every occurrence of this event, so the loop terminates
    m_barrier->events.remove(event);
  }

  m_context->notifyWorkGroupBarrier(this, m_barrier->fence);

  delete m_barrier;
  m_barrier = NULL;
}

// Instruction of the barrier currently being waited on, or NULL if none.
const llvm::Instruction* WorkGroup::getCurrentBarrier() const
{
  return m_barrier ? m_barrier->instruction : NULL;
}

Size3 WorkGroup::getGroupID() const
{
  return m_groupID;
}

size_t WorkGroup::getGroupIndex() const
{
  return m_groupIndex;
}

Size3 WorkGroup::getGroupSize() const
{
  return m_groupSize;
}

Memory* WorkGroup::getLocalMemory() const
{
  return m_localMemory;
}

// First work-item of the running set, or NULL when none is runnable.
WorkItem* WorkGroup::getNextWorkItem() const
{
  if (m_running.empty())
  {
    return NULL;
  }
  return *m_running.begin();
}

// Work-item with the given local ID (x varies fastest). No bounds check.
WorkItem* WorkGroup::getWorkItem(Size3 localID) const
{
  return m_workItems[localID.x +
                    (localID.y + localID.z*m_groupSize.y)*m_groupSize.x];
}

bool WorkGroup::hasBarrier() const
{
  return m_barrier;
}

// Record that `workItem` reached a barrier. The first arrival creates the
// barrier and validates its wait events; later arrivals are compared with it
// and any difference in location, fence or events is reported as divergence.
// Either way the work-item leaves the running set and joins the barrier.
void WorkGroup::notifyBarrier(WorkItem *workItem,
                              const llvm::Instruction *instruction,
                              uint64_t fence, list<size_t> events)
{
  if (!m_barrier)
  {
    // Create new barrier
    m_barrier = new Barrier;
    m_barrier->instruction = instruction;
    m_barrier->fence = fence;
    m_barrier->events = events;

    // Check for invalid events
    for (auto itr = events.begin(); itr != events.end(); itr++)
    {
      if (!m_events.count(*itr))
      {
        m_context->logError("Invalid wait event");
      }
    }
  }
  else
  {
    // Check for divergence
    bool divergence = false;
    if (instruction->getDebugLoc() != m_barrier->instruction->getDebugLoc() ||
        fence != m_barrier->fence ||
        events.size() != m_barrier->events.size())
    {
      divergence = true;
    }

    // Check events are all the same (only meaningful when the sizes match)
    int divergentEventIndex = -1;
    size_t newEvent = -1;
    size_t oldEvent = -1;
    if (!divergence)
    {
      int i = 0;
      auto cItr = events.begin();
      auto pItr = m_barrier->events.begin();
      for (; cItr != events.end(); cItr++, pItr++, i++)
      {
        if (*cItr != *pItr)
        {
          divergence = true;

          divergentEventIndex = i;
          newEvent = *cItr;
          oldEvent = *pItr;

          break;
        }
      }
    }

    if (divergence)
    {
      Context::Message msg(ERROR, m_context);
      msg << "Work-group divergence detected (barrier)" << endl
          << msg.INDENT
          << "Kernel: " << msg.CURRENT_KERNEL << endl
          << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
          << endl
          << "Work-item: " << msg.CURRENT_ENTITY << endl
          << msg.CURRENT_LOCATION << endl
          << "fence=0x" << hex << fence << ", "
          << "num_events=" << dec << events.size() << endl;
      if (divergentEventIndex >= 0)
      {
        msg << "events[" << dec << divergentEventIndex << "]="
            << newEvent << endl;
      }
      msg << endl
          << "Previous work-items executed:" << endl
          << m_barrier->instruction << endl
          << "fence=0x" << hex << m_barrier->fence << ", "
          << "num_events=" << dec << m_barrier->events.size() << endl;
      if (divergentEventIndex >= 0)
      {
        msg << "events[" << dec << divergentEventIndex << "]="
            << oldEvent << endl;
      }
      msg.send();
    }
  }

  m_running.erase(workItem);
  m_barrier->workItems.insert(workItem);
}

// Record that `workItem` has finished executing.
void WorkGroup::notifyFinished(WorkItem *workItem)
{
  m_running.erase(workItem);

  // Check if work-group finished without waiting for all events
  if (m_running.empty() && !m_barrier && !m_events.empty())
  {
    m_context->logError("Work-item finished without waiting for events");
  }
}

// Strict ordering of work-items by global ID: z first, then y, then x.
bool WorkGroup::WorkItemCmp::operator()(const WorkItem *lhs,
                                        const WorkItem *rhs) const
{
  Size3 lgid = lhs->getGlobalID();
  Size3 rgid = rhs->getGlobalID();
  if (lgid.z != rgid.z)
  {
    return lgid.z < rgid.z;
  }
  if (lgid.y != rgid.y)
  {
    return lgid.y < rgid.y;
  }
  return lgid.x < rgid.x;
}
Oclgrind-15.5/src/core/WorkGroup.h000066400000000000000000000050021252441671000170330ustar00rootroot00000000000000// WorkGroup.h (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#pragma once

#include "common.h"

#define CLK_LOCAL_MEM_FENCE  (1<<0)
#define CLK_GLOBAL_MEM_FENCE (1<<1)

namespace oclgrind
{
  class Context;
  class Memory;
  class Kernel;
  class KernelInvocation;
  class WorkItem;

  // A work-group of a kernel invocation. It owns its work-items and a
  // private clone of the kernel's local memory, and tracks the current
  // barrier and the asynchronous copies/events that are still pending.
  class WorkGroup
  {
  public:
    enum AsyncCopyType{GLOBAL_TO_LOCAL, LOCAL_TO_GLOBAL};

  private:
    // Comparator for ordering work-items
    struct WorkItemCmp
    {
      bool operator()(const WorkItem *lhs, const WorkItem *rhs) const;
    };
    // Work-items that are currently runnable
    std::set<WorkItem*, WorkItemCmp> m_running;

    // Parameters of one asynchronous copy and the event it is attached to
    typedef struct
    {
      const llvm::Instruction *instruction;
      AsyncCopyType type;
      size_t dest;
      size_t src;
      size_t size;
      size_t num;
      size_t srcStride;
      size_t destStride;

      size_t event;
    } AsyncCopy;

    // The barrier being waited on: where it is, who has reached it, its
    // fence flags and the events it waits for
    typedef struct
    {
      const llvm::Instruction *instruction;
      std::set<WorkItem*, WorkItemCmp> workItems;

      uint64_t fence;
      std::list<size_t> events;
    } Barrier;

  public:
    WorkGroup(const KernelInvocation *kernelInvocation, Size3 wgid);
    virtual ~WorkGroup();

    size_t async_copy(
      const WorkItem *workItem,
      const llvm::Instruction *instruction,
      AsyncCopyType type,
      size_t dest,
      size_t src,
      size_t size,
      size_t num,
      size_t srcStride,
      size_t destStride,
      size_t event);
    void clearBarrier();
    const llvm::Instruction* getCurrentBarrier() const;
    Size3 getGroupID() const;
    size_t getGroupIndex() const;
    Size3 getGroupSize() const;
    Memory* getLocalMemory() const;
    WorkItem *getNextWorkItem() const;
    WorkItem *getWorkItem(Size3 localID) const;
    bool hasBarrier() const;
    void notifyBarrier(WorkItem *workItem,
                       const llvm::Instruction *instruction,
                       uint64_t fence,
                       std::list<size_t> events=std::list<size_t>());
    void notifyFinished(WorkItem *workItem);

  private:
    size_t m_groupIndex;
    Size3 m_groupID;
    Size3 m_groupSize;
    const Context *m_context;

    Memory *m_localMemory;
    std::vector<WorkItem*> m_workItems;

    Barrier *m_barrier;
    size_t m_nextEvent;
    // Pending copies, each with the set of work-items that registered it
    std::list< std::pair<AsyncCopy,std::set<const WorkItem*> > > m_asyncCopies;
    // Pending copies grouped by event
    std::map < size_t, std::list<AsyncCopy> > m_events;
  };
}
Oclgrind-15.5/src/core/WorkItem.cpp000066400000000000000000001244511252441671000172020ustar00rootroot00000000000000// WorkItem.cpp (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. 
All rights reserved.` // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/InstIterator.h" #include "Context.h" #include "Kernel.h" #include "KernelInvocation.h" #include "Memory.h" #include "Program.h" #include "WorkGroup.h" #include "WorkItem.h" using namespace oclgrind; using namespace std; struct WorkItem::Position { llvm::Function::const_iterator prevBlock; llvm::Function::const_iterator currBlock; llvm::Function::const_iterator nextBlock; llvm::BasicBlock::const_iterator currInst; std::stack callStack; std::stack< std::list > allocations; }; WorkItem::WorkItem(const KernelInvocation *kernelInvocation, WorkGroup *workGroup, Size3 lid) : m_context(kernelInvocation->getContext()), m_kernelInvocation(kernelInvocation), m_workGroup(workGroup) { m_localID = lid; // Compute global ID Size3 groupID = workGroup->getGroupID(); Size3 groupSize = workGroup->getGroupSize(); Size3 globalOffset = kernelInvocation->getGlobalOffset(); m_globalID.x = lid.x + groupID.x*groupSize.x + globalOffset.x; m_globalID.y = lid.y + groupID.y*groupSize.y + globalOffset.y; m_globalID.z = lid.z + groupID.z*groupSize.z + globalOffset.z; Size3 globalSize = kernelInvocation->getGlobalSize(); m_globalIndex = (m_globalID.x + (m_globalID.y + m_globalID.z*globalSize.y) * globalSize.x); const Kernel *kernel = kernelInvocation->getKernel(); // Load interpreter cache m_cache = kernel->getProgram()->getInterpreterCache(kernel->getFunction()); // Set initial number of values to store based on cache m_values.resize(m_cache->getNumValues()); m_privateMemory = kernel->getPrivateMemory()->clone(); // Initialise kernel arguments 
TypedValueMap::const_iterator argItr; for (argItr = kernel->args_begin(); argItr != kernel->args_end(); argItr++) { setValue(argItr->first, m_pool.clone(argItr->second)); } // Initialize interpreter state m_state = READY; m_position = new Position; m_position->prevBlock = NULL; m_position->nextBlock = NULL; m_position->currBlock = kernel->getFunction()->begin(); m_position->currInst = m_position->currBlock->begin(); } WorkItem::~WorkItem() { delete m_privateMemory; delete m_position; } void WorkItem::clearBarrier() { if (m_state == BARRIER) { m_state = READY; } } void WorkItem::dispatch(const llvm::Instruction *instruction, TypedValue& result) { switch (instruction->getOpcode()) { case llvm::Instruction::Add: add(instruction, result); break; case llvm::Instruction::Alloca: alloc(instruction, result); break; case llvm::Instruction::And: bwand(instruction, result); break; case llvm::Instruction::AShr: ashr(instruction, result); break; case llvm::Instruction::BitCast: bitcast(instruction, result); break; case llvm::Instruction::Br: br(instruction, result); break; case llvm::Instruction::Call: call(instruction, result); break; case llvm::Instruction::ExtractElement: extractelem(instruction, result); break; case llvm::Instruction::ExtractValue: extractval(instruction, result); break; case llvm::Instruction::FAdd: fadd(instruction, result); break; case llvm::Instruction::FCmp: fcmp(instruction, result); break; case llvm::Instruction::FDiv: fdiv(instruction, result); break; case llvm::Instruction::FMul: fmul(instruction, result); break; case llvm::Instruction::FPExt: fpext(instruction, result); break; case llvm::Instruction::FPToSI: fptosi(instruction, result); break; case llvm::Instruction::FPToUI: fptoui(instruction, result); break; case llvm::Instruction::FPTrunc: fptrunc(instruction, result); break; case llvm::Instruction::FRem: frem(instruction, result); break; case llvm::Instruction::FSub: fsub(instruction, result); break; case llvm::Instruction::GetElementPtr: 
gep(instruction, result); break; case llvm::Instruction::ICmp: icmp(instruction, result); break; case llvm::Instruction::InsertElement: insertelem(instruction, result); break; case llvm::Instruction::InsertValue: insertval(instruction, result); break; case llvm::Instruction::IntToPtr: inttoptr(instruction, result); break; case llvm::Instruction::Load: load(instruction, result); break; case llvm::Instruction::LShr: lshr(instruction, result); break; case llvm::Instruction::Mul: mul(instruction, result); break; case llvm::Instruction::Or: bwor(instruction, result); break; case llvm::Instruction::PHI: phi(instruction, result); break; case llvm::Instruction::PtrToInt: ptrtoint(instruction, result); break; case llvm::Instruction::Ret: ret(instruction, result); break; case llvm::Instruction::SDiv: sdiv(instruction, result); break; case llvm::Instruction::Select: select(instruction, result); break; case llvm::Instruction::SExt: sext(instruction, result); break; case llvm::Instruction::Shl: shl(instruction, result); break; case llvm::Instruction::ShuffleVector: shuffle(instruction, result); break; case llvm::Instruction::SIToFP: sitofp(instruction, result); break; case llvm::Instruction::SRem: srem(instruction, result); break; case llvm::Instruction::Store: store(instruction, result); break; case llvm::Instruction::Sub: sub(instruction, result); break; case llvm::Instruction::Switch: swtch(instruction, result); break; case llvm::Instruction::Trunc: itrunc(instruction, result); break; case llvm::Instruction::UDiv: udiv(instruction, result); break; case llvm::Instruction::UIToFP: uitofp(instruction, result); break; case llvm::Instruction::URem: urem(instruction, result); break; case llvm::Instruction::Unreachable: FATAL_ERROR("Encountered unreachable instruction"); case llvm::Instruction::Xor: bwxor(instruction, result); break; case llvm::Instruction::ZExt: zext(instruction, result); break; default: FATAL_ERROR("Unsupported instruction: %s", instruction->getOpcodeName()); } } 
void WorkItem::execute(const llvm::Instruction *instruction) { // Prepare private variable for instruction result pair resultSize = getValueSize(instruction); // Prepare result TypedValue result = { resultSize.first, resultSize.second, NULL }; if (result.size) { result.data = m_pool.alloc(result.size*result.num); } if (instruction->getOpcode() != llvm::Instruction::PHI && m_phiTemps.size() > 0) { TypedValueMap::iterator itr; for (itr = m_phiTemps.begin(); itr != m_phiTemps.end(); itr++) { setValue(itr->first, itr->second); } m_phiTemps.clear(); } // Execute instruction dispatch(instruction, result); // Store result if (result.size) { if (instruction->getOpcode() != llvm::Instruction::PHI) { setValue(instruction, result); } else { m_phiTemps[instruction] = result; } } m_context->notifyInstructionExecuted(this, instruction, result); } TypedValue WorkItem::getValue(const llvm::Value *key) const { return m_values[m_cache->getValueID(key)]; } const stack& WorkItem::getCallStack() const { return m_position->callStack; } const llvm::Instruction* WorkItem::getCurrentInstruction() const { return m_position->currInst; } Size3 WorkItem::getGlobalID() const { return m_globalID; } size_t WorkItem::getGlobalIndex() const { return m_globalIndex; } Size3 WorkItem::getLocalID() const { return m_localID; } Memory* WorkItem::getMemory(unsigned int addrSpace) const { switch (addrSpace) { case AddrSpacePrivate: return m_privateMemory; case AddrSpaceGlobal: case AddrSpaceConstant: return m_context->getGlobalMemory(); case AddrSpaceLocal: return m_workGroup->getLocalMemory(); default: FATAL_ERROR("Unsupported address space: %d", addrSpace); } } TypedValue WorkItem::getOperand(const llvm::Value *operand) const { unsigned valID = operand->getValueID(); if (valID == llvm::Value::ArgumentVal || valID == llvm::Value::GlobalVariableVal || valID >= llvm::Value::InstructionVal) { return getValue(operand); } //else if (valID == llvm::Value::BasicBlockVal) //{ //} //else if (valID == 
llvm::Value::FunctionVal) //{ //} //else if (valID == llvm::Value::GlobalAliasVal) //{ //} //else if (valID == llvm::Value::BlockAddressVal) //{ //} else if (valID == llvm::Value::ConstantExprVal) { pair size = getValueSize(operand); TypedValue result; result.size = size.first; result.num = size.second; result.data = m_pool.alloc(getTypeSize(operand->getType())); // Use of const_cast here is ugly, but ConstExpr instructions // shouldn't actually modify WorkItem state anyway const_cast(this)->dispatch( m_cache->getConstantExpr(operand), result); return result; } else if (valID == llvm::Value::UndefValueVal || valID == llvm::Value::ConstantAggregateZeroVal || valID == llvm::Value::ConstantDataArrayVal || valID == llvm::Value::ConstantDataVectorVal || valID == llvm::Value::ConstantIntVal || valID == llvm::Value::ConstantFPVal || valID == llvm::Value::ConstantArrayVal || valID == llvm::Value::ConstantStructVal || valID == llvm::Value::ConstantVectorVal || valID == llvm::Value::ConstantPointerNullVal) { return m_cache->getConstant(operand); } //else if (valID == llvm::Value::MDNodeVal) //{ //} //else if (valID == llvm::Value::MDStringVal) //{ //} //else if (valID == llvm::Value::InlineAsmVal) //{ //} //else if (valID == llvm::Value::PseudoSourceValueVal) //{ //} //else if (valID == llvm::Value::FixedStackPseudoSourceValueVal) //{ //} else { FATAL_ERROR("Unhandled operand type: %d", valID); } // Unreachable assert(false); } Memory* WorkItem::getPrivateMemory() const { return m_privateMemory; } WorkItem::State WorkItem::getState() const { return m_state; } const unsigned char* WorkItem::getValueData(const llvm::Value *value) const { if (!hasValue(value)) { return NULL; } return getValue(value).data; } const llvm::Value* WorkItem::getVariable(std::string name) const { VariableMap::const_iterator itr; itr = m_variables.find(name); if (itr == m_variables.end()) { return NULL; } return itr->second; } const WorkGroup* WorkItem::getWorkGroup() const { return m_workGroup; } bool 
WorkItem::hasValue(const llvm::Value *key) const { return m_cache->hasValue(key); } bool WorkItem::printValue(const llvm::Value *value) const { if (!hasValue(value)) { return false; } printTypedData(value->getType(), getValue(value).data); return true; } bool WorkItem::printVariable(string name) const { // Find variable const llvm::Value *value = getVariable(name); if (!value) { return false; } // Get variable value TypedValue result = getValue(value); const llvm::Type *type = value->getType(); if (((const llvm::Instruction*)value)->getOpcode() == llvm::Instruction::Alloca) { // If value is alloca result, look-up data at address const llvm::Type *elemType = value->getType()->getPointerElementType(); size_t address = result.getPointer(); unsigned char *data = (unsigned char*)m_privateMemory->getPointer(address); printTypedData(elemType, data); } else { printTypedData(type, result.data); } return true; } void WorkItem::setValue(const llvm::Value *key, TypedValue value) { m_values[m_cache->getValueID(key)] = value; } WorkItem::State WorkItem::step() { assert(m_state == READY); // Execute the next instruction execute(m_position->currInst); // Check if we've reached the end of the block if (++m_position->currInst == m_position->currBlock->end() || m_position->nextBlock) { if (m_position->nextBlock) { // Move to next basic block m_position->prevBlock = m_position->currBlock; m_position->currBlock = m_position->nextBlock; m_position->nextBlock = NULL; m_position->currInst = m_position->currBlock->begin(); } } return m_state; } /////////////////////////////// //// Instruction execution //// /////////////////////////////// #define INSTRUCTION(name) \ void WorkItem::name(const llvm::Instruction *instruction, TypedValue& result) INSTRUCTION(add) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) + opB.getUInt(i), i); } } INSTRUCTION(alloc) 
{ const llvm::AllocaInst *allocInst = ((const llvm::AllocaInst*)instruction); const llvm::Type *type = allocInst->getAllocatedType(); // Perform allocation unsigned size = getTypeSize(type); size_t address = m_privateMemory->allocateBuffer(size); if (!address) FATAL_ERROR("Insufficient private memory (alloca)"); // Create pointer to alloc'd memory result.setPointer(address); // Track allocation in stack frame if (!m_position->allocations.empty()) m_position->allocations.top().push_back(address); } INSTRUCTION(ashr) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); uint64_t shiftMask = (result.num > 1 ? result.size : max((size_t)result.size, sizeof(uint32_t))) * 8 - 1; for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getSInt(i) >> (opB.getUInt(i) & shiftMask), i); } } INSTRUCTION(bitcast) { const llvm::Value *op = instruction->getOperand(0); // Check for address space casts if (instruction->getType()->isPointerTy()) { unsigned srcAddrSpace = op->getType()->getPointerAddressSpace(); unsigned dstAddrSpace = instruction->getType()->getPointerAddressSpace(); if (srcAddrSpace != dstAddrSpace) { FATAL_ERROR("Invalid pointer cast from %s to %s address spaces", getAddressSpaceName(srcAddrSpace), getAddressSpaceName(dstAddrSpace)); } } TypedValue operand = getOperand(op); memcpy(result.data, operand.data, result.size*result.num); } INSTRUCTION(br) { if (instruction->getNumOperands() == 1) { // Unconditional branch m_position->nextBlock = (const llvm::BasicBlock*)instruction->getOperand(0); } else { // Conditional branch bool pred = getOperand(instruction->getOperand(0)).getUInt(); const llvm::Value *iftrue = instruction->getOperand(2); const llvm::Value *iffalse = instruction->getOperand(1); m_position->nextBlock = (const llvm::BasicBlock*)(pred ? 
iftrue : iffalse); } } INSTRUCTION(bwand) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) & opB.getUInt(i), i); } } INSTRUCTION(bwor) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) | opB.getUInt(i), i); } } INSTRUCTION(bwxor) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) ^ opB.getUInt(i), i); } } INSTRUCTION(call) { const llvm::CallInst *callInst = (const llvm::CallInst*)instruction; const llvm::Function *function = callInst->getCalledFunction(); // Check for indirect function calls if (!callInst->getCalledFunction()) { // Resolve indirect function pointer const llvm::Value *func = callInst->getCalledValue(); const llvm::Value *funcPtr = ((const llvm::User*)func)->getOperand(0); function = (const llvm::Function*)funcPtr; } // Check if function has definition if (!function->isDeclaration()) { m_position->callStack.push(m_position->currInst); m_position->allocations.push(list()); m_position->nextBlock = function->begin(); // Set function arguments llvm::Function::const_arg_iterator argItr; for (argItr = function->arg_begin(); argItr != function->arg_end(); argItr++) { const llvm::Value *arg = callInst->getArgOperand(argItr->getArgNo()); setValue(argItr, m_pool.clone(getOperand(arg))); } return; } // Call builtin function InterpreterCache::Builtin builtin = m_cache->getBuiltin(function); builtin.function.func(this, callInst, builtin.name, builtin.overload, result, builtin.function.op); } INSTRUCTION(extractelem) { const llvm::ExtractElementInst *extract = (const llvm::ExtractElementInst*)instruction; unsigned index = 
getOperand(extract->getIndexOperand()).getUInt(); TypedValue operand = getOperand(extract->getVectorOperand()); memcpy(result.data, operand.data + result.size*index, result.size); } INSTRUCTION(extractval) { const llvm::ExtractValueInst *extract = (const llvm::ExtractValueInst*)instruction; const llvm::Value *agg = extract->getAggregateOperand(); llvm::ArrayRef indices = extract->getIndices(); // Compute offset for target value int offset = 0; const llvm::Type *type = agg->getType(); for (unsigned i = 0; i < indices.size(); i++) { if (type->isArrayTy()) { type = type->getArrayElementType(); offset += getTypeSize(type) * indices[i]; } else if (type->isStructTy()) { offset += getStructMemberOffset((const llvm::StructType*)type, indices[i]); type = type->getStructElementType(indices[i]); } else { FATAL_ERROR("Unsupported aggregate type: %d", type->getTypeID()) } } // Copy target value to result memcpy(result.data, getOperand(agg).data + offset, getTypeSize(type)); } INSTRUCTION(fadd) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(opA.getFloat(i) + opB.getFloat(i), i); } } INSTRUCTION(fcmp) { const llvm::CmpInst *cmpInst = (const llvm::CmpInst*)instruction; llvm::CmpInst::Predicate pred = cmpInst->getPredicate(); TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); uint64_t t = result.num > 1 ? 
-1 : 1; for (unsigned i = 0; i < result.num; i++) { double a = opA.getFloat(i); double b = opB.getFloat(i); uint64_t r; switch (pred) { case llvm::CmpInst::FCMP_OEQ: case llvm::CmpInst::FCMP_UEQ: r = a == b; break; case llvm::CmpInst::FCMP_ONE: case llvm::CmpInst::FCMP_UNE: r = a != b; break; case llvm::CmpInst::FCMP_OGT: case llvm::CmpInst::FCMP_UGT: r = a > b; break; case llvm::CmpInst::FCMP_OGE: case llvm::CmpInst::FCMP_UGE: r = a >= b; break; case llvm::CmpInst::FCMP_OLT: case llvm::CmpInst::FCMP_ULT: r = a < b; break; case llvm::CmpInst::FCMP_OLE: case llvm::CmpInst::FCMP_ULE: r = a <= b; break; case llvm::CmpInst::FCMP_FALSE: r = false; break; case llvm::CmpInst::FCMP_TRUE: r = true; break; case llvm::CmpInst::FCMP_ORD: case llvm::CmpInst::FCMP_UNO: break; default: FATAL_ERROR("Unsupported FCmp predicate: %d", pred); } // Deal with NaN operands if (::isnan(a) || ::isnan(b)) { r = !llvm::CmpInst::isOrdered(pred); } result.setUInt(r ? t : 0, i); } } INSTRUCTION(fdiv) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(opA.getFloat(i) / opB.getFloat(i), i); } } INSTRUCTION(fmul) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(opA.getFloat(i) * opB.getFloat(i), i); } } INSTRUCTION(fpext) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(op.getFloat(i), i); } } INSTRUCTION(fptosi) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setSInt((int64_t)op.getFloat(i), i); } } INSTRUCTION(fptoui) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setUInt((uint64_t)op.getFloat(i), i); } } INSTRUCTION(frem) { TypedValue opA = 
getOperand(instruction->getOperand(0));
  // (continuation of INSTRUCTION(frem), whose header is on the previous line)
  TypedValue opB = getOperand(instruction->getOperand(1));
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setFloat(fmod(opA.getFloat(i), opB.getFloat(i)), i);
  }
}

// Floating point truncation: each element is read with getFloat and written
// back with setFloat (narrowing presumably follows result.size - confirm).
INSTRUCTION(fptrunc)
{
  TypedValue op = getOperand(instruction->getOperand(0));
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setFloat(op.getFloat(i), i);
  }
}

// Element-wise floating point subtraction (operand 0 minus operand 1).
INSTRUCTION(fsub)
{
  TypedValue opA = getOperand(instruction->getOperand(0));
  TypedValue opB = getOperand(instruction->getOperand(1));
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setFloat(opA.getFloat(i) - opB.getFloat(i), i);
  }
}

// GetElementPtr: starts from the base pointer value and walks the index list,
// adding a byte offset for each index according to the type being indexed
// (pointer, array, vector or struct), then stores the final address.
INSTRUCTION(gep)
{
  const llvm::GetElementPtrInst *gepInst =
    (const llvm::GetElementPtrInst*)instruction;

  // Get base address
  const llvm::Value *base = gepInst->getPointerOperand();
  size_t address = getOperand(base).getPointer();
  const llvm::Type *ptrType = gepInst->getPointerOperandType();

  // Iterate over indices
  llvm::User::const_op_iterator opItr;
  for (opItr = gepInst->idx_begin(); opItr != gepInst->idx_end(); opItr++)
  {
    // Indices are read as signed values, so negative offsets move backwards
    int64_t offset = getOperand(opItr->get()).getSInt();

    if (ptrType->isPointerTy())
    {
      // Get pointer element size
      const llvm::Type *elemType = ptrType->getPointerElementType();
      address += offset*getTypeSize(elemType);
      ptrType = elemType;
    }
    else if (ptrType->isArrayTy())
    {
      // Get array element size
      const llvm::Type *elemType = ptrType->getArrayElementType();
      address += offset*getTypeSize(elemType);
      ptrType = elemType;
    }
    else if (ptrType->isVectorTy())
    {
      // Get vector element size
      const llvm::Type *elemType = ptrType->getVectorElementType();
      address += offset*getTypeSize(elemType);
      ptrType = elemType;
    }
    else if (ptrType->isStructTy())
    {
      // For structs the index selects a member rather than scaling a stride
      address +=
        getStructMemberOffset((const llvm::StructType*)ptrType, offset);
      ptrType = ptrType->getStructElementType(offset);
    }
    else
    {
      FATAL_ERROR("Unsupported GEP base type: %d", ptrType->getTypeID());
    }
  }

  result.setPointer(address);
}

// Integer comparison. Unsigned predicates compare the getUInt views of the
// operands and signed predicates compare the getSInt views. A true result is
// stored as `t`: all-ones when the result has more than one element, 1
// otherwise.
INSTRUCTION(icmp)
{
  const llvm::CmpInst *cmpInst = (const llvm::CmpInst*)instruction;
  llvm::CmpInst::Predicate pred = cmpInst->getPredicate();

  TypedValue opA = getOperand(instruction->getOperand(0));
  TypedValue opB = getOperand(instruction->getOperand(1));

  uint64_t t = result.num > 1 ? -1 : 1;
  for (unsigned i = 0; i < result.num; i++)
  {
    // Load operands
    uint64_t ua = opA.getUInt(i);
    uint64_t ub = opB.getUInt(i);
    int64_t sa = opA.getSInt(i);
    int64_t sb = opB.getSInt(i);

    uint64_t r;
    switch (pred)
    {
    case llvm::CmpInst::ICMP_EQ:
      r = ua == ub;
      break;
    case llvm::CmpInst::ICMP_NE:
      r = ua != ub;
      break;
    case llvm::CmpInst::ICMP_UGT:
      r = ua > ub;
      break;
    case llvm::CmpInst::ICMP_UGE:
      r = ua >= ub;
      break;
    case llvm::CmpInst::ICMP_ULT:
      r = ua < ub;
      break;
    case llvm::CmpInst::ICMP_ULE:
      r = ua <= ub;
      break;
    case llvm::CmpInst::ICMP_SGT:
      r = sa > sb;
      break;
    case llvm::CmpInst::ICMP_SGE:
      r = sa >= sb;
      break;
    case llvm::CmpInst::ICMP_SLT:
      r = sa < sb;
      break;
    case llvm::CmpInst::ICMP_SLE:
      r = sa <= sb;
      break;
    default:
      FATAL_ERROR("Unsupported ICmp predicate: %d", pred);
    }

    result.setUInt(r ?
t : 0, i);
  }
}

// Insert element: copies the source vector into the result, then overwrites
// the element at the index given by operand 2 with operand 1.
INSTRUCTION(insertelem)
{
  TypedValue vector = getOperand(instruction->getOperand(0));
  TypedValue element = getOperand(instruction->getOperand(1));
  unsigned index = getOperand(instruction->getOperand(2)).getUInt();
  memcpy(result.data, vector.data, result.size*result.num);
  memcpy(result.data + index*result.size, element.data, result.size);
}

// Insert value: copies the source aggregate into the result, walks the index
// list to find the byte offset of the addressed member, and copies the
// inserted value over it. Only array and struct levels are supported.
INSTRUCTION(insertval)
{
  const llvm::InsertValueInst *insert =
    (const llvm::InsertValueInst*)instruction;

  // Load original aggregate data
  const llvm::Value *agg = insert->getAggregateOperand();
  memcpy(result.data, getOperand(agg).data, result.size*result.num);

  // Compute offset for inserted value
  int offset = 0;
  // NOTE(review): ArrayRef's element type is missing in this copy of the file
  llvm::ArrayRef indices = insert->getIndices();
  const llvm::Type *type = agg->getType();
  for (unsigned i = 0; i < indices.size(); i++)
  {
    if (type->isArrayTy())
    {
      type = type->getArrayElementType();
      offset += getTypeSize(type) * indices[i];
    }
    else if (type->isStructTy())
    {
      offset += getStructMemberOffset((const llvm::StructType*)type,
                                      indices[i]);
      type = type->getStructElementType(indices[i]);
    }
    else
    {
      FATAL_ERROR("Unsupported aggregate type: %d", type->getTypeID())
    }
  }

  // Copy inserted value into result
  const llvm::Value *value = insert->getInsertedValueOperand();
  memcpy(result.data + offset, getOperand(value).data,
         getTypeSize(value->getType()));
}

// Integer to pointer: stores each unsigned element as a pointer value.
INSTRUCTION(inttoptr)
{
  TypedValue op = getOperand(instruction->getOperand(0));
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setPointer(op.getUInt(i), i);
  }
}

// Integer truncation: copies the first result.size bytes of each source
// element (source elements are op.size bytes apart).
INSTRUCTION(itrunc)
{
  TypedValue op = getOperand(instruction->getOperand(0));
  for (unsigned i = 0; i < result.num; i++)
  {
    memcpy(result.data+i*result.size, op.data+i*op.size, result.size);
  }
}

// Load: reads result.size*result.num bytes from the memory object selected by
// the pointer's address space, after reporting misaligned addresses.
INSTRUCTION(load)
{
  const llvm::LoadInst *loadInst = (const llvm::LoadInst*)instruction;
  unsigned addressSpace = loadInst->getPointerAddressSpace();
  size_t address = getOperand(loadInst->getPointerOperand()).getPointer();

  // Check address is correctly aligned.
  // getAlignment() returns 0 when the instruction carries no explicit
  // alignment. The old expression `getAlignment()-1` then wrapped to an
  // all-ones mask and reported an error for almost every address, so the
  // check is only applied when an alignment is actually specified.
  const unsigned alignment = loadInst->getAlignment();
  if (alignment && (address & (alignment-1)))
  {
    m_context->logError("Invalid memory load - source pointer is "
                        "not aligned to the pointed type");
  }

  // Load data
  getMemory(addressSpace)->load(result.data, address,
                                result.size*result.num);
}

// Logical shift right. The shift amount is masked to the element width in
// bits minus one; for non-vector results the width is at least
// sizeof(uint32_t) bytes.
INSTRUCTION(lshr)
{
  TypedValue opA = getOperand(instruction->getOperand(0));
  TypedValue opB = getOperand(instruction->getOperand(1));
  uint64_t shiftMask =
    (result.num > 1 ? result.size : max((size_t)result.size,
                                        sizeof(uint32_t))) * 8 - 1;
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setUInt(opA.getUInt(i) >> (opB.getUInt(i) & shiftMask), i);
  }
}

// Element-wise integer multiplication (performed on the unsigned views).
INSTRUCTION(mul)
{
  TypedValue opA = getOperand(instruction->getOperand(0));
  TypedValue opB = getOperand(instruction->getOperand(1));
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setUInt(opA.getUInt(i) * opB.getUInt(i), i);
  }
}

// Phi node: copies the incoming value associated with the previously executed
// basic block (m_position->prevBlock) into the result.
INSTRUCTION(phi)
{
  const llvm::PHINode *phiNode = (const llvm::PHINode*)instruction;
  const llvm::Value *value = phiNode->getIncomingValueForBlock(
    (const llvm::BasicBlock*)m_position->prevBlock);
  memcpy(result.data, getOperand(value).data, result.size*result.num);
}

// Pointer to integer: stores each pointer element as an unsigned integer.
INSTRUCTION(ptrtoint)
{
  TypedValue op = getOperand(instruction->getOperand(0));
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setUInt(op.getPointer(i), i);
  }
}

// Return. With a non-empty call stack this resumes at the saved call
// instruction, forwards the return value (if any) to it and frees the private
// allocations recorded for the finished call. With an empty call stack the
// work-item is marked FINISHED and its work-group and context are notified.
INSTRUCTION(ret)
{
  const llvm::ReturnInst *retInst = (const llvm::ReturnInst*)instruction;

  if (!m_position->callStack.empty())
  {
    m_position->currInst = m_position->callStack.top();
    m_position->currBlock = m_position->currInst->getParent();
    m_position->callStack.pop();

    // Set return value
    const llvm::Value *returnVal = retInst->getReturnValue();
    if (returnVal)
    {
      // Cloned into the pool so the value has storage of its own
      setValue(m_position->currInst, m_pool.clone(getOperand(returnVal)));
    }

    // Clear stack allocations
    // NOTE(review): the list's element type is missing in this copy
    list& allocs = m_position->allocations.top();
    list::iterator itr;
    for (itr = allocs.begin(); itr != allocs.end(); itr++)
    {
      m_privateMemory->deallocateBuffer(*itr);
    }
    m_position->allocations.pop();
  }
  else
  {
    m_position->nextBlock = NULL;
    m_state = FINISHED;
    m_workGroup->notifyFinished(this);
m_context->notifyWorkItemComplete(this);
    // (end of the "no caller" branch of INSTRUCTION(ret), begun on the
    // previous line)
  }
}

// Signed integer division. Division by zero and INT64_MIN / -1 are not
// evaluated; the result for those elements is 0.
INSTRUCTION(sdiv)
{
  TypedValue opA = getOperand(instruction->getOperand(0));
  TypedValue opB = getOperand(instruction->getOperand(1));
  for (unsigned i = 0; i < result.num; i++)
  {
    int64_t a = opA.getSInt(i);
    int64_t b = opB.getSInt(i);
    int64_t r = 0;
    if (b && !(a == INT64_MIN && b == -1))
    {
      r = a / b;
    }
    result.setSInt(r, i);
  }
}

// Select: for each result element picks the matching element of the true or
// false operand. A vector condition is tested per element; a non-vector
// condition is read once per iteration and applies to every element.
INSTRUCTION(select)
{
  const llvm::SelectInst *selectInst = (const llvm::SelectInst*)instruction;
  TypedValue opCondition = getOperand(selectInst->getCondition());
  for (unsigned i = 0; i < result.num; i++)
  {
    const bool cond =
      selectInst->getCondition()->getType()->isVectorTy() ?
      opCondition.getUInt(i) :
      opCondition.getUInt();
    const llvm::Value *op = cond ?
      selectInst->getTrueValue() :
      selectInst->getFalseValue();
    memcpy(result.data + i*result.size,
           getOperand(op).data + i*result.size,
           result.size);
  }
}

// Sign extension. When the operand type is exactly one bit wide, a non-zero
// value is mapped to -1 before being stored.
INSTRUCTION(sext)
{
  const llvm::Value *operand = instruction->getOperand(0);
  TypedValue value = getOperand(operand);
  for (unsigned i = 0; i < result.num; i++)
  {
    int64_t val = value.getSInt(i);
    if (operand->getType()->getPrimitiveSizeInBits() == 1)
    {
      val = val ? -1 : 0;
    }
    result.setSInt(val, i);
  }
}

// Shift left. The shift amount is masked to the element width in bits minus
// one; for non-vector results the width is at least sizeof(uint32_t) bytes.
INSTRUCTION(shl)
{
  TypedValue opA = getOperand(instruction->getOperand(0));
  TypedValue opB = getOperand(instruction->getOperand(1));
  uint64_t shiftMask =
    (result.num > 1 ?
result.size : max((size_t)result.size, sizeof(uint32_t))) * 8 - 1; for (unsigned i = 0; i < result.num; i++) { result.setUInt(opA.getUInt(i) << (opB.getUInt(i) & shiftMask), i); } } INSTRUCTION(shuffle) { const llvm::ShuffleVectorInst *shuffle = (const llvm::ShuffleVectorInst*)instruction; const llvm::Value *v1 = shuffle->getOperand(0); const llvm::Value *v2 = shuffle->getOperand(1); TypedValue mask = getOperand(shuffle->getMask()); unsigned num = v1->getType()->getVectorNumElements(); for (unsigned i = 0; i < result.num; i++) { if (shuffle->getMask()->getAggregateElement(i)->getValueID() == llvm::Value::UndefValueVal) { // Don't care / undef continue; } const llvm::Value *src = v1; unsigned int index = mask.getUInt(i); if (index >= num) { index -= num; src = v2; } memcpy(result.data + i*result.size, getOperand(src).data + index*result.size, result.size); } } INSTRUCTION(sitofp) { TypedValue op = getOperand(instruction->getOperand(0)); for (unsigned i = 0; i < result.num; i++) { result.setFloat(op.getSInt(i), i); } } INSTRUCTION(srem) { TypedValue opA = getOperand(instruction->getOperand(0)); TypedValue opB = getOperand(instruction->getOperand(1)); for (unsigned i = 0; i < result.num; i++) { int64_t a = opA.getSInt(i); int64_t b = opB.getSInt(i); int64_t r = 0; if (b && !(a == INT64_MIN && b == -1)) { r = a % b; } result.setSInt(r, i); } } INSTRUCTION(store) { const llvm::StoreInst *storeInst = (const llvm::StoreInst*)instruction; unsigned addressSpace = storeInst->getPointerAddressSpace(); size_t address = getOperand(storeInst->getPointerOperand()).getPointer(); // Check address is correctly aligned if (address & (storeInst->getAlignment()-1)) { m_context->logError("Invalid memory store - source pointer is " "not aligned to the pointed type"); } // Store data TypedValue operand = getOperand(storeInst->getValueOperand()); getMemory(addressSpace)->store(operand.data, address, operand.size*operand.num); } INSTRUCTION(sub) { TypedValue opA = 
getOperand(instruction->getOperand(0));
  // (continuation of INSTRUCTION(sub), whose header is on the previous line)
  TypedValue opB = getOperand(instruction->getOperand(1));
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setUInt(opA.getUInt(i) - opB.getUInt(i), i);
  }
}

// Switch: wraps the condition value in a ConstantInt of the condition's type
// and sets the next block to the successor of the case found for it.
INSTRUCTION(swtch)
{
  const llvm::SwitchInst *swtch = (const llvm::SwitchInst*)instruction;
  const llvm::Value *cond = swtch->getCondition();
  uint64_t val = getOperand(cond).getUInt();
  const llvm::ConstantInt *cval =
    (const llvm::ConstantInt*)llvm::ConstantInt::get(cond->getType(), val);
  m_position->nextBlock = swtch->findCaseValue(cval).getCaseSuccessor();
}

// Unsigned integer division; a zero divisor yields 0.
INSTRUCTION(udiv)
{
  TypedValue opA = getOperand(instruction->getOperand(0));
  TypedValue opB = getOperand(instruction->getOperand(1));
  for (unsigned i = 0; i < result.num; i++)
  {
    uint64_t a = opA.getUInt(i);
    uint64_t b = opB.getUInt(i);
    result.setUInt(b ? a / b : 0, i);
  }
}

// Unsigned integer to floating point conversion.
INSTRUCTION(uitofp)
{
  TypedValue op = getOperand(instruction->getOperand(0));
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setFloat(op.getUInt(i), i);
  }
}

// Unsigned integer remainder; a zero divisor yields 0.
INSTRUCTION(urem)
{
  TypedValue opA = getOperand(instruction->getOperand(0));
  TypedValue opB = getOperand(instruction->getOperand(1));
  for (unsigned i = 0; i < result.num; i++)
  {
    uint64_t a = opA.getUInt(i);
    uint64_t b = opB.getUInt(i);
    result.setUInt(b ? a % b : 0, i);
  }
}

// Zero extension: re-stores the unsigned view of each element.
INSTRUCTION(zext)
{
  TypedValue operand = getOperand(instruction->getOperand(0));
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setUInt(operand.getUInt(i), i);
  }
}

#undef INSTRUCTION


////////////////////////////////
// WorkItem::InterpreterCache //
////////////////////////////////

// Builds the cache for a kernel: assigns value IDs to the module's global
// variables, then visits the kernel and every defined function reachable from
// it through call instructions, registering arguments, instructions, operands
// and the builtins behind calls to declarations.
InterpreterCache::InterpreterCache(llvm::Function *kernel)
{
  // TODO: Determine this number dynamically?
  m_valueIDs.reserve(1024);

  // Add global variables to cache
  // TODO: Only add variables that are used?
  const llvm::Module *module = kernel->getParent();
  llvm::Module::const_global_iterator G;
  for (G = module->global_begin(); G != module->global_end(); G++)
  {
    addValueID(G);
  }

  // Worklist of functions still to visit, plus the set already visited
  // NOTE(review): the sets' element types are missing in this copy
  set processed;
  set pending;

  pending.insert(kernel);

  while (!pending.empty())
  {
    // Get next function to process
    llvm::Function *function = *pending.begin();
    processed.insert(function);
    pending.erase(function);

    // Iterate through the function arguments
    llvm::Function::arg_iterator A;
    for (A = function->arg_begin(); A != function->arg_end(); A++)
    {
      addValueID(A);
    }

    // Iterate through instructions in function
    llvm::inst_iterator I;
    for (I = inst_begin(function); I != inst_end(function); I++)
    {
      addValueID(&*I);

      // Check for function calls
      if (I->getOpcode() == llvm::Instruction::Call)
      {
        const llvm::CallInst *call = ((const llvm::CallInst*)&*I);
        llvm::Function *callee =
          (llvm::Function*)call->getCalledValue()->stripPointerCasts();
        if (callee->isDeclaration())
        {
          // Resolve builtin function calls
          addBuiltin(callee);
        }
        else if (!processed.count(callee))
        {
          // Process called function
          pending.insert(callee);
        }
      }

      // Process operands
      for (llvm::User::value_op_iterator O = I->value_op_begin();
           O != I->value_op_end(); O++)
      {
        addOperand(*O);
      }
    }
  }
}

// Releases the data buffer of every cached constant and deletes every cached
// constant-expression instruction.
InterpreterCache::~InterpreterCache()
{
  ConstantMap::iterator constItr;
  for (constItr  = m_constants.begin();
       constItr != m_constants.end(); constItr++)
  {
    delete[] constItr->second.data;
  }

  ConstExprMap::iterator constExprItr;
  for (constExprItr  = m_constExpressions.begin();
       constExprItr != m_constExpressions.end(); constExprItr++)
  {
    delete constExprItr->second;
  }
}

// Resolves a called declaration to a builtin implementation and caches it.
// Names starting with "_Z" are split into a name of the encoded length and a
// trailing overload string; other names are used unchanged with an empty
// overload.
void InterpreterCache::addBuiltin(
  const llvm::Function *function)
{
  // Check if already in cache
  InterpreterCache::BuiltinMap::iterator fItr = m_builtins.find(function);
  if (fItr != m_builtins.end())
  {
    return;
  }

  // Extract unmangled name and overload
  string name, overload;
  const string fullname = function->getName().str();
  if (fullname.compare(0,2, "_Z") == 0)
  {
    // The decimal number after "_Z" is the length of the name that follows
    int len = atoi(fullname.c_str()+2);
    int start
= fullname.find_first_not_of("0123456789", 2);
    // (continuation of InterpreterCache::addBuiltin from the previous line:
    // `start` is the position of the first character after the length digits)
    name = fullname.substr(start, len);
    overload = fullname.substr(start + len);
  }
  else
  {
    name = fullname;
    overload = "";
  }

  // Find builtin function in map
  BuiltinFunctionMap::iterator bItr = workItemBuiltins.find(name);
  if (bItr != workItemBuiltins.end())
  {
    // Add builtin to cache
    const InterpreterCache::Builtin builtin = {bItr->second, name, overload};
    m_builtins[function] = builtin;
    return;
  }

  // Check for builtin with matching prefix
  BuiltinFunctionPrefixList::iterator pItr;
  for (pItr = workItemPrefixBuiltins.begin();
       pItr != workItemPrefixBuiltins.end(); pItr++)
  {
    if (name.compare(0, pItr->first.length(), pItr->first) == 0)
    {
      // Add builtin to cache
      const InterpreterCache::Builtin builtin = {pItr->second, name, overload};
      m_builtins[function] = builtin;
      return;
    }
  }

  // Function didn't match any builtins
  FATAL_ERROR("Undefined external function: %s", name.c_str());
}

// Returns a copy of the cached builtin entry for `function`. Uses at(), so
// the lookup does not insert a new entry when the function is unknown.
InterpreterCache::Builtin InterpreterCache::getBuiltin(
  const llvm::Function *function) const
{
  return m_builtins.at(function);
}

// Materialises a constant into a newly allocated buffer and caches it. The
// buffer is released in the destructor.
void InterpreterCache::addConstant(const llvm::Value *value)
{
  // Check if constant already in cache
  if (m_constants.count(value))
  {
    return;
  }

  // Create constant and add to cache
  // NOTE(review): the pair's template arguments are missing in this copy;
  // `first` is used as the element size and `second` as the element count.
  pair size = getValueSize(value);
  TypedValue constant;
  constant.size = size.first;
  constant.num  = size.second;
  constant.data = new unsigned char[getTypeSize(value->getType())];
  getConstantData(constant.data, (const llvm::Constant*)value);

  m_constants[value] = constant;
}

// Looks up a previously cached constant; raises a fatal error if it was never
// added.
TypedValue InterpreterCache::getConstant(const llvm::Value *operand) const
{
  ConstantMap::const_iterator itr = m_constants.find(operand);
  if (itr == m_constants.end())
  {
    FATAL_ERROR("Constant not found in cache (ID %d)", operand->getValueID());
  }
  return itr->second;
}

// Looks up the instruction cached for a constant expression; raises a fatal
// error if it was never added.
const llvm::Instruction* InterpreterCache::getConstantExpr(
  const llvm::Value *expr) const
{
  ConstExprMap::const_iterator itr = m_constExpressions.find(expr);
  if (itr == m_constExpressions.end())
  {
    FATAL_ERROR("Constant expression not found in cache");
  }
  return itr->second;
}

// Returns the ID of `value`, assigning the next free index (the current map
// size) if the value has not been seen before.
unsigned InterpreterCache::addValueID(const llvm::Value *value)
{
  ValueMap::iterator itr = m_valueIDs.find(value);
  if (itr == m_valueIDs.end())
  {
    // Assign next index to value
    unsigned pos = m_valueIDs.size();
    itr = m_valueIDs.insert(make_pair(value, pos)).first;
  }
  return itr->second;
}

// Returns the ID previously assigned to `value`; raises a fatal error if the
// value is unknown.
unsigned InterpreterCache::getValueID(const llvm::Value *value) const
{
  ValueMap::const_iterator itr = m_valueIDs.find(value);
  if (itr == m_valueIDs.end())
  {
    FATAL_ERROR("Value not found in cache (ID %d)", value->getValueID());
  }
  return itr->second;
}

// Number of values that have been assigned an ID.
unsigned InterpreterCache::getNumValues() const
{
  return m_valueIDs.size();
}

// True if `value` has been assigned an ID.
bool InterpreterCache::hasValue(const llvm::Value *value) const
{
  return m_valueIDs.count(value);
}

// Registers an instruction operand: assigns it a value ID, caches it as a
// constant if it is one of the listed constant kinds, or - for a constant
// expression - registers the expression's own operands recursively and caches
// the expression in instruction form.
void InterpreterCache::addOperand(const llvm::Value *operand)
{
  addValueID(operand);

  // Resolve constants
  if (operand->getValueID() == llvm::Value::UndefValueVal            ||
      operand->getValueID() == llvm::Value::ConstantAggregateZeroVal ||
      operand->getValueID() == llvm::Value::ConstantDataArrayVal     ||
      operand->getValueID() == llvm::Value::ConstantDataVectorVal    ||
      operand->getValueID() == llvm::Value::ConstantIntVal           ||
      operand->getValueID() == llvm::Value::ConstantFPVal            ||
      operand->getValueID() == llvm::Value::ConstantArrayVal         ||
      operand->getValueID() == llvm::Value::ConstantStructVal        ||
      operand->getValueID() == llvm::Value::ConstantVectorVal        ||
      operand->getValueID() == llvm::Value::ConstantPointerNullVal)
  {
    addConstant(operand);
  }
  else if (operand->getValueID() == llvm::Value::ConstantExprVal)
  {
    // Resolve constant expressions
    const llvm::ConstantExpr *expr = (const llvm::ConstantExpr*)operand;
    if (!m_constExpressions.count(expr))
    {
      for (llvm::User::const_op_iterator O = expr->op_begin();
           O != expr->op_end(); O++)
      {
        addOperand(*O);
      }
      m_constExpressions[expr] = getConstExprAsInstruction(expr);
      // TODO: Resolve actual value?
}
  }
}
// (the three closing braces above end InterpreterCache::addOperand, which
// begins on the previous line)


//////////////////////////
// WorkItem::MemoryPool //
//////////////////////////

// Creates an empty pool that hands out memory from blocks of `blockSize`
// bytes.
WorkItem::MemoryPool::MemoryPool(size_t blockSize) : m_blockSize(blockSize)
{
  // Force first allocation to create new block
  m_offset = m_blockSize;
}

// Frees every block owned by the pool, including separately allocated
// oversized buffers (both are stored in m_blocks).
WorkItem::MemoryPool::~MemoryPool()
{
  // NOTE(review): the list's element type is missing in this copy
  list::iterator itr;
  for (itr = m_blocks.begin(); itr != m_blocks.end(); itr++)
  {
    delete[] *itr;
  }
}

// Returns `size` bytes of pool-owned storage. Requests larger than the block
// size get a dedicated buffer appended to the back of the block list; all
// other requests are carved from the block at the front of the list, and a
// new block is pushed to the front when the current one cannot fit the
// request.
unsigned char* WorkItem::MemoryPool::alloc(size_t size)
{
  // Check if requested size larger than block size
  if (size > m_blockSize)
  {
    // Oversized buffers allocated separately from main pool
    unsigned char *buffer = new unsigned char[size];
    m_blocks.push_back(buffer);
    return buffer;
  }

  // Check if enough space in current block
  if (m_offset + size > m_blockSize)
  {
    // Allocate new block
    m_blocks.push_front(new unsigned char[m_blockSize]);
    m_offset = 0;
  }
  unsigned char *buffer = m_blocks.front() + m_offset;
  m_offset += size;
  return buffer;
}

// Returns a copy of `source` whose data buffer is allocated from this pool.
TypedValue WorkItem::MemoryPool::clone(const TypedValue& source)
{
  TypedValue dest;
  dest.size = source.size;
  dest.num = source.num;
  dest.data = alloc(dest.size*dest.num);
  memcpy(dest.data, source.data, dest.size*dest.num);
  return dest;
}
Oclgrind-15.5/src/core/WorkItem.h000066400000000000000000000137141252441671000166460ustar00rootroot00000000000000// WorkItem.h (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#include "common.h"

// Forward declarations of the LLVM types named in this header
namespace llvm
{
  class CallInst;
  class ConstExpr;
  class DbgValueInst;
  class Function;
  class Module;
}

namespace oclgrind
{
  class Context;
  class Kernel;
  class KernelInvocation;
  class Memory;
  class WorkGroup;
  class WorkItem;
  class WorkItemBuiltins;

  // Data structures for builtin functions

  // A builtin implementation: the function to invoke plus an opaque pointer
  // that is passed to it as the last argument.
  typedef struct _BuiltinFunction
  {
    void (*func)(WorkItem*, const llvm::CallInst*,
                 const std::string&, const std::string&, TypedValue&, void*);
    void *op;
    // NOTE(review): the default constructor leaves func and op uninitialised
    _BuiltinFunction(){};
    _BuiltinFunction(void (*f)(WorkItem*, const llvm::CallInst*,
                     const std::string&, const std::string&, TypedValue&,
                     void*),
                     void *o) : func(f), op(o) {};
  } BuiltinFunction;
  // NOTE(review): the template arguments of the two container typedefs below
  // are missing in this copy of the file
  typedef std::unordered_map BuiltinFunctionMap;
  typedef std::list< std::pair > BuiltinFunctionPrefixList;

  // Builtins looked up by exact name, and builtins matched by name prefix
  extern BuiltinFunctionMap workItemBuiltins;
  extern BuiltinFunctionPrefixList workItemPrefixBuiltins;

  // Per-kernel cache for various interpreter state information
  class InterpreterCache
  {
  public:
    // A resolved builtin call: implementation, unmangled name and the
    // overload suffix extracted from the mangled name
    typedef struct
    {
      BuiltinFunction function;
      std::string name, overload;
    } Builtin;

    InterpreterCache(llvm::Function *kernel);
    ~InterpreterCache();

    void addBuiltin(const llvm::Function *function);
    Builtin getBuiltin(const llvm::Function *function) const;

    void addConstant(const llvm::Value *constant);
    TypedValue getConstant(const llvm::Value *operand) const;
    const llvm::Instruction* getConstantExpr(const llvm::Value *expr) const;

    unsigned addValueID(const llvm::Value *value);
    unsigned getValueID(const llvm::Value *value) const;
    unsigned getNumValues() const;
    bool hasValue(const llvm::Value *value) const;

  private:
    // NOTE(review): template arguments are missing in this copy of the file
    typedef std::unordered_map ValueMap;
    typedef std::unordered_map BuiltinMap;
    typedef std::unordered_map ConstantMap;
    typedef std::unordered_map ConstExprMap;

    BuiltinMap m_builtins;
    ConstantMap m_constants;
    ConstExprMap m_constExpressions;
    ValueMap m_valueIDs;

    void addOperand(const llvm::Value *value);
  };

  // Interpreter state and instruction handlers for a single work-item
  class WorkItem
  {
    friend class WorkItemBuiltins;

  public:
    enum State {READY, BARRIER, FINISHED};

  private:
    // Block-based allocator used for value storage; everything it hands out
    // is owned by the pool
    class MemoryPool
    {
    public:
      MemoryPool(size_t blockSize = 1024);
      ~MemoryPool();
      unsigned char* alloc(size_t size);
      TypedValue clone(const TypedValue& source);
    private:
      size_t m_blockSize;
      size_t m_offset;
      // NOTE(review): the list's element type is missing in this copy
      std::list m_blocks;
    } mutable m_pool;

  public:
    WorkItem(const KernelInvocation *kernelInvocation,
             WorkGroup *workGroup, Size3 lid);
    virtual ~WorkItem();

    void clearBarrier();
    void dispatch(const llvm::Instruction *instruction, TypedValue& result);
    void execute(const llvm::Instruction *instruction);
    const std::stack& getCallStack() const;
    const llvm::Instruction* getCurrentInstruction() const;
    Size3 getGlobalID() const;
    size_t getGlobalIndex() const;
    Size3 getLocalID() const;
    TypedValue getOperand(const llvm::Value *operand) const;
    Memory* getPrivateMemory() const;
    State getState() const;
    const unsigned char* getValueData(const llvm::Value *value) const;
    const llvm::Value* getVariable(std::string name) const;
    const WorkGroup* getWorkGroup() const;
    bool printValue(const llvm::Value *value) const;
    bool printVariable(std::string name) const;
    State step();

    // SPIR instructions
  private:
    // Declares one handler per instruction, all sharing the same signature
#define INSTRUCTION(name) \
    void name(const llvm::Instruction *instruction, TypedValue& result)
    INSTRUCTION(add);
    INSTRUCTION(alloc);
    INSTRUCTION(ashr);
    INSTRUCTION(bitcast);
    INSTRUCTION(br);
    INSTRUCTION(bwand);
    INSTRUCTION(bwor);
    INSTRUCTION(bwxor);
    INSTRUCTION(call);
    INSTRUCTION(extractelem);
    INSTRUCTION(extractval);
    INSTRUCTION(fadd);
    INSTRUCTION(fcmp);
    INSTRUCTION(fdiv);
    INSTRUCTION(fmul);
    INSTRUCTION(fpext);
    INSTRUCTION(fptosi);
    INSTRUCTION(fptoui);
    INSTRUCTION(fptrunc);
    INSTRUCTION(frem);
    INSTRUCTION(fsub);
    INSTRUCTION(gep);
    INSTRUCTION(icmp);
    INSTRUCTION(insertelem);
    INSTRUCTION(insertval);
    INSTRUCTION(inttoptr);
    INSTRUCTION(itrunc);
    INSTRUCTION(load);
    INSTRUCTION(lshr);
    INSTRUCTION(mul);
    INSTRUCTION(phi);
    INSTRUCTION(ptrtoint);
    INSTRUCTION(ret);
    INSTRUCTION(sdiv);
    INSTRUCTION(select);
    INSTRUCTION(sext);
    INSTRUCTION(shl);
    INSTRUCTION(shuffle);
    INSTRUCTION(sitofp);
    INSTRUCTION(srem);
    INSTRUCTION(store);
    INSTRUCTION(sub);
    INSTRUCTION(swtch);
    INSTRUCTION(udiv);
    INSTRUCTION(uitofp);
    INSTRUCTION(urem);
    INSTRUCTION(zext);
#undef INSTRUCTION

  private:
    // NOTE(review): the map's template arguments are missing in this copy
    typedef std::map VariableMap;

    size_t m_globalIndex;
    Size3 m_globalID;
    Size3 m_localID;
    TypedValueMap m_phiTemps;
    VariableMap m_variables;
    const Context *m_context;
    const KernelInvocation *m_kernelInvocation;
    Memory *m_privateMemory;
    WorkGroup *m_workGroup;

    State m_state;
    struct Position;
    Position *m_position;

    Memory* getMemory(unsigned int addrSpace) const;

    // Store for instruction results and other operand values
    // NOTE(review): the vector's element type is missing in this copy
    std::vector m_values;
    TypedValue getValue(const llvm::Value *key) const;
    bool hasValue(const llvm::Value *key) const;
    void setValue(const llvm::Value *key, TypedValue value);

    const InterpreterCache *m_cache;
  };
}
Oclgrind-15.5/src/core/WorkItemBuiltins.cpp000066400000000000000000003137621252441671000207210ustar00rootroot00000000000000// WorkItemBuiltins.cpp (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#include "common.h"

// NOTE(review): the header names of the next three includes are missing in
// this copy of the file.
#include
#include
#include

#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#if LLVM_VERSION > 36
#include "llvm/IR/DebugInfoMetadata.h"
#endif

#include "CL/cl.h"
#include "Context.h"
#include "half.h"
#include "KernelInvocation.h"
#include "Memory.h"
#include "WorkGroup.h"
#include "WorkItem.h"

using namespace oclgrind;
using namespace std;

// Sampler bit-field values: normalized-coordinates flag, addressing mode
// (selected with CLK_ADDRESS_MASK) and filter mode
#define CLK_NORMALIZED_COORDS_TRUE 0x0001

#define CLK_ADDRESS_NONE 0x0000
#define CLK_ADDRESS_CLAMP_TO_EDGE 0x0002
#define CLK_ADDRESS_CLAMP 0x0004
#define CLK_ADDRESS_REPEAT 0x0006
#define CLK_ADDRESS_MIRRORED_REPEAT 0x0008
#define CLK_ADDRESS_MASK 0x000E

#define CLK_FILTER_NEAREST 0x0010
#define CLK_FILTER_LINEAR 0x0020

// Fallback for toolchains whose math header does not define M_PI
#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795
#endif

namespace oclgrind
{
static mutex printfMutex;

class WorkItemBuiltins
{
// Utility macros for creating builtins

// DEFINE_BUILTIN declares a static builtin with the common signature; the
// trailing unnamed void* parameter is unused by builtins declared this way.
#define DEFINE_BUILTIN(name)                                               \
static void name(WorkItem *workItem, const llvm::CallInst *callInst,       \
                 const string& fnName, const string& overload,             \
                 TypedValue& result, void *)
// ARG(i) is the i-th call argument as an llvm::Value. The xARGV(i,v) macros
// read element v of argument i as unsigned / signed / float / pointer, and
// the xARG(i) macros read element 0.
#define ARG(i) (callInst->getArgOperand(i))
#define UARGV(i,v) workItem->getOperand(ARG(i)).getUInt(v)
#define SARGV(i,v) workItem->getOperand(ARG(i)).getSInt(v)
#define FARGV(i,v) workItem->getOperand(ARG(i)).getFloat(v)
#define PARGV(i,v) workItem->getOperand(ARG(i)).getPointer(v)
#define UARG(i) UARGV(i, 0)
#define SARG(i) SARGV(i, 0)
#define FARG(i) FARGV(i, 0)
#define PARG(i) PARGV(i, 0)

// Functions that apply generic builtins to each component of a vector

// Applies a one-argument floating point function to each element.
static void f1arg(WorkItem *workItem, const llvm::CallInst *callInst,
                  const string& name, const string& overload,
                  TypedValue& result, double (*func)(double))
{
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setFloat(func(FARGV(0, i)), i);
  }
}

// Applies a two-argument floating point function to each element.
static void f2arg(WorkItem *workItem, const llvm::CallInst *callInst,
                  const string& name, const string& overload,
                  TypedValue& result, double (*func)(double, double))
{
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setFloat(func(FARGV(0, i), FARGV(1, i)), i);
  }
}

// Applies a three-argument floating point function to each element.
static void f3arg(WorkItem *workItem, const llvm::CallInst *callInst,
                  const string& name, const string& overload,
                  TypedValue& result,
                  double (*func)(double, double, double))
{
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setFloat(func(FARGV(0, i), FARGV(1, i), FARGV(2, i)), i);
  }
}

// Applies a one-argument unsigned integer function to each element.
static void u1arg(WorkItem *workItem, const llvm::CallInst *callInst,
                  const string& name, const string& overload,
                  TypedValue& result, uint64_t (*func)(uint64_t))
{
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setUInt(func(UARGV(0, i)), i);
  }
}

// Applies a two-argument unsigned integer function to each element.
static void u2arg(WorkItem *workItem, const llvm::CallInst *callInst,
                  const string& name, const string& overload,
                  TypedValue& result, uint64_t (*func)(uint64_t, uint64_t))
{
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setUInt(func(UARGV(0, i), UARGV(1, i)), i);
  }
}

// Applies a three-argument unsigned integer function to each element.
static void u3arg(WorkItem *workItem, const llvm::CallInst *callInst,
                  const string& name, const string& overload,
                  TypedValue& result,
                  uint64_t (*func)(uint64_t, uint64_t, uint64_t))
{
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setUInt(func(UARGV(0, i), UARGV(1, i), UARGV(2, i)), i);
  }
}

// Applies a one-argument signed integer function to each element.
static void s1arg(WorkItem *workItem, const llvm::CallInst *callInst,
                  const string& name, const string& overload,
                  TypedValue& result, int64_t (*func)(int64_t))
{
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setSInt(func(SARGV(0, i)), i);
  }
}

// Applies a two-argument signed integer function to each element.
static void s2arg(WorkItem *workItem, const llvm::CallInst *callInst,
                  const string& name, const string& overload,
                  TypedValue& result, int64_t (*func)(int64_t, int64_t))
{
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setSInt(func(SARGV(0, i), SARGV(1, i)), i);
  }
}

// Applies a three-argument signed integer function to each element.
static void s3arg(WorkItem *workItem, const llvm::CallInst *callInst,
                  const string& name, const string& overload,
                  TypedValue& result,
                  int64_t (*func)(int64_t, int64_t, int64_t))
{
  for (unsigned i = 0; i < result.num; i++)
  {
    result.setSInt(func(SARGV(0, i), SARGV(1, i), SARGV(2, i)), i);
  }
}
static void rel1arg(WorkItem *workItem, const llvm::CallInst *callInst, const string& name, const string& overload, TypedValue& result, int64_t (*func)(double)) { int64_t t = result.num > 1 ? -1 : 1; for (unsigned i = 0; i < result.num; i++) { result.setSInt(func(FARGV(0, i))*t, i); } } static void rel2arg(WorkItem *workItem, const llvm::CallInst *callInst, const string& name, const string& overload, TypedValue& result, int64_t (*func)(double, double)) { int64_t t = result.num > 1 ? -1 : 1; for (unsigned i = 0; i < result.num; i++) { result.setSInt(func(FARGV(0, i), FARGV(1, i))*t, i); } } // Extract the (first) argument type from an overload string static char getOverloadArgType(const string& overload) { char type = overload[0]; if (type == 'D') { char *typestr; strtol(overload.c_str() + 2, &typestr, 10); type = typestr[1]; } return type; } /////////////////////////////////////// // Async Copy and Prefetch Functions // /////////////////////////////////////// DEFINE_BUILTIN(async_work_group_copy) { int arg = 0; // Get src/dest addresses const llvm::Value *destOp = ARG(arg++); const llvm::Value *srcOp = ARG(arg++); size_t dest = workItem->getOperand(destOp).getPointer(); size_t src = workItem->getOperand(srcOp).getPointer(); // Get size of copy unsigned elemSize = getTypeSize(destOp->getType()->getPointerElementType()); uint64_t num = UARG(arg++); // Get stride uint64_t stride = 1; size_t srcStride = 1; size_t destStride = 1; if (fnName == "async_work_group_strided_copy") { stride = UARG(arg++); } size_t event = UARG(arg++); // Get type of copy WorkGroup::AsyncCopyType type; if (destOp->getType()->getPointerAddressSpace() == AddrSpaceLocal) { type = WorkGroup::GLOBAL_TO_LOCAL; srcStride = stride; } else { type = WorkGroup::LOCAL_TO_GLOBAL; destStride = stride; } // Register copy event = workItem->m_workGroup->async_copy( workItem, callInst, type, dest, src, elemSize, num, srcStride, destStride, event); result.setUInt(event); } DEFINE_BUILTIN(wait_group_events) { 
uint64_t num = UARG(0); size_t address = PARG(1); list events; for (unsigned i = 0; i < num; i++) { size_t event; if (!workItem->m_privateMemory->load((unsigned char*)&event, address, sizeof(size_t))) { return; } events.push_back(event); address += sizeof(size_t); } workItem->m_state = WorkItem::BARRIER; workItem->m_workGroup->notifyBarrier(workItem, callInst, CLK_LOCAL_MEM_FENCE, events); } DEFINE_BUILTIN(prefetch) { // Do nothing. } ////////////////////// // Atomic Functions // ////////////////////// DEFINE_BUILTIN(atomic_add) { Memory *memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on atomic_add"); } uint32_t old = memory->atomic(AtomicAdd, address, UARG(1)); result.setUInt(old); } DEFINE_BUILTIN(atomic_and) { Memory *memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on atomic_and"); } uint32_t old = memory->atomic(AtomicAnd, address, UARG(1)); result.setUInt(old); } DEFINE_BUILTIN(atomic_cmpxchg) { Memory *memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on atomic_cmpxchg"); } uint32_t old = memory->atomicCmpxchg(address, UARG(1), UARG(2)); result.setUInt(old); } DEFINE_BUILTIN(atomic_dec) { Memory *memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on atomic_dec"); } uint32_t old = memory->atomic(AtomicDec, address); result.setUInt(old); } DEFINE_BUILTIN(atomic_inc) { Memory *memory = 
workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on atomic_dec"); } uint32_t old = memory->atomic(AtomicInc, address); result.setUInt(old); } DEFINE_BUILTIN(atomic_max) { Memory *memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on atomic_max"); } uint32_t old = memory->atomic(AtomicMax, address, UARG(1)); result.setUInt(old); } DEFINE_BUILTIN(atomic_min) { Memory *memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on atomic_min"); } uint32_t old = memory->atomic(AtomicMin, address, UARG(1)); result.setUInt(old); } DEFINE_BUILTIN(atomic_or) { Memory *memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on atomic_or"); } uint32_t old = memory->atomic(AtomicOr, address, UARG(1)); result.setUInt(old); } DEFINE_BUILTIN(atomic_sub) { Memory *memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on atomic_sub"); } uint32_t old = memory->atomic(AtomicSub, address, UARG(1)); result.setUInt(old); } DEFINE_BUILTIN(atomic_xchg) { Memory *memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on 
atomic_xchg"); } uint32_t old = memory->atomic(AtomicXchg, address, UARG(1)); result.setUInt(old); } DEFINE_BUILTIN(atomic_xor) { Memory *memory = workItem->getMemory(ARG(0)->getType()->getPointerAddressSpace()); size_t address = PARG(0); // Verify the address is 4-byte aligned if ((address & 0x3) != 0) { workItem->m_context->logError("Unaligned address on atomic_xor"); } uint32_t old = memory->atomic(AtomicXor, address, UARG(1)); result.setUInt(old); } ////////////////////// // Common Functions // ////////////////////// template T static _max_(T a, T b){return a > b ? a : b;} template T static _min_(T a, T b){return a < b ? a : b;} template T static _clamp_(T x, T min, T max) { return _min_(_max_(x, min), max); } static double _degrees_(double x) { return x * (180 / M_PI); } static double _radians_(double x) { return x * (M_PI / 180); } static double _sign_(double x) { if (::isnan(x)) return 0.0; if (x > 0.0) return 1.0; if (x == -0.0) return -0.0; if (x == 0.0) return 0.0; if (x < 0.0) return -1.0; return 0.0; } DEFINE_BUILTIN(clamp) { switch (getOverloadArgType(overload)) { case 'f': case 'd': if (ARG(1)->getType()->isVectorTy()) { f3arg(workItem, callInst, fnName, overload, result, _clamp_); } else { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double minval = FARG(1); double maxval = FARG(2); result.setFloat(_clamp_(x, minval, maxval), i); } } break; case 'h': case 't': case 'j': case 'm': u3arg(workItem, callInst, fnName, overload, result, _clamp_); break; case 'c': case 's': case 'i': case 'l': s3arg(workItem, callInst, fnName, overload, result, _clamp_); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } DEFINE_BUILTIN(max) { switch (getOverloadArgType(overload)) { case 'f': case 'd': if (ARG(1)->getType()->isVectorTy()) { f2arg(workItem, callInst, fnName, overload, result, fmax); } else { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double y = FARG(1); 
result.setFloat(_max_(x, y), i); } } break; case 'h': case 't': case 'j': case 'm': u2arg(workItem, callInst, fnName, overload, result, _max_); break; case 'c': case 's': case 'i': case 'l': s2arg(workItem, callInst, fnName, overload, result, _max_); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } DEFINE_BUILTIN(min) { switch (getOverloadArgType(overload)) { case 'f': case 'd': if (ARG(1)->getType()->isVectorTy()) { f2arg(workItem, callInst, fnName, overload, result, fmin); } else { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double y = FARG(1); result.setFloat(_min_(x, y), i); } } break; case 'h': case 't': case 'j': case 'm': u2arg(workItem, callInst, fnName, overload, result, _min_); break; case 'c': case 's': case 'i': case 'l': s2arg(workItem, callInst, fnName, overload, result, _min_); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } DEFINE_BUILTIN(mix) { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double y = FARGV(1, i); double a = ARG(2)->getType()->isVectorTy() ? FARGV(2, i) : FARG(2); double r = x + (y - x) * a; result.setFloat(r, i); } } DEFINE_BUILTIN(smoothstep) { for (unsigned i = 0; i < result.num; i++) { double edge0 = ARG(0)->getType()->isVectorTy() ? FARGV(0, i) : FARG(0); double edge1 = ARG(1)->getType()->isVectorTy() ? FARGV(1, i) : FARG(1); double x = FARGV(2, i); double t = _clamp_((x - edge0) / (edge1 - edge0), 0, 1); double r = t * t * (3 - 2*t); result.setFloat(r, i); } } DEFINE_BUILTIN(step) { for (unsigned i = 0; i < result.num; i++) { double edge = ARG(0)->getType()->isVectorTy() ? FARGV(0, i) : FARG(0); double x = FARGV(1, i); double r = (x < edge) ? 
0.0 : 1.0; result.setFloat(r, i); } } ///////////////////////// // Geometric Functions // ///////////////////////// DEFINE_BUILTIN(cross) { double u1 = FARGV(0, 0); double u2 = FARGV(0, 1); double u3 = FARGV(0, 2); double v1 = FARGV(1, 0); double v2 = FARGV(1, 1); double v3 = FARGV(1, 2); result.setFloat(u2*v3 - u3*v2, 0); result.setFloat(u3*v1 - u1*v3, 1); result.setFloat(u1*v2 - u2*v1, 2); result.setFloat(0, 3); } DEFINE_BUILTIN(dot) { unsigned num = 1; if (ARG(0)->getType()->isVectorTy()) { num = ARG(0)->getType()->getVectorNumElements(); } double r = 0.f; for (unsigned i = 0; i < num; i++) { double a = FARGV(0, i); double b = FARGV(1, i); r += a * b; } result.setFloat(r); } DEFINE_BUILTIN(distance) { unsigned num = 1; if (ARG(0)->getType()->isVectorTy()) { num = ARG(0)->getType()->getVectorNumElements(); } double distSq = 0.0; for (unsigned i = 0; i < num; i++) { double diff = FARGV(0,i) - FARGV(1,i); distSq += diff*diff; } result.setFloat(sqrt(distSq)); } DEFINE_BUILTIN(length) { unsigned num = 1; if (ARG(0)->getType()->isVectorTy()) { num = ARG(0)->getType()->getVectorNumElements(); } double lengthSq = 0.0; for (unsigned i = 0; i < num; i++) { lengthSq += FARGV(0, i) * FARGV(0, i); } result.setFloat(sqrt(lengthSq)); } DEFINE_BUILTIN(normalize) { double lengthSq = 0.0; for (unsigned i = 0; i < result.num; i++) { lengthSq += FARGV(0, i) * FARGV(0, i); } double length = sqrt(lengthSq); for (unsigned i = 0; i < result.num; i++) { result.setFloat(FARGV(0, i)/length, i); } } ///////////////////// // Image Functions // ///////////////////// static size_t getChannelSize(const cl_image_format& format) { switch (format.image_channel_data_type) { case CL_SNORM_INT8: case CL_UNORM_INT8: case CL_SIGNED_INT8: case CL_UNSIGNED_INT8: return 1; case CL_SNORM_INT16: case CL_UNORM_INT16: case CL_SIGNED_INT16: case CL_UNSIGNED_INT16: case CL_HALF_FLOAT: return 2; case CL_SIGNED_INT32: case CL_UNSIGNED_INT32: case CL_FLOAT: return 4; default: return 0; } } static size_t 
getNumChannels(const cl_image_format& format) { switch (format.image_channel_order) { case CL_R: case CL_Rx: case CL_A: case CL_INTENSITY: case CL_LUMINANCE: return 1; case CL_RG: case CL_RGx: case CL_RA: return 2; case CL_RGB: case CL_RGBx: return 3; case CL_RGBA: case CL_ARGB: case CL_BGRA: return 4; default: return 0; } } static bool hasZeroAlphaBorder(const cl_image_format& format) { switch (format.image_channel_order) { case CL_A: case CL_INTENSITY: case CL_Rx: case CL_RA: case CL_RGx: case CL_RGBx: case CL_ARGB: case CL_BGRA: case CL_RGBA: return true; default: return false; } } DEFINE_BUILTIN(get_image_array_size) { Image *image = *(Image**)(workItem->getValue(ARG(0)).data); result.setUInt(image->desc.image_array_size); } DEFINE_BUILTIN(get_image_channel_data_type) { Image *image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->format.image_channel_data_type); } DEFINE_BUILTIN(get_image_channel_order) { Image *image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->format.image_channel_order); } DEFINE_BUILTIN(get_image_dim) { Image *image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->desc.image_width, 0); result.setSInt(image->desc.image_height, 1); if (result.num > 2) { result.setSInt(image->desc.image_depth, 2); result.setSInt(0, 3); } } DEFINE_BUILTIN(get_image_depth) { Image *image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->desc.image_depth); } DEFINE_BUILTIN(get_image_height) { Image *image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->desc.image_height); } DEFINE_BUILTIN(get_image_width) { Image *image = *(Image**)(workItem->getValue(ARG(0)).data); result.setSInt(image->desc.image_width); } static inline float getCoordinate(const llvm::Value *value, int index, char type, WorkItem *workItem) { switch (type) { case 'i': return workItem->getOperand(value).getSInt(index); case 'f': return workItem->getOperand(value).getFloat(index); default: 
FATAL_ERROR("Unsupported coordinate type: '%c'", type); } } static inline int getNearestCoordinate(uint32_t sampler, float n, // Normalized float u, // Unormalized size_t size) { switch (sampler & CLK_ADDRESS_MASK) { case CLK_ADDRESS_NONE: return floor(u); case CLK_ADDRESS_CLAMP_TO_EDGE: return _clamp_(floor(u), 0, size - 1); case CLK_ADDRESS_CLAMP: return _clamp_(floor(u), -1, size); case CLK_ADDRESS_REPEAT: return (int)floorf((n - floorf(n))*size) % size; case CLK_ADDRESS_MIRRORED_REPEAT: return _min_((int)floorf(fabsf(n - 2.f * rintf(0.5f*n)) * size), size - 1); default: FATAL_ERROR("Unsupported sampler addressing mode: %X", sampler & CLK_ADDRESS_MASK); } } static inline float getAdjacentCoordinates(uint32_t sampler, float n, // Normalized float u, // Unnormalized size_t size, int *c0, int *c1) { switch (sampler & CLK_ADDRESS_MASK) { case CLK_ADDRESS_NONE: *c0 = floor(u); *c1 = floor(u) + 1; return u; case CLK_ADDRESS_CLAMP_TO_EDGE: *c0 = _clamp_(floorf(u - 0.5f), 0, size - 1); *c1 = _clamp_(floorf(u - 0.5f) + 1, 0, size - 1); return u; case CLK_ADDRESS_CLAMP: *c0 = _clamp_((floorf(u - 0.5f)), -1, size); *c1 = _clamp_((floorf(u - 0.5f)) + 1, -1, size); return u; case CLK_ADDRESS_REPEAT: { u = (n - floorf(n)) * size; *c0 = (int)floorf(u - 0.5f); *c1 = *c0 + 1; if (*c0 < 0) *c0 += size; if (*c1 >= size) *c1 -= size; return u; } case CLK_ADDRESS_MIRRORED_REPEAT: { u = fabsf(n - 2.0f * rintf(0.5f * n)) * size; *c0 = (int)floorf(u - 0.5f); *c1 = *c0 + 1; *c0 = _max_(*c0, 0); *c1 = _min_(*c1, (int)size-1); return u; } default: FATAL_ERROR("Unsupported sampler addressing mode: %X", sampler & CLK_ADDRESS_MASK); } } static inline int getInputChannel(const cl_image_format& format, int output, float *ret) { int input = output; switch (format.image_channel_order) { case CL_R: case CL_Rx: if (output == 1) { *ret = 0.f; return -1; } case CL_RG: case CL_RGx: if (output == 2) { *ret = 0.f; return -1; } case CL_RGB: case CL_RGBx: if (output == 3) { *ret = 1.f; return -1; } 
break; case CL_RGBA: break; case CL_BGRA: if (output == 0) input = 2; if (output == 2) input = 0; break; case CL_ARGB: if (output == 0) input = 1; if (output == 1) input = 2; if (output == 2) input = 3; if (output == 3) input = 0; break; case CL_A: if (output == 3) input = 0; else { *ret = 0.f; return -1; } break; case CL_RA: if (output == 3) input = 1; else if (output != 0) { *ret = 0.f; return -1; } break; case CL_INTENSITY: input = 0; break; case CL_LUMINANCE: if (output == 3) { *ret = 1.f; return -1; } input = 0; break; default: FATAL_ERROR("Unsupported image channel order: %X", format.image_channel_order); } return input; } static inline float readNormalizedColor(const Image *image, WorkItem *workItem, int i, int j, int k, int layer, int c) { // Check for out-of-range coordinages if (i < 0 || i >= image->desc.image_width || j < 0 || j >= image->desc.image_height || k < 0 || k >= image->desc.image_depth) { // Return border color if (c == 3 && !hasZeroAlphaBorder(image->format)) { return 1.f; } return 0.f; } // Remap channels float ret; int channel = getInputChannel(image->format, c, &ret); if (channel < 0) { return ret; } // Calculate pixel address size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize*numChannels; size_t address = image->address + (i + (j + (k + layer*image->desc.image_depth) * image->desc.image_height) * image->desc.image_width) * pixelSize + channel*channelSize; // Load channel data unsigned char *data = workItem->m_pool.alloc(channelSize); if (!workItem->getMemory(AddrSpaceGlobal)->load(data, address, channelSize)) { return 0.f; } // Compute normalized color value float color; switch (image->format.image_channel_data_type) { case CL_SNORM_INT8: color = _clamp_(*(int8_t*)data / 127.f, -1.f, 1.f); break; case CL_UNORM_INT8: color = _clamp_(*(uint8_t*)data / 255.f, 0.f, 1.f); break; case CL_SNORM_INT16: color = _clamp_(*(int16_t*)data / 32767.f, -1.f, 1.f); break; 
case CL_UNORM_INT16: color = _clamp_(*(uint16_t*)data / 65535.f, 0.f, 1.f); break; case CL_FLOAT: color = *(float*)data; break; case CL_HALF_FLOAT: color = halfToFloat(*(uint16_t*)data); break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } return color; } static inline int32_t readSignedColor(const Image *image, WorkItem *workItem, int i, int j, int k, int layer, int c) { // Check for out-of-range coordinages if (i < 0 || i >= image->desc.image_width || j < 0 || j >= image->desc.image_height || k < 0 || k >= image->desc.image_depth) { // Return border color if (c == 3 && !hasZeroAlphaBorder(image->format)) { return 1.f; } return 0.f; } // Remap channels float ret; int channel = getInputChannel(image->format, c, &ret); if (channel < 0) { return ret; } // Calculate pixel address size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize*numChannels; size_t address = image->address + (i + (j + (k + layer*image->desc.image_depth) * image->desc.image_height) * image->desc.image_width) * pixelSize + channel*channelSize; // Load channel data unsigned char *data = workItem->m_pool.alloc(channelSize); if (!workItem->getMemory(AddrSpaceGlobal)->load(data, address, channelSize)) { return 0; } // Compute unnormalized color value int32_t color; switch (image->format.image_channel_data_type) { case CL_SIGNED_INT8: color = *(int8_t*)data; break; case CL_SIGNED_INT16: color = *(int16_t*)data; break; case CL_SIGNED_INT32: color = *(int32_t*)data; break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } return color; } static inline uint32_t readUnsignedColor(const Image *image, WorkItem *workItem, int i, int j, int k, int layer, int c) { // Check for out-of-range coordinages if (i < 0 || i >= image->desc.image_width || j < 0 || j >= image->desc.image_height || k < 0 || k >= 
image->desc.image_depth) { // Return border color if (c == 3 && !hasZeroAlphaBorder(image->format)) { return 1.f; } return 0.f; } // Remap channels float ret; int channel = getInputChannel(image->format, c, &ret); if (channel < 0) { return ret; } // Calculate pixel address size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize*numChannels; size_t address = image->address + (i + (j + (k + layer*image->desc.image_depth) * image->desc.image_height) * image->desc.image_width) * pixelSize + channel*channelSize; // Load channel data unsigned char *data = workItem->m_pool.alloc(channelSize); if (!workItem->getMemory(AddrSpaceGlobal)->load(data, address, channelSize)) { return 0; } // Load color value uint32_t color; switch (image->format.image_channel_data_type) { case CL_UNSIGNED_INT8: color = *(uint8_t*)data; break; case CL_UNSIGNED_INT16: color = *(uint16_t*)data; break; case CL_UNSIGNED_INT32: color = *(uint32_t*)data; break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } return color; } static inline float frac(float x) { return x - floorf(x); } static inline float interpolate(float v000, float v010, float v100, float v110, float v001, float v011, float v101, float v111, float a, float b, float c) { return (1-a) * (1-b) * (1-c) * v000 + a * (1-b) * (1-c) * v100 + (1-a) * b * (1-c) * v010 + a * b * (1-c) * v110 + (1-a) * (1-b) * c * v001 + a * (1-b) * c * v101 + (1-a) * b * c * v011 + a * b * c * v111; } DEFINE_BUILTIN(read_imagef) { const Image *image = *(Image**)(workItem->getValue(ARG(0)).data); uint32_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int coordIndex = 1; // Check for sampler version if (callInst->getNumArgOperands() > 2) { sampler = UARG(1); coordIndex = 2; } // Get coordinates float s = 0.f, t = 0.f, r = 0.f; char coordType = *overload.rbegin(); s = getCoordinate(ARG(coordIndex), 0, coordType, workItem); if 
(ARG(coordIndex)->getType()->isVectorTy()) { t = getCoordinate(ARG(coordIndex), 1, coordType, workItem); if (ARG(coordIndex)->getType()->getVectorNumElements() > 2) { r = getCoordinate(ARG(coordIndex), 2, coordType, workItem); } } // Get unnormalized coordinates float u = 0.f, v = 0.f, w = 0.f; bool noormCoords = sampler & CLK_NORMALIZED_COORDS_TRUE; if (noormCoords) { u = s * image->desc.image_width; v = t * image->desc.image_height; w = r * image->desc.image_depth; } else { u = s; v = t; w = r; } // Get array layer index int layer = 0; if (image->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { layer = _clamp_(rintf(t), 0, image->desc.image_array_size - 1); v = t = 0.f; } else if (image->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { layer = _clamp_(rintf(r), 0, image->desc.image_array_size - 1); w = r = 0.f; } float values[4]; if (sampler & CLK_FILTER_LINEAR) { // Get coordinates of adjacent pixels int i0 = 0, i1 = 0, j0 = 0, j1 = 0, k0 = 0, k1 = 0; u = getAdjacentCoordinates(sampler, s, u, image->desc.image_width, &i0, &i1); v = getAdjacentCoordinates(sampler, t, v, image->desc.image_height, &j0, &j1); w = getAdjacentCoordinates(sampler, r, w, image->desc.image_depth, &k0, &k1); // Make sure y and z coordinates are equal for 1 and 2D images if (image->desc.image_type == CL_MEM_OBJECT_IMAGE1D || image->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { j0 = j1; k0 = k1; } else if (image->desc.image_type == CL_MEM_OBJECT_IMAGE2D || image->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { k0 = k1; } // Perform linear interpolation float a = frac(u - 0.5f); float b = frac(v - 0.5f); float c = frac(w - 0.5f); for (int i = 0; i < 4; i++) { values[i] = interpolate( readNormalizedColor(image, workItem, i0, j0, k0, layer, i), readNormalizedColor(image, workItem, i0, j1, k0, layer, i), readNormalizedColor(image, workItem, i1, j0, k0, layer, i), readNormalizedColor(image, workItem, i1, j1, k0, layer, i), readNormalizedColor(image, workItem, i0, j0, k1, layer, i), 
readNormalizedColor(image, workItem, i0, j1, k1, layer, i), readNormalizedColor(image, workItem, i1, j0, k1, layer, i), readNormalizedColor(image, workItem, i1, j1, k1, layer, i), a, b, c); } } else { // Read values from nearest pixel int i = getNearestCoordinate(sampler, s, u, image->desc.image_width); int j = getNearestCoordinate(sampler, t, v, image->desc.image_height); int k = getNearestCoordinate(sampler, r, w, image->desc.image_depth); values[0] = readNormalizedColor(image, workItem, i, j, k, layer, 0); values[1] = readNormalizedColor(image, workItem, i, j, k, layer, 1); values[2] = readNormalizedColor(image, workItem, i, j, k, layer, 2); values[3] = readNormalizedColor(image, workItem, i, j, k, layer, 3); } // Store values in result for (int i = 0; i < 4; i++) { result.setFloat(values[i], i); } } DEFINE_BUILTIN(read_imagei) { const Image *image = *(Image**)(workItem->getValue(ARG(0)).data); uint32_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int coordIndex = 1; // Check for sampler version if (callInst->getNumArgOperands() > 2) { sampler = UARG(1); coordIndex = 2; } // Get coordinates float s = 0.f, t = 0.f, r = 0.f; char coordType = *overload.rbegin(); s = getCoordinate(ARG(coordIndex), 0, coordType, workItem); if (ARG(coordIndex)->getType()->isVectorTy()) { t = getCoordinate(ARG(coordIndex), 1, coordType, workItem); if (ARG(coordIndex)->getType()->getVectorNumElements() > 2) { r = getCoordinate(ARG(coordIndex), 2, coordType, workItem); } } // Get unnormalized coordinates float u = 0.f, v = 0.f, w = 0.f; bool noormCoords = sampler & CLK_NORMALIZED_COORDS_TRUE; if (noormCoords) { u = s * image->desc.image_width; v = t * image->desc.image_height; w = r * image->desc.image_depth; } else { u = s; v = t; w = r; } // Get array layer index int layer = 0; if (image->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { layer = _clamp_(rintf(t), 0, image->desc.image_array_size - 1); v = t = 0.f; } else if (image->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { 
layer = _clamp_(rintf(r), 0, image->desc.image_array_size - 1); w = r = 0.f; } // Read values from nearest pixel int32_t values[4]; int i = getNearestCoordinate(sampler, s, u, image->desc.image_width); int j = getNearestCoordinate(sampler, t, v, image->desc.image_height); int k = getNearestCoordinate(sampler, r, w, image->desc.image_depth); values[0] = readSignedColor(image, workItem, i, j, k, layer, 0); values[1] = readSignedColor(image, workItem, i, j, k, layer, 1); values[2] = readSignedColor(image, workItem, i, j, k, layer, 2); values[3] = readSignedColor(image, workItem, i, j, k, layer, 3); // Store values in result for (int i = 0; i < 4; i++) { result.setSInt(values[i], i); } } DEFINE_BUILTIN(read_imageui) { const Image *image = *(Image**)(workItem->getValue(ARG(0)).data); uint32_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int coordIndex = 1; // Check for sampler version if (callInst->getNumArgOperands() > 2) { sampler = UARG(1); coordIndex = 2; } // Get coordinates float s = 0.f, t = 0.f, r = 0.f; char coordType = *overload.rbegin(); s = getCoordinate(ARG(coordIndex), 0, coordType, workItem); if (ARG(coordIndex)->getType()->isVectorTy()) { t = getCoordinate(ARG(coordIndex), 1, coordType, workItem); if (ARG(coordIndex)->getType()->getVectorNumElements() > 2) { r = getCoordinate(ARG(coordIndex), 2, coordType, workItem); } } // Get unnormalized coordinates float u = 0.f, v = 0.f, w = 0.f; bool noormCoords = sampler & CLK_NORMALIZED_COORDS_TRUE; if (noormCoords) { u = s * image->desc.image_width; v = t * image->desc.image_height; w = r * image->desc.image_depth; } else { u = s; v = t; w = r; } // Get array layer index int layer = 0; if (image->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { layer = _clamp_(rintf(t), 0, image->desc.image_array_size - 1); v = t = 0.f; } else if (image->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { layer = _clamp_(rintf(r), 0, image->desc.image_array_size - 1); w = r = 0.f; } // Read values from nearest pixel uint32_t 
values[4]; int i = getNearestCoordinate(sampler, s, u, image->desc.image_width); int j = getNearestCoordinate(sampler, t, v, image->desc.image_height); int k = getNearestCoordinate(sampler, r, w, image->desc.image_depth); values[0] = readUnsignedColor(image, workItem, i, j, k, layer, 0); values[1] = readUnsignedColor(image, workItem, i, j, k, layer, 1); values[2] = readUnsignedColor(image, workItem, i, j, k, layer, 2); values[3] = readUnsignedColor(image, workItem, i, j, k, layer, 3); // Store values in result for (int i = 0; i < 4; i++) { result.setUInt(values[i], i); } } DEFINE_BUILTIN(write_imagef) { Image *image = *(Image**)(workItem->getValue(ARG(0)).data); // Get pixel coordinates int x, y = 0, z = 0 ; x = SARGV(1, 0); if (ARG(1)->getType()->isVectorTy()) { y = SARGV(1, 1); if (ARG(1)->getType()->getVectorNumElements() > 2) { z = SARGV(1, 2); } } // Get color data float values[4] = { (float)FARGV(2, 0), (float)FARGV(2, 1), (float)FARGV(2, 2), (float)FARGV(2, 3), }; // Re-order color values switch (image->format.image_channel_order) { case CL_R: case CL_Rx: case CL_RG: case CL_RGx: case CL_RGB: case CL_RGBx: case CL_RGBA: case CL_INTENSITY: case CL_LUMINANCE: break; case CL_A: values[0] = values[3]; break; case CL_RA: values[1] = values[3]; break; case CL_ARGB: swap(values[2], values[3]); swap(values[1], values[2]); swap(values[0], values[1]); break; case CL_BGRA: swap(values[0], values[2]); break; default: FATAL_ERROR("Unsupported image channel order: %X", image->format.image_channel_order); } size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize*numChannels; size_t pixelAddress = image->address + (x + (y + z*image->desc.image_height) * image->desc.image_width) * pixelSize; // Generate channel values Memory *memory = workItem->getMemory(AddrSpaceGlobal); unsigned char *data = workItem->m_pool.alloc(channelSize*numChannels); for (unsigned i = 0; i < numChannels; i++) { switch 
(image->format.image_channel_data_type) { case CL_SNORM_INT8: ((int8_t*)data)[i] = rint(_clamp_(values[i] * 127.f, -128.f, 127.f)); break; case CL_UNORM_INT8: data[i] = rint(_clamp_(values[i] * 255.f, 0.f, 255.f)); break; case CL_SNORM_INT16: ((int16_t*)data)[i] = rint(_clamp_(values[i] * 32767.f, -32768.f, 32767.f)); break; case CL_UNORM_INT16: ((uint16_t*)data)[i] = rint(_clamp_(values[i] * 65535.f, 0.f, 65535.f)); break; case CL_FLOAT: ((float*)data)[i] = values[i]; break; case CL_HALF_FLOAT: ((uint16_t*)data)[i] = floatToHalf(values[i]); break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } } // Write pixel data memory->store(data, pixelAddress, channelSize*numChannels); } DEFINE_BUILTIN(write_imagei) { Image *image = *(Image**)(workItem->getValue(ARG(0)).data); // Get pixel coordinates int x, y = 0, z = 0 ; x = SARGV(1, 0); if (ARG(1)->getType()->isVectorTy()) { y = SARGV(1, 1); if (ARG(1)->getType()->getVectorNumElements() > 2) { z = SARGV(1, 2); } } // Get color data int32_t values[4] = { (int32_t)SARGV(2, 0), (int32_t)SARGV(2, 1), (int32_t)SARGV(2, 2), (int32_t)SARGV(2, 3), }; // Re-order color values switch (image->format.image_channel_order) { case CL_R: case CL_Rx: case CL_RG: case CL_RGx: case CL_RGB: case CL_RGBx: case CL_RGBA: case CL_INTENSITY: case CL_LUMINANCE: break; case CL_A: values[0] = values[3]; break; case CL_RA: values[1] = values[3]; break; case CL_ARGB: swap(values[2], values[3]); swap(values[1], values[2]); swap(values[0], values[1]); break; case CL_BGRA: swap(values[0], values[2]); break; default: FATAL_ERROR("Unsupported image channel order: %X", image->format.image_channel_order); } size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize*numChannels; size_t pixelAddress = image->address + (x + (y + z*image->desc.image_height) * image->desc.image_width) * pixelSize; // Generate channel values Memory 
*memory = workItem->getMemory(AddrSpaceGlobal); unsigned char *data = workItem->m_pool.alloc(channelSize*numChannels); for (unsigned i = 0; i < numChannels; i++) { switch (image->format.image_channel_data_type) { case CL_SIGNED_INT8: ((int8_t*)data)[i] = _clamp_(values[i], -128, 127); break; case CL_SIGNED_INT16: ((int16_t*)data)[i] = _clamp_(values[i], -32768, 32767); break; case CL_SIGNED_INT32: ((int32_t*)data)[i] = values[i]; break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } } // Write pixel data memory->store(data, pixelAddress, channelSize*numChannels); } DEFINE_BUILTIN(write_imageui) { Image *image = *(Image**)(workItem->getValue(ARG(0)).data); // Get pixel coordinates int x, y = 0, z = 0 ; x = SARGV(1, 0); if (ARG(1)->getType()->isVectorTy()) { y = SARGV(1, 1); if (ARG(1)->getType()->getVectorNumElements() > 2) { z = SARGV(1, 2); } } // Get color data uint32_t values[4] = { (uint32_t)SARGV(2, 0), (uint32_t)SARGV(2, 1), (uint32_t)SARGV(2, 2), (uint32_t)SARGV(2, 3), }; // Re-order color values switch (image->format.image_channel_order) { case CL_R: case CL_Rx: case CL_RG: case CL_RGx: case CL_RGB: case CL_RGBx: case CL_RGBA: case CL_INTENSITY: case CL_LUMINANCE: break; case CL_A: values[0] = values[3]; break; case CL_RA: values[1] = values[3]; break; case CL_ARGB: swap(values[2], values[3]); swap(values[1], values[2]); swap(values[0], values[1]); break; case CL_BGRA: swap(values[0], values[2]); break; default: FATAL_ERROR("Unsupported image channel order: %X", image->format.image_channel_order); } size_t channelSize = getChannelSize(image->format); size_t numChannels = getNumChannels(image->format); size_t pixelSize = channelSize*numChannels; size_t pixelAddress = image->address + (x + (y + z*image->desc.image_height) * image->desc.image_width) * pixelSize; // Generate channel values Memory *memory = workItem->getMemory(AddrSpaceGlobal); unsigned char *data = 
workItem->m_pool.alloc(channelSize*numChannels); for (unsigned i = 0; i < numChannels; i++) { switch (image->format.image_channel_data_type) { case CL_UNSIGNED_INT8: ((uint8_t*)data)[i] = _min_(values[i], UINT8_MAX); break; case CL_UNSIGNED_INT16: ((uint16_t*)data)[i] = _min_(values[i], UINT16_MAX); break; case CL_UNSIGNED_INT32: ((uint32_t*)data)[i] = values[i]; break; default: FATAL_ERROR("Unsupported image channel data type: %X", image->format.image_channel_data_type); } } // Write pixel data memory->store(data, pixelAddress, channelSize*numChannels); } /////////////////////// // Integer Functions // /////////////////////// DEFINE_BUILTIN(abs_builtin) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': result.setUInt(UARGV(0,i), i); break; case 'c': case 's': case 'i': case 'l': result.setSInt(abs(SARGV(0,i)), i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(abs_diff) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t a = UARGV(0, i); uint64_t b = UARGV(1, i); result.setUInt(_max_(a,b) - _min_(a,b), i); break; } case 'c': case 's': case 'i': case 'l': { int64_t a = SARGV(0, i); int64_t b = SARGV(1, i); result.setSInt(_max_(a,b) - _min_(a,b), i); break; } default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(add_sat) { for (unsigned i = 0; i < result.num; i++) { uint64_t uresult = UARGV(0,i) + UARGV(1,i); int64_t sresult = SARGV(0,i) + SARGV(1,i); switch (getOverloadArgType(overload)) { case 'h': uresult = _min_(uresult, UINT8_MAX); result.setUInt(uresult, i); break; case 't': uresult = _min_(uresult, UINT16_MAX); result.setUInt(uresult, i); break; case 'j': uresult = _min_(uresult, UINT32_MAX); result.setUInt(uresult, i); break; case 'm': uresult = (UARGV(1, i) > uresult) ? 
UINT64_MAX : uresult; result.setUInt(uresult, i); break; case 'c': sresult = _clamp_(sresult, INT8_MIN, INT8_MAX); result.setSInt(sresult, i); break; case 's': sresult = _clamp_(sresult, INT16_MIN, INT16_MAX); result.setSInt(sresult, i); break; case 'i': sresult = _clamp_(sresult, INT32_MIN, INT32_MAX); result.setSInt(sresult, i); break; case 'l': if ((SARGV(0,i)>0) == (SARGV(1,i)>0) && (SARGV(0,i)>0) != (sresult>0)) { sresult = (SARGV(0,i)>0) ? INT64_MAX : INT64_MIN; } result.setSInt(sresult, i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(clz) { for (unsigned i = 0; i < result.num; i++) { uint64_t x = UARGV(0, i); int nz = 0; while (x) { x >>= 1; nz++; } uint64_t r = ((result.size<<3) - nz); result.setUInt(r, i); } } DEFINE_BUILTIN(hadd) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t a = UARGV(0, i); uint64_t b = UARGV(1, i); uint64_t c = (a > UINT64_MAX-b) ? 
(1L<<63) : 0; result.setUInt(((a + b) >> 1) | c, i); break; } case 'c': case 's': case 'i': case 'l': { int64_t a = SARGV(0, i); int64_t b = SARGV(1, i); int64_t c = (a & b) & 1; result.setSInt((a>>1) + (b>>1) + c, i); break; } default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } static uint64_t _mad_(uint64_t a, uint64_t b, uint64_t c) { return a*b + c; } static uint64_t _umul_hi_(uint64_t x, uint64_t y, uint64_t bits) { if (bits == 64) { uint64_t xl = x & UINT32_MAX; uint64_t xh = x >> 32; uint64_t yl = y & UINT32_MAX; uint64_t yh = y >> 32; uint64_t xlyl = xl*yl; uint64_t xlyh = xl*yh; uint64_t xhyl = xh*yl; uint64_t xhyh = xh*yh; uint64_t a = xhyl + ((xlyl)>>32); uint64_t al = a & UINT32_MAX; uint64_t ah = a >> 32; uint64_t b = ((al + xlyh)>>32) + ah; return xhyh + b; } else { return (x*y) >> bits; } } static int64_t _smul_hi_(int64_t x, int64_t y, int64_t bits) { if (bits == 64) { int64_t xl = x & UINT32_MAX; int64_t xh = x >> 32; int64_t yl = y & UINT32_MAX; int64_t yh = y >> 32; int64_t xlyl = xl*yl; int64_t xlyh = xl*yh; int64_t xhyl = xh*yl; int64_t xhyh = xh*yh; int64_t a = xhyl + ((xlyl>>32) & UINT32_MAX); int64_t al = a & UINT32_MAX; int64_t ah = a >> 32; int64_t b = ((al + xlyh)>>32) + ah; return xhyh + b; } else { return (x*y) >> bits; } } DEFINE_BUILTIN(mad_hi) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t r = _umul_hi_(UARGV(0, i), UARGV(1, i), result.size<<3) + UARGV(2, i); result.setUInt(r, i); break; } case 'c': case 's': case 'i': case 'l': { int64_t r = _smul_hi_(SARGV(0, i), SARGV(1, i), result.size<<3) + SARGV(2, i); result.setSInt(r, i); break; } default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(mad_sat) { for (unsigned i = 0; i < result.num; i++) { uint64_t uresult = UARGV(0,i)*UARGV(1,i) + UARGV(2,i); int64_t sresult = SARGV(0,i)*SARGV(1,i) + SARGV(2,i); 
switch (getOverloadArgType(overload)) { case 'h': uresult = _min_(uresult, UINT8_MAX); result.setUInt(uresult, i); break; case 't': uresult = _min_(uresult, UINT16_MAX); result.setUInt(uresult, i); break; case 'j': uresult = _min_(uresult, UINT32_MAX); result.setUInt(uresult, i); break; case 'm': { uint64_t hi = _umul_hi_(UARGV(0, i), UARGV(1, i), 64); if (hi || UARGV(2, i) > uresult) { uresult = UINT64_MAX; } result.setUInt(uresult, i); break; } case 'c': sresult = _clamp_(sresult, INT8_MIN, INT8_MAX); result.setSInt(sresult, i); break; case 's': sresult = _clamp_(sresult, INT16_MIN, INT16_MAX); result.setSInt(sresult, i); break; case 'i': sresult = _clamp_(sresult, INT32_MIN, INT32_MAX); result.setSInt(sresult, i); break; case 'l': // Check for overflow in multiplication if (_smul_hi_(SARGV(0, i), SARGV(1, i), 64)) { sresult = (SARGV(0,i)>0) ^ (SARGV(1,i)>0) ? INT64_MIN : INT64_MAX; } else { // Check for overflow in addition int64_t m = SARGV(0, i) * SARGV(1, i); if ((m>0) == (SARGV(2,i)>0) && (m>0) != (sresult>0)) { sresult = (m>0) ? 
INT64_MAX : INT64_MIN; } } result.setSInt(sresult, i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } static uint64_t _mul_(uint64_t a, uint64_t b) { return a*b; } DEFINE_BUILTIN(mul_hi) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t r = _umul_hi_(UARGV(0, i), UARGV(1, i), result.size<<3); result.setUInt(r, i); break; } case 'c': case 's': case 'i': case 'l': { int64_t r = _smul_hi_(SARGV(0, i), SARGV(1, i), result.size<<3); result.setSInt(r, i); break; } default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } static uint64_t _popcount_(uint64_t x) { int i = 0; while (x) { i += (x & 0x1); x >>= 1; } return i; } DEFINE_BUILTIN(rhadd) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': { uint64_t a = UARGV(0, i); uint64_t b = UARGV(1, i); uint64_t c = (a > UINT64_MAX-(b+1)) ? (1L<<63) : 0; result.setUInt(((a + b + 1) >> 1) | c, i); break; } case 'c': case 's': case 'i': case 'l': { int64_t a = SARGV(0, i); int64_t b = SARGV(1, i); int64_t c = (a | b) & 1; result.setSInt((a>>1) + (b>>1) + c, i); break; } default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(rotate) { for (unsigned i = 0; i < result.num; i++) { uint64_t width = (result.size << 3); uint64_t v = UARGV(0, i); uint64_t ls = UARGV(1, i) % width; uint64_t rs = width - ls; result.setUInt((v << ls) | (v >> rs), i); } } DEFINE_BUILTIN(sub_sat) { for (unsigned i = 0; i < result.num; i++) { uint64_t uresult = UARGV(0,i) - UARGV(1,i); int64_t sresult = SARGV(0,i) - SARGV(1,i); switch (getOverloadArgType(overload)) { case 'h': uresult = uresult > UINT8_MAX ? 0 : uresult; result.setUInt(uresult, i); break; case 't': uresult = uresult > UINT16_MAX ? 
0 : uresult; result.setUInt(uresult, i); break; case 'j': uresult = uresult > UINT32_MAX ? 0 : uresult; result.setUInt(uresult, i); break; case 'm': uresult = (UARGV(1, i) > UARGV(0, i)) ? 0 : uresult; result.setUInt(uresult, i); break; case 'c': sresult = _clamp_(sresult, INT8_MIN, INT8_MAX); result.setSInt(sresult, i); break; case 's': sresult = _clamp_(sresult, INT16_MIN, INT16_MAX); result.setSInt(sresult, i); break; case 'i': sresult = _clamp_(sresult, INT32_MIN, INT32_MAX); result.setSInt(sresult, i); break; case 'l': if ((SARGV(0,i)>0) != (SARGV(1,i)>0) && (SARGV(0,i)>0) != (sresult>0)) { sresult = (SARGV(0,i)>0) ? INT64_MAX : INT64_MIN; } result.setSInt(sresult, i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(upsample) { for (unsigned i = 0; i < result.num; i++) { uint64_t r = (UARGV(0,i)<<(result.size<<2)) | UARGV(1, i); result.setUInt(r, i); } } //////////////////// // Math Functions // //////////////////// static double _acospi_(double x){ return (acos(x) / M_PI); } static double _asinpi_(double x){ return (asin(x) / M_PI); } static double _atanpi_(double x){ return (atan(x) / M_PI); } static double _atan2pi_(double x, double y){ return (atan2(x, y) / M_PI); } static double _cospi_(double x){ return (cos(x * M_PI)); } static double _exp10_(double x){ return pow(10, x); } static double _fdivide_(double x, double y){ return x/y; } static double _frecip_(double x){ return 1.0/x; } static double _rsqrt_(double x){ return 1.0 / sqrt(x); } static double _sinpi_(double x){ return (sin(x * M_PI)); } static double _tanpi_(double x){ return (tan(x * M_PI)); } static double _fma_(double a, double b, double c) { return a*b + c; } static double _maxmag_(double x, double y) { double _x = fabs(x); double _y = fabs(y); if (_x > _y) { return x; } else if (_y > _x) { return y; } else { return fmax(x, y); } } static double _minmag_(double x, double y) { double _x = fabs(x); double _y = fabs(y); if (_x < 
_y) { return x; } else if (_y < _x) { return y; } else { return fmin(x, y); } } DEFINE_BUILTIN(fract) { Memory *memory = workItem->getMemory(ARG(1)->getType()->getPointerAddressSpace()); size_t iptr = PARG(1); for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double fl = floor(x); #if defined(_WIN32) && !defined(__MINGW32__) double r = fmin(x - fl, nextafter(1, 0)); #else double r = fmin(x - fl, 0x1.fffffep-1f); #endif size_t offset = i*result.size; result.setFloat(fl, i); memory->store(result.data + offset, iptr + offset, result.size); result.setFloat(r, i); } } DEFINE_BUILTIN(frexp_builtin) { Memory *memory = workItem->getMemory(ARG(1)->getType()->getPointerAddressSpace()); size_t iptr = PARG(1); for (unsigned i = 0; i < result.num; i++) { int32_t e; double r = frexp(FARGV(0, i), &e); memory->store((const unsigned char*)&e, iptr + i*4, 4); result.setFloat(r, i); } } DEFINE_BUILTIN(ilogb_builtin) { for (unsigned i = 0; i < result.num; i++) { result.setSInt(ilogb(FARGV(0, i)), i); } } DEFINE_BUILTIN(ldexp_builtin) { for (unsigned i = 0; i < result.num; i++) { result.setFloat(ldexp(FARGV(0, i), SARGV(1, i)), i); } } DEFINE_BUILTIN(lgamma_r) { Memory *memory = workItem->getMemory(ARG(1)->getType()->getPointerAddressSpace()); size_t signp = PARG(1); for (unsigned i = 0; i < result.num; i++) { double r = lgamma(FARGV(0, i)); int32_t s = (tgamma(FARGV(0, i)) < 0 ? -1 : 1); memory->store((const unsigned char*)&s, signp + i*4, 4); result.setFloat(r, i); } } DEFINE_BUILTIN(modf_builtin) { Memory *memory = workItem->getMemory(ARG(1)->getType()->getPointerAddressSpace()); size_t iptr = PARG(1); for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double integral = trunc(x); double fractional = copysign(::isinf(x) ? 
0.0 : x - integral, x); size_t offset = i*result.size; result.setFloat(integral, i); memory->store(result.data + offset, iptr + offset, result.size); result.setFloat(fractional, i); } } DEFINE_BUILTIN(nan_builtin) { for (unsigned i = 0; i < result.num; i++) { result.setFloat(nan(""), i); } } DEFINE_BUILTIN(nextafter_builtin) { for (unsigned i = 0; i < result.num; i++) { if (result.size == 4) result.setFloat(nextafterf(FARGV(0, i), FARGV(1, i)), i); else result.setFloat(nextafter(FARGV(0, i), FARGV(1, i)), i); } } DEFINE_BUILTIN(pown) { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); int32_t y = SARGV(1, i); result.setFloat(pow(x, y), i); } } DEFINE_BUILTIN(remquo_builtin) { Memory *memory = workItem->getMemory(ARG(2)->getType()->getPointerAddressSpace()); size_t quop = PARG(2); for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); double y = FARGV(1, i); int32_t quo; double rem = remquo(x, y, &quo); memory->store((const unsigned char*)&quo, quop + i*4, 4); result.setFloat(rem, i); } } DEFINE_BUILTIN(rootn) { for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); int y = SARGV(1, i); result.setFloat(pow(x, (double)(1.0/y)), i); } } DEFINE_BUILTIN(sincos) { Memory *memory = workItem->getMemory(ARG(1)->getType()->getPointerAddressSpace()); size_t cv = PARG(1); for (unsigned i = 0; i < result.num; i++) { double x = FARGV(0, i); size_t offset = i*result.size; result.setFloat(cos(x), i); memory->store(result.data + offset, cv + offset, result.size); result.setFloat(sin(x), i); } } //////////////////////////// // Misc. 
Vector Functions // //////////////////////////// DEFINE_BUILTIN(shuffle_builtin) { for (unsigned i = 0; i < result.num; i++) { result.setUInt(UARGV(0, UARGV(1, i)), i); } } DEFINE_BUILTIN(shuffle2_builtin) { for (unsigned i = 0; i < result.num; i++) { uint64_t m = 1; if (ARG(0)->getType()->isVectorTy()) { m = ARG(0)->getType()->getVectorNumElements(); } uint64_t src = 0; uint64_t index = UARGV(2, i); if (index >= m) { index -= m; src = 1; } result.setUInt(UARGV(src, index), i); } } ////////////////////////// // Relational Functions // ////////////////////////// static int64_t _iseq_(double x, double y){ return x == y; } static int64_t _isneq_(double x, double y){ return x != y; } static int64_t _isgt_(double x, double y){ return isgreater(x, y); } static int64_t _isge_(double x, double y){ return isgreaterequal(x, y); } static int64_t _islt_(double x, double y){ return isless(x, y); } static int64_t _isle_(double x, double y){ return islessequal(x, y); } static int64_t _islg_(double x, double y){ return islessgreater(x, y); } static int64_t _isfin_(double x){ return isfinite(x); } static int64_t _isinf_(double x){ return ::isinf(x); } static int64_t _isnan_(double x){ return ::isnan(x); } static int64_t _isnorm_(double x){ return isnormal(x); } static int64_t _isord_(double x, double y){ return !isunordered(x, y); } static int64_t _isuord_(double x, double y){ return isunordered(x, y); } static int64_t _signbit_(double x){ return signbit(x); } DEFINE_BUILTIN(all) { unsigned num = 1; if (ARG(0)->getType()->isVectorTy()) { num = ARG(0)->getType()->getVectorNumElements(); } for (unsigned i = 0; i < num; i++) { if (!(SARGV(0, i) & INT64_MIN)) { result.setSInt(0); return; } } result.setSInt(1); } DEFINE_BUILTIN(any) { unsigned num = 1; if (ARG(0)->getType()->isVectorTy()) { num = ARG(0)->getType()->getVectorNumElements(); } for (unsigned i = 0; i < num; i++) { if (SARGV(0, i) & INT64_MIN) { result.setSInt(1); return; } } result.setSInt(0); } static uint64_t 
_ibitselect_(uint64_t a, uint64_t b, uint64_t c) { return ((a & ~c) | (b & c)); } static double _fbitselect_(double a, double b, double c) { uint64_t _a = *(uint64_t*)&a; uint64_t _b = *(uint64_t*)&b; uint64_t _c = *(uint64_t*)&c; uint64_t _r = _ibitselect_(_a, _b, _c); return *(double*)&_r; } DEFINE_BUILTIN(bitselect) { switch (getOverloadArgType(overload)) { case 'f': case 'd': f3arg(workItem, callInst, fnName, overload, result, _fbitselect_); break; case 'h': case 't': case 'j': case 'm': case 'c': case 's': case 'i': case 'l': u3arg(workItem, callInst, fnName, overload, result, _ibitselect_); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } DEFINE_BUILTIN(select_builtin) { char type = getOverloadArgType(overload); for (unsigned i = 0; i < result.num; i++) { int64_t c = SARGV(2, i); bool _c = (result.num > 1) ? c & INT64_MIN : c; switch (type) { case 'f': case 'd': result.setFloat(_c ? FARGV(1, i) : FARGV(0, i), i); break; case 'h': case 't': case 'j': case 'm': case 'c': case 's': case 'i': case 'l': result.setSInt(_c ? SARGV(1, i) : SARGV(0, i), i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } /////////////////////////////// // Synchronization Functions // /////////////////////////////// DEFINE_BUILTIN(barrier) { workItem->m_state = WorkItem::BARRIER; workItem->m_workGroup->notifyBarrier(workItem, callInst, UARG(0)); } DEFINE_BUILTIN(mem_fence) { // TODO: Implement? 
}


  //////////////////////////////////////////
  // Vector Data Load and Store Functions //
  //////////////////////////////////////////

  // vload: loads result.num elements of result.size bytes each, starting
  // 'offset' whole vectors past the pointer in argument 1.
  DEFINE_BUILTIN(vload)
  {
    size_t base = PARG(1);
    unsigned int addressSpace = ARG(1)->getType()->getPointerAddressSpace();
    uint64_t offset = UARG(0);

    size_t address = base + offset*result.size*result.num;
    size_t size = result.size*result.num;
    workItem->getMemory(addressSpace)->load(result.data, address, size);
  }

  // vstore: stores the vector in argument 0, 'offset' (argument 1) whole
  // vectors past the pointer in argument 2.
  DEFINE_BUILTIN(vstore)
  {
    const llvm::Value *value = ARG(0);
    unsigned size = getTypeSize(value->getType());
    if (isVector3(value))
    {
      // 3-element vectors are same size as 4-element vectors,
      // but vstore address offset shouldn't use this.
      size = (size/4) * 3;
    }

    size_t base = PARG(2);
    unsigned int addressSpace = ARG(2)->getType()->getPointerAddressSpace();
    uint64_t offset = UARG(1);

    size_t address = base + offset*size;
    unsigned char *data = workItem->getOperand(value).data;
    workItem->getMemory(addressSpace)->store(data, address, size);
  }

  // vload_half: loads result.num half-precision values and widens each one
  // to float in the result.
  DEFINE_BUILTIN(vload_half)
  {
    size_t base = PARG(1);
    unsigned int addressSpace = ARG(1)->getType()->getPointerAddressSpace();
    uint64_t offset = UARG(0);

    size_t address;
    if (fnName.compare(0, 6, "vloada") == 0 && result.num == 3)
    {
      // "vloada" with 3 components strides by 4 halfs per vector
      address = base + offset*sizeof(cl_half)*4;
    }
    else
    {
      address = base + offset*sizeof(cl_half)*result.num;
    }
    size_t size = sizeof(cl_half)*result.num;
    uint16_t *halfData = (uint16_t*)workItem->m_pool.alloc(2*result.num);
    workItem->getMemory(addressSpace)->load((unsigned char*)halfData,
                                            address, size);

    // Convert to floats
    for (unsigned i = 0; i < result.num; i++)
    {
      ((float*)result.data)[i] = halfToFloat(halfData[i]);
    }
  }

  // vstore_half: narrows the float components of argument 0 to half
  // precision and stores them 'offset' (argument 1) vectors past the
  // pointer in argument 2. A "_rtz"/"_rtn"/"_rtp" suffix in the function
  // name selects the rounding mode.
  DEFINE_BUILTIN(vstore_half)
  {
    const llvm::Value *value = ARG(0);
    unsigned size = getTypeSize(value->getType());
    if (isVector3(value))
    {
      // 3-element vectors are same size as 4-element vectors,
      // but vstore address offset shouldn't use this.
      size = (size/4) * 3;
    }

    size_t base = PARG(2);
    unsigned int addressSpace = ARG(2)->getType()->getPointerAddressSpace();
    uint64_t offset = UARG(1);

    // Convert to halfs
    // (the component count is derived assuming float-sized source elements)
    unsigned char *data = workItem->getOperand(value).data;
    size_t num = size / sizeof(float);
    size = num*sizeof(cl_half);
    uint16_t *halfData = (uint16_t*)workItem->m_pool.alloc(2*num);
    HalfRoundMode rmode = Half_RTE; // The Oclgrind device's round mode
    if (fnName.find("_rtz") != std::string::npos)
      rmode = Half_RTZ;
    else if (fnName.find("_rtn") != std::string::npos)
      rmode = Half_RTN;
    else if (fnName.find("_rtp") != std::string::npos)
      rmode = Half_RTP;

    for (unsigned i = 0; i < num; i++)
    {
      halfData[i] = floatToHalf(((float*)data)[i], rmode);
    }

    size_t address;
    if (fnName.compare(0, 7, "vstorea") == 0 && num == 3)
    {
      // "vstorea" with 3 components strides by 4 halfs per vector
      address = base + offset*sizeof(cl_half)*4;
    }
    else
    {
      address = base + offset*sizeof(cl_half)*num;
    }

    workItem->getMemory(addressSpace)->store((unsigned char*)halfData,
                                             address, size);
  }


  /////////////////////////
  // Work-Item Functions //
  /////////////////////////

  // Each query below takes the dimension index in argument 0 and returns 0
  // when that index is 3 or larger.

  DEFINE_BUILTIN(get_global_id)
  {
    uint64_t dim = UARG(0);
    size_t r = dim < 3 ? workItem->m_globalID[dim] : 0;
    result.setUInt(r);
  }

  DEFINE_BUILTIN(get_global_size)
  {
    uint64_t dim = UARG(0);
    size_t r = dim < 3 ? workItem->m_kernelInvocation->getGlobalSize()[dim] : 0;
    result.setUInt(r);
  }

  DEFINE_BUILTIN(get_global_offset)
  {
    uint64_t dim = UARG(0);
    size_t r = dim < 3 ? workItem->m_kernelInvocation->getGlobalOffset()[dim] : 0;
    result.setUInt(r);
  }

  DEFINE_BUILTIN(get_group_id)
  {
    uint64_t dim = UARG(0);
    size_t r = dim < 3 ? workItem->m_workGroup->getGroupID()[dim] : 0;
    result.setUInt(r);
  }

  DEFINE_BUILTIN(get_local_id)
  {
    uint64_t dim = UARG(0);
    size_t r = dim < 3 ? workItem->m_localID[dim] : 0;
    result.setUInt(r);
  }

  DEFINE_BUILTIN(get_local_size)
  {
    uint64_t dim = UARG(0);
    size_t r = dim < 3 ?
workItem->m_workGroup->getGroupSize()[dim] : 0; result.setUInt(r); } DEFINE_BUILTIN(get_num_groups) { uint64_t dim = UARG(0); size_t r = 0; if (dim < 3) { r = workItem->m_kernelInvocation->getNumGroups()[dim]; } result.setUInt(r); } DEFINE_BUILTIN(get_work_dim) { result.setUInt(workItem->m_kernelInvocation->getWorkDim()); } ///////////////////// // Other Functions // ///////////////////// DEFINE_BUILTIN(convert_float) { for (unsigned i = 0; i < result.num; i++) { switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': result.setFloat((float)UARGV(0, i), i); break; case 'c': case 's': case 'i': case 'l': result.setFloat((float)SARGV(0, i), i); break; case 'f': case 'd': result.setFloat(FARGV(0, i), i); break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } } } DEFINE_BUILTIN(convert_half) { float f; HalfRoundMode rmode = Half_RTE; if (fnName.find("_rtz") != std::string::npos) rmode = Half_RTZ; else if (fnName.find("_rtn") != std::string::npos) rmode = Half_RTN; else if (fnName.find("_rtp") != std::string::npos) rmode = Half_RTP; const char srcType = getOverloadArgType(overload); for (unsigned i = 0; i < result.num; i++) { switch (srcType) { case 'h': case 't': case 'j': case 'm': f = (float)UARGV(0, i); break; case 'c': case 's': case 'i': case 'l': f = (float)SARGV(0, i); break; case 'd': case 'f': f = FARGV(0, i); default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } result.setUInt(floatToHalf(f, rmode), i); } } static void setConvertRoundingMode(const string& name) { size_t rpos = name.find("_rt"); if (rpos != string::npos) { switch (name[rpos+3]) { case 'e': fesetround(FE_TONEAREST); break; case 'z': fesetround(FE_TOWARDZERO); break; case 'p': fesetround(FE_UPWARD); break; case 'n': fesetround(FE_DOWNWARD); break; default: FATAL_ERROR("Unsupported rounding mode: %c", name[rpos=3]); } } else { fesetround(FE_TOWARDZERO); } } DEFINE_BUILTIN(convert_uint) { // Check for 
saturation modifier bool sat = fnName.find("_sat") != string::npos; uint64_t max = (1UL<<(result.size*8)) - 1; // Use rounding mode const int origRnd = fegetround(); setConvertRoundingMode(fnName); for (unsigned i = 0; i < result.num; i++) { uint64_t r; switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': r = UARGV(0, i); if (sat) { r = _min_(r, max); } break; case 'c': case 's': case 'i': case 'l': { int64_t si = SARGV(0, i); r = si; if (sat) { if (si < 0) { r = 0; } else if (si > max) { r = max; } } break; } case 'f': case 'd': if (sat) { r = rint(_clamp_(FARGV(0, i), 0.0, (double)max)); } else { r = rint(FARGV(0, i)); } break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } result.setUInt(r, i); } fesetround(origRnd); } DEFINE_BUILTIN(convert_sint) { // Check for saturation modifier bool sat = fnName.find("_sat") != string::npos; int64_t min, max; switch (result.size) { case 1: min = INT8_MIN; max = INT8_MAX; break; case 2: min = INT16_MIN; max = INT16_MAX; break; case 4: min = INT32_MIN; max = INT32_MAX; break; case 8: min = INT64_MIN; max = INT64_MAX; break; } // Use rounding mode const int origRnd = fegetround(); setConvertRoundingMode(fnName); for (unsigned i = 0; i < result.num; i++) { int64_t r; switch (getOverloadArgType(overload)) { case 'h': case 't': case 'j': case 'm': r = UARGV(0, i); if (sat) { r = _min_((uint64_t)r, (uint64_t)max); } break; case 'c': case 's': case 'i': case 'l': r = SARGV(0, i); if (sat) { r = _clamp_(r, min, max); } break; case 'f': case 'd': if (sat) { r = rint(_clamp_(FARGV(0, i), (double)min, (double)max)); } else { r = rint(FARGV(0, i)); } break; default: FATAL_ERROR("Unsupported argument type: %c", getOverloadArgType(overload)); } result.setSInt(r, i); } fesetround(origRnd); } DEFINE_BUILTIN(printf_builtin) { lock_guard lck(printfMutex); size_t formatPtr = workItem->getOperand(ARG(0)).getPointer(); Memory *memory = workItem->getMemory(AddrSpaceGlobal); int arg 
= 1; while (true) { char c; memory->load((unsigned char*)&c, formatPtr++); if (c == '\0') { break; } if (c == '%') { unsigned vectorWidth = 1; string format = "%"; while (true) { memory->load((unsigned char*)&c, formatPtr++); if (c == '\0') { cout << format; break; } if (c == 'v') { // Load vector width specifier memory->load((unsigned char*)&c, formatPtr++); vectorWidth = c - '0'; if (vectorWidth == 1) { // Assume this is 16 vectorWidth = 16; formatPtr++; } continue; } // Ignore all 'h' specifiers if (c == 'h') continue; format += c; bool done = false; switch (c) { case 'c': case 'd': case 'i': for (unsigned i = 0; i < vectorWidth; i++) { if (i > 0) printf(","); printf(format.c_str(), SARGV(arg, i)); } arg++; done = true; break; case 'o': case 'u': case 'x': case 'X': case 'p': for (unsigned i = 0; i < vectorWidth; i++) { if (i > 0) printf(","); printf(format.c_str(), UARGV(arg, i)); } arg++; done = true; break; case 'f': case 'F': case 'e': case 'E': case 'g': case 'G': case 'a': case 'A': for (unsigned i = 0; i < vectorWidth; i++) { if (i > 0) printf(","); printf(format.c_str(), FARGV(arg, i)); } arg++; done = true; break; case 's': { size_t ptr = UARG(arg++); if (!ptr) { // Special case for printing NULL pointer printf(format.c_str(), NULL); } else { // Load string from memory char c; string str = ""; while (true) { if (!memory->load((unsigned char*)&c, ptr++)) break; if (c == '\0') break; str += c; } printf(format.c_str(), str.c_str()); } done = true; break; } case '%': printf("%%"); done = true; break; } if (done) { break; } } if (c == '\0') { break; } } else { cout << c; } } } ///////////////////// // LLVM Intrinsics // ///////////////////// DEFINE_BUILTIN(llvm_dbg_declare) { const llvm::DbgDeclareInst *dbgInst = (const llvm::DbgDeclareInst*)callInst; const llvm::Value *addr = dbgInst->getAddress(); #if LLVM_VERSION > 36 const llvm::DILocalVariable *var = dbgInst->getVariable(); workItem->m_variables[var->getName()] = addr; #else const llvm::MDNode *var = 
dbgInst->getVariable(); llvm::MDString *str = llvm::dyn_cast(var->getOperand(0)); if (str) { // TODO: There must be a better way of getting the variable name... unsigned length = str->getLength(); const char *name = str->getString().str().c_str(); if (length > strlen(name) + 1) { name += strlen(name) + 1; workItem->m_variables[name] = addr; } } #endif } DEFINE_BUILTIN(llvm_dbg_value) { const llvm::DbgValueInst *dbgInst = (const llvm::DbgValueInst*)callInst; const llvm::Value *value = dbgInst->getValue(); // TODO: Use offset? //uint64_t offset = dbgInst->getOffset(); #if LLVM_VERSION > 36 const llvm::DILocalVariable *var = dbgInst->getVariable(); workItem->m_variables[var->getName()] = value; #else const llvm::MDNode *var = dbgInst->getVariable(); llvm::MDString *str = llvm::dyn_cast(var->getOperand(0)); if (str) { // TODO: There must be a better way of getting the variable name... unsigned length = str->getLength(); const char *name = str->getString().str().c_str(); if (length > strlen(name) + 1) { name += strlen(name) + 1; workItem->m_variables[name] = value; } } #endif } DEFINE_BUILTIN(llvm_lifetime_start) { // TODO: Implement? } DEFINE_BUILTIN(llvm_lifetime_end) { // TODO: Implement? 
} DEFINE_BUILTIN(llvm_memcpy) { const llvm::MemCpyInst *memcpyInst = (const llvm::MemCpyInst*)callInst; size_t dest = workItem->getOperand(memcpyInst->getDest()).getPointer(); size_t src = workItem->getOperand(memcpyInst->getSource()).getPointer(); size_t size = workItem->getOperand(memcpyInst->getLength()).getUInt(); unsigned destAddrSpace = memcpyInst->getDestAddressSpace(); unsigned srcAddrSpace = memcpyInst->getSourceAddressSpace(); unsigned char *buffer = workItem->m_pool.alloc(size); workItem->getMemory(srcAddrSpace)->load(buffer, src, size); workItem->getMemory(destAddrSpace)->store(buffer, dest, size); } DEFINE_BUILTIN(llvm_memset) { const llvm::MemSetInst *memsetInst = (const llvm::MemSetInst*)callInst; size_t dest = workItem->getOperand(memsetInst->getDest()).getPointer(); size_t size = workItem->getOperand(memsetInst->getLength()).getUInt(); unsigned addressSpace = memsetInst->getDestAddressSpace(); unsigned char *buffer = workItem->m_pool.alloc(size); unsigned char value = UARG(1); memset(buffer, value, size); workItem->getMemory(addressSpace)->store(buffer, dest, size); } DEFINE_BUILTIN(llvm_trap) { FATAL_ERROR("Encountered trap instruction"); } public: static BuiltinFunctionMap initBuiltins(); }; // Utility macros for generating builtin function map #define CAST \ void(*)(WorkItem*, const llvm::CallInst*, \ const std::string&, const std::string&, TypedValue& result, void*) #define F1ARG(name) (double(*)(double))name #define F2ARG(name) (double(*)(double,double))name #define F3ARG(name) (double(*)(double,double,double))name #define ADD_BUILTIN(name, func, op) \ builtins[name] = BuiltinFunction((CAST)func, (void*)op); #define ADD_PREFIX_BUILTIN(name, func, op) \ workItemPrefixBuiltins.push_back( \ make_pair(name, BuiltinFunction((CAST)func, (void*)op))); // Generate builtin function map BuiltinFunctionPrefixList workItemPrefixBuiltins; BuiltinFunctionMap workItemBuiltins = WorkItemBuiltins::initBuiltins(); BuiltinFunctionMap 
WorkItemBuiltins::initBuiltins() { BuiltinFunctionMap builtins; // Async Copy and Prefetch Functions ADD_BUILTIN("async_work_group_copy", async_work_group_copy, NULL); ADD_BUILTIN("async_work_group_strided_copy", async_work_group_copy, NULL); ADD_BUILTIN("wait_group_events", wait_group_events, NULL); ADD_BUILTIN("prefetch", prefetch, NULL); // Atomic Functions ADD_BUILTIN("atom_add", atomic_add, NULL); ADD_BUILTIN("atomic_add", atomic_add, NULL); ADD_BUILTIN("atom_and", atomic_and, NULL); ADD_BUILTIN("atomic_and", atomic_and, NULL); ADD_BUILTIN("atom_cmpxchg", atomic_cmpxchg, NULL); ADD_BUILTIN("atomic_cmpxchg", atomic_cmpxchg, NULL); ADD_BUILTIN("atom_dec", atomic_dec, NULL); ADD_BUILTIN("atomic_dec", atomic_dec, NULL); ADD_BUILTIN("atom_inc", atomic_inc, NULL); ADD_BUILTIN("atomic_inc", atomic_inc, NULL); ADD_BUILTIN("atom_max", atomic_max, NULL); ADD_BUILTIN("atomic_max", atomic_max, NULL); ADD_BUILTIN("atom_min", atomic_min, NULL); ADD_BUILTIN("atomic_min", atomic_min, NULL); ADD_BUILTIN("atom_or", atomic_or, NULL); ADD_BUILTIN("atomic_or", atomic_or, NULL); ADD_BUILTIN("atom_sub", atomic_sub, NULL); ADD_BUILTIN("atomic_sub", atomic_sub, NULL); ADD_BUILTIN("atom_xchg", atomic_xchg, NULL); ADD_BUILTIN("atomic_xchg", atomic_xchg, NULL); ADD_BUILTIN("atom_xor", atomic_xor, NULL); ADD_BUILTIN("atomic_xor", atomic_xor, NULL); // Common Functions ADD_BUILTIN("clamp", clamp, NULL); ADD_BUILTIN("degrees", f1arg, _degrees_); ADD_BUILTIN("max", max, NULL); ADD_BUILTIN("min", min, NULL); ADD_BUILTIN("mix", mix, NULL); ADD_BUILTIN("radians", f1arg, _radians_); ADD_BUILTIN("sign", f1arg, _sign_); ADD_BUILTIN("smoothstep", smoothstep, NULL); ADD_BUILTIN("step", step, NULL); // Geometric Functions ADD_BUILTIN("cross", cross, NULL); ADD_BUILTIN("dot", dot, NULL); ADD_BUILTIN("distance", distance, NULL); ADD_BUILTIN("length", length, NULL); ADD_BUILTIN("normalize", normalize, NULL); ADD_BUILTIN("fast_distance", distance, NULL); ADD_BUILTIN("fast_length", length, NULL); 
ADD_BUILTIN("fast_normalize", normalize, NULL); // Image Functions ADD_BUILTIN("get_image_array_size", get_image_array_size, NULL); ADD_BUILTIN("get_image_channel_data_type", get_image_channel_data_type, NULL); ADD_BUILTIN("get_image_channel_order", get_image_channel_order, NULL); ADD_BUILTIN("get_image_dim", get_image_dim, NULL); ADD_BUILTIN("get_image_depth", get_image_depth, NULL); ADD_BUILTIN("get_image_height", get_image_height, NULL); ADD_BUILTIN("get_image_width", get_image_width, NULL); ADD_BUILTIN("read_imagef", read_imagef, NULL); ADD_BUILTIN("read_imagei", read_imagei, NULL); ADD_BUILTIN("read_imageui", read_imageui, NULL); ADD_BUILTIN("write_imagef", write_imagef, NULL); ADD_BUILTIN("write_imagei", write_imagei, NULL); ADD_BUILTIN("write_imageui", write_imageui, NULL); // Integer Functions ADD_BUILTIN("abs", abs_builtin, NULL); ADD_BUILTIN("abs_diff", abs_diff, NULL); ADD_BUILTIN("add_sat", add_sat, NULL); ADD_BUILTIN("clz", clz, NULL); ADD_BUILTIN("hadd", hadd, NULL); ADD_BUILTIN("mad24", u3arg, _mad_); ADD_BUILTIN("mad_hi", mad_hi, NULL); ADD_BUILTIN("mad_sat", mad_sat, NULL); ADD_BUILTIN("mul24", u2arg, _mul_); ADD_BUILTIN("mul_hi", mul_hi, NULL); ADD_BUILTIN("popcount", u1arg, _popcount_); ADD_BUILTIN("rhadd", rhadd, NULL); ADD_BUILTIN("rotate", rotate, NULL); ADD_BUILTIN("sub_sat", sub_sat, NULL); ADD_BUILTIN("upsample", upsample, NULL); // Math Functions ADD_BUILTIN("acos", f1arg, F1ARG(acos)); ADD_BUILTIN("acosh", f1arg, F1ARG(acosh)); ADD_BUILTIN("acospi", f1arg, _acospi_); ADD_BUILTIN("asin", f1arg, F1ARG(asin)); ADD_BUILTIN("asinh", f1arg, F1ARG(asinh)); ADD_BUILTIN("asinpi", f1arg, _asinpi_); ADD_BUILTIN("atan", f1arg, F1ARG(atan)); ADD_BUILTIN("atan2", f2arg, F2ARG(atan2)); ADD_BUILTIN("atanh", f1arg, F1ARG(atanh)); ADD_BUILTIN("atanpi", f1arg, _atanpi_); ADD_BUILTIN("atan2pi", f2arg, _atan2pi_); ADD_BUILTIN("cbrt", f1arg, F1ARG(cbrt)); ADD_BUILTIN("ceil", f1arg, F1ARG(ceil)); ADD_BUILTIN("copysign", f2arg, F2ARG(copysign)); 
ADD_BUILTIN("cos", f1arg, F1ARG(cos)); ADD_BUILTIN("cosh", f1arg, F1ARG(cosh)); ADD_BUILTIN("cospi", f1arg, _cospi_); ADD_BUILTIN("erfc", f1arg, F1ARG(erfc)); ADD_BUILTIN("erf", f1arg, F1ARG(erf)); ADD_BUILTIN("exp", f1arg, F1ARG(exp)); ADD_BUILTIN("exp2", f1arg, F1ARG(exp2)); ADD_BUILTIN("exp10", f1arg, _exp10_); ADD_BUILTIN("expm1", f1arg, F1ARG(expm1)); ADD_BUILTIN("fabs", f1arg, F1ARG(fabs)); ADD_BUILTIN("fdim", f2arg, F2ARG(fdim)); ADD_BUILTIN("floor", f1arg, F1ARG(floor)); ADD_BUILTIN("fma", f3arg, F3ARG(_fma_)); ADD_BUILTIN("fmax", f2arg, F2ARG(fmax)); ADD_BUILTIN("fmin", f2arg, F2ARG(fmin)); ADD_BUILTIN("fmod", f2arg, F2ARG(fmod)); ADD_BUILTIN("fract", fract, NULL); ADD_BUILTIN("frexp", frexp_builtin, NULL); ADD_BUILTIN("hypot", f2arg, F2ARG(hypot)); ADD_BUILTIN("ilogb", ilogb_builtin, NULL); ADD_BUILTIN("ldexp", ldexp_builtin, NULL); ADD_BUILTIN("lgamma", f1arg, F1ARG(lgamma)); ADD_BUILTIN("lgamma_r", lgamma_r, NULL); ADD_BUILTIN("log", f1arg, F1ARG(log)); ADD_BUILTIN("log2", f1arg, F1ARG(log2)); ADD_BUILTIN("log10", f1arg, F1ARG(log10)); ADD_BUILTIN("log1p", f1arg, F1ARG(log1p)); ADD_BUILTIN("logb", f1arg, F1ARG(logb)); ADD_BUILTIN("mad", f3arg, F3ARG(_fma_)); ADD_BUILTIN("maxmag", f2arg, _maxmag_); ADD_BUILTIN("minmag", f2arg, _minmag_); ADD_BUILTIN("modf", modf_builtin, NULL); ADD_BUILTIN("nan", nan_builtin, NULL); ADD_BUILTIN("nanf", nan_builtin, NULL); ADD_BUILTIN("nextafter", nextafter_builtin, NULL); ADD_BUILTIN("pow", f2arg, F2ARG(pow)); ADD_BUILTIN("pown", pown, NULL); ADD_BUILTIN("powr", f2arg, F2ARG(pow)); ADD_BUILTIN("remainder", f2arg, F2ARG(remainder)); ADD_BUILTIN("remquo", remquo_builtin, NULL); ADD_BUILTIN("rint", f1arg, F1ARG(rint)); ADD_BUILTIN("rootn", rootn, NULL); ADD_BUILTIN("round", f1arg, F1ARG(round)); ADD_BUILTIN("rsqrt", f1arg, _rsqrt_); ADD_BUILTIN("sin", f1arg, F1ARG(sin)); ADD_BUILTIN("sinh", f1arg, F1ARG(sinh)); ADD_BUILTIN("sinpi", f1arg, _sinpi_); ADD_BUILTIN("sincos", sincos, NULL); ADD_BUILTIN("sqrt", f1arg, 
F1ARG(sqrt)); ADD_BUILTIN("tan", f1arg, F1ARG(tan)); ADD_BUILTIN("tanh", f1arg, F1ARG(tanh)); ADD_BUILTIN("tanpi", f1arg, _tanpi_); ADD_BUILTIN("tgamma", f1arg, F1ARG(tgamma)); ADD_BUILTIN("trunc", f1arg, F1ARG(trunc)); // Native Math Functions ADD_BUILTIN("half_cos", f1arg, F1ARG(cos)); ADD_BUILTIN("native_cos", f1arg, F1ARG(cos)); ADD_BUILTIN("half_divide", f2arg, _fdivide_); ADD_BUILTIN("native_divide", f2arg, _fdivide_); ADD_BUILTIN("half_exp", f1arg, F1ARG(exp)); ADD_BUILTIN("native_exp", f1arg, F1ARG(exp)); ADD_BUILTIN("half_exp2", f1arg, F1ARG(exp2)); ADD_BUILTIN("native_exp2", f1arg, F1ARG(exp2)); ADD_BUILTIN("half_exp10", f1arg, _exp10_); ADD_BUILTIN("native_exp10", f1arg, _exp10_); ADD_BUILTIN("half_log", f1arg, F1ARG(log)); ADD_BUILTIN("native_log", f1arg, F1ARG(log)); ADD_BUILTIN("half_log2", f1arg, F1ARG(log2)); ADD_BUILTIN("native_log2", f1arg, F1ARG(log2)); ADD_BUILTIN("half_log10", f1arg, F1ARG(log10)); ADD_BUILTIN("native_log10", f1arg, F1ARG(log10)); ADD_BUILTIN("half_powr", f2arg, F2ARG(pow)); ADD_BUILTIN("native_powr", f2arg, F2ARG(pow)); ADD_BUILTIN("half_recip", f1arg, _frecip_); ADD_BUILTIN("native_recip", f1arg, _frecip_); ADD_BUILTIN("half_rsqrt", f1arg, _rsqrt_); ADD_BUILTIN("native_rsqrt", f1arg, _rsqrt_); ADD_BUILTIN("half_sin", f1arg, F1ARG(sin)); ADD_BUILTIN("native_sin", f1arg, F1ARG(sin)); ADD_BUILTIN("half_sqrt", f1arg, F1ARG(sqrt)); ADD_BUILTIN("native_sqrt", f1arg, F1ARG(sqrt)); ADD_BUILTIN("half_tan", f1arg, F1ARG(tan)); ADD_BUILTIN("native_tan", f1arg, F1ARG(tan)); // Misc. 
Vector Functions ADD_BUILTIN("shuffle", shuffle_builtin, NULL); ADD_BUILTIN("shuffle2", shuffle2_builtin, NULL); // Relational Functional ADD_BUILTIN("all", all, NULL); ADD_BUILTIN("any", any, NULL); ADD_BUILTIN("bitselect", bitselect, NULL); ADD_BUILTIN("isequal", rel2arg, _iseq_); ADD_BUILTIN("isnotequal", rel2arg, _isneq_); ADD_BUILTIN("isgreater", rel2arg, _isgt_); ADD_BUILTIN("isgreaterequal", rel2arg, _isge_); ADD_BUILTIN("isless", rel2arg, _islt_); ADD_BUILTIN("islessequal", rel2arg, _isle_); ADD_BUILTIN("islessgreater", rel2arg, _islg_); ADD_BUILTIN("isfinite", rel1arg, _isfin_); ADD_BUILTIN("isinf", rel1arg, _isinf_); ADD_BUILTIN("isnan", rel1arg, _isnan_); ADD_BUILTIN("isnormal", rel1arg, _isnorm_); ADD_BUILTIN("isordered", rel2arg, _isord_); ADD_BUILTIN("isunordered", rel2arg, _isuord_); ADD_BUILTIN("select", select_builtin, NULL); ADD_BUILTIN("signbit", rel1arg, _signbit_); // Synchronization Functions ADD_BUILTIN("barrier", barrier, NULL); ADD_BUILTIN("mem_fence", mem_fence, NULL); ADD_BUILTIN("read_mem_fence", mem_fence, NULL); ADD_BUILTIN("write_mem_fence", mem_fence, NULL); // Vector Data Load and Store Functions ADD_PREFIX_BUILTIN("vload_half", vload_half, NULL); ADD_PREFIX_BUILTIN("vloada_half", vload_half, NULL); ADD_PREFIX_BUILTIN("vstore_half", vstore_half, NULL); ADD_PREFIX_BUILTIN("vstorea_half", vstore_half, NULL); ADD_PREFIX_BUILTIN("vload", vload, NULL); ADD_PREFIX_BUILTIN("vstore", vstore, NULL); // Work-Item Functions ADD_BUILTIN("get_global_id", get_global_id, NULL); ADD_BUILTIN("get_global_size", get_global_size, NULL); ADD_BUILTIN("get_global_offset", get_global_offset, NULL); ADD_BUILTIN("get_group_id", get_group_id, NULL); ADD_BUILTIN("get_local_id", get_local_id, NULL); ADD_BUILTIN("get_local_size", get_local_size, NULL); ADD_BUILTIN("get_num_groups", get_num_groups, NULL); ADD_BUILTIN("get_work_dim", get_work_dim, NULL); // Other Functions ADD_PREFIX_BUILTIN("convert_half", convert_half, NULL); ADD_PREFIX_BUILTIN("convert_float", 
convert_float, NULL); ADD_PREFIX_BUILTIN("convert_double", convert_float, NULL); ADD_PREFIX_BUILTIN("convert_u", convert_uint, NULL); ADD_PREFIX_BUILTIN("convert_", convert_sint, NULL); ADD_BUILTIN("printf", printf_builtin, NULL); // LLVM Intrinsics ADD_BUILTIN("llvm.dbg.declare", llvm_dbg_declare, NULL); ADD_BUILTIN("llvm.dbg.value", llvm_dbg_value, NULL); ADD_BUILTIN("llvm.lifetime.start", llvm_lifetime_start, NULL); ADD_BUILTIN("llvm.lifetime.end", llvm_lifetime_end, NULL); ADD_PREFIX_BUILTIN("llvm.memcpy", llvm_memcpy, NULL); ADD_PREFIX_BUILTIN("llvm.memmove", llvm_memcpy, NULL); ADD_PREFIX_BUILTIN("llvm.memset", llvm_memset, NULL); ADD_PREFIX_BUILTIN("llvm.fmuladd", f3arg, F3ARG(_fma_)); ADD_BUILTIN("llvm.trap", llvm_trap, NULL); return builtins; } } Oclgrind-15.5/src/core/clc.h000066400000000000000000001123221252441671000156410ustar00rootroot00000000000000// clc.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#pragma OPENCL EXTENSION cl_khr_fp64 : enable typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; typedef unsigned long ulong; #if defined(__SPIR32__) typedef uint size_t; typedef int ptrdiff_t; #else typedef ulong size_t; typedef long ptrdiff_t; #endif typedef size_t uintptr_t; typedef ptrdiff_t intptr_t; #define event_t size_t #define TYPEDEF_VECTOR(type) \ typedef __attribute__((ext_vector_type(2))) type type##2; \ typedef __attribute__((ext_vector_type(3))) type type##3; \ typedef __attribute__((ext_vector_type(4))) type type##4; \ typedef __attribute__((ext_vector_type(8))) type type##8; \ typedef __attribute__((ext_vector_type(16))) type type##16; TYPEDEF_VECTOR(char); TYPEDEF_VECTOR(uchar); TYPEDEF_VECTOR(short); TYPEDEF_VECTOR(ushort); TYPEDEF_VECTOR(int); TYPEDEF_VECTOR(uint); TYPEDEF_VECTOR(long); TYPEDEF_VECTOR(ulong); TYPEDEF_VECTOR(float); TYPEDEF_VECTOR(double); #define __ENDIAN_LITTLE__ 1 #define __OPENCL_VERSION__ 120 #define __OPENCL_C_VERSION__ 120 #define __IMAGE_SUPPORT__ 1 #define __kernel_exec(X, typen) __kernel \ __attribute__((work_group_size_hint(X, 1, 1))) \ __attribute__((vec_type_hint(typen))) #define CHAR_BIT 8 #define SCHAR_MAX 127 #define SCHAR_MIN (-128) #define UCHAR_MAX 255 #define CHAR_MAX SCHAR_MAX #define CHAR_MIN SCHAR_MIN #define USHRT_MAX 65535 #define SHRT_MAX 32767 #define SHRT_MIN (-32768) #define UINT_MAX 0xffffffff #define INT_MAX 2147483647 #define INT_MIN (-2147483647-1) #define ULONG_MAX 0xffffffffffffffffUL #define LONG_MAX ((long)0x7fffffffffffffffL) #define LONG_MIN ((long)(-0x7fffffffffffffffL-1)) #define FLT_DIG 6 #define FLT_MANT_DIG 24 #define FLT_MAX_10_EXP +38 #define FLT_MAX_EXP +128 #define FLT_MIN_10_EXP -37 #define FLT_MIN_EXP -125 #define FLT_RADIX 2 #define FLT_MAX 0x1.fffffep127f #define FLT_MIN 0x1.0p-126f #define FLT_EPSILON 0x1.0p-23f #define DBL_DIG 15 #define DBL_MANT_DIG 53 #define DBL_MAX_10_EXP +308 #define DBL_MAX_EXP +1024 #define DBL_MIN_10_EXP -307 #define 
DBL_MIN_EXP -1021 #define DBL_RADIX 2 #define DBL_MAX 0x1.fffffffffffffp1023 #define DBL_MIN 0x1.0p-1022 #define DBL_EPSILON 0x1.0p-52 #define FP_ILOGB0 INT_MIN #define FP_ILOGBNAN INT_MIN #define M_E_F 2.71828182845904523536028747135266250f #define M_LOG2E_F 1.44269504088896340735992468100189214f #define M_LOG10E_F 0.434294481903251827651128918916605082f #define M_LN2_F 0.693147180559945309417232121458176568f #define M_LN10_F 2.3025850929940456840179914546843642f #define M_PI_F 3.14159265358979323846264338327950288f #define M_PI_2_F 1.57079632679489661923132169163975144f #define M_PI_4_F 0.785398163397448309615660845819875721f #define M_1_PI_F 0.318309886183790671537767526745028724f #define M_2_PI_F 0.636619772367581343075535053490057448f #define M_2_SQRTPI_F 1.12837916709551257389615890312154517f #define M_SQRT2_F 1.41421356237309504880168872420969808f #define M_SQRT1_2_F 0.707106781186547524400844362104849039f #define M_E 2.71828182845904523536028747135266250 #define M_LOG2E 1.44269504088896340735992468100189214 #define M_LOG10E 0.434294481903251827651128918916605082 #define M_LN2 0.693147180559945309417232121458176568 #define M_LN10 2.30258509299404568401799145468436421 #define M_PI 3.14159265358979323846264338327950288 #define M_PI_2 1.57079632679489661923132169163975144 #define M_PI_4 0.785398163397448309615660845819875721 #define M_1_PI 0.318309886183790671537767526745028724 #define M_2_PI 0.636619772367581343075535053490057448 #define M_2_SQRTPI 1.12837916709551257389615890312154517 #define M_SQRT2 1.41421356237309504880168872420969808 #define M_SQRT1_2 0.707106781186547524400844362104849039 #define MAXFLOAT ((float)3.40282346638528860e+38) #define HUGE_VALF __builtin_huge_valf() #define HUGE_VAL __builtin_huge_val() #define INFINITY __builtin_inff() #define NAN __builtin_nanf(0) #define CLK_SNORM_INT8 0x10D0 #define CLK_SNORM_INT16 0x10D1 #define CLK_UNORM_INT8 0x10D2 #define CLK_UNORM_INT16 0x10D3 #define CLK_UNORM_SHORT_565 0x10D4 #define 
CLK_UNORM_SHORT_555 0x10D5 #define CLK_UNORM_INT_101010 0x10D6 #define CLK_SIGNED_INT8 0x10D7 #define CLK_SIGNED_INT16 0x10D8 #define CLK_SIGNED_INT32 0x10D9 #define CLK_UNSIGNED_INT8 0x10DA #define CLK_UNSIGNED_INT16 0x10DB #define CLK_UNSIGNED_INT32 0x10DC #define CLK_HALF_FLOAT 0x10DD #define CLK_FLOAT 0x10DE #define CLK_UNORM_INT24 0x10DF #define CLK_R 0x10B0 #define CLK_A 0x10B1 #define CLK_RG 0x10B2 #define CLK_RA 0x10B3 #define CLK_RGB 0x10B4 #define CLK_RGBA 0x10B5 #define CLK_BGRA 0x10B6 #define CLK_ARGB 0x10B7 #define CLK_INTENSITY 0x10B8 #define CLK_LUMINANCE 0x10B9 #define CLK_Rx 0x10BA #define CLK_RGx 0x10BB #define CLK_RGBx 0x10BC #define CLK_DEPTH 0x10BD #define CLK_DEPTH_STENCIL 0x10BE #define CLK_NORMALIZED_COORDS_FALSE 0x0000 #define CLK_NORMALIZED_COORDS_TRUE 0x0001 #define CLK_ADDRESS_NONE 0x0000 #define CLK_ADDRESS_CLAMP_TO_EDGE 0x0002 #define CLK_ADDRESS_CLAMP 0x0004 #define CLK_ADDRESS_REPEAT 0x0006 #define CLK_ADDRESS_MIRRORED_REPEAT 0x0008 #define CLK_FILTER_NEAREST 0x0010 #define CLK_FILTER_LINEAR 0x0020 #define __OVERLOAD__ __attribute__((__overloadable__)) #define BUILTIN_1ARG(rtype, type0, name) \ rtype __OVERLOAD__ name(type0 a); \ rtype##2 __OVERLOAD__ name(type0##2 a); \ rtype##3 __OVERLOAD__ name(type0##3 a); \ rtype##4 __OVERLOAD__ name(type0##4 a); \ rtype##8 __OVERLOAD__ name(type0##8 a); \ rtype##16 __OVERLOAD__ name(type0##16 a); #define BUILTIN_2ARG(rtype, type0, type1, name) \ rtype __OVERLOAD__ name(type0 a, type1 b); \ rtype##2 __OVERLOAD__ name(type0##2 a, type1##2 b); \ rtype##3 __OVERLOAD__ name(type0##3 a, type1##3 b); \ rtype##4 __OVERLOAD__ name(type0##4 a, type1##4 b); \ rtype##8 __OVERLOAD__ name(type0##8 a, type1##8 b); \ rtype##16 __OVERLOAD__ name(type0##16 a, type1##16 b); #define BUILTIN_3ARG(rtype, type0, type1, type2, name) \ rtype __OVERLOAD__ name(type0 a, type1 b, type2 c); \ rtype##2 __OVERLOAD__ name(type0##2 a, type1##2 b, type2##2 c); \ rtype##3 __OVERLOAD__ name(type0##3 a, type1##3 b, type2##3 c); \ 
rtype##4 __OVERLOAD__ name(type0##4 a, type1##4 b, type2##4 c); \ rtype##8 __OVERLOAD__ name(type0##8 a, type1##8 b, type2##8 c); \ rtype##16 __OVERLOAD__ name(type0##16 a, type1##16 b, type2##16 c); #define BUILTIN_1ARG_INTEGERS(name) \ BUILTIN_1ARG(char, char, name) \ BUILTIN_1ARG(uchar, uchar, name) \ BUILTIN_1ARG(short, short, name) \ BUILTIN_1ARG(ushort, ushort, name) \ BUILTIN_1ARG(int, int, name) \ BUILTIN_1ARG(uint, uint, name) \ BUILTIN_1ARG(long, long, name) \ BUILTIN_1ARG(ulong, ulong, name); #define BUILTIN_2ARG_INTEGERS(name) \ BUILTIN_2ARG(char, char, char, name) \ BUILTIN_2ARG(uchar, uchar, uchar, name) \ BUILTIN_2ARG(short, short, short, name) \ BUILTIN_2ARG(ushort, ushort, ushort, name) \ BUILTIN_2ARG(int, int, int, name) \ BUILTIN_2ARG(uint, uint, uint, name) \ BUILTIN_2ARG(long, long, long, name) \ BUILTIN_2ARG(ulong, ulong, ulong, name); #define BUILTIN_3ARG_INTEGERS(name) \ BUILTIN_3ARG(char, char, char, char, name) \ BUILTIN_3ARG(uchar, uchar, uchar, uchar, name) \ BUILTIN_3ARG(short, short, short, short, name) \ BUILTIN_3ARG(ushort, ushort, ushort, ushort, name) \ BUILTIN_3ARG(int, int, int, int, name) \ BUILTIN_3ARG(uint, uint, uint, uint, name) \ BUILTIN_3ARG(long, long, long, long, name) \ BUILTIN_3ARG(ulong, ulong, ulong, ulong, name); #define BUILTIN_1ARG_FLOATS(name) \ BUILTIN_1ARG(float, float, name) \ BUILTIN_1ARG(double, double, name); #define BUILTIN_2ARG_FLOATS(name) \ BUILTIN_2ARG(float, float, float, name) \ BUILTIN_2ARG(double, double, double, name); #define BUILTIN_3ARG_FLOATS(name) \ BUILTIN_3ARG(float, float, float, float, name) \ BUILTIN_3ARG(double, double, double, double, name); /////////////////////////////////////// // Async Copy and Prefetch Functions // /////////////////////////////////////// #define ASYNC_COPY_TYPE(type) \ event_t __OVERLOAD__ async_work_group_copy(__local type*, const __global type*, size_t, event_t); \ event_t __OVERLOAD__ async_work_group_copy(__global type*, const __local type*, size_t, event_t); 
\ event_t __OVERLOAD__ async_work_group_strided_copy(__local type*, const __global type*, size_t, size_t, event_t); \ event_t __OVERLOAD__ async_work_group_strided_copy(__global type*, const __local type*, size_t, size_t, event_t); #define ASYNC_COPY(type) \ ASYNC_COPY_TYPE(type) \ ASYNC_COPY_TYPE(type##2) \ ASYNC_COPY_TYPE(type##3) \ ASYNC_COPY_TYPE(type##4) \ ASYNC_COPY_TYPE(type##8) \ ASYNC_COPY_TYPE(type##16); ASYNC_COPY(char); ASYNC_COPY(uchar); ASYNC_COPY(short); ASYNC_COPY(ushort); ASYNC_COPY(int); ASYNC_COPY(uint); ASYNC_COPY(long); ASYNC_COPY(ulong); ASYNC_COPY(float); ASYNC_COPY(double); void wait_group_events(int, event_t*); #define PREFETCH(type) \ void __OVERLOAD__ prefetch(const __global type*, size_t); \ void __OVERLOAD__ prefetch(const __global type##2*, size_t); \ void __OVERLOAD__ prefetch(const __global type##3*, size_t); \ void __OVERLOAD__ prefetch(const __global type##4*, size_t); \ void __OVERLOAD__ prefetch(const __global type##8*, size_t); \ void __OVERLOAD__ prefetch(const __global type##16*, size_t); PREFETCH(char); PREFETCH(uchar); PREFETCH(short); PREFETCH(ushort); PREFETCH(int); PREFETCH(uint); PREFETCH(long); PREFETCH(ulong); PREFETCH(float); PREFETCH(double); ////////////////////// // Atomic Functions // ////////////////////// #define ATOMIC_0ARG_DEF(name, type) \ type __OVERLOAD__ name(volatile __global type *p); \ type __OVERLOAD__ name(volatile __local type *p); #define ATOMIC_0ARG(name) \ ATOMIC_0ARG_DEF(atom_##name, int); \ ATOMIC_0ARG_DEF(atom_##name, uint); \ ATOMIC_0ARG_DEF(atomic_##name, int); \ ATOMIC_0ARG_DEF(atomic_##name, uint); #define ATOMIC_1ARG_DEF(name, type) \ type __OVERLOAD__ name(volatile __global type *p, type val); \ type __OVERLOAD__ name(volatile __local type *p, type val); #define ATOMIC_1ARG(name) \ ATOMIC_1ARG_DEF(atom_##name, int); \ ATOMIC_1ARG_DEF(atom_##name, uint); \ ATOMIC_1ARG_DEF(atomic_##name, int); \ ATOMIC_1ARG_DEF(atomic_##name, uint); ATOMIC_1ARG(add); ATOMIC_1ARG(and); ATOMIC_0ARG(dec); 
ATOMIC_0ARG(inc); ATOMIC_1ARG(max); ATOMIC_1ARG(min); ATOMIC_1ARG(or); ATOMIC_1ARG(sub); ATOMIC_1ARG(xchg); ATOMIC_1ARG_DEF(atom_xchg, float); ATOMIC_1ARG_DEF(atomic_xchg, float); ATOMIC_1ARG(xor); int __OVERLOAD__ atom_cmpxchg(volatile __global int *p, int cmp, int val); int __OVERLOAD__ atom_cmpxchg(volatile __local int *p, int cmp, int val); uint __OVERLOAD__ atom_cmpxchg(volatile __global uint *p, uint cmp, uint val); uint __OVERLOAD__ atom_cmpxchg(volatile __local uint *p, uint cmp, uint val); int __OVERLOAD__ atomic_cmpxchg(volatile __global int *p, int cmp, int val); int __OVERLOAD__ atomic_cmpxchg(volatile __local int *p, int cmp, int val); uint __OVERLOAD__ atomic_cmpxchg(volatile __global uint *p, uint cmp, uint val); uint __OVERLOAD__ atomic_cmpxchg(volatile __local uint *p, uint cmp, uint val); ////////////////////// // Common Functions // ////////////////////// #define ABS(type) \ u##type __OVERLOAD__ abs(type); \ u##type __OVERLOAD__ abs(u##type); #define ABS_DIFF(type) \ u##type __OVERLOAD__ abs_diff(type, type); \ u##type __OVERLOAD__ abs_diff(u##type, u##type); #define ABS_BOTH(type) \ ABS(type); \ ABS_DIFF(type); #define ABS_ALL(type) \ ABS_BOTH(type); \ ABS_BOTH(type##2); \ ABS_BOTH(type##3); \ ABS_BOTH(type##4); \ ABS_BOTH(type##8); \ ABS_BOTH(type##16); ABS_ALL(char); ABS_ALL(short); ABS_ALL(int); ABS_ALL(long); BUILTIN_3ARG_FLOATS(clamp); BUILTIN_1ARG_FLOATS(degrees); BUILTIN_2ARG_FLOATS(max); BUILTIN_2ARG_FLOATS(min); BUILTIN_3ARG_FLOATS(mix); BUILTIN_1ARG_FLOATS(radians); BUILTIN_1ARG_FLOATS(sign); BUILTIN_3ARG_FLOATS(smoothstep); BUILTIN_2ARG_FLOATS(step); #define COMMON_SCALAR(type, n) \ type##n __OVERLOAD__ clamp(type##n, type, type); \ type##n __OVERLOAD__ max(type##n, type); \ type##n __OVERLOAD__ min(type##n, type); \ type##n __OVERLOAD__ mix(type##n, type##n, type); \ type##n __OVERLOAD__ smoothstep(type, type, type##n); \ type##n __OVERLOAD__ step(type, type##n); COMMON_SCALAR(float, 2); COMMON_SCALAR(float, 3); COMMON_SCALAR(float, 
4); COMMON_SCALAR(float, 8); COMMON_SCALAR(float, 16); COMMON_SCALAR(double, 2); COMMON_SCALAR(double, 3); COMMON_SCALAR(double, 4); COMMON_SCALAR(double, 8); COMMON_SCALAR(double, 16); ///////////////////////// // Geometric Functions // ///////////////////////// #define GEOM_1ARG(type, name) \ type __OVERLOAD__ name(type); \ type __OVERLOAD__ name(type##2); \ type __OVERLOAD__ name(type##3); \ type __OVERLOAD__ name(type##4); \ type __OVERLOAD__ name(type##8); \ type __OVERLOAD__ name(type##16); #define GEOM_2ARG(type, name) \ type __OVERLOAD__ name(type, type); \ type __OVERLOAD__ name(type##2, type##2); \ type __OVERLOAD__ name(type##3, type##3); \ type __OVERLOAD__ name(type##4, type##4); \ type __OVERLOAD__ name(type##8, type##8); \ type __OVERLOAD__ name(type##16, type##16); float4 __OVERLOAD__ cross(float4, float4); float3 __OVERLOAD__ cross(float3, float3); double4 __OVERLOAD__ cross(double4, double4); double3 __OVERLOAD__ cross(double3, double3); GEOM_2ARG(float, dot); GEOM_2ARG(double, dot); GEOM_2ARG(float, distance); GEOM_2ARG(double, distance); GEOM_1ARG(float, length); GEOM_1ARG(double, length); BUILTIN_1ARG_FLOATS(normalize); GEOM_2ARG(float, fast_distance); GEOM_2ARG(double, fast_distance); GEOM_1ARG(float, fast_length); GEOM_1ARG(double, fast_length); BUILTIN_1ARG_FLOATS(fast_normalize); ///////////////////// // Image Functions // ///////////////////// size_t __OVERLOAD__ get_image_array_size(image1d_array_t image); size_t __OVERLOAD__ get_image_array_size(image2d_array_t image); int __OVERLOAD__ get_image_channel_data_type(image1d_t image); int __OVERLOAD__ get_image_channel_data_type(image1d_buffer_t image); int __OVERLOAD__ get_image_channel_data_type(image1d_array_t image); int __OVERLOAD__ get_image_channel_data_type(image2d_t image); int __OVERLOAD__ get_image_channel_data_type(image2d_array_t image); int __OVERLOAD__ get_image_channel_data_type(image3d_t image); int __OVERLOAD__ get_image_channel_order(image1d_t image); int __OVERLOAD__ 
get_image_channel_order(image1d_buffer_t image); int __OVERLOAD__ get_image_channel_order(image1d_array_t image); int __OVERLOAD__ get_image_channel_order(image2d_t image); int __OVERLOAD__ get_image_channel_order(image2d_array_t image); int __OVERLOAD__ get_image_channel_order(image3d_t image); int2 __OVERLOAD__ get_image_dim(image2d_t image); int2 __OVERLOAD__ get_image_dim(image2d_array_t image); int4 __OVERLOAD__ get_image_dim(image3d_t image); int __OVERLOAD__ get_image_depth(image3d_t image); int __OVERLOAD__ get_image_height(image2d_t image); int __OVERLOAD__ get_image_height(image2d_array_t image); int __OVERLOAD__ get_image_height(image3d_t image); int __OVERLOAD__ get_image_width(image1d_t image); int __OVERLOAD__ get_image_width(image1d_buffer_t image); int __OVERLOAD__ get_image_width(image1d_array_t image); int __OVERLOAD__ get_image_width(image2d_t image); int __OVERLOAD__ get_image_width(image2d_array_t image); int __OVERLOAD__ get_image_width(image3d_t image); float4 __OVERLOAD__ read_imagef(image1d_t, int); float4 __OVERLOAD__ read_imagef(image1d_buffer_t, int); float4 __OVERLOAD__ read_imagef(image1d_array_t, int2); float4 __OVERLOAD__ read_imagef(image2d_t, int2); float4 __OVERLOAD__ read_imagef(image2d_array_t, int4); float4 __OVERLOAD__ read_imagef(image3d_t, int4); float4 __OVERLOAD__ read_imagef(image1d_t, sampler_t, int); float4 __OVERLOAD__ read_imagef(image1d_t, sampler_t, float); float4 __OVERLOAD__ read_imagef(image1d_array_t, sampler_t, int2); float4 __OVERLOAD__ read_imagef(image1d_array_t, sampler_t, float2); float4 __OVERLOAD__ read_imagef(image2d_t, sampler_t, int2); float4 __OVERLOAD__ read_imagef(image2d_t, sampler_t, float2); float4 __OVERLOAD__ read_imagef(image2d_array_t, sampler_t, int4); float4 __OVERLOAD__ read_imagef(image2d_array_t, sampler_t, float4); float4 __OVERLOAD__ read_imagef(image3d_t, sampler_t, int4); float4 __OVERLOAD__ read_imagef(image3d_t, sampler_t, float4); int4 __OVERLOAD__ read_imagei(image1d_t, int); 
int4 __OVERLOAD__ read_imagei(image1d_buffer_t, int); int4 __OVERLOAD__ read_imagei(image1d_array_t, int2); int4 __OVERLOAD__ read_imagei(image2d_t, int2); int4 __OVERLOAD__ read_imagei(image2d_array_t, int4); int4 __OVERLOAD__ read_imagei(image3d_t, int4); int4 __OVERLOAD__ read_imagei(image1d_t, sampler_t, int); int4 __OVERLOAD__ read_imagei(image1d_t, sampler_t, float); int4 __OVERLOAD__ read_imagei(image1d_array_t, sampler_t, int2); int4 __OVERLOAD__ read_imagei(image1d_array_t, sampler_t, float2); int4 __OVERLOAD__ read_imagei(image2d_t, sampler_t, int2); int4 __OVERLOAD__ read_imagei(image2d_t, sampler_t, float2); int4 __OVERLOAD__ read_imagei(image2d_array_t, sampler_t, int4); int4 __OVERLOAD__ read_imagei(image2d_array_t, sampler_t, float4); int4 __OVERLOAD__ read_imagei(image3d_t, sampler_t, int4); int4 __OVERLOAD__ read_imagei(image3d_t, sampler_t, float4); uint4 __OVERLOAD__ read_imageui(image1d_t, int); uint4 __OVERLOAD__ read_imageui(image1d_buffer_t, int); uint4 __OVERLOAD__ read_imageui(image1d_array_t, int2); uint4 __OVERLOAD__ read_imageui(image2d_t, int2); uint4 __OVERLOAD__ read_imageui(image2d_array_t, int4); uint4 __OVERLOAD__ read_imageui(image3d_t, int4); uint4 __OVERLOAD__ read_imageui(image1d_t, sampler_t, int); uint4 __OVERLOAD__ read_imageui(image1d_t, sampler_t, float); uint4 __OVERLOAD__ read_imageui(image1d_array_t, sampler_t, int2); uint4 __OVERLOAD__ read_imageui(image1d_array_t, sampler_t, float2); uint4 __OVERLOAD__ read_imageui(image2d_t, sampler_t, int2); uint4 __OVERLOAD__ read_imageui(image2d_t, sampler_t, float2); uint4 __OVERLOAD__ read_imageui(image2d_array_t, sampler_t, int4); uint4 __OVERLOAD__ read_imageui(image2d_array_t, sampler_t, float4); uint4 __OVERLOAD__ read_imageui(image3d_t, sampler_t, int4); uint4 __OVERLOAD__ read_imageui(image3d_t, sampler_t, float4); void __OVERLOAD__ write_imagef(image1d_t, int, float4); void __OVERLOAD__ write_imagef(image1d_array_t, int2, float4); void __OVERLOAD__ write_imagef(image2d_t, 
int2, float4); void __OVERLOAD__ write_imagef(image2d_array_t, int4, float4); void __OVERLOAD__ write_imagef(image3d_t, int4, float4); void __OVERLOAD__ write_imagei(image1d_t, int, int4); void __OVERLOAD__ write_imagei(image1d_array_t, int2, int4); void __OVERLOAD__ write_imagei(image2d_t, int2, int4); void __OVERLOAD__ write_imagei(image2d_array_t, int4, int4); void __OVERLOAD__ write_imagei(image3d_t, int4, int4); void __OVERLOAD__ write_imageui(image1d_t, int, uint4); void __OVERLOAD__ write_imageui(image1d_array_t, int2, uint4); void __OVERLOAD__ write_imageui(image2d_t, int2, uint4); void __OVERLOAD__ write_imageui(image2d_array_t, int4, uint4); void __OVERLOAD__ write_imageui(image3d_t, int4, uint4); /////////////////////// // Integer Functions // /////////////////////// BUILTIN_2ARG_INTEGERS(add_sat); BUILTIN_3ARG_INTEGERS(clamp); BUILTIN_1ARG_INTEGERS(clz); BUILTIN_2ARG_INTEGERS(hadd); BUILTIN_3ARG(int, int, int, int, mad24); BUILTIN_3ARG(uint, uint, uint, uint, mad24); BUILTIN_3ARG_INTEGERS(mad_hi); BUILTIN_3ARG_INTEGERS(mad_sat); BUILTIN_2ARG_INTEGERS(max); BUILTIN_2ARG_INTEGERS(min); BUILTIN_2ARG(int, int, int, mul24); BUILTIN_2ARG(uint, uint, uint, mul24); BUILTIN_2ARG_INTEGERS(mul_hi); BUILTIN_1ARG_INTEGERS(popcount); BUILTIN_2ARG_INTEGERS(rhadd); BUILTIN_2ARG_INTEGERS(rotate); BUILTIN_2ARG_INTEGERS(sub_sat); #define UPSAMPLE_SIZES(out, in1, in2) \ out __OVERLOAD__ upsample(in1, in2); \ out##2 __OVERLOAD__ upsample(in1##2, in2##2); \ out##3 __OVERLOAD__ upsample(in1##3, in2##3); \ out##4 __OVERLOAD__ upsample(in1##4, in2##4); \ out##8 __OVERLOAD__ upsample(in1##8, in2##8); \ out##16 __OVERLOAD__ upsample(in1##16, in2##16); #define UPSAMPLE(out, in) \ UPSAMPLE_SIZES(out, in, u##in); \ UPSAMPLE_SIZES(u##out, u##in, u##in); UPSAMPLE(short, char); UPSAMPLE(int, short); UPSAMPLE(long, int); //////////////////// // Math Functions // //////////////////// #define BUILTIN_2TYPE_PTR(type1, type2, name) \ type1 __OVERLOAD__ name(type1, __global type2*); \ type1 
__OVERLOAD__ name(type1, __local type2*); \ type1 __OVERLOAD__ name(type1, __private type2*); #define BUILTIN_PTR_ARG(type1, type2, name) \ BUILTIN_2TYPE_PTR(type1, type2, name) \ BUILTIN_2TYPE_PTR(type1##2, type2##2, name) \ BUILTIN_2TYPE_PTR(type1##3, type2##3, name) \ BUILTIN_2TYPE_PTR(type1##4, type2##4, name) \ BUILTIN_2TYPE_PTR(type1##8, type2##8, name) \ BUILTIN_2TYPE_PTR(type1##16, type2##16, name); #define REMQUO(type, addrspace) \ type __OVERLOAD__ remquo(type, type, addrspace int*); \ type##2 __OVERLOAD__ remquo(type##2, type##2, addrspace int2*); \ type##3 __OVERLOAD__ remquo(type##3, type##3, addrspace int3*); \ type##4 __OVERLOAD__ remquo(type##4, type##4, addrspace int4*); \ type##8 __OVERLOAD__ remquo(type##8, type##8, addrspace int8*); \ type##16 __OVERLOAD__ remquo(type##16, type##16, addrspace int16*); BUILTIN_1ARG_FLOATS(acos); BUILTIN_1ARG_FLOATS(acosh); BUILTIN_1ARG_FLOATS(acospi); BUILTIN_1ARG_FLOATS(asin); BUILTIN_1ARG_FLOATS(asinh); BUILTIN_1ARG_FLOATS(asinpi); BUILTIN_1ARG_FLOATS(atan); BUILTIN_2ARG_FLOATS(atan2); BUILTIN_1ARG_FLOATS(atanh); BUILTIN_1ARG_FLOATS(atanpi); BUILTIN_2ARG_FLOATS(atan2pi); BUILTIN_1ARG_FLOATS(cbrt); BUILTIN_1ARG_FLOATS(ceil); BUILTIN_2ARG_FLOATS(copysign); BUILTIN_1ARG_FLOATS(cos); BUILTIN_1ARG_FLOATS(cosh); BUILTIN_1ARG_FLOATS(cospi); BUILTIN_1ARG_FLOATS(erfc); BUILTIN_1ARG_FLOATS(erf); BUILTIN_1ARG_FLOATS(exp); BUILTIN_1ARG_FLOATS(exp2); BUILTIN_1ARG_FLOATS(exp10); BUILTIN_1ARG_FLOATS(expm1); BUILTIN_1ARG_FLOATS(fabs); BUILTIN_2ARG_FLOATS(fdim); BUILTIN_1ARG_FLOATS(floor); BUILTIN_3ARG_FLOATS(fma); BUILTIN_2ARG_FLOATS(fmax); BUILTIN_2ARG_FLOATS(fmin); BUILTIN_2ARG_FLOATS(fmod); BUILTIN_PTR_ARG(float, float, fract); BUILTIN_PTR_ARG(double, double, fract); BUILTIN_PTR_ARG(float, int, frexp); BUILTIN_PTR_ARG(double, int, frexp); BUILTIN_2ARG_FLOATS(hypot); BUILTIN_1ARG(int, float, ilogb); BUILTIN_1ARG(int, double, ilogb); BUILTIN_2ARG(float, float, int, ldexp); BUILTIN_2ARG(double, double, int, ldexp); 
BUILTIN_1ARG_FLOATS(lgamma); BUILTIN_PTR_ARG(float, int, lgamma_r); BUILTIN_PTR_ARG(double, int, lgamma_r); BUILTIN_1ARG_FLOATS(log); BUILTIN_1ARG_FLOATS(log2); BUILTIN_1ARG_FLOATS(log10); BUILTIN_1ARG_FLOATS(log1p); BUILTIN_1ARG_FLOATS(logb); BUILTIN_3ARG_FLOATS(mad); BUILTIN_2ARG_FLOATS(maxmag); BUILTIN_2ARG_FLOATS(minmag); BUILTIN_PTR_ARG(float, float, modf); BUILTIN_PTR_ARG(double, double, modf); BUILTIN_1ARG(float, uint, nan); BUILTIN_1ARG(double, ulong, nan); BUILTIN_2ARG_FLOATS(nextafter); BUILTIN_2ARG_FLOATS(pow); BUILTIN_2ARG(float, float, int, pown); BUILTIN_2ARG(double, double, int, pown); BUILTIN_2ARG_FLOATS(powr); BUILTIN_2ARG_FLOATS(remainder); REMQUO(float, global); REMQUO(float, local); REMQUO(float, private); REMQUO(double, global); REMQUO(double, local); REMQUO(double, private); BUILTIN_1ARG_FLOATS(rint); BUILTIN_2ARG(float, float, int, rootn); BUILTIN_2ARG(double, double, int, rootn); BUILTIN_1ARG_FLOATS(round); BUILTIN_1ARG_FLOATS(rsqrt); BUILTIN_1ARG_FLOATS(sin); BUILTIN_1ARG_FLOATS(sinpi); BUILTIN_1ARG_FLOATS(sinh); BUILTIN_PTR_ARG(float, float, sincos); BUILTIN_PTR_ARG(double, double, sincos); BUILTIN_1ARG_FLOATS(sqrt); BUILTIN_1ARG_FLOATS(tan); BUILTIN_1ARG_FLOATS(tanh); BUILTIN_1ARG_FLOATS(tanpi); BUILTIN_1ARG_FLOATS(tgamma); BUILTIN_1ARG_FLOATS(trunc); // Native math functions BUILTIN_1ARG_FLOATS(half_cos); BUILTIN_1ARG_FLOATS(native_cos); BUILTIN_2ARG_FLOATS(half_divide); BUILTIN_2ARG_FLOATS(native_divide); BUILTIN_1ARG_FLOATS(half_exp); BUILTIN_1ARG_FLOATS(native_exp); BUILTIN_1ARG_FLOATS(half_exp2); BUILTIN_1ARG_FLOATS(native_exp2); BUILTIN_1ARG_FLOATS(half_exp10); BUILTIN_1ARG_FLOATS(native_exp10); BUILTIN_1ARG_FLOATS(half_log); BUILTIN_1ARG_FLOATS(native_log); BUILTIN_1ARG_FLOATS(half_log2); BUILTIN_1ARG_FLOATS(native_log2); BUILTIN_1ARG_FLOATS(half_log10); BUILTIN_1ARG_FLOATS(native_log10); BUILTIN_2ARG_FLOATS(half_powr); BUILTIN_2ARG_FLOATS(native_powr); BUILTIN_1ARG_FLOATS(half_recip); BUILTIN_1ARG_FLOATS(native_recip); 
BUILTIN_1ARG_FLOATS(half_rsqrt); BUILTIN_1ARG_FLOATS(native_rsqrt); BUILTIN_1ARG_FLOATS(half_sin); BUILTIN_1ARG_FLOATS(native_sin); BUILTIN_1ARG_FLOATS(half_sqrt); BUILTIN_1ARG_FLOATS(native_sqrt); BUILTIN_1ARG_FLOATS(half_tan); BUILTIN_1ARG_FLOATS(native_tan); //////////////////////////// // Misc. Vector Functions // //////////////////////////// #define SHUFFLE_TYPE(ret, type, mask) \ ret __OVERLOAD__ shuffle(type, mask); \ ret##2 __OVERLOAD__ shuffle(type, mask##2); \ ret##3 __OVERLOAD__ shuffle(type, mask##3); \ ret##4 __OVERLOAD__ shuffle(type, mask##4); \ ret##8 __OVERLOAD__ shuffle(type, mask##8); \ ret##16 __OVERLOAD__ shuffle(type, mask##16); #define SHUFFLE(type, mask) \ SHUFFLE_TYPE(type, type, mask); \ SHUFFLE_TYPE(type, type##2, mask); \ SHUFFLE_TYPE(type, type##3, mask); \ SHUFFLE_TYPE(type, type##4, mask); \ SHUFFLE_TYPE(type, type##8, mask); \ SHUFFLE_TYPE(type, type##16, mask); SHUFFLE(char, uchar); SHUFFLE(uchar, uchar); SHUFFLE(short, ushort); SHUFFLE(ushort, ushort); SHUFFLE(int, uint); SHUFFLE(uint, uint); SHUFFLE(long, ulong); SHUFFLE(ulong, ulong); SHUFFLE(float, uint); SHUFFLE(double, ulong); #define SHUFFLE2_TYPE(ret, type, mask) \ ret __OVERLOAD__ shuffle2(type, type, mask); \ ret##2 __OVERLOAD__ shuffle2(type, type, mask##2); \ ret##3 __OVERLOAD__ shuffle2(type, type, mask##3); \ ret##4 __OVERLOAD__ shuffle2(type, type, mask##4); \ ret##8 __OVERLOAD__ shuffle2(type, type, mask##8); \ ret##16 __OVERLOAD__ shuffle2(type, type, mask##16); #define SHUFFLE2(type, mask) \ SHUFFLE2_TYPE(type, type, mask); \ SHUFFLE2_TYPE(type, type##2, mask); \ SHUFFLE2_TYPE(type, type##3, mask); \ SHUFFLE2_TYPE(type, type##4, mask); \ SHUFFLE2_TYPE(type, type##8, mask); \ SHUFFLE2_TYPE(type, type##16, mask); SHUFFLE2(char, uchar); SHUFFLE2(uchar, uchar); SHUFFLE2(short, ushort); SHUFFLE2(ushort, ushort); SHUFFLE2(int, uint); SHUFFLE2(uint, uint); SHUFFLE2(long, ulong); SHUFFLE2(ulong, ulong); SHUFFLE2(float, uint); SHUFFLE2(double, ulong); 
////////////////////////// // Relational Functions // ////////////////////////// #define BUILTIN_ANYALL(name, type) \ int __OVERLOAD__ name(type); \ int __OVERLOAD__ name(type##2); \ int __OVERLOAD__ name(type##3); \ int __OVERLOAD__ name(type##4); \ int __OVERLOAD__ name(type##8); \ int __OVERLOAD__ name(type##16); #define REL_1ARG(name) \ BUILTIN_1ARG(int, float, name); \ BUILTIN_1ARG(long, double, name); #define REL_2ARG(name) \ BUILTIN_2ARG(int, float, float, name); \ BUILTIN_2ARG(long, double, double, name); BUILTIN_ANYALL(all, char); BUILTIN_ANYALL(all, short); BUILTIN_ANYALL(all, int); BUILTIN_ANYALL(all, long); BUILTIN_ANYALL(any, char); BUILTIN_ANYALL(any, short); BUILTIN_ANYALL(any, int); BUILTIN_ANYALL(any, long); BUILTIN_3ARG_FLOATS(bitselect); BUILTIN_3ARG_INTEGERS(bitselect); REL_2ARG(isequal); REL_2ARG(isnotequal); REL_2ARG(isgreater); REL_2ARG(isgreaterequal); REL_2ARG(isless); REL_2ARG(islessequal); REL_2ARG(islessgreater); REL_1ARG(isfinite); REL_1ARG(isinf); REL_1ARG(isnan); REL_1ARG(isnormal); REL_2ARG(isordered); REL_2ARG(isunordered); REL_1ARG(signbit); #define SELECT_TYPE(type, ctype) \ type __OVERLOAD__ select(type, type, ctype); \ type __OVERLOAD__ select(type, type, u##ctype); #define SELECT(type, ctype) \ SELECT_TYPE(type, ctype) \ SELECT_TYPE(type##2, ctype##2) \ SELECT_TYPE(type##3, ctype##3) \ SELECT_TYPE(type##4, ctype##4) \ SELECT_TYPE(type##8, ctype##8) \ SELECT_TYPE(type##16, ctype##16); SELECT(char, char); SELECT(uchar, char); SELECT(short, short); SELECT(ushort, short); SELECT(int, int); SELECT(uint, int); SELECT(long, long); SELECT(ulong, long); SELECT(float, int); SELECT(double, long); /////////////////////////////// // Synchronization Functions // /////////////////////////////// typedef uint cl_mem_fence_flags; #define CLK_LOCAL_MEM_FENCE (1<<0) #define CLK_GLOBAL_MEM_FENCE (1<<1) void barrier(cl_mem_fence_flags); void mem_fence(cl_mem_fence_flags); void read_mem_fence(cl_mem_fence_flags); void 
write_mem_fence(cl_mem_fence_flags); ////////////////////////////////////////// // Vector Data Load and Store Functions // ////////////////////////////////////////// #define VLOAD_ADDRSPACE(type, width) \ type##width __OVERLOAD__ vload##width(size_t, const __private type*); \ type##width __OVERLOAD__ vload##width(size_t, const __local type*); \ type##width __OVERLOAD__ vload##width(size_t, const __global type*); \ type##width __OVERLOAD__ vload##width(size_t, const __constant type*); #define VSTORE_ADDRSPACE(type, width) \ void __OVERLOAD__ vstore##width(type##width, size_t, __local type*); \ void __OVERLOAD__ vstore##width(type##width, size_t, __global type*); \ void __OVERLOAD__ vstore##width(type##width, size_t, __private type*); #define V_ADDRSPACE(macro, type) \ macro(type, 2) \ macro(type, 3) \ macro(type, 4) \ macro(type, 8) \ macro(type, 16); #define VLOADSTORE(type) \ V_ADDRSPACE(VLOAD_ADDRSPACE, type); \ V_ADDRSPACE(VSTORE_ADDRSPACE, type); VLOADSTORE(char); VLOADSTORE(uchar); VLOADSTORE(short); VLOADSTORE(ushort); VLOADSTORE(int); VLOADSTORE(uint); VLOADSTORE(long); VLOADSTORE(ulong); VLOADSTORE(float); VLOADSTORE(double); #define VLOAD_HALF_WIDTH(n) \ float##n __OVERLOAD__ vload_half##n(size_t, const __private half*); \ float##n __OVERLOAD__ vloada_half##n(size_t, const __private half*); \ float##n __OVERLOAD__ vload_half##n(size_t, const __local half*); \ float##n __OVERLOAD__ vloada_half##n(size_t, const __local half*); \ float##n __OVERLOAD__ vload_half##n(size_t, const __global half*); \ float##n __OVERLOAD__ vloada_half##n(size_t, const __global half*); \ float##n __OVERLOAD__ vload_half##n(size_t, const __constant half*); \ float##n __OVERLOAD__ vloada_half##n(size_t, const __constant half*); #define VSTORE_HALF_ADDRSPACE(func, type) \ void __OVERLOAD__ func(type, size_t, const __private half*); \ void __OVERLOAD__ func(type, size_t, const __local half*); \ void __OVERLOAD__ func(type, size_t, const __global half*); \ void __OVERLOAD__ func(type, 
size_t, const __constant half*); #define VSTORE_HALF_ROUND(func, type) \ VSTORE_HALF_ADDRSPACE(func, type); \ VSTORE_HALF_ADDRSPACE(func##_rte, type); \ VSTORE_HALF_ADDRSPACE(func##_rtz, type); \ VSTORE_HALF_ADDRSPACE(func##_rtp, type); \ VSTORE_HALF_ADDRSPACE(func##_rtn, type); #define VSTORE_HALF_WIDTH(n) \ VSTORE_HALF_ROUND(vstore_half##n, float##n); \ VSTORE_HALF_ROUND(vstorea_half##n, float##n); #define VLOADSTORE_HALF_WIDTH(n) \ VLOAD_HALF_WIDTH(n); \ VSTORE_HALF_WIDTH(n); VLOADSTORE_HALF_WIDTH(); VLOADSTORE_HALF_WIDTH(2); VLOADSTORE_HALF_WIDTH(3); VLOADSTORE_HALF_WIDTH(4); VLOADSTORE_HALF_WIDTH(8); VLOADSTORE_HALF_WIDTH(16); ///////////////////////// // Work-Item Functions // ///////////////////////// size_t get_global_id(uint dim); size_t get_global_size(uint dim); size_t get_global_offset(uint dim); size_t get_group_id(uint dim); size_t get_local_id(uint dim); size_t get_local_size(uint dim); size_t get_num_groups(uint dim); uint get_work_dim(void); ///////////////////// // Other Functions // ///////////////////// int printf(__constant char * restrict, ...); ///////////////// // Conversions // ///////////////// #define as_char( _x ) __builtin_astype( _x, char ) #define as_char2( _x ) __builtin_astype( _x, char2 ) #define as_char3( _x ) __builtin_astype( _x, char3 ) #define as_char4( _x ) __builtin_astype( _x, char4 ) #define as_char8( _x ) __builtin_astype( _x, char8 ) #define as_char16( _x ) __builtin_astype( _x, char16 ) #define as_uchar( _x ) __builtin_astype( _x, uchar ) #define as_uchar2( _x ) __builtin_astype( _x, uchar2 ) #define as_uchar3( _x ) __builtin_astype( _x, uchar3 ) #define as_uchar4( _x ) __builtin_astype( _x, uchar4 ) #define as_uchar8( _x ) __builtin_astype( _x, uchar8 ) #define as_uchar16( _x ) __builtin_astype( _x, uchar16 ) #define as_short( _x ) __builtin_astype( _x, short ) #define as_short2( _x ) __builtin_astype( _x, short2 ) #define as_short3( _x ) __builtin_astype( _x, short3 ) #define as_short4( _x ) __builtin_astype( _x, 
short4 ) #define as_short8( _x ) __builtin_astype( _x, short8 ) #define as_short16( _x ) __builtin_astype( _x, short16 ) #define as_ushort( _x ) __builtin_astype( _x, ushort ) #define as_ushort2( _x ) __builtin_astype( _x, ushort2 ) #define as_ushort3( _x ) __builtin_astype( _x, ushort3 ) #define as_ushort4( _x ) __builtin_astype( _x, ushort4 ) #define as_ushort8( _x ) __builtin_astype( _x, ushort8 ) #define as_ushort16( _x ) __builtin_astype( _x, ushort16 ) #define as_int( _x ) __builtin_astype( _x, int ) #define as_int2( _x ) __builtin_astype( _x, int2 ) #define as_int3( _x ) __builtin_astype( _x, int3 ) #define as_int4( _x ) __builtin_astype( _x, int4 ) #define as_int8( _x ) __builtin_astype( _x, int8 ) #define as_int16( _x ) __builtin_astype( _x, int16 ) #define as_uint( _x ) __builtin_astype( _x, uint ) #define as_uint2( _x ) __builtin_astype( _x, uint2 ) #define as_uint3( _x ) __builtin_astype( _x, uint3 ) #define as_uint4( _x ) __builtin_astype( _x, uint4 ) #define as_uint8( _x ) __builtin_astype( _x, uint8 ) #define as_uint16( _x ) __builtin_astype( _x, uint16 ) #define as_long( _x ) __builtin_astype( _x, long ) #define as_long2( _x ) __builtin_astype( _x, long2 ) #define as_long3( _x ) __builtin_astype( _x, long3 ) #define as_long4( _x ) __builtin_astype( _x, long4 ) #define as_long8( _x ) __builtin_astype( _x, long8 ) #define as_long16( _x ) __builtin_astype( _x, long16 ) #define as_ulong( _x ) __builtin_astype( _x, ulong ) #define as_ulong2( _x ) __builtin_astype( _x, ulong2 ) #define as_ulong3( _x ) __builtin_astype( _x, ulong3 ) #define as_ulong4( _x ) __builtin_astype( _x, ulong4 ) #define as_ulong8( _x ) __builtin_astype( _x, ulong8 ) #define as_ulong16( _x ) __builtin_astype( _x, ulong16 ) #define as_float( _x ) __builtin_astype( _x, float ) #define as_float2( _x ) __builtin_astype( _x, float2 ) #define as_float3( _x ) __builtin_astype( _x, float3 ) #define as_float4( _x ) __builtin_astype( _x, float4 ) #define as_float8( _x ) __builtin_astype( _x, 
float8 ) #define as_float16( _x ) __builtin_astype( _x, float16 ) #define as_double( _x ) __builtin_astype( _x, double ) #define as_double2( _x ) __builtin_astype( _x, double2 ) #define as_double3( _x ) __builtin_astype( _x, double3 ) #define as_double4( _x ) __builtin_astype( _x, double4 ) #define as_double8( _x ) __builtin_astype( _x, double8 ) #define as_double16( _x ) __builtin_astype( _x, double16 ) #define as_size_t( _x ) __builtin_astype( _x, size_t ) #define as_ptrdiff_t( _x ) __builtin_astype( _x, ptrdiff_t ) #define as_uintptr_t( _x ) __builtin_astype( _x, uintptr_t ) #define as_intptr_t( _x ) __builtin_astype( _x, intptr_t ) #define CONVERT_TYPE_SIZE(out, in) \ out __OVERLOAD__ convert_##out(in); \ out __OVERLOAD__ convert_##out##_rte(in); \ out __OVERLOAD__ convert_##out##_rtz(in); \ out __OVERLOAD__ convert_##out##_rtp(in); \ out __OVERLOAD__ convert_##out##_rtn(in); \ out __OVERLOAD__ convert_##out##_sat(in); \ out __OVERLOAD__ convert_##out##_sat_rte(in); \ out __OVERLOAD__ convert_##out##_sat_rtz(in); \ out __OVERLOAD__ convert_##out##_sat_rtp(in); \ out __OVERLOAD__ convert_##out##_sat_rtn(in); #define CONVERT_TYPE(out, in) \ CONVERT_TYPE_SIZE(out, in); \ CONVERT_TYPE_SIZE(out##2, in##2); \ CONVERT_TYPE_SIZE(out##3, in##3); \ CONVERT_TYPE_SIZE(out##4, in##4); \ CONVERT_TYPE_SIZE(out##8, in##8); \ CONVERT_TYPE_SIZE(out##16, in##16); #define CONVERT(out) \ CONVERT_TYPE(out, char); \ CONVERT_TYPE(out, uchar); \ CONVERT_TYPE(out, short); \ CONVERT_TYPE(out, ushort); \ CONVERT_TYPE(out, int); \ CONVERT_TYPE(out, uint); \ CONVERT_TYPE(out, long); \ CONVERT_TYPE(out, ulong); \ CONVERT_TYPE(out, float); \ CONVERT_TYPE(out, double); CONVERT(char); CONVERT(uchar); CONVERT(short); CONVERT(ushort); CONVERT(int); CONVERT(uint); CONVERT(long); CONVERT(ulong); CONVERT(float); CONVERT(double); Oclgrind-15.5/src/core/common.cpp000066400000000000000000000434231252441671000167300ustar00rootroot00000000000000// common.cpp (Oclgrind) // Copyright (c) 2013-2015, James 
Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "common.h" #if defined(_WIN32) && !defined(__MINGW32__) #include #else #include #endif #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/Support/raw_os_ostream.h" using namespace oclgrind; using namespace std; namespace oclgrind { _Size3_::_Size3_() { x = y = z = 0; } _Size3_::_Size3_(size_t _x, size_t _y, size_t _z) { x = _x; y = _y; z = _z; } _Size3_::_Size3_(size_t linear, _Size3_ dimensions) { x = linear % dimensions.x; y = (linear / dimensions.x) % dimensions.y; z = (linear / (dimensions.x * dimensions.y)); } size_t& Size3::operator[](unsigned i) { switch (i) { case 0: return x; case 1: return y; case 2: return z; default: assert(false && "Size3 index out of range"); } } const size_t& Size3::operator[](unsigned i) const { switch (i) { case 0: return x; case 1: return y; case 2: return z; default: assert(false && "Size3 index out of range"); } } bool Size3::operator==(const Size3& rhs) const { return x == rhs.x && y == rhs.y && z == rhs.z; } ostream& operator<<(ostream& stream, const Size3& size) { stream << dec << "(" << size.x << "," << size.y << "," << size.z << ")"; return stream; } double TypedValue::getFloat(unsigned index) const { switch (size) { case 4: return ((float*)data)[index]; case 8: return ((double*)data)[index]; default: FATAL_ERROR("Unsupported float size: %u bytes", size); } } size_t TypedValue::getPointer(unsigned index) const { if (size != sizeof(size_t)) { FATAL_ERROR("Unsupported pointer size: %u bytes", size); } return ((size_t*)data)[index]; } int64_t TypedValue::getSInt(unsigned index) const { switch (size) { case 1: return ((int8_t*)data)[index]; case 2: return ((int16_t*)data)[index]; case 4: return ((int32_t*)data)[index]; case 8: 
return ((int64_t*)data)[index]; default: FATAL_ERROR("Unsupported signed int size: %u bytes", size); } } uint64_t TypedValue::getUInt(unsigned index) const { switch (size) { case 1: return ((uint8_t*)data)[index]; case 2: return ((uint16_t*)data)[index]; case 4: return ((uint32_t*)data)[index]; case 8: return ((uint64_t*)data)[index]; default: FATAL_ERROR("Unsupported unsigned int size: %u bytes", size); } } void TypedValue::setFloat(double value, unsigned index) { switch (size) { case 4: ((float*)data)[index] = value; break; case 8: ((double*)data)[index] = value; break; default: FATAL_ERROR("Unsupported float size: %u bytes", size); } } void TypedValue::setPointer(size_t value, unsigned index) { if (size != sizeof(size_t)) { FATAL_ERROR("Unsupported pointer size: %u bytes", size); } ((size_t*)data)[index] = value; } void TypedValue::setSInt(int64_t value, unsigned index) { switch (size) { case 1: ((int8_t*)data)[index] = value; break; case 2: ((int16_t*)data)[index] = value; break; case 4: ((int32_t*)data)[index] = value; break; case 8: ((int64_t*)data)[index] = value; break; default: FATAL_ERROR("Unsupported signed int size: %u bytes", size); } } void TypedValue::setUInt(uint64_t value, unsigned index) { switch (size) { case 1: ((uint8_t*)data)[index] = value; break; case 2: ((uint16_t*)data)[index] = value; break; case 4: ((uint32_t*)data)[index] = value; break; case 8: ((uint64_t*)data)[index] = value; break; default: FATAL_ERROR("Unsupported unsigned int size: %u bytes", size); } } TypedValue TypedValue::clone() const { TypedValue result; result.size = size; result.num = num; result.data = new unsigned char[size*num]; memcpy(result.data, data, size*num); return result; } bool checkEnv(const char *var) { const char *value = getenv(var); return (value && !strcmp(value, "1")); } void dumpInstruction(ostream& out, const llvm::Instruction *instruction) { llvm::raw_os_ostream stream(out); instruction->print(stream); } const char* getAddressSpaceName(unsigned 
addrSpace) { switch (addrSpace) { case AddrSpacePrivate: return "private"; case AddrSpaceGlobal: return "global"; case AddrSpaceConstant: return "constant"; case AddrSpaceLocal: return "local"; default: return "(unknown)"; } } void getConstantData(unsigned char *data, const llvm::Constant *constant) { if (constant->getValueID() == llvm::Value::UndefValueVal) { return; } const llvm::Type *type = constant->getType(); unsigned size = getTypeSize(type); switch (type->getTypeID()) { case llvm::Type::IntegerTyID: memcpy(data, ((llvm::ConstantInt*)constant)->getValue().getRawData(), size); break; case llvm::Type::FloatTyID: { *(float*)data = ((llvm::ConstantFP*)constant)->getValueAPF().convertToFloat(); break; } case llvm::Type::DoubleTyID: { *(double*)data = ((llvm::ConstantFP*)constant)->getValueAPF().convertToDouble(); break; } case llvm::Type::VectorTyID: { unsigned num = type->getVectorNumElements(); const llvm::Type *elemType = type->getVectorElementType(); unsigned elemSize = getTypeSize(elemType); for (unsigned i = 0; i < num; i++) { getConstantData(data + i*elemSize, constant->getAggregateElement(i)); } break; } case llvm::Type::ArrayTyID: { unsigned num = type->getArrayNumElements(); const llvm::Type *elemType = type->getArrayElementType(); unsigned elemSize = getTypeSize(elemType); for (unsigned i = 0; i < num; i++) { getConstantData(data + i*elemSize, constant->getAggregateElement(i)); } break; } case llvm::Type::PointerTyID: { if (constant->getValueID() != llvm::Value::ConstantPointerNullVal) { FATAL_ERROR("Unsupported constant pointer value: %d", constant->getValueID()); } *(size_t*)data = 0; break; } case llvm::Type::StructTyID: { unsigned num = type->getStructNumElements(); for (unsigned i = 0; i < num; i++) { unsigned offset = getStructMemberOffset((const llvm::StructType*)type, i); getConstantData(data + offset, constant->getAggregateElement(i)); } break; } default: FATAL_ERROR("Unsupported constant type: %d", type->getTypeID()); } } const 
llvm::Instruction* getConstExprAsInstruction( const llvm::ConstantExpr *expr) { // Get operands unsigned numOperands = expr->getNumOperands(); llvm::Value **valueOperands = new llvm::Value*[numOperands]; for (unsigned i = 0; i < numOperands; i++) { valueOperands[i] = expr->getOperand(i); } llvm::ArrayRef operands(valueOperands, numOperands); // Create instruction unsigned opcode = expr->getOpcode(); switch (opcode) { case llvm::Instruction::Trunc: case llvm::Instruction::ZExt: case llvm::Instruction::SExt: case llvm::Instruction::FPTrunc: case llvm::Instruction::FPExt: case llvm::Instruction::UIToFP: case llvm::Instruction::SIToFP: case llvm::Instruction::FPToUI: case llvm::Instruction::FPToSI: case llvm::Instruction::PtrToInt: case llvm::Instruction::IntToPtr: case llvm::Instruction::BitCast: return llvm::CastInst::Create((llvm::Instruction::CastOps)opcode, operands[0], expr->getType()); case llvm::Instruction::Select: return llvm::SelectInst::Create(operands[0], operands[1], operands[2]); case llvm::Instruction::InsertElement: return llvm::InsertElementInst::Create(operands[0], operands[1], operands[2]); case llvm::Instruction::ExtractElement: return llvm::ExtractElementInst::Create(operands[0], operands[1]); case llvm::Instruction::InsertValue: return llvm::InsertValueInst::Create(operands[0], operands[1], expr->getIndices()); case llvm::Instruction::ExtractValue: return llvm::ExtractValueInst::Create(operands[0], expr->getIndices()); case llvm::Instruction::ShuffleVector: return new llvm::ShuffleVectorInst(operands[0], operands[1], operands[2]); case llvm::Instruction::GetElementPtr: if (((const llvm::GEPOperator*)expr)->isInBounds()) { return llvm::GetElementPtrInst::CreateInBounds(operands[0], operands.slice(1)); } else { #if LLVM_VERSION > 36 return llvm::GetElementPtrInst::Create(expr->getType(), operands[0], operands.slice(1)); #else return llvm::GetElementPtrInst::Create(operands[0], operands.slice(1)); #endif } case llvm::Instruction::ICmp: case 
llvm::Instruction::FCmp: return llvm::CmpInst::Create((llvm::Instruction::OtherOps)opcode, expr->getPredicate(), operands[0], operands[1]); default: assert(expr->getNumOperands() == 2 && "Must be binary operator?"); llvm::BinaryOperator *binaryOp = llvm::BinaryOperator::Create((llvm::Instruction::BinaryOps)opcode, operands[0], operands[1]); // Check for overflowing operator if (opcode == llvm::Instruction::Add || opcode == llvm::Instruction::Mul || opcode == llvm::Instruction::Shl || opcode == llvm::Instruction::Sub) { binaryOp->setHasNoUnsignedWrap( expr->getRawSubclassOptionalData() & llvm::OverflowingBinaryOperator::NoUnsignedWrap); binaryOp->setHasNoSignedWrap( expr->getRawSubclassOptionalData() & llvm::OverflowingBinaryOperator::NoSignedWrap); } // Check for possibly exact operator if (opcode == llvm::Instruction::AShr || opcode == llvm::Instruction::LShr || opcode == llvm::Instruction::SDiv || opcode == llvm::Instruction::UDiv) { binaryOp->setIsExact(expr->getRawSubclassOptionalData() & llvm::PossiblyExactOperator::IsExact); } return binaryOp; } } const llvm::ConstantInt* getMDOpAsConstInt(const llvm::MDOperand& op) { llvm::Metadata *md = op.get(); llvm::ConstantAsMetadata *cam = llvm::dyn_cast(md); if (!cam) return NULL; return llvm::dyn_cast(cam->getValue()); } unsigned getStructMemberOffset(const llvm::StructType *type, unsigned index) { bool packed = ((llvm::StructType*)type)->isPacked(); unsigned offset = 0; for (unsigned i = 0; i <= index; i++) { // Get member size and alignment const llvm::Type *elemType = type->getStructElementType(i); unsigned size = getTypeSize(elemType); unsigned align = getTypeAlignment(elemType); // Add padding if necessary if (!packed && offset % align) { offset += (align - (offset%align)); } if (i == index) { return offset; } offset += size; } // Unreachable assert(false); } unsigned getTypeSize(const llvm::Type *type) { if (type->isArrayTy()) { unsigned num = type->getArrayNumElements(); unsigned sz = 
getTypeSize(type->getArrayElementType()); return num*sz; } else if (type->isStructTy()) { bool packed = ((llvm::StructType*)type)->isPacked(); unsigned size = 0; unsigned alignment = 1; for (unsigned i = 0; i < type->getStructNumElements(); i++) { // Get member size and alignment const llvm::Type *elemType = type->getStructElementType(i); unsigned sz = getTypeSize(elemType); unsigned align = getTypeAlignment(elemType); // Add padding if necessary if (!packed && size % align) { size += (align - (size%align)); } size += sz; alignment = max(alignment, align); } // Alignment of struct should match member with largest alignment if (!packed && size % alignment) { size += (alignment - (size%alignment)); } return size; } else if (type->isVectorTy()) { unsigned num = type->getVectorNumElements(); unsigned sz = getTypeSize(type->getVectorElementType()); if (num == 3) num = 4; // Hack for 3-element vectors return num*sz; } else if (type->isPointerTy()) { return sizeof(size_t); } else { // For some reason, getScalarSizeInBits is not const llvm::Type* nonConstTy = const_cast(type); // Round up for types that have a bit size not multiple of 8 // like "bool". 
unsigned ret = nonConstTy->getScalarSizeInBits() / 8; if (nonConstTy->getScalarSizeInBits() % 8) ret++; return ret; } } /// Returns the byte alignment of this type unsigned getTypeAlignment(const llvm::Type* type) { using namespace llvm; // Array types are aligned to their element type if (const ArrayType* psAT = dyn_cast(type)) { return getTypeAlignment(psAT->getElementType()); } // Struct alignment is the size of its largest contained type if (const StructType* structT = dyn_cast(type)) { if (structT->isPacked()) return 1; StructType* nonConstTy = const_cast(structT); unsigned uAlign = 0, uMaxAlign = 1; unsigned uCount = structT->getNumElements(); for (unsigned i = 0; i < uCount; i++) { const Type* psElemType = nonConstTy->getTypeAtIndex(i); uAlign = getTypeAlignment(psElemType); if (uAlign > uMaxAlign) uMaxAlign = uAlign; } return uMaxAlign; } return getTypeSize(type); } pair getValueSize(const llvm::Value *value) { unsigned bits, numElements; const llvm::Type *type = value->getType(); if (type->isVectorTy()) { bits = type->getVectorElementType()->getPrimitiveSizeInBits(); numElements = type->getVectorNumElements(); } else if (type->isAggregateType()) { bits = getTypeSize(type)<<3; numElements = 1; } else { bits = type->getPrimitiveSizeInBits(); numElements = 1; } unsigned elemSize = bits >> 3; // Special case for pointer types if (type->isPointerTy()) { elemSize = sizeof(size_t); } // Special case for boolean results if (bits == 1) { elemSize = sizeof(bool); } return pair(elemSize,numElements); } bool isConstantOperand(const llvm::Value *operand) { unsigned id = operand->getValueID(); return (id >= llvm::Value::ConstantFirstVal && id <= llvm::Value::ConstantLastVal); } bool isVector3(const llvm::Value *value) { return (value->getType()->isVectorTy() && value->getType()->getVectorNumElements() == 3); } double now() { #if defined(_WIN32) && !defined(__MINGW32__) return time(NULL)*1e9; #else struct timeval tv; gettimeofday(&tv, NULL); return tv.tv_usec*1e3 + 
tv.tv_sec*1e9; #endif } void printTypedData(const llvm::Type *type, const unsigned char *data) { // TODO: Interpret other types (array, struct) unsigned size = getTypeSize(type); switch (type->getTypeID()) { case llvm::Type::FloatTyID: cout << *(float*)data; break; case llvm::Type::DoubleTyID: cout << *(double*)data; break; case llvm::Type::IntegerTyID: switch (size) { case 1: cout << (int)*(char*)data; break; case 2: cout << *(short*)data; break; case 4: cout << *(int*)data; break; case 8: cout << *(long*)data; break; default: cout << "(invalid integer size)"; break; } break; case llvm::Type::VectorTyID: { const llvm::Type *elemType = type->getVectorElementType(); cout << "("; for (unsigned i = 0; i < type->getVectorNumElements(); i++) { if (i > 0) { cout << ","; } printTypedData(elemType, data+i*getTypeSize(elemType)); } cout << ")"; break; } case llvm::Type::PointerTyID: cout << "0x" << hex << *(size_t*)data; break; default: cout << "(raw) 0x" << hex << uppercase << setfill('0'); for (unsigned i = 0; i < size; i++) { cout << setw(2) << (int)data[i]; } } } FatalError::FatalError(const string& msg, const string& file, size_t line) : std::runtime_error(msg) { m_file = file; m_line = line; } FatalError::~FatalError() throw() { } const string& FatalError::getFile() const { return m_file; } size_t FatalError::getLine() const { return m_line; } const char* FatalError::what() const throw() { return runtime_error::what(); } } Oclgrind-15.5/src/core/common.h000066400000000000000000000124171252441671000163740ustar00rootroot00000000000000// common.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#ifndef __common_h_
#define __common_h_

#include "config.h"
#include "CL/cl.h"
// NOTE(review): the header names of the following #include directives (and
// the template arguments of TypedValueMap below) are missing in this copy of
// the file - restore them from the upstream source before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define BIG_SEPARATOR   "================================"
#define SMALL_SEPARATOR "--------------------------------"

// On native Windows builds snprintf is mapped to _snprintf, and any existing
// ERROR macro is removed so it cannot clash with MessageType::ERROR below.
#if defined(_WIN32) && !defined(__MINGW32__)
#define snprintf _snprintf
#undef ERROR
#endif

// Forward declarations of the LLVM types used in this header's prototypes
namespace llvm
{
  class Constant;
  class ConstantExpr;
  class ConstantInt;
  class Instruction;
  class MDOperand;
  class StructType;
  class Type;
  class Value;
}

namespace oclgrind
{
  class Kernel;

  // Enumeration for address spaces
  enum AddressSpace
  {
    AddrSpacePrivate = 0,
    AddrSpaceGlobal = 1,
    AddrSpaceConstant = 2,
    AddrSpaceLocal = 3,
  };

  // Enumeration for atomic operation kinds
  enum AtomicOp
  {
    AtomicAdd,
    AtomicAnd,
    AtomicCmpXchg,
    AtomicDec,
    AtomicInc,
    AtomicMax,
    AtomicMin,
    AtomicOr,
    AtomicSub,
    AtomicXchg,
    AtomicXor,
  };

  // Enumeration for different log message types
  enum MessageType
  {
    DEBUG,
    INFO,
    WARNING,
    ERROR,
  };

  // 3-dimensional size
  typedef struct _Size3_
  {
    size_t x, y, z;
    _Size3_();
    _Size3_(size_t x, size_t y, size_t z);
    // Builds (x,y,z) from a linear index within the given dimensions
    _Size3_(size_t linear, _Size3_ dimensions);
    // Component access: 0 -> x, 1 -> y, 2 -> z
    size_t& operator[](unsigned i);
    const size_t& operator[](unsigned i) const;
    bool operator==(const _Size3_& rhs) const;
    friend std::ostream& operator<<(std::ostream& stream, const _Size3_& sz);
  } Size3;

  // Structure for a value with a size/type
  struct _TypedValue_
  {
    unsigned size;       // Size of a single element in bytes
    unsigned num;        // Number of elements
    unsigned char *data; // Raw storage (size*num bytes)

    // Returns a copy backed by a newly allocated buffer
    struct _TypedValue_ clone() const;

    // Element accessors; `index` selects the element to read or write
    double   getFloat(unsigned index = 0) const;
    size_t   getPointer(unsigned index = 0) const;
    int64_t  getSInt(unsigned index = 0) const;
    uint64_t getUInt(unsigned index = 0) const;
    void     setFloat(double value, unsigned index = 0);
    void     setPointer(size_t value, unsigned index = 0);
    void     setSInt(int64_t value, unsigned index = 0);
    void     setUInt(uint64_t value, unsigned index = 0);

  };
  typedef _TypedValue_ TypedValue;

  // Private memory map type
  typedef std::map TypedValueMap;

  // Image
object typedef struct { size_t address; cl_image_format format; cl_image_desc desc; } Image; // Check if an environment variable is set to 1 bool checkEnv(const char *var); // Output an instruction in human-readable format void dumpInstruction(std::ostream& out, const llvm::Instruction *instruction); // Get the human readable name of an address space const char* getAddressSpaceName(unsigned addrSpace); // Retrieve the raw data for a constant void getConstantData(unsigned char *data, const llvm::Constant *constant); // Creates an instruction from a constant expression const llvm::Instruction* getConstExprAsInstruction( const llvm::ConstantExpr *expr); // Get the ConstantInt object for an MDOperand const llvm::ConstantInt* getMDOpAsConstInt(const llvm::MDOperand& op); // Get the byte offset of a struct member unsigned getStructMemberOffset(const llvm::StructType *type, unsigned index); // Returns the size of a type unsigned getTypeSize(const llvm::Type *type); /// Returns the alignment requirements of this type unsigned getTypeAlignment(const llvm::Type* type); // Returns the size of a value std::pair getValueSize(const llvm::Value *value); // Returns true if the operand is a constant value bool isConstantOperand(const llvm::Value *operand); // Returns true if the value is a 3-element vector bool isVector3(const llvm::Value *value); // Return the current time in nanoseconds since the epoch double now(); // Print data in a human readable format (according to its type) void printTypedData(const llvm::Type *type, const unsigned char *data); // Exception class for raising fatal errors class FatalError : std::runtime_error { public: FatalError(const std::string& msg, const std::string& file, size_t line); ~FatalError() throw(); virtual const std::string& getFile() const; virtual size_t getLine() const; virtual const char* what() const throw(); protected: std::string m_file; size_t m_line; }; // Utility macro for raising an exception with a sprintf-based message #define 
FATAL_ERROR(format, ...) \ { \ int sz = snprintf(NULL, 0, format, ##__VA_ARGS__); \ char *str = new char[sz+1]; \ sprintf(str, format, ##__VA_ARGS__); \ string msg = str; \ delete[] str; \ throw FatalError(msg, __FILE__, __LINE__); \ } } #endif // __common_h_ Oclgrind-15.5/src/core/gen_clc_h.cmake000066400000000000000000000005471252441671000176370ustar00rootroot00000000000000set(OUTPUT src/core/clc_h.cpp) file(WRITE ${OUTPUT} "extern const char CLC_H_DATA[] = \n\"") file(READ ${SOURCE_FILE} CLC_H) string(REGEX REPLACE "\\\\" "\\\\\\\\" CLC_H "${CLC_H}") string(REGEX REPLACE "\"" "\\\\\"" CLC_H "${CLC_H}") string(REGEX REPLACE "\n" "\\\\n\"\n\"" CLC_H "${CLC_H}") file(APPEND ${OUTPUT} "${CLC_H}") file(APPEND ${OUTPUT} "\";") Oclgrind-15.5/src/core/gen_clc_h.sh000077500000000000000000000004011252441671000171610ustar00rootroot00000000000000#!/bin/bash if [ $# -ne 2 ] then echo "Usage: gen_clc_h.sh INPUT OUTPUT" exit 1 fi IN=$1 OUT=$2 echo "extern const char CLC_H_DATA[] =" >$OUT sed -e 's/\\/\\\\/g;s/"/\\"/g;s/^/"/;s/$/\\n"/' $IN >>$OUT if [ $? -ne 0 ] then exit 1 fi echo ";" >>$OUT Oclgrind-15.5/src/core/half.h000066400000000000000000000066401252441671000160170ustar00rootroot00000000000000// half.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "common.h"

// Convert a 16-bit half-precision bit pattern to a single-precision float.
// Zero, denormals, Inf and NaN are all handled; the conversion is exact
// because every half value is representable as a float.
static float halfToFloat(uint16_t half)
{
  uint16_t h_sign, h_exponent, h_mantissa;
  uint32_t f_sign, f_exponent, f_mantissa;

  h_sign     = half & 0x8000; // 1000 0000 0000 0000
  h_exponent = half & 0x7C00; // 0111 1100 0000 0000
  h_mantissa = half & 0x03FF; // 0000 0011 1111 1111

  f_sign = ((uint32_t)h_sign) << 16;

  if (h_exponent == 0)
  {
    if (h_mantissa == 0)
    {
      // Zero
      f_exponent = 0;
      f_mantissa = 0;
    }
    else
    {
      // Denorm - convert to normalized float
      // Shift left until the implicit leading one (bit 10) appears, counting
      // the shifts so the exponent can be adjusted to match
      int e = -1;
      do
      {
        e++;
        h_mantissa <<= 1;
      }
      while((h_mantissa & 0x0400) == 0);

      f_exponent = (-15 + 127 - e) << 23;
      f_mantissa = ((uint32_t)(h_mantissa & 0x03FF)) << 13;
    }
  }
  else if (h_exponent == 0x7C00)
  {
    // Inf or NaN
    // Align the payload with the top of the float mantissa, as for normalized
    // values, so that the most significant NaN payload bits are preserved
    f_exponent = 0xFF << 23;
    f_mantissa = ((uint32_t)h_mantissa) << 13;
  }
  else
  {
    // Normalized
    f_exponent = (((int32_t)(h_exponent >> 10)) - 15 + 127) << 23;
    f_mantissa = ((uint32_t)h_mantissa) << 13;
  }

  // Reinterpret the bits through a union (as floatToHalf does) rather than
  // through a pointer cast, which violates strict aliasing
  union
  {
    float f;
    uint32_t ui;
  } UItoF;
  UItoF.ui = f_sign | f_exponent | f_mantissa;
  return UItoF.f;
}

enum HalfRoundMode
{
  // Towards negative infinity
  Half_RTN,
  // Towards zero
  Half_RTZ,
  // Towards positive infinity
  Half_RTP,
  // Towards nearest even
  Half_RTE
};

// Convert a single-precision float to a 16-bit half-precision bit pattern.
// The requested rounding mode is only consulted when the result is a
// normalized half; the other paths behave the same for every mode.
static uint16_t floatToHalf(float sp, HalfRoundMode round = Half_RTZ)
{
  uint16_t h_sign, h_exponent, h_mantissa;
  uint32_t f_sign, f_exponent, f_mantissa;

  union
  {
    float f;
    uint32_t ui;
  } FtoUI;
  FtoUI.f = sp;
  uint32_t f = FtoUI.ui;
  f_sign     = f & 0x80000000; // 1000 0000 0000 0000 0000 0000 0000 0000
  f_exponent = f & 0x7F800000; // 0111 1111 1000 0000 0000 0000 0000 0000
  f_mantissa = f & 0x007FFFFF; // 0000 0000 0111 1111 1111 1111 1111 1111

  h_sign = f_sign >> 16;
  if (f_exponent == 0)
  {
    // Zero (single-precision denormals are also flushed to zero here)
    h_exponent = 0;
    h_mantissa = 0;
  }
  else if (f_exponent == 0x7F800000)
  {
    // Inf or NaN
    // Keep the ten most significant mantissa bits. Assigning the 23-bit
    // mantissa directly would be truncated to its low 16 bits, which turns
    // some NaNs into Inf and lets stray bits carry into the exponent/sign
    // when the fields are summed below.
    h_exponent = 0x7C00;
    h_mantissa = f_mantissa >> 13;
    if (f_mantissa != 0 && h_mantissa == 0)
    {
      // NaN whose payload lives only in the discarded bits - keep it a NaN
      h_mantissa = 0x0200;
    }
  }
  else
  {
    int e = (((int32_t)(f_exponent >> 23)) - 127 + 15);
    if (e >= 0x1F)
    {
      // Value will overflow
      h_exponent = 0x7C00;
      h_mantissa = 0;
    }
    else if (e <= 0)
    {
      // Value will underflow
      h_exponent = 0;
      if (14 - e > 24)
      {
        // Too small - flush to zero
        h_mantissa = 0;
      }
      else
      {
        // Convert to denorm
        // Restore the implicit leading one, shift into denormal position and
        // round up if the first discarded bit is set
        f_mantissa |= 0x800000;
        h_mantissa = (f_mantissa >> (14-e));
        if ((f_mantissa >> (13 - e)) & 0x1)
        {
          h_mantissa += 0x1;
        }
      }
    }
    else
    {
      // Normalized
      h_exponent = e << 10;
      h_mantissa = f_mantissa >> 13;
      // The current f_mantissa is done in RTZ
      if (round == Half_RTE && (f & 0x00001000) != 0)
      {
        // Round bit is set: round up unless this is an exact tie with an
        // even result (no sticky bits and least significant kept bit clear)
        if ((f & 0x00002FFF) != 0)
          h_mantissa += 1;
      }
      else if (round == Half_RTP)
      {
        // Compare the truncated value with the input to detect lost bits
        FtoUI.ui &= 0xFFFFE000;
        if (FtoUI.f < sp)
          h_mantissa += 1;
      }
      else if (round == Half_RTN)
      {
        FtoUI.ui &= 0xFFFFE000;
        if (sp < FtoUI.f)
          h_mantissa += 1;
      }
    }
  }

  // The fields are summed (not OR-ed) so that a mantissa that rounded up to
  // 0x400 carries into the exponent
  return h_sign + h_exponent + h_mantissa;
}
Oclgrind-15.5/src/install/000077500000000000000000000000001252441671000154445ustar00rootroot00000000000000
Oclgrind-15.5/src/install/INSTALL.darwin000066400000000000000000000014061252441671000177610ustar00rootroot00000000000000
To install Oclgrind, simply copy the bin, lib and include directories
to (for example) /usr/local/, ensuring that file modification times
are preserved. The easiest way to do this is with the following
command:

  sudo cp -rp {bin,lib,include} /usr/local

Alternatively, Oclgrind can be used from a non-system directory. To do
so, add $OCLGRIND_ROOT/bin to your PATH environment variable, and
$OCLGRIND_ROOT/lib to your DYLD_LIBRARY_PATH environment variable
(where $OCLGRIND_ROOT is the directory containing this file). If
copying Oclgrind to a new location, ensure that the -p flag is passed
to cp, to ensure that file modification times are preserved.

Information about using Oclgrind can be found on the GitHub wiki page:

  http://github.com/jrprice/Oclgrind/wiki
Oclgrind-15.5/src/install/INSTALL.linux000066400000000000000000000015471252441671000176420ustar00rootroot00000000000000
To install Oclgrind, simply copy the bin, lib and include directories
to (for example) /usr/local/, ensuring that file modification times
are preserved.
The easiest way to do this is with the following command:

  sudo cp -rp {bin,lib,include} /usr/local

Alternatively, Oclgrind can be used from a non-system directory. To do
so, add $OCLGRIND_ROOT/bin to your PATH environment variable, and
$OCLGRIND_ROOT/lib to your LD_LIBRARY_PATH environment variable (where
$OCLGRIND_ROOT is the directory containing this file). If copying
Oclgrind to a new location, ensure that the -p flag is passed to cp,
to ensure that file modification times are preserved.

To use Oclgrind with the OpenCL ICD loader (optional), copy
oclgrind.icd to /etc/OpenCL/vendors/.

Information about using Oclgrind can be found on the GitHub wiki page:

  http://github.com/jrprice/Oclgrind/wiki
Oclgrind-15.5/src/install/INSTALL.windows000066400000000000000000000006521252441671000201710ustar00rootroot00000000000000
To install Oclgrind, run 'install.bat' as an Administrator. This will
install Oclgrind to 'C:\Program Files\Oclgrind' and create a registry
entry for the OpenCL ICD loader.

Oclgrind can be uninstalled by running 'uninstall.bat' as an
Administrator.

Alternatively, Oclgrind can be run from any other directory. You will
need to manually create OpenCL ICD loading points by editing the
registry (see oclgrind-icd.reg).
Oclgrind-15.5/src/install/install.bat000066400000000000000000000010271252441671000176020ustar00rootroot00000000000000
@ECHO OFF

rem Work from the directory containing this script. /d also switches drive
rem and the quotes keep paths containing spaces intact.
cd /d "%~dp0" || goto :error

set "ROOT=%programfiles%\Oclgrind"

rem Only create the target directory if it is missing, so that installing
rem over an existing installation does not fail.
if not exist "%ROOT%\" (
  mkdir "%ROOT%" || goto :error
)

xcopy include "%ROOT%\include" /S /Y /I || goto :error
xcopy x86 "%ROOT%\x86" /S /Y /I || goto :error
xcopy x64 "%ROOT%\x64" /S /Y /I || goto :error
xcopy uninstall.bat "%ROOT%\" /Y || goto :error

regedit /S oclgrind-icd.reg || goto :error

goto :EOF

:error
echo INSTALLATION FAILED
echo Did you run as Administrator?
pause
rem Report the failure to whoever invoked the script
exit /b 1
Oclgrind-15.5/src/install/oclgrind-icd.reg000066400000000000000000000020221252441671000204750ustar00rootroot00000000000000ÿþWindows Registry Editor Version 5.00

[HKEY_LOCAL_MACHINE\SOFTWARE\Khronos]

[HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL]

[HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors]
"C:\\Program Files\\Oclgrind\\x64\\lib\\oclgrind-rt-icd.dll"=dword:00000000

[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Khronos]

[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Khronos\OpenCL]

[HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Khronos\OpenCL\Vendors]
"C:\\Program Files\\Oclgrind\\x86\\lib\\oclgrind-rt-icd.dll"=dword:00000000
Oclgrind-15.5/src/install/uninstall.bat000066400000000000000000000000721252441671000201440ustar00rootroot00000000000000
start /B "" cmd /C rmdir "%programfiles%\Oclgrind" /S /Q
Oclgrind-15.5/src/kernel/000077500000000000000000000000001252441671000152565ustar00rootroot00000000000000
Oclgrind-15.5/src/kernel/Simulation.cpp000066400000000000000000000442101252441671000201070ustar00rootroot00000000000000
// Simulation.cpp (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#include "config.h"

// NOTE(review): the four system header names were lost in this copy of the
// file; the ones below are reconstructed from what the code uses (assert,
// ceil, cout/cerr, string streams) - confirm against the original.
#include <cassert>
#include <cmath>
#include <iostream>
#include <sstream>

#include "core/Context.h"
#include "core/Kernel.h"
#include "core/KernelInvocation.h"
#include "core/Memory.h"
#include "core/Program.h"

#include "kernel/Simulation.h"

using namespace oclgrind;
using namespace std;

// Record what is currently being parsed, for use in error messages
#define PARSING(parsing) m_parsing = parsing;

// Convert an integer to char/uchar, checking if the value is valid
#define INT_TO_CHAR(intval, result) \
  result = intval;                  \
  if (result != intval)             \
  {                                 \
    throw "Invalid char value";     \
  }

// Utility to read a typed value from a stream
template<typename T> T readValue(istream& stream);

// Create a simulation with a fresh context and no program or kernel loaded
Simulation::Simulation()
{
  m_context = new Context();
  m_kernel = NULL;
  m_program = NULL;
}

Simulation::~Simulation()
{
  delete m_kernel;
  delete m_program;
  delete m_context;
}

// Print every element of a dumped buffer argument, interpreting the buffer
// contents as an array of T
template<typename T>
void Simulation::dumpArgument(DumpArg& arg)
{
  size_t num = arg.size / sizeof(T);
  T *data = new T[num];
  m_context->getGlobalMemory()->load((uint8_t*)data, arg.address, arg.size);

  for (size_t i = 0; i < num; i++)
  {
    cout << " " << arg.name << "[" << i << "] = ";
    // Print single-byte types as numbers rather than as characters
    if (sizeof(T) == 1)
      cout << (int)data[i];
    else
      cout << data[i];
    cout << endl;
  }
  cout << endl;

  delete[] data;
}

// Read the next whitespace-delimited value from the simulator file into
// result, skipping blank lines and '#' comments.
// Throws ifstream::failbit if the next token cannot be parsed as a T, or
// ifstream::eofbit/failbit if the file runs out of data.
template<typename T>
void Simulation::get(T& result)
{
  while (true)
  {
    // Check if line buffer has content
    streampos pos = m_lineBuffer.tellg();
    string token;
    m_lineBuffer >> token;
    if (!m_lineBuffer.fail())
    {
      // Line has content, rewind line buffer
      m_lineBuffer.clear();
      m_lineBuffer.seekg(pos);

      // Read value from line buffer
      m_lineBuffer >> result;
      if (m_lineBuffer.fail())
      {
        throw ifstream::failbit;
      }

      return;
    }

    // Only give up once the buffered line is exhausted AND the file has no
    // more data. Testing the file state after buffering a line (as a
    // do/while on m_simfile.good() would) drops the final line of a file
    // that does not end with a newline.
    if (!m_simfile.good())
    {
      break;
    }

    // Get next line
    string line;
    getline(m_simfile, line);
    m_lineNumber++;

    // Remove comments
    size_t comment = line.find_first_of('#');
    if (comment != string::npos)
    {
      line = line.substr(0, comment);
    }

    // Update line buffer
    m_lineBuffer.clear();
    m_lineBuffer.str(line);
  }

  // Couldn't read data from file, throw exception
  throw m_simfile.eof() ? ifstream::eofbit : ifstream::failbit;
}

// Load a simulator file: reads the program file name, kernel name, NDRange
// and work-group size, builds/loads the program, creates the kernel and
// parses one argument description per kernel argument.
// Returns false (after printing a message to stderr) on any failure.
bool Simulation::load(const char *filename)
{
  // Open simulator file
  m_lineNumber = 0;
  m_lineBuffer.setstate(ios_base::eofbit);
  m_simfile.open(filename);
  if (m_simfile.fail())
  {
    cerr << "Unable to open simulator file." << endl;
    return false;
  }

  try
  {
    // Read simulation parameters
    string progFileName;
    string kernelName;
    PARSING("program file");
    get(progFileName);
    PARSING("kernel");
    get(kernelName);
    PARSING("NDRange");
    get(m_ndrange.x);
    get(m_ndrange.y);
    get(m_ndrange.z);
    PARSING("work-group size");
    get(m_wgsize.x);
    get(m_wgsize.y);
    get(m_wgsize.z);

    // Ensure work-group size exactly divides NDRange
    // (zero is rejected first so that the modulo cannot divide by zero)
    if (!m_wgsize.x || !m_wgsize.y || !m_wgsize.z ||
        m_ndrange.x % m_wgsize.x ||
        m_ndrange.y % m_wgsize.y ||
        m_ndrange.z % m_wgsize.z)
    {
      cerr << "Work group size must divide NDRange exactly." << endl;
      return false;
    }

    // Open program file
    ifstream progFile;
    progFile.open(progFileName.c_str(), ios_base::in | ios_base::binary);
    if (!progFile.good())
    {
      cerr << "Unable to open " << progFileName << endl;
      return false;
    }

    // Check for LLVM bitcode magic numbers
    char magic[2] = {0,0};
    progFile.read(magic, 2);
    if (magic[0] == 0x42 && magic[1] == 0x43)
    {
      // Load bitcode
      progFile.close();
      m_program = Program::createFromBitcodeFile(m_context, progFileName);
      if (!m_program)
      {
        cerr << "Failed to load bitcode from " << progFileName << endl;
        return false;
      }
    }
    else
    {
      // Get size of file
      // The magic-number read leaves failbit/eofbit set for files shorter
      // than two bytes, which would make the seek/tell below fail
      progFile.clear();
      progFile.seekg(0, ios_base::end);
      streampos length = progFile.tellg();
      if (streamoff(length) < 0)
      {
        cerr << "Unable to open " << progFileName << endl;
        return false;
      }
      size_t sz = (size_t)length;
      progFile.seekg(0, ios_base::beg);

      // Load source
      char *data = new char[sz + 1];
      progFile.read(data, sz);
      progFile.close();
      data[sz] = '\0';
      m_program = new Program(m_context, data);
      delete[] data;

      // Build program
      if (!m_program->build(""))
      {
        cerr << "Build failure:" << endl << m_program->getBuildLog() << endl;
        return false;
      }
    }

    // Get kernel
    m_kernel = m_program->createKernel(kernelName);
    if (!m_kernel)
    {
      cerr << "Failed to create kernel " << kernelName << endl;
      return false;
    }

    // Clear global memory
    Memory *globalMemory = m_context->getGlobalMemory();
    globalMemory->clear();

    // Parse kernel arguments
    m_dumpArguments.clear();
    for (unsigned index = 0; index < m_kernel->getNumArguments(); index++)
    {
      parseArgument(index);
    }

    // Make sure there is no more input
    string next;
    m_simfile >> next;
    if (m_simfile.good() || !m_simfile.eof())
    {
      cerr << "Unexpected token '" << next << "' (expected EOF)" << endl;
      return false;
    }
  }
  catch (const char *err)
  {
    cerr << "Line " << m_lineNumber << ": " << err
         << " (" << m_parsing << ")" << endl;
    return false;
  }
  catch (const string& err)
  {
    // Messages built at runtime are thrown as std::string so that the text
    // is owned by the exception object rather than by a local that has
    // already been destroyed when the handler runs
    cerr << "Line " << m_lineNumber << ": " << err
         << " (" << m_parsing << ")" << endl;
    return false;
  }
  catch (ifstream::iostate e)
  {
    if (e == ifstream::eofbit)
    {
      cerr << "Unexpected EOF when parsing " << m_parsing << endl;
      return false;
    }
    else if (e == ifstream::failbit)
    {
      cerr << "Line " << m_lineNumber
           << ": Failed to parse " << m_parsing << endl;
      return false;
    }
    else
    {
      throw e;
    }
  }

  return true;
}

// Parse the description of kernel argument 'index' (a <...> header followed
// by optional data) from the simulator file and pass the resulting value to
// the kernel. Throws a const char* or std::string message on invalid input.
void Simulation::parseArgument(size_t index)
{
  // Argument parsing parameters
  size_t size = -1;
  cl_mem_flags flags = 0;
  ArgDataType type = TYPE_NONE;
  size_t typeSize = 0;
  bool null = false;
  bool dump = false;
  string fill = "";
  string range = "";
  string name = m_kernel->getArgumentName(index).str();

  // Set meaningful parsing status for error messages
  ostringstream stringstream;
  stringstream << "argument " << index << ": " << name;
  string formatted = stringstream.str();
  PARSING(formatted.c_str());

  // Get argument info
  size_t argSize = m_kernel->getArgumentSize(index);
  unsigned int addrSpace = m_kernel->getArgumentAddressQualifier(index);
  const llvm::StringRef argType = m_kernel->getArgumentTypeName(index);

  // Ensure we have an argument header
  char c;
  get(c);
  if (c != '<')
  {
    throw "Expected argument header <...>";
  }

  // Get header
  streampos startpos = m_lineBuffer.tellg();
  string headerStr;
  getline(m_lineBuffer, headerStr);
  size_t end = headerStr.find_last_of('>');
  if (end == string::npos)
  {
    throw "Missing '>' at end of argument header";
  }
  headerStr = headerStr.substr(0, end);

  // Move line buffer to end of header
  m_lineBuffer.clear();
  m_lineBuffer.seekg((int)startpos + headerStr.size() + 1);

  // Save format flags
  ios_base::fmtflags previousFormat = m_lineBuffer.flags();

  // Parse header
  istringstream header(headerStr);
  while (!header.eof())
  {
    // Get next header token
    string token;
    header >> token;
    if (header.fail())
    {
      break;
    }

#define MATCH_TYPE(str, value, sz)                  \
  else if (token == str)                            \
  {                                                 \
    if (type != TYPE_NONE)                          \
    {                                               \
      throw "Argument type defined multiple times"; \
    }                                               \
    type = value;                                   \
    typeSize = sz;                                  \
  }

    // Parse token
    if (false);
    MATCH_TYPE("char", TYPE_CHAR, 1)
    MATCH_TYPE("uchar", TYPE_UCHAR, 1)
    MATCH_TYPE("short", TYPE_SHORT, 2)
    MATCH_TYPE("ushort", TYPE_USHORT, 2)
    MATCH_TYPE("int", TYPE_INT, 4)
    MATCH_TYPE("uint", TYPE_UINT, 4)
    MATCH_TYPE("long", TYPE_LONG, 8)
    MATCH_TYPE("ulong", TYPE_ULONG, 8)
    MATCH_TYPE("float", TYPE_FLOAT, 4)
    MATCH_TYPE("double", TYPE_DOUBLE, 8)
    else if (token.compare(0, 4, "dump") == 0)
    {
      dump = true;
    }
    else if (token.compare(0, 4, "fill") == 0)
    {
      if (token.size() < 6 || token[4] != '=')
      {
        throw "Expected =VALUE after 'fill";
      }
      fill = token.substr(5);
    }
    else if (token == "hex")
    {
      m_lineBuffer.setf(ios_base::hex);
      m_lineBuffer.unsetf(ios_base::dec | ios_base::oct);
    }
    else if (token == "null")
    {
      if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL &&
          addrSpace != CL_KERNEL_ARG_ADDRESS_CONSTANT)
      {
        throw "'null' only valid for buffer arguments";
      }
      null = true;
    }
    else if (token.compare(0, 5, "range") == 0)
    {
      if (token.size() < 7 || token[5] != '=')
      {
        throw "Expected =START:INC:END after 'range";
      }
      range = token.substr(6);
    }
    else if (token == "ro")
    {
      if (flags & CL_MEM_WRITE_ONLY)
      {
        throw "'ro' and 'wo' are mutually exclusive";
      }
      if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL)
      {
        throw "'ro' only valid for global memory buffers";
      }
      flags |= CL_MEM_READ_ONLY;
    }
    else if (token.compare(0, 4, "size") == 0)
    {
      istringstream value(token.substr(4));
      char equals = 0;
      value >> equals;
      if (equals != '=')
      {
        throw "Expected = after 'size'";
      }

      value >> dec >> size;
      if (value.fail() || !value.eof())
      {
        throw "Invalid value for 'size'";
      }
    }
    else if (token == "wo")
    {
      if (flags & CL_MEM_READ_ONLY)
      {
        throw "'ro' and 'wo' are mutually exclusive";
      }
      if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL)
      {
        throw "'wo' only valid for global memory buffers";
      }
      flags |= CL_MEM_WRITE_ONLY;
    }
    else
    {
      // Thrown by value as a std::string: throwing err.c_str() would hand
      // the handler a pointer into a string destroyed during unwinding
      string err = "Unrecognised header token '";
      err += token;
      err += "'";
      throw err;
    }
  }

  // Ensure size given
  if (null)
  {
    if (size != -1 || !fill.empty() || !range.empty())
    {
      throw "'null' not valid with other argument descriptors";
    }
    size = 0;
  }
  else if (size == -1)
  {
    throw "size required";
  }

  if (type == TYPE_NONE)
  {
#define MATCH_TYPE_PREFIX(str, value, sz) \
  else if (argType.startswith(str))       \
  {                                       \
    type = value;                         \
    typeSize = sz;                        \
  }

    // Set default type using kernel introspection
    if (false);
    MATCH_TYPE_PREFIX("char", TYPE_CHAR, 1)
    MATCH_TYPE_PREFIX("uchar", TYPE_UCHAR, 1)
    MATCH_TYPE_PREFIX("short", TYPE_SHORT, 2)
    MATCH_TYPE_PREFIX("ushort", TYPE_USHORT, 2)
    MATCH_TYPE_PREFIX("int", TYPE_INT, 4)
    MATCH_TYPE_PREFIX("uint", TYPE_UINT, 4)
    MATCH_TYPE_PREFIX("long", TYPE_LONG, 8)
    MATCH_TYPE_PREFIX("ulong", TYPE_ULONG, 8)
    MATCH_TYPE_PREFIX("float", TYPE_FLOAT, 4)
    MATCH_TYPE_PREFIX("double", TYPE_DOUBLE, 8)
    MATCH_TYPE_PREFIX("void*", TYPE_UCHAR, 1)
    else
    {
      throw "Invalid default kernel argument type";
    }
  }

  // Ensure argument data size is a multiple of format type size
  if (size % typeSize)
  {
    throw "Initialiser type must exactly divide argument size";
  }

  // Ensure 'dump' only used with non-null buffers
  if (dump)
  {
    if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL &&
        addrSpace != CL_KERNEL_ARG_ADDRESS_CONSTANT)
    {
      throw "'dump' only valid for memory objects";
    }
    if (null)
    {
      throw "'dump' not valid with 'null' specifier";
    }
  }

  // Generate argument data
  TypedValue value;
  value.size = argSize;
  value.num = 1;
  if (addrSpace == CL_KERNEL_ARG_ADDRESS_LOCAL)
  {
    value.size = size;
    value.data = NULL;
  }
  else if (null)
  {
    value.data = new unsigned char[value.size];
    memset(value.data, 0, value.size);
  }
  else
  {
    // Parse argument data
    unsigned char *data = new unsigned char[size];
    if (!fill.empty())
    {
      istringstream fillStream(fill);
      fillStream.copyfmt(m_lineBuffer);

#define FILL_TYPE(type, T)                 \
  case type:                               \
    parseFill<T>(data, size, fillStream);  \
    break;

      switch (type)
      {
        FILL_TYPE(TYPE_CHAR, int8_t);
        FILL_TYPE(TYPE_UCHAR, uint8_t);
        FILL_TYPE(TYPE_SHORT, int16_t);
        FILL_TYPE(TYPE_USHORT, uint16_t);
        FILL_TYPE(TYPE_INT, int32_t);
        FILL_TYPE(TYPE_UINT, uint32_t);
        FILL_TYPE(TYPE_LONG, int64_t);
        FILL_TYPE(TYPE_ULONG, uint64_t);
        FILL_TYPE(TYPE_FLOAT, float);
        FILL_TYPE(TYPE_DOUBLE, double);
        default:
          throw "Invalid argument data type";
      }
    }
    else if (!range.empty())
    {
      istringstream rangeStream(range);
      rangeStream.copyfmt(m_lineBuffer);

#define RANGE_TYPE(type, T)                  \
  case type:                                 \
    parseRange<T>(data, size, rangeStream);  \
    break;

      switch (type)
      {
        RANGE_TYPE(TYPE_CHAR, int8_t);
        RANGE_TYPE(TYPE_UCHAR, uint8_t);
        RANGE_TYPE(TYPE_SHORT, int16_t);
        RANGE_TYPE(TYPE_USHORT, uint16_t);
        RANGE_TYPE(TYPE_INT, int32_t);
        RANGE_TYPE(TYPE_UINT, uint32_t);
        RANGE_TYPE(TYPE_LONG, int64_t);
        RANGE_TYPE(TYPE_ULONG, uint64_t);
        RANGE_TYPE(TYPE_FLOAT, float);
        RANGE_TYPE(TYPE_DOUBLE, double);
        default:
          throw "Invalid argument data type";
      }
    }
    else if (addrSpace != CL_KERNEL_ARG_ADDRESS_LOCAL)
    {
#define PARSE_TYPE(type, T)              \
  case type:                             \
    parseArgumentData<T>(data, size);    \
    break;

      switch (type)
      {
        PARSE_TYPE(TYPE_CHAR, int8_t);
        PARSE_TYPE(TYPE_UCHAR, uint8_t);
        PARSE_TYPE(TYPE_SHORT, int16_t);
        PARSE_TYPE(TYPE_USHORT, uint16_t);
        PARSE_TYPE(TYPE_INT, int32_t);
        PARSE_TYPE(TYPE_UINT, uint32_t);
        PARSE_TYPE(TYPE_LONG, int64_t);
        PARSE_TYPE(TYPE_ULONG, uint64_t);
        PARSE_TYPE(TYPE_FLOAT, float);
        PARSE_TYPE(TYPE_DOUBLE, double);
        default:
          throw "Invalid argument data type";
      }
    }

    if (addrSpace == CL_KERNEL_ARG_ADDRESS_PRIVATE)
    {
      // NOTE(review): value.size is the kernel's argument size while 'data'
      // holds 'size' bytes from the simulator file; nothing here checks that
      // size >= argSize - confirm how setArgument reads the value.
      value.data = data;
    }
    else
    {
      // Allocate buffer and store content
      Memory *globalMemory = m_context->getGlobalMemory();
      size_t address = globalMemory->allocateBuffer(size, flags);
      globalMemory->store((unsigned char*)&data[0], address, size);
      value.data = new unsigned char[value.size];
      value.setPointer(address);
      delete[] data;

      if (dump)
      {
        DumpArg dumpArg =
        {
          address,
          size,
          type,
          name,
        };
        m_dumpArguments.push_back(dumpArg);
      }
    }
  }

  // Set argument value
  m_kernel->setArgument(index, value);
  if (value.data)
  {
    delete[] value.data;
  }

  // Reset parsing format
  m_lineBuffer.flags(previousFormat);
}

// Read size/sizeof(T) values of type T from the simulator file into result
template<typename T>
void Simulation::parseArgumentData(unsigned char *result, size_t size)
{
  vector<T> data;
  const size_t count = size / sizeof(T);
  data.reserve(count);
  for (size_t i = 0; i < count; i++)
  {
    T value;
    if (sizeof(T) == 1)
    {
      // Single-byte types are written as numbers, not characters
      int intval;
      get(intval);
      INT_TO_CHAR(intval, value);
    }
    else
    {
      get(value);
    }
    data.push_back(value);
  }

  // Taking the address of element 0 is only valid for a non-empty vector
  if (!data.empty())
  {
    memcpy(result, &data[0], size);
  }
}

// Fill result with size/sizeof(T) copies of the single value held in 'fill'
template<typename T>
void Simulation::parseFill(unsigned char *result, size_t size,
                           istringstream& fill)
{
  T value = readValue<T>(fill);

  // Validate before using the value, so a failed read is never copied
  if (fill.fail() || !fill.eof())
  {
    throw "Invalid fill value";
  }

  const size_t count = size / sizeof(T);
  for (size_t i = 0; i < count; i++)
  {
    ((T*)result)[i] = value;
  }
}

// Fill result with the sequence START, START+INC, ..., END described by
// 'range' (formatted as START:INC:END); the sequence must occupy exactly
// 'size' bytes
template<typename T>
void Simulation::parseRange(unsigned char *result, size_t size,
                            istringstream& range)
{
  // Parse range format
  T values[3];
  for (int i = 0; i < 3; i++)
  {
    values[i] = readValue<T>(range);
    if (i < 2)
    {
      char colon = 0;
      range >> colon;
      if (range.fail() || colon != ':')
      {
        throw "Invalid range format";
      }
    }
  }
  if (range.fail() || !range.eof())
  {
    throw "Invalid range format";
  }

  // Ensure range is valid
  double num = (values[2] - values[0] + values[1]) / (double)values[1];
  if (ceil(num) != num || num*sizeof(T) != size)
  {
    throw "Range doesn't produce correct buffer size";
  }

  // Produce range values
  T value = values[0];
  for (size_t i = 0; i < num; i++)
  {
    ((T*)result)[i] = value;
    value += values[1];
  }
}

// Run the loaded kernel over the configured NDRange, then print any
// arguments marked with 'dump' and, optionally, all of global memory
void Simulation::run(bool dumpGlobalMemory)
{
  assert(m_kernel && m_program);
  assert(m_kernel->allArgumentsSet());

  Size3 offset(0, 0, 0);
  KernelInvocation::run(m_context, m_kernel, 3, offset, m_ndrange, m_wgsize);

  // Dump individual arguments
  cout << dec;
  list<DumpArg>::iterator itr;
  for (itr = m_dumpArguments.begin(); itr != m_dumpArguments.end(); itr++)
  {
    cout << endl
         << "Argument '" << itr->name << "': "
         << itr->size << " bytes" << endl;

#define DUMP_TYPE(type, T)   \
  case type:                 \
    dumpArgument<T>(*itr);   \
    break;

    switch (itr->type)
    {
      DUMP_TYPE(TYPE_CHAR, char);
      DUMP_TYPE(TYPE_UCHAR, uint8_t);
      DUMP_TYPE(TYPE_SHORT, int16_t);
      DUMP_TYPE(TYPE_USHORT, uint16_t);
      DUMP_TYPE(TYPE_INT, int32_t);
      DUMP_TYPE(TYPE_UINT, uint32_t);
      DUMP_TYPE(TYPE_LONG, int64_t);
      DUMP_TYPE(TYPE_ULONG, uint64_t);
      DUMP_TYPE(TYPE_FLOAT, float);
      DUMP_TYPE(TYPE_DOUBLE, double);
      default:
        throw "Invalid argument data type";
    }
  }

  // Dump global memory if required
  if (dumpGlobalMemory)
  {
    cout << endl << "Global Memory:" << endl;
    m_context->getGlobalMemory()->dump();
  }
}

// Read one value of type T from a stream; single-byte types are read as
// integers and range-checked rather than read as characters
template<typename T>
T readValue(istream& stream)
{
  T value = T();
  if (sizeof(T) == 1)
  {
    int intval = 0;
    stream >> intval;
    INT_TO_CHAR(intval, value);
  }
  else
  {
    stream >> value;
  }
  return value;
}
Oclgrind-15.5/src/kernel/Simulation.h000066400000000000000000000033751252441671000175630ustar00rootroot00000000000000
// Simulation.h (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#ifndef OCLGRIND_KERNEL_SIMULATION_H
#define OCLGRIND_KERNEL_SIMULATION_H

#include "core/common.h"

// NOTE(review): the four system header names were lost in this copy of the
// file; the ones below are reconstructed from the std types the class uses
// (ifstream, list, istringstream, string) - confirm against the original.
#include <fstream>
#include <list>
#include <sstream>
#include <string>

namespace oclgrind
{
  class Context;
  class Kernel;
  class Program;
};

// Loads a simulator file describing a single kernel invocation (program,
// kernel name, NDRange, work-group size and argument data) and runs it.
class Simulation
{
  // Element type used to parse and dump argument data
  enum ArgDataType
  {
    TYPE_NONE,
    TYPE_CHAR,
    TYPE_UCHAR,
    TYPE_SHORT,
    TYPE_USHORT,
    TYPE_INT,
    TYPE_UINT,
    TYPE_LONG,
    TYPE_ULONG,
    TYPE_FLOAT,
    TYPE_DOUBLE,
  };

public:
  Simulation();
  virtual ~Simulation();

  // Parse the simulator file; returns false if it could not be loaded
  bool load(const char *filename);
  // Run the loaded kernel, optionally dumping global memory afterwards
  void run(bool dumpGlobalMemory=false);

private:
  // The raw pointers below are owned by this object, so copying a Simulation
  // would lead to them being deleted twice
  Simulation(const Simulation&) = delete;
  Simulation& operator=(const Simulation&) = delete;

  oclgrind::Context *m_context;
  oclgrind::Kernel *m_kernel;
  oclgrind::Program *m_program;

  oclgrind::Size3 m_ndrange;
  oclgrind::Size3 m_wgsize;

  std::ifstream m_simfile;
  std::string m_parsing;        // What is being parsed (for error messages)
  size_t m_lineNumber;
  std::istringstream m_lineBuffer;

  // A buffer argument whose contents are printed after the kernel has run
  typedef struct
  {
    size_t address;
    size_t size;
    ArgDataType type;
    std::string name;
  } DumpArg;
  std::list<DumpArg> m_dumpArguments;

  template<typename T>
  void dumpArgument(DumpArg& arg);
  template<typename T>
  void get(T& result);
  void parseArgument(size_t index);
  template<typename T>
  void parseArgumentData(unsigned char *result, size_t size);
  template<typename T>
  void parseFill(unsigned char *result, size_t size,
                 std::istringstream& fill);
  template<typename T>
  void parseRange(unsigned char *result, size_t size,
                  std::istringstream& range);
};

#endif // OCLGRIND_KERNEL_SIMULATION_H
Oclgrind-15.5/src/kernel/oclgrind-kernel.cpp000066400000000000000000000145271252441671000210520ustar00rootroot00000000000000
// main.cpp (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#include "config.h"

// NOTE(review): the four system header names were lost in this copy of the
// file; the ones below are reconstructed from what the code uses (exit,
// setenv, strcmp, cout/cerr) - confirm against the original.
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>

#include "kernel/Simulation.h"

using namespace oclgrind;
using namespace std;

static bool outputGlobalMemory = false;
static const char *simfile = NULL;

static bool parseArguments(int argc, char *argv[]);
static void printUsage();
static void setEnvironment(const char *name, const char *value);

// Options that take no argument and set an environment variable to "1"
struct FlagOption
{
  const char *shortName; // NULL if the option has no short form
  const char *longName;
  const char *variable;
};
static const FlagOption FLAG_OPTIONS[] =
{
  {NULL, "--data-races",     "OCLGRIND_DATA_RACES"},
  {NULL, "--disable-pch",    "OCLGRIND_DISABLE_PCH"},
  {NULL, "--dump-spir",      "OCLGRIND_DUMP_SPIR"},
  {NULL, "--inst-counts",    "OCLGRIND_INST_COUNTS"},
  {"-i", "--interactive",    "OCLGRIND_INTERACTIVE"},
  {"-q", "--quick",          "OCLGRIND_QUICK"},
  {NULL, "--uniform-writes", "OCLGRIND_UNIFORM_WRITES"},
};

// Options whose following argument is stored in an environment variable
struct ValueOption
{
  const char *name;
  const char *variable;
};
static const ValueOption VALUE_OPTIONS[] =
{
  {"--build-options", "OCLGRIND_BUILD_OPTIONS"},
  {"--log",           "OCLGRIND_LOG"},
  {"--max-errors",    "OCLGRIND_MAX_ERRORS"},
  {"--num-threads",   "OCLGRIND_NUM_THREADS"},
  {"--pch-dir",       "OCLGRIND_PCH_DIR"},
  {"--plugins",       "OCLGRIND_PLUGINS"},
};

// Returns true if arg matches the (optional) short or the long option name
static bool matches(const char *arg, const char *shortName,
                    const char *longName)
{
  return (shortName && !strcmp(arg, shortName)) || !strcmp(arg, longName);
}

// Returns the environment variable set by a flag option, or NULL if arg is
// not one of FLAG_OPTIONS
static const char* findFlagVariable(const char *arg)
{
  const size_t count = sizeof(FLAG_OPTIONS) / sizeof(FLAG_OPTIONS[0]);
  for (size_t i = 0; i < count; i++)
  {
    if (matches(arg, FLAG_OPTIONS[i].shortName, FLAG_OPTIONS[i].longName))
    {
      return FLAG_OPTIONS[i].variable;
    }
  }
  return NULL;
}

// Returns the environment variable set by an option that takes a value, or
// NULL if arg is not one of VALUE_OPTIONS
static const char* findValueVariable(const char *arg)
{
  const size_t count = sizeof(VALUE_OPTIONS) / sizeof(VALUE_OPTIONS[0]);
  for (size_t i = 0; i < count; i++)
  {
    if (!strcmp(arg, VALUE_OPTIONS[i].name))
    {
      return VALUE_OPTIONS[i].variable;
    }
  }
  return NULL;
}

int main(int argc, char *argv[])
{
  // Parse arguments
  if (!parseArguments(argc, argv))
  {
    return 1;
  }

  // Initialise simulation
  Simulation simulation;
  if (!simulation.load(simfile))
  {
    return 1;
  }

  // Run simulation
  simulation.run(outputGlobalMemory);

  return 0;
}

// Parse the command line: options are translated into OCLGRIND_* environment
// variables (or the globals above) and the single positional argument is
// taken as the simulator file. Returns false if the arguments are invalid.
static bool parseArguments(int argc, char *argv[])
{
  for (int i = 1; i < argc; i++)
  {
    const char *arg = argv[i];

    if (const char *variable = findFlagVariable(arg))
    {
      setEnvironment(variable, "1");
    }
    else if (const char *variable = findValueVariable(arg))
    {
      if (++i >= argc)
      {
        cerr << "Missing argument to " << arg << endl;
        return false;
      }
      setEnvironment(variable, argv[i]);
    }
    else if (matches(arg, "-g", "--global-mem"))
    {
      outputGlobalMemory = true;
    }
    else if (matches(arg, "-h", "--help"))
    {
      printUsage();
      exit(0);
    }
    else if (matches(arg, "-v", "--version"))
    {
      cout << endl;
      cout << "Oclgrind " PACKAGE_VERSION << endl;
      cout << endl;
      cout << "Copyright (c) 2013-2015" << endl;
      cout << "James Price and Simon McIntosh-Smith, University of Bristol"
           << endl;
      cout << "https://github.com/jrprice/Oclgrind" << endl;
      cout << endl;
      exit(0);
    }
    else if (arg[0] == '-')
    {
      cerr << "Unrecognised option '" << arg << "'" << endl;
      return false;
    }
    else
    {
      if (simfile == NULL)
      {
        simfile = arg;
      }
      else
      {
        cerr << "Unexpected positional argument '" << arg << "'" << endl;
        return false;
      }
    }
  }

  if (simfile == NULL)
  {
    printUsage();
    return false;
  }

  return true;
}

// Print command-line usage information to stdout
// NOTE(review): runs of spaces inside these literals appear to have been
// collapsed in this copy of the file; restore the column alignment if the
// original had it.
static void printUsage()
{
  cout
    << "Usage: oclgrind-kernel [OPTIONS] simfile" << endl
    << " oclgrind-kernel [--help | --version]" << endl
    << endl
    << "Options:" << endl
    << " --build-options OPTIONS "
       "Additional options to pass to the OpenCL compiler" << endl
    << " --data-races "
       "Enable data-race detection" << endl
    << " --disable-pch "
       "Don't use precompiled headers" << endl
    << " --dump-spir "
       "Dump SPIR to /tmp/oclgrind_*.{ll,bc}" << endl
    << " -g --global-mem "
       "Output global memory at exit" << endl
    << " -h --help "
       "Display usage information" << endl
    << " --inst-counts "
       "Output histograms of instructions executed" << endl
    << " -i --interactive "
       "Enable interactive mode" << endl
    << " --log LOGFILE "
       "Redirect log/error messages to a file" << endl
    << " --max-errors NUM "
       "Limit the number of error/warning messages" << endl
    << " --num-threads NUM "
       "Set the number of worker threads to use" << endl
    << " --pch-dir DIR "
       "Override directory containing precompiled headers" << endl
    << " --plugins PLUGINS "
       "Load colon separated list of plugin libraries" << endl
    << " -q --quick "
       "Only run first and last work-group" << endl
    << " --uniform-writes "
       "Don't suppress uniform write-write data-races" << endl
    << " -v --version "
       "Display version information" << endl
    << endl
    << "For more information, please visit the Oclgrind wiki page:" << endl
    << "-> https://github.com/jrprice/Oclgrind/wiki" << endl
    << endl;
}

// Set an environment variable for this process, overwriting any existing
// value; a failure is reported on stderr but is not fatal
static void setEnvironment(const char *name, const char *value)
{
#if defined(_WIN32) && !defined(__MINGW32__)
  const bool failed = (_putenv_s(name, value) != 0);
#else
  const bool failed = (setenv(name, value, 1) != 0);
#endif
  if (failed)
  {
    cerr << "Failed to set environment variable " << name << endl;
  }
}
Oclgrind-15.5/src/plugins/000077500000000000000000000000001252441671000154575ustar00rootroot00000000000000
Oclgrind-15.5/src/plugins/InstructionCounter.cpp000066400000000000000000000113711252441671000220470ustar00rootroot00000000000000
// InstructionCounter.cpp (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#include "core/common.h"

// NOTE(review): the name of this system header was lost in this copy of the
// file; <sstream> is presumed from the use of ostringstream - confirm.
#include <sstream>

#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"

#include "InstructionCounter.h"

#include "core/Kernel.h"
#include "core/KernelInvocation.h"

using namespace oclgrind;
using namespace std;

// Loads, stores and calls are counted under synthetic opcodes placed after
// LLVM's own opcode range: one slot per address space for loads, then for
// stores, followed by one slot per distinct called function.
#define COUNTED_ADDRESS_SPACES 8
#define COUNTED_LOAD_BASE (llvm::Instruction::OtherOpsEnd + 4)
#define COUNTED_STORE_BASE (COUNTED_LOAD_BASE + 8)
#define COUNTED_CALL_BASE (COUNTED_STORE_BASE + 8)

// Orders (name, count) pairs by descending count
static bool compareNamedCount(const pair<string,size_t>& a,
                              const pair<string,size_t>& b)
{
  return a.second > b.second;
}

// Returns the locale selected by the user's environment. Constructing
// locale("") throws std::runtime_error when the environment names a locale
// that is not available, so fall back to the classic "C" locale.
static locale getUserLocale()
{
  try
  {
    return locale("");
  }
  catch (const runtime_error&)
  {
    return locale::classic();
  }
}

// Returns a printable name for a (possibly synthetic) opcode
string InstructionCounter::getOpcodeName(unsigned opcode) const
{
  if (opcode >= COUNTED_CALL_BASE)
  {
    // Get function name
    unsigned index = opcode - COUNTED_CALL_BASE;
    assert(index < m_functions.size());
    return "call " + m_functions[index]->getName().str() + "()";
  }
  else if (opcode >= COUNTED_LOAD_BASE)
  {
    // Create stream using default locale
    ostringstream name;
    name.imbue(getUserLocale());

    // Get number of bytes
    size_t bytes = m_memopBytes[opcode-COUNTED_LOAD_BASE];

    // Get name of operation
    if (opcode >= COUNTED_STORE_BASE)
    {
      opcode -= COUNTED_STORE_BASE;
      name << "store";
    }
    else
    {
      opcode -= COUNTED_LOAD_BASE;
      name << "load";
    }

    // Add address space to name
    name << " " << getAddressSpaceName(opcode);

    // Add number of bytes to name
    name << " (" << bytes << " bytes)";

    return name.str();
  }

  return llvm::Instruction::getOpcodeName(opcode);
}

// Count one executed instruction. Loads and stores are counted per address
// space (together with the number of bytes moved) and calls are counted per
// called function.
void InstructionCounter::instructionExecuted(
  const WorkItem *workItem, const llvm::Instruction *instruction,
  const TypedValue& result)
{
  unsigned opcode = instruction->getOpcode();

  // Check for loads and stores
  if (opcode == llvm::Instruction::Load || opcode == llvm::Instruction::Store)
  {
    // Track operations in separate address spaces
    bool load = (opcode == llvm::Instruction::Load);
    const llvm::Type *type = instruction->getOperand(load?0:1)->getType();
    unsigned addrSpace = type->getPointerAddressSpace();

    // Only address spaces that fit in the reserved slots get a synthetic
    // opcode; anything larger would alias another slot and index past the
    // end of m_memopBytes, so it is counted as a plain load/store instead
    if (addrSpace < COUNTED_ADDRESS_SPACES)
    {
      opcode = (load ? COUNTED_LOAD_BASE : COUNTED_STORE_BASE) + addrSpace;

      // Count total number of bytes loaded/stored
      unsigned bytes = getTypeSize(type->getPointerElementType());
      m_memopBytes[opcode-COUNTED_LOAD_BASE] += bytes;
    }
  }
  else if (opcode == llvm::Instruction::Call)
  {
    // Track distinct function calls
    const llvm::CallInst *callInst = (const llvm::CallInst*)instruction;
    const llvm::Function *function = callInst->getCalledFunction();
    if (function)
    {
      vector<const llvm::Function*>::iterator itr =
        find(m_functions.begin(), m_functions.end(), function);
      if (itr == m_functions.end())
      {
        opcode = COUNTED_CALL_BASE + m_functions.size();
        m_functions.push_back(function);
      }
      else
      {
        opcode = COUNTED_CALL_BASE + (itr - m_functions.begin());
      }
    }
  }

  if (opcode >= m_instructionCounts.size())
  {
    m_instructionCounts.resize(opcode+1);
  }
  m_instructionCounts[opcode]++;
}

bool InstructionCounter::isThreadSafe() const
{
  return false;
}

// Reset all counters at the start of a kernel invocation
void InstructionCounter::kernelBegin(const KernelInvocation *kernelInvocation)
{
  m_instructionCounts.clear();
  m_instructionCounts.resize(COUNTED_CALL_BASE);

  // One byte counter per address space for loads, then for stores
  m_memopBytes.clear();
  m_memopBytes.resize(2 * COUNTED_ADDRESS_SPACES);

  m_functions.clear();
}

// Print the instruction counts for the kernel, most frequent first
void InstructionCounter::kernelEnd(const KernelInvocation *kernelInvocation)
{
  // Load default locale
  locale previousLocale = cout.getloc();
  cout.imbue(getUserLocale());

  cout << "Instructions executed for kernel '"
       << kernelInvocation->getKernel()->getName() << "':";
  cout << endl;

  // Generate list named instructions and their counts
  vector< pair<string,size_t> > namedCounts;
  for (unsigned i = 0; i < m_instructionCounts.size(); i++)
  {
    if (m_instructionCounts[i] == 0)
    {
      continue;
    }

    // Calls to debug intrinsics are not reported
    string name = getOpcodeName(i);
    if (name.compare(0, 14, "call llvm.dbg.") == 0)
    {
      continue;
    }

    namedCounts.push_back(make_pair(name, m_instructionCounts[i]));
  }

  // Sort named counts
  sort(namedCounts.begin(), namedCounts.end(), compareNamedCount);

  // Output sorted instruction counts
  for (unsigned i = 0; i < namedCounts.size(); i++)
  {
    cout << setw(16) << dec << namedCounts[i].second << " - "
         << namedCounts[i].first << endl;
  }

  cout << endl;

  // Restore locale
  cout.imbue(previousLocale);
}
Oclgrind-15.5/src/plugins/InstructionCounter.h000066400000000000000000000021721252441671000215130ustar00rootroot00000000000000
// InstructionCounter.h (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.

#include "core/Plugin.h"

namespace llvm
{
  class Function;
}

namespace oclgrind
{
  // Plugin that counts the instructions executed by each kernel invocation
  class InstructionCounter : public Plugin
  {
  public:
    InstructionCounter(const Context *context) : Plugin(context){};

    virtual void instructionExecuted(const WorkItem *workItem,
                                     const llvm::Instruction *instruction,
                                     const TypedValue& result) override;
    virtual void kernelBegin(const KernelInvocation *kernelInvocation) override;
    virtual void kernelEnd(const KernelInvocation *kernelInvocation) override;

    virtual bool isThreadSafe() const override;

  private:
    // NOTE(review): the element types of these vectors were lost in this
    // copy of the file; they are reconstructed from how the members are used
    // in InstructionCounter.cpp - confirm against the original.
    std::vector<size_t> m_instructionCounts;
    std::vector<size_t> m_memopBytes;
    std::vector<const llvm::Function*> m_functions;

    std::string getOpcodeName(unsigned opcode) const;
  };
}
Oclgrind-15.5/src/plugins/InteractiveDebugger.cpp000066400000000000000000000614331252441671000221140ustar00rootroot00000000000000
// InteractiveDebugger.cpp (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#include "core/common.h" #include #include #if !defined(_WIN32) || defined(__MINGW32__) #include #endif #if HAVE_READLINE #include #include #endif #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "InteractiveDebugger.h" #include "core/Context.h" #include "core/Kernel.h" #include "core/KernelInvocation.h" #include "core/Memory.h" #include "core/Program.h" #include "core/WorkGroup.h" #include "core/WorkItem.h" using namespace oclgrind; using namespace std; #define LIST_LENGTH 10 static bool sigintBreak = false; #if !defined(_WIN32) || defined(__MINGW32__) static struct sigaction m_oldSignalHandler; void handleSignal(int s) { if (s == SIGINT) sigintBreak = true; } #endif InteractiveDebugger::InteractiveDebugger(const Context *context) : Plugin(context) { m_running = true; m_forceBreak = false; m_nextBreakpoint = 1; m_program = NULL; m_kernelInvocation = NULL; // Set-up commands #define ADD_CMD(name, sname, func) \ m_commands[name] = &InteractiveDebugger::func; \ m_commands[sname] = &InteractiveDebugger::func; ADD_CMD("backtrace", "bt", backtrace); ADD_CMD("break", "b", brk); ADD_CMD("continue", "c", cont); ADD_CMD("delete", "d", del); ADD_CMD("gmem", "gm", mem); ADD_CMD("help", "h", help); ADD_CMD("info", "i", info); ADD_CMD("list", "l", list); ADD_CMD("lmem", "lm", mem); ADD_CMD("next", "n", next); ADD_CMD("pmem", "pm", mem); ADD_CMD("print", "p", print); ADD_CMD("quit", "q", quit); ADD_CMD("step", "s", step); ADD_CMD("workitem", "wi", workitem); } void InteractiveDebugger::instructionExecuted( const WorkItem *workItem, const llvm::Instruction *instruction, const TypedValue& result) { if (!shouldShowPrompt(workItem)) return; #if !defined(_WIN32) || defined(__MINGW32__) // Restore old signal handler sigaction(SIGINT, &m_oldSignalHandler, NULL); #endif m_forceBreak = false; sigintBreak = false; // Print function if changed if (m_previousDepth != workItem->getCallStack().size() && 
workItem->getState() != WorkItem::FINISHED) { cout << "In function "; printFunction(workItem->getCurrentInstruction()); } printCurrentLine(); m_listPosition = 0; m_continue = false; m_next = false; while (true) { // Prompt for command bool eof = false; string cmd; #if HAVE_READLINE char *line = readline("(oclgrind) "); if (line) { cmd = line; free(line); } else { eof = true; } #else cout << "(oclgrind) " << flush; getline(cin, cmd); eof = cin.eof(); #endif // Quit on EOF if (eof) { cout << "(quit)" << endl; quit(vector()); return; } // Split command into tokens vector tokens; istringstream iss(cmd); copy(istream_iterator(iss), istream_iterator(), back_inserter< vector >(tokens)); // Skip empty lines if (tokens.size() == 0) { continue; } #if HAVE_READLINE add_history(cmd.c_str()); #endif // Find command in map and execute map::iterator itr = m_commands.find(tokens[0]); if (itr != m_commands.end()) { if ((this->*itr->second)(tokens)) break; } else { cout << "Unrecognized command '" << tokens[0] << "'" << endl; } } } bool InteractiveDebugger::isThreadSafe() const { return false; } void InteractiveDebugger::kernelBegin(const KernelInvocation *kernelInvocation) { m_continue = false; m_lastBreakLine = 0; m_listPosition = 0; m_next = false; m_previousDepth = 0; m_previousLine = 0; m_kernelInvocation = kernelInvocation; m_program = kernelInvocation->getKernel()->getProgram(); } void InteractiveDebugger::kernelEnd(const KernelInvocation *kernelInvocation) { m_kernelInvocation = NULL; #if !defined(_WIN32) || defined(__MINGW32__) // Restore old signal handler sigaction(SIGINT, &m_oldSignalHandler, NULL); #endif } void InteractiveDebugger::log(MessageType type, const char *message) { if (type == ERROR) m_forceBreak = true; } /////////////////////////// //// Utility Functions //// /////////////////////////// size_t InteractiveDebugger::getCurrentLineNumber() const { const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem || workItem->getState() == 
WorkItem::FINISHED) { return 0; } return getLineNumber(workItem->getCurrentInstruction()); } size_t InteractiveDebugger::getLineNumber( const llvm::Instruction *instruction) const { llvm::MDNode *md = instruction->getMetadata("dbg"); if (md) { #if LLVM_VERSION > 36 llvm::DILocation *loc = (llvm::DILocation*)md; return loc->getLine(); #else llvm::DILocation loc((llvm::MDLocation*)md); return loc.getLineNumber(); #endif } return 0; } bool InteractiveDebugger::hasHitBreakpoint() { if (m_breakpoints.empty()) return false; // Check if we have passed over the previous breakpoint if (m_lastBreakLine) { if (getCurrentLineNumber() != m_lastBreakLine) m_lastBreakLine = 0; else return false;; } // Check if we're at a breakpoint size_t line = getCurrentLineNumber(); map::iterator itr; for (itr = m_breakpoints[m_program].begin(); itr != m_breakpoints[m_program].end(); itr++) { if (itr->second == line) { cout << "Breakpoint " << itr->first << " hit at line " << itr->second << " by work-item " << m_kernelInvocation->getCurrentWorkItem()->getGlobalID() << endl; m_lastBreakLine = line; m_listPosition = 0; return true; } } return false; } void InteractiveDebugger::printCurrentLine() const { const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem || workItem->getState() == WorkItem::FINISHED) { return; } size_t lineNum = getCurrentLineNumber(); if (m_program->getNumSourceLines() && lineNum > 0) { printSourceLine(lineNum); } else { cout << "Source line not available." 
<< endl; dumpInstruction(cout, workItem->getCurrentInstruction()); cout << endl; } } void InteractiveDebugger::printFunction( const llvm::Instruction *instruction) const { // Get function const llvm::Function *function = instruction->getParent()->getParent(); cout << function->getName().str() << "("; // Print arguments llvm::Function::const_arg_iterator argItr; for (argItr = function->arg_begin(); argItr != function->arg_end(); argItr++) { if (argItr != function->arg_begin()) { cout << ", "; } cout << argItr->getName().str() << "="; m_kernelInvocation->getCurrentWorkItem()->printValue(argItr); } cout << ") at line " << dec << getLineNumber(instruction) << endl; } void InteractiveDebugger::printSourceLine(size_t lineNum) const { const char *line = m_program->getSourceLine(lineNum); if (line) { cout << dec << lineNum << "\t" << line << endl; } else { cout << "Invalid line number: " << lineNum << endl; } } bool InteractiveDebugger::shouldShowPrompt(const WorkItem *workItem) { if (!m_running) return false; if (m_forceBreak || sigintBreak) return true; if (hasHitBreakpoint()) return true; if (m_continue) return false; if (workItem->getState() == WorkItem::BARRIER) return true; if (workItem->getState() == WorkItem::FINISHED) return true; if (!m_program->getNumSourceLines()) return true; size_t line = getCurrentLineNumber(); if (m_next && workItem->getCallStack().size() > m_previousDepth) return false; if (!line || line == m_previousLine) return false; return true; } ////////////////////////////// //// Interactive Commands //// ////////////////////////////// bool InteractiveDebugger::backtrace(vector args) { const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem || workItem->getState() == WorkItem::FINISHED) { return false; } stack callStack = workItem->getCallStack(); // Print current instruction cout << "#" << callStack.size() << " "; printFunction(workItem->getCurrentInstruction()); // Print call stack while (!callStack.empty()) { cout << 
"#" << (callStack.size()-1) << " "; printFunction(callStack.top()); callStack.pop(); } return false; } bool InteractiveDebugger::brk(vector args) { if (!m_program->getNumSourceLines()) { cout << "Breakpoints only valid when source is available." << endl; return false; } size_t lineNum = getCurrentLineNumber(); if (args.size() > 1) { // Parse argument as a target line number istringstream ss(args[1]); ss >> lineNum; if (!ss.eof() || !lineNum || lineNum > m_program->getNumSourceLines()+1) { cout << "Invalid line number." << endl; return false; } } if (lineNum) { m_breakpoints[m_program][m_nextBreakpoint++] = lineNum; } else { cout << "Not currently on a line." << endl; } return false; } bool InteractiveDebugger::cont(vector args) { #if !defined(_WIN32) || defined(__MINGW32__) // Register a signal handler to catch interrupts struct sigaction sigHandler; sigHandler.sa_handler = handleSignal; sigemptyset(&sigHandler.sa_mask); sigHandler.sa_flags = 0; sigaction(SIGINT, &sigHandler, &m_oldSignalHandler); #endif m_continue = true; return true; } bool InteractiveDebugger::del(vector args) { if (args.size() > 1) { // Parse argument as a target breakpoint size_t bpNum = 0; istringstream ss(args[1]); ss >> bpNum; if (!ss.eof()) { cout << "Invalid breakpoint number." << endl; return false; } // Ensure breakpoint exists if (!m_breakpoints[m_program].count(bpNum)) { cout << "Breakpoint not found." << endl; return false; } m_breakpoints[m_program].erase(bpNum); } else { // Prompt for confimation string confirm; cout << "Delete all breakpoints? 
(y/n) " << flush; cin >> confirm; cin.ignore(); if (confirm == "y") { m_breakpoints.clear(); } } return false; } bool InteractiveDebugger::help(vector args) { if (args.size() < 2) { cout << "Command list:" << endl; cout << " backtrace (bt)" << endl; cout << " break (b)" << endl; cout << " continue (c)" << endl; cout << " delete (d)" << endl; cout << " gmem (gm)" << endl; cout << " help (h)" << endl; cout << " info (i)" << endl; cout << " list (l)" << endl; cout << " next (n)" << endl; cout << " lmem (lm)" << endl; cout << " pmem (pm)" << endl; cout << " print (p)" << endl; cout << " quit (q)" << endl; cout << " step (s)" << endl; cout << " workitem (wi)" << endl; cout << "(type 'help command' for more information)" << endl; return false; } if (args[1] == "backtrace" || args[1] == "bt") { cout << "Print function call stack." << endl; } else if (args[1] == "break" || args[1] == "b") { cout << "Set a breakpoint" << " (only functional when source is available)." << endl << "With no arguments, sets a breakpoint at the current line." << endl << "Use a numeric argument to set a breakpoint at a specific line." << endl; } else if (args[1] == "continue" || args[1] == "c") { cout << "Continue kernel execution until next breakpoint." << endl; } else if (args[1] == "delete" || args[1] == "d") { cout << "Delete a breakpoint." << endl << "With no arguments, deletes all breakpoints." << endl; } else if (args[1] == "help" || args[1] == "h") { cout << "Display usage information for a command." << endl; } else if (args[1] == "info" || args[1] == "i") { cout << "Display information about current debugging context." << endl << "With no arguments, displays general information." << endl << "'info break' lists breakpoints." << endl; } else if (args[1] == "list" || args[1] == "l") { cout << "List source lines." << endl << "With no argument, lists " << LIST_LENGTH << " lines after previous listing." 
<< endl << "Use - to list " << LIST_LENGTH << " lines before the previous listing" << endl << "Use a numeric argument to list around a specific line number." << endl; } else if (args[1] == "gmem" || args[1] == "lmem" || args[1] == "pmem" || args[1] == "gm" || args[1] == "lm" || args[1] == "pm") { cout << "Examine contents of "; if (args[1] == "gmem") cout << "global"; if (args[1] == "lmem") cout << "local"; if (args[1] == "pmem") cout << "private"; cout << " memory." << endl << "With no arguments, dumps entire contents of memory." << endl << "'" << args[1] << " address [size]'" << endl << "address is hexadecimal and 4-byte aligned." << endl; } else if (args[1] == "next" || args[1] == "n") { cout << "Step forward," << " treating function calls as single instruction." << endl; } else if (args[1] == "print" || args[1] == "p") { cout << "Print the values of one or more variables." << endl << "'print x y' prints the values of x and y" << endl << "'print foo[i]' prints a value at a constant array index" << endl; } else if (args[1] == "quit" || args[1] == "q") { cout << "Quit interactive debugger." << endl; } else if (args[1] == "step" || args[1] == "s") { cout << "Step forward a single source line," << " or an instruction if no source available." << endl; } else if (args[1] == "workitem" || args[1] == "wi") { cout << "Switch to a different work-item." << endl << "Up to three (space separated) arguments allowed," << " specifying the global ID of the work-item." 
<< endl; } else { cout << "Unrecognized command '" << args[1] << "'" << endl; } return false; } bool InteractiveDebugger::info(vector args) { if (args.size() > 1) { if (args[1] == "break") { // List breakpoints map::iterator itr; for (itr = m_breakpoints[m_program].begin(); itr != m_breakpoints[m_program].end(); itr++) { cout << "Breakpoint " << itr->first << ": Line " << itr->second << endl; } } else { cout << "Invalid info command: " << args[1] << endl; } return false; } // Kernel invocation information cout << dec << "Running kernel '" << m_kernelInvocation->getKernel()->getName() << "'" << endl << "-> Global work size: " << m_kernelInvocation->getGlobalSize() << endl << "-> Global work offset: " << m_kernelInvocation->getGlobalOffset() << endl << "-> Local work size: " << m_kernelInvocation->getLocalSize() << endl; // Current work-item const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); if (workItem) { cout << endl << "Current work-item: " << workItem->getGlobalID() << endl; if (workItem->getState() == WorkItem::FINISHED) { cout << "Work-item has finished." << endl; } else { cout << "In function "; printFunction(workItem->getCurrentInstruction()); printCurrentLine(); } } else { cout << "All work-items finished." << endl; } return false; } bool InteractiveDebugger::list(vector args) { const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem) { cout << "All work-items finished." << endl; return false; } if (!m_program->getNumSourceLines()) { cout << "No source code available." << endl; return false; } // Check for an argument size_t start = 0; bool forwards = true; if (args.size() > 1) { if (args[1] == "-") { forwards = false; } else { // Parse argument as a target line number istringstream ss(args[1]); ss >> start; if (!ss.eof()) { cout << "Invalid line number." << endl; return false; } start = start > LIST_LENGTH/2 ? 
start - LIST_LENGTH/2 : 1; } } if (!start) { if (forwards) { // Starting position is the previous list position + LIST_LENGTH start = m_listPosition ? m_listPosition + LIST_LENGTH : getCurrentLineNumber() + 1; if (start >= m_program->getNumSourceLines() + 1) { m_listPosition = m_program->getNumSourceLines() + 1; return false; } } else { // Starting position is the previous list position - LIST_LENGTH start = m_listPosition ? m_listPosition : getCurrentLineNumber(); start = start > LIST_LENGTH ? start - LIST_LENGTH : 1; } } // Display lines for (int i = 0; i < LIST_LENGTH; i++) { if (start + i >= m_program->getNumSourceLines() + 1) { break; } printSourceLine(start + i); } m_listPosition = start; return false; } bool InteractiveDebugger::mem(vector args) { // Get target memory object Memory *memory = NULL; if (args[0][0] == 'g') { memory = m_context->getGlobalMemory(); } else if (args[0][0] == 'l') { memory = m_kernelInvocation->getCurrentWorkGroup()->getLocalMemory(); } else if (args[0][0] == 'p') { memory = m_kernelInvocation->getCurrentWorkItem()->getPrivateMemory(); } // If no arguments, dump memory if (args.size() == 1) { memory->dump(); return false; } else if (args.size() > 3) { cout << "Invalid number of arguments." << endl; return false; } // Get target address size_t address; stringstream ss(args[1]); ss >> hex >> address; if (!ss.eof() || address%4 != 0) { cout << "Invalid address." << endl; return false; } // Get optional size size_t size = 8; if (args.size() == 3) { stringstream ss(args[2]); ss >> dec >> size; if (!ss.eof() || !size) { cout << "Invalid size" << endl; return false; } } // Check address is valid if (!memory->isAddressValid(address, size)) { cout << "Invalid memory address." 
<< endl; return false; } // Output data unsigned char *data = (unsigned char*)memory->getPointer(address); for (unsigned i = 0; i < size; i++) { if (i%4 == 0) { cout << endl << hex << uppercase << setw(16) << setfill(' ') << right << (address + i) << ":"; } cout << " " << hex << uppercase << setw(2) << setfill('0') << (int)data[i]; } cout << endl << endl; return false; } bool InteractiveDebugger::next(vector args) { const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem) { cout << "All work-items finished." << endl; return false; } if (workItem->getState() == WorkItem::FINISHED) { cout << "Work-item has finished." << endl; return false; } else if (workItem->getState() == WorkItem::BARRIER) { cout << "Work-item is at barrier." << endl; return false; } // Step until we return to the same depth m_previousDepth = workItem->getCallStack().size(); m_previousLine = getCurrentLineNumber(); m_next = true; return true; } bool InteractiveDebugger::print(vector args) { if (args.size() < 2) { cout << "Variable name(s) required." 
<< endl; return false; } const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); for (unsigned i = 1; i < args.size(); i++) { cout << args[i] << " = "; // Check for subscript operator size_t start = args[i].find("["); if (start != string::npos) { // Find end of subscript size_t end = args[i].find(']'); if (end == string::npos) { cout << "missing ']'" << endl; return false; } if (end != args[i].length() - 1) { cout << "invalid variable" << endl; return false; } // Parse index value size_t index = 0; string var = args[i].substr(0, start); stringstream ss(args[i].substr(start+1, end-start-1)); ss >> index; if (!ss.eof()) { cout << "invalid index" << endl; return false; } // Get variable value and type const llvm::Value *ptr = workItem->getVariable(var); if (!ptr) { cout << "not found" << endl; return false; } const llvm::Type *ptrType = ptr->getType(); // Check for alloca instruction, in which case look at allocated type bool alloca = false; if (ptr->getValueID() >= llvm::Value::InstructionVal && ((llvm::Instruction*)ptr)->getOpcode() == llvm::Instruction::Alloca) { ptrType = ((const llvm::AllocaInst*)ptr)->getAllocatedType(); alloca = true; } // Ensure type is a pointer if (!ptrType->isPointerTy()) { cout << "not a pointer" << endl; return false; } // Get base address size_t base = *(size_t*)workItem->getValueData(ptr); if (alloca) { // Load base address from private memory workItem->getPrivateMemory()->load((unsigned char*)&base, base, sizeof(size_t)); } // Get target memory object Memory *memory = NULL; switch (ptrType->getPointerAddressSpace()) { case AddrSpacePrivate: memory = workItem->getPrivateMemory(); break; case AddrSpaceGlobal: case AddrSpaceConstant: memory = m_context->getGlobalMemory(); break; case AddrSpaceLocal: memory = m_kernelInvocation->getCurrentWorkGroup()->getLocalMemory(); break; default: cout << "invalid address space" << endl; return false; } // Get element type const llvm::Type *elemType = ptrType->getPointerElementType(); 
unsigned elemSize = getTypeSize(elemType); // Load data if (!memory->isAddressValid(base + index*elemSize, elemSize)) { cout << "invalid memory address" << endl; } else { // Print data void *data = (void*)memory->getPointer(base+index*elemSize); printTypedData(elemType, (unsigned char*)data); cout << endl; } } else { if (!workItem->printVariable(args[i])) { cout << "not found"; } cout << endl; } } return false; } bool InteractiveDebugger::quit(vector args) { #if !defined(_WIN32) || defined(__MINGW32__) // Restore old signal handler sigaction(SIGINT, &m_oldSignalHandler, NULL); #endif m_running = false; return true; } bool InteractiveDebugger::step(vector args) { const WorkItem *workItem = m_kernelInvocation->getCurrentWorkItem(); if (!workItem) { cout << "All work-items finished." << endl; return false; } if (workItem->getState() == WorkItem::FINISHED) { cout << "Work-item has finished." << endl; return false; } else if (workItem->getState() == WorkItem::BARRIER) { cout << "Work-item is at barrier." << endl; return false; } // Save current position m_previousDepth = workItem->getCallStack().size(); m_previousLine = getCurrentLineNumber(); return true; } bool InteractiveDebugger::workitem(vector args) { // TODO: Take offsets into account? Size3 gid(0,0,0); for (unsigned i = 1; i < args.size(); i++) { // Parse argument as a target line number istringstream ss(args[i]); ss >> gid[i-1]; if (!ss.eof() || gid[i-1] >= m_kernelInvocation->getGlobalSize()[i-1]) { cout << "Invalid global ID." << endl; return false; } } // Ugly const_cast since this operation actually changes something about // the simulation. This goes against the idea that plugins are entirely // passive. if (!const_cast(m_kernelInvocation)->switchWorkItem(gid)) { cout << "Work-item has already finished, unable to load state." 
<< endl; return false; } // Print new WI id cout << "Switched to work-item: (" << gid[0] << "," << gid[1] << "," << gid[2] << ")" << endl; if (m_kernelInvocation->getCurrentWorkItem()->getState() == WorkItem::FINISHED) { cout << "Work-item has finished execution." << endl; } else { printCurrentLine(); } return false; } Oclgrind-15.5/src/plugins/InteractiveDebugger.h000066400000000000000000000041221252441671000215510ustar00rootroot00000000000000// InteractiveDebugger.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "core/Plugin.h" namespace oclgrind { class Program; class InteractiveDebugger : public Plugin { public: InteractiveDebugger(const Context *context); virtual void instructionExecuted(const WorkItem *workItem, const llvm::Instruction *instruction, const TypedValue& result) override; virtual void kernelBegin(const KernelInvocation *kernelInvocation) override; virtual void kernelEnd(const KernelInvocation *kernelInvocation) override; virtual void log(MessageType type, const char *message) override; virtual bool isThreadSafe() const override; private: bool m_continue; bool m_running; bool m_forceBreak; size_t m_listPosition; bool m_next; size_t m_lastBreakLine; size_t m_nextBreakpoint; size_t m_previousDepth; size_t m_previousLine; std::map > m_breakpoints; const Program *m_program; const KernelInvocation *m_kernelInvocation; size_t getCurrentLineNumber() const; size_t getLineNumber(const llvm::Instruction *instruction) const; bool hasHitBreakpoint(); void printCurrentLine() const; void printFunction(const llvm::Instruction *instruction) const; void printSourceLine(size_t lineNum) const; bool shouldShowPrompt(const WorkItem *workItem); // Interactive commands typedef bool (InteractiveDebugger::*Command)(std::vector); 
std::map m_commands; #define CMD(name) bool name(std::vector args); CMD(backtrace); CMD(brk); CMD(cont); CMD(del); CMD(help); CMD(info); CMD(list); CMD(mem); CMD(next); CMD(print); CMD(quit); CMD(step); CMD(workitem); #undef CMD }; } Oclgrind-15.5/src/plugins/Logger.cpp000066400000000000000000000032601252441671000174030ustar00rootroot00000000000000// Logger.cpp (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "core/common.h" #include #include #include "Logger.h" using namespace oclgrind; using namespace std; #define DEFAULT_MAX_ERRORS 1000 unsigned Logger::m_numErrors = 0; static mutex logMutex; Logger::Logger(const Context *context) : Plugin(context) { m_log = &cerr; const char *logfile = getenv("OCLGRIND_LOG"); if (logfile) { m_log = new ofstream(logfile); if (!m_log->good()) { cerr << "Oclgrind: Unable to open log file '" << logfile << "'" << endl; m_log = &cerr; } } m_maxErrors = DEFAULT_MAX_ERRORS; const char *maxErrors = getenv("OCLGRIND_MAX_ERRORS"); if (maxErrors) { char *next; m_maxErrors = strtoul(maxErrors, &next, 10); if (strlen(next)) { cerr << "Oclgrind: Invalid value for OCLGRIND_MAX_ERRORS" << endl; } } } Logger::~Logger() { if (m_log != &cerr) { ((ofstream*)m_log)->close(); delete m_log; } } void Logger::log(MessageType type, const char *message) { lock_guard lock(logMutex); // Limit number of errors/warning printed if (type == ERROR || type == WARNING) { if (m_numErrors == m_maxErrors) { *m_log << endl << "Oclgrind: " << m_numErrors << " errors generated - suppressing further errors" << endl << endl; } if (m_numErrors++ >= m_maxErrors) return; } *m_log << endl << message << endl; } Oclgrind-15.5/src/plugins/Logger.h000066400000000000000000000011541252441671000170500ustar00rootroot00000000000000// Logger.h 
(Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "core/Plugin.h" namespace oclgrind { class Logger : public Plugin { public: Logger(const Context *context); virtual ~Logger(); virtual void log(MessageType type, const char *message) override; private: std::ostream *m_log; unsigned m_maxErrors; static unsigned m_numErrors; }; } Oclgrind-15.5/src/plugins/MemCheck.cpp000066400000000000000000000057701252441671000176500ustar00rootroot00000000000000// MemCheck.cpp (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "core/common.h" #include "core/Context.h" #include "core/Memory.h" #include "MemCheck.h" using namespace oclgrind; using namespace std; MemCheck::MemCheck(const Context *context) : Plugin(context) { } void MemCheck::memoryAtomicLoad(const Memory *memory, const WorkItem *workItem, AtomicOp op, size_t address, size_t size) { checkLoad(memory, address, size); } void MemCheck::memoryAtomicStore(const Memory *memory, const WorkItem *workItem, AtomicOp op, size_t address, size_t size) { checkStore(memory, address, size); } void MemCheck::memoryLoad(const Memory *memory, const WorkItem *workItem, size_t address, size_t size) { checkLoad(memory, address, size); } void MemCheck::memoryLoad(const Memory *memory, const WorkGroup *workGroup, size_t address, size_t size) { checkLoad(memory, address, size); } void MemCheck::memoryStore(const Memory *memory, const WorkItem *workItem, size_t address, size_t size, const uint8_t *storeData) { checkStore(memory, address, size); } void 
MemCheck::memoryStore(const Memory *memory, const WorkGroup *workGroup, size_t address, size_t size, const uint8_t *storeData) { checkStore(memory, address, size); } void MemCheck::checkLoad(const Memory *memory, size_t address, size_t size) const { if (!memory->isAddressValid(address, size)) { logInvalidAccess(true, memory->getAddressSpace(), address, size); return; } if (memory->getBuffer(address)->flags & CL_MEM_WRITE_ONLY) { m_context->logError("Invalid read from write-only buffer"); } } void MemCheck::checkStore(const Memory *memory, size_t address, size_t size) const { if (!memory->isAddressValid(address, size)) { logInvalidAccess(false, memory->getAddressSpace(), address, size); return; } if (memory->getBuffer(address)->flags & CL_MEM_READ_ONLY) { m_context->logError("Invalid write to read-only buffer"); } } void MemCheck::logInvalidAccess(bool read, unsigned addrSpace, size_t address, size_t size) const { Context::Message msg(ERROR, m_context); msg << "Invalid " << (read ? "read" : "write") << " of size " << size << " at " << getAddressSpaceName(addrSpace) << " memory address 0x" << hex << address << endl << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl << "Entity: " << msg.CURRENT_ENTITY << endl << msg.CURRENT_LOCATION << endl; msg.send(); }Oclgrind-15.5/src/plugins/MemCheck.h000066400000000000000000000035401252441671000173060ustar00rootroot00000000000000// MemCheck.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include "core/Plugin.h"

namespace oclgrind
{
  // Plugin that validates memory accesses. Every load/store callback
  // forwards to checkLoad()/checkStore(), which report accesses to invalid
  // addresses and accesses that conflict with a buffer's
  // CL_MEM_WRITE_ONLY / CL_MEM_READ_ONLY flags.
  class MemCheck : public Plugin
  {
  public:
    MemCheck(const Context *context);

    // Atomic accesses are checked exactly like plain loads/stores; the
    // work-item and atomic operation arguments are not used by the checks.
    virtual void memoryAtomicLoad(const Memory *memory,
                                  const WorkItem *workItem,
                                  AtomicOp op,
                                  size_t address, size_t size) override;
    virtual void memoryAtomicStore(const Memory *memory,
                                   const WorkItem *workItem,
                                   AtomicOp op,
                                   size_t address, size_t size) override;

    // Loads issued by a work-item or by a work-group: both run checkLoad()
    // on the accessed range.
    virtual void memoryLoad(const Memory *memory, const WorkItem *workItem,
                            size_t address, size_t size) override;
    virtual void memoryLoad(const Memory *memory, const WorkGroup *workGroup,
                            size_t address, size_t size) override;

    // Stores issued by a work-item or by a work-group: both run
    // checkStore() on the accessed range; the stored data is not inspected.
    virtual void memoryStore(const Memory *memory, const WorkItem *workItem,
                             size_t address, size_t size,
                             const uint8_t *storeData) override;
    virtual void memoryStore(const Memory *memory, const WorkGroup *workGroup,
                             size_t address, size_t size,
                             const uint8_t *storeData) override;

  private:
    // Report an invalid read if [address, address+size) is not valid in
    // 'memory'; otherwise log an error if the buffer containing 'address'
    // is flagged CL_MEM_WRITE_ONLY.
    void checkLoad(const Memory *memory, size_t address, size_t size) const;

    // Report an invalid write if [address, address+size) is not valid in
    // 'memory'; otherwise log an error if the buffer containing 'address'
    // is flagged CL_MEM_READ_ONLY.
    void checkStore(const Memory *memory, size_t address, size_t size) const;

    // Send an ERROR message describing an invalid read ('read' == true) or
    // write of 'size' bytes at 'address' in the given address space,
    // together with the current kernel, entity and location.
    void logInvalidAccess(bool read, unsigned addrSpace,
                          size_t address, size_t size) const;
  };
}
Oclgrind-15.5/src/plugins/RaceDetector.cpp000066400000000000000000000233341252441671000205340ustar00rootroot00000000000000// RaceDetector.cpp (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#include "core/common.h"

#include "core/Context.h"
#include "core/KernelInvocation.h"
#include "core/Memory.h"
#include "core/WorkGroup.h"
#include "core/WorkItem.h"

#include "RaceDetector.h"

using namespace oclgrind;
using namespace std;

// Key used to look up the per-byte state array of the buffer that
// contains `address` within `memory`.
#define KEY(memory,address) make_pair(memory, EXTRACT_BUFFER(address))

// Create the plugin. Uniform writes (several entities storing the same
// value) are tolerated unless OCLGRIND_UNIFORM_WRITES is set.
RaceDetector::RaceDetector(const Context *context)
 : Plugin(context)
{
  m_kernelInvocation = NULL;

  m_allowUniformWrites = !checkEnv("OCLGRIND_UNIFORM_WRITES");
}

bool RaceDetector::isThreadSafe() const
{
  // TODO: Improve DRD efficiency for multi-threaded case instead.
  return false;
}

// Remember the running kernel invocation; its sizes are needed when a
// race is reported.
void RaceDetector::kernelBegin(const KernelInvocation *kernelInvocation)
{
  m_kernelInvocation = kernelInvocation;
}

// Reset all global-memory state at the end of a kernel.
void RaceDetector::kernelEnd(const KernelInvocation *kernelInvocation)
{
  synchronize(m_context->getGlobalMemory(), false);

  m_kernelInvocation = NULL;
}

// Allocate one State per byte of the new buffer. Private and constant
// memory are not tracked.
void RaceDetector::memoryAllocated(const Memory *memory, size_t address,
                                   size_t size, cl_mem_flags flags)
{
  if (memory->getAddressSpace() == AddrSpacePrivate ||
      memory->getAddressSpace() == AddrSpaceConstant)
    return;

  m_state[KEY(memory,address)] = make_pair(new State[size], size);
}

void RaceDetector::memoryAtomicLoad(const Memory *memory,
                                    const WorkItem *workItem,
                                    AtomicOp op, size_t address, size_t size)
{
  registerAtomic(memory, workItem, address, size, false);
}

void RaceDetector::memoryAtomicStore(const Memory *memory,
                                     const WorkItem *workItem,
                                     AtomicOp op, size_t address, size_t size)
{
  registerAtomic(memory, workItem, address, size, true);
}

// Release the per-byte state of a buffer that is being freed.
void RaceDetector::memoryDeallocated(const Memory *memory, size_t address)
{
  if (memory->getAddressSpace() == AddrSpacePrivate ||
      memory->getAddressSpace() == AddrSpaceConstant)
    return;

  delete[] m_state[KEY(memory,address)].first;
  m_state.erase(KEY(memory,address));
}

void RaceDetector::memoryLoad(const Memory *memory, const WorkItem *workItem,
                              size_t address, size_t size)
{
  registerLoadStore(memory, workItem, workItem->getWorkGroup(),
                    address, size, NULL);
}

void RaceDetector::memoryLoad(const Memory *memory, const WorkGroup *workGroup,
                              size_t address, size_t size)
{
  registerLoadStore(memory, NULL, workGroup, address, size, NULL);
}

void RaceDetector::memoryStore(const Memory *memory, const WorkItem *workItem,
                               size_t address, size_t size,
                               const uint8_t *storeData)
{
  registerLoadStore(memory, workItem, workItem->getWorkGroup(),
                    address, size, storeData);
}

void RaceDetector::memoryStore(const Memory *memory, const WorkGroup *workGroup,
                               size_t address, size_t size,
                               const uint8_t *storeData)
{
  registerLoadStore(memory, NULL, workGroup, address, size, storeData);
}

// Report a data race.
//   type            - read-write or write-write
//   addrSpace       - address space of the racing access
//   address         - address at which the race was detected
//   lastWorkGroup   - linear index of the work-group that made the earlier
//                     access, or -1 if unknown
//   lastWorkItem    - global linear index of the work-item that made the
//                     earlier access, or -1 if it was not a work-item
//   lastInstruction - instruction that made the earlier access
void RaceDetector::logRace(DataRaceType type,
                           unsigned int addrSpace,
                           size_t address,
                           size_t lastWorkGroup,
                           size_t lastWorkItem,
                           const llvm::Instruction *lastInstruction) const
{
  const char *raceType = NULL;
  switch (type)
  {
    case ReadWriteRace:
      raceType = "Read-write";
      break;
    case WriteWriteRace:
      raceType = "Write-write";
      break;
  }

  Context::Message msg(ERROR, m_context);
  msg << raceType << " data race at "
      << getAddressSpaceName(addrSpace)
      << " memory address 0x" << hex << address << endl
      << msg.INDENT
      << "Kernel: " << msg.CURRENT_KERNEL << endl
      << endl
      << "First entity:  " << msg.CURRENT_ENTITY << endl
      << msg.CURRENT_LOCATION << endl
      << endl
      << "Second entity: ";

  // Show details of other entity involved in race
  if (lastWorkItem != -1)
  {
    // Decompose the global linear index into global/local/group IDs
    Size3 global(lastWorkItem, m_kernelInvocation->getGlobalSize());
    Size3 local, group;
    local.x = global.x % m_kernelInvocation->getLocalSize().x;
    local.y = global.y % m_kernelInvocation->getLocalSize().y;
    local.z = global.z % m_kernelInvocation->getLocalSize().z;
    group.x = global.x / m_kernelInvocation->getLocalSize().x;
    group.y = global.y / m_kernelInvocation->getLocalSize().y;
    group.z = global.z / m_kernelInvocation->getLocalSize().z;
    msg << "Global" << global << " Local" << local << " Group" << group;
  }
  else if (lastWorkGroup != -1)
  {
    msg << "Group"
        << Size3(lastWorkGroup, m_kernelInvocation->getNumGroups());
  }
  else
  {
    msg << "(unknown)";
  }
  msg << endl
      << lastInstruction << endl;
  msg.send();
}

// Record an atomic access by `workItem` to `size` bytes at `address` and
// report a race if it conflicts with an earlier non-atomic access made by
// a different work-item. `store` is true for atomic stores.
void RaceDetector::registerAtomic(const Memory *memory,
                                  const WorkItem *workItem,
                                  size_t address, size_t size,
                                  bool store)
{
  // No state is ever allocated for private/constant memory (see
  // memoryAllocated), so looking it up would insert a null entry into
  // m_state and dereference it below.
  if (memory->getAddressSpace() == AddrSpacePrivate ||
      memory->getAddressSpace() == AddrSpaceConstant)
    return;
  if (!memory->isAddressValid(address, size))
    return;

  State *state = m_state[KEY(memory,address)].first + EXTRACT_OFFSET(address);

  // Get work-item index
  size_t workItemIndex = workItem->getGlobalIndex();

  bool race = false;
  for (size_t offset = 0; offset < size; offset++, state++)
  {
    // Check for races with non-atomic operations
    bool conflict = store ? !state->canAtomicStore : !state->canAtomicLoad;
    if (!race && conflict && workItemIndex != state->workItem)
    {
      // logRace() takes the work-group index first, then the work-item
      // index.
      logRace(ReadWriteRace,
              memory->getAddressSpace(),
              address,
              state->workGroup,
              state->workItem,
              state->instruction);
      race = true;
    }

    // Update state
    if (store)
      state->canLoad = false;
    state->canStore = false;
    if (!state->wasWorkItem)
    {
      state->instruction = workItem->getCurrentInstruction();
      state->workItem = workItemIndex;
      state->wasWorkItem = true;
    }
  }
}

// Record a non-atomic load (storeData == NULL) or store (storeData points
// at the bytes being written) and report a race if it conflicts with an
// earlier access by a different entity. `workItem` is NULL for accesses
// performed on behalf of a whole work-group.
void RaceDetector::registerLoadStore(const Memory *memory,
                                     const WorkItem *workItem,
                                     const WorkGroup *workGroup,
                                     size_t address, size_t size,
                                     const uint8_t *storeData)
{
  if (!m_kernelInvocation)
    return;
  if (memory->getAddressSpace() == AddrSpacePrivate ||
      memory->getAddressSpace() == AddrSpaceConstant)
    return;
  if (!memory->isAddressValid(address, size))
    return;

  bool load = !storeData;
  bool store = storeData;

  // Get index of work-item and work-group performing access
  size_t workItemIndex = -1, workGroupIndex = -1;
  if (workItem)
  {
    workItemIndex = workItem->getGlobalIndex();
  }
  if (workGroup)
  {
    workGroupIndex = workGroup->getGroupIndex();
  }

  bool race = false;
  size_t base = EXTRACT_OFFSET(address);
  State *state = m_state[KEY(memory, address)].first + base;

  for (size_t offset = 0; offset < size; offset++, state++)
  {
    bool conflict = store ? !state->canStore : !state->canLoad;

    // A store of the value already in memory is not treated as a conflict
    // when uniform writes are allowed
    if (m_allowUniformWrites && storeData)
    {
      uint8_t *ptr = (uint8_t*)(memory->getPointer(address));
      conflict &= (ptr[offset] != storeData[offset]);
    }

    if (!race && conflict &&
        (state->wasWorkItem ?                // If state set by work-item,
         state->workItem != workItemIndex :  // must be same work-item,
         state->workGroup != workGroupIndex) // otherwise must be same group
        )
    {
      // Report data-race
      DataRaceType type = load|state->canLoad ? ReadWriteRace : WriteWriteRace;
      // logRace() takes the work-group index first, then the work-item
      // index.
      logRace(type, memory->getAddressSpace(),
              address + offset,
              state->workGroup,
              state->workItem,
              state->instruction);
      race = true;
    }
    else
    {
      // Only update WI info if this operation is stronger than previous one
      bool updateWI = store || (load && state->canStore);

      // Update state
      if (store)
        state->canAtomicLoad = false;
      state->canAtomicStore = false;
      state->canLoad &= load;
      state->canStore = false;
      if (updateWI)
      {
        state->workGroup = workGroupIndex;
        if (workItem)
        {
          state->instruction = workItem->getCurrentInstruction();
          state->workItem = workItemIndex;
          state->wasWorkItem = true;
        }
      }
    }
  }
}

// Reset the state of every tracked buffer belonging to `memory`.
// When `workGroup` is true only the work-item level information is
// cleared; otherwise the work-group information and the load/store
// permissions are reset as well.
void RaceDetector::synchronize(const Memory *memory, bool workGroup)
{
  StateMap::iterator itr;
  for (itr = m_state.begin(); itr != m_state.end(); itr++)
  {
    if (itr->first.first != memory)
      continue;

    // NOTE(review): template arguments reconstructed from the
    // make_pair(new State[size], size) stored in memoryAllocated().
    pair<State*,size_t> obj = itr->second;
    for (State *state = obj.first; state < obj.first+obj.second; state++)
    {
      // TODO: atomic_intergroup_race test failure
      state->canAtomicLoad = true;
      state->canAtomicStore = true;
      state->workItem = -1;
      state->wasWorkItem = false;
      if (!workGroup)
      {
        state->workGroup = -1;
        state->canLoad = true;
        state->canStore = true;
      }
    }
  }
}

// Apply the synchronisation implied by a work-group barrier with the
// given memory fence flags.
void RaceDetector::workGroupBarrier(const WorkGroup *workGroup, uint32_t flags)
{
  if (flags & CLK_LOCAL_MEM_FENCE)
    synchronize(workGroup->getLocalMemory(), false);
  if (flags & CLK_GLOBAL_MEM_FENCE)
    synchronize(m_context->getGlobalMemory(), true);
}

// A freshly allocated byte has no owner and permits every kind of access.
RaceDetector::State::State()
{
  instruction = NULL;
  workItem = -1;
  workGroup = -1;
  canAtomicLoad = true;
  canAtomicStore = true;
  canLoad = true;
  canStore = true;
  wasWorkItem = false;
}
Oclgrind-15.5/src/plugins/RaceDetector.h000066400000000000000000000067751252441671000202130ustar00rootroot00000000000000// RaceDetector.h (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.

#include "core/Plugin.h"

namespace oclgrind
{
  // Plugin that detects data races between work-items and work-groups by
  // tracking, for every byte of local and global memory, which entity last
  // accessed it and which kinds of access are still permitted.
  class RaceDetector : public Plugin
  {
  public:
    RaceDetector(const Context *context);

    virtual void kernelBegin(const KernelInvocation *kernelInvocation) override;
    virtual void kernelEnd(const KernelInvocation *kernelInvocation) override;
    virtual void memoryAllocated(const Memory *memory, size_t address,
                                 size_t size, cl_mem_flags flags) override;
    virtual void memoryAtomicLoad(const Memory *memory,
                                  const WorkItem *workItem,
                                  AtomicOp op,
                                  size_t address, size_t size) override;
    virtual void memoryAtomicStore(const Memory *memory,
                                   const WorkItem *workItem,
                                   AtomicOp op,
                                   size_t address, size_t size) override;
    virtual void memoryDeallocated(const Memory *memory, size_t address);
    virtual void memoryLoad(const Memory *memory, const WorkItem *workItem,
                            size_t address, size_t size) override;
    virtual void memoryLoad(const Memory *memory, const WorkGroup *workGroup,
                            size_t address, size_t size) override;
    virtual void memoryStore(const Memory *memory, const WorkItem *workItem,
                             size_t address, size_t size,
                             const uint8_t *storeData) override;
    virtual void memoryStore(const Memory *memory, const WorkGroup *workGroup,
                             size_t address, size_t size,
                             const uint8_t *storeData) override;
    virtual void workGroupBarrier(const WorkGroup *workGroup,
                                  uint32_t flags) override;

    virtual bool isThreadSafe() const override;

  private:
    // Tracking state for a single byte of memory.
    struct State
    {
      const llvm::Instruction *instruction; // Last recorded access
      size_t workItem;     // Global index of last work-item, or -1
      size_t workGroup;    // Index of last work-group, or -1
      bool canAtomicLoad;  // Cleared by non-atomic stores
      bool canAtomicStore; // Cleared by any non-atomic access
      bool canLoad;        // Cleared by stores and atomic stores
      bool canStore;       // Cleared by any access
      bool wasWorkItem;    // True if `workItem` identifies the last entity

      State();
    };

    // Enumeration for types of data-race
    enum DataRaceType
    {
      ReadWriteRace,
      WriteWriteRace
    };

    // Maps (memory object, buffer ID) to (per-byte state array, size).
    // NOTE(review): template arguments reconstructed from how m_state is
    // used in RaceDetector.cpp - confirm the buffer ID type.
    typedef std::map<
              std::pair<const Memory*, size_t>,
              std::pair<State*, size_t>
            > StateMap;
    StateMap m_state;

    bool m_allowUniformWrites;
    const KernelInvocation *m_kernelInvocation;

    void logRace(DataRaceType type,
                 unsigned int addrSpace,
                 size_t address,
                 size_t lastWorkGroup,
                 size_t lastWorkItem,
                 const llvm::Instruction *lastInstruction) const;
    void registerAtomic(const Memory *memory,
                        const WorkItem *workItem,
                        size_t address, size_t size,
                        bool store);
    void registerLoadStore(const Memory *memory,
                           const WorkItem *workItem,
                           const WorkGroup *workGroup,
                           size_t address, size_t size,
                           const uint8_t *storeData);
    void synchronize(const Memory *memory, bool workGroup);
  };
}
Oclgrind-15.5/src/runtime/000077500000000000000000000000001252441671000154615ustar00rootroot00000000000000Oclgrind-15.5/src/runtime/async_queue.cpp000066400000000000000000000065751252441671000205230ustar00rootroot00000000000000// async_queue.cpp (Oclgrind)
// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
#include "async_queue.h" #include #include #include #include #include "core/Kernel.h" #include "core/Queue.h" using namespace oclgrind; using namespace std; // Maps to keep track of retained objects static map< Queue::Command*, list > memObjectMap; static map< Queue::Command*, cl_kernel > kernelMap; static map< Queue::Command*, cl_event > eventMap; static map< Queue::Command*, list > waitListMap; void asyncEnqueue(cl_command_queue queue, cl_command_type type, Queue::Command *cmd, cl_uint numEvents, const cl_event *waitList, cl_event *eventOut) { // Add event wait list to command for (unsigned i = 0; i < numEvents; i++) { cmd->waitList.push_back(waitList[i]->event); waitListMap[cmd].push_back(waitList[i]); clRetainEvent(waitList[i]); } // Enqueue command Event *event = queue->queue->enqueue(cmd); // Create event objects cl_event _event = new _cl_event; _event->dispatch = m_dispatchTable; _event->context = queue->context; _event->queue = queue; _event->type = type; _event->event = event; _event->refCount = 1; // Add event to map eventMap[cmd] = _event; // Pass event as output and retain (if required) if (eventOut) { clRetainEvent(_event); *eventOut = _event; } } void asyncQueueRetain(Queue::Command *cmd, cl_mem mem) { // Retain object and add to map clRetainMemObject(mem); memObjectMap[cmd].push_back(mem); } void asyncQueueRetain(Queue::Command *cmd, cl_kernel kernel) { assert(kernelMap.find(cmd) == kernelMap.end()); // Retain kernel and add to map clRetainKernel(kernel); kernelMap[cmd] = kernel; // Retain memory objects arguments map::const_iterator itr; for (itr = kernel->memArgs.begin(); itr != kernel->memArgs.end(); itr++) { asyncQueueRetain(cmd, itr->second); } } void asyncQueueRelease(Queue::Command *cmd) { // Release memory objects if (memObjectMap.find(cmd) != memObjectMap.end()) { list memObjects = memObjectMap[cmd]; while (!memObjects.empty()) { clReleaseMemObject(memObjects.front()); memObjects.pop_front(); } memObjectMap.erase(cmd); } // Release kernel if 
(cmd->type == Queue::KERNEL) { assert(kernelMap.find(cmd) != kernelMap.end()); clReleaseKernel(kernelMap[cmd]); kernelMap.erase(cmd); delete ((Queue::KernelCommand*)cmd)->kernel; } // Remove event from map cl_event event = eventMap[cmd]; eventMap.erase(cmd); // Perform callbacks list< pair >::iterator callItr; for (callItr = event->callbacks.begin(); callItr != event->callbacks.end(); callItr++) { callItr->first(event, event->event->state, callItr->second); } // Release events list::iterator waitItr; for (waitItr = waitListMap[cmd].begin(); waitItr != waitListMap[cmd].end(); waitItr++) { clReleaseEvent(*waitItr); } waitListMap.erase(cmd); clReleaseEvent(event); } Oclgrind-15.5/src/runtime/async_queue.h000066400000000000000000000015121252441671000201520ustar00rootroot00000000000000// async_queue.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #include "icd.h" #include "core/Queue.h" extern void asyncEnqueue(cl_command_queue queue, cl_command_type type, oclgrind::Queue::Command *cmd, cl_uint numEvents, const cl_event *waitList, cl_event *eventOut); extern void asyncQueueRetain(oclgrind::Queue::Command *cmd, cl_mem mem); extern void asyncQueueRetain(oclgrind::Queue::Command *cmd, cl_kernel); extern void asyncQueueRelease(oclgrind::Queue::Command *cmd); Oclgrind-15.5/src/runtime/icd.def000066400000000000000000000001521252441671000166760ustar00rootroot00000000000000EXPORTS ; Make ICD initialisation functions visible clGetExtensionFunctionAddress clIcdGetPlatformIDsKHR Oclgrind-15.5/src/runtime/icd.h000066400000000000000000000164741252441671000164050ustar00rootroot00000000000000// icd.h (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. 
// // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. #ifndef _ICD_H_ #define _ICD_H_ // Rename OpenCL API functions to avoid clashes with ICD library #ifdef OCLGRIND_ICD #define clGetPlatformIDs _clGetPlatformIDs #define clGetPlatformInfo _clGetPlatformInfo #define clGetDeviceIDs _clGetDeviceIDs #define clGetDeviceInfo _clGetDeviceInfo #define clCreateSubDevices _clCreateSubDevices #define clRetainDevice _clRetainDevice #define clReleaseDevice _clReleaseDevice #define clCreateContext _clCreateContext #define clCreateContextFromType _clCreateContextFromType #define clRetainContext _clRetainContext #define clReleaseContext _clReleaseContext #define clGetContextInfo _clGetContextInfo #define clCreateCommandQueue _clCreateCommandQueue #define clSetCommandQueueProperty _clSetCommandQueueProperty #define clRetainCommandQueue _clRetainCommandQueue #define clReleaseCommandQueue _clReleaseCommandQueue #define clGetCommandQueueInfo _clGetCommandQueueInfo #define clCreateBuffer _clCreateBuffer #define clCreateSubBuffer _clCreateSubBuffer #define clCreateImage _clCreateImage #define clCreateImage2D _clCreateImage2D #define clCreateImage3D _clCreateImage3D #define clRetainMemObject _clRetainMemObject #define clReleaseMemObject _clReleaseMemObject #define clGetSupportedImageFormats _clGetSupportedImageFormats #define clGetMemObjectInfo _clGetMemObjectInfo #define clGetImageInfo _clGetImageInfo #define clSetMemObjectDestructorCallback _clSetMemObjectDestructorCallback #define clCreateSampler _clCreateSampler #define clRetainSampler _clRetainSampler #define clReleaseSampler _clReleaseSampler #define clGetSamplerInfo _clGetSamplerInfo #define clCreateProgramWithSource _clCreateProgramWithSource #define clCreateProgramWithBinary _clCreateProgramWithBinary #define clCreateProgramWithBuiltInKernels _clCreateProgramWithBuiltInKernels #define clRetainProgram _clRetainProgram #define 
clReleaseProgram _clReleaseProgram #define clBuildProgram _clBuildProgram #define clUnloadCompiler _clUnloadCompiler #define clCompileProgram _clCompileProgram #define clLinkProgram _clLinkProgram #define clUnloadPlatformCompiler _clUnloadPlatformCompiler #define clGetProgramInfo _clGetProgramInfo #define clGetProgramBuildInfo _clGetProgramBuildInfo #define clCreateKernel _clCreateKernel #define clCreateKernelsInProgram _clCreateKernelsInProgram #define clRetainKernel _clRetainKernel #define clReleaseKernel _clReleaseKernel #define clSetKernelArg _clSetKernelArg #define clGetKernelInfo _clGetKernelInfo #define clGetKernelArgInfo _clGetKernelArgInfo #define clGetKernelWorkGroupInfo _clGetKernelWorkGroupInfo #define clWaitForEvents _clWaitForEvents #define clGetEventInfo _clGetEventInfo #define clCreateUserEvent _clCreateUserEvent #define clRetainEvent _clRetainEvent #define clReleaseEvent _clReleaseEvent #define clSetUserEventStatus _clSetUserEventStatus #define clSetEventCallback _clSetEventCallback #define clGetEventProfilingInfo _clGetEventProfilingInfo #define clFlush _clFlush #define clFinish _clFinish #define clEnqueueReadBuffer _clEnqueueReadBuffer #define clEnqueueReadBufferRect _clEnqueueReadBufferRect #define clEnqueueWriteBuffer _clEnqueueWriteBuffer #define clEnqueueWriteBufferRect _clEnqueueWriteBufferRect #define clEnqueueCopyBuffer _clEnqueueCopyBuffer #define clEnqueueCopyBufferRect _clEnqueueCopyBufferRect #define clEnqueueFillBuffer _clEnqueueFillBuffer #define clEnqueueFillImage _clEnqueueFillImage #define clEnqueueReadImage _clEnqueueReadImage #define clEnqueueWriteImage _clEnqueueWriteImage #define clEnqueueCopyImage _clEnqueueCopyImage #define clEnqueueCopyImageToBuffer _clEnqueueCopyImageToBuffer #define clEnqueueCopyBufferToImage _clEnqueueCopyBufferToImage #define clEnqueueMapBuffer _clEnqueueMapBuffer #define clEnqueueMapImage _clEnqueueMapImage #define clEnqueueUnmapMemObject _clEnqueueUnmapMemObject #define clEnqueueMigrateMemObjects 
_clEnqueueMigrateMemObjects #define clEnqueueNDRangeKernel _clEnqueueNDRangeKernel #define clEnqueueTask _clEnqueueTask #define clEnqueueNativeKernel _clEnqueueNativeKernel #define clGetExtensionFunctionAddressForPlatform _clGetExtensionFunctionAddressForPlatform #define clEnqueueMarkerWithWaitList _clEnqueueMarkerWithWaitList #define clEnqueueBarrierWithWaitList _clEnqueueBarrierWithWaitList #define clSetPrintfCallback _clSetPrintfCallback #define clEnqueueMarker _clEnqueueMarker #define clEnqueueWaitForEvents _clEnqueueWaitForEvents #define clEnqueueBarrier _clEnqueueBarrier #define clCreateFromGLBuffer _clCreateFromGLBuffer #define clCreateFromGLTexture _clCreateFromGLTexture #define clCreateFromGLTexture2D _clCreateFromGLTexture2D #define clCreateFromGLTexture3D _clCreateFromGLTexture3D #define clCreateFromGLRenderbuffer _clCreateFromGLRenderbuffer #define clGetGLObjectInfo _clGetGLObjectInfo #define clGetGLTextureInfo _clGetGLTextureInfo #define clEnqueueAcquireGLObjects _clEnqueueAcquireGLObjects #define clEnqueueReleaseGLObjects _clEnqueueReleaseGLObjects #define clGetGLContextInfoKHR _clGetGLContextInfoKHR #define clCreateEventFromGLsyncKHR _clCreateEventFromGLsyncKHR #endif // OCLGRIND_ICD #include #include #include #include #define CL_USE_DEPRECATED_OPENCL_1_0_APIS #define CL_USE_DEPRECATED_OPENCL_1_1_APIS #include "CL/cl.h" #include "CL/cl_ext.h" #include "CL/cl_gl.h" #include "CL/cl_gl_ext.h" #if defined(_WIN32) && !defined(__MINGW32__) #include "CL/cl_d3d11.h" #include "CL/cl_d3d10.h" #include "CL/cl_dx9_media_sharing.h" #endif namespace oclgrind { class Context; class Kernel; class Program; class Queue; struct Event; } struct _cl_platform_id { void *dispatch; }; struct _cl_device_id { void **dispatch; }; struct _cl_context { void *dispatch; oclgrind::Context *context; void (CL_CALLBACK *notify)(const char *, const void *, size_t, void *); void *data; cl_context_properties *properties; size_t szProperties; unsigned int refCount; }; struct 
_cl_command_queue { void *dispatch; cl_command_queue_properties properties; cl_context context; oclgrind::Queue *queue; unsigned int refCount; }; struct _cl_mem { void *dispatch; cl_context context; cl_mem parent; size_t address; size_t size; size_t offset; cl_mem_flags flags; bool isImage; void *hostPtr; std::stack< std::pair > callbacks; unsigned int refCount; }; struct cl_image : _cl_mem { cl_image_format format; cl_image_desc desc; }; struct _cl_program { void *dispatch; oclgrind::Program *program; cl_context context; unsigned int refCount; }; struct _cl_kernel { void *dispatch; oclgrind::Kernel *kernel; cl_program program; std::map memArgs; unsigned int refCount; }; struct _cl_event { void *dispatch; cl_context context; cl_command_queue queue; cl_command_type type; oclgrind::Event *event; std::list< std::pair > callbacks; unsigned int refCount; }; struct _cl_sampler { void *dispatch; cl_context context; cl_bool normCoords; cl_addressing_mode addressMode; cl_filter_mode filterMode; uint32_t sampler; unsigned int refCount; }; extern void *m_dispatchTable[256]; #endif // _ICD_H_ Oclgrind-15.5/src/runtime/oclgrind000077500000000000000000000102301252441671000172040ustar00rootroot00000000000000#!/bin/bash # oclgrind (Oclgrind) # Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, # University of Bristol. All rights reserved. # # This program is provided under a three-clause BSD license. For full # license terms please see the LICENSE file distributed with this # source code. 
function usage { echo "Usage: " echo " oclgrind [OPTIONS] COMMAND" echo " oclgrind [--help | --version]" echo echo "Options:" echo -n " --build-options OPTIONS " echo "Additional options to pass to the OpenCL compiler" echo -n " --check-api " echo "Reports errors on API calls" echo -n " --data-races " echo "Enable data-race detection" echo -n " --disable-pch " echo "Don't use precompiled headers" echo -n " --dump-spir " echo "Dump SPIR to /tmp/oclgrind_*.{ll,bc}" echo -n " -h --help " echo "Display usage information" echo -n " --inst-counts " echo "Output histograms of instructions executed" echo -n " -i --interactive " echo "Enable interactive mode" echo -n " --log LOGFILE " echo "Redirect log/error messages to a file" echo -n " --max-errors NUM " echo "Limit the number of error/warning messages" echo -n " --num-threads NUM " echo "Set the number of worker threads to use" echo -n " --pch-dir DIR " echo "Override directory containing precompiled headers" echo -n " --plugins PLUGINS " echo "Load colon seperated list of plugin libraries" echo -n " -q --quick " echo "Only run first and last work-group" echo -n " --uniform-writes " echo "Don't suppress uniform write-write data-races" echo -n " -v --version " echo "Display version information" echo echo "For more information, please visit the Oclgrind wiki page:" echo "-> https://github.com/jrprice/Oclgrind/wiki" echo } # Parse arguments while [ $# -gt 0 -a "${1:0:1}" == "-" ] do if [ "$1" == "--build-options" ] then shift export OCLGRIND_BUILD_OPTIONS="$1" elif [ "$1" == "--check-api" ] then export OCLGRIND_CHECK_API=1 elif [ "$1" == "--data-races" ] then export OCLGRIND_DATA_RACES=1 elif [ "$1" == "--disable-pch" ] then export OCLGRIND_DISABLE_PCH=1 elif [ "$1" == "--dump-spir" ] then export OCLGRIND_DUMP_SPIR=1 elif [ "$1" == "-h" -o "$1" == "--help" ] then usage exit 0 elif [ "$1" == "--inst-counts" ] then export OCLGRIND_INST_COUNTS=1 elif [ "$1" == "-i" -o "$1" == "--interactive" ] then export 
OCLGRIND_INTERACTIVE=1 elif [ "$1" == "--log" ] then shift export OCLGRIND_LOG="$1" elif [ "$1" == "--max-errors" ] then shift export OCLGRIND_MAX_ERRORS="$1" elif [ "$1" == "--num-threads" ] then shift export OCLGRIND_NUM_THREADS="$1" elif [ "$1" == "--pch-dir" ] then shift export OCLGRIND_PCH_DIR="$1" elif [ "$1" == "--plugins" ] then shift export OCLGRIND_PLUGINS="$1" elif [ "$1" == "-q" -o "$1" == "--quick" ] then export OCLGRIND_QUICK=1 elif [ "$1" == "--uniform-writes" ] then export OCLGRIND_UNIFORM_WRITES=1 elif [ "$1" == "-v" -o "$1" == "--version" ] then echo echo "Oclgrind __VERSION__" echo echo "Copyright (c) 2013-2015" echo "James Price and Simon McIntosh-Smith, University of Bristol" echo "https://github.com/jrprice/Oclgrind" echo exit 0 else echo "Unrecognized argument '$1'" usage exit 1 fi shift done # Ensure target command supplied if [ $# -lt 1 ] then usage exit 1 fi # Inject liboclgrind.{so,dylib} and run command LIBDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/../lib" if [ "$(uname -s)" == "Darwin" ] then DYLD_LIBRARY_PATH=$LIBDIR:$DYLD_LIBRARY_PATH \ DYLD_INSERT_LIBRARIES=$LIBDIR/liboclgrind-rt.dylib \ DYLD_FORCE_FLAT_NAMESPACE=1 "$@" else LD_LIBRARY_PATH=$LIBDIR:$LD_LIBRARY_PATH \ LD_PRELOAD=$LIBDIR/liboclgrind-rt.so "$@" fi Oclgrind-15.5/src/runtime/runtime.cpp000066400000000000000000004375251252441671000176700ustar00rootroot00000000000000// runtime.cpp (Oclgrind) // Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, // University of Bristol. All rights reserved. // // This program is provided under a three-clause BSD license. For full // license terms please see the LICENSE file distributed with this // source code. 
#include #include #include #include #include #include "async_queue.h" #include "icd.h" #include "core/Context.h" #include "core/Kernel.h" #include "core/half.h" #include "core/Memory.h" #include "core/Program.h" #include "core/Queue.h" using namespace std; #define MAX_GLOBAL_MEM_SIZE (128 * 1048576) #define MAX_CONSTANT_BUFFER_SIZE (1048576) #define MAX_LOCAL_MEM_SIZE (32768) #define MAX_WI_SIZE (65536) #define PLATFORM_NAME "Oclgrind" #define PLATFORM_VENDOR "University of Bristol" #define PLATFORM_VERSION "OpenCL 1.2 (Oclgrind " PACKAGE_VERSION ")" #define PLATFORM_PROFILE "FULL_PROFILE" #define PLATFORM_SUFFIX "oclg" #define PLATFORM_EXTENSIONS "cl_khr_icd" #define DEVICE_NAME "Oclgrind Simulator" #define DEVICE_VENDOR "University of Bristol" #define DEVICE_VENDOR_ID 0x0042 #define DEVICE_VERSION "OpenCL 1.2 (Oclgrind " PACKAGE_VERSION ")" #define DEVICE_LANG_VERSION "OpenCL C 1.2 (Oclgrind " PACKAGE_VERSION ")" #define DRIVER_VERSION "Oclgrind " PACKAGE_VERSION #define DEVICE_PROFILE "FULL_PROFILE" #define DEVICE_SPIR_VERSIONS "1.2" #define DEVICE_EXTENSIONS " \ cl_khr_spir \ cl_khr_3d_image_writes \ cl_khr_global_int32_base_atomics \ cl_khr_global_int32_extended_atomics \ cl_khr_local_int32_base_atomics \ cl_khr_local_int32_extended_atomics \ cl_khr_byte_addressable_store \ cl_khr_fp64" namespace { #define CASE(X) case X: return #X; const char* CLErrorToString(cl_int err) { switch (err) { CASE(CL_SUCCESS) CASE(CL_DEVICE_NOT_FOUND) CASE(CL_DEVICE_NOT_AVAILABLE) CASE(CL_COMPILER_NOT_AVAILABLE) CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE) CASE(CL_OUT_OF_RESOURCES) CASE(CL_OUT_OF_HOST_MEMORY) CASE(CL_PROFILING_INFO_NOT_AVAILABLE) CASE(CL_MEM_COPY_OVERLAP) CASE(CL_IMAGE_FORMAT_MISMATCH) CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED) CASE(CL_BUILD_PROGRAM_FAILURE) CASE(CL_MAP_FAILURE) CASE(CL_MISALIGNED_SUB_BUFFER_OFFSET) CASE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) CASE(CL_COMPILE_PROGRAM_FAILURE) CASE(CL_LINKER_NOT_AVAILABLE) CASE(CL_LINK_PROGRAM_FAILURE) 
CASE(CL_DEVICE_PARTITION_FAILED) CASE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE) CASE(CL_INVALID_VALUE) CASE(CL_INVALID_DEVICE_TYPE) CASE(CL_INVALID_PLATFORM) CASE(CL_INVALID_DEVICE) CASE(CL_INVALID_CONTEXT) CASE(CL_INVALID_QUEUE_PROPERTIES) CASE(CL_INVALID_COMMAND_QUEUE) CASE(CL_INVALID_HOST_PTR) CASE(CL_INVALID_MEM_OBJECT) CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) CASE(CL_INVALID_IMAGE_SIZE) CASE(CL_INVALID_SAMPLER) CASE(CL_INVALID_BINARY) CASE(CL_INVALID_BUILD_OPTIONS) CASE(CL_INVALID_PROGRAM) CASE(CL_INVALID_PROGRAM_EXECUTABLE) CASE(CL_INVALID_KERNEL_NAME) CASE(CL_INVALID_KERNEL_DEFINITION) CASE(CL_INVALID_KERNEL) CASE(CL_INVALID_ARG_INDEX) CASE(CL_INVALID_ARG_VALUE) CASE(CL_INVALID_ARG_SIZE) CASE(CL_INVALID_KERNEL_ARGS) CASE(CL_INVALID_WORK_DIMENSION) CASE(CL_INVALID_WORK_GROUP_SIZE) CASE(CL_INVALID_WORK_ITEM_SIZE) CASE(CL_INVALID_GLOBAL_OFFSET) CASE(CL_INVALID_EVENT_WAIT_LIST) CASE(CL_INVALID_EVENT) CASE(CL_INVALID_OPERATION) CASE(CL_INVALID_GL_OBJECT) CASE(CL_INVALID_BUFFER_SIZE) CASE(CL_INVALID_MIP_LEVEL) CASE(CL_INVALID_GLOBAL_WORK_SIZE) CASE(CL_INVALID_PROPERTY) CASE(CL_INVALID_IMAGE_DESCRIPTOR) CASE(CL_INVALID_COMPILER_OPTIONS) CASE(CL_INVALID_LINKER_OPTIONS) CASE(CL_INVALID_DEVICE_PARTITION_COUNT) } return "Unknown"; } #undef CASE void notifyAPIError(cl_context context, cl_int err, const char* function, string info = "") { // Remove leading underscore from function name if necessary if (!strncmp(function, "_cl", 3)) { function++; } // Build error message ostringstream oss; oss << endl << "Oclgrind - OpenCL runtime error detected" << endl << "\tFunction: " << function << endl << "\tError: " << CLErrorToString(err) << endl; if (!info.empty()) { oss << "\t" << info << endl; } string error = oss.str(); // Output message to stderr if required const char *checkAPI = getenv("OCLGRIND_CHECK_API"); if (checkAPI && strcmp(checkAPI, "1") == 0) { cerr << error << endl; } // Fire context callback if set if (context && context->notify) { context->notify(error.c_str(), 
context->data, 0, NULL); } } } #if defined(_WIN32) && !defined(__MINGW32__) #define __func__ __FUNCTION__ #endif #define ReturnErrorInfo(context, err, info) \ { \ ostringstream oss; \ oss << info; \ notifyAPIError(context, err, __func__, oss.str()); \ return err; \ } #define ReturnErrorArg(context, err, arg) \ ReturnErrorInfo(context, err, "For argument '" #arg "'") #define ReturnError(context, err) \ ReturnErrorInfo(context, err, "") #define SetErrorInfo(context, err, info) \ if (err != CL_SUCCESS) \ { \ ostringstream oss; \ oss << info; \ notifyAPIError(context, err, __func__, oss.str()); \ } \ if (errcode_ret) \ { \ *errcode_ret = err; \ } #define SetErrorArg(context, err, arg) \ SetErrorInfo(context, err, "For argument '" #arg "'") #define SetError(context, err) \ SetErrorInfo(context, err, "") #define ParamValueSizeTooSmall \ "param_value_size is " << param_value_size << \ ", but result requires " << result_size << " bytes" static struct _cl_platform_id *m_platform = NULL; static struct _cl_device_id *m_device = NULL; CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR ( cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms ) { if (!m_platform) { m_platform = new _cl_platform_id; m_platform->dispatch = m_dispatchTable; m_device = new _cl_device_id; m_device->dispatch = m_dispatchTable; } if (num_entries > 0) { platforms[0] = m_platform; } if (num_platforms) { *num_platforms = 1; } return CL_SUCCESS; } //////////////////////////////////// // OpenCL Runtime API Definitions // //////////////////////////////////// #ifndef CL_USE_DEPRECATED_OPENCL_1_0_APIS #define CL_USE_DEPRECATED_OPENCL_1_0_APIS #endif #ifndef CL_USE_DEPRECATED_OPENCL_1_1_APIS #define CL_USE_DEPRECATED_OPENCL_1_1_APIS #endif CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddress ( const char * funcname ) CL_API_SUFFIX__VERSION_1_2 { if (strcmp(funcname, "clIcdGetPlatformIDsKHR") == 0) { return (void*)clIcdGetPlatformIDsKHR; } else { return NULL; } } CL_API_ENTRY cl_int 
CL_API_CALL clGetPlatformIDs ( cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms ) CL_API_SUFFIX__VERSION_1_0 { return clIcdGetPlatformIDsKHR(num_entries, platforms, num_platforms); } CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo ( cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { // Select platform info string const char *result = NULL; switch(param_name) { case CL_PLATFORM_PROFILE: result = PLATFORM_PROFILE; break; case CL_PLATFORM_VERSION: result = PLATFORM_VERSION; break; case CL_PLATFORM_NAME: result = PLATFORM_NAME; break; case CL_PLATFORM_VENDOR: result = PLATFORM_VENDOR; break; case CL_PLATFORM_EXTENSIONS: result = PLATFORM_EXTENSIONS; break; case CL_PLATFORM_ICD_SUFFIX_KHR: result = PLATFORM_SUFFIX; break; default: ReturnErrorArg(NULL, CL_INVALID_VALUE, param_name); } // Compute size of result size_t result_size = strlen(result) + 1; if (param_value_size_ret) { *param_value_size_ret = result_size; } // Return result if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(NULL, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { memcpy(param_value, result, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDs ( cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (devices && num_entries < 1) { ReturnError(NULL, CL_INVALID_VALUE); } if (device_type != CL_DEVICE_TYPE_CPU && device_type != CL_DEVICE_TYPE_DEFAULT && device_type != CL_DEVICE_TYPE_ALL) { ReturnError(NULL, CL_DEVICE_NOT_FOUND); } if (devices) { *devices = m_device; } if (num_devices) { *num_devices = 1; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo ( cl_device_id device, cl_device_info param_name, size_t param_value_size, void * 
param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check device is valid if (device != m_device) { ReturnErrorArg(NULL, CL_INVALID_DEVICE, device); } size_t dummy; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; // All possible return types union { cl_uint cluint; size_t sizet; size_t sizet3[3]; cl_ulong clulong; cl_bool clbool; cl_device_id cldeviceid; cl_device_type cldevicetype; cl_device_fp_config devicefpconfig; cl_device_mem_cache_type devicememcachetype; cl_device_local_mem_type devicelocalmemtype; cl_device_exec_capabilities cldevexeccap; cl_command_queue_properties clcmdqprop; cl_platform_id clplatid; cl_device_partition_property cldevpartprop; cl_device_affinity_domain cldevaffdom; } result_data; // The result is actually a string that needs copying const char* str = 0; switch (param_name) { case CL_DEVICE_TYPE: result_size = sizeof(cl_device_type); result_data.cldevicetype = CL_DEVICE_TYPE_CPU; break; case CL_DEVICE_VENDOR_ID: result_size = sizeof(cl_uint); result_data.cluint = DEVICE_VENDOR_ID; break; case CL_DEVICE_MAX_COMPUTE_UNITS: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: result_size = sizeof(cl_uint); result_data.cluint = 3; break; case CL_DEVICE_MAX_WORK_GROUP_SIZE: result_size = sizeof(size_t); result_data.sizet = MAX_WI_SIZE; break; case CL_DEVICE_MAX_WORK_ITEM_SIZES: result_size = 3*sizeof(size_t); result_data.sizet3[0] = MAX_WI_SIZE; result_data.sizet3[1] = MAX_WI_SIZE; result_data.sizet3[2] = MAX_WI_SIZE; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_DEVICE_MAX_CLOCK_FREQUENCY: result_size = sizeof(cl_uint); result_data.cluint = 1; 
break; case CL_DEVICE_ADDRESS_BITS: result_size = sizeof(cl_uint); result_data.cluint = sizeof(size_t)<<3; break; case CL_DEVICE_MAX_READ_IMAGE_ARGS: result_size = sizeof(cl_uint); result_data.cluint = 128; break; case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: result_size = sizeof(cl_uint); result_data.cluint = 8; break; case CL_DEVICE_MAX_MEM_ALLOC_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = MAX_GLOBAL_MEM_SIZE; break; case CL_DEVICE_IMAGE2D_MAX_WIDTH: case CL_DEVICE_IMAGE2D_MAX_HEIGHT: result_size = sizeof(size_t); result_data.sizet = 8192; break; case CL_DEVICE_IMAGE3D_MAX_WIDTH: case CL_DEVICE_IMAGE3D_MAX_DEPTH: case CL_DEVICE_IMAGE3D_MAX_HEIGHT: result_size = sizeof(size_t); result_data.sizet = 2048; break; case CL_DEVICE_IMAGE_SUPPORT: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_MAX_PARAMETER_SIZE: result_size = sizeof(size_t); result_data.sizet = 1024; break; case CL_DEVICE_MAX_SAMPLERS: result_size = sizeof(cl_uint); result_data.sizet = 16; break; case CL_DEVICE_MEM_BASE_ADDR_ALIGN: result_size = sizeof(cl_uint); result_data.cluint = sizeof(cl_long16)<<3; break; case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_DEVICE_SINGLE_FP_CONFIG: result_size = sizeof(cl_device_fp_config); result_data.devicefpconfig = CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN | CL_FP_DENORM; break; case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: result_size = sizeof(cl_device_mem_cache_type); result_data.devicememcachetype = CL_NONE; break; case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = 0; break; case CL_DEVICE_GLOBAL_MEM_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = MAX_GLOBAL_MEM_SIZE; break; case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = MAX_CONSTANT_BUFFER_SIZE; break; case 
CL_DEVICE_MAX_CONSTANT_ARGS: result_size = sizeof(cl_uint); result_data.cluint = 1024; break; case CL_DEVICE_LOCAL_MEM_TYPE: result_size = sizeof(cl_device_local_mem_type); result_data.devicelocalmemtype = CL_LOCAL; break; case CL_DEVICE_LOCAL_MEM_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = MAX_LOCAL_MEM_SIZE; break; case CL_DEVICE_ERROR_CORRECTION_SUPPORT: result_size = sizeof(cl_bool); result_data.clbool = CL_FALSE; break; case CL_DEVICE_PROFILING_TIMER_RESOLUTION: result_size = sizeof(size_t); result_data.sizet = 1000; break; case CL_DEVICE_ENDIAN_LITTLE: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_AVAILABLE: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_COMPILER_AVAILABLE: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_EXECUTION_CAPABILITIES: result_size = sizeof(cl_device_exec_capabilities); result_data.cldevexeccap = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL; break; case CL_DEVICE_QUEUE_PROPERTIES: result_size = sizeof(cl_command_queue_properties); result_data.clcmdqprop = CL_QUEUE_PROFILING_ENABLE; break; case CL_DEVICE_NAME: result_size = sizeof(DEVICE_NAME); str = DEVICE_NAME; break; case CL_DEVICE_VENDOR: result_size = sizeof(DEVICE_VENDOR); str = DEVICE_VENDOR; break; case CL_DRIVER_VERSION: result_size = sizeof(DRIVER_VERSION); str = DRIVER_VERSION; break; case CL_DEVICE_PROFILE: result_size = sizeof(DEVICE_PROFILE); str = DEVICE_PROFILE; break; case CL_DEVICE_VERSION: result_size = sizeof(DEVICE_VERSION); str = DEVICE_VERSION; break; case CL_DEVICE_EXTENSIONS: result_size = sizeof(DEVICE_EXTENSIONS); str = DEVICE_EXTENSIONS; break; case CL_DEVICE_PLATFORM: result_size = sizeof(cl_platform_id); result_data.clplatid = m_platform; break; case CL_DEVICE_DOUBLE_FP_CONFIG: result_size = sizeof(cl_device_fp_config); result_data.devicefpconfig = CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN 
| CL_FP_DENORM; break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_HOST_UNIFIED_MEMORY: result_size = sizeof(cl_bool); result_data.clbool = CL_FALSE; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_OPENCL_C_VERSION: result_size = sizeof(DEVICE_LANG_VERSION); str = DEVICE_LANG_VERSION; break; case CL_DEVICE_LINKER_AVAILABLE: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_BUILT_IN_KERNELS: result_size = 1; str = ""; break; case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: result_size = sizeof(size_t); result_data.sizet = 65536; break; case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: result_size = sizeof(size_t); result_data.sizet = 2048; break; case CL_DEVICE_PARENT_DEVICE: result_size = sizeof(cl_device_id); result_data.cldeviceid = NULL; break; case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_DEVICE_PARTITION_PROPERTIES: case CL_DEVICE_PARTITION_TYPE: result_size = sizeof(cl_device_partition_property); result_data.cldevpartprop = 0; break; case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: result_size = sizeof(cl_device_affinity_domain); result_data.cldevaffdom = 0; break; case CL_DEVICE_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: result_size = sizeof(cl_bool); result_data.clbool = CL_TRUE; break; case CL_DEVICE_PRINTF_BUFFER_SIZE: result_size = sizeof(size_t); result_data.sizet = 1024; break; case CL_DEVICE_SPIR_VERSIONS: result_size = sizeof(DEVICE_SPIR_VERSIONS); 
str = DEVICE_SPIR_VERSIONS; break; default: ReturnErrorArg(NULL, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(NULL, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { const void* src = str ? (const void*)str : (const void*)&result_data; memcpy(param_value, src, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clCreateSubDevices ( cl_device_id in_device, const cl_device_partition_property * properties, cl_uint num_entries, cl_device_id * out_devices, cl_uint * num_devices ) CL_API_SUFFIX__VERSION_1_2 { ReturnErrorInfo(NULL, CL_INVALID_VALUE, "Not yet implemented"); } CL_API_ENTRY cl_int CL_API_CALL clRetainDevice ( cl_device_id device ) CL_API_SUFFIX__VERSION_1_2 { return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseDevice ( cl_device_id device ) CL_API_SUFFIX__VERSION_1_2 { return CL_SUCCESS; } CL_API_ENTRY cl_context CL_API_CALL clCreateContext ( const cl_context_properties * properties, cl_uint num_devices, const cl_device_id * devices, void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *), void * user_data, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (num_devices != 1) { SetErrorArg(NULL, CL_INVALID_VALUE, num_devices); return NULL; } if (!devices) { SetErrorArg(NULL, CL_INVALID_VALUE, devices); return NULL; } if (devices[0] != m_device) { SetError(NULL, CL_INVALID_DEVICE); return NULL; } if (!pfn_notify && user_data) { SetErrorInfo(NULL, CL_INVALID_VALUE, "pfn_notify NULL but user_data non-NULL"); return NULL; } // Create context object cl_context context = new _cl_context; context->dispatch = m_dispatchTable; context->context = new oclgrind::Context(); context->notify = pfn_notify; context->data = user_data; context->properties = NULL; context->szProperties = 0; context->refCount = 1; if (properties) { int num = 1; while (properties[num]) { num++; } size_t sz = 
(num+1)*sizeof(cl_context_properties); context->szProperties = sz; context->properties = (cl_context_properties*)malloc(sz); memcpy(context->properties, properties, sz); } SetError(NULL, CL_SUCCESS); return context; } CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType ( const cl_context_properties * properties, cl_device_type device_type, void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *), void * user_data, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!pfn_notify && user_data) { SetErrorInfo(NULL, CL_INVALID_VALUE, "pfn_notify NULL but user_data non-NULL"); return NULL; } if (device_type != CL_DEVICE_TYPE_CPU && device_type != CL_DEVICE_TYPE_DEFAULT && device_type != CL_DEVICE_TYPE_ALL) { SetErrorArg(NULL, CL_DEVICE_NOT_FOUND, device_type); return NULL; } // Create context object cl_context context = new _cl_context; context->dispatch = m_dispatchTable; context->context = new oclgrind::Context(); context->notify = pfn_notify; context->data = user_data; context->properties = NULL; context->szProperties = 0; context->refCount = 1; if (properties) { int num = 0; while (properties[num]) { num++; } size_t sz = (num+1)*sizeof(cl_context_properties); context->szProperties = sz; context->properties = (cl_context_properties*)malloc(sz); memcpy(context->properties, properties, sz); } SetError(NULL, CL_SUCCESS); return context; } CL_API_ENTRY cl_int CL_API_CALL clRetainContext ( cl_context context ) CL_API_SUFFIX__VERSION_1_0 { if (!context) { ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context); } context->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseContext ( cl_context context ) CL_API_SUFFIX__VERSION_1_0 { if (!context) { ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context); } if (--context->refCount == 0) { delete context->context; delete context; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo ( cl_context context, cl_context_info param_name, size_t 
param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check context is valid if (!context) { ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context); } size_t dummy = 0; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; union { cl_uint cluint; cl_device_id cldevid; } result_data; cl_context_properties* properties = NULL; switch (param_name) { case CL_CONTEXT_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = context->refCount; break; case CL_CONTEXT_NUM_DEVICES: result_size = sizeof(cl_uint); result_data.cluint = 1; break; case CL_CONTEXT_DEVICES: result_size = sizeof(cl_device_id); result_data.cldevid = m_device; break; case CL_CONTEXT_PROPERTIES: result_size = context->szProperties; properties = context->properties; break; default: ReturnErrorArg(context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { if (properties) memcpy(param_value, properties, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue ( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (device != m_device) { SetErrorArg(context, CL_INVALID_DEVICE, device); return NULL; } if (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { SetErrorInfo(context, CL_INVALID_QUEUE_PROPERTIES, "Out-of-order command queues not supported"); return NULL; } // Create command-queue object cl_command_queue queue; queue = new _cl_command_queue; queue->queue = new oclgrind::Queue(context->context); queue->dispatch = m_dispatchTable; queue->properties = properties; queue->context = context; 
queue->refCount = 1; clRetainContext(context); SetError(context, CL_SUCCESS); return queue; } CL_API_ENTRY cl_int CL_API_CALL clSetCommandQueueProperty ( cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties * old_properties ) { return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clRetainCommandQueue ( cl_command_queue command_queue ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } command_queue->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue ( cl_command_queue command_queue ) CL_API_SUFFIX__VERSION_1_0 { if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (--command_queue->refCount == 0) { // TODO: Retain/release queue from async thread // TODO: Spec states that this function performs an implicit flush, // so maybe we are OK to delete queue here? clFinish(command_queue); delete command_queue->queue; clReleaseContext(command_queue->context); delete command_queue; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetCommandQueueInfo ( cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check queue is valid if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } size_t dummy = 0; size_t& result_size = param_value_size_ret ? 
*param_value_size_ret : dummy; union { cl_uint cluint; cl_context context; cl_device_id cldevid; cl_command_queue_properties properties; } result_data; switch (param_name) { case CL_QUEUE_CONTEXT: result_size = sizeof(cl_context); result_data.context = command_queue->context; break; case CL_QUEUE_DEVICE: result_size = sizeof(cl_device_id); result_data.cldevid = m_device; break; case CL_QUEUE_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = command_queue->refCount; break; case CL_QUEUE_PROPERTIES: result_size = sizeof(cl_command_queue_properties); result_data.properties = command_queue->properties; break; default: ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer ( cl_context context, cl_mem_flags flags, size_t size, void * host_ptr, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (size == 0) { SetErrorArg(context, CL_INVALID_BUFFER_SIZE, size); return NULL; } if ((host_ptr == NULL) == ((flags & CL_MEM_COPY_HOST_PTR) || flags & CL_MEM_USE_HOST_PTR)) { SetErrorInfo(context, CL_INVALID_HOST_PTR, "host_ptr NULL but CL_MEM_{COPY,USE}_HOST_PTR used"); return NULL; } if ((flags & CL_MEM_USE_HOST_PTR) && (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) { SetErrorInfo(context, CL_INVALID_VALUE, "CL_MEM_USE_HOST_PTR cannot be used with " "CL_MEM_{COPY,ALLOC}_HOST_PTR"); return NULL; } // Create memory object oclgrind::Memory *globalMemory = context->context->getGlobalMemory(); cl_mem mem = new _cl_mem; mem->dispatch = m_dispatchTable; mem->context = context; mem->parent = NULL; mem->size = size; mem->offset = 0; 
mem->flags = flags; mem->isImage = false; mem->refCount = 1; if (flags & CL_MEM_USE_HOST_PTR) { mem->address = globalMemory->createHostBuffer(size, host_ptr, flags); mem->hostPtr = host_ptr; } else { mem->address = globalMemory->allocateBuffer(size, flags); mem->hostPtr = NULL; } if (!mem->address) { SetError(context, CL_MEM_OBJECT_ALLOCATION_FAILURE); delete mem; return NULL; } clRetainContext(context); if (flags & CL_MEM_COPY_HOST_PTR) { context->context->getGlobalMemory()->store((const unsigned char*)host_ptr, mem->address, size); } SetError(context, CL_SUCCESS); return mem; } CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBuffer ( cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type buffer_create_type, const void * buffer_create_info, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_1 { // Check parameters if (!buffer) { SetErrorArg(NULL, CL_INVALID_MEM_OBJECT, buffer); return NULL; } if (buffer->parent) { SetErrorInfo(buffer->context, CL_INVALID_MEM_OBJECT, "Parent buffer cannot be a sub-buffer"); return NULL; } if (buffer_create_type != CL_BUFFER_CREATE_TYPE_REGION) { SetErrorArg(buffer->context, CL_INVALID_VALUE, buffer_create_type); return NULL; } if (!buffer_create_info) { SetErrorArg(buffer->context, CL_INVALID_VALUE, buffer_create_info); return NULL; } _cl_buffer_region region = *(_cl_buffer_region*)buffer_create_info; if (region.origin + region.size > buffer->size) { SetErrorInfo(buffer->context, CL_INVALID_VALUE, "Region doesn't fit inside parent buffer"); return NULL; } if (region.size == 0) { SetErrorInfo(buffer->context, CL_INVALID_VALUE, "Region size cannot be 0"); return NULL; } // Inherit flags from parent where appropriate cl_mem_flags memFlags = 0; cl_mem_flags rwFlags = CL_MEM_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY; cl_mem_flags hostAccess = CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY; cl_mem_flags hostPtr = CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR; if ((flags & rwFlags) == 0) { 
memFlags |= buffer->flags & rwFlags; } else { memFlags |= flags & rwFlags; } if ((flags & hostAccess) == 0) { memFlags |= buffer->flags & hostAccess; } else { memFlags |= flags & hostAccess; } memFlags |= buffer->flags & hostPtr; // Create memory object cl_mem mem = new _cl_mem; mem->dispatch = m_dispatchTable; mem->context = buffer->context; mem->parent = buffer; mem->size = region.size; mem->offset = region.origin; mem->isImage = false; mem->flags = memFlags; mem->hostPtr = (unsigned char*)buffer->hostPtr + region.origin; mem->refCount = 1; mem->address = buffer->address + region.origin; clRetainMemObject(buffer); SetError(buffer->context, CL_SUCCESS); return mem; } // Utility function for getting number of dimensions in image size_t getNumDimensions(cl_mem_object_type type) { switch (type) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE1D_BUFFER: return 1; case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: return 2; case CL_MEM_OBJECT_IMAGE3D: return 3; default: return 0; } } // Utility function for getting number of channels in an image size_t getNumChannels(const cl_image_format *format) { switch (format->image_channel_order) { case CL_R: case CL_Rx: case CL_A: case CL_INTENSITY: case CL_LUMINANCE: return 1; case CL_RG: case CL_RGx: case CL_RA: return 2; case CL_RGB: case CL_RGBx: return 3; case CL_RGBA: case CL_ARGB: case CL_BGRA: return 4; default: return 0; } } // Utility function for computing an image format's pixel size (in bytes) size_t getPixelSize(const cl_image_format *format) { // Get number of channels size_t numChannels = getNumChannels(format); // Get size of each pixel (in bytes) switch (format->image_channel_data_type) { case CL_SNORM_INT8: case CL_UNORM_INT8: case CL_SIGNED_INT8: case CL_UNSIGNED_INT8: return numChannels; case CL_SNORM_INT16: case CL_UNORM_INT16: case CL_SIGNED_INT16: case CL_UNSIGNED_INT16: case CL_HALF_FLOAT: return 2*numChannels; case CL_SIGNED_INT32: case 
CL_UNSIGNED_INT32: case CL_FLOAT: return 4*numChannels; case CL_UNORM_SHORT_565: case CL_UNORM_SHORT_555: return 2; case CL_UNORM_INT_101010: return 4; default: return 0; } } bool isImageArray(cl_mem_object_type type) { if (type == CL_MEM_OBJECT_IMAGE1D_ARRAY || type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { return true; } return false; } CL_API_ENTRY cl_mem CL_API_CALL clCreateImage ( cl_context context, cl_mem_flags flags, const cl_image_format * image_format, const cl_image_desc * image_desc, void * host_ptr, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_2 { // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (!image_format) { SetErrorArg(context, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, image_format); return NULL; } if (!image_desc) { SetErrorArg(context, CL_INVALID_IMAGE_DESCRIPTOR, image_desc); return NULL; } // Get size of each pixel (in bytes) size_t pixelSize = getPixelSize(image_format); if (!pixelSize) { SetErrorArg(context, CL_INVALID_VALUE, image_format); return NULL; } // Get image dimensions size_t dims = getNumDimensions(image_desc->image_type); size_t width = image_desc->image_width; size_t height = 1, depth = 1; size_t arraySize = 1; if (dims > 1) { height = image_desc->image_height; } if (dims > 2) { depth = image_desc->image_depth; } if (isImageArray(image_desc->image_type)) { arraySize = image_desc->image_array_size; } // Calculate total size of image size_t size = width * height * depth * arraySize * pixelSize; cl_mem mem; if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { // Use existing buffer if (!image_desc->buffer) { SetErrorInfo(context, CL_INVALID_VALUE, "image_desc->buffer cannot be NULL " "when using CL_MEM_OBJECT_IMAGE1D_BUFFER"); return NULL; } mem = image_desc->buffer; clRetainMemObject(image_desc->buffer); } else { // Create buffer // TODO: Use pitches mem = clCreateBuffer(context, flags, size, host_ptr, errcode_ret); if (!mem) { return NULL; } } // Create image object wrapper 
cl_image *image = new cl_image; *(cl_mem)image = *mem; image->isImage = true; image->format = *image_format; image->desc = *image_desc; image->desc.image_width = width; image->desc.image_height = height; image->desc.image_depth = depth; image->desc.image_array_size = arraySize; image->refCount = 1; if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER) { delete mem; } SetError(context, CL_SUCCESS); return image; } CL_API_ENTRY cl_mem CL_API_CALL clCreateImage2D ( cl_context context, cl_mem_flags flags, const cl_image_format * image_format, size_t image_width, size_t image_height, size_t image_row_pitch, void * host_ptr, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, image_width, image_height, 1, 1, image_row_pitch, 0, 0, 0, NULL }; return clCreateImage(context, flags, image_format, &desc, host_ptr, errcode_ret); } CL_API_ENTRY cl_mem CL_API_CALL clCreateImage3D ( cl_context context, cl_mem_flags flags, const cl_image_format * image_format, size_t image_width, size_t image_height, size_t image_depth, size_t image_row_pitch, size_t image_slice_pitch, void * host_ptr, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { cl_image_desc desc = { CL_MEM_OBJECT_IMAGE3D, image_width, image_height, image_depth, 1, image_row_pitch, image_slice_pitch, 0, 0, NULL }; return clCreateImage(context, flags, image_format, &desc, host_ptr, errcode_ret); } CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject ( cl_mem memobj ) CL_API_SUFFIX__VERSION_1_0 { if (!memobj) { ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj); } memobj->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject ( cl_mem memobj ) CL_API_SUFFIX__VERSION_1_0 { if (!memobj) { ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj); } if (--memobj->refCount == 0) { if (memobj->isImage && ((cl_image*)memobj)->desc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { clReleaseMemObject(((cl_image*)memobj)->desc.buffer); } else { if (memobj->parent) { 
        // Tail of clReleaseMemObject: the object has a parent, so only the
        // reference held on the parent is dropped.
        clReleaseMemObject(memobj->parent);
      }
      else
      {
        // No parent: free the backing allocation and release the context
        memobj->context->context->getGlobalMemory()->deallocateBuffer(
          memobj->address);
        clReleaseContext(memobj->context);
      }

      // Run the destructor callbacks, taking them from the top of the
      // `callbacks` container until it is empty.
      // NOTE(review): the template arguments of `pair` appear to be missing
      // in this copy of the source - confirm against the upstream file.
      while (!memobj->callbacks.empty())
      {
        pair callback = memobj->callbacks.top();
        callback.first(memobj, callback.second);
        memobj->callbacks.pop();
      }
    }

    delete memobj;
  }

  return CL_SUCCESS;
}

// Reports the image formats (channel order / channel type pairs) that this
// implementation accepts. The total count is written to `num_image_formats`
// when that pointer is non-NULL, and up to `num_entries` formats are written
// to `image_formats` when that pointer is non-NULL.
// Formats are grouped in categories: entry c of `orders` is paired with
// entry c of `types`, and every order in a category is combined with every
// type in the same category.
// `flags` and `image_type` are not read by the code visible here.
CL_API_ENTRY cl_int CL_API_CALL
clGetSupportedImageFormats
(
  cl_context context,
  cl_mem_flags flags,
  cl_mem_object_type image_type,
  cl_uint num_entries,
  cl_image_format * image_formats,
  cl_uint * num_image_formats
) CL_API_SUFFIX__VERSION_1_0
{
  // Check parameters
  if (!context)
  {
    ReturnErrorArg(NULL, CL_INVALID_CONTEXT, context);
  }
  if (num_entries == 0 && image_formats)
  {
    ReturnErrorInfo(context, CL_INVALID_VALUE,
                    "num_entries should be >0 if image_formats non-NULL");
  }

  // TODO: Add support for packed image types

  // Channel orders
  const cl_channel_order ordersAll[] =
  {
    CL_R, CL_Rx, CL_A,
    CL_RG, CL_RGx, CL_RA,
    CL_RGBA,
  };
  const cl_channel_order ordersNormalized[] = {CL_INTENSITY, CL_LUMINANCE};
  const cl_channel_order ordersByte[] = {CL_ARGB, CL_BGRA};
  // Declared but currently left out of `orders` below
  const cl_channel_order ordersPacked[] = {CL_RGB, CL_RGBx};
  const cl_channel_order *orders[] =
  {
    ordersAll, ordersNormalized, ordersByte //, ordersPacked
  };
  // Number of entries in each category of `orders`
  const size_t numOrders[] =
  {
    sizeof(ordersAll) / sizeof(cl_channel_order),
    sizeof(ordersNormalized) / sizeof(cl_channel_order),
    sizeof(ordersByte) / sizeof(cl_channel_order),
    //sizeof(ordersPacked) / sizeof(cl_channel_order),
  };

  // Channel types
  const cl_channel_type typesAll[] =
  {
    CL_SNORM_INT8, CL_SNORM_INT16,
    CL_UNORM_INT8, CL_UNORM_INT16,
    CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32,
    CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32,
    CL_FLOAT, CL_HALF_FLOAT,
  };
  const cl_channel_type typesNormalized[] =
  {
    CL_SNORM_INT8, CL_SNORM_INT16,
    CL_UNORM_INT8, CL_UNORM_INT16,
    CL_FLOAT, CL_HALF_FLOAT,
  };
  const cl_channel_type typesByte[] =
  {
    CL_SNORM_INT8, CL_UNORM_INT8,
    CL_SIGNED_INT8, CL_UNSIGNED_INT8,
  };
  const
cl_channel_type typesPacked[] = { CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010 }; const cl_channel_type *types[] = { typesAll, typesNormalized, typesByte //, typesPacked, }; const size_t numTypes[] = { sizeof(typesAll) / sizeof(cl_channel_order), sizeof(typesNormalized) / sizeof(cl_channel_order), sizeof(typesByte) / sizeof(cl_channel_order), //sizeof(typesPacked) / sizeof(cl_channel_order), }; // Calculate total number of formats size_t numCatagories = sizeof(orders)/sizeof(cl_channel_order*); size_t numFormats = 0; for (size_t c = 0; c < numCatagories; c++) { numFormats += numOrders[c] * numTypes[c]; } if (num_image_formats) { *num_image_formats = numFormats; } // Generate list of all valid order/type combinations if (image_formats) { unsigned i = 0; for (size_t c = 0; c < numCatagories; c++) { for (size_t o = 0; o < numOrders[c]; o++) { for (size_t t = 0; t < numTypes[c]; t++) { if (i >= num_entries) { return CL_SUCCESS; } cl_image_format format = {orders[c][o], types[c][t]}; image_formats[i++] = format; } } } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfo ( cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check mem object is valid if (!memobj) { ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj); } size_t dummy = 0; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; union { cl_mem_object_type clmemobjty; cl_mem_flags clmemflags; cl_context context; cl_mem clmem; size_t sizet; cl_uint cluint; void* ptr; } result_data; switch (param_name) { case CL_MEM_TYPE: result_size = sizeof(cl_mem_object_type); result_data.clmemobjty = memobj->isImage ? 
((cl_image*)memobj)->desc.image_type : CL_MEM_OBJECT_BUFFER; break; case CL_MEM_FLAGS: result_size = sizeof(cl_mem_flags); result_data.clmemflags = memobj->flags; break; case CL_MEM_SIZE: result_size = sizeof(size_t); result_data.sizet = memobj->size; break; case CL_MEM_HOST_PTR: result_size = sizeof(void*); result_data.ptr = memobj->hostPtr; break; case CL_MEM_MAP_COUNT: result_size = sizeof(cl_uint); result_data.cluint = 0; break; case CL_MEM_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = memobj->refCount; break; case CL_MEM_CONTEXT: result_size = sizeof(cl_context); result_data.context = memobj->context; break; case CL_MEM_ASSOCIATED_MEMOBJECT: result_size = sizeof(cl_mem); result_data.clmem = memobj->parent; break; case CL_MEM_OFFSET: result_size = sizeof(size_t); result_data.sizet = memobj->offset; break; default: ReturnErrorArg(memobj->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(memobj->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetImageInfo ( cl_mem image, cl_image_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check mem object is valid if (!image) { ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, image); } cl_image *img = (cl_image*)image; size_t dummy = 0; size_t& result_size = param_value_size_ret ? 
    // Middle of clGetImageInfo (the definition starts on an earlier line and
    // ends on a later one); completes the `result_size` initializer.
    *param_value_size_ret : dummy;
  // Scratch storage for any of the possible result types
  union
  {
    cl_image_format climgfmt;
    size_t sizet;
    cl_mem clmem;
    cl_uint cluint;
  } result_data;

  switch (param_name)
  {
  case CL_IMAGE_FORMAT:
    result_size = sizeof(cl_image_format);
    result_data.climgfmt = img->format;
    break;
  case CL_IMAGE_ELEMENT_SIZE:
    result_size = sizeof(size_t);
    result_data.sizet = getPixelSize(&img->format);
    break;
  case CL_IMAGE_ROW_PITCH:
    result_size = sizeof(size_t);
    result_data.sizet = img->desc.image_row_pitch;
    break;
  case CL_IMAGE_SLICE_PITCH:
    result_size = sizeof(size_t);
    result_data.sizet = img->desc.image_slice_pitch;
    break;
  case CL_IMAGE_WIDTH:
    result_size = sizeof(size_t);
    result_data.sizet = img->desc.image_width;
    break;
  case CL_IMAGE_HEIGHT:
    // Height is reported as 0 unless the image type has more than one
    // dimension
    result_size = sizeof(size_t);
    result_data.sizet =
      getNumDimensions(img->desc.image_type) > 1 ? img->desc.image_height : 0;
    break;
  case CL_IMAGE_DEPTH:
    // Depth is reported as 0 unless the image type has more than two
    // dimensions
    result_size = sizeof(size_t);
    result_data.sizet =
      getNumDimensions(img->desc.image_type) > 2 ? img->desc.image_depth : 0;
    break;
  case CL_IMAGE_ARRAY_SIZE:
    // Array size is reported as 0 for image types that are not arrays
    result_size = sizeof(size_t);
    result_data.sizet =
      isImageArray(img->desc.image_type) ?
        // Tail of clGetImageInfo: completes the CL_IMAGE_ARRAY_SIZE case.
        img->desc.image_array_size : 0;
    break;
  case CL_IMAGE_BUFFER:
    result_size = sizeof(cl_mem);
    result_data.clmem = img->desc.buffer;
    break;
  case CL_IMAGE_NUM_MIP_LEVELS:
    // Always reported as zero
    result_size = sizeof(cl_uint);
    result_data.cluint = 0;
    break;
  case CL_IMAGE_NUM_SAMPLES:
    // Always reported as zero
    result_size = sizeof(cl_uint);
    result_data.cluint = 0;
    break;
  default:
    ReturnErrorArg(image->context, CL_INVALID_VALUE, param_name);
  }

  if (param_value)
  {
    // Check destination is large enough
    if (param_value_size < result_size)
    {
      ReturnErrorInfo(image->context, CL_INVALID_VALUE,
                      ParamValueSizeTooSmall);
    }
    else
    {
      memcpy(param_value, &result_data, result_size);
    }
  }

  return CL_SUCCESS;
}

// Registers `pfn_notify` (with its `user_data`) on `memobj` by pushing the
// pair onto the object's `callbacks` container.
// Reports CL_INVALID_MEM_OBJECT for a NULL `memobj` and CL_INVALID_VALUE for
// a NULL `pfn_notify`.
CL_API_ENTRY cl_int CL_API_CALL
clSetMemObjectDestructorCallback
(
  cl_mem memobj,
  void (CL_CALLBACK * pfn_notify)(cl_mem, void*),
  void * user_data
) CL_API_SUFFIX__VERSION_1_1
{
  // Check parameters
  if (!memobj)
  {
    ReturnErrorArg(NULL, CL_INVALID_MEM_OBJECT, memobj);
  }
  if (!pfn_notify)
  {
    ReturnErrorArg(memobj->context, CL_INVALID_VALUE, pfn_notify);
  }

  memobj->callbacks.push(make_pair(pfn_notify, user_data));

  return CL_SUCCESS;
}

// Creates a sampler object for `context`.
// The three sampler properties are also packed into a 32-bit bitfield:
// 0x0001 for normalized coordinates, 0x0002/0x0004/0x0006/0x0008 for the
// addressing mode and 0x0010/0x0020 for the filter mode.
// Returns NULL after reporting CL_INVALID_CONTEXT for a NULL context, or
// CL_INVALID_VALUE for an unrecognised addressing or filter mode.
CL_API_ENTRY cl_sampler CL_API_CALL
clCreateSampler
(
  cl_context context,
  cl_bool normalized_coords,
  cl_addressing_mode addressing_mode,
  cl_filter_mode filter_mode,
  cl_int * errcode_ret
) CL_API_SUFFIX__VERSION_1_0
{
  // Check parameters
  if (!context)
  {
    SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
    return NULL;
  }

  // Create sampler bitfield
  uint32_t bitfield = 0;

  if (normalized_coords)
  {
    bitfield |= 0x0001;
  }

  switch (addressing_mode)
  {
    case CL_ADDRESS_NONE:
      break;
    case CL_ADDRESS_CLAMP_TO_EDGE:
      bitfield |= 0x0002;
      break;
    case CL_ADDRESS_CLAMP:
      bitfield |= 0x0004;
      break;
    case CL_ADDRESS_REPEAT:
      bitfield |= 0x0006;
      break;
    case CL_ADDRESS_MIRRORED_REPEAT:
      bitfield |= 0x0008;
      break;
    default:
      SetErrorArg(context, CL_INVALID_VALUE, addressing_mode);
      return NULL;
  }

  switch (filter_mode)
  {
    case CL_FILTER_NEAREST:
      bitfield |= 0x0010;
      break;
    case CL_FILTER_LINEAR:
      bitfield |= 0x0020;
      break;
    default:
      SetErrorArg(context,
CL_INVALID_VALUE, filter_mode); return NULL; } // Create sampler cl_sampler sampler = new _cl_sampler; sampler->dispatch = m_dispatchTable; sampler->context = context; sampler->normCoords = normalized_coords; sampler->addressMode = addressing_mode; sampler->filterMode = filter_mode; sampler->sampler = bitfield; SetError(context, CL_SUCCESS); return sampler; } CL_API_ENTRY cl_int CL_API_CALL clRetainSampler ( cl_sampler sampler ) CL_API_SUFFIX__VERSION_1_0 { if (!sampler) { ReturnErrorArg(NULL, CL_INVALID_SAMPLER, sampler); } sampler->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseSampler ( cl_sampler sampler ) CL_API_SUFFIX__VERSION_1_0 { if (!sampler) { ReturnErrorArg(NULL, CL_INVALID_SAMPLER, sampler); } if (--sampler->refCount == 0) { delete sampler; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetSamplerInfo ( cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check sampler is valid if (!sampler) { ReturnErrorArg(NULL, CL_INVALID_SAMPLER, sampler); } size_t dummy = 0; size_t& result_size = param_value_size_ret ? 
*param_value_size_ret : dummy; union { cl_uint cluint; cl_context clcontext; cl_bool clbool; cl_addressing_mode claddrmode; cl_filter_mode clfiltmode; } result_data; switch (param_name) { case CL_SAMPLER_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = sampler->refCount; break; case CL_SAMPLER_CONTEXT: result_size = sizeof(cl_context); result_data.clcontext = sampler->context; break; case CL_SAMPLER_NORMALIZED_COORDS: result_size = sizeof(cl_bool); result_data.clbool = sampler->normCoords; break; case CL_SAMPLER_ADDRESSING_MODE: result_size = sizeof(cl_addressing_mode); result_data.claddrmode = sampler->addressMode; break; case CL_SAMPLER_FILTER_MODE: result_size = sizeof(cl_filter_mode); result_data.clfiltmode = sampler->filterMode; break; default: ReturnErrorArg(sampler->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(sampler->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithSource ( cl_context context, cl_uint count, const char ** strings, const size_t * lengths, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } if (count == 0) { SetErrorArg(context, CL_INVALID_VALUE, count); return NULL; } if (!strings || !strings[0]) { SetErrorArg(context, CL_INVALID_VALUE, strings); return NULL; } // Concatenate sources into a single string std::string source; for (unsigned i = 0; i < count; i++) { size_t length = (lengths && lengths[i]) ? 
      // Tail of clCreateProgramWithSource: use the explicit length when one
      // was given, otherwise measure the NUL-terminated string.
      lengths[i] : strlen(strings[i]);
    source.append(strings[i], length);
  }

  // Create program object
  cl_program prog = new _cl_program;
  prog->dispatch = m_dispatchTable;
  prog->program = new oclgrind::Program(context->context, source);
  prog->context = context;
  prog->refCount = 1;
  if (!prog->program)
  {
    SetError(context, CL_OUT_OF_HOST_MEMORY);
    delete prog;
    return NULL;
  }

  // The program keeps a reference on its context
  clRetainContext(context);

  SetError(context, CL_SUCCESS);
  return prog;
}

// Creates a program object from a single binary: `binaries[0]` of size
// `lengths[0]` is handed to oclgrind::Program::createFromBitcode.
// Exactly one device must be supplied and it must be `m_device`.
// `binary_status[0]`, when `binary_status` is non-NULL, receives
// CL_INVALID_BINARY if no program could be created and CL_SUCCESS otherwise.
// Returns NULL after reporting CL_INVALID_CONTEXT, CL_INVALID_VALUE,
// CL_INVALID_DEVICE or CL_INVALID_BINARY on failure.
CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithBinary
(
  cl_context context,
  cl_uint num_devices,
  const cl_device_id * device_list,
  const size_t * lengths,
  const unsigned char ** binaries,
  cl_int * binary_status,
  cl_int * errcode_ret
) CL_API_SUFFIX__VERSION_1_0
{
  // Check parameters
  if (!context)
  {
    SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
    return NULL;
  }
  if (num_devices != 1 || !device_list)
  {
    SetErrorInfo(context, CL_INVALID_VALUE, "Invalid device list");
    return NULL;
  }
  if (!lengths)
  {
    SetErrorArg(context, CL_INVALID_VALUE, lengths);
    return NULL;
  }
  if (!binaries)
  {
    SetErrorArg(context, CL_INVALID_VALUE, binaries);
    return NULL;
  }
  if (device_list[0] != m_device)
  {
    SetErrorArg(context, CL_INVALID_DEVICE, device_list);
    return NULL;
  }

  // Create program object
  cl_program prog = new _cl_program;
  prog->dispatch = m_dispatchTable;
  prog->program = oclgrind::Program::createFromBitcode(context->context,
                                                       binaries[0], lengths[0]);
  prog->context = context;
  prog->refCount = 1;
  if (!prog->program)
  {
    SetError(context, CL_INVALID_BINARY);
    if (binary_status)
    {
      binary_status[0] = CL_INVALID_BINARY;
    }
    delete prog;
    return NULL;
  }
  if (binary_status)
  {
    binary_status[0] = CL_SUCCESS;
  }

  // The program keeps a reference on its context
  clRetainContext(context);

  SetError(context, CL_SUCCESS);
  return prog;
}

// Built-in kernels are not provided: after the NULL-context check this
// function always reports CL_INVALID_VALUE and returns NULL.
CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithBuiltInKernels
(
  cl_context context,
  cl_uint num_devices,
  const cl_device_id * device_list,
  const char * kernel_names,
  cl_int * errcode_ret
) CL_API_SUFFIX__VERSION_1_2
{
  if (!context)
  {
    SetError(NULL, CL_INVALID_CONTEXT);
    return NULL;
  }

  // Tail of clCreateProgramWithBuiltInKernels: unconditional failure.
  SetErrorInfo(context, CL_INVALID_VALUE, "No built-in kernels available");
  return NULL;
}

// Increments the reference count of `program`.
// Reports CL_INVALID_PROGRAM when `program` is NULL.
CL_API_ENTRY cl_int CL_API_CALL
clRetainProgram
(
  cl_program program
) CL_API_SUFFIX__VERSION_1_0
{
  if (!program)
  {
    ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
  }

  program->refCount++;
  return CL_SUCCESS;
}

// Decrements the reference count of `program`. When it reaches zero the
// underlying oclgrind program is deleted, the reference on the program's
// context is released and the handle itself is deleted.
// Reports CL_INVALID_PROGRAM when `program` is NULL.
CL_API_ENTRY cl_int CL_API_CALL
clReleaseProgram
(
  cl_program program
) CL_API_SUFFIX__VERSION_1_0
{
  if (!program)
  {
    ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
  }

  if (--program->refCount == 0)
  {
    delete program->program;
    clReleaseContext(program->context);
    delete program;
  }
  return CL_SUCCESS;
}

// Builds `program` with the given `options`, then calls `pfn_notify` (if
// non-NULL) directly from this function before returning.
// Reports CL_INVALID_PROGRAM, CL_INVALID_VALUE or CL_INVALID_DEVICE for bad
// arguments and CL_BUILD_PROGRAM_FAILURE when the build fails.
CL_API_ENTRY cl_int CL_API_CALL
clBuildProgram
(
  cl_program program,
  cl_uint num_devices,
  const cl_device_id * device_list,
  const char * options,
  void (CL_CALLBACK * pfn_notify)(cl_program, void*),
  void * user_data
) CL_API_SUFFIX__VERSION_1_0
{
  // Check parameters
  if (!program || !program->program)
  {
    ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
  }
  if (num_devices > 0 && !device_list)
  {
    ReturnErrorInfo(program->context, CL_INVALID_VALUE,
                    "num_devices >0 but device_list is NULL");
  }
  if (num_devices == 0 && device_list)
  {
    ReturnErrorInfo(program->context, CL_INVALID_VALUE,
                    "num_devices == 0 but device_list non-NULL");
  }
  if (!pfn_notify && user_data)
  {
    ReturnErrorInfo(program->context, CL_INVALID_VALUE,
                    "pfn_notify NULL but user_data non-NULL");
  }
  // NOTE(review): `device` is not a parameter of this function; presumably
  // ReturnErrorArg only uses the argument's spelling - confirm, and consider
  // naming `device_list` instead.
  if (device_list && !device_list[0])
  {
    ReturnErrorArg(program->context, CL_INVALID_DEVICE, device);
  }

  // Build program
  if (!program->program->build(options))
  {
    ReturnError(program->context, CL_BUILD_PROGRAM_FAILURE);
  }

  // Fire callback
  if (pfn_notify)
  {
    pfn_notify(program, user_data);
  }

  return CL_SUCCESS;
}

// No-op: always returns CL_SUCCESS.
CL_API_ENTRY cl_int CL_API_CALL
clUnloadCompiler
(
  void
) CL_API_SUFFIX__VERSION_1_0
{
  return CL_SUCCESS;
}

// Compiles `program` with the given `options` and embedded headers: each
// entry of `input_headers` is paired with the name at the same index in
// `header_include_names` and the list is passed to the program's build().
// `pfn_notify` (if non-NULL) is called directly from this function before
// returning.
// Reports CL_INVALID_PROGRAM, CL_INVALID_VALUE or CL_INVALID_DEVICE for bad
// arguments and CL_BUILD_PROGRAM_FAILURE when the build fails.
CL_API_ENTRY cl_int CL_API_CALL
clCompileProgram
(
  cl_program program,
  cl_uint num_devices,
  const cl_device_id * device_list,
  const char * options,
  cl_uint num_input_headers,
  const cl_program * input_headers,
  const char
             // Tail of clCompileProgram: remainder of the parameter list
             // (the `const char` of this parameter is on the preceding line).
             ** header_include_names,
  void (CL_CALLBACK * pfn_notify)(cl_program, void*),
  void * user_data
) CL_API_SUFFIX__VERSION_1_2
{
  // Check parameters
  if (!program)
  {
    ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
  }
  if (num_devices > 0 && !device_list)
  {
    ReturnErrorInfo(program->context, CL_INVALID_VALUE,
                    "num_devices >0 but device_list is NULL");
  }
  if (num_devices == 0 && device_list)
  {
    ReturnErrorInfo(program->context, CL_INVALID_VALUE,
                    "num_devices == 0 but device_list non-NULL");
  }
  if (!pfn_notify && user_data)
  {
    ReturnErrorInfo(program->context, CL_INVALID_VALUE,
                    "pfn_notify NULL but user_data non-NULL");
  }
  // NOTE(review): `device` is not a parameter of this function; presumably
  // ReturnErrorArg only uses the argument's spelling - confirm.
  if (device_list && !device_list[0])
  {
    ReturnErrorArg(program->context, CL_INVALID_DEVICE, device);
  }

  // Prepare headers
  // NOTE(review): the template arguments of `list` appear to be missing in
  // this copy of the source - confirm against the upstream file.
  // NOTE(review): `header_include_names` and `input_headers` are indexed
  // without a NULL check when num_input_headers > 0.
  list headers;
  for (unsigned i = 0; i < num_input_headers; i++)
  {
    headers.push_back(make_pair(header_include_names[i],
                                input_headers[i]->program));
  }

  // Build program
  if (!program->program->build(options, headers))
  {
    ReturnError(program->context, CL_BUILD_PROGRAM_FAILURE);
  }

  // Fire callback
  if (pfn_notify)
  {
    pfn_notify(program, user_data);
  }

  return CL_SUCCESS;
}

// Links the programs in `input_programs` into a new program object created
// by oclgrind::Program::createFromPrograms. `pfn_notify` (if non-NULL) is
// called directly from this function with the new program.
// `options` is not read by the code visible here.
// Returns NULL after reporting CL_INVALID_CONTEXT, CL_INVALID_VALUE,
// CL_INVALID_DEVICE or CL_INVALID_BINARY on failure.
CL_API_ENTRY cl_program CL_API_CALL
clLinkProgram
(
  cl_context context,
  cl_uint num_devices,
  const cl_device_id * device_list,
  const char * options,
  cl_uint num_input_programs,
  const cl_program * input_programs,
  void (CL_CALLBACK * pfn_notify)(cl_program, void*),
  void * user_data,
  cl_int * errcode_ret
) CL_API_SUFFIX__VERSION_1_2
{
  // Check parameters
  if (!context)
  {
    SetErrorArg(NULL, CL_INVALID_CONTEXT, context);
    return NULL;
  }
  if (num_devices > 0 && !device_list)
  {
    SetErrorInfo(context, CL_INVALID_VALUE,
                 "num_devices >0 but device_list is NULL");
    return NULL;
  }
  if (num_devices == 0 && device_list)
  {
    SetErrorInfo(context, CL_INVALID_VALUE,
                 "num_devices == 0 but device_list non-NULL");
    return NULL;
  }
  if (!pfn_notify && user_data)
  {
    SetErrorInfo(context, CL_INVALID_VALUE,
                 "pfn_notify NULL but user_data non-NULL");
    return NULL;
  }
  if (device_list && !device_list[0])
  {
    // Tail of clLinkProgram: the first entry of a non-NULL device list must
    // itself be non-NULL.
    SetErrorArg(context, CL_INVALID_DEVICE, device_list);
    return NULL;
  }

  // Prepare programs
  // NOTE(review): the template arguments of `list` appear to be missing in
  // this copy of the source - confirm against the upstream file.
  // NOTE(review): `input_programs` and its entries are dereferenced without
  // a NULL check.
  list programs;
  for (unsigned i = 0; i < num_input_programs; i++)
  {
    programs.push_back(input_programs[i]->program);
  }

  // Create program object
  cl_program prog = new _cl_program;
  prog->dispatch = m_dispatchTable;
  prog->program = oclgrind::Program::createFromPrograms(context->context,
                                                        programs);
  prog->context = context;
  prog->refCount = 1;
  if (!prog->program)
  {
    SetError(context, CL_INVALID_BINARY);
    delete prog;
    return NULL;
  }

  // Fire callback
  if (pfn_notify)
  {
    pfn_notify(prog, user_data);
  }

  // The program keeps a reference on its context
  clRetainContext(context);

  SetError(context, CL_SUCCESS);
  return prog;
}

// No-op: always returns CL_SUCCESS.
CL_API_ENTRY cl_int CL_API_CALL
clUnloadPlatformCompiler
(
  cl_platform_id platform
) CL_API_SUFFIX__VERSION_1_2
{
  return CL_SUCCESS;
}

// Queries one property of a program object.
// Unlike the other query functions in this file, the result is staged in a
// heap buffer (`result_data`) that is released with free() before
// returning.
// The size of the requested value is written to `param_value_size_ret` when
// that pointer is non-NULL; the value is copied to `param_value` when that
// pointer is non-NULL.
// Reports CL_INVALID_PROGRAM for a NULL `program`,
// CL_INVALID_PROGRAM_EXECUTABLE when kernel information is requested from a
// program that is not successfully built, and CL_INVALID_VALUE for an
// unknown `param_name`; returns CL_INVALID_VALUE for a destination that is
// too small.
CL_API_ENTRY cl_int CL_API_CALL
clGetProgramInfo
(
  cl_program program,
  cl_program_info param_name,
  size_t param_value_size,
  void * param_value,
  size_t * param_value_size_ret
) CL_API_SUFFIX__VERSION_1_0
{
  size_t result_size = 0;
  void *result_data = NULL;

  // Check program is valid
  if (!program)
  {
    ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
  }
  if ((param_name == CL_PROGRAM_NUM_KERNELS ||
       param_name == CL_PROGRAM_KERNEL_NAMES) &&
      program->program->getBuildStatus() != CL_BUILD_SUCCESS)
  {
    ReturnErrorInfo(program->context, CL_INVALID_PROGRAM_EXECUTABLE,
                    "Program not successfully built");
  }

  switch (param_name)
  {
  case CL_PROGRAM_REFERENCE_COUNT:
    result_size = sizeof(cl_uint);
    result_data = malloc(result_size);
    *(cl_uint*)result_data = program->refCount;
    break;
  case CL_PROGRAM_CONTEXT:
    result_size = sizeof(cl_context);
    result_data = malloc(result_size);
    *(cl_context*)result_data = program->context;
    break;
  case CL_PROGRAM_NUM_DEVICES:
    // A program is always associated with exactly one device
    result_size = sizeof(cl_uint);
    result_data = malloc(result_size);
    *(cl_uint*)result_data = 1;
    break;
  case CL_PROGRAM_DEVICES:
    result_size = sizeof(cl_device_id);
    result_data = malloc(result_size);
    *(cl_device_id*)result_data = m_device;
    break;
  case
       // Tail of clGetProgramInfo (the `case` keyword for this label is on
       // the preceding line).
       CL_PROGRAM_SOURCE:
    // Source text, including its terminating NUL
    result_size = strlen(program->program->getSource().c_str()) + 1;
    result_data = malloc(result_size);
    strcpy((char*)result_data, program->program->getSource().c_str());
    break;
  case CL_PROGRAM_BINARY_SIZES:
    result_size = sizeof(size_t);
    result_data = malloc(result_size);
    *(size_t*)result_data = program->program->getBinarySize();
    break;
  case CL_PROGRAM_BINARIES:
    // The reported size is that of one pointer; the binary itself is copied
    // into the caller's first buffer further down.
    // NOTE(review): `result_data` comes from getBinary() here, not malloc(),
    // yet it is passed to free() below - confirm how getBinary() allocates.
    result_size = sizeof(unsigned char*);
    result_data = program->program->getBinary();
    break;
  case CL_PROGRAM_NUM_KERNELS:
    result_size = sizeof(size_t);
    result_data = malloc(result_size);
    *(size_t*)result_data = program->program->getNumKernels();
    break;
  case CL_PROGRAM_KERNEL_NAMES:
  {
    // Join the kernel names with ';' separators (no trailing separator)
    // NOTE(review): the template arguments of `list` appear to be missing in
    // this copy of the source - confirm against the upstream file.
    list names = program->program->getKernelNames();
    string ret;
    for (list::iterator itr = names.begin(); itr != names.end(); itr++)
    {
      ret += *itr;
      ret += ";";
    }
    if (!ret.empty())
    {
      ret.erase(ret.length()-1);
    }
    result_size = strlen(ret.c_str()) + 1;
    result_data = malloc(result_size);
    strcpy((char*)result_data, ret.c_str());
    break;
  }
  default:
    ReturnErrorArg(program->context, CL_INVALID_VALUE, param_name);
  }

  cl_int return_value = CL_SUCCESS;
  if (param_value)
  {
    if (param_name == CL_PROGRAM_BINARIES)
    {
      // `param_value` is treated as an array of buffers; the binary goes
      // into the first one. No size check is made on this path.
      memcpy(((unsigned char**)param_value)[0],
             result_data, program->program->getBinarySize());
    }
    else
    {
      // Check destination is large enough
      if (param_value_size < result_size)
      {
        // TODO: Use API error reporting mechanism
        return_value = CL_INVALID_VALUE;
      }
      else
      {
        memcpy(param_value, result_data, result_size);
      }
    }
  }

  if (param_value_size_ret)
  {
    *param_value_size_ret = result_size;
  }

  free(result_data);

  return return_value;
}

// Queries one build property of a program object.
// The size of the requested value is written to `param_value_size_ret` when
// that pointer is non-NULL. The value itself is copied to `param_value` when
// that pointer is non-NULL and `param_value_size` is large enough.
// `device` is not read by the code visible here.
// Reports CL_INVALID_PROGRAM for a NULL `program` and CL_INVALID_VALUE for
// an unknown `param_name` or a destination that is too small.
CL_API_ENTRY cl_int CL_API_CALL
clGetProgramBuildInfo
(
  cl_program program,
  cl_device_id device,
  cl_program_build_info param_name,
  size_t param_value_size,
  void * param_value,
  size_t * param_value_size_ret
) CL_API_SUFFIX__VERSION_1_0
{
  // Check program is valid
  if (!program)
  {
    ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program);
  }

  // `result_size` aliases the caller's size output, or a local dummy when
  // the caller did not ask for the size
  size_t dummy;
  size_t& result_size = param_value_size_ret ?
*param_value_size_ret : dummy; union { cl_build_status status; cl_program_binary_type type; } result_data; const char* str = 0; switch (param_name) { case CL_PROGRAM_BUILD_STATUS: result_size = sizeof(cl_build_status); result_data.status = program->program->getBuildStatus(); break; case CL_PROGRAM_BUILD_OPTIONS: str = program->program->getBuildOptions().c_str(); result_size = strlen(str) + 1; break; case CL_PROGRAM_BUILD_LOG: str = program->program->getBuildLog().c_str(); result_size = strlen(str) + 1; break; case CL_PROGRAM_BINARY_TYPE: result_size = sizeof(cl_program_binary_type); result_data.type = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; break; default: ReturnErrorArg(program->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(program->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { if (str) memcpy(param_value, str, result_size); else memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel ( cl_program program, const char * kernel_name, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (program->dispatch != m_dispatchTable) { SetError(NULL, CL_INVALID_PROGRAM); return NULL; } if (!kernel_name) { SetErrorArg(program->context, CL_INVALID_VALUE, kernel_name); return NULL; } // Create kernel object cl_kernel kernel = new _cl_kernel; kernel->dispatch = m_dispatchTable; kernel->kernel = program->program->createKernel(kernel_name); kernel->program = program; kernel->refCount = 1; if (!kernel->kernel) { SetErrorInfo(program->context, CL_INVALID_KERNEL_NAME, "Kernel '" << kernel_name << "' not found"); delete kernel; return NULL; } clRetainProgram(program); SetError(program->context, CL_SUCCESS); return kernel; } CL_API_ENTRY cl_int CL_API_CALL clCreateKernelsInProgram ( cl_program program, cl_uint num_kernels, cl_kernel * kernels, cl_uint * num_kernels_ret ) 
CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!program) { ReturnErrorArg(NULL, CL_INVALID_PROGRAM, program); } if (program->program->getBuildStatus() != CL_BUILD_SUCCESS) { ReturnErrorInfo(program->context, CL_INVALID_PROGRAM_EXECUTABLE, "Program not built"); } unsigned int num = program->program->getNumKernels(); if (kernels && num_kernels < num) { ReturnErrorInfo(program->context, CL_INVALID_VALUE, "num_kernels is " << num_kernels << ", but " << num << " kernels found"); } if (kernels) { int i = 0; list names = program->program->getKernelNames(); for (list::iterator itr = names.begin(); itr != names.end(); itr++) { cl_kernel kernel = new _cl_kernel; kernel->dispatch = m_dispatchTable; kernel->kernel = program->program->createKernel(*itr); kernel->program = program; kernel->refCount = 1; kernels[i++] = kernel; clRetainProgram(program); } } if (num_kernels_ret) { *num_kernels_ret = num; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clRetainKernel ( cl_kernel kernel ) CL_API_SUFFIX__VERSION_1_0 { if (!kernel) { ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel); } kernel->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel ( cl_kernel kernel ) CL_API_SUFFIX__VERSION_1_0 { if (!kernel) { ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel); } if (--kernel->refCount == 0) { delete kernel->kernel; clReleaseProgram(kernel->program); delete kernel; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg ( cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void * arg_value ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (arg_index >= kernel->kernel->getNumArguments()) { ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_INDEX, "arg_index is " << arg_index << ", but kernel has " << kernel->kernel->getNumArguments() << " arguments"); } unsigned int addr = kernel->kernel->getArgumentAddressQualifier(arg_index); bool isSampler = kernel->kernel->getArgumentTypeName(arg_index) == "sampler_t"; if 
(kernel->kernel->getArgumentSize(arg_index) != arg_size && !isSampler && addr != CL_KERNEL_ARG_ADDRESS_LOCAL) { ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_SIZE, "arg_size is " << arg_size << ", but argument should be " << kernel->kernel->getArgumentSize(arg_index) << " bytes"); } // Prepare argument value oclgrind::TypedValue value; value.data = new unsigned char[arg_size]; value.size = arg_size; value.num = 1; switch (addr) { case CL_KERNEL_ARG_ADDRESS_PRIVATE: if (isSampler) { memcpy(value.data, &(*(cl_sampler*)arg_value)->sampler, 4); } else { memcpy(value.data, arg_value, arg_size); } break; case CL_KERNEL_ARG_ADDRESS_LOCAL: delete value.data; value.data = NULL; break; case CL_KERNEL_ARG_ADDRESS_GLOBAL: case CL_KERNEL_ARG_ADDRESS_CONSTANT: if (arg_value && *(cl_mem*)arg_value) { cl_mem mem = *(cl_mem*)arg_value; if (mem->isImage) { // Create Image struct oclgrind::Image *image = new oclgrind::Image; image->address = mem->address; image->format = ((cl_image*)mem)->format; image->desc = ((cl_image*)mem)->desc; *(oclgrind::Image**)value.data = image; } else { memcpy(value.data, &mem->address, arg_size); } kernel->memArgs[arg_index] = mem; } else { value.setPointer(0); kernel->memArgs.erase(arg_index); } break; default: ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_VALUE, "Unsupported address space"); } // Set argument kernel->kernel->setArgument(arg_index, value); delete[] value.data; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfo ( cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check kernel is valid if (!kernel) { ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel); } size_t dummy; size_t& result_size = param_value_size_ret ? 
    // Tail of clGetKernelInfo; completes the `result_size` initializer.
    *param_value_size_ret : dummy;
  // Scratch storage for the fixed-size results; string results are returned
  // through `str` instead
  union
  {
    cl_uint cluint;
    cl_context context;
    cl_program program;
  } result_data;
  const char* str = 0;

  switch (param_name)
  {
  case CL_KERNEL_FUNCTION_NAME:
    // NOTE(review): `str` stays valid only if getName() returns a reference
    // to a string owned by the kernel - confirm.
    result_size = kernel->kernel->getName().size() + 1;
    str = kernel->kernel->getName().c_str();
    break;
  case CL_KERNEL_NUM_ARGS:
    result_size = sizeof(cl_uint);
    result_data.cluint = kernel->kernel->getNumArguments();
    break;
  case CL_KERNEL_REFERENCE_COUNT:
    result_size = sizeof(cl_uint);
    result_data.cluint = kernel->refCount;
    break;
  case CL_KERNEL_CONTEXT:
    result_size = sizeof(cl_context);
    result_data.context = kernel->program->context;
    break;
  case CL_KERNEL_PROGRAM:
    result_size = sizeof(cl_program);
    result_data.program = kernel->program;
    break;
  case CL_KERNEL_ATTRIBUTES:
    // NOTE(review): same lifetime assumption as for the function name.
    result_size = kernel->kernel->getAttributes().size() + 1;
    str = kernel->kernel->getAttributes().c_str();
    break;
  default:
    ReturnErrorArg(kernel->program->context, CL_INVALID_VALUE, param_name);
  }

  if (param_value)
  {
    // Check destination is large enough
    if (param_value_size < result_size)
    {
      ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE,
                      ParamValueSizeTooSmall);
    }
    else
    {
      if (str)
        memcpy(param_value, str, result_size);
      else
        memcpy(param_value, &result_data, result_size);
    }
  }

  return CL_SUCCESS;
}

// Queries one property of argument `arg_indx` of a kernel.
// The size of the requested value is written to `param_value_size_ret` when
// that pointer is non-NULL. The value itself is copied to `param_value` when
// that pointer is non-NULL.
// Reports CL_INVALID_KERNEL for a NULL `kernel`, CL_INVALID_ARG_INDEX for an
// out-of-range index, and CL_INVALID_VALUE for an unknown `param_name` or a
// destination that is too small.
CL_API_ENTRY cl_int CL_API_CALL
clGetKernelArgInfo
(
  cl_kernel kernel,
  cl_uint arg_indx,
  cl_kernel_arg_info param_name,
  size_t param_value_size,
  void * param_value,
  size_t * param_value_size_ret
) CL_API_SUFFIX__VERSION_1_2
{
  // Check parameters are valid
  if (!kernel)
  {
    ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel);
  }
  if (arg_indx >= kernel->kernel->getNumArguments())
  {
    ReturnErrorInfo(kernel->program->context, CL_INVALID_ARG_INDEX,
                    "arg_indx is " << arg_indx <<
                    ", but kernel has " << kernel->kernel->getNumArguments()
                    << " arguments");
  }

  // `result_size` aliases the caller's size output, or a local dummy when
  // the caller did not ask for the size
  size_t dummy = 0;
  size_t& result_size = param_value_size_ret ?
*param_value_size_ret : dummy; union { cl_kernel_arg_address_qualifier addressQual; cl_kernel_arg_access_qualifier accessQual; cl_kernel_arg_type_qualifier typeQual; } result_data; std::string str_data; switch (param_name) { case CL_KERNEL_ARG_ADDRESS_QUALIFIER: result_size = sizeof(cl_kernel_arg_address_qualifier); result_data.addressQual = kernel->kernel->getArgumentAddressQualifier(arg_indx); break; case CL_KERNEL_ARG_ACCESS_QUALIFIER: result_size = sizeof(cl_kernel_arg_access_qualifier); result_data.accessQual = kernel->kernel->getArgumentAccessQualifier(arg_indx); break; case CL_KERNEL_ARG_TYPE_NAME: str_data = kernel->kernel->getArgumentTypeName(arg_indx).str(); result_size = str_data.size() + 1; break; case CL_KERNEL_ARG_TYPE_QUALIFIER: result_size = sizeof(cl_kernel_arg_type_qualifier); result_data.typeQual = kernel->kernel->getArgumentTypeQualifier(arg_indx); break; case CL_KERNEL_ARG_NAME: str_data = kernel->kernel->getArgumentName(arg_indx).str(); result_size = str_data.size() + 1; break; default: ReturnErrorArg(kernel->program->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } if (str_data.size()) memcpy(param_value, str_data.c_str(), result_size); else memcpy(param_value, &result_data, result_size); } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfo ( cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters are valid if (!kernel) { ReturnErrorArg(NULL, CL_INVALID_KERNEL, kernel); } if (!device || device != m_device) { ReturnErrorArg(kernel->program->context, CL_INVALID_DEVICE, device); } size_t dummy; size_t& result_size = param_value_size_ret ? 
*param_value_size_ret : dummy; union { size_t sizet; size_t sizet3[3]; cl_ulong clulong; } result_data; switch (param_name) { case CL_KERNEL_GLOBAL_WORK_SIZE: ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE, "CL_KERNEL_GLOBAL_SIZE only valid on custom devices"); case CL_KERNEL_WORK_GROUP_SIZE: result_size = sizeof(size_t); result_data.sizet = MAX_WI_SIZE; break; case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: result_size = sizeof(size_t[3]); kernel->kernel->getRequiredWorkGroupSize(result_data.sizet3); break; case CL_KERNEL_LOCAL_MEM_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = kernel->kernel->getLocalMemorySize(); break; case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: result_size = sizeof(size_t); result_data.sizet = 1; break; case CL_KERNEL_PRIVATE_MEM_SIZE: result_size = sizeof(cl_ulong); result_data.clulong = 0; break; default: ReturnErrorArg(kernel->program->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(kernel->program->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { memcpy(param_value, &result_data, result_size); } } return CL_SUCCESS; } /* Event Object APIs */ namespace { // Utility to check if an event has completed (or terminated) inline bool isComplete(cl_event event) { return (event->event->state == CL_COMPLETE || event->event->state < 0); } } CL_API_ENTRY cl_int CL_API_CALL clWaitForEvents ( cl_uint num_events, const cl_event * event_list ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!num_events) { ReturnErrorInfo(NULL, CL_INVALID_VALUE, "num_events cannot be 0"); } if (!event_list) { ReturnErrorInfo(NULL, CL_INVALID_VALUE, "event_list cannot be NULL"); } // Loop until all events complete bool complete = false; while (!complete) { complete = true; for (unsigned i = 0; i < num_events; i++) { // Skip event if already complete if (isComplete(event_list[i])) { continue; } // If it's not a user event, update the 
queue if (event_list[i]->queue) { oclgrind::Queue::Command *cmd = event_list[i]->queue->queue->update(); if (cmd) { asyncQueueRelease(cmd); delete cmd; } // If it's still not complete, update flag if (!isComplete(event_list[i])) { complete = false; } } else { complete = false; } } } // Check if any command terminated unsuccessfully for (unsigned i = 0; i < num_events; i++) { if (event_list[i]->event->state < 0) { ReturnErrorInfo(event_list[i]->context, CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "Event " << i << " terminated with error " << event_list[i]->event->state); } } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clGetEventInfo ( cl_event event, cl_event_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check event is valid if (!event) { ReturnErrorArg(NULL, CL_INVALID_EVENT, event); } size_t dummy; size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy; union { cl_command_queue queue; cl_context context; cl_command_type type; cl_int clint; cl_uint cluint; size_t sizet; size_t sizet3[3]; } result_data; switch (param_name) { case CL_EVENT_COMMAND_QUEUE: result_size = sizeof(cl_command_queue); result_data.queue = event->queue; break; case CL_EVENT_CONTEXT: result_size = sizeof(cl_context); result_data.context = event->context; break; case CL_EVENT_COMMAND_TYPE: result_size = sizeof(cl_command_type); result_data.type = event->type; break; case CL_EVENT_COMMAND_EXECUTION_STATUS: result_size = sizeof(cl_int); result_data.clint = event->event->state; break; case CL_EVENT_REFERENCE_COUNT: result_size = sizeof(cl_uint); result_data.cluint = event->refCount; break; default: ReturnErrorArg(event->context, CL_INVALID_VALUE, param_name); } if (param_value) { // Check destination is large enough if (param_value_size < result_size) { ReturnErrorInfo(event->context, CL_INVALID_VALUE, ParamValueSizeTooSmall); } else { memcpy(param_value, &result_data, result_size); } } 
return CL_SUCCESS; } CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent ( cl_context context, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_1 { // Check parameters if (!context) { SetErrorArg(NULL, CL_INVALID_CONTEXT, context); return NULL; } /// Create event object cl_event event = new _cl_event; event->dispatch = m_dispatchTable; event->context = context; event->queue = 0; event->type = CL_COMMAND_USER; event->event = new oclgrind::Event(); event->event->state = CL_SUBMITTED; event->refCount = 1; SetError(context, CL_SUCCESS); return event; } CL_API_ENTRY cl_int CL_API_CALL clRetainEvent ( cl_event event ) CL_API_SUFFIX__VERSION_1_0 { if (!event) { ReturnErrorArg(NULL, CL_INVALID_EVENT, event); } event->refCount++; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clReleaseEvent ( cl_event event ) CL_API_SUFFIX__VERSION_1_0 { if (!event) { ReturnErrorArg(NULL, CL_INVALID_EVENT, event); } if (--event->refCount == 0) { if (event->event) { delete event->event; } delete event; } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatus ( cl_event event, cl_int execution_status ) CL_API_SUFFIX__VERSION_1_1 { // Check parameters if (!event) { ReturnErrorArg(NULL, CL_INVALID_EVENT, event); } if (event->queue) { ReturnErrorInfo(event->context, CL_INVALID_EVENT, "Not a user event"); } if (execution_status != CL_COMPLETE && execution_status >= 0) { ReturnErrorArg(event->context, CL_INVALID_VALUE, execution_status); } if (event->event->state == CL_COMPLETE || event->event->state < 0) { ReturnErrorInfo(event->context, CL_INVALID_OPERATION, "Event status already set"); } event->event->state = execution_status; // Perform callbacks list< pair >::iterator itr; for (itr = event->callbacks.begin(); itr != event->callbacks.end(); itr++) { itr->first(event, execution_status, itr->second); } return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clSetEventCallback ( cl_event event, cl_int command_exec_callback_type, void (CL_CALLBACK * pfn_notify)(cl_event, 
cl_int, void*),
  void *    user_data
) CL_API_SUFFIX__VERSION_1_1
{
  // Appends (pfn_notify, user_data) to the event's callback list after
  // validating the arguments.

  // Check parameters
  if (!event)
  {
    ReturnErrorArg(NULL, CL_INVALID_EVENT, event);
  }
  if (!pfn_notify)
  {
    ReturnErrorArg(event->context, CL_INVALID_VALUE, pfn_notify);
  }
  if (command_exec_callback_type != CL_COMPLETE &&
      command_exec_callback_type != CL_SUBMITTED &&
      command_exec_callback_type != CL_RUNNING)
  {
    ReturnErrorArg(event->context, CL_INVALID_VALUE,
                   command_exec_callback_type);
  }

  event->callbacks.push_back(make_pair(pfn_notify, user_data));

  return CL_SUCCESS;
}

// Query a profiling timestamp for an event. Events without a queue (user
// events) report CL_PROFILING_INFO_NOT_AVAILABLE.
CL_API_ENTRY cl_int CL_API_CALL
clGetEventProfilingInfo
(
  cl_event           event,
  cl_profiling_info  param_name,
  size_t             param_value_size,
  void *             param_value,
  size_t *           param_value_size_ret
) CL_API_SUFFIX__VERSION_1_0
{
  // Check event is valid
  if (!event)
  {
    ReturnErrorArg(NULL, CL_INVALID_EVENT, event);
  }
  if (!event->queue)
  {
    ReturnError(event->context, CL_PROFILING_INFO_NOT_AVAILABLE);
  }

  // result_size aliases the caller's size slot, or a local when not requested
  size_t dummy = 0;
  size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
  cl_ulong result;

  switch (param_name)
  {
  case CL_PROFILING_COMMAND_QUEUED:
    result_size = sizeof(cl_ulong);
    result = event->event->queueTime;
    break;
  case CL_PROFILING_COMMAND_SUBMIT:
    // Submit time is reported as the start time
    result_size = sizeof(cl_ulong);
    result = event->event->startTime;
    break;
  case CL_PROFILING_COMMAND_START:
    result_size = sizeof(cl_ulong);
    result = event->event->startTime;
    break;
  case CL_PROFILING_COMMAND_END:
    result_size = sizeof(cl_ulong);
    result = event->event->endTime;
    break;
  default:
    ReturnErrorArg(event->context, CL_INVALID_VALUE, param_name);
  }

  if (param_value)
  {
    // Check destination is large enough
    if (param_value_size < result_size)
    {
      ReturnErrorInfo(event->context, CL_INVALID_VALUE, ParamValueSizeTooSmall);
    }
    else
    {
      *(cl_ulong*)param_value = result;
    }
  }

  return CL_SUCCESS;
}

// Flush a command queue. Currently implemented as a full clFinish().
CL_API_ENTRY cl_int CL_API_CALL
clFlush
(
  cl_command_queue  command_queue
) CL_API_SUFFIX__VERSION_1_0
{
  // Check parameters
  if (!command_queue)
  {
    ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
  }

  // TODO: Implement properly?
  clFinish(command_queue);

  return CL_SUCCESS;
}

// Drain a command queue: repeatedly update it until it reports empty,
// releasing and deleting each command that update() hands back.
CL_API_ENTRY cl_int CL_API_CALL
clFinish
(
  cl_command_queue  command_queue
) CL_API_SUFFIX__VERSION_1_0
{
  // Check parameters
  if (!command_queue)
  {
    ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
  }

  while (!command_queue->queue->isEmpty())
  {
    // TODO: Move this update to async thread?
    oclgrind::Queue::Command *cmd = command_queue->queue->update();
    if (cmd)
    {
      asyncQueueRelease(cmd);
      delete cmd;
    }
  }

  return CL_SUCCESS;
}

// Enqueue a read of cb bytes at offset within buffer into host memory ptr.
// When blocking_read is set, the queue is finished before returning.
CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadBuffer
(
  cl_command_queue  command_queue,
  cl_mem            buffer,
  cl_bool           blocking_read,
  size_t            offset,
  size_t            cb,
  void *            ptr,
  cl_uint           num_events_in_wait_list,
  const cl_event *  event_wait_list,
  cl_event *        event
) CL_API_SUFFIX__VERSION_1_0
{
  // Check parameters
  if (!command_queue)
  {
    ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
  }
  if (!buffer)
  {
    // Name the actual parameter in the diagnostic
    ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, buffer);
  }
  if (!ptr)
  {
    ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr);
  }
  if (offset + cb > buffer->size)
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
                    "offset + cb (" << offset << " + " << cb <<
                    ") exceeds buffer size (" << buffer->size << " bytes)");
  }
  if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY))
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
                    "Buffer flags specify host will not read data");
  }

  // Enqueue command
  oclgrind::Queue::BufferCommand *cmd =
    new oclgrind::Queue::BufferCommand(oclgrind::Queue::READ);
  cmd->ptr = (unsigned char*)ptr;
  cmd->address = buffer->address + offset;
  cmd->size = cb;
  asyncQueueRetain(cmd, buffer);
  asyncEnqueue(command_queue, CL_COMMAND_READ_BUFFER, cmd,
               num_events_in_wait_list, event_wait_list, event);

  if (blocking_read)
  {
    return clFinish(command_queue);
  }

  return CL_SUCCESS;
}

// Enqueue a read of a rectangular (up to 3D) region of a buffer.
CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadBufferRect
(
  cl_command_queue  command_queue,
  cl_mem            buffer,
  cl_bool           blocking_read,
  const size_t *
buffer_origin,
  const size_t *    host_origin,
  const size_t *    region,
  size_t            buffer_row_pitch,
  size_t            buffer_slice_pitch,
  size_t            host_row_pitch,
  size_t            host_slice_pitch,
  void *            ptr,
  cl_uint           num_events_in_wait_list,
  const cl_event *  event_wait_list,
  cl_event *        event
) CL_API_SUFFIX__VERSION_1_1
{
  // Zero pitches are derived from region. Origins are flattened into byte
  // offsets, and the last byte touched in the buffer is bounds-checked.

  // Check parameters
  if (!command_queue)
  {
    ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
  }
  if (!buffer)
  {
    // Name the actual parameter in the diagnostic
    ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, buffer);
  }
  if (!ptr)
  {
    ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr);
  }
  if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY))
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
                    "Buffer flags specify host will not read data");
  }

  // Compute pitches if necessary
  if (buffer_row_pitch == 0)
  {
    buffer_row_pitch = region[0];
  }
  if (buffer_slice_pitch == 0)
  {
    buffer_slice_pitch = region[1] * buffer_row_pitch;
  }
  if (host_row_pitch == 0)
  {
    host_row_pitch = region[0];
  }
  if (host_slice_pitch == 0)
  {
    host_slice_pitch = region[1] * host_row_pitch;
  }

  // Compute origin offsets
  size_t buffer_offset =
    buffer_origin[2] * buffer_slice_pitch +
    buffer_origin[1] * buffer_row_pitch +
    buffer_origin[0];
  size_t host_offset =
    host_origin[2] * host_slice_pitch +
    host_origin[1] * host_row_pitch +
    host_origin[0];

  // Ensure buffer region valid
  size_t end =
    buffer_offset + region[0] +
    (region[1]-1) * buffer_row_pitch +
    (region[2]-1) * buffer_slice_pitch;
  if (end > buffer->size)
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
                    "Region exceeds buffer size (" <<
                    buffer->size << " bytes)");
  }

  // Enqueue command. The *_offset arrays carry
  // {byte offset, row pitch, slice pitch}.
  oclgrind::Queue::BufferRectCommand *cmd =
    new oclgrind::Queue::BufferRectCommand(oclgrind::Queue::READ_RECT);
  cmd->ptr = (unsigned char*)ptr;
  cmd->address = buffer->address;
  cmd->buffer_offset[0] = buffer_offset;
  cmd->buffer_offset[1] = buffer_row_pitch;
  cmd->buffer_offset[2] = buffer_slice_pitch;
  cmd->host_offset[0] = host_offset;
  cmd->host_offset[1] = host_row_pitch;
  cmd->host_offset[2] = host_slice_pitch;
  memcpy(cmd->region, region, 3*sizeof(size_t));
  asyncQueueRetain(cmd, buffer);
  // Report the rect command type so CL_EVENT_COMMAND_TYPE is accurate
  asyncEnqueue(command_queue, CL_COMMAND_READ_BUFFER_RECT, cmd,
               num_events_in_wait_list, event_wait_list, event);

  if (blocking_read)
  {
    return clFinish(command_queue);
  }

  return CL_SUCCESS;
}

// Enqueue a write of cb bytes from host memory ptr into buffer at offset.
// When blocking_write is set, the queue is finished before returning.
CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteBuffer
(
  cl_command_queue  command_queue,
  cl_mem            buffer,
  cl_bool           blocking_write,
  size_t            offset,
  size_t            cb,
  const void *      ptr,
  cl_uint           num_events_in_wait_list,
  const cl_event *  event_wait_list,
  cl_event *        event
) CL_API_SUFFIX__VERSION_1_0
{
  // Check parameters
  if (!command_queue)
  {
    ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
  }
  if (!buffer)
  {
    // Name the actual parameter in the diagnostic
    ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, buffer);
  }
  if (!ptr)
  {
    ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr);
  }
  if (offset + cb > buffer->size)
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
                    "offset + cb (" << offset << " + " << cb <<
                    ") exceeds buffer size (" << buffer->size << " bytes)");
  }
  if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY))
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
                    "Buffer flags specify host will not write data");
  }

  // Enqueue command
  oclgrind::Queue::BufferCommand *cmd =
    new oclgrind::Queue::BufferCommand(oclgrind::Queue::WRITE);
  cmd->ptr = (unsigned char*)ptr;
  cmd->address = buffer->address + offset;
  cmd->size = cb;
  asyncQueueRetain(cmd, buffer);
  asyncEnqueue(command_queue, CL_COMMAND_WRITE_BUFFER, cmd,
               num_events_in_wait_list, event_wait_list, event);

  if (blocking_write)
  {
    return clFinish(command_queue);
  }

  return CL_SUCCESS;
}

// Enqueue a write of a rectangular (up to 3D) region into a buffer.
CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteBufferRect
(
  cl_command_queue  command_queue,
  cl_mem            buffer,
  cl_bool           blocking_write,
  const size_t *    buffer_origin,
  const size_t *    host_origin,
  const size_t *    region,
  size_t            buffer_row_pitch,
  size_t            buffer_slice_pitch,
  size_t            host_row_pitch,
  size_t            host_slice_pitch,
  const void *      ptr,
  cl_uint
num_events_in_wait_list,
  const cl_event *  event_wait_list,
  cl_event *        event
) CL_API_SUFFIX__VERSION_1_1
{
  // Zero pitches are derived from region. Origins are flattened into byte
  // offsets, and the last byte touched in the buffer is bounds-checked.

  // Check parameters
  if (!command_queue)
  {
    ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
  }
  if (!buffer)
  {
    // Name the actual parameter in the diagnostic
    ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, buffer);
  }
  if (!ptr)
  {
    ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, ptr);
  }
  if (buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY))
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_OPERATION,
                    "Buffer flags specify host will not write data");
  }

  // Compute pitches if necessary
  if (buffer_row_pitch == 0)
  {
    buffer_row_pitch = region[0];
  }
  if (buffer_slice_pitch == 0)
  {
    buffer_slice_pitch = region[1] * buffer_row_pitch;
  }
  if (host_row_pitch == 0)
  {
    host_row_pitch = region[0];
  }
  if (host_slice_pitch == 0)
  {
    host_slice_pitch = region[1] * host_row_pitch;
  }

  // Compute origin offsets
  size_t buffer_offset =
    buffer_origin[2] * buffer_slice_pitch +
    buffer_origin[1] * buffer_row_pitch +
    buffer_origin[0];
  size_t host_offset =
    host_origin[2] * host_slice_pitch +
    host_origin[1] * host_row_pitch +
    host_origin[0];

  // Ensure buffer region valid
  size_t end =
    buffer_offset + region[0] +
    (region[1]-1) * buffer_row_pitch +
    (region[2]-1) * buffer_slice_pitch;
  if (end > buffer->size)
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
                    "Region exceeds buffer size (" <<
                    buffer->size << " bytes)");
  }

  // Enqueue command. The *_offset arrays carry
  // {byte offset, row pitch, slice pitch}.
  oclgrind::Queue::BufferRectCommand *cmd =
    new oclgrind::Queue::BufferRectCommand(oclgrind::Queue::WRITE_RECT);
  cmd->ptr = (unsigned char*)ptr;
  cmd->address = buffer->address;
  cmd->buffer_offset[0] = buffer_offset;
  cmd->buffer_offset[1] = buffer_row_pitch;
  cmd->buffer_offset[2] = buffer_slice_pitch;
  cmd->host_offset[0] = host_offset;
  cmd->host_offset[1] = host_row_pitch;
  cmd->host_offset[2] = host_slice_pitch;
  memcpy(cmd->region, region, 3*sizeof(size_t));
  asyncQueueRetain(cmd, buffer);
  // Report the rect command type so CL_EVENT_COMMAND_TYPE is accurate
  asyncEnqueue(command_queue, CL_COMMAND_WRITE_BUFFER_RECT, cmd,
               num_events_in_wait_list, event_wait_list, event);

  if (blocking_write)
  {
    return clFinish(command_queue);
  }

  return CL_SUCCESS;
}

// Enqueue a copy of cb bytes from src_buffer (at src_offset) to dst_buffer
// (at dst_offset). Both ranges are bounds-checked against their buffers.
CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBuffer
(
  cl_command_queue  command_queue,
  cl_mem            src_buffer,
  cl_mem            dst_buffer,
  size_t            src_offset,
  size_t            dst_offset,
  size_t            cb,
  cl_uint           num_events_in_wait_list,
  const cl_event *  event_wait_list,
  cl_event *        event
) CL_API_SUFFIX__VERSION_1_0
{
  // Check parameters
  if (!command_queue)
  {
    ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
  }
  if (!src_buffer)
  {
    ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_buffer);
  }
  if (!dst_buffer)
  {
    ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_buffer);
  }
  if (dst_offset + cb > dst_buffer->size)
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
                    "dst_offset + cb (" << dst_offset << " + " << cb <<
                    ") exceeds buffer size (" << dst_buffer->size << " bytes)");
  }
  if (src_offset + cb > src_buffer->size)
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
                    "src_offset + cb (" << src_offset << " + " << cb <<
                    ") exceeds buffer size (" << src_buffer->size << " bytes)");
  }

  // Enqueue command
  oclgrind::Queue::CopyCommand *cmd = new oclgrind::Queue::CopyCommand();
  cmd->dst = dst_buffer->address + dst_offset;
  cmd->src = src_buffer->address + src_offset;
  cmd->size = cb;
  asyncQueueRetain(cmd, src_buffer);
  asyncQueueRetain(cmd, dst_buffer);
  asyncEnqueue(command_queue, CL_COMMAND_COPY_BUFFER, cmd,
               num_events_in_wait_list, event_wait_list, event);

  return CL_SUCCESS;
}

// Enqueue a copy of a rectangular (up to 3D) region between two buffers.
// Zero pitches are derived from region; both the source and destination
// extents are bounds-checked.
CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBufferRect
(
  cl_command_queue  command_queue,
  cl_mem            src_buffer,
  cl_mem            dst_buffer,
  const size_t *    src_origin,
  const size_t *    dst_origin,
  const size_t *    region,
  size_t            src_row_pitch,
  size_t            src_slice_pitch,
  size_t            dst_row_pitch,
  size_t            dst_slice_pitch,
  cl_uint           num_events_in_wait_list,
  const cl_event *  event_wait_list,
  cl_event *        event
) CL_API_SUFFIX__VERSION_1_1
{
  // Check parameters
  if (!command_queue)
  {
    ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue);
  }
  if (!src_buffer)
  {
    ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_buffer);
  }
  if (!dst_buffer)
  {
    ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_buffer);
  }

  // Compute pitches if necessary
  if (src_row_pitch == 0)
  {
    src_row_pitch = region[0];
  }
  if (src_slice_pitch == 0)
  {
    src_slice_pitch = region[1] * src_row_pitch;
  }
  if (dst_row_pitch == 0)
  {
    dst_row_pitch = region[0];
  }
  if (dst_slice_pitch == 0)
  {
    dst_slice_pitch = region[1] * dst_row_pitch;
  }

  // Compute origin offsets
  size_t src_offset =
    src_origin[2] * src_slice_pitch +
    src_origin[1] * src_row_pitch +
    src_origin[0];
  size_t dst_offset =
    dst_origin[2] * dst_slice_pitch +
    dst_origin[1] * dst_row_pitch +
    dst_origin[0];

  // Ensure buffer region valid
  size_t src_end =
    src_offset + region[0] +
    (region[1]-1) * src_row_pitch +
    (region[2]-1) * src_slice_pitch;
  size_t dst_end =
    dst_offset + region[0] +
    (region[1]-1) * dst_row_pitch +
    (region[2]-1) * dst_slice_pitch;
  if (src_end > src_buffer->size)
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
                    "Region exceeds source buffer size (" <<
                    src_buffer->size << " bytes)");
  }
  if (dst_end > dst_buffer->size)
  {
    ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE,
                    "Region exceeds destination buffer size (" <<
                    dst_buffer->size << " bytes)");
  }

  // Enqueue command. The *_offset arrays carry
  // {byte offset, row pitch, slice pitch}.
  oclgrind::Queue::CopyRectCommand *cmd =
    new oclgrind::Queue::CopyRectCommand();
  cmd->src = src_buffer->address;
  cmd->dst = dst_buffer->address;
  cmd->src_offset[0] = src_offset;
  cmd->src_offset[1] = src_row_pitch;
  cmd->src_offset[2] = src_slice_pitch;
  cmd->dst_offset[0] = dst_offset;
  cmd->dst_offset[1] = dst_row_pitch;
  cmd->dst_offset[2] = dst_slice_pitch;
  memcpy(cmd->region, region, 3*sizeof(size_t));
  asyncQueueRetain(cmd, src_buffer);
  asyncQueueRetain(cmd, dst_buffer);
  // Report the rect command type so CL_EVENT_COMMAND_TYPE is accurate
  asyncEnqueue(command_queue, CL_COMMAND_COPY_BUFFER_RECT, cmd,
               num_events_in_wait_list, event_wait_list, event);

  return CL_SUCCESS;
}

CL_API_ENTRY
cl_int CL_API_CALL clEnqueueFillBuffer ( cl_command_queue command_queue, cl_mem buffer, const void * pattern, size_t pattern_size, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_2 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, buffer); } if (offset + cb > buffer->size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "offset + cb (" << offset << " + " << cb << ") exceeds buffer size (" << buffer->size << " bytes)"); } if (!pattern) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, pattern); } if (pattern_size == 0) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, pattern_size); } if (offset%pattern_size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "offset (" << offset << ")" << " not a multiple of pattern_size (" << pattern_size << ")"); } if (cb%pattern_size) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "cb (" << cb << ")" << " not a multiple of pattern_size (" << pattern_size << ")"); } // Enqueue command oclgrind::Queue::FillBufferCommand *cmd = new oclgrind::Queue::FillBufferCommand((const unsigned char*)pattern, pattern_size); cmd->address = buffer->address + offset; cmd->size = cb; asyncQueueRetain(cmd, buffer); asyncEnqueue(command_queue, CL_COMMAND_FILL_BUFFER, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueFillImage ( cl_command_queue command_queue, cl_mem image, const void * fill_color, const size_t * origin, const size_t * region, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_2 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!image) { ReturnErrorArg(command_queue->context, 
CL_INVALID_MEM_OBJECT, image); } if (!fill_color) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, fill_color); } if (!region[0] || !region[1] || !region[2]) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "Values in region cannot be 0"); } // Get image dimensions cl_image *img = (cl_image*)image; size_t width = img->desc.image_width; size_t height = img->desc.image_height; size_t depth = img->desc.image_depth; size_t arraySize = img->desc.image_array_size; size_t pixelSize = getPixelSize(&img->format); size_t row_pitch = width * pixelSize; size_t slice_pitch = height * row_pitch; if (img->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) height = arraySize; if (img->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) depth = arraySize; // Ensure region is within image bounds if (origin[0] + region[0] > width) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[0] + region[0] > width (" << origin[0] << " + " << region[0] << " > " << width << " )"); } if (origin[1] + region[1] > height) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[1] + region[1] > height (" << origin[1] << " + " << region[1] << " > " << height << " )"); } if (origin[2] + region[2] > depth) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[2] + region[2] > depth (" << origin[2] << " + " << region[2] << " > " << depth << " )"); } // Generate color data with correct order and data type unsigned char *color = new unsigned char[pixelSize]; for (unsigned output = 0; output < getNumChannels(&img->format); output++) { // Get input channel index int input = output; switch (img->format.image_channel_order) { case CL_R: case CL_Rx: case CL_RG: case CL_RGx: case CL_RGB: case CL_RGBx: case CL_RGBA: break; case CL_BGRA: if (output == 0) input = 2; if (output == 2) input = 0; break; case CL_ARGB: if (output == 0) input = 3; if (output == 1) input = 0; if (output == 2) input = 1; if (output == 3) input = 2; break; case CL_A: if (output == 0) 
input = 3; break; case CL_RA: if (output == 1) input = 3; break; case CL_INTENSITY: case CL_LUMINANCE: input = 0; break; default: ReturnError(command_queue->context, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); } // Interpret data switch (img->format.image_channel_data_type) { case CL_SNORM_INT8: ((int8_t*)color)[output] = rint(min(max(((float*)fill_color)[input]*127.f, -127.f), 128.f)); break; case CL_UNORM_INT8: ((uint8_t*)color)[output] = rint(min(max(((float*)fill_color)[input]*255.f, 0.f), 255.f)); break; case CL_SNORM_INT16: ((int16_t*)color)[output] = rint(min(max(((float*)fill_color)[input]*32767.f, -32768.f), 32767.f)); break; case CL_UNORM_INT16: ((uint16_t*)color)[output] = rint(min(max(((float*)fill_color)[input]*65535.f, 0.f), 65535.f)); break; case CL_FLOAT: ((float*)color)[output] = ((float*)fill_color)[input]; break; case CL_HALF_FLOAT: ((uint16_t*)color)[output] = floatToHalf(((float*)fill_color)[input]); break; case CL_SIGNED_INT8: ((int8_t*)color)[output] = ((int32_t*)fill_color)[input]; break; case CL_SIGNED_INT16: ((int16_t*)color)[output] = ((int32_t*)fill_color)[input]; break; case CL_SIGNED_INT32: ((int32_t*)color)[output] = ((int32_t*)fill_color)[input]; break; case CL_UNSIGNED_INT8: ((uint8_t*)color)[output] = ((uint32_t*)fill_color)[input]; break; case CL_UNSIGNED_INT16: ((uint16_t*)color)[output] = ((uint32_t*)fill_color)[input]; break; case CL_UNSIGNED_INT32: ((uint32_t*)color)[output] = ((uint32_t*)fill_color)[input]; break; default: ReturnError(command_queue->context, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); } } // Enqueue command oclgrind::Queue::FillImageCommand *cmd = new oclgrind::Queue::FillImageCommand(image->address, origin, region, row_pitch, slice_pitch, pixelSize, color); asyncQueueRetain(cmd, image); asyncEnqueue(command_queue, CL_COMMAND_FILL_IMAGE, cmd, num_events_in_wait_list, event_wait_list, event); delete[] color; return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadImage ( cl_command_queue command_queue, cl_mem 
image, cl_bool blocking_read, const size_t * origin, const size_t * region, size_t row_pitch, size_t slice_pitch, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image); } cl_image *img = (cl_image*)image; size_t pixelSize = getPixelSize(&img->format); size_t buffer_origin[3] = {origin[0]*pixelSize, origin[1], origin[2]}; size_t pixel_region[3] = {region[0]*pixelSize, region[1], region[2]}; size_t host_origin[3] = {0, 0, 0}; size_t img_row_pitch = img->desc.image_width * pixelSize; size_t img_slice_pitch = img->desc.image_height * img_row_pitch; if (row_pitch == 0) { row_pitch = pixel_region[0]; } if (slice_pitch == 0) { slice_pitch = pixel_region[1] * row_pitch; } // Enqueue read cl_int ret = clEnqueueReadBufferRect( command_queue, image, blocking_read, buffer_origin, host_origin, pixel_region, img_row_pitch, img_slice_pitch, row_pitch, slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event); if (event) { (*event)->type = CL_COMMAND_READ_IMAGE; } return ret; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteImage ( cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, const size_t * origin, const size_t * region, size_t input_row_pitch, size_t input_slice_pitch, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image); } cl_image *img = (cl_image*)image; size_t pixelSize = getPixelSize(&img->format); size_t buffer_origin[3] = {origin[0]*pixelSize, origin[1], origin[2]}; size_t pixel_region[3] = {region[0]*pixelSize, 
region[1], region[2]}; size_t host_origin[3] = {0, 0, 0}; size_t img_row_pitch = img->desc.image_width * pixelSize; size_t img_slice_pitch = img->desc.image_height * img_row_pitch; if (input_row_pitch == 0) { input_row_pitch = pixel_region[0]; } if (input_slice_pitch == 0) { input_slice_pitch = pixel_region[1] * input_row_pitch; } // Enqueue write cl_int ret = clEnqueueWriteBufferRect( command_queue, image, blocking_write, buffer_origin, host_origin, pixel_region, img_row_pitch, img_slice_pitch, input_row_pitch, input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event); if (event) { (*event)->type = CL_COMMAND_WRITE_IMAGE; } return ret; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImage ( cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, const size_t * src_origin, const size_t * dst_origin, const size_t * region, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!src_image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_image); } if (!dst_image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_image); } cl_image *src = (cl_image*)src_image; cl_image *dst = (cl_image*)dst_image; if (src->format.image_channel_order != dst->format.image_channel_order) { ReturnErrorInfo(command_queue->context, CL_IMAGE_FORMAT_MISMATCH, "Channel orders do not match"); } if (src->format.image_channel_data_type != dst->format.image_channel_data_type) { ReturnErrorInfo(command_queue->context, CL_IMAGE_FORMAT_MISMATCH, "Channel data types do no match"); } size_t srcPixelSize = getPixelSize(&src->format); size_t dstPixelSize = getPixelSize(&dst->format); size_t src_pixel_origin[3] = {src_origin[0]*srcPixelSize, src_origin[1], src_origin[2]}; size_t dst_pixel_origin[3] = {dst_origin[0]*dstPixelSize, dst_origin[1], dst_origin[2]}; size_t 
pixel_region[3] = {region[0]*srcPixelSize, region[1], region[2]}; size_t src_row_pitch = src->desc.image_width * srcPixelSize; size_t src_slice_pitch = src->desc.image_height * src_row_pitch; size_t dst_row_pitch = dst->desc.image_width * dstPixelSize; size_t dst_slice_pitch = dst->desc.image_height * dst_row_pitch; // Enqueue copy cl_int ret = clEnqueueCopyBufferRect( command_queue, src_image, dst_image, src_pixel_origin, dst_pixel_origin, pixel_region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch, num_events_in_wait_list, event_wait_list, event); if (event) { (*event)->type = CL_COMMAND_COPY_IMAGE; } return ret; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImageToBuffer ( cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, const size_t * src_origin, const size_t * region, size_t dst_offset, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!src_image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_image); } if (!dst_buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_buffer); } cl_image *src = (cl_image*)src_image; size_t pixel_size = getPixelSize(&src->format); size_t src_pixel_origin[3] = {src_origin[0]*pixel_size, src_origin[1], src_origin[2]}; size_t src_row_pitch = src->desc.image_width * pixel_size; size_t src_slice_pitch = src->desc.image_height * src_row_pitch; size_t pixel_region[3] = {region[0]*pixel_size, region[1], region[2]}; size_t dst_origin[3] = {dst_offset, 0, 0}; // Enqueue copy cl_int ret = clEnqueueCopyBufferRect( command_queue, src_image, dst_buffer, src_pixel_origin, dst_origin, pixel_region, src_row_pitch, src_slice_pitch, 0, 0, num_events_in_wait_list, event_wait_list, event); if (event) { (*event)->type = CL_COMMAND_COPY_IMAGE_TO_BUFFER; } return ret; } CL_API_ENTRY cl_int CL_API_CALL 
clEnqueueCopyBufferToImage ( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, size_t src_offset, const size_t * dst_origin, const size_t * region, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!src_buffer) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, src_buffer); } if (!dst_image) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, dst_image); } cl_image *dst = (cl_image*)dst_image; size_t pixel_size = getPixelSize(&dst->format); size_t dst_pixel_origin[3] = {dst_origin[0]*pixel_size, dst_origin[1], dst_origin[2]}; size_t dst_row_pitch = dst->desc.image_width * pixel_size; size_t dst_slice_pitch = dst->desc.image_height * dst_row_pitch; size_t pixel_region[3] = {region[0]*pixel_size, region[1], region[2]}; size_t src_origin[3] = {src_offset, 0, 0}; // Enqueue copy cl_int ret = clEnqueueCopyBufferRect( command_queue, src_buffer, dst_image, src_origin, dst_pixel_origin, pixel_region, 0, 0, dst_row_pitch, dst_slice_pitch, num_events_in_wait_list, event_wait_list, event); if (event) { (*event)->type = CL_COMMAND_COPY_BUFFER_TO_IMAGE; } return ret; } CL_API_ENTRY void* CL_API_CALL clEnqueueMapBuffer ( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!command_queue) { SetErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); return NULL; } if (!buffer) { SetErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, buffer); return NULL; } if (map_flags & CL_MAP_WRITE && buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY)) { SetErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Buffer flags specify 
host will not write data"); return NULL; } if (map_flags & CL_MAP_READ && buffer->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY)) { SetErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Buffer flags specify host will not read data"); return NULL; } // Check map region if (offset + cb > buffer->size) { SetErrorInfo(command_queue->context, CL_INVALID_VALUE, "offset + cb (" << offset << " + " << cb << ") exceeds buffer size (" << buffer->size << " bytes)"); return NULL; } // Map buffer void *ptr = buffer->context->context->getGlobalMemory()->mapBuffer( buffer->address, offset, cb); if (ptr == NULL) { SetError(command_queue->context, CL_INVALID_VALUE); return NULL; } // Enqueue command oclgrind::Queue::Command *cmd = new oclgrind::Queue::Command(); asyncQueueRetain(cmd, buffer); asyncEnqueue(command_queue, CL_COMMAND_MAP_BUFFER, cmd, num_events_in_wait_list, event_wait_list, event); SetError(command_queue->context, CL_SUCCESS); if (blocking_map) { SetError(command_queue->context, clFinish(command_queue)); } return ptr; } CL_API_ENTRY void* CL_API_CALL clEnqueueMapImage ( cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, cl_map_flags map_flags, const size_t * origin, const size_t * region, size_t * image_row_pitch, size_t * image_slice_pitch, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!command_queue) { SetErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); return NULL; } if (!image) { SetErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, image); return NULL; } if (!image_row_pitch) { SetErrorArg(command_queue->context, CL_INVALID_VALUE, image_row_pitch); return NULL; } if (map_flags & CL_MAP_WRITE && image->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY)) { SetErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Image flags specify host will not write data"); return NULL; } if (map_flags & 
CL_MAP_READ && image->flags & (CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY)) { SetErrorInfo(command_queue->context, CL_INVALID_OPERATION, "Image flags specify host will not read data"); return NULL; } if (!region[0] || !region[1] || !region[2]) { SetErrorInfo(command_queue->context, CL_INVALID_VALUE, "Values in region cannot be 0"); } // Get image dimensions cl_image *img = (cl_image*)image; size_t width = img->desc.image_width; size_t height = img->desc.image_height; size_t depth = img->desc.image_depth; size_t arraySize = img->desc.image_array_size; size_t pixelSize = getPixelSize(&img->format); size_t row_pitch = width * pixelSize; size_t slice_pitch = height * row_pitch; if (img->desc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) height = arraySize; if (img->desc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) depth = arraySize; // Ensure region is within image bounds if (origin[0] + region[0] > width) { SetErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[0] + region[0] > width (" << origin[0] << " + " << region[0] << " > " << width << " )"); } if (origin[1] + region[1] > height) { SetErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[1] + region[1] > height (" << origin[1] << " + " << region[1] << " > " << height << " )"); } if (origin[2] + region[2] > depth) { SetErrorInfo(command_queue->context, CL_INVALID_VALUE, "origin[2] + region[2] > depth (" << origin[2] << " + " << region[2] << " > " << depth << " )"); } // Compute byte offset and size size_t offset = origin[0] * pixelSize + origin[1] * row_pitch + origin[2] * slice_pitch; size_t size = region[0] * pixelSize + (region[1]-1) * row_pitch + (region[2]-1) * slice_pitch; // Map image void *ptr = image->context->context->getGlobalMemory()->mapBuffer( image->address, offset, size); if (ptr == NULL) { SetError(command_queue->context, CL_INVALID_VALUE); return NULL; } *image_row_pitch = row_pitch; if (image_slice_pitch) { *image_slice_pitch = slice_pitch; } // Enqueue command 
oclgrind::Queue::Command *cmd = new oclgrind::Queue::Command(); asyncQueueRetain(cmd, image); asyncEnqueue(command_queue, CL_COMMAND_MAP_IMAGE, cmd, num_events_in_wait_list, event_wait_list, event); SetError(command_queue->context, CL_SUCCESS); if (blocking_map) { SetError(command_queue->context, clFinish(command_queue)); } return ptr; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueUnmapMemObject ( cl_command_queue command_queue, cl_mem memobj, void * mapped_ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!memobj) { ReturnErrorArg(command_queue->context, CL_INVALID_MEM_OBJECT, memobj); } // Enqueue command oclgrind::Queue::Command *cmd = new oclgrind::Queue::Command(); asyncQueueRetain(cmd, memobj); asyncEnqueue(command_queue, CL_COMMAND_UNMAP_MEM_OBJECT, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjects ( cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem * mem_objects, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_2 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } // Enqueue command oclgrind::Queue::Command *cmd = new oclgrind::Queue::Command(); asyncEnqueue(command_queue, CL_COMMAND_MIGRATE_MEM_OBJECTS, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel ( cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t * global_work_offset, const size_t * global_work_size, const size_t * local_work_size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters 
if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (work_dim < 1 || work_dim > 3) { ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_DIMENSION, "Kernels must be 1, 2 or 3 dimensional (work_dim = " << work_dim << ")"); } if (!global_work_size) { ReturnErrorInfo(command_queue->context, CL_INVALID_GLOBAL_WORK_SIZE, "global_work_size cannot be NULL"); } // Check global and local sizes are valid for (unsigned i = 0; i < work_dim; i++) { if (!global_work_size[i]) { ReturnErrorInfo(command_queue->context, CL_INVALID_GLOBAL_WORK_SIZE, "global_work_size[" << i << "] = 0"); } if (local_work_size && global_work_size[i] % local_work_size[i]) { ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_GROUP_SIZE, "Dimension " << i << ": local_work_size (" << local_work_size[i] << ") does not divide global_work_size (" << global_work_size[i] << ")"); } } // Ensure all arguments have been set if (!kernel->kernel->allArgumentsSet()) { ReturnErrorInfo(command_queue->context, CL_INVALID_KERNEL_ARGS, "Not all kernel arguments set"); } // Set-up offsets and sizes oclgrind::Queue::KernelCommand *cmd = new oclgrind::Queue::KernelCommand(); cmd->kernel = new oclgrind::Kernel(*kernel->kernel); cmd->work_dim = work_dim; cmd->globalSize = oclgrind::Size3(1, 1, 1); cmd->globalOffset = oclgrind::Size3(0, 0, 0); cmd->localSize = oclgrind::Size3(1, 1, 1); memcpy(&cmd->globalSize, global_work_size, work_dim*sizeof(size_t)); if (global_work_offset) { memcpy(&cmd->globalOffset, global_work_offset, work_dim*sizeof(size_t)); } if (local_work_size) { memcpy(&cmd->localSize, local_work_size, work_dim*sizeof(size_t)); } // Enqueue command asyncQueueRetain(cmd, kernel); asyncEnqueue(command_queue, CL_COMMAND_NDRANGE_KERNEL, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueTask ( cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, 
cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { size_t work = 1; return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &work, &work, num_events_in_wait_list, event_wait_list, event); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernel ( cl_command_queue command_queue, void (CL_CALLBACK *user_func)(void *), void * args, size_t cb_args, cl_uint num_mem_objects, const cl_mem * mem_list, const void ** args_mem_loc, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } if (!user_func) { ReturnErrorArg(command_queue->context, CL_INVALID_VALUE, user_func); } if (!args && (cb_args > 0 || num_mem_objects > 0)) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "args is NULL but cb_args|num_mem_objects >0"); } if (args && cb_args == 0) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "args is non-NULL but cb_args is 0"); } if (num_mem_objects > 0 && (!mem_list || !args_mem_loc)) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "num_mem_objects >0 but mem_list|args_mem_loc is NULL"); } if (num_mem_objects == 0 && (mem_list || args_mem_loc)) { ReturnErrorInfo(command_queue->context, CL_INVALID_VALUE, "num_mem_objects is 0 but mem_list|args_mem_loc not NULL"); } // Replace mem objects with real pointers oclgrind::Memory *memory = command_queue->context->context->getGlobalMemory(); for (unsigned i = 0; i < num_mem_objects; i++) { if (!mem_list[i]) { ReturnErrorInfo(command_queue->context, CL_INVALID_MEM_OBJECT, "Memory object " << i << " is NULL"); } void *addr = memory->getPointer(mem_list[i]->address); if (addr == NULL) { ReturnErrorInfo(command_queue->context, CL_INVALID_MEM_OBJECT, "Memory object " << i << " not valid"); } memcpy((void*)args_mem_loc[i], &addr, sizeof(void*)); } // Create command oclgrind::Queue::NativeKernelCommand *cmd = new 
oclgrind::Queue::NativeKernelCommand(user_func, args, cb_args); // Retain memory objects for (unsigned i = 0; i < num_mem_objects; i++) { asyncQueueRetain(cmd, mem_list[i]); } // Enqueue commands asyncEnqueue(command_queue, CL_COMMAND_NATIVE_KERNEL, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddressForPlatform ( cl_platform_id platform, const char * func_name ) CL_API_SUFFIX__VERSION_1_2 { return NULL; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithWaitList ( cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_2 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } // Enqueue command oclgrind::Queue::Command *cmd = new oclgrind::Queue::Command(); asyncEnqueue(command_queue, CL_COMMAND_MARKER, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrierWithWaitList ( cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_2 { // Check parameters if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } // Enqueue command oclgrind::Queue::Command *cmd = new oclgrind::Queue::Command(); asyncEnqueue(command_queue, CL_COMMAND_BARRIER, cmd, num_events_in_wait_list, event_wait_list, event); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clSetPrintfCallback ( cl_context context, void (CL_CALLBACK * pfn_notify)(cl_context, cl_uint, char*, void*), void * user_data ) CL_API_SUFFIX__VERSION_1_2 { ReturnError(NULL, CL_INVALID_OPERATION); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarker ( cl_command_queue command_queue, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { return clEnqueueMarkerWithWaitList(command_queue, 0, NULL, event); } CL_API_ENTRY cl_int 
CL_API_CALL clEnqueueWaitForEvents ( cl_command_queue command_queue, cl_uint num_events, const cl_event * event_list ) CL_API_SUFFIX__VERSION_1_0 { if (!command_queue) { ReturnErrorArg(NULL, CL_INVALID_COMMAND_QUEUE, command_queue); } // Enqueue command oclgrind::Queue::Command *cmd = new oclgrind::Queue::Command(); asyncEnqueue(command_queue, CL_COMMAND_BARRIER, cmd, num_events, event_list, NULL); return CL_SUCCESS; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueBarrier ( cl_command_queue command_queue ) CL_API_SUFFIX__VERSION_1_0 { return clEnqueueBarrierWithWaitList(command_queue, 0, NULL, NULL); } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLBuffer ( cl_context context, cl_mem_flags flags, cl_GLuint bufret_mem, int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture ( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_2 { SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture2D ( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLTexture3D ( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLRenderbuffer ( cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return 
NULL; } CL_API_ENTRY cl_int CL_API_CALL clGetGLObjectInfo ( cl_mem memobj, cl_gl_object_type * gl_object_type, cl_GLuint * gl_object_name ) CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_MEM_OBJECT, "CL/GL interop not implements"); } CL_API_ENTRY cl_int CL_API_CALL clGetGLTextureInfo ( cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_MEM_OBJECT, "CL/GL interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireGLObjects ( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseGLObjects ( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clGetGLContextInfoKHR ( const cl_context_properties * properties, cl_gl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/GL interop not implemented"); } CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromGLsyncKHR ( cl_context context, cl_GLsync cl_GLsync, cl_int * errcode_ret ) CL_EXT_SUFFIX__VERSION_1_1 { SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/GL interop not implemented"); return NULL; } #if defined(_WIN32) && !defined(__MINGW32__) // DX extension functions CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR ( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void * d3d_object, 
cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices ) CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10BufferKHR ( cl_context context, cl_mem_flags flags, ID3D10Buffer * resource, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR ( cl_context context, cl_mem_flags flags, ID3D10Texture2D * resource, UINT subresource, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR ( cl_context context, cl_mem_flags flags, ID3D10Texture3D * resource, UINT subresource, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR ( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event )CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR ( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR ( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void * d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id * devices, cl_uint * 
num_devices ) CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11BufferKHR ( cl_context context, cl_mem_flags flags, ID3D11Buffer * resource, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR ( cl_context context, cl_mem_flags flags, ID3D11Texture2D * resource, UINT subresource, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR ( cl_context context, cl_mem_flags flags, ID3D11Texture3D * resource, UINT subresource, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 { SetErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR ( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event )CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR ( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_0 { ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR ( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr * media_adapter_type, void * media_adapters, cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices ) CL_API_SUFFIX__VERSION_1_2 
{ ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR ( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, void * surface_info, cl_uint plane, cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_2 { SetErrorInfo(NULL, CL_INVALID_CONTEXT, "CL/DX interop not implemented"); return NULL; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR ( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_2 { ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR ( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event ) CL_API_SUFFIX__VERSION_1_2 { ReturnErrorInfo(NULL, CL_INVALID_OPERATION, "CL/DX interop not implemented"); } #endif // DX extension functions //////////////////// // Dispatch Table // //////////////////// #define _NULL_ NULL #define DISPATCH_TABLE_ENTRY(FUNCTION) (void*)(FUNCTION) void *m_dispatchTable[] = { DISPATCH_TABLE_ENTRY(clGetPlatformIDs), DISPATCH_TABLE_ENTRY(clGetPlatformInfo), DISPATCH_TABLE_ENTRY(clGetDeviceIDs), DISPATCH_TABLE_ENTRY(clGetDeviceInfo), DISPATCH_TABLE_ENTRY(clCreateContext), DISPATCH_TABLE_ENTRY(clCreateContextFromType), DISPATCH_TABLE_ENTRY(clRetainContext), DISPATCH_TABLE_ENTRY(clReleaseContext), DISPATCH_TABLE_ENTRY(clGetContextInfo), DISPATCH_TABLE_ENTRY(clCreateCommandQueue), DISPATCH_TABLE_ENTRY(clRetainCommandQueue), DISPATCH_TABLE_ENTRY(clReleaseCommandQueue), DISPATCH_TABLE_ENTRY(clGetCommandQueueInfo), DISPATCH_TABLE_ENTRY(clSetCommandQueueProperty), DISPATCH_TABLE_ENTRY(clCreateBuffer), DISPATCH_TABLE_ENTRY(clCreateImage2D), 
DISPATCH_TABLE_ENTRY(clCreateImage3D), DISPATCH_TABLE_ENTRY(clRetainMemObject), DISPATCH_TABLE_ENTRY(clReleaseMemObject), DISPATCH_TABLE_ENTRY(clGetSupportedImageFormats), DISPATCH_TABLE_ENTRY(clGetMemObjectInfo), DISPATCH_TABLE_ENTRY(clGetImageInfo), DISPATCH_TABLE_ENTRY(clCreateSampler), DISPATCH_TABLE_ENTRY(clRetainSampler), DISPATCH_TABLE_ENTRY(clReleaseSampler), DISPATCH_TABLE_ENTRY(clGetSamplerInfo), DISPATCH_TABLE_ENTRY(clCreateProgramWithSource), DISPATCH_TABLE_ENTRY(clCreateProgramWithBinary), DISPATCH_TABLE_ENTRY(clRetainProgram), DISPATCH_TABLE_ENTRY(clReleaseProgram), DISPATCH_TABLE_ENTRY(clBuildProgram), DISPATCH_TABLE_ENTRY(clUnloadCompiler), DISPATCH_TABLE_ENTRY(clGetProgramInfo), DISPATCH_TABLE_ENTRY(clGetProgramBuildInfo), DISPATCH_TABLE_ENTRY(clCreateKernel), DISPATCH_TABLE_ENTRY(clCreateKernelsInProgram), DISPATCH_TABLE_ENTRY(clRetainKernel), DISPATCH_TABLE_ENTRY(clReleaseKernel), DISPATCH_TABLE_ENTRY(clSetKernelArg), DISPATCH_TABLE_ENTRY(clGetKernelInfo), DISPATCH_TABLE_ENTRY(clGetKernelWorkGroupInfo), DISPATCH_TABLE_ENTRY(clWaitForEvents), DISPATCH_TABLE_ENTRY(clGetEventInfo), DISPATCH_TABLE_ENTRY(clRetainEvent), DISPATCH_TABLE_ENTRY(clReleaseEvent), DISPATCH_TABLE_ENTRY(clGetEventProfilingInfo), DISPATCH_TABLE_ENTRY(clFlush), DISPATCH_TABLE_ENTRY(clFinish), DISPATCH_TABLE_ENTRY(clEnqueueReadBuffer), DISPATCH_TABLE_ENTRY(clEnqueueWriteBuffer), DISPATCH_TABLE_ENTRY(clEnqueueCopyBuffer), DISPATCH_TABLE_ENTRY(clEnqueueReadImage), DISPATCH_TABLE_ENTRY(clEnqueueWriteImage), DISPATCH_TABLE_ENTRY(clEnqueueCopyImage), DISPATCH_TABLE_ENTRY(clEnqueueCopyImageToBuffer), DISPATCH_TABLE_ENTRY(clEnqueueCopyBufferToImage), DISPATCH_TABLE_ENTRY(clEnqueueMapBuffer), DISPATCH_TABLE_ENTRY(clEnqueueMapImage), DISPATCH_TABLE_ENTRY(clEnqueueUnmapMemObject), DISPATCH_TABLE_ENTRY(clEnqueueNDRangeKernel), DISPATCH_TABLE_ENTRY(clEnqueueTask), DISPATCH_TABLE_ENTRY(clEnqueueNativeKernel), DISPATCH_TABLE_ENTRY(clEnqueueMarker), DISPATCH_TABLE_ENTRY(clEnqueueWaitForEvents), 
DISPATCH_TABLE_ENTRY(clEnqueueBarrier), DISPATCH_TABLE_ENTRY(clGetExtensionFunctionAddress), DISPATCH_TABLE_ENTRY(clCreateFromGLBuffer), DISPATCH_TABLE_ENTRY(clCreateFromGLTexture2D), DISPATCH_TABLE_ENTRY(clCreateFromGLTexture3D), DISPATCH_TABLE_ENTRY(clCreateFromGLRenderbuffer), DISPATCH_TABLE_ENTRY(clGetGLObjectInfo), DISPATCH_TABLE_ENTRY(clGetGLTextureInfo), DISPATCH_TABLE_ENTRY(clEnqueueAcquireGLObjects), DISPATCH_TABLE_ENTRY(clEnqueueReleaseGLObjects), DISPATCH_TABLE_ENTRY(clGetGLContextInfoKHR), #if defined(_WIN32) DISPATCH_TABLE_ENTRY(clGetDeviceIDsFromD3D10KHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D10BufferKHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D10Texture2DKHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D10Texture3DKHR), DISPATCH_TABLE_ENTRY(clEnqueueAcquireD3D10ObjectsKHR), DISPATCH_TABLE_ENTRY(clEnqueueReleaseD3D10ObjectsKHR), #else DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), #endif // OpenCL 1.1 DISPATCH_TABLE_ENTRY(clSetEventCallback), DISPATCH_TABLE_ENTRY(clCreateSubBuffer), DISPATCH_TABLE_ENTRY(clSetMemObjectDestructorCallback), DISPATCH_TABLE_ENTRY(clCreateUserEvent), DISPATCH_TABLE_ENTRY(clSetUserEventStatus), DISPATCH_TABLE_ENTRY(clEnqueueReadBufferRect), DISPATCH_TABLE_ENTRY(clEnqueueWriteBufferRect), DISPATCH_TABLE_ENTRY(clEnqueueCopyBufferRect), DISPATCH_TABLE_ENTRY(NULL), // clCreateSubDevicesEXT DISPATCH_TABLE_ENTRY(NULL), // clRetainDeviceEXT DISPATCH_TABLE_ENTRY(NULL), // clReleaseDeviceEXT DISPATCH_TABLE_ENTRY(clCreateEventFromGLsyncKHR), // OpenCL 1.2 DISPATCH_TABLE_ENTRY(clCreateSubDevices), DISPATCH_TABLE_ENTRY(clRetainDevice), DISPATCH_TABLE_ENTRY(clReleaseDevice), DISPATCH_TABLE_ENTRY(clCreateImage), DISPATCH_TABLE_ENTRY(clCreateProgramWithBuiltInKernels), DISPATCH_TABLE_ENTRY(clCompileProgram), DISPATCH_TABLE_ENTRY(clLinkProgram), DISPATCH_TABLE_ENTRY(clUnloadPlatformCompiler), DISPATCH_TABLE_ENTRY(clGetKernelArgInfo), 
DISPATCH_TABLE_ENTRY(clEnqueueFillBuffer), DISPATCH_TABLE_ENTRY(clEnqueueFillImage), DISPATCH_TABLE_ENTRY(clEnqueueMigrateMemObjects), DISPATCH_TABLE_ENTRY(clEnqueueMarkerWithWaitList), DISPATCH_TABLE_ENTRY(clEnqueueBarrierWithWaitList), DISPATCH_TABLE_ENTRY(clGetExtensionFunctionAddressForPlatform), DISPATCH_TABLE_ENTRY(clCreateFromGLTexture), #if defined(_WIN32) DISPATCH_TABLE_ENTRY(clGetDeviceIDsFromD3D11KHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D11BufferKHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D11Texture2DKHR), DISPATCH_TABLE_ENTRY(clCreateFromD3D11Texture3DKHR), DISPATCH_TABLE_ENTRY(clCreateFromDX9MediaSurfaceKHR), DISPATCH_TABLE_ENTRY(clEnqueueAcquireD3D11ObjectsKHR), DISPATCH_TABLE_ENTRY(clEnqueueReleaseD3D11ObjectsKHR), DISPATCH_TABLE_ENTRY(clGetDeviceIDsFromDX9MediaAdapterKHR), DISPATCH_TABLE_ENTRY(clEnqueueAcquireDX9MediaSurfacesKHR), DISPATCH_TABLE_ENTRY(clEnqueueReleaseDX9MediaSurfacesKHR), #else DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), DISPATCH_TABLE_ENTRY(NULL), #endif }; Oclgrind-15.5/src/runtime/runtime.def000066400000000000000000000047731252441671000176370ustar00rootroot00000000000000EXPORTS ; Make runtime functions visible clGetPlatformIDs clGetPlatformInfo clGetDeviceIDs clGetDeviceInfo clCreateContext clCreateContextFromType clRetainContext clReleaseContext clGetContextInfo clCreateCommandQueue clRetainCommandQueue clReleaseCommandQueue clGetCommandQueueInfo clSetCommandQueueProperty clCreateBuffer clCreateImage2D clCreateImage3D clRetainMemObject clReleaseMemObject clGetSupportedImageFormats clGetMemObjectInfo clGetImageInfo clCreateSampler clRetainSampler clReleaseSampler clGetSamplerInfo clCreateProgramWithSource clCreateProgramWithBinary clRetainProgram clReleaseProgram clBuildProgram clUnloadCompiler clGetProgramInfo 
clGetProgramBuildInfo clCreateKernel clCreateKernelsInProgram clRetainKernel clReleaseKernel clSetKernelArg clGetKernelInfo clGetKernelWorkGroupInfo clWaitForEvents clGetEventInfo clRetainEvent clReleaseEvent clGetEventProfilingInfo clFlush clFinish clEnqueueReadBuffer clEnqueueWriteBuffer clEnqueueCopyBuffer clEnqueueReadImage clEnqueueWriteImage clEnqueueCopyImage clEnqueueCopyImageToBuffer clEnqueueCopyBufferToImage clEnqueueMapBuffer clEnqueueMapImage clEnqueueUnmapMemObject clEnqueueNDRangeKernel clEnqueueTask clEnqueueNativeKernel clEnqueueMarker clEnqueueWaitForEvents clEnqueueBarrier clGetExtensionFunctionAddress clCreateFromGLBuffer clCreateFromGLTexture2D clCreateFromGLTexture3D clCreateFromGLRenderbuffer clGetGLObjectInfo clGetGLTextureInfo clEnqueueAcquireGLObjects clEnqueueReleaseGLObjects clGetGLContextInfoKHR clGetDeviceIDsFromD3D10KHR clCreateFromD3D10BufferKHR clCreateFromD3D10Texture2DKHR clCreateFromD3D10Texture3DKHR clEnqueueAcquireD3D10ObjectsKHR clEnqueueReleaseD3D10ObjectsKHR clSetEventCallback clCreateSubBuffer clSetMemObjectDestructorCallback clCreateUserEvent clSetUserEventStatus clEnqueueReadBufferRect clEnqueueWriteBufferRect clEnqueueCopyBufferRect clCreateEventFromGLsyncKHR clCreateSubDevices clRetainDevice clReleaseDevice clCreateImage clCreateProgramWithBuiltInKernels clCompileProgram clLinkProgram clUnloadPlatformCompiler clGetKernelArgInfo clEnqueueFillBuffer clEnqueueFillImage clEnqueueMigrateMemObjects clEnqueueMarkerWithWaitList clEnqueueBarrierWithWaitList clGetExtensionFunctionAddressForPlatform clCreateFromGLTexture clGetDeviceIDsFromD3D11KHR clCreateFromD3D11BufferKHR clCreateFromD3D11Texture2DKHR clCreateFromD3D11Texture3DKHR clCreateFromDX9MediaSurfaceKHR clEnqueueAcquireD3D11ObjectsKHR clEnqueueReleaseD3D11ObjectsKHR clGetDeviceIDsFromDX9MediaAdapterKHR clEnqueueAcquireDX9MediaSurfacesKHR clEnqueueReleaseDX9MediaSurfacesKHR 
Oclgrind-15.5/tests/000077500000000000000000000000001252441671000143515ustar00rootroot00000000000000Oclgrind-15.5/tests/apps/000077500000000000000000000000001252441671000153145ustar00rootroot00000000000000Oclgrind-15.5/tests/apps/CMakeLists.txt000066400000000000000000000021621252441671000200550ustar00rootroot00000000000000# CMakeLists.txt (Oclgrind)
# Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
# University of Bristol. All rights reserved.
#
# This program is provided under a three-clause BSD license. For full
# license terms please see the LICENSE file distributed with this
# source code.

# Add app tests.
# Each name in the list is built from <name>/<name>.c, linked against the
# Oclgrind runtime and registered with CTest as app_<name>.
foreach(test vecadd)
  add_executable(${test} ${test}/${test}.c)
  target_link_libraries(${test} PRIVATE oclgrind-rt)

  # Generate test binaries in same dir as Oclgrind libraries on Windows
  if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
    set_target_properties(${test} PROPERTIES
      RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
  else()
    set_target_properties(${test} PROPERTIES
      RUNTIME_OUTPUT_DIRECTORY "${test}"
      LINKER_LANGUAGE CXX)
  endif()

  # Passing the target name as COMMAND lets CMake substitute the real
  # location of the built executable, including the per-configuration
  # sub-directory used by multi-config generators. The former
  # "DEPENDS ${test}" test property was dropped: DEPENDS orders tests
  # against other tests, and no test called ${test} exists.
  add_test(NAME app_${test} COMMAND ${test})

  # Set PCH directory
  set_tests_properties(app_${test} PROPERTIES
    ENVIRONMENT "OCLGRIND_PCH_DIR=${CMAKE_BINARY_DIR}/include/oclgrind")
endforeach()
Oclgrind-15.5/tests/apps/vecadd/000077500000000000000000000000001252441671000165425ustar00rootroot00000000000000Oclgrind-15.5/tests/apps/vecadd/vecadd.c000066400000000000000000000117201252441671000201350ustar00rootroot00000000000000#include #include #include #include #include
#define TOL 1e-8
#define MAX_ERRORS 8
#define MAX_PLATFORMS 8

const char *KERNEL_SOURCE =
"kernel void vecadd(global float *a, \n"
" global float *b, \n"
" global float *c) \n"
"{ \n"
" int i = get_global_id(0); \n"
" c[i] = a[i] + b[i]; \n"
"} \n"
;

void checkError(cl_int err, const char *operation);

int main(int
argc, char *argv[]) { cl_int err; cl_platform_id platform; cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_mem d_a, d_b, d_c; float *h_a, *h_b, *h_c; size_t N = 1024; if (argc > 1) { N = atoi(argv[1]); } size_t global = N; if (argc > 2) { global = atoi(argv[2]); } if (!N || !global) { printf("Usage: ./vecadd N [GLOBAL_SIZE]\n"); exit(1); } // Get list of platforms cl_uint numPlatforms = 0; cl_platform_id platforms[MAX_PLATFORMS]; err = clGetPlatformIDs(MAX_PLATFORMS, platforms, &numPlatforms); checkError(err, "getting platforms"); // Find Oclgrind platform = NULL; for (int i = 0; i < numPlatforms; i++) { char name[256]; err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 256, name, NULL); checkError(err, "getting platform name"); if (!strcmp(name, "Oclgrind")) { platform = platforms[i]; break; } } if (!platform) { fprintf(stderr, "Unable to find Oclgrind platform\n"); exit(1); } err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); checkError(err, "getting device"); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); checkError(err, "creating context"); queue = clCreateCommandQueue(context, device, 0, &err); checkError(err, "creating command queue"); program = clCreateProgramWithSource(context, 1, &KERNEL_SOURCE, NULL, &err); checkError(err, "creating program"); err = clBuildProgram(program, 1, &device, "", NULL, NULL); if (err == CL_BUILD_PROGRAM_FAILURE) { size_t sz; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(size_t), NULL, &sz); char *buildLog = malloc(++sz); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sz, buildLog, NULL); fprintf(stderr, "%s\n", buildLog); } checkError(err, "building program"); kernel = clCreateKernel(program, "vecadd", &err); checkError(err, "creating kernel"); size_t dataSize = N*sizeof(cl_float); // Initialise host data srand(0); h_a = malloc(dataSize); h_b = malloc(dataSize); h_c = malloc(dataSize); for (int i = 
0; i < N; i++) { h_a[i] = rand()/(float)RAND_MAX; h_b[i] = rand()/(float)RAND_MAX; h_c[i] = 0; } d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize, NULL, &err); checkError(err, "creating d_a buffer"); d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize, NULL, &err); checkError(err, "creating d_b buffer"); d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &err); checkError(err, "creating d_c buffer"); err = clEnqueueWriteBuffer(queue, d_a, CL_FALSE, 0, dataSize, h_a, 0, NULL, NULL); checkError(err, "writing d_a data"); err = clEnqueueWriteBuffer(queue, d_b, CL_FALSE, 0, dataSize, h_b, 0, NULL, NULL); checkError(err, "writing d_b data"); err = clEnqueueWriteBuffer(queue, d_c, CL_FALSE, 0, dataSize, h_c, 0, NULL, NULL); checkError(err, "writing d_c data"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c); checkError(err, "setting kernel args"); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); checkError(err, "enqueuing kernel"); err = clFinish(queue); checkError(err, "running kernel"); err = clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0, dataSize, h_c, 0, NULL, NULL); checkError(err, "reading d_c data"); // Check results int errors = 0; for (int i = 0; i < N; i++) { float ref = h_a[i] + h_b[i]; if (fabs(ref - h_c[i]) > TOL) { if (errors < MAX_ERRORS) { fprintf(stderr, "%4d: %.4f != %.4f\n", i, h_c[i], ref); } errors++; } } printf("%d errors detected\n", errors); free(h_a); free(h_b); free(h_c); clReleaseMemObject(d_a); clReleaseMemObject(d_b); clReleaseMemObject(d_c); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); return (errors != 0); } void checkError(cl_int err, const char *operation) { if (err != CL_SUCCESS) { fprintf(stderr, "Error during operation '%s': %d\n", operation, err); exit(1); } } 
Oclgrind-15.5/tests/kernels/000077500000000000000000000000001252441671000160145ustar00rootroot00000000000000Oclgrind-15.5/tests/kernels/TESTS000066400000000000000000000030361252441671000166430ustar00rootroot00000000000000alignment/packed alignment/unaligned async_copy/async_copy async_copy/async_copy_divergent async_copy/async_copy_global_race async_copy/async_copy_local_race async_copy/async_copy_loop async_copy/async_copy_loop_divergent async_copy/async_copy_single_wi async_copy/async_copy_unwaited atomics/atomic_cmpxchg_false_race atomics/atomic_cmpxchg_read_race atomics/atomic_cmpxchg_write_race atomics/atomic_global_fence atomics/atomic_global_fence_race atomics/atomic_increment atomics/atomic_intergroup_race atomics/atomic_local_fence atomics/atomic_race_after atomics/atomic_race_before atomics/atomic_same_workitem barrier/barrier_different_instructions barrier/barrier_divergence bugs/gvn_arbitrary_integers bugs/kernel_struct_argument bugs/many_alloca bugs/multidim_array_in_struct bugs/null_argument bugs/sroa_addrspace_cast data-race/broadcast data-race/global_fence data-race/global_only_fence data-race/global_read_write_race data-race/global_write_write_race data-race/increment data-race/intergroup_hidden_race data-race/intragroup_hidden_race data-race/intergroup_race data-race/local_only_fence data-race/local_read_write_race data-race/local_write_write_race data-race/uniform_write_race memcheck/async_copy_out_of_bounds memcheck/atomic_out_of_bounds memcheck/dereference_null memcheck/read_out_of_bounds memcheck/read_write_only_memory memcheck/write_out_of_bounds memcheck/write_read_only_memory misc/array misc/reduce misc/vecadd wait_event/wait_event_chained wait_event/wait_event_divergent wait_event/wait_event_duplicates 
wait_event/wait_event_invalidOclgrind-15.5/tests/kernels/alignment/000077500000000000000000000000001252441671000177725ustar00rootroot00000000000000Oclgrind-15.5/tests/kernels/alignment/packed.cl000066400000000000000000000002001252441671000215310ustar00rootroot00000000000000struct __attribute__((packed)) Foo { char a; int b; }; kernel void packed(struct Foo x, global int *out) { *out = x.b; } Oclgrind-15.5/tests/kernels/alignment/packed.ref000066400000000000000000000000471252441671000217200ustar00rootroot00000000000000 Argument 'out': 4 bytes out[0] = 2 Oclgrind-15.5/tests/kernels/alignment/packed.sim000066400000000000000000000001361252441671000217330ustar00rootroot00000000000000packed.cl packed 1 1 1 1 1 1 0x01 0x02 0x00 0x0 0x00 Oclgrind-15.5/tests/kernels/alignment/unaligned.cl000066400000000000000000000002571252441671000222640ustar00rootroot00000000000000kernel void unaligned(global int *in, global int *out) { global char *char_ptr = (global char*)in + 2; global int *address = (global int*)char_ptr; *out = *address; } Oclgrind-15.5/tests/kernels/alignment/unaligned.ref000066400000000000000000000000741252441671000224370ustar00rootroot00000000000000ERROR EXPECTED Argument 'out': 4 bytes out[0] = 2752512 Oclgrind-15.5/tests/kernels/alignment/unaligned.sim000066400000000000000000000001121252441671000224440ustar00rootroot00000000000000unaligned.cl unaligned 1 1 1 1 1 1 Oclgrind-15.5/tests/kernels/async_copy/000077500000000000000000000000001252441671000201635ustar00rootroot00000000000000Oclgrind-15.5/tests/kernels/async_copy/async_copy.cl000066400000000000000000000003671252441671000226600ustar00rootroot00000000000000kernel void async_copy(global int *data, local int *scratch) { event_t event = async_work_group_copy(scratch, data, get_local_size(0), 0); wait_group_events(1, &event); int i = get_local_id(0); data[get_local_size(0)-i-1] = scratch[i]; } 
Oclgrind-15.5/tests/kernels/async_copy/async_copy.ref000066400000000000000000000001241252441671000230250ustar00rootroot00000000000000 Argument 'data': 16 bytes data[0] = 3 data[1] = 2 data[2] = 1 data[3] = 0 Oclgrind-15.5/tests/kernels/async_copy/async_copy.sim000066400000000000000000000001131252441671000230370ustar00rootroot00000000000000async_copy.cl async_copy 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/async_copy/async_copy_divergent.cl000066400000000000000000000005011252441671000247150ustar00rootroot00000000000000kernel void async_copy_divergent(global int *data, local int *scratch) { int i = get_local_id(0); size_t size = get_local_size(0); if (i == size-1) { size = 1; } event_t event = async_work_group_copy(scratch, data, size, 0); wait_group_events(1, &event); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-15.5/tests/kernels/async_copy/async_copy_divergent.ref000066400000000000000000000001431252441671000250750ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 3 data[1] = 2 data[2] = 1 data[3] = 0 Oclgrind-15.5/tests/kernels/async_copy/async_copy_divergent.sim000066400000000000000000000001371252441671000251140ustar00rootroot00000000000000async_copy_divergent.cl async_copy_divergent 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/async_copy/async_copy_global_race.cl000066400000000000000000000004311252441671000251620ustar00rootroot00000000000000kernel void async_copy_global_race(global int *data, local int *scratch) { int i = get_local_id(0); scratch[i] = i; barrier(CLK_LOCAL_MEM_FENCE); data[i] = 0; event_t event = async_work_group_copy(data, scratch, get_local_size(0), 0); wait_group_events(1, &event); } Oclgrind-15.5/tests/kernels/async_copy/async_copy_global_race.ref000066400000000000000000000001431252441671000253400ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 0 data[1] = 1 data[2] = 2 data[3] = 3 
Oclgrind-15.5/tests/kernels/async_copy/async_copy_global_race.sim000066400000000000000000000001431252441671000253540ustar00rootroot00000000000000async_copy_global_race.cl async_copy_global_race 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/async_copy/async_copy_local_race.cl000066400000000000000000000004251252441671000250170ustar00rootroot00000000000000kernel void async_copy_local_race(global int *data, local int *scratch) { int i = get_local_id(0); scratch[i] = 0; event_t event = async_work_group_copy(scratch, data, get_local_size(0), 0); wait_group_events(1, &event); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-15.5/tests/kernels/async_copy/async_copy_local_race.ref000066400000000000000000000001431252441671000251720ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 3 data[1] = 2 data[2] = 1 data[3] = 0 Oclgrind-15.5/tests/kernels/async_copy/async_copy_local_race.sim000066400000000000000000000001411252441671000252040ustar00rootroot00000000000000async_copy_local_race.cl async_copy_local_race 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/async_copy/async_copy_loop.cl000066400000000000000000000005301252441671000237010ustar00rootroot00000000000000kernel void async_copy_loop(global int *data, local int *scratch) { int i = get_local_id(0); event_t event = 0; for (int j = 0; j < get_local_size(0); j++) { int offset = j; event = async_work_group_copy(scratch+offset, data+offset, 1, event); } wait_group_events(1, &event); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-15.5/tests/kernels/async_copy/async_copy_loop.ref000066400000000000000000000001241252441671000240560ustar00rootroot00000000000000 Argument 'data': 16 bytes data[0] = 3 data[1] = 2 data[2] = 1 data[3] = 0 Oclgrind-15.5/tests/kernels/async_copy/async_copy_loop.sim000066400000000000000000000001251252441671000240730ustar00rootroot00000000000000async_copy_loop.cl async_copy_loop 4 1 1 4 1 1 
Oclgrind-15.5/tests/kernels/async_copy/async_copy_loop_divergent.cl000066400000000000000000000006331252441671000257540ustar00rootroot00000000000000kernel void async_copy_loop_divergent(global int *data, local int *scratch) { int i = get_local_id(0); event_t event = 0; for (int j = 0; j < get_local_size(0); j++) { int offset = j; if (i == 2 && j == 2) { offset = 0; } event = async_work_group_copy(scratch+offset, data+offset, 1, event); } wait_group_events(1, &event); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-15.5/tests/kernels/async_copy/async_copy_loop_divergent.ref000066400000000000000000000001431252441671000261260ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 3 data[1] = 2 data[2] = 1 data[3] = 0 Oclgrind-15.5/tests/kernels/async_copy/async_copy_loop_divergent.sim000066400000000000000000000001511252441671000261410ustar00rootroot00000000000000async_copy_loop_divergent.cl async_copy_loop_divergent 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/async_copy/async_copy_single_wi.cl000066400000000000000000000006261252441671000247160ustar00rootroot00000000000000kernel void async_copy_single_wi(global int *data, local int *scratch) { int i = get_local_id(0); event_t event = async_work_group_copy(scratch, data, get_local_size(0), 0); if (i == 0) { // An extra copy that will only be registered by one work-item event = async_work_group_copy(scratch, data, 1, event); } wait_group_events(1, &event); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-15.5/tests/kernels/async_copy/async_copy_single_wi.ref000066400000000000000000000001431252441671000250660ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 3 data[1] = 2 data[2] = 1 data[3] = 0 Oclgrind-15.5/tests/kernels/async_copy/async_copy_single_wi.sim000066400000000000000000000001371252441671000251050ustar00rootroot00000000000000async_copy_single_wi.cl async_copy_single_wi 4 1 1 4 1 1 
Oclgrind-15.5/tests/kernels/async_copy/async_copy_unwaited.cl000066400000000000000000000003271252441671000245540ustar00rootroot00000000000000kernel void async_copy_unwaited(global int *data, local int *scratch) { event_t event = async_work_group_copy(scratch, data, get_local_size(0), 0); int i = get_local_id(0); data[get_local_size(0)-i-1] = i; } Oclgrind-15.5/tests/kernels/async_copy/async_copy_unwaited.ref000066400000000000000000000001431252441671000247260ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 3 data[1] = 2 data[2] = 1 data[3] = 0 Oclgrind-15.5/tests/kernels/async_copy/async_copy_unwaited.sim000066400000000000000000000001351252441671000247430ustar00rootroot00000000000000async_copy_unwaited.cl async_copy_unwaited 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/atomics/000077500000000000000000000000001252441671000174535ustar00rootroot00000000000000Oclgrind-15.5/tests/kernels/atomics/atomic_cmpxchg_false_race.cl000066400000000000000000000012201252441671000251170ustar00rootroot00000000000000kernel void atomic_cmpxchg_false_race(global int *data, local int *scratch) { int l = get_local_id(0); if (l == 0) { scratch[0] = 0; } barrier(CLK_LOCAL_MEM_FENCE); bool done = false; int before, old; int result; for (int i = 0; i < get_local_size(0); i++) { barrier(CLK_LOCAL_MEM_FENCE); before = scratch[0]; barrier(CLK_LOCAL_MEM_FENCE); if (!done) { old = atomic_cmpxchg(scratch, before, before+1); if (old == before) { done = true; result = scratch[0]; } } } barrier(CLK_LOCAL_MEM_FENCE); if (l == 0) { *data = *scratch; } data[l+1] = result; } Oclgrind-15.5/tests/kernels/atomics/atomic_cmpxchg_false_race.ref000066400000000000000000000001421252441671000252770ustar00rootroot00000000000000 Argument 'data': 20 bytes data[0] = 4 data[1] = 1 data[2] = 2 data[3] = 3 data[4] = 4 Oclgrind-15.5/tests/kernels/atomics/atomic_cmpxchg_false_race.sim000066400000000000000000000001431252441671000253140ustar00rootroot00000000000000atomic_cmpxchg_false_race.cl 
atomic_cmpxchg_false_race 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/atomics/atomic_cmpxchg_read_race.cl000066400000000000000000000002531252441671000247450ustar00rootroot00000000000000kernel void atomic_cmpxchg_read_race(global int *data) { int i = get_global_id(0); if (i == 0) { *data = 0; } else { atomic_cmpxchg(data, 0, i); } } Oclgrind-15.5/tests/kernels/atomics/atomic_cmpxchg_read_race.ref000066400000000000000000000000701252441671000251200ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 4 bytes data[0] = 1 Oclgrind-15.5/tests/kernels/atomics/atomic_cmpxchg_read_race.sim000066400000000000000000000001231252441671000251330ustar00rootroot00000000000000atomic_cmpxchg_read_race.cl atomic_cmpxchg_read_race 2 1 1 2 1 1 -1 Oclgrind-15.5/tests/kernels/atomics/atomic_cmpxchg_write_race.cl000066400000000000000000000002341252441671000251630ustar00rootroot00000000000000kernel void atomic_cmpxchg_write_race(global int *data) { int i = get_global_id(0); if (i == 0) { *data = 0; } atomic_cmpxchg(data, i, 42); } Oclgrind-15.5/tests/kernels/atomics/atomic_cmpxchg_write_race.ref000066400000000000000000000000711252441671000253400ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 4 bytes data[0] = 42 Oclgrind-15.5/tests/kernels/atomics/atomic_cmpxchg_write_race.sim000066400000000000000000000001251252441671000253540ustar00rootroot00000000000000atomic_cmpxchg_write_race.cl atomic_cmpxchg_write_race 2 1 1 2 1 1 -1 Oclgrind-15.5/tests/kernels/atomics/atomic_global_fence.cl000066400000000000000000000005251252441671000237310ustar00rootroot00000000000000kernel void atomic_global_fence(global int *data, global int *scratch) { int i = get_global_id(0); int l = get_local_id(0); int g = get_group_id(0); if (l == 0) { scratch[g] = 0; } barrier(CLK_GLOBAL_MEM_FENCE); atomic_add(scratch+g, i); barrier(CLK_GLOBAL_MEM_FENCE); if (l == 0) { data[g] = scratch[g]; } } 
Oclgrind-15.5/tests/kernels/atomics/atomic_global_fence.ref000066400000000000000000000000701252441671000241020ustar00rootroot00000000000000 Argument 'data': 8 bytes data[0] = 6 data[1] = 22 Oclgrind-15.5/tests/kernels/atomics/atomic_global_fence.sim000066400000000000000000000001361252441671000241210ustar00rootroot00000000000000atomic_global_fence.cl atomic_global_fence 8 1 1 4 1 1 Oclgrind-15.5/tests/kernels/atomics/atomic_global_fence_race.cl000066400000000000000000000004131252441671000247170ustar00rootroot00000000000000kernel void atomic_global_fence_race(global int *data, global int *scratch) { int i = get_global_id(0); int l = get_local_id(0); int g = get_group_id(0); atomic_add(scratch, i); barrier(CLK_GLOBAL_MEM_FENCE); if (l == 0) { data[g] = *scratch; } } Oclgrind-15.5/tests/kernels/atomics/atomic_global_fence_race.ref000066400000000000000000000001071252441671000250750ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 8 bytes data[0] = 6 data[1] = 28 Oclgrind-15.5/tests/kernels/atomics/atomic_global_fence_race.sim000066400000000000000000000001471252441671000251150ustar00rootroot00000000000000atomic_global_fence_race.cl atomic_global_fence_race 8 1 1 4 1 1 Oclgrind-15.5/tests/kernels/atomics/atomic_increment.cl000066400000000000000000000001071252441671000233110ustar00rootroot00000000000000kernel void atomic_increment(global int *data) { atomic_inc(data); } Oclgrind-15.5/tests/kernels/atomics/atomic_increment.ref000066400000000000000000000000511252441671000234650ustar00rootroot00000000000000 Argument 'data': 4 bytes data[0] = 4 Oclgrind-15.5/tests/kernels/atomics/atomic_increment.sim000066400000000000000000000001071252441671000235030ustar00rootroot00000000000000atomic_increment.cl atomic_increment 4 1 1 1 1 1 Oclgrind-15.5/tests/kernels/atomics/atomic_intergroup_race.cl000066400000000000000000000002571252441671000245230ustar00rootroot00000000000000kernel void atomic_intergroup_race(global int *data) { int i = get_global_id(0); if (i == 0) { *data = 
0; } barrier(CLK_GLOBAL_MEM_FENCE); atomic_inc(data); } Oclgrind-15.5/tests/kernels/atomics/atomic_intergroup_race.ref000066400000000000000000000000701252441671000246720ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 4 bytes data[0] = 8 Oclgrind-15.5/tests/kernels/atomics/atomic_intergroup_race.sim000066400000000000000000000001241252441671000247060ustar00rootroot00000000000000atomic_intergroup_race.cl atomic_intergroup_race 8 1 1 4 1 1 Oclgrind-15.5/tests/kernels/atomics/atomic_local_fence.cl000066400000000000000000000005131252441671000235600ustar00rootroot00000000000000kernel void atomic_local_fence(global int *data, local int *scratch) { int i = get_global_id(0); int l = get_local_id(0); int g = get_group_id(0); if (l == 0) { *scratch = 0; } barrier(CLK_LOCAL_MEM_FENCE); atomic_add(scratch, i); barrier(CLK_LOCAL_MEM_FENCE); if (l == 0) { data[g] = *scratch; } } Oclgrind-15.5/tests/kernels/atomics/atomic_local_fence.ref000066400000000000000000000000701252441671000237340ustar00rootroot00000000000000 Argument 'data': 8 bytes data[0] = 6 data[1] = 22 Oclgrind-15.5/tests/kernels/atomics/atomic_local_fence.sim000066400000000000000000000001241252441671000237500ustar00rootroot00000000000000atomic_local_fence.cl atomic_local_fence 8 1 1 4 1 1 Oclgrind-15.5/tests/kernels/atomics/atomic_race_after.cl000066400000000000000000000002171252441671000234220ustar00rootroot00000000000000kernel void atomic_race_after(global int *data) { atomic_inc(data); if (get_global_id(0) == get_global_size(0)-1) { (*data)++; } } Oclgrind-15.5/tests/kernels/atomics/atomic_race_after.ref000066400000000000000000000000701252441671000235750ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 4 bytes data[0] = 5 Oclgrind-15.5/tests/kernels/atomics/atomic_race_after.sim000066400000000000000000000001111252441671000236050ustar00rootroot00000000000000atomic_race_after.cl atomic_race_after 4 1 1 4 1 1 
Oclgrind-15.5/tests/kernels/atomics/atomic_race_before.cl000066400000000000000000000001751252441671000235660ustar00rootroot00000000000000kernel void atomic_race_before(global int *data) { if (get_global_id(0) == 0) { *data = 0; } atomic_inc(data); } Oclgrind-15.5/tests/kernels/atomics/atomic_race_before.ref000066400000000000000000000000701252441671000237360ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 4 bytes data[0] = 4 Oclgrind-15.5/tests/kernels/atomics/atomic_race_before.sim000066400000000000000000000001131252441671000237500ustar00rootroot00000000000000atomic_race_before.cl atomic_race_before 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/atomics/atomic_same_workitem.cl000066400000000000000000000003321252441671000241730ustar00rootroot00000000000000kernel void atomic_same_workitem(global int *data) { int i = get_global_id(0); if ((i % 2) == 0) { data[i] = 0; atomic_inc(data+i); } else { atomic_inc(data+i); data[i] = data[i] + 1; } } Oclgrind-15.5/tests/kernels/atomics/atomic_same_workitem.ref000066400000000000000000000001241252441671000243500ustar00rootroot00000000000000 Argument 'data': 16 bytes data[0] = 1 data[1] = 2 data[2] = 1 data[3] = 2 Oclgrind-15.5/tests/kernels/atomics/atomic_same_workitem.sim000066400000000000000000000001201252441671000243600ustar00rootroot00000000000000atomic_same_workitem.cl atomic_same_workitem 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/barrier/000077500000000000000000000000001252441671000174425ustar00rootroot00000000000000Oclgrind-15.5/tests/kernels/barrier/barrier_different_instructions.cl000066400000000000000000000003651252441671000262660ustar00rootroot00000000000000kernel void barrier_different_instructions(global int *data) { int i = get_global_id(0); if (i == 0) { data[0] = 42; barrier(CLK_GLOBAL_MEM_FENCE); } else { barrier(CLK_GLOBAL_MEM_FENCE); data[i] = i + data[0]; } } 
Oclgrind-15.5/tests/kernels/barrier/barrier_different_instructions.ref000066400000000000000000000001471252441671000264420ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 42 data[1] = 43 data[2] = 44 data[3] = 45 Oclgrind-15.5/tests/kernels/barrier/barrier_different_instructions.sim000066400000000000000000000001441252441671000264530ustar00rootroot00000000000000barrier_different_instructions.cl barrier_different_instructions 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/barrier/barrier_divergence.cl000066400000000000000000000002311252441671000235770ustar00rootroot00000000000000kernel void barrier_divergence(global int *data) { int i = get_global_id(0); if (i != 0) { barrier(CLK_GLOBAL_MEM_FENCE); } data[i] = i; } Oclgrind-15.5/tests/kernels/barrier/barrier_divergence.ref000066400000000000000000000001431252441671000237570ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 0 data[1] = 1 data[2] = 2 data[3] = 3 Oclgrind-15.5/tests/kernels/barrier/barrier_divergence.sim000066400000000000000000000001141252441671000237710ustar00rootroot00000000000000barrier_divergence.cl barrier_divergence 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/bugs/000077500000000000000000000000001252441671000167545ustar00rootroot00000000000000Oclgrind-15.5/tests/kernels/bugs/gvn_arbitrary_integers.cl000066400000000000000000000003261252441671000240460ustar00rootroot00000000000000__kernel void gvn_arbitrary_integers(__global int *source, __global int *dest) { size_t i = get_global_id(0); int3 tmp = 0; tmp.S2 = source[i]; vstore3(tmp, 0, dest); } Oclgrind-15.5/tests/kernels/bugs/gvn_arbitrary_integers.ref000066400000000000000000000001071252441671000242210ustar00rootroot00000000000000 Argument 'dest': 12 bytes dest[0] = 0 dest[1] = 0 dest[2] = 42 Oclgrind-15.5/tests/kernels/bugs/gvn_arbitrary_integers.sim000066400000000000000000000001451252441671000242370ustar00rootroot00000000000000gvn_arbitrary_integers.cl gvn_arbitrary_integers 1 1 1 1 1 1 
Oclgrind-15.5/tests/kernels/bugs/kernel_struct_argument.cl000066400000000000000000000002411252441671000240570ustar00rootroot00000000000000typedef struct { float a; float b; float c; } Structure; kernel void kernel_struct_argument(Structure x, global float *out) { *out = x.a * x.b + x.c; } Oclgrind-15.5/tests/kernels/bugs/kernel_struct_argument.ref000066400000000000000000000000511252441671000242340ustar00rootroot00000000000000 Argument 'out': 4 bytes out[0] = 144 Oclgrind-15.5/tests/kernels/bugs/kernel_struct_argument.sim000066400000000000000000000001541252441671000242540ustar00rootroot00000000000000kernel_struct_argument.cl kernel_struct_argument 1 1 1 1 1 1 42 3 18 Oclgrind-15.5/tests/kernels/bugs/many_alloca.cl000066400000000000000000000003521252441671000215530ustar00rootroot00000000000000void bar(int *x) { *x += 1; } int foo() { int x = 0; bar(&x); return x; } kernel void many_alloca(global int *data, int n) { int x = 0; for (int i = 0; i < n; i++) { x += foo(); } data[get_global_id(0)] = x; } Oclgrind-15.5/tests/kernels/bugs/many_alloca.ref000066400000000000000000000000561252441671000217320ustar00rootroot00000000000000 Argument 'data': 4 bytes data[0] = 100000 Oclgrind-15.5/tests/kernels/bugs/many_alloca.sim000066400000000000000000000001161252441671000217430ustar00rootroot00000000000000many_alloca.cl many_alloca 1 1 1 1 1 1 100000 Oclgrind-15.5/tests/kernels/bugs/multidim_array_in_struct.cl000066400000000000000000000011021252441671000244020ustar00rootroot00000000000000// // Issue #64 on GitHub: // https://github.com/jrprice/Oclgrind/issues/64 // // Required alignment for multi-dimensional arrays was incorrect. 
// struct S0 { uchar a; ulong b[2][3][1]; }; kernel void multidim_array_in_struct(global ulong *output) { struct S0 s = { 1UL, { { {1L}, {1L}, {1L} }, { {1L}, {1L}, {1L} } }, }; ulong c = 0UL; for (int i = 0; i < 2; i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 1; k++) c += s.b[i][j][k]; *output = c; } Oclgrind-15.5/tests/kernels/bugs/multidim_array_in_struct.ref000066400000000000000000000000551252441671000245660ustar00rootroot00000000000000 Argument 'output': 8 bytes output[0] = 6 Oclgrind-15.5/tests/kernels/bugs/multidim_array_in_struct.sim000066400000000000000000000003461252441671000246050ustar00rootroot00000000000000# # Issue #64 on GitHub: # https://github.com/jrprice/Oclgrind/issues/64 # # Required alignment for multi-dimensional arrays was incorrect. # multidim_array_in_struct.cl multidim_array_in_struct 1 1 1 1 1 1 Oclgrind-15.5/tests/kernels/bugs/null_argument.cl000066400000000000000000000001751252441671000221530ustar00rootroot00000000000000ulong func_1(ulong * p_1) { return 1; } kernel void null_argument(global ulong *output) { *output = func_1((void*)0); } Oclgrind-15.5/tests/kernels/bugs/null_argument.ref000066400000000000000000000000551252441671000223260ustar00rootroot00000000000000 Argument 'output': 8 bytes output[0] = 1 Oclgrind-15.5/tests/kernels/bugs/null_argument.sim000066400000000000000000000001011252441671000223320ustar00rootroot00000000000000null_argument.cl null_argument 1 1 1 1 1 1 Oclgrind-15.5/tests/kernels/bugs/sroa_addrspace_cast.cl000066400000000000000000000003641252441671000232630ustar00rootroot00000000000000typedef struct { float x; } DataStruct; __kernel void sroa_addrspace_cast(__global DataStruct *input, __global float *output) { size_t i = get_global_id(0); DataStruct s = input[i]; output[i] = s.x; } Oclgrind-15.5/tests/kernels/bugs/sroa_addrspace_cast.ref000066400000000000000000000000611252441671000234330ustar00rootroot00000000000000 Argument 'output': 4 bytes output[0] = 42.24 
Oclgrind-15.5/tests/kernels/bugs/sroa_addrspace_cast.sim000066400000000000000000000001471252441671000234540ustar00rootroot00000000000000sroa_addrspace_cast.cl sroa_addrspace_cast 1 1 1 1 1 1 Oclgrind-15.5/tests/kernels/data-race/000077500000000000000000000000001252441671000176355ustar00rootroot00000000000000Oclgrind-15.5/tests/kernels/data-race/broadcast.cl000066400000000000000000000001651252441671000221210ustar00rootroot00000000000000kernel void broadcast(global int *value, global int *output) { int i = get_global_id(0); output[i] = value[0]; } Oclgrind-15.5/tests/kernels/data-race/broadcast.ref000066400000000000000000000001421252441671000222720ustar00rootroot00000000000000 Argument 'output': 16 bytes output[0] = 42 output[1] = 42 output[2] = 42 output[3] = 42 Oclgrind-15.5/tests/kernels/data-race/broadcast.sim000066400000000000000000000001071252441671000223070ustar00rootroot00000000000000broadcast.cl broadcast 4 1 1 1 1 1 42 Oclgrind-15.5/tests/kernels/data-race/global_fence.cl000066400000000000000000000005371252441671000225620ustar00rootroot00000000000000kernel void global_fence(global int *scratch, global int *output) { int i = get_global_id(0); int g = get_group_id(0); scratch[i] = i; barrier(CLK_GLOBAL_MEM_FENCE); if (get_local_id(0) == 0) { int x = 0; for (int l = 0; l < get_local_size(0); l++) { x += scratch[get_local_size(0)*g + l]; } output[g] = x; } } Oclgrind-15.5/tests/kernels/data-race/global_fence.ref000066400000000000000000000001411252441671000227270ustar00rootroot00000000000000 Argument 'output': 16 bytes output[0] = 6 output[1] = 22 output[2] = 38 output[3] = 54 Oclgrind-15.5/tests/kernels/data-race/global_fence.sim000066400000000000000000000001221252441671000227420ustar00rootroot00000000000000global_fence.cl global_fence 16 1 1 4 1 1 Oclgrind-15.5/tests/kernels/data-race/global_only_fence.cl000066400000000000000000000005141252441671000236160ustar00rootroot00000000000000kernel void global_only_fence(local int *scratch, global int *output) { int l 
= get_local_id(0); int g = get_group_id(0); scratch[l] = l; barrier(CLK_GLOBAL_MEM_FENCE); if (get_local_id(0) == 0) { int x = 0; for (int i = 0; i < get_local_size(0); i++) { x += scratch[i]; } output[g] = x; } } Oclgrind-15.5/tests/kernels/data-race/global_only_fence.ref000066400000000000000000000001551252441671000237750ustar00rootroot00000000000000ERROR EXPECTED Argument 'output': 16 bytes output[0] = 6 output[1] = 0 output[2] = 0 output[3] = 0 Oclgrind-15.5/tests/kernels/data-race/global_only_fence.sim000066400000000000000000000001241252441671000240050ustar00rootroot00000000000000global_only_fence.cl global_only_fence 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/data-race/global_read_write_race.cl000066400000000000000000000002031252441671000246070ustar00rootroot00000000000000kernel void global_read_write_race(global int *data) { int i = get_global_id(0); if (i > 0) { data[i] = data[i-1]; } } Oclgrind-15.5/tests/kernels/data-race/global_read_write_race.ref000066400000000000000000000001431252441671000247700ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 0 data[1] = 0 data[2] = 0 data[3] = 0 Oclgrind-15.5/tests/kernels/data-race/global_read_write_race.sim000066400000000000000000000001311252441671000250010ustar00rootroot00000000000000global_read_write_race.cl global_read_write_race 4 1 1 1 1 1 Oclgrind-15.5/tests/kernels/data-race/global_write_write_race.cl000066400000000000000000000001301252441671000250250ustar00rootroot00000000000000kernel void global_write_write_race(global int *data) { data[0] = get_global_id(0); } Oclgrind-15.5/tests/kernels/data-race/global_write_write_race.ref000066400000000000000000000000701252441671000252060ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 4 bytes data[0] = 3 Oclgrind-15.5/tests/kernels/data-race/global_write_write_race.sim000066400000000000000000000001251252441671000252230ustar00rootroot00000000000000global_write_write_race.cl global_write_write_race 4 1 1 1 1 1 
Oclgrind-15.5/tests/kernels/data-race/increment.cl000066400000000000000000000001421252441671000221360ustar00rootroot00000000000000kernel void increment(global int *data) { int i = get_global_id(0); data[i] = data[i] + 1; } Oclgrind-15.5/tests/kernels/data-race/increment.ref000066400000000000000000000001241252441671000223140ustar00rootroot00000000000000 Argument 'data': 16 bytes data[0] = 1 data[1] = 2 data[2] = 3 data[3] = 4 Oclgrind-15.5/tests/kernels/data-race/increment.sim000066400000000000000000000000771252441671000223370ustar00rootroot00000000000000increment.cl increment 4 1 1 1 1 1 Oclgrind-15.5/tests/kernels/data-race/intergroup_hidden_race.cl000066400000000000000000000002661252441671000246640ustar00rootroot00000000000000kernel void intergroup_hidden_race(global int *data, global int *output) { int group = get_group_id(0); output[group] = data[0]; if (group == 1) { data[0] = group; } } Oclgrind-15.5/tests/kernels/data-race/intergroup_hidden_race.ref000066400000000000000000000001141252441671000250320ustar00rootroot00000000000000ERROR EXPECTED Argument 'output': 8 bytes output[0] = 0 output[1] = 0 Oclgrind-15.5/tests/kernels/data-race/intergroup_hidden_race.sim000066400000000000000000000001431252441671000250500ustar00rootroot00000000000000intergroup_hidden_race.cl intergroup_hidden_race 2 1 1 1 1 1 Oclgrind-15.5/tests/kernels/data-race/intergroup_race.cl000066400000000000000000000005311252441671000233440ustar00rootroot00000000000000kernel void intergroup_race(global int *data) { int g = get_group_id(0); if (get_local_id(0) == 0) { data[g] = g; } barrier(CLK_GLOBAL_MEM_FENCE); if (get_global_id(0) == 0) { int x = 0; for (int i = 0; i < get_num_groups(0); i++) { x += data[i]; } data[0] = x; } barrier(CLK_GLOBAL_MEM_FENCE); } Oclgrind-15.5/tests/kernels/data-race/intergroup_race.ref000066400000000000000000000001431252441671000235210ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 0 data[1] = 1 data[2] = 2 data[3] = 3 
Oclgrind-15.5/tests/kernels/data-race/intergroup_race.sim000066400000000000000000000001071252441671000235350ustar00rootroot00000000000000intergroup_race.cl intergroup_race 16 1 1 4 1 1 Oclgrind-15.5/tests/kernels/data-race/intragroup_hidden_race.cl000066400000000000000000000003121252441671000246500ustar00rootroot00000000000000kernel void intragroup_hidden_race(global int *data, global int *output) { int id = get_local_id(0); output[id] = data[0]; barrier(CLK_LOCAL_MEM_FENCE); if (id == 0) { data[0] = -1; } } Oclgrind-15.5/tests/kernels/data-race/intragroup_hidden_race.ref000066400000000000000000000001161252441671000250300ustar00rootroot00000000000000ERROR EXPECTED Argument 'output': 8 bytes output[0] = 42 output[1] = 42 Oclgrind-15.5/tests/kernels/data-race/intragroup_hidden_race.sim000066400000000000000000000001441252441671000250450ustar00rootroot00000000000000intragroup_hidden_race.cl intragroup_hidden_race 2 1 1 2 1 1 Oclgrind-15.5/tests/kernels/data-race/local_only_fence.cl000066400000000000000000000005421252441671000234510ustar00rootroot00000000000000kernel void local_only_fence(global int *scratch, global int *output) { int i = get_global_id(0); int g = get_group_id(0); scratch[i] = i; barrier(CLK_LOCAL_MEM_FENCE); if (get_local_id(0) == 0) { int x = 0; for (int l = 0; l < get_local_size(0); l++) { x += scratch[get_local_size(0)*g + l]; } output[g] = x; } } Oclgrind-15.5/tests/kernels/data-race/local_only_fence.ref000066400000000000000000000001601252441671000236230ustar00rootroot00000000000000ERROR EXPECTED Argument 'output': 16 bytes output[0] = 6 output[1] = 22 output[2] = 38 output[3] = 54 Oclgrind-15.5/tests/kernels/data-race/local_only_fence.sim000066400000000000000000000001321252441671000236360ustar00rootroot00000000000000local_only_fence.cl local_only_fence 16 1 1 4 1 1 Oclgrind-15.5/tests/kernels/data-race/local_read_write_race.cl000066400000000000000000000004001252441671000244400ustar00rootroot00000000000000kernel void local_read_write_race(global 
int *data, local int *scratch) { int l = get_local_id(0); scratch[l] = l; if (l == 0) { int x = 0; for (int i = 0; i < get_local_size(0); i++) { x += scratch[i]; } *data = x; } } Oclgrind-15.5/tests/kernels/data-race/local_read_write_race.ref000066400000000000000000000000701252441671000246210ustar00rootroot00000000000000ERROR EXPECETD Argument 'data': 4 bytes data[0] = 0 Oclgrind-15.5/tests/kernels/data-race/local_read_write_race.sim000066400000000000000000000001331252441671000246350ustar00rootroot00000000000000local_read_write_race.cl local_read_write_race 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/data-race/local_write_write_race.cl000066400000000000000000000002571252441671000246710ustar00rootroot00000000000000kernel void local_write_write_race(global int *data, local int *scratch) { int i = get_global_id(0); *scratch = i; barrier(CLK_LOCAL_MEM_FENCE); data[i] = *scratch; } Oclgrind-15.5/tests/kernels/data-race/local_write_write_race.ref000066400000000000000000000001431252441671000250410ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 3 data[1] = 3 data[2] = 3 data[3] = 3 Oclgrind-15.5/tests/kernels/data-race/local_write_write_race.sim000066400000000000000000000001351252441671000250560ustar00rootroot00000000000000local_write_write_race.cl local_write_write_race 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/data-race/uniform_write_race.cl000066400000000000000000000001021252441671000240310ustar00rootroot00000000000000kernel void uniform_write_race(global int *data) { *data = 0; } Oclgrind-15.5/tests/kernels/data-race/uniform_write_race.ref000066400000000000000000000000511252441671000242120ustar00rootroot00000000000000 Argument 'data': 4 bytes data[0] = 0 Oclgrind-15.5/tests/kernels/data-race/uniform_write_race.sim000066400000000000000000000001141252441671000242260ustar00rootroot00000000000000uniform_write_race.cl uniform_write_race 4 1 1 4 1 1 
Oclgrind-15.5/tests/kernels/memcheck/000077500000000000000000000000001252441671000175705ustar00rootroot00000000000000Oclgrind-15.5/tests/kernels/memcheck/async_copy_out_of_bounds.cl000066400000000000000000000003771252441671000252130ustar00rootroot00000000000000kernel void async_copy_out_of_bounds(local int *src, global int *dst) { int l = get_local_id(0); src[l] = l; barrier(CLK_LOCAL_MEM_FENCE); event_t event = async_work_group_copy(dst+1, src, get_local_size(0), 0); wait_group_events(1, &event); } Oclgrind-15.5/tests/kernels/memcheck/async_copy_out_of_bounds.ref000066400000000000000000000001361252441671000253620ustar00rootroot00000000000000ERROR EXPECTED Argument 'dst': 16 bytes dst[0] = 0 dst[1] = 0 dst[2] = 1 dst[3] = 2 Oclgrind-15.5/tests/kernels/memcheck/async_copy_out_of_bounds.sim000066400000000000000000000001421252441671000253730ustar00rootroot00000000000000async_copy_out_of_bounds.cl async_copy_out_of_bounds 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/memcheck/atomic_out_of_bounds.cl000066400000000000000000000001611252441671000243070ustar00rootroot00000000000000kernel void atomic_out_of_bounds(global int *counters) { int i = get_global_id(0); atomic_inc(counters+i); } Oclgrind-15.5/tests/kernels/memcheck/atomic_out_of_bounds.ref000066400000000000000000000001671252441671000244730ustar00rootroot00000000000000ERROR EXPECTED Argument 'counters': 16 bytes counters[0] = 1 counters[1] = 1 counters[2] = 1 counters[3] = 1 Oclgrind-15.5/tests/kernels/memcheck/atomic_out_of_bounds.sim000066400000000000000000000001201252441671000244740ustar00rootroot00000000000000atomic_out_of_bounds.cl atomic_out_of_bounds 5 1 1 1 1 1 Oclgrind-15.5/tests/kernels/memcheck/dereference_null.cl000066400000000000000000000001411252441671000234050ustar00rootroot00000000000000kernel void dereference_null(global int *input, global int *output) { output[0] *= input[0]; } 
Oclgrind-15.5/tests/kernels/memcheck/dereference_null.ref000066400000000000000000000000741252441671000235700ustar00rootroot00000000000000ERROR EXPECTED Argument 'output': 4 bytes output[0] = 0 Oclgrind-15.5/tests/kernels/memcheck/dereference_null.sim000066400000000000000000000001161252441671000236010ustar00rootroot00000000000000dereference_null.cl dereference_null 1 1 1 1 1 1 Oclgrind-15.5/tests/kernels/memcheck/read_out_of_bounds.cl000066400000000000000000000003111252441671000237430ustar00rootroot00000000000000kernel void read_out_of_bounds(global int *a, global int *b, global int *c) { int i = get_global_id(0); if (i < 4) { c[i] = a[i] + b[i]; } else { c[i] = a[0] * (a[i] + b[i]); } } Oclgrind-15.5/tests/kernels/memcheck/read_out_of_bounds.ref000066400000000000000000000001371252441671000241270ustar00rootroot00000000000000ERROR EXPECTED Argument 'c': 20 bytes c[0] = 0 c[1] = 2 c[2] = 4 c[3] = 6 c[4] = 0 Oclgrind-15.5/tests/kernels/memcheck/read_out_of_bounds.sim000066400000000000000000000001701252441671000241400ustar00rootroot00000000000000read_out_of_bounds.cl read_out_of_bounds 5 1 1 5 1 1 Oclgrind-15.5/tests/kernels/memcheck/read_write_only_memory.cl000066400000000000000000000002031252441671000246610ustar00rootroot00000000000000kernel void read_write_only_memory(global int *input, global int *output) { int i = get_global_id(0); output[i] += input[i]; } Oclgrind-15.5/tests/kernels/memcheck/read_write_only_memory.ref000066400000000000000000000001551252441671000250450ustar00rootroot00000000000000ERROR EXPECTED Argument 'output': 16 bytes output[0] = 0 output[1] = 1 output[2] = 2 output[3] = 3 Oclgrind-15.5/tests/kernels/memcheck/read_write_only_memory.sim000066400000000000000000000001601252441671000250550ustar00rootroot00000000000000read_write_only_memory.cl read_write_only_memory 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/memcheck/write_out_of_bounds.cl000066400000000000000000000002031252441671000241620ustar00rootroot00000000000000kernel void 
write_out_of_bounds(global int *a, global int *b, global int *c) { int i = get_global_id(0); c[i] = a[i] + b[i]; } Oclgrind-15.5/tests/kernels/memcheck/write_out_of_bounds.ref000066400000000000000000000001241252441671000243420ustar00rootroot00000000000000ERROR EXPECTED Argument 'c': 16 bytes c[0] = 0 c[1] = 2 c[2] = 4 c[3] = 6 Oclgrind-15.5/tests/kernels/memcheck/write_out_of_bounds.sim000066400000000000000000000001721252441671000243610ustar00rootroot00000000000000write_out_of_bounds.cl write_out_of_bounds 5 1 1 5 1 1 Oclgrind-15.5/tests/kernels/memcheck/write_read_only_memory.cl000066400000000000000000000002041252441671000246620ustar00rootroot00000000000000kernel void write_read_only_memory(global int *input, global int *output) { int i = get_global_id(0); output[i] = input[i]++; } Oclgrind-15.5/tests/kernels/memcheck/write_read_only_memory.ref000066400000000000000000000001551252441671000250450ustar00rootroot00000000000000ERROR EXPECTED Argument 'output': 16 bytes output[0] = 0 output[1] = 1 output[2] = 2 output[3] = 3 Oclgrind-15.5/tests/kernels/memcheck/write_read_only_memory.sim000066400000000000000000000001551252441671000250610ustar00rootroot00000000000000write_read_only_memory.cl write_read_only_memory 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/misc/000077500000000000000000000000001252441671000167475ustar00rootroot00000000000000Oclgrind-15.5/tests/kernels/misc/array.cl000066400000000000000000000002621252441671000204050ustar00rootroot00000000000000kernel void array(global long16 *output) { long16 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; int i = get_global_id(0); long16 *foo = data; output[i] = foo[i]; } Oclgrind-15.5/tests/kernels/misc/array.ref000066400000000000000000000042621252441671000205670ustar00rootroot00000000000000 Argument 'output': 1024 bytes output[0] = 0 output[1] = 0 output[2] = 0 output[3] = 0 output[4] = 0 output[5] = 0 output[6] = 0 output[7] = 0 output[8] = 0 output[9] = 0 output[10] = 0 output[11] = 0 output[12] = 0 output[13] = 0 
output[14] = 0 output[15] = 0 output[16] = 1 output[17] = 1 output[18] = 1 output[19] = 1 output[20] = 1 output[21] = 1 output[22] = 1 output[23] = 1 output[24] = 1 output[25] = 1 output[26] = 1 output[27] = 1 output[28] = 1 output[29] = 1 output[30] = 1 output[31] = 1 output[32] = 2 output[33] = 2 output[34] = 2 output[35] = 2 output[36] = 2 output[37] = 2 output[38] = 2 output[39] = 2 output[40] = 2 output[41] = 2 output[42] = 2 output[43] = 2 output[44] = 2 output[45] = 2 output[46] = 2 output[47] = 2 output[48] = 3 output[49] = 3 output[50] = 3 output[51] = 3 output[52] = 3 output[53] = 3 output[54] = 3 output[55] = 3 output[56] = 3 output[57] = 3 output[58] = 3 output[59] = 3 output[60] = 3 output[61] = 3 output[62] = 3 output[63] = 3 output[64] = 4 output[65] = 4 output[66] = 4 output[67] = 4 output[68] = 4 output[69] = 4 output[70] = 4 output[71] = 4 output[72] = 4 output[73] = 4 output[74] = 4 output[75] = 4 output[76] = 4 output[77] = 4 output[78] = 4 output[79] = 4 output[80] = 5 output[81] = 5 output[82] = 5 output[83] = 5 output[84] = 5 output[85] = 5 output[86] = 5 output[87] = 5 output[88] = 5 output[89] = 5 output[90] = 5 output[91] = 5 output[92] = 5 output[93] = 5 output[94] = 5 output[95] = 5 output[96] = 6 output[97] = 6 output[98] = 6 output[99] = 6 output[100] = 6 output[101] = 6 output[102] = 6 output[103] = 6 output[104] = 6 output[105] = 6 output[106] = 6 output[107] = 6 output[108] = 6 output[109] = 6 output[110] = 6 output[111] = 6 output[112] = 7 output[113] = 7 output[114] = 7 output[115] = 7 output[116] = 7 output[117] = 7 output[118] = 7 output[119] = 7 output[120] = 7 output[121] = 7 output[122] = 7 output[123] = 7 output[124] = 7 output[125] = 7 output[126] = 7 output[127] = 7 Oclgrind-15.5/tests/kernels/misc/array.sim000066400000000000000000000000641252441671000205770ustar00rootroot00000000000000array.cl array 8 1 1 1 1 1 
Oclgrind-15.5/tests/kernels/misc/reduce.cl000066400000000000000000000011551252441671000205400ustar00rootroot00000000000000kernel void reduce(unsigned int n, global unsigned int *data, global unsigned int *result, local unsigned int *localData) { unsigned int lid = get_local_id(0); unsigned int lsz = get_local_size(0); unsigned int sum = 0; for (unsigned int i = lid; i < n; i+=lsz) { sum += data[i]; } localData[lid] = sum; for (unsigned int offset = lsz/2; offset > 0; offset/=2) { barrier(CLK_LOCAL_MEM_FENCE); if (lid < offset) { localData[lid] += localData[lid + offset]; } } if (lid == 0) { *result = localData[lid]; } } Oclgrind-15.5/tests/kernels/misc/reduce.ref000066400000000000000000000000571252441671000207160ustar00rootroot00000000000000 Argument 'result': 4 bytes result[0] = 120 Oclgrind-15.5/tests/kernels/misc/reduce.sim000066400000000000000000000001411252441671000207240ustar00rootroot00000000000000reduce.cl reduce 4 1 1 4 1 1 16 Oclgrind-15.5/tests/kernels/misc/vecadd.cl000066400000000000000000000001771252441671000205220ustar00rootroot00000000000000kernel void vecadd(global float *a, global float *b, global float *c) { size_t i = get_global_id(0); c[i] = a[i] + b[i]; } Oclgrind-15.5/tests/kernels/misc/vecadd.ref000066400000000000000000000366321252441671000207050ustar00rootroot00000000000000 Argument 'c': 4096 bytes c[0] = 0 c[1] = 2 c[2] = 4 c[3] = 6 c[4] = 8 c[5] = 10 c[6] = 12 c[7] = 14 c[8] = 16 c[9] = 18 c[10] = 20 c[11] = 22 c[12] = 24 c[13] = 26 c[14] = 28 c[15] = 30 c[16] = 32 c[17] = 34 c[18] = 36 c[19] = 38 c[20] = 40 c[21] = 42 c[22] = 44 c[23] = 46 c[24] = 48 c[25] = 50 c[26] = 52 c[27] = 54 c[28] = 56 c[29] = 58 c[30] = 60 c[31] = 62 c[32] = 64 c[33] = 66 c[34] = 68 c[35] = 70 c[36] = 72 c[37] = 74 c[38] = 76 c[39] = 78 c[40] = 80 c[41] = 82 c[42] = 84 c[43] = 86 c[44] = 88 c[45] = 90 c[46] = 92 c[47] = 94 c[48] = 96 c[49] = 98 c[50] = 100 c[51] = 102 c[52] = 104 c[53] = 106 c[54] = 108 c[55] = 110 c[56] = 112 c[57] = 114 c[58] = 116 c[59] = 118 
c[60] = 120 c[61] = 122 c[62] = 124 c[63] = 126 c[64] = 128 c[65] = 130 c[66] = 132 c[67] = 134 c[68] = 136 c[69] = 138 c[70] = 140 c[71] = 142 c[72] = 144 c[73] = 146 c[74] = 148 c[75] = 150 c[76] = 152 c[77] = 154 c[78] = 156 c[79] = 158 c[80] = 160 c[81] = 162 c[82] = 164 c[83] = 166 c[84] = 168 c[85] = 170 c[86] = 172 c[87] = 174 c[88] = 176 c[89] = 178 c[90] = 180 c[91] = 182 c[92] = 184 c[93] = 186 c[94] = 188 c[95] = 190 c[96] = 192 c[97] = 194 c[98] = 196 c[99] = 198 c[100] = 200 c[101] = 202 c[102] = 204 c[103] = 206 c[104] = 208 c[105] = 210 c[106] = 212 c[107] = 214 c[108] = 216 c[109] = 218 c[110] = 220 c[111] = 222 c[112] = 224 c[113] = 226 c[114] = 228 c[115] = 230 c[116] = 232 c[117] = 234 c[118] = 236 c[119] = 238 c[120] = 240 c[121] = 242 c[122] = 244 c[123] = 246 c[124] = 248 c[125] = 250 c[126] = 252 c[127] = 254 c[128] = 256 c[129] = 258 c[130] = 260 c[131] = 262 c[132] = 264 c[133] = 266 c[134] = 268 c[135] = 270 c[136] = 272 c[137] = 274 c[138] = 276 c[139] = 278 c[140] = 280 c[141] = 282 c[142] = 284 c[143] = 286 c[144] = 288 c[145] = 290 c[146] = 292 c[147] = 294 c[148] = 296 c[149] = 298 c[150] = 300 c[151] = 302 c[152] = 304 c[153] = 306 c[154] = 308 c[155] = 310 c[156] = 312 c[157] = 314 c[158] = 316 c[159] = 318 c[160] = 320 c[161] = 322 c[162] = 324 c[163] = 326 c[164] = 328 c[165] = 330 c[166] = 332 c[167] = 334 c[168] = 336 c[169] = 338 c[170] = 340 c[171] = 342 c[172] = 344 c[173] = 346 c[174] = 348 c[175] = 350 c[176] = 352 c[177] = 354 c[178] = 356 c[179] = 358 c[180] = 360 c[181] = 362 c[182] = 364 c[183] = 366 c[184] = 368 c[185] = 370 c[186] = 372 c[187] = 374 c[188] = 376 c[189] = 378 c[190] = 380 c[191] = 382 c[192] = 384 c[193] = 386 c[194] = 388 c[195] = 390 c[196] = 392 c[197] = 394 c[198] = 396 c[199] = 398 c[200] = 400 c[201] = 402 c[202] = 404 c[203] = 406 c[204] = 408 c[205] = 410 c[206] = 412 c[207] = 414 c[208] = 416 c[209] = 418 c[210] = 420 c[211] = 422 c[212] = 424 c[213] = 426 c[214] = 428 c[215] = 430 c[216] = 
432 c[217] = 434 c[218] = 436 c[219] = 438 c[220] = 440 c[221] = 442 c[222] = 444 c[223] = 446 c[224] = 448 c[225] = 450 c[226] = 452 c[227] = 454 c[228] = 456 c[229] = 458 c[230] = 460 c[231] = 462 c[232] = 464 c[233] = 466 c[234] = 468 c[235] = 470 c[236] = 472 c[237] = 474 c[238] = 476 c[239] = 478 c[240] = 480 c[241] = 482 c[242] = 484 c[243] = 486 c[244] = 488 c[245] = 490 c[246] = 492 c[247] = 494 c[248] = 496 c[249] = 498 c[250] = 500 c[251] = 502 c[252] = 504 c[253] = 506 c[254] = 508 c[255] = 510 c[256] = 512 c[257] = 514 c[258] = 516 c[259] = 518 c[260] = 520 c[261] = 522 c[262] = 524 c[263] = 526 c[264] = 528 c[265] = 530 c[266] = 532 c[267] = 534 c[268] = 536 c[269] = 538 c[270] = 540 c[271] = 542 c[272] = 544 c[273] = 546 c[274] = 548 c[275] = 550 c[276] = 552 c[277] = 554 c[278] = 556 c[279] = 558 c[280] = 560 c[281] = 562 c[282] = 564 c[283] = 566 c[284] = 568 c[285] = 570 c[286] = 572 c[287] = 574 c[288] = 576 c[289] = 578 c[290] = 580 c[291] = 582 c[292] = 584 c[293] = 586 c[294] = 588 c[295] = 590 c[296] = 592 c[297] = 594 c[298] = 596 c[299] = 598 c[300] = 600 c[301] = 602 c[302] = 604 c[303] = 606 c[304] = 608 c[305] = 610 c[306] = 612 c[307] = 614 c[308] = 616 c[309] = 618 c[310] = 620 c[311] = 622 c[312] = 624 c[313] = 626 c[314] = 628 c[315] = 630 c[316] = 632 c[317] = 634 c[318] = 636 c[319] = 638 c[320] = 640 c[321] = 642 c[322] = 644 c[323] = 646 c[324] = 648 c[325] = 650 c[326] = 652 c[327] = 654 c[328] = 656 c[329] = 658 c[330] = 660 c[331] = 662 c[332] = 664 c[333] = 666 c[334] = 668 c[335] = 670 c[336] = 672 c[337] = 674 c[338] = 676 c[339] = 678 c[340] = 680 c[341] = 682 c[342] = 684 c[343] = 686 c[344] = 688 c[345] = 690 c[346] = 692 c[347] = 694 c[348] = 696 c[349] = 698 c[350] = 700 c[351] = 702 c[352] = 704 c[353] = 706 c[354] = 708 c[355] = 710 c[356] = 712 c[357] = 714 c[358] = 716 c[359] = 718 c[360] = 720 c[361] = 722 c[362] = 724 c[363] = 726 c[364] = 728 c[365] = 730 c[366] = 732 c[367] = 734 c[368] = 736 c[369] = 738 c[370] 
= 740 c[371] = 742 c[372] = 744 c[373] = 746 c[374] = 748 c[375] = 750 c[376] = 752 c[377] = 754 c[378] = 756 c[379] = 758 c[380] = 760 c[381] = 762 c[382] = 764 c[383] = 766 c[384] = 768 c[385] = 770 c[386] = 772 c[387] = 774 c[388] = 776 c[389] = 778 c[390] = 780 c[391] = 782 c[392] = 784 c[393] = 786 c[394] = 788 c[395] = 790 c[396] = 792 c[397] = 794 c[398] = 796 c[399] = 798 c[400] = 800 c[401] = 802 c[402] = 804 c[403] = 806 c[404] = 808 c[405] = 810 c[406] = 812 c[407] = 814 c[408] = 816 c[409] = 818 c[410] = 820 c[411] = 822 c[412] = 824 c[413] = 826 c[414] = 828 c[415] = 830 c[416] = 832 c[417] = 834 c[418] = 836 c[419] = 838 c[420] = 840 c[421] = 842 c[422] = 844 c[423] = 846 c[424] = 848 c[425] = 850 c[426] = 852 c[427] = 854 c[428] = 856 c[429] = 858 c[430] = 860 c[431] = 862 c[432] = 864 c[433] = 866 c[434] = 868 c[435] = 870 c[436] = 872 c[437] = 874 c[438] = 876 c[439] = 878 c[440] = 880 c[441] = 882 c[442] = 884 c[443] = 886 c[444] = 888 c[445] = 890 c[446] = 892 c[447] = 894 c[448] = 896 c[449] = 898 c[450] = 900 c[451] = 902 c[452] = 904 c[453] = 906 c[454] = 908 c[455] = 910 c[456] = 912 c[457] = 914 c[458] = 916 c[459] = 918 c[460] = 920 c[461] = 922 c[462] = 924 c[463] = 926 c[464] = 928 c[465] = 930 c[466] = 932 c[467] = 934 c[468] = 936 c[469] = 938 c[470] = 940 c[471] = 942 c[472] = 944 c[473] = 946 c[474] = 948 c[475] = 950 c[476] = 952 c[477] = 954 c[478] = 956 c[479] = 958 c[480] = 960 c[481] = 962 c[482] = 964 c[483] = 966 c[484] = 968 c[485] = 970 c[486] = 972 c[487] = 974 c[488] = 976 c[489] = 978 c[490] = 980 c[491] = 982 c[492] = 984 c[493] = 986 c[494] = 988 c[495] = 990 c[496] = 992 c[497] = 994 c[498] = 996 c[499] = 998 c[500] = 1000 c[501] = 1002 c[502] = 1004 c[503] = 1006 c[504] = 1008 c[505] = 1010 c[506] = 1012 c[507] = 1014 c[508] = 1016 c[509] = 1018 c[510] = 1020 c[511] = 1022 c[512] = 1024 c[513] = 1026 c[514] = 1028 c[515] = 1030 c[516] = 1032 c[517] = 1034 c[518] = 1036 c[519] = 1038 c[520] = 1040 c[521] = 1042 c[522] = 
1044 c[523] = 1046 c[524] = 1048 c[525] = 1050 c[526] = 1052 c[527] = 1054 c[528] = 1056 c[529] = 1058 c[530] = 1060 c[531] = 1062 c[532] = 1064 c[533] = 1066 c[534] = 1068 c[535] = 1070 c[536] = 1072 c[537] = 1074 c[538] = 1076 c[539] = 1078 c[540] = 1080 c[541] = 1082 c[542] = 1084 c[543] = 1086 c[544] = 1088 c[545] = 1090 c[546] = 1092 c[547] = 1094 c[548] = 1096 c[549] = 1098 c[550] = 1100 c[551] = 1102 c[552] = 1104 c[553] = 1106 c[554] = 1108 c[555] = 1110 c[556] = 1112 c[557] = 1114 c[558] = 1116 c[559] = 1118 c[560] = 1120 c[561] = 1122 c[562] = 1124 c[563] = 1126 c[564] = 1128 c[565] = 1130 c[566] = 1132 c[567] = 1134 c[568] = 1136 c[569] = 1138 c[570] = 1140 c[571] = 1142 c[572] = 1144 c[573] = 1146 c[574] = 1148 c[575] = 1150 c[576] = 1152 c[577] = 1154 c[578] = 1156 c[579] = 1158 c[580] = 1160 c[581] = 1162 c[582] = 1164 c[583] = 1166 c[584] = 1168 c[585] = 1170 c[586] = 1172 c[587] = 1174 c[588] = 1176 c[589] = 1178 c[590] = 1180 c[591] = 1182 c[592] = 1184 c[593] = 1186 c[594] = 1188 c[595] = 1190 c[596] = 1192 c[597] = 1194 c[598] = 1196 c[599] = 1198 c[600] = 1200 c[601] = 1202 c[602] = 1204 c[603] = 1206 c[604] = 1208 c[605] = 1210 c[606] = 1212 c[607] = 1214 c[608] = 1216 c[609] = 1218 c[610] = 1220 c[611] = 1222 c[612] = 1224 c[613] = 1226 c[614] = 1228 c[615] = 1230 c[616] = 1232 c[617] = 1234 c[618] = 1236 c[619] = 1238 c[620] = 1240 c[621] = 1242 c[622] = 1244 c[623] = 1246 c[624] = 1248 c[625] = 1250 c[626] = 1252 c[627] = 1254 c[628] = 1256 c[629] = 1258 c[630] = 1260 c[631] = 1262 c[632] = 1264 c[633] = 1266 c[634] = 1268 c[635] = 1270 c[636] = 1272 c[637] = 1274 c[638] = 1276 c[639] = 1278 c[640] = 1280 c[641] = 1282 c[642] = 1284 c[643] = 1286 c[644] = 1288 c[645] = 1290 c[646] = 1292 c[647] = 1294 c[648] = 1296 c[649] = 1298 c[650] = 1300 c[651] = 1302 c[652] = 1304 c[653] = 1306 c[654] = 1308 c[655] = 1310 c[656] = 1312 c[657] = 1314 c[658] = 1316 c[659] = 1318 c[660] = 1320 c[661] = 1322 c[662] = 1324 c[663] = 1326 c[664] = 1328 c[665] 
= 1330 c[666] = 1332 c[667] = 1334 c[668] = 1336 c[669] = 1338 c[670] = 1340 c[671] = 1342 c[672] = 1344 c[673] = 1346 c[674] = 1348 c[675] = 1350 c[676] = 1352 c[677] = 1354 c[678] = 1356 c[679] = 1358 c[680] = 1360 c[681] = 1362 c[682] = 1364 c[683] = 1366 c[684] = 1368 c[685] = 1370 c[686] = 1372 c[687] = 1374 c[688] = 1376 c[689] = 1378 c[690] = 1380 c[691] = 1382 c[692] = 1384 c[693] = 1386 c[694] = 1388 c[695] = 1390 c[696] = 1392 c[697] = 1394 c[698] = 1396 c[699] = 1398 c[700] = 1400 c[701] = 1402 c[702] = 1404 c[703] = 1406 c[704] = 1408 c[705] = 1410 c[706] = 1412 c[707] = 1414 c[708] = 1416 c[709] = 1418 c[710] = 1420 c[711] = 1422 c[712] = 1424 c[713] = 1426 c[714] = 1428 c[715] = 1430 c[716] = 1432 c[717] = 1434 c[718] = 1436 c[719] = 1438 c[720] = 1440 c[721] = 1442 c[722] = 1444 c[723] = 1446 c[724] = 1448 c[725] = 1450 c[726] = 1452 c[727] = 1454 c[728] = 1456 c[729] = 1458 c[730] = 1460 c[731] = 1462 c[732] = 1464 c[733] = 1466 c[734] = 1468 c[735] = 1470 c[736] = 1472 c[737] = 1474 c[738] = 1476 c[739] = 1478 c[740] = 1480 c[741] = 1482 c[742] = 1484 c[743] = 1486 c[744] = 1488 c[745] = 1490 c[746] = 1492 c[747] = 1494 c[748] = 1496 c[749] = 1498 c[750] = 1500 c[751] = 1502 c[752] = 1504 c[753] = 1506 c[754] = 1508 c[755] = 1510 c[756] = 1512 c[757] = 1514 c[758] = 1516 c[759] = 1518 c[760] = 1520 c[761] = 1522 c[762] = 1524 c[763] = 1526 c[764] = 1528 c[765] = 1530 c[766] = 1532 c[767] = 1534 c[768] = 1536 c[769] = 1538 c[770] = 1540 c[771] = 1542 c[772] = 1544 c[773] = 1546 c[774] = 1548 c[775] = 1550 c[776] = 1552 c[777] = 1554 c[778] = 1556 c[779] = 1558 c[780] = 1560 c[781] = 1562 c[782] = 1564 c[783] = 1566 c[784] = 1568 c[785] = 1570 c[786] = 1572 c[787] = 1574 c[788] = 1576 c[789] = 1578 c[790] = 1580 c[791] = 1582 c[792] = 1584 c[793] = 1586 c[794] = 1588 c[795] = 1590 c[796] = 1592 c[797] = 1594 c[798] = 1596 c[799] = 1598 c[800] = 1600 c[801] = 1602 c[802] = 1604 c[803] = 1606 c[804] = 1608 c[805] = 1610 c[806] = 1612 c[807] = 1614 
c[808] = 1616 c[809] = 1618 c[810] = 1620 c[811] = 1622 c[812] = 1624 c[813] = 1626 c[814] = 1628 c[815] = 1630 c[816] = 1632 c[817] = 1634 c[818] = 1636 c[819] = 1638 c[820] = 1640 c[821] = 1642 c[822] = 1644 c[823] = 1646 c[824] = 1648 c[825] = 1650 c[826] = 1652 c[827] = 1654 c[828] = 1656 c[829] = 1658 c[830] = 1660 c[831] = 1662 c[832] = 1664 c[833] = 1666 c[834] = 1668 c[835] = 1670 c[836] = 1672 c[837] = 1674 c[838] = 1676 c[839] = 1678 c[840] = 1680 c[841] = 1682 c[842] = 1684 c[843] = 1686 c[844] = 1688 c[845] = 1690 c[846] = 1692 c[847] = 1694 c[848] = 1696 c[849] = 1698 c[850] = 1700 c[851] = 1702 c[852] = 1704 c[853] = 1706 c[854] = 1708 c[855] = 1710 c[856] = 1712 c[857] = 1714 c[858] = 1716 c[859] = 1718 c[860] = 1720 c[861] = 1722 c[862] = 1724 c[863] = 1726 c[864] = 1728 c[865] = 1730 c[866] = 1732 c[867] = 1734 c[868] = 1736 c[869] = 1738 c[870] = 1740 c[871] = 1742 c[872] = 1744 c[873] = 1746 c[874] = 1748 c[875] = 1750 c[876] = 1752 c[877] = 1754 c[878] = 1756 c[879] = 1758 c[880] = 1760 c[881] = 1762 c[882] = 1764 c[883] = 1766 c[884] = 1768 c[885] = 1770 c[886] = 1772 c[887] = 1774 c[888] = 1776 c[889] = 1778 c[890] = 1780 c[891] = 1782 c[892] = 1784 c[893] = 1786 c[894] = 1788 c[895] = 1790 c[896] = 1792 c[897] = 1794 c[898] = 1796 c[899] = 1798 c[900] = 1800 c[901] = 1802 c[902] = 1804 c[903] = 1806 c[904] = 1808 c[905] = 1810 c[906] = 1812 c[907] = 1814 c[908] = 1816 c[909] = 1818 c[910] = 1820 c[911] = 1822 c[912] = 1824 c[913] = 1826 c[914] = 1828 c[915] = 1830 c[916] = 1832 c[917] = 1834 c[918] = 1836 c[919] = 1838 c[920] = 1840 c[921] = 1842 c[922] = 1844 c[923] = 1846 c[924] = 1848 c[925] = 1850 c[926] = 1852 c[927] = 1854 c[928] = 1856 c[929] = 1858 c[930] = 1860 c[931] = 1862 c[932] = 1864 c[933] = 1866 c[934] = 1868 c[935] = 1870 c[936] = 1872 c[937] = 1874 c[938] = 1876 c[939] = 1878 c[940] = 1880 c[941] = 1882 c[942] = 1884 c[943] = 1886 c[944] = 1888 c[945] = 1890 c[946] = 1892 c[947] = 1894 c[948] = 1896 c[949] = 1898 c[950] = 
1900 c[951] = 1902 c[952] = 1904 c[953] = 1906 c[954] = 1908 c[955] = 1910 c[956] = 1912 c[957] = 1914 c[958] = 1916 c[959] = 1918 c[960] = 1920 c[961] = 1922 c[962] = 1924 c[963] = 1926 c[964] = 1928 c[965] = 1930 c[966] = 1932 c[967] = 1934 c[968] = 1936 c[969] = 1938 c[970] = 1940 c[971] = 1942 c[972] = 1944 c[973] = 1946 c[974] = 1948 c[975] = 1950 c[976] = 1952 c[977] = 1954 c[978] = 1956 c[979] = 1958 c[980] = 1960 c[981] = 1962 c[982] = 1964 c[983] = 1966 c[984] = 1968 c[985] = 1970 c[986] = 1972 c[987] = 1974 c[988] = 1976 c[989] = 1978 c[990] = 1980 c[991] = 1982 c[992] = 1984 c[993] = 1986 c[994] = 1988 c[995] = 1990 c[996] = 1992 c[997] = 1994 c[998] = 1996 c[999] = 1998 c[1000] = 2000 c[1001] = 2002 c[1002] = 2004 c[1003] = 2006 c[1004] = 2008 c[1005] = 2010 c[1006] = 2012 c[1007] = 2014 c[1008] = 2016 c[1009] = 2018 c[1010] = 2020 c[1011] = 2022 c[1012] = 2024 c[1013] = 2026 c[1014] = 2028 c[1015] = 2030 c[1016] = 2032 c[1017] = 2034 c[1018] = 2036 c[1019] = 2038 c[1020] = 2040 c[1021] = 2042 c[1022] = 2044 c[1023] = 2046 Oclgrind-15.5/tests/kernels/misc/vecadd.sim000066400000000000000000000001601252441671000207040ustar00rootroot00000000000000vecadd.cl vecadd 1024 1 1 16 1 1 Oclgrind-15.5/tests/kernels/run_kernel_test.py000066400000000000000000000053041252441671000215730ustar00rootroot00000000000000# run_kernel_test.py (Oclgrind) # Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith, # University of Bristol. All rights reserved. # # This program is provided under a three-clause BSD license. For full # license terms please see the LICENSE file distributed with this # source code. 
import os import re import subprocess import sys # Check arguments if len(sys.argv) != 3: print 'Usage: python run_kernel_test.py EXE SIMFILE' sys.exit(1) if not os.path.isfile(sys.argv[2]): print 'Test file not found' sys.exit(1) # Construct paths to test inputs/outputs test_exe = sys.argv[1] test_file = sys.argv[2] test_dir = os.path.dirname(os.path.realpath(test_file)) test_file = os.path.basename(test_file) test_name = os.path.splitext(test_file)[0] test_out = test_name + '.out' test_ref = test_dir + os.path.sep + test_name + '.ref' current_dir = os.getcwd() if os.environ.get('AM_TESTS') == '1': # If running via automake, use build directory for output file test_out = 'tests' + os.path.sep + 'kernels' + os.path.sep + \ test_dir.split(os.path.sep)[-1] + os.path.sep + test_out else: # Otherwise, use test directory for output file test_out = test_dir + os.path.sep + test_out # Run oclgrind-kernel out = open(test_out, 'w') os.chdir(test_dir) retval = subprocess.call([test_exe, '--data-races', test_file], stdout=out, stderr=out) out.close() if retval != 0: print 'oclgrind-kernel returned non-zero value (' + str(retval) + ')' sys.exit(retval) # Open output and reference files os.chdir(current_dir) out = open(test_out).read().splitlines() ref = open(test_ref).read().splitlines() # Scan through file to reach argument data oi = 0 ri = 0 try: while re.match('Argument \'.*\': [0-9]+ *bytes', out[oi]) == None: oi += 1 while re.match('Argument \'.*\': [0-9]+ *bytes', ref[ri]) == None: ri += 1 except: print 'Error searching for argument data' sys.exit(1) # Check that an error was produced iff an error was expected # An error occured if global memory dump isn't at start of file # TODO: Improve this so that more details about the error are checked should_error = ri > 1 if should_error and oi < 2: print 'Error expected, but no error reported' sys.exit(1) if not should_error and oi > 1: print 'Error reported, but no error expected' sys.exit(1) # Check that the global memory dump 
matches the reference # TODO: 32-bit machines will fail this due to memory address comparisons match = 1 while oi < len(out): if out[oi] != ref[ri]: print '[%d:%d] "%s" vs "%s"' % (oi, ri, out[oi], ref[ri]) match = 0 oi += 1 ri += 1 if not match: print print 'Output didn\'t match reference' sys.exit(1) # Test passed sys.exit(0) Oclgrind-15.5/tests/kernels/wait_event/000077500000000000000000000000001252441671000201615ustar00rootroot00000000000000Oclgrind-15.5/tests/kernels/wait_event/wait_event_chained.cl000066400000000000000000000005261252441671000243240ustar00rootroot00000000000000kernel void wait_event_chained(global int *data, local int *scratch) { event_t event; event = async_work_group_copy(scratch, data, 1, 0); for (int i = 1; i < 4; i++) { async_work_group_copy(scratch+i, data+i, 1, event); } wait_group_events(1, &event); int i = get_local_id(0); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-15.5/tests/kernels/wait_event/wait_event_chained.ref000066400000000000000000000001241252441671000244740ustar00rootroot00000000000000 Argument 'data': 16 bytes data[0] = 3 data[1] = 2 data[2] = 1 data[3] = 0 Oclgrind-15.5/tests/kernels/wait_event/wait_event_chained.sim000066400000000000000000000001331252441671000245100ustar00rootroot00000000000000wait_event_chained.cl wait_event_chained 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/wait_event/wait_event_divergent.cl000066400000000000000000000005031252441671000247130ustar00rootroot00000000000000kernel void wait_event_divergent(global int *data, local int *scratch) { int i = get_local_id(0); event_t events[2]; events[0] = async_work_group_copy(scratch, data, 1, 0); events[1] = async_work_group_copy(scratch+1, data+1, 1, 0); wait_group_events(1, events+i); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-15.5/tests/kernels/wait_event/wait_event_divergent.ref000066400000000000000000000001061252441671000250700ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 8 bytes data[0] = 0 data[1] = 0 
Oclgrind-15.5/tests/kernels/wait_event/wait_event_divergent.sim000066400000000000000000000001351252441671000251060ustar00rootroot00000000000000wait_event_divergent.cl wait_event_divergent 2 1 1 2 1 1 Oclgrind-15.5/tests/kernels/wait_event/wait_event_duplicates.cl000066400000000000000000000005641252441671000250700ustar00rootroot00000000000000kernel void wait_event_duplicates(global int *data, local int *scratch) { event_t events[4]; events[0] = async_work_group_copy(scratch, data, 1, 0); events[1] = events[0]; events[2] = async_work_group_copy(scratch+1, data+1, 3, 0); events[3] = events[0]; wait_group_events(4, events); int i = get_local_id(0); data[get_local_size(0)-i-1] = scratch[i]; } Oclgrind-15.5/tests/kernels/wait_event/wait_event_duplicates.ref000066400000000000000000000001241252441671000252360ustar00rootroot00000000000000 Argument 'data': 16 bytes data[0] = 3 data[1] = 2 data[2] = 1 data[3] = 0 Oclgrind-15.5/tests/kernels/wait_event/wait_event_duplicates.sim000066400000000000000000000001411252441671000252510ustar00rootroot00000000000000wait_event_duplicates.cl wait_event_duplicates 4 1 1 4 1 1 Oclgrind-15.5/tests/kernels/wait_event/wait_event_invalid.cl000066400000000000000000000001531252441671000243530ustar00rootroot00000000000000kernel void wait_event_invalid(global int *data) { event_t event = 42; wait_group_events(1, &event); } Oclgrind-15.5/tests/kernels/wait_event/wait_event_invalid.ref000066400000000000000000000001431252441671000245300ustar00rootroot00000000000000ERROR EXPECTED Argument 'data': 16 bytes data[0] = 0 data[1] = 1 data[2] = 2 data[3] = 3 Oclgrind-15.5/tests/kernels/wait_event/wait_event_invalid.sim000066400000000000000000000001211252441671000245400ustar00rootroot00000000000000wait_event_invalid.cl wait_event_invalid 4 1 1 4 1 1