pax_global_header00006660000000000000000000000064126356624460014530gustar00rootroot0000000000000052 comment=301c541824daf37978dabe66344767cfaa36455b compute-0.5/000077500000000000000000000000001263566244600130505ustar00rootroot00000000000000compute-0.5/.coveralls.yml000066400000000000000000000000301263566244600156340ustar00rootroot00000000000000service_name: travis-ci compute-0.5/.gitignore000066400000000000000000000002241263566244600150360ustar00rootroot00000000000000# build directory build/ # python compiled files *.pyc # vim temp files .*.sw* # generated documentation bin/ doc/bin/ doc/html/ doc/autodoc.xml compute-0.5/.travis.yml000066400000000000000000000024221263566244600151610ustar00rootroot00000000000000language: cpp compiler: - gcc - clang before_install: - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test - sudo apt-get update -qq - sudo apt-get install -qq fglrx=2:8.960-0ubuntu1 opencl-headers libboost-chrono1.48-dev libboost-date-time1.48-dev libboost-test1.48-dev libboost-system1.48-dev libboost-filesystem1.48-dev libboost-timer1.48-dev libboost-program-options1.48-dev libboost-thread1.48-dev python-yaml lcov libopencv-dev g++-4.8 - gem install coveralls-lcov - if [ "$CXX" = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi script: - mkdir -p build - cd build - cmake -DBOOST_COMPUTE_BUILD_TESTS=ON -DBOOST_COMPUTE_BUILD_EXAMPLES=ON -DBOOST_COMPUTE_BUILD_BENCHMARKS=ON -DBOOST_COMPUTE_USE_OFFLINE_CACHE=ON -DBOOST_COMPUTE_ENABLE_COVERAGE=ON -DBOOST_COMPUTE_HAVE_OPENCV=ON -DBOOST_COMPUTE_THREAD_SAFE=ON -DCMAKE_CXX_FLAGS="-Wall -pedantic -Werror -Wno-variadic-macros -Wno-long-long -Wno-shadow" .. - make -j4 - ./example/list_devices - ctest --output-on-failure - ctest --output-on-failure after_success: - lcov --directory test --base-directory ../include/boost/compute/ --capture --output-file coverage.info - lcov --remove coverage.info '/usr*' -o coverage.info - cd .. 
&& coveralls-lcov build/coverage.info compute-0.5/CMakeLists.txt000066400000000000000000000064341263566244600156170ustar00rootroot00000000000000# --------------------------------------------------------------------------- # Copyright (c) 2013 Kyle Lutz # # Distributed under the Boost Software License, Version 1.0 # See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt # # --------------------------------------------------------------------------- cmake_minimum_required(VERSION 2.8) project(BoostCompute) set(CMAKE_MODULE_PATH ${BoostCompute_SOURCE_DIR}/cmake) # find OpenCL find_package(OpenCL REQUIRED) include_directories(SYSTEM ${OPENCL_INCLUDE_DIRS}) # find Boost find_package(Boost 1.48 REQUIRED) include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) # optional support for c++11 option(BOOST_COMPUTE_USE_CPP11 "Use C++11 features" OFF) if(NOT MSVC) if(${BOOST_COMPUTE_USE_CPP11}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") endif() endif() # optional support for offline-caching option(BOOST_COMPUTE_USE_OFFLINE_CACHE "Use offline cache for OpenCL program binaries" OFF) if(${BOOST_COMPUTE_USE_OFFLINE_CACHE}) add_definitions(-DBOOST_COMPUTE_USE_OFFLINE_CACHE) endif() # thread-safety options option(BOOST_COMPUTE_THREAD_SAFE "Compile with BOOST_COMPUTE_THREAD_SAFE defined" OFF) if(${BOOST_COMPUTE_THREAD_SAFE}) add_definitions(-DBOOST_COMPUTE_THREAD_SAFE) if(${BOOST_COMPUTE_USE_CPP11}) if(MSVC) if (MSVC_VERSION GREATER 1800) add_definitions(-DBOOST_COMPUTE_HAVE_THREAD_LOCAL) endif() else() add_definitions(-DBOOST_COMPUTE_HAVE_THREAD_LOCAL) endif() endif() endif() # optional third-party libraries option(BOOST_COMPUTE_HAVE_EIGEN "Have Eigen" OFF) option(BOOST_COMPUTE_HAVE_OPENCV "Have OpenCV" OFF) option(BOOST_COMPUTE_HAVE_QT "Have Qt" OFF) option(BOOST_COMPUTE_HAVE_VTK "Have VTK" OFF) option(BOOST_COMPUTE_HAVE_CUDA "Have CUDA" OFF) option(BOOST_COMPUTE_HAVE_TBB "Have TBB" OFF) option(BOOST_COMPUTE_HAVE_BOLT "Have BOLT" OFF) 
include_directories(include) if(${OpenCL_HEADER_CL_EXT_FOUND}) add_definitions(-DBOOST_COMPUTE_HAVE_HDR_CL_EXT) endif() if(MSVC) # optional support for boost dynamic libraries option(BOOST_COMPUTE_BOOST_ALL_DYN_LINK "Use boost dynamic link libraries" OFF) if(${BOOST_COMPUTE_BOOST_ALL_DYN_LINK}) add_definitions(-DBOOST_ALL_DYN_LINK) endif() link_directories(${Boost_LIBRARY_DIRS}) add_definitions(-D_CRT_SECURE_NO_WARNINGS) add_definitions(-D_SCL_SECURE_NO_WARNINGS) endif() # compiler options option(BOOST_COMPUTE_ENABLE_COVERAGE "Enable code coverage generation" OFF) option(BOOST_COMPUTE_BUILD_TESTS "Build the Boost.Compute tests" OFF) if(${BOOST_COMPUTE_BUILD_TESTS}) enable_testing() add_subdirectory(test) endif() option(BOOST_COMPUTE_BUILD_BENCHMARKS "Build the Boost.Compute benchmarks" OFF) if(${BOOST_COMPUTE_BUILD_BENCHMARKS}) add_subdirectory(perf) endif() option(BOOST_COMPUTE_BUILD_EXAMPLES "Build the Boost.Compute examples" OFF) if(${BOOST_COMPUTE_BUILD_EXAMPLES}) add_subdirectory(example) endif() # configure cmake config file configure_file( cmake/BoostComputeConfig.cmake.in ${BoostCompute_BINARY_DIR}/BoostComputeConfig.cmake @ONLY ) # install cmake config file install( FILES ${BoostCompute_BINARY_DIR}/BoostComputeConfig.cmake DESTINATION lib/cmake/BoostCompute ) # install header files install(DIRECTORY include/boost DESTINATION include/compute) compute-0.5/CONTRIBUTING.md000066400000000000000000000030621263566244600153020ustar00rootroot00000000000000# Contributing to Boost.Compute # ## Reporting Issues ## We value your feedback about issues you encounter. The more information you provide the easier it is for developers to resolve the problem. Issues should be reported to the [issue tracker]( https://github.com/boostorg/compute/issues?state=open). Issues can also be used to submit feature requests. ## Submitting Pull Requests ## Base your changes on `master` but submit your pull-request to `develop`. 
This can be changed by clicking the "Edit" button on the pull-request page. The develop branch is used for integration and testing of changes before merging into the stable `master` branch. Please try to rebase your changes on the current master branch before submitting. This keeps the git history cleaner and easier to understand. ## Coding Style ## * Indentation is four-spaces (not tabs) * Try to keep line-length under 80 characters * Follow the STL/Boost naming conventions (e.g. lower case with underscores) * When in doubt, match the style of existing code * Otherwise, do whatever you want Also see the [Boost Library Requirements] (http://www.boost.org/development/requirements.html)). ## Running Tests ## To build the tests you must enable the `BOOST_COMPUTE_BUILD_TESTS` option in `cmake`. The tests can be run by executing the `ctest` command from the build directory. Please report any tests failures to the issue tracker along with the test output and information on your system and compute device. ## Support ## Feel free to send an email to kyle.r.lutz@gmail.com with any problems or questions. 
compute-0.5/LICENSE_1_0.txt000066400000000000000000000024721263566244600153370ustar00rootroot00000000000000Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. compute-0.5/README.md000066400000000000000000000057471263566244600143440ustar00rootroot00000000000000# Boost.Compute # [![Build Status](https://travis-ci.org/boostorg/compute.png?branch=master)] (https://travis-ci.org/boostorg/compute) [![Coverage Status](https://coveralls.io/repos/boostorg/compute/badge.png?branch=master)] (https://coveralls.io/r/boostorg/compute) Boost.Compute is a GPU/parallel-computing library for C++ based on OpenCL. 
The core library is a thin C++ wrapper over the OpenCL API and provides access to compute devices, contexts, command queues and memory buffers. On top of the core library is a generic, STL-like interface providing common algorithms (e.g. `transform()`, `accumulate()`, `sort()`) along with common containers (e.g. `vector`, `flat_set`). It also features a number of extensions including parallel-computing algorithms (e.g. `exclusive_scan()`, `scatter()`, `reduce()`) and a number of fancy iterators (e.g. `transform_iterator<>`, `permutation_iterator<>`, `zip_iterator<>`). The full documentation is available at http://boostorg.github.io/compute/. ## Example ## The following example shows how to sort a vector of floats on the GPU: ```c++ #include #include #include namespace compute = boost::compute; int main() { // get the default compute device compute::device gpu = compute::system::default_device(); // create a compute context and command queue compute::context ctx(gpu); compute::command_queue queue(ctx, gpu); // generate random numbers on the host std::vector host_vector(1000000); std::generate(host_vector.begin(), host_vector.end(), rand); // create vector on the device compute::vector device_vector(1000000, ctx); // copy data to the device compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sort data on the device compute::sort( device_vector.begin(), device_vector.end(), queue ); // copy data back to the host compute::copy( device_vector.begin(), device_vector.end(), host_vector.begin(), queue ); return 0; } ``` Boost.Compute is a header-only library, so no linking is required. The example above can be compiled with: `g++ -I/path/to/compute/include sort.cpp -lOpenCL` More examples can be found in the [tutorial]( http://boostorg.github.io/compute/boost_compute/tutorial.html) and under the [examples](https://github.com/boostorg/compute/tree/master/example) directory. 
## Support ## Questions about the library (both usage and development) can be posted to the [mailing list](https://groups.google.com/forum/#!forum/boost-compute). Bugs and feature requests can be reported through the [issue tracker]( https://github.com/boostorg/compute/issues?state=open). Also feel free to send me an email with any problems, questions, or feedback. ## Help Wanted ## The Boost.Compute project is currently looking for additional developers with interest in parallel computing. Please send an email to Kyle Lutz (kyle.r.lutz@gmail.com) for more information. compute-0.5/cmake/000077500000000000000000000000001263566244600141305ustar00rootroot00000000000000compute-0.5/cmake/BoostComputeConfig.cmake.in000066400000000000000000000003671263566244600213160ustar00rootroot00000000000000# Config file for Boost.Compute (https://github.com/boostorg/compute) # # Sets the following variables: # BoostCompute_INCLUDE_DIRS - include directories for Boost.Compute set(BoostCompute_INCLUDE_DIRS "@CMAKE_INSTALL_PREFIX@/include/compute") compute-0.5/cmake/FindBolt.cmake000066400000000000000000000127531263566244600166430ustar00rootroot00000000000000############################################################################ # © 2012,2014 Advanced Micro Devices, Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################ # Locate an BOLT implementation. 
# # Defines the following variables: # # BOLT_FOUND - Found an Bolt imlementation # # Also defines the library variables below as normal # variables. # # BOLT_LIBRARIES - These contain debug/optimized keywords when a debugging library is found # BOLT_INCLUDE_DIRS - All relevant Bolt include directories # # Accepts the following variables as input: # # BOLT_ROOT - (as a CMake or environment variable) # The root directory of an BOLT installation # # FIND_LIBRARY_USE_LIB64_PATHS - Global property that controls whether FindBOLT should search for # 64bit or 32bit libs # #----------------------- # Example Usage: # # find_package(BOLT REQUIRED) # include_directories(${BOLT_INCLUDE_DIRS}) # # add_executable(foo foo.cc) # target_link_libraries(foo ${BOLT_LIBRARIES}) # #----------------------- # This module helps to use BOLT_FIND_COMPONENTS, BOLT_FIND_REQUIRED, BOLT_FIND_QUIETLY include( FindPackageHandleStandardArgs ) # Search for 64bit libs if FIND_LIBRARY_USE_LIB64_PATHS is set to true in the global environment, 32bit libs else get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ) # Debug print statements #message( "BOLT_LIBRARY_PATH_SUFFIXES: ${BOLT_LIBRARY_PATH_SUFFIXES}" ) #message( "ENV{BOLT_ROOT}: $ENV{BOLT_ROOT}" ) #message( "BOLT_FIND_COMPONENTS: ${BOLT_FIND_COMPONENTS}" ) #message( "BOLT_FIND_REQUIRED: ${BOLT_FIND_REQUIRED}" ) # Set the component to find if the user does not specify explicitely if( NOT BOLT_FIND_COMPONENTS ) set( BOLT_FIND_COMPONENTS CL ) endif( ) if(WIN32) if( MSVC_VERSION VERSION_LESS 1600 ) set( myMSVCVer "vc90" ) elseif( MSVC_VERSION VERSION_LESS 1700 ) set( myMSVCVer "vc100" ) elseif( MSVC_VERSION VERSION_LESS 1800 ) set( myMSVCVer "vc110" ) else() set( myMSVCVer "vc120" ) endif( ) else() set( myMSVCVer "gcc" ) endif() if(WIN32) set( BoltLibName "clBolt.runtime.${myMSVCVer}") set( LIB_EXT "lib") else() set( BoltLibName "libclBolt.runtime.${myMSVCVer}") set( LIB_EXT "a") endif() # Eventually, Bolt may support multiple backends, 
but for now it only supports CL list( FIND BOLT_FIND_COMPONENTS CL find_CL ) if( NOT find_CL EQUAL -1 ) set( BOLT_LIBNAME_BASE ${BoltLibName} ) endif( ) if( NOT find_CL EQUAL -1 ) # Find and set the location of main BOLT static lib file find_library( BOLT_LIBRARY_STATIC_RELEASE NAMES ${BOLT_LIBNAME_BASE}.${LIB_EXT} HINTS ${BOLT_ROOT} ENV BOLT_ROOT DOC "BOLT static library path" PATH_SUFFIXES lib ) mark_as_advanced( BOLT_LIBRARY_STATIC_RELEASE ) # Find and set the location of main BOLT static lib file find_library( BOLT_LIBRARY_STATIC_DEBUG NAMES ${BOLT_LIBNAME_BASE}.debug.${LIB_EXT} HINTS ${BOLT_ROOT} ENV BOLT_ROOT DOC "BOLT static library path" PATH_SUFFIXES lib ) mark_as_advanced( BOLT_LIBRARY_STATIC_DEBUG ) if( BOLT_LIBRARY_STATIC_RELEASE ) set( BOLT_LIBRARY_STATIC optimized ${BOLT_LIBRARY_STATIC_RELEASE} ) else( ) set( BOLT_LIBRARY_STATIC "" ) message( "${BOLT_LIBNAME_BASE}.${LIB_EXT}: Release static bolt library not found" ) endif( ) if( BOLT_LIBRARY_STATIC_DEBUG ) set( BOLT_LIBRARY_STATIC ${BOLT_LIBRARY_STATIC} debug ${BOLT_LIBRARY_STATIC_DEBUG} ) else( ) message( "${BOLT_LIBNAME_BASE}.debug.${LIB_EXT}: Debug static bolt library not found" ) endif( ) find_path( BOLT_INCLUDE_DIRS NAMES bolt/cl/bolt.h HINTS ${BOLT_ROOT} ENV BOLT_ROOT DOC "BOLT header file path" PATH_SUFFIXES include ) mark_as_advanced( BOLT_INCLUDE_DIRS ) FIND_PACKAGE_HANDLE_STANDARD_ARGS( BOLT DEFAULT_MSG BOLT_LIBRARY_STATIC BOLT_INCLUDE_DIRS ) endif( ) if( BOLT_FOUND ) list( APPEND BOLT_LIBRARIES ${BOLT_LIBRARY_STATIC} ) else( ) if( NOT BOLT_FIND_QUIETLY ) message( WARNING "FindBOLT could not find the BOLT library" ) message( STATUS "Did you remember to set the BOLT_ROOT environment variable?" ) endif( ) endif() compute-0.5/cmake/FindEigen.cmake000066400000000000000000000163251263566244600167710ustar00rootroot00000000000000# Ceres Solver - A fast non-linear least squares minimizer # Copyright 2013 Google Inc. All rights reserved. 
# http://code.google.com/p/ceres-solver/ # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # * Neither the name of Google Inc. nor the names of its contributors may be # used to endorse or promote products derived from this software without # specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # Author: alexs.mac@gmail.com (Alex Stewart) # # FindEigen.cmake - Find Eigen library, version >= 3. # # This module defines the following variables: # # EIGEN_FOUND: TRUE iff Eigen is found. # EIGEN_INCLUDE_DIRS: Include directories for Eigen. 
# # EIGEN_VERSION: Extracted from Eigen/src/Core/util/Macros.h # EIGEN_WORLD_VERSION: Equal to 3 if EIGEN_VERSION = 3.2.0 # EIGEN_MAJOR_VERSION: Equal to 2 if EIGEN_VERSION = 3.2.0 # EIGEN_MINOR_VERSION: Equal to 0 if EIGEN_VERSION = 3.2.0 # # The following variables control the behaviour of this module: # # EIGEN_INCLUDE_DIR_HINTS: List of additional directories in which to # search for eigen includes, e.g: /timbuktu/eigen3. # # The following variables are also defined by this module, but in line with # CMake recommended FindPackage() module style should NOT be referenced directly # by callers (use the plural variables detailed above instead). These variables # do however affect the behaviour of the module via FIND_[PATH/LIBRARY]() which # are NOT re-called (i.e. search for library is not repeated) if these variables # are set with valid values _in the CMake cache_. This means that if these # variables are set directly in the cache, either by the user in the CMake GUI, # or by the user passing -DVAR=VALUE directives to CMake when called (which # explicitly defines a cache variable), then they will be used verbatim, # bypassing the HINTS variables and other hard-coded search locations. # # EIGEN_INCLUDE_DIR: Include directory for CXSparse, not including the # include directory of any dependencies. # Called if we failed to find Eigen or any of it's required dependencies, # unsets all public (designed to be used externally) variables and reports # error message at priority depending upon [REQUIRED/QUIET/] argument. MACRO(EIGEN_REPORT_NOT_FOUND REASON_MSG) UNSET(EIGEN_FOUND) UNSET(EIGEN_INCLUDE_DIRS) # Make results of search visible in the CMake GUI if Eigen has not # been found so that user does not have to toggle to advanced view. MARK_AS_ADVANCED(CLEAR EIGEN_INCLUDE_DIR) # Note _FIND_[REQUIRED/QUIETLY] variables defined by FindPackage() # use the camelcase library name, not uppercase. 
IF (Eigen_FIND_QUIETLY) MESSAGE(STATUS "Failed to find Eigen - " ${REASON_MSG} ${ARGN}) ELSEIF (Eigen_FIND_REQUIRED) MESSAGE(FATAL_ERROR "Failed to find Eigen - " ${REASON_MSG} ${ARGN}) ELSE() # Neither QUIETLY nor REQUIRED, use no priority which emits a message # but continues configuration and allows generation. MESSAGE("-- Failed to find Eigen - " ${REASON_MSG} ${ARGN}) ENDIF () ENDMACRO(EIGEN_REPORT_NOT_FOUND) # Search user-installed locations first, so that we prefer user installs # to system installs where both exist. # # TODO: Add standard Windows search locations for Eigen. LIST(APPEND EIGEN_CHECK_INCLUDE_DIRS /usr/local/include/eigen3 /usr/local/homebrew/include/eigen3 # Mac OS X /opt/local/var/macports/software/eigen3 # Mac OS X. /opt/local/include/eigen3 /usr/include/eigen3) # Search supplied hint directories first if supplied. FIND_PATH(EIGEN_INCLUDE_DIR NAMES Eigen/Core PATHS ${EIGEN_INCLUDE_DIR_HINTS} ${EIGEN_CHECK_INCLUDE_DIRS}) IF (NOT EIGEN_INCLUDE_DIR OR NOT EXISTS ${EIGEN_INCLUDE_DIR}) EIGEN_REPORT_NOT_FOUND( "Could not find eigen3 include directory, set EIGEN_INCLUDE_DIR to " "path to eigen3 include directory, e.g. /usr/local/include/eigen3.") ENDIF (NOT EIGEN_INCLUDE_DIR OR NOT EXISTS ${EIGEN_INCLUDE_DIR}) # Mark internally as found, then verify. EIGEN_REPORT_NOT_FOUND() unsets # if called. 
SET(EIGEN_FOUND TRUE) # Extract Eigen version from Eigen/src/Core/util/Macros.h IF (EIGEN_INCLUDE_DIR) SET(EIGEN_VERSION_FILE ${EIGEN_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h) IF (NOT EXISTS ${EIGEN_VERSION_FILE}) EIGEN_REPORT_NOT_FOUND( "Could not find file: ${EIGEN_VERSION_FILE} " "containing version information in Eigen install located at: " "${EIGEN_INCLUDE_DIR}.") ELSE (NOT EXISTS ${EIGEN_VERSION_FILE}) FILE(READ ${EIGEN_VERSION_FILE} EIGEN_VERSION_FILE_CONTENTS) STRING(REGEX MATCH "#define EIGEN_WORLD_VERSION [0-9]+" EIGEN_WORLD_VERSION "${EIGEN_VERSION_FILE_CONTENTS}") STRING(REGEX REPLACE "#define EIGEN_WORLD_VERSION ([0-9]+)" "\\1" EIGEN_WORLD_VERSION "${EIGEN_WORLD_VERSION}") STRING(REGEX MATCH "#define EIGEN_MAJOR_VERSION [0-9]+" EIGEN_MAJOR_VERSION "${EIGEN_VERSION_FILE_CONTENTS}") STRING(REGEX REPLACE "#define EIGEN_MAJOR_VERSION ([0-9]+)" "\\1" EIGEN_MAJOR_VERSION "${EIGEN_MAJOR_VERSION}") STRING(REGEX MATCH "#define EIGEN_MINOR_VERSION [0-9]+" EIGEN_MINOR_VERSION "${EIGEN_VERSION_FILE_CONTENTS}") STRING(REGEX REPLACE "#define EIGEN_MINOR_VERSION ([0-9]+)" "\\1" EIGEN_MINOR_VERSION "${EIGEN_MINOR_VERSION}") # This is on a single line s/t CMake does not interpret it as a list of # elements and insert ';' separators which would result in 3.;2.;0 nonsense. SET(EIGEN_VERSION "${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}") ENDIF (NOT EXISTS ${EIGEN_VERSION_FILE}) ENDIF (EIGEN_INCLUDE_DIR) # Set standard CMake FindPackage variables if found. IF (EIGEN_FOUND) SET(EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR}) ENDIF (EIGEN_FOUND) # Handle REQUIRED / QUIET optional arguments and version. INCLUDE(FindPackageHandleStandardArgs) FIND_PACKAGE_HANDLE_STANDARD_ARGS(Eigen REQUIRED_VARS EIGEN_INCLUDE_DIRS VERSION_VAR EIGEN_VERSION) # Only mark internal variables as advanced if we found Eigen, otherwise # leave it visible in the standard GUI for the user to set manually. 
IF (EIGEN_FOUND) MARK_AS_ADVANCED(FORCE EIGEN_INCLUDE_DIR) ENDIF (EIGEN_FOUND) compute-0.5/cmake/FindOpenCL.cmake000066400000000000000000000063611263566244600170610ustar00rootroot00000000000000# - Try to find OpenCL # This module tries to find an OpenCL implementation on your system. It supports # AMD / ATI, Apple and NVIDIA implementations. # # To set the paths manually, define these environment variables: # OpenCL_INCPATH - Include path (e.g. OpenCL_INCPATH=/opt/cuda/4.0/cuda/include) # OpenCL_LIBPATH - Library path (e.h. OpenCL_LIBPATH=/usr/lib64/nvidia) # # Once done this will define # OPENCL_FOUND - system has OpenCL # OPENCL_INCLUDE_DIRS - the OpenCL include directory # OPENCL_LIBRARIES - link these to use OpenCL FIND_PACKAGE(PackageHandleStandardArgs) SET (OPENCL_VERSION_STRING "0.1.0") SET (OPENCL_VERSION_MAJOR 0) SET (OPENCL_VERSION_MINOR 1) SET (OPENCL_VERSION_PATCH 0) IF (APPLE) FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX") FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") ELSE (APPLE) IF (WIN32) # The AMD SDK currently installs both x86 and x86_64 libraries # This is only a hack to find out architecture IF( CMAKE_SIZEOF_VOID_P EQUAL 8 ) SET(OPENCL_LIB_DIR "$ENV{AMDAPPSDKROOT}/lib/x86_64") ELSE( CMAKE_SIZEOF_VOID_P EQUAL 8 ) SET(OPENCL_LIB_DIR "$ENV{AMDAPPSDKROOT}/lib/x86") ENDIF( CMAKE_SIZEOF_VOID_P EQUAL 8 ) FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib PATHS ${OPENCL_LIB_DIR} $ENV{OpenCL_LIBPATH} $ENV{CUDA_LIB_PATH}) GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) # On Win32 search relative to the library FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} $ENV{OpenCL_INCPATH} $ENV{CUDA_INC_PATH}) FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} $ENV{OpenCL_INCPATH} $ENV{CUDA_INC_PATH}) ELSE (WIN32) # Unix style platforms FIND_LIBRARY(OPENCL_LIBRARIES 
OpenCL PATHS ENV LD_LIBRARY_PATH ENV OpenCL_LIBPATH ) GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) # The AMD SDK currently does not place its headers # in /usr/include, therefore also search relative # to the library FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "$ENV{CUDADIR}/include" ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH) FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "$ENV{CUDADIR}/include" ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH) # for Mali GPUs, libmali must be linked FIND_LIBRARY(OPENCL_LIBRARIES_MALI mali PATHS ENV LD_LIBRARY_PATH ENV OpenCL_LIBPATH "/usr/lib64" "$ENV{CUDADIR}/lib64" ) IF (OPENCL_LIBRARIES_MALI) SET(OPENCL_LIBRARIES ${OPENCL_LIBRARIES} ${OPENCL_LIBRARIES_MALI}) ENDIF (OPENCL_LIBRARIES_MALI) ENDIF (WIN32) ENDIF (APPLE) FIND_PACKAGE_HANDLE_STANDARD_ARGS(OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS) IF(_OPENCL_CPP_INCLUDE_DIRS) SET( OPENCL_HAS_CPP_BINDINGS TRUE ) LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) # This is often the same, so clean up LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) ENDIF(_OPENCL_CPP_INCLUDE_DIRS) MARK_AS_ADVANCED( OPENCL_INCLUDE_DIRS ) compute-0.5/cmake/FindTBB.cmake000066400000000000000000000312341263566244600163450ustar00rootroot00000000000000# Locate Intel Threading Building Blocks include paths and libraries # FindTBB.cmake can be found at https://code.google.com/p/findtbb/ # Written by Hannes Hofmann # Improvements by Gino van den Bergen , # Florian Uhlig , # Jiri Marsik # The MIT License # # Copyright (c) 2011 Hannes Hofmann # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation 
the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # GvdB: This module uses the environment variable TBB_ARCH_PLATFORM which defines architecture and compiler. # e.g. "ia32/vc8" or "em64t/cc4.1.0_libc2.4_kernel2.6.16.21" # TBB_ARCH_PLATFORM is set by the build script `tbbvars[.bat|.sh|.csh], which can be found # in the TBB installation directory (TBB_INSTALL_DIR). # # GvdB: Mac OS X distribution places libraries directly in lib directory. # # For backwards compatibility, you may explicitely set the CMake variables TBB_ARCHITECTURE and TBB_COMPILER. # TBB_ARCHITECTURE [ ia32 | em64t | itanium ] # which architecture to use # TBB_COMPILER e.g. vc9 or cc3.2.3_libc2.3.2_kernel2.4.21 or cc4.0.1_os10.4.9 # which compiler to use (detected automatically on Windows) # This module respects # TBB_INSTALL_DIR or $ENV{TBB21_INSTALL_DIR} or $ENV{TBB_INSTALL_DIR} # This module defines # TBB_INCLUDE_DIRS, where to find task_scheduler_init.h, etc. # TBB_LIBRARY_DIRS, where to find libtbb, libtbbmalloc # TBB_DEBUG_LIBRARY_DIRS, where to find libtbb_debug, libtbbmalloc_debug # TBB_INSTALL_DIR, the base TBB install directory # TBB_LIBRARIES, the libraries to link against to use TBB. 
# TBB_DEBUG_LIBRARIES, the libraries to link against to use TBB with debug symbols.
# TBB_FOUND, If false, don't try to use TBB.
# TBB_INTERFACE_VERSION, as defined in tbb/tbb_stddef.h

# ---------------------------------------------------------------------------
# Per-platform defaults: default install locations, library base names and
# the compiler/architecture flavor used to build the legacy library path.
# ---------------------------------------------------------------------------
if (WIN32)
    # has em64t/vc8 em64t/vc9
    # has ia32/vc7.1 ia32/vc8 ia32/vc9
    set(_TBB_DEFAULT_INSTALL_DIR "C:/Program Files/Intel/TBB" "C:/Program Files (x86)/Intel/TBB")
    set(_TBB_LIB_NAME "tbb")
    set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc")
    set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug")
    set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug")
    if (MSVC71)
        set (_TBB_COMPILER "vc7.1")
    endif()
    if (MSVC80)
        set(_TBB_COMPILER "vc8")
    endif()
    if (MSVC90)
        set(_TBB_COMPILER "vc9")
    endif()
    if (MSVC10)
        set(_TBB_COMPILER "vc10")
    endif()
    # Todo: add other Windows compilers such as ICL.
    set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE})
endif (WIN32)

if (UNIX)
    if (APPLE)
        # MAC
        set(_TBB_DEFAULT_INSTALL_DIR "/Library/Frameworks/Intel_TBB.framework/Versions")
        # libs: libtbb.dylib, libtbbmalloc.dylib, *_debug
        set(_TBB_LIB_NAME "tbb")
        set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc")
        set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug")
        set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug")
        # default flavor on apple: ia32/cc4.0.1_os10.4.9
        # Jiri: There is no reason to presume there is only one flavor and
        #       that user's setting of variables should be ignored.
        # Use the default flavor only when the user did not provide one.
        # (The fallback arm used to be `elseif (NOT TBB_COMPILER)`, which can
        # never be taken, so the user's value was silently dropped.)
        if (NOT TBB_COMPILER)
            set(_TBB_COMPILER "cc4.0.1_os10.4.9")
        else ()
            set(_TBB_COMPILER ${TBB_COMPILER})
        endif ()
        if (NOT TBB_ARCHITECTURE)
            set(_TBB_ARCHITECTURE "ia32")
        else ()
            set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE})
        endif ()
    else (APPLE)
        # LINUX
        set(_TBB_DEFAULT_INSTALL_DIR "/opt/intel/tbb" "/usr/local/include" "/usr/include")
        set(_TBB_LIB_NAME "tbb")
        set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc")
        set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug")
        set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug")
        # has em64t/cc3.2.3_libc2.3.2_kernel2.4.21 em64t/cc3.3.3_libc2.3.3_kernel2.6.5 em64t/cc3.4.3_libc2.3.4_kernel2.6.9 em64t/cc4.1.0_libc2.4_kernel2.6.16.21
        # has ia32/*
        # has itanium/*
        set(_TBB_COMPILER ${TBB_COMPILER})
        set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE})
    endif (APPLE)
endif (UNIX)

if (CMAKE_SYSTEM MATCHES "SunOS.*")
    # SUN
    # not yet supported
    # has em64t/cc3.4.3_kernel5.10
    # has ia32/*
endif (CMAKE_SYSTEM MATCHES "SunOS.*")


#-- Clear the public variables
set (TBB_FOUND "NO")


#-- Find TBB install dir and set ${_TBB_INSTALL_DIR} and cached ${TBB_INSTALL_DIR}
# first: use CMake variable TBB_INSTALL_DIR
if (TBB_INSTALL_DIR)
    set (_TBB_INSTALL_DIR ${TBB_INSTALL_DIR})
endif (TBB_INSTALL_DIR)
# second: use environment variable
# (later variables win: TBB30 overrides TBB22 overrides TBB21 overrides TBB)
if (NOT _TBB_INSTALL_DIR)
    if (NOT "$ENV{TBB_INSTALL_DIR}" STREQUAL "")
        set (_TBB_INSTALL_DIR $ENV{TBB_INSTALL_DIR})
    endif (NOT "$ENV{TBB_INSTALL_DIR}" STREQUAL "")
    # Intel recommends setting TBB21_INSTALL_DIR
    if (NOT "$ENV{TBB21_INSTALL_DIR}" STREQUAL "")
        set (_TBB_INSTALL_DIR $ENV{TBB21_INSTALL_DIR})
    endif (NOT "$ENV{TBB21_INSTALL_DIR}" STREQUAL "")
    if (NOT "$ENV{TBB22_INSTALL_DIR}" STREQUAL "")
        set (_TBB_INSTALL_DIR $ENV{TBB22_INSTALL_DIR})
    endif (NOT "$ENV{TBB22_INSTALL_DIR}" STREQUAL "")
    if (NOT "$ENV{TBB30_INSTALL_DIR}" STREQUAL "")
        set (_TBB_INSTALL_DIR $ENV{TBB30_INSTALL_DIR})
    endif (NOT "$ENV{TBB30_INSTALL_DIR}" STREQUAL "")
endif (NOT _TBB_INSTALL_DIR)
# third: try to find path automatically
if (NOT _TBB_INSTALL_DIR)
    if (_TBB_DEFAULT_INSTALL_DIR)
        set (_TBB_INSTALL_DIR ${_TBB_DEFAULT_INSTALL_DIR})
    endif (_TBB_DEFAULT_INSTALL_DIR)
endif (NOT _TBB_INSTALL_DIR)
# sanity check
if (NOT _TBB_INSTALL_DIR)
    message ("ERROR: Unable to find Intel TBB install directory. ${_TBB_INSTALL_DIR}")
else ()
    # finally: set the cached CMake variable TBB_INSTALL_DIR
    if (NOT TBB_INSTALL_DIR)
        set (TBB_INSTALL_DIR ${_TBB_INSTALL_DIR} CACHE PATH "Intel TBB install directory")
        mark_as_advanced(TBB_INSTALL_DIR)
    endif (NOT TBB_INSTALL_DIR)


    #-- A macro to rewrite the paths of the library. This is necessary, because
    #   find_library() always found the em64t/vc9 version of the TBB libs
    #   var_name: NAME of the variable holding the path to rewrite in place.
    macro(TBB_CORRECT_LIB_DIR var_name)
#       if (NOT "${_TBB_ARCHITECTURE}" STREQUAL "em64t")
        string(REPLACE em64t "${_TBB_ARCHITECTURE}" ${var_name} ${${var_name}})
#       endif (NOT "${_TBB_ARCHITECTURE}" STREQUAL "em64t")
        string(REPLACE ia32 "${_TBB_ARCHITECTURE}" ${var_name} ${${var_name}})
        string(REPLACE vc7.1 "${_TBB_COMPILER}" ${var_name} ${${var_name}})
        string(REPLACE vc8 "${_TBB_COMPILER}" ${var_name} ${${var_name}})
        string(REPLACE vc9 "${_TBB_COMPILER}" ${var_name} ${${var_name}})
        string(REPLACE vc10 "${_TBB_COMPILER}" ${var_name} ${${var_name}})
    endmacro()


    #-- Look for include directory and set ${TBB_INCLUDE_DIR}
    set (TBB_INC_SEARCH_DIR ${_TBB_INSTALL_DIR}/include)
    # Jiri: tbbvars now sets the CPATH environment variable to the directory
    #       containing the headers.
find_path(TBB_INCLUDE_DIR
    tbb/task_scheduler_init.h
    PATHS ${TBB_INC_SEARCH_DIR} ENV CPATH
)
mark_as_advanced(TBB_INCLUDE_DIR)


#-- Look for libraries
# GvdB: $ENV{TBB_ARCH_PLATFORM} is set by the build script tbbvars[.bat|.sh|.csh]
# The operand is quoted so the test stays well-formed when the variable is
# unset or contains spaces/semicolons.
if (NOT "$ENV{TBB_ARCH_PLATFORM}" STREQUAL "")
    set (_TBB_LIBRARY_DIR
         ${_TBB_INSTALL_DIR}/lib/$ENV{TBB_ARCH_PLATFORM}
         ${_TBB_INSTALL_DIR}/$ENV{TBB_ARCH_PLATFORM}/lib
        )
endif ()
# Jiri: This block isn't mutually exclusive with the previous one
#       (hence no else), instead I test if the user really specified
#       the variables in question.
if ((NOT "${TBB_ARCHITECTURE}" STREQUAL "") AND (NOT "${TBB_COMPILER}" STREQUAL ""))
    # HH: deprecated
    message(STATUS "[Warning] FindTBB.cmake: The use of TBB_ARCHITECTURE and TBB_COMPILER is deprecated and may not be supported in future versions. Please set \$ENV{TBB_ARCH_PLATFORM} (using tbbvars.[bat|csh|sh]).")
    # Jiri: It doesn't hurt to look in more places, so I store the hints from
    #       ENV{TBB_ARCH_PLATFORM} and the TBB_ARCHITECTURE and TBB_COMPILER
    #       variables and search them both.
    set (_TBB_LIBRARY_DIR "${_TBB_INSTALL_DIR}/${_TBB_ARCHITECTURE}/${_TBB_COMPILER}/lib" ${_TBB_LIBRARY_DIR})
endif ()

# GvdB: Mac OS X distribution places libraries directly in lib directory.
list(APPEND _TBB_LIBRARY_DIR ${_TBB_INSTALL_DIR}/lib)

# Jiri: No reason not to check the default paths. From recent versions,
#       tbbvars has started exporting the LIBRARY_PATH and LD_LIBRARY_PATH
#       variables, which now point to the directories of the lib files.
#       It all makes more sense to use the ${_TBB_LIBRARY_DIR} as a HINTS
#       argument instead of the implicit PATHS as it isn't hard-coded
#       but computed by system introspection. Searching the LIBRARY_PATH
#       and LD_LIBRARY_PATH environment variables is now even more important
#       that tbbvars doesn't export TBB_ARCH_PLATFORM and it facilitates
#       the use of TBB built from sources.
find_library(TBB_LIBRARY ${_TBB_LIB_NAME} HINTS ${_TBB_LIBRARY_DIR}
        PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH)
find_library(TBB_MALLOC_LIBRARY ${_TBB_LIB_MALLOC_NAME} HINTS ${_TBB_LIBRARY_DIR}
        PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH)

#Extract path from TBB_LIBRARY name
get_filename_component(TBB_LIBRARY_DIR ${TBB_LIBRARY} PATH)

#TBB_CORRECT_LIB_DIR(TBB_LIBRARY)
#TBB_CORRECT_LIB_DIR(TBB_MALLOC_LIBRARY)
mark_as_advanced(TBB_LIBRARY TBB_MALLOC_LIBRARY)

#-- Look for debug libraries
# Jiri: Changed the same way as for the release libraries.
find_library(TBB_LIBRARY_DEBUG ${_TBB_LIB_DEBUG_NAME} HINTS ${_TBB_LIBRARY_DIR}
        PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH)
find_library(TBB_MALLOC_LIBRARY_DEBUG ${_TBB_LIB_MALLOC_DEBUG_NAME} HINTS ${_TBB_LIBRARY_DIR}
        PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH)

# Jiri: Self-built TBB stores the debug libraries in a separate directory.
#       Extract path from TBB_LIBRARY_DEBUG name
get_filename_component(TBB_LIBRARY_DEBUG_DIR ${TBB_LIBRARY_DEBUG} PATH)

#TBB_CORRECT_LIB_DIR(TBB_LIBRARY_DEBUG)
#TBB_CORRECT_LIB_DIR(TBB_MALLOC_LIBRARY_DEBUG)
mark_as_advanced(TBB_LIBRARY_DEBUG TBB_MALLOC_LIBRARY_DEBUG)


# TBB counts as found only when both the headers and the release library exist.
if (TBB_INCLUDE_DIR)
    if (TBB_LIBRARY)
        set (TBB_FOUND "YES")
        set (TBB_LIBRARIES ${TBB_LIBRARY} ${TBB_MALLOC_LIBRARY} ${TBB_LIBRARIES})
        set (TBB_DEBUG_LIBRARIES ${TBB_LIBRARY_DEBUG} ${TBB_MALLOC_LIBRARY_DEBUG} ${TBB_DEBUG_LIBRARIES})
        set (TBB_INCLUDE_DIRS ${TBB_INCLUDE_DIR} CACHE PATH "TBB include directory" FORCE)
        set (TBB_LIBRARY_DIRS ${TBB_LIBRARY_DIR} CACHE PATH "TBB library directory" FORCE)
        # Jiri: Self-built TBB stores the debug libraries in a separate directory.
set (TBB_DEBUG_LIBRARY_DIRS ${TBB_LIBRARY_DEBUG_DIR} CACHE PATH "TBB debug library directory" FORCE) mark_as_advanced(TBB_INCLUDE_DIRS TBB_LIBRARY_DIRS TBB_DEBUG_LIBRARY_DIRS TBB_LIBRARIES TBB_DEBUG_LIBRARIES) message(STATUS "Found Intel TBB") endif (TBB_LIBRARY) endif (TBB_INCLUDE_DIR) if (NOT TBB_FOUND) message("ERROR: Intel TBB NOT found!") message(STATUS "Looked for Threading Building Blocks in ${_TBB_INSTALL_DIR}") # do only throw fatal, if this pkg is REQUIRED if (TBB_FIND_REQUIRED) message(FATAL_ERROR "Could NOT find TBB library. On ubuntu try apt-get install libtbb-dev ") endif (TBB_FIND_REQUIRED) endif (NOT TBB_FOUND) endif (NOT _TBB_INSTALL_DIR) if (TBB_FOUND) set(TBB_INTERFACE_VERSION 0) FILE(READ "${TBB_INCLUDE_DIRS}/tbb/tbb_stddef.h" _TBB_VERSION_CONTENTS) STRING(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" TBB_INTERFACE_VERSION "${_TBB_VERSION_CONTENTS}") set(TBB_INTERFACE_VERSION "${TBB_INTERFACE_VERSION}") endif (TBB_FOUND) compute-0.5/doc/000077500000000000000000000000001263566244600136155ustar00rootroot00000000000000compute-0.5/doc/Jamfile.v2000066400000000000000000000075311263566244600154430ustar00rootroot00000000000000# --------------------------------------------------------------------------- # Copyright (c) 2013 Kyle Lutz # # Distributed under the Boost Software License, Version 1.0 # See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt # # --------------------------------------------------------------------------- import quickbook ; import boostbook ; import doxygen ; import modules ; doxygen autodoc : ../include/boost/compute.hpp ../include/boost/compute/buffer.hpp ../include/boost/compute/cl.hpp ../include/boost/compute/closure.hpp ../include/boost/compute/command_queue.hpp ../include/boost/compute/config.hpp ../include/boost/compute/context.hpp ../include/boost/compute/core.hpp ../include/boost/compute/device.hpp ../include/boost/compute/event.hpp ../include/boost/compute/function.hpp 
../include/boost/compute/kernel.hpp ../include/boost/compute/memory_object.hpp ../include/boost/compute/pipe.hpp ../include/boost/compute/platform.hpp ../include/boost/compute/program.hpp ../include/boost/compute/svm.hpp ../include/boost/compute/system.hpp ../include/boost/compute/user_event.hpp ../include/boost/compute/version.hpp ../include/boost/compute/algorithm.hpp [ glob ../include/boost/compute/algorithm/*.hpp ] ../include/boost/compute/allocator.hpp [ glob ../include/boost/compute/allocator/*.hpp ] ../include/boost/compute/async.hpp [ glob ../include/boost/compute/async/*.hpp ] ../include/boost/compute/container.hpp [ glob ../include/boost/compute/container/*.hpp ] ../include/boost/compute/exception.hpp [ glob ../include/boost/compute/exception/*.hpp ] ../include/boost/compute/functional.hpp [ glob ../include/boost/compute/functional/*.hpp ] ../include/boost/compute/interop/opengl.hpp [ glob ../include/boost/compute/interop/opengl/*.hpp ] ../include/boost/compute/image.hpp [ glob ../include/boost/compute/image/*.hpp ] ../include/boost/compute/iterator.hpp [ glob ../include/boost/compute/iterator/*.hpp ] ../include/boost/compute/memory.hpp [ glob ../include/boost/compute/memory/*.hpp ] ../include/boost/compute/random.hpp [ glob ../include/boost/compute/random/*.hpp ] ../include/boost/compute/types.hpp ../include/boost/compute/types/struct.hpp ../include/boost/compute/type_traits.hpp [ glob ../include/boost/compute/type_traits/*.hpp ] ../include/boost/compute/utility.hpp [ glob ../include/boost/compute/utility/*.hpp ] : QUIET=YES WARNINGS=YES WARN_IF_DOC_ERROR=YES EXTRACT_ALL=NO EXTRACT_PRIVATE=NO HIDE_UNDOC_MEMBERS=YES HIDE_UNDOC_CLASSES=YES EXPAND_ONLY_PREDEF=YES SORT_MEMBER_DOCS=NO TAB_SIZE=4 EXAMPLE_PATH=../ PREDEFINED=BOOST_COMPUTE_DOXYGEN_INVOKED "ALIASES= \\ internal_=\"INTERNAL ONLY\" \\ opencl_ref{1}=\"\\1()\" \\ see_opencl_ref{1}=\"See the documentation for \\opencl_ref{\\1} for more information.\" \\ opencl2_ref{1}=\"\\1()\" \\ 
see_opencl2_ref{1}=\"See the documentation for \\opencl2_ref{\\1} for more information.\" \\ opencl_version_warning{2}=\"\\warning This method is only available if the OpenCL version is \\1.\\2 or later.\" \\ " "boost.doxygen.reftitle=Header Reference" ; xml compute : compute.qbk : ; boostbook standalone : compute : html.stylesheet=boostbook.css autodoc ; compute-0.5/doc/advanced_topics.qbk000066400000000000000000000144441263566244600174510ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section Advanced Topics] The following topics show advanced features of the Boost Compute library. [section Vector Data Types] In addition to the built-in scalar types (e.g. `int` and `float`), OpenCL also provides vector data types (e.g. `int2` and `float4`). These can be used with the Boost Compute library on both the host and device. Boost.Compute provides typedefs for these types which take the form: `boost::compute::scalarN_` where `scalar` is a scalar data type (e.g. `int`, `float`, `char`) and `N` is the size of the vector. Supported vector sizes are: 2, 4, 8, and 16. The following example shows how to transfer a set of 3D points stored as an array of `float`s on the host to the device and then calculate the sum of the point coordinates using the [funcref boost::compute::accumulate accumulate()] function. The sum is transferred to the host and the centroid computed by dividing by the total number of points. Note that even though the points are in 3D, they are stored as `float4` due to OpenCL's alignment requirements.
[import ../example/point_centroid.cpp] [point_centroid_example] [endsect] [/ vector data types] [section Custom Functions] The OpenCL runtime and the Boost Compute library provide a number of built-in functions such as sqrt() and dot() but many times these are not sufficient for solving the problem at hand. The Boost Compute library provides a few different ways to create custom functions that can be passed to the provided algorithms such as [funcref boost::compute::transform transform()] and [funcref boost::compute::reduce reduce()]. The most basic method is to provide the raw source code for a function: `` boost::compute::function<int (int)> add_four = boost::compute::make_function_from_source<int (int)>( "add_four", "int add_four(int x) { return x + 4; }" ); boost::compute::transform(input.begin(), input.end(), output.begin(), add_four, queue); `` This can also be done more succinctly using the [macroref BOOST_COMPUTE_FUNCTION BOOST_COMPUTE_FUNCTION()] macro: `` BOOST_COMPUTE_FUNCTION(int, add_four, (int x), { return x + 4; }); boost::compute::transform(input.begin(), input.end(), output.begin(), add_four, queue); `` Also see [@http://kylelutz.blogspot.com/2014/03/custom-opencl-functions-in-c-with.html "Custom OpenCL functions in C++ with Boost.Compute"] for more details. [endsect] [/ custom functions] [section Custom Types] Boost.Compute provides the [macroref BOOST_COMPUTE_ADAPT_STRUCT BOOST_COMPUTE_ADAPT_STRUCT()] macro which allows a C++ struct/class to be wrapped and used in OpenCL. [endsect] [/ custom types] [section Complex Values] While OpenCL itself doesn't natively support complex data types, the Boost Compute library provides them.
To use complex values first include the following header: `` #include <boost/compute/types/complex.hpp> `` A vector of complex values can be created like so: `` // create vector on device boost::compute::vector<std::complex<float> > vector; // insert two complex values vector.push_back(std::complex<float>(1.0f, 3.0f)); vector.push_back(std::complex<float>(2.0f, 4.0f)); `` [endsect] [/ complex values] [section Lambda Expressions] The lambda expression framework allows for functions and predicates to be defined at the call-site of an algorithm. Lambda expressions use the placeholders `_1` and `_2` to indicate the arguments. The following declarations will bring the lambda placeholders into the current scope: `` using boost::compute::lambda::_1; using boost::compute::lambda::_2; `` The following examples show how to use lambda expressions along with the Boost.Compute algorithms to perform more complex operations on the device. To count the number of odd values in a vector: `` boost::compute::count_if(vector.begin(), vector.end(), _1 % 2 == 1, queue); `` To multiply each value in a vector by three and subtract four: `` boost::compute::transform(vector.begin(), vector.end(), vector.begin(), _1 * 3 - 4, queue); `` Lambda expressions can also be used to create function<> objects: `` boost::compute::function<int (int)> add_four = _1 + 4; `` [endsect] [/ lambda expressions] [section Asynchronous Operations] A major performance bottleneck in GPGPU applications is memory transfer. This can be alleviated by overlapping memory transfer with computation. The Boost Compute library provides the [funcref boost::compute::copy_async copy_async()] function which performs asynchronous memory transfers between the host and the device. For example, to initiate a copy from the host to the device and then perform other actions: `` // data on the host std::vector<float> host_vector = ...
// create a vector on the device boost::compute::vector<float> device_vector(host_vector.size(), context); // copy data to the device asynchronously boost::compute::future<void> f = boost::compute::copy_async( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // perform other work on the host or device // ... // ensure the copy is completed f.wait(); // use data on the device (e.g. sort) boost::compute::sort(device_vector.begin(), device_vector.end(), queue); `` [endsect] [/ asynchronous operations] [section Performance Timing] For example, to measure the time to copy a vector of data from the host to the device: [import ../example/time_copy.cpp] [time_copy_example] [endsect] [section OpenCL API Interoperability] The Boost Compute library is designed to easily interoperate with the OpenCL API. All of the wrapped classes have conversion operators to their underlying OpenCL types which allows them to be passed directly to the OpenCL functions. For example, `` // create context object boost::compute::context ctx = boost::compute::default_context(); // query number of devices using the OpenCL API cl_uint num_devices; clGetContextInfo(ctx, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &num_devices, 0); std::cout << "num_devices: " << num_devices << std::endl; `` [endsect] [/ opencl api interoperability] [endsect] [/ advanced topics] compute-0.5/doc/compute.qbk000066400000000000000000000020571263566244600157740ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [library Boost.Compute [quickbook 1.5] [authors [Lutz, Kyle]] [copyright 2013-2014 Kyle Lutz] [purpose A multi-core/GPGPU computing library] [category concurrent] [id compute] [dirname compute] [license
Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at [@http://www.boost.org/LICENSE_1_0.txt]) ] ] [include introduction.qbk] [include getting_started.qbk] [include design.qbk] [include tutorial.qbk] [include advanced_topics.qbk] [include interop.qbk] [include porting_guide.qbk] [include platforms_and_compilers.qbk] [include reference.qbk] [include performance.qbk] [include faq.qbk] compute-0.5/doc/design.qbk000066400000000000000000000053411263566244600155700ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section:design Design] [section Library Architecture] The Boost Compute library consists of several different components. The core layer provides a "thin" C++ wrapper over the OpenCL API. This includes classes to manage OpenCL objects such as [classref boost::compute::device device]'s, [classref boost::compute::kernel kernel]'s and [classref boost::compute::command_queue command_queue]'s. On top of the core layer is a partial implementation of the C++ standard library providing common containers (e.g. [classref boost::compute::vector vector], [classref boost::compute::array array]) along with common algorithms (e.g. [funcref boost::compute::transform transform()] and [funcref boost::compute::sort sort()]). The library also provides a number of "fancy" iterators (e.g. [classref boost::compute::transform_iterator transform_iterator] and [classref boost::compute::permutation_iterator permutation_iterator]) which enhance the functionality of the standard algorithms. Boost.Compute also supplies a number of facilities for interoperation with other C and C++ libraries.
See the section on [link boost_compute.interop interoperability] for more information. See the [link boost_compute.reference.api_overview API Overview] section for a full list of functions, classes, and macros provided by Boost.Compute. [endsect] [/ library architecture] [section Why OpenCL] Boost.Compute uses [@http://en.wikipedia.org/wiki/OpenCL OpenCL] as its interface for executing code on parallel devices such as GPUs and multi-core CPUs. OpenCL was chosen for a number of reasons: * Vendor-neutral, standard C/C++, and doesn't require a special compiler, non-standard pragmas, or compiler extensions. * It is not just another parallel-library abstraction layer, it provides direct access to the underlying hardware. * Its runtime compilation model allows for kernels to be optimized and tuned dynamically for the device present when the application is run rather that the device that was present when the code was compiled (which is often a separate machine). * Using OpenCL allows Boost.Compute to directly interoperate with other OpenCL libraries (such as VexCL and OpenCV), as well as existing code written with OpenCL. * The "thin" C++ wrapper provided by Boost.Compute allows the user to break-out and write their own custom kernels when the provided APIs are not suitable. [endsect] [/ why opencl] [endsect] compute-0.5/doc/faq.qbk000066400000000000000000000161561263566244600150740ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section:faq Frequently Asked Questions] [h3 How do I report a bug, issue, or feature request?] Please submit an issue on the GitHub issue tracker at [@https://github.com/boostorg/compute/issues]. 
[h3 Where can I find more documentation?] * The main documentation is here: [@http://boostorg.github.io/compute/] * The README is here: [@https://github.com/boostorg/compute/blob/master/README.md] * The wiki is here: [@https://github.com/boostorg/compute/wiki] * The contributor guide is here: [@https://github.com/boostorg/compute/blob/master/CONTRIBUTING.md] * The reference is here: [@http://boostorg.github.io/compute/compute/reference.html] [h3 Where is the best place to ask questions about the library?] The mailing list at [@https://groups.google.com/forum/#!forum/boost-compute]. [h3 What compute devices (e.g. GPUs) are supported?] Any device which implements the OpenCL standard is supported. This includes GPUs from NVIDIA, AMD, and Intel as well as CPUs from AMD and Intel and other accelerator cards such as the Xeon Phi. [h3 Can you compare Boost.Compute to other GPGPU libraries such as Thrust, Bolt and VexCL?] Thrust implements a C++ STL-like API for GPUs and CPUs. It is built with multiple backends. NVIDIA GPUs use the CUDA backend and multi-core CPUs can use the Intel TBB or OpenMP backends. However, thrust will not work with AMD graphics cards or other lesser-known accelerators. I feel Boost.Compute is superior in that it uses the vendor-neutral OpenCL library to achieve portability across all types of compute devices. Bolt is an AMD specific C++ wrapper around the OpenCL API which extends the C99-based OpenCL language to support C++ features (most notably templates). It is similar to NVIDIA's Thrust library and shares the same failure, lack of portability. VexCL is an expression-template based linear-algebra library for OpenCL. The aims and scope are a bit different from the Boost Compute library. VexCL is closer in nature to the Eigen library while Boost.Compute is closer to the C++ standard library. I don't feel that Boost.Compute really fills the same role as VexCL and in fact VexCL could be built on top of Boost.Compute. 
Also see this StackOverflow question: [@http://stackoverflow.com/questions/20154179/differences-between-vexcl-thrust-and-boost-compute] [h3 Why not just write a new OpenCL back-end for Thrust?] It would not be possible to provide the same API that Thrust expects for OpenCL. The fundamental reason is that functions/functors passed to Thrust algorithms are actual compiled C++ functions whereas for Boost.Compute these form expression objects which are then translated into C99 code which is then compiled for OpenCL. [h3 Why not target CUDA and/or support multiple back-ends?] CUDA and OpenCL are two very different technologies. OpenCL works by compiling C99 code at run-time to generate kernel objects which can then be executed on the GPU. CUDA, on the other hand, works by compiling its kernels using a special compiler (nvcc) which then produces binaries which can be executed on the GPU. OpenCL already has multiple implementations which allow it to be used on a variety of platforms (e.g. NVIDIA GPUs, Intel CPUs, etc.). I feel that adding another abstraction level within Boost.Compute would only complicate and bloat the library. [h3 Is it possible to use ordinary C++ functions/functors or C++11 lambdas with Boost.Compute?] Unfortunately no. OpenCL relies on having C99 source code available at run-time in order to execute code on the GPU. Thus compiled C++ functions or C++11 lambdas cannot simply be passed to the OpenCL environment to be executed on the GPU. This is the reason why I wrote the Boost.Compute lambda library. Basically it takes C++ lambda expressions (e.g. _1 * sqrt(_1) + 4) and transforms them into C99 source code fragments (e.g. “input[i] * sqrt(input[i]) + 4”) which are then passed to the Boost.Compute STL-style algorithms for execution. While not perfect, it allows the user to write code closer to C++ that still can be executed through OpenCL.
Also check out the BOOST_COMPUTE_FUNCTION() macro which allows OpenCL functions to be defined inline with C++ code. An example can be found in the monte_carlo example code. [h3 What is the command_queue argument that appears in all of the algorithms?] Command queues specify the context and device for the algorithm's execution. For all of the standard algorithms the command_queue parameter is optional. If not provided, a default command_queue will be created for the default GPU device and the algorithm will be executed there. [h3 How can I print out the contents of a buffer/vector on the GPU?] This can be accomplished easily using the generic boost::compute::copy() algorithm along with std::ostream_iterator. For example: [import ../example/print_vector.cpp] [print_vector_example] [h3 Does Boost.Compute support zero-copy memory?] Zero-copy memory allows OpenCL kernels to directly operate on regions of host memory (if supported by the platform). Boost.Compute supports zero-copy memory in multiple ways. The low-level interface is provided by allocating [classref boost::compute::buffer buffer] objects with the `CL_MEM_USE_HOST_PTR` flag. The high-level interface is provided by the [classref boost::compute::mapped_view mapped_view] class which provides a std::vector-like interface to a region of host-memory and can be used directly with all of the Boost.Compute algorithms. [h3 Is Boost.Compute thread-safe?] The low-level Boost.Compute APIs offer the same thread-safety guarantees as the underlying OpenCL library implementation. However, the high-level APIs make use of a few global static objects for features such as automatic program caching which makes them not thread-safe by default. To compile Boost.Compute in thread-safe mode define `BOOST_COMPUTE_THREAD_SAFE` before including any of the Boost.Compute headers. By default this will require linking your application/library with the Boost.Thread library. [h3 What applications/libraries use Boost.Compute?]
Boost.Compute is used by a number of open-source libraries and applications including: * ArrayFire ([@http://arrayfire.com]) * Ceemple ([@http://www.ceemple.com]) * Odeint ([@http://headmyshoulder.github.io/odeint-v2]) * VexCL ([@https://github.com/ddemidov/vexcl]) If you use Boost.Compute in your project and would like it to be listed here please send an email to Kyle Lutz (kyle.r.lutz@gmail.com). [h3 How can I contribute?] We are actively seeking additional C++ developers with experience in GPGPU and parallel-computing. Please send an email to Kyle Lutz (kyle.r.lutz@gmail.com) for more information. Also see the [@https://github.com/boostorg/compute/blob/master/CONTRIBUTING.md contributor guide] and check out the list of issues at: [@https://github.com/boostorg/compute/issues]. [endsect] [/ faq ] compute-0.5/doc/getting_started.qbk000066400000000000000000000071271263566244600175120ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section:getting_started Getting Started] [section Downloading] Boost.Compute is not yet an official Boost library and therefore is not packaged with the standard Boost distribution. To download the library use the following command: `` git clone git://github.com/boostorg/compute.git `` Ubuntu users can install Boost.Compute from the PPA: `` # add the boost.compute ppa sudo add-apt-repository ppa:kylelutz/compute # update and install boost-compute sudo apt-get update && sudo apt-get install boost-compute `` [endsect] [section Compilation and Usage] Boost.Compute is a header-only library, so no linking is required. To use the library just add the include directory to the compilation flags and link with the system's OpenCL library.
For example, with GCC: `` g++ -I/path/to/compute/include main.cpp -lOpenCL `` All of the Boost.Compute headers can be included with the following directive: `` #include <boost/compute.hpp> `` If you only want to include the core OpenCL wrapper headers (which have minimal dependencies on the rest of Boost), use the following directive: `` #include <boost/compute/core.hpp> `` All of the classes and functions in Boost.Compute live in the `boost::compute` namespace and can be brought into global scope with: `` using namespace boost::compute; `` [endsect] [section Configuration Macros] Boost.Compute provides a number of optional features which can be configured with the following macros. [table [[Macro] [Description]] [ [[^BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION]][ When defined, if program::build() fails, the program source and build log will be written to stdout. ] ] [ [[^BOOST_COMPUTE_HAVE_THREAD_LOCAL]][ Enables the use of C++11 [^thread_local] storage specifier. ] ] [ [[^BOOST_COMPUTE_THREAD_SAFE]][ Builds Boost.Compute in a thread-safe mode. This requires either support for C++11 thread-local storage (via defining the [^BOOST_COMPUTE_HAVE_THREAD_LOCAL] macro) or linking with Boost.Thread. ] ] [ [[^BOOST_COMPUTE_USE_OFFLINE_CACHE]][ Enables the offline-cache which stores compiled binaries on disk. This option requires linking with Boost.Filesystem and Boost.System. ] ] ] [endsect] [section Installation] Boost.Compute can also be installed system-wide. After compiling, run the following command to install the Boost.Compute headers. By default, they will be installed under `/usr/local/include`. [pre make install ] After installing, Boost.Compute can be used through cmake by adding the following to your `CMakeLists.txt`: [pre find_package(BoostCompute REQUIRED) include_directories(${BoostCompute_INCLUDE_DIRS}) ] Ensure you also find and setup the OpenCL libraries for your system.
[endsect] [/ installation] [section Support] Bugs and issues can be reported to the [@https://github.com/boostorg/compute/issues?state=open issue tracker]. There is also a mailing list for users and developers at [@https://groups.google.com/forum/#!forum/boost-compute]. Look through the [link boost_compute.faq FAQ] to see if you're encountering a known or common issue. [endsect] [/ support] [endsect] compute-0.5/doc/interop.qbk000066400000000000000000000041461263566244600160010ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section:interop Interoperability] Boost.Compute provides a number of facilities to ease interoperability with other C and C++ libraries including [@http://en.wikipedia.org/wiki/OpenGL OpenGL], [@http://opencv.org/ OpenCV], [@http://eigen.tuxfamily.org Eigen], [@http://qt-project.org/ Qt], and [@http://vtk.org/ VTK]. All of the interop headers are located in the `<boost/compute/interop/>` directory. [section OpenGL] OpenGL is a standard API for 3D graphics and closely related to OpenCL. Boost.Compute provides an easy way to create a shared OpenCL/OpenGL context via the [funcref boost::compute::opengl_create_shared_context opengl_create_shared_context()] function. This creates a Boost.Compute [classref boost::compute::context context] object which can be used to create OpenCL memory buffers corresponding to OpenGL memory objects (such as VBOs). This allows for OpenCL kernels to read and write data which is then used directly for rendering with OpenGL. Boost.Compute provides a few classes to manage OpenGL objects and make them accessible to OpenCL.
These include [classref boost::compute::opengl_buffer opengl_buffer] (for OpenGL memory objects) and [classref boost::compute::opengl_texture opengl_texture] (for OpenGL texture objects). Control of shared OpenCL/OpenGL memory objects is transferred between OpenCL and OpenGL using the [funcref boost::compute::opengl_enqueue_acquire_gl_objects opengl_enqueue_acquire_gl_objects()] and [funcref boost::compute::opengl_enqueue_release_gl_objects opengl_enqueue_release_gl_objects()] functions. See the OpenCL documentation for the [@https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/cl_khr_gl_sharing.html cl_khr_gl_sharing] extension for more information. [endsect] [/ opengl] [endsect] [/ interop ] compute-0.5/doc/introduction.qbk000066400000000000000000000014451263566244600170410ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section:introduction Introduction] The Boost Compute library provides a C++ interface to multi-core CPU and GPGPU computing platforms based on [@http://en.wikipedia.org/wiki/OpenCL OpenCL]. The project is hosted on GitHub at [@https://github.com/boostorg/compute]. Click the arrow below to see the guide on [link boost_compute.getting_started Getting Started].
[warning Boost.Compute is not (yet) an official Boost library] [endsect] compute-0.5/doc/performance.qbk000066400000000000000000000032751263566244600166240ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section:performance Performance] The following tests were run with an NVIDIA Tesla K40c GPU on a system with an Intel Core i7 920 2.67GHz CPU. Source code for the benchmarks can be found under the [@https://github.com/boostorg/compute/tree/master/perf perf] directory. All benchmarks were compiled with optimizations enabled (i.e. "gcc -O3"). [h3 Accumulate] [$images/perf/accumulate_time_plot.png [width 850px] [align center]] [h3 Count] [$images/perf/count_time_plot.png [width 850px] [align center]] [h3 Inner Product] [$images/perf/inner_product_time_plot.png [width 850px] [align center]] [h3 Merge] [$images/perf/merge_time_plot.png [width 850px] [align center]] [h3 Partial Sum] [$images/perf/partial_sum_time_plot.png [width 850px] [align center]] [h3 Partition] [$images/perf/partition_time_plot.png [width 850px] [align center]] [h3 Reverse] [$images/perf/reverse_time_plot.png [width 850px] [align center]] [h3 Rotate] [$images/perf/rotate_time_plot.png [width 850px] [align center]] [h3 Set Difference] [$images/perf/set_difference_time_plot.png [width 850px] [align center]] [h3 Sort] [$images/perf/sort_time_plot.png [width 850px] [align center]] [h3 Transform] [$images/perf/saxpy_time_plot.png [width 850px] [align center]] [h3 Unique] [$images/perf/unique_time_plot.png [width 850px] [align center]] [endsect] [/ performance] 
compute-0.5/doc/platforms_and_compilers.qbk000066400000000000000000000022451263566244600212250ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section:platforms_and_compilers Platforms and Compilers] Boost.Compute has been tested on the following: [section Compilers] * GCC (>= 4.6) * Clang (>= 3.0) * MSVC (>= 2010) Boost.Compute requires variadic macro support. Variadic macros are supported by most modern C++98 compilers and all C++11 compilers. To explicitly enable variadic support for Boost.Preprocessor add `-DBOOST_PP_VARIADICS=1` to your compiler flags. [endsect] [section Platforms] * Linux * Mac OSX * Windows [endsect] [section OpenCL Implementations] * NVIDIA (NVIDIA GPUs only) * AMD (CPUs and AMD GPUs) * Intel (CPUs only) * POCL (CPUs only) * Beignet (Intel GPUs only) [endsect] If you have an additional platform you would like to see supported please [@https://github.com/boostorg/compute/issues/new submit] a bug-report. 
[endsect] compute-0.5/doc/porting_guide.qbk000066400000000000000000000161371263566244600171630ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section Porting Guide] [section OpenCL API] [table OpenCL API Translation Table [[OpenCL Function] [Boost.Compute Function]] [[*Buffers*]] [[`clCreateBuffer()`] [[classref boost::compute::buffer buffer::buffer()]]] [[`clReleaseMemObject()`] [[classref boost::compute::buffer buffer::~buffer()]]] [[`clGetMemObjectInfo()`] [[memberref boost::compute::buffer::get_info buffer::get_info()]]] [[*Command Queues*]] [[`clCreateCommandQueue()`] [[classref boost::compute::command_queue command_queue::command_queue()]]] [[`clReleaseCommandQueue()`] [[classref boost::compute::command_queue command_queue::~command_queue()]]] [[`clGetCommandQueueInfo()`] [[memberref boost::compute::command_queue::get_info command_queue::get_info()]]] [[`clEnqueueReadBuffer()`] [[memberref boost::compute::command_queue::enqueue_read_buffer command_queue::enqueue_read_buffer()]]] [[`clEnqueueWriteBuffer()`] [[memberref boost::compute::command_queue::enqueue_write_buffer command_queue::enqueue_write_buffer()]]] [[`clEnqueueCopyBuffer()`] [[memberref boost::compute::command_queue::enqueue_copy_buffer command_queue::enqueue_copy_buffer()]]] [[`clEnqueueNDRangeKernel()`] [[memberref boost::compute::command_queue::enqueue_nd_range_kernel command_queue::enqueue_nd_range_kernel()]]] [[`clEnqueueTask()`] [[memberref boost::compute::command_queue::enqueue_task command_queue::enqueue_task()]]] [[`clEnqueueBarrier()`] [[memberref boost::compute::command_queue::enqueue_barrier command_queue::enqueue_barrier()]]] [[`clEnqueueMarker()`] [[memberref 
boost::compute::command_queue::enqueue_marker command_queue::enqueue_marker()]]] [[`clFlush()`] [[memberref boost::compute::command_queue::flush command_queue::flush()]]] [[`clFinish()`] [[memberref boost::compute::command_queue::finish command_queue::finish()]]] [[*Contexts*]] [[`clCreateContext()`] [[classref boost::compute::context context::context()]]] [[`clReleaseContext()`] [[classref boost::compute::context context::~context()]]] [[`clGetContextInfo()`] [[memberref boost::compute::context::get_info context::get_info()]]] [[*Devices*]] [[`clGetDeviceInfo()`] [[memberref boost::compute::device::get_info device::get_info()]]] [[*Events*]] [[`clReleaseEvent()`] [[classref boost::compute::event event::~event()]]] [[`clGetEventInfo()`] [[memberref boost::compute::event::get_info event::get_info()]]] [[`clGetEventProfilingInfo()`] [[memberref boost::compute::event::get_profiling_info event::get_profiling_info()]]] [[`clWaitForEvents()`] [[memberref boost::compute::wait_list::wait wait_list::wait()]]] [[`clCreateUserEvent()`] [[classref boost::compute::user_event user_event::user_event()]]] [[`clSetUserEventStatus()`] [[memberref boost::compute::user_event::set_status user_event::set_status()]]] [[*Kernels*]] [[`clCreateKernel()`] [[classref boost::compute::kernel kernel::kernel()]]] [[`clReleaseKernel()`] [[classref boost::compute::kernel kernel::~kernel()]]] [[`clGetKernelInfo()`] [[memberref boost::compute::kernel::get_info kernel::get_info()]]] [[`clGetKernelArgInfo()`] [[memberref boost::compute::kernel::get_arg_info kernel::get_arg_info()]]] [[`clGetKernelWorkGroupInfo()`] [[memberref boost::compute::kernel::get_work_group_info kernel::get_work_group_info()]]] [[`clSetKernelArg()`] [[memberref boost::compute::kernel::set_arg kernel::set_arg()]]] [[`clSetKernelExecInfo()`] [[memberref boost::compute::kernel::set_exec_info kernel::set_exec_info()]]] [[*Pipes*]] [[`clCreatePipe()`] [[classref boost::compute::pipe pipe::pipe()]]] 
[[`clGetPipeInfo()`] [[memberref boost::compute::pipe::get_info pipe::get_info()]]] [[*Platforms*]] [[`clGetDeviceIDs()`] [[memberref boost::compute::platform::devices platform::devices()]]] [[`clGetPlatformInfo()`] [[memberref boost::compute::platform::get_info platform::get_info()]]] [[`clGetExtensionFunctionAddress()`] [[memberref boost::compute::platform::get_extension_function_address platform::get_extension_function_address()]]] [[`clUnloadCompiler()`] [[memberref boost::compute::platform::unload_compiler platform::unload_compiler()]]] [[*Programs*]] [[`clCreateProgramWithSource()`] [[memberref boost::compute::program::create_with_source program::create_with_source()]]] [[`clCreateProgramWithBinary()`] [[memberref boost::compute::program::create_with_binary program::create_with_binary()]]] [[`clCreateProgramWithBuiltInKernels()`] [[memberref boost::compute::program::create_with_builtin_kernels program::create_with_builtin_kernels()]]] [[`clReleaseProgram()`] [[classref boost::compute::program program::~program()]]] [[`clGetProgramInfo()`] [[memberref boost::compute::program::get_info program::get_info()]]] [[`clBuildProgram()`] [[memberref boost::compute::program::build program::build()]]] [[`clCompileProgram()`] [[memberref boost::compute::program::compile program::compile()]]] [[`clLinkProgram()`] [[memberref boost::compute::program::link program::link()]]] [[*Shared Virtual Memory*]] [[`clSVMAlloc()`] [[funcref boost::compute::svm_alloc svm_alloc()]]] [[`clSVMFree()`] [[funcref boost::compute::svm_free svm_free()]]] [[`clEnqueueSVMFree()`] [[memberref boost::compute::command_queue::enqueue_svm_free command_queue::enqueue_svm_free()]]] [[`clEnqueueSVMMemcpy()`] [[memberref boost::compute::command_queue::enqueue_svm_memcpy command_queue::enqueue_svm_memcpy()]]] [[`clEnqueueSVMMemFill()`] [[memberref boost::compute::command_queue::enqueue_svm_fill command_queue::enqueue_svm_fill()]]] [[`clEnqueueSVMMap()`] [[memberref 
boost::compute::command_queue::enqueue_svm_map command_queue::enqueue_svm_map()]]] [[`clEnqueueSVMUnmap()`] [[memberref boost::compute::command_queue::enqueue_svm_unmap command_queue::enqueue_svm_unmap()]]] [[*System*]] [[`clGetPlatformIDs()`] [[memberref boost::compute::system::platforms system::platforms()]]] [[*OpenGL Sharing*]] [[`clCreateFromGLBuffer()`] [[classref boost::compute::opengl_buffer opengl_buffer::opengl_buffer()]]] [[`clCreateFromGLRenderbuffer()`] [[classref boost::compute::opengl_renderbuffer opengl_renderbuffer::opengl_renderbuffer()]]] [[`clCreateFromGLTexture()`] [[classref boost::compute::opengl_texture opengl_texture::opengl_texture()]]] [[`clGetGLTextureInfo()`] [[classref boost::compute::opengl_texture opengl_texture::get_texture_info()]]] [[`clEnqueueAcquireGLObjects()`] [[funcref boost::compute::opengl_enqueue_acquire_gl_objects opengl_enqueue_acquire_gl_objects()]]] [[`clEnqueueReleaseGLObjects()`] [[funcref boost::compute::opengl_enqueue_release_gl_objects opengl_enqueue_release_gl_objects()]]] ] [endsect] [/ opencl api] [endsect] compute-0.5/doc/reference.qbk000066400000000000000000000231701263566244600162550ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section:reference Reference] [section API Overview] [h3 Core Library] Header: `` * [classref boost::compute::buffer buffer] * [classref boost::compute::command_queue command_queue] * [classref boost::compute::context context] * [classref boost::compute::device device] * [classref boost::compute::event event] * [classref boost::compute::kernel kernel] * [classref boost::compute::memory_object memory_object] * [classref boost::compute::pipe pipe] * [classref 
boost::compute::platform platform] * [classref boost::compute::program program] * [classref boost::compute::system system] * [classref boost::compute::user_event user_event] [h3 Utilities] Header: `` * [funcref boost::compute::dim dim()] * [classref boost::compute::extents extents] * [classref boost::compute::program_cache program_cache] * [classref boost::compute::wait_list wait_list] [h3 Algorithms] Header: `` * [funcref boost::compute::accumulate accumulate()] * [funcref boost::compute::adjacent_difference adjacent_difference()] * [funcref boost::compute::adjacent_find adjacent_find()] * [funcref boost::compute::all_of all_of()] * [funcref boost::compute::any_of any_of()] * [funcref boost::compute::binary_search binary_search()] * [funcref boost::compute::copy copy()] * [funcref boost::compute::copy_if copy_if()] * [funcref boost::compute::copy_n copy_n()] * [funcref boost::compute::count count()] * [funcref boost::compute::count_if count_if()] * [funcref boost::compute::equal equal()] * [funcref boost::compute::equal_range equal_range()] * [funcref boost::compute::exclusive_scan exclusive_scan()] * [funcref boost::compute::fill fill()] * [funcref boost::compute::fill_n fill_n()] * [funcref boost::compute::find find()] * [funcref boost::compute::find_end find_end()] * [funcref boost::compute::find_if find_if()] * [funcref boost::compute::find_if_not find_if_not()] * [funcref boost::compute::for_each for_each()] * [funcref boost::compute::for_each_n for_each_n()] * [funcref boost::compute::gather gather()] * [funcref boost::compute::generate generate()] * [funcref boost::compute::generate_n generate_n()] * [funcref boost::compute::includes includes()] * [funcref boost::compute::inclusive_scan inclusive_scan()] * [funcref boost::compute::inner_product inner_product()] * [funcref boost::compute::inplace_merge inplace_merge()] * [funcref boost::compute::iota iota()] * [funcref boost::compute::is_partitioned is_partitioned()] * [funcref boost::compute::is_permutation 
is_permutation()] * [funcref boost::compute::is_sorted is_sorted()] * [funcref boost::compute::lower_bound lower_bound()] * [funcref boost::compute::lexicographical_compare lexicographical_compare()] * [funcref boost::compute::max_element max_element()] * [funcref boost::compute::merge merge()] * [funcref boost::compute::min_element min_element()] * [funcref boost::compute::minmax_element minmax_element()] * [funcref boost::compute::mismatch mismatch()] * [funcref boost::compute::next_permutation next_permutation()] * [funcref boost::compute::none_of none_of()] * [funcref boost::compute::nth_element nth_element()] * [funcref boost::compute::partial_sum partial_sum()] * [funcref boost::compute::partition partition()] * [funcref boost::compute::partition_copy partition_copy()] * [funcref boost::compute::partition_point partition_point()] * [funcref boost::compute::prev_permutation prev_permutation()] * [funcref boost::compute::random_shuffle random_shuffle()] * [funcref boost::compute::reduce reduce()] * [funcref boost::compute::remove remove()] * [funcref boost::compute::remove_if remove_if()] * [funcref boost::compute::replace replace()] * [funcref boost::compute::replace_copy replace_copy()] * [funcref boost::compute::reverse reverse()] * [funcref boost::compute::reverse_copy reverse_copy()] * [funcref boost::compute::rotate rotate()] * [funcref boost::compute::rotate_copy rotate_copy()] * [funcref boost::compute::scatter scatter()] * [funcref boost::compute::search search()] * [funcref boost::compute::search_n search_n()] * [funcref boost::compute::set_difference set_difference()] * [funcref boost::compute::set_intersection set_intersection()] * [funcref boost::compute::set_symmetric_difference set_symmetric_difference()] * [funcref boost::compute::set_union set_union()] * [funcref boost::compute::sort sort()] * [funcref boost::compute::sort_by_key sort_by_key()] * [funcref boost::compute::stable_partition stable_partition()] * [funcref 
boost::compute::stable_sort stable_sort()] * [funcref boost::compute::swap_ranges swap_ranges()] * [funcref boost::compute::transform transform()] * [funcref boost::compute::transform_reduce transform_reduce()] * [funcref boost::compute::unique unique()] * [funcref boost::compute::unique_copy unique_copy()] * [funcref boost::compute::upper_bound upper_bound()] [h3 Async] Header: `` * [classref boost::compute::future future] * [funcref boost::compute::wait_for_all wait_for_all()] * [classref boost::compute::wait_guard wait_guard] [h3 Containers] Header: `` * [classref boost::compute::array array] * [classref boost::compute::basic_string basic_string] * [classref boost::compute::dynamic_bitset dynamic_bitset<>] * [classref boost::compute::flat_map flat_map] * [classref boost::compute::flat_set flat_set] * [classref boost::compute::mapped_view mapped_view] * [classref boost::compute::stack stack] * [classref boost::compute::string string] * [classref boost::compute::valarray valarray] * [classref boost::compute::vector vector] [h3 Exceptions] Header: `` * [classref boost::compute::context_error context_error] * [classref boost::compute::no_device_found no_device_found] * [classref boost::compute::opencl_error opencl_error] * [classref boost::compute::unsupported_extension_error unsupported_extension_error] [h3 Iterators] Header: `` * [classref boost::compute::buffer_iterator buffer_iterator] * [classref boost::compute::constant_buffer_iterator constant_buffer_iterator] * [classref boost::compute::constant_iterator constant_iterator] * [classref boost::compute::counting_iterator counting_iterator] * [classref boost::compute::discard_iterator discard_iterator] * [classref boost::compute::function_input_iterator function_input_iterator] * [classref boost::compute::permutation_iterator permutation_iterator] * [classref boost::compute::transform_iterator transform_iterator] * [classref boost::compute::zip_iterator zip_iterator] [h3 Images] Header: `` * [classref 
boost::compute::image1d image1d] * [classref boost::compute::image2d image2d] * [classref boost::compute::image3d image3d] * [classref boost::compute::image_format image_format] * [classref boost::compute::image_object image_object] * [classref boost::compute::image_sampler image_sampler] [h3 Shared Virtual Memory] * [classref boost::compute::svm_ptr svm_ptr] * [funcref boost::compute::svm_alloc svm_alloc()] * [funcref boost::compute::svm_free svm_free()] [h3 Macros] * [macroref BOOST_COMPUTE_ADAPT_STRUCT BOOST_COMPUTE_ADAPT_STRUCT()] * [macroref BOOST_COMPUTE_FUNCTION BOOST_COMPUTE_FUNCTION()] * [macroref BOOST_COMPUTE_STRINGIZE_SOURCE BOOST_COMPUTE_STRINGIZE_SOURCE()] [h3 OpenGL Sharing] Header: `<boost/compute/interop/opengl.hpp>` * [classref boost::compute::opengl_buffer opengl_buffer] * [funcref boost::compute::opengl_create_shared_context opengl_create_shared_context()] * [funcref boost::compute::opengl_enqueue_acquire_buffer opengl_enqueue_acquire_buffer()] * [funcref boost::compute::opengl_enqueue_acquire_gl_objects opengl_enqueue_acquire_gl_objects()] * [funcref boost::compute::opengl_enqueue_release_buffer opengl_enqueue_release_buffer()] * [funcref boost::compute::opengl_enqueue_release_gl_objects opengl_enqueue_release_gl_objects()] * [classref boost::compute::opengl_renderbuffer opengl_renderbuffer] * [classref boost::compute::opengl_texture opengl_texture] [h3 Random Number Generators] Header: `<boost/compute/random.hpp>` * [classref boost::compute::bernoulli_distribution bernoulli_distribution] * [classref boost::compute::default_random_engine default_random_engine] * [classref boost::compute::discrete_distribution discrete_distribution] * [classref boost::compute::linear_congruential_engine linear_congruential_engine] * [classref boost::compute::mersenne_twister_engine mersenne_twister_engine] * [classref boost::compute::normal_distribution normal_distribution] * [classref boost::compute::uniform_int_distribution uniform_int_distribution] * [classref boost::compute::uniform_real_distribution 
uniform_real_distribution] [h3 Type Traits] Header: `<boost/compute/type_traits.hpp>` * [classref boost::compute::is_device_iterator is_device_iterator] * [classref boost::compute::is_fundamental is_fundamental] * [classref boost::compute::is_vector_type is_vector_type] * [classref boost::compute::make_vector_type make_vector_type] * [classref boost::compute::result_of result_of] * [classref boost::compute::scalar_type scalar_type] * [funcref boost::compute::type_name type_name()] [endsect] [/ overview] [xinclude autodoc.xml] [endsect] [/ reference ] compute-0.5/doc/tutorial.qbk000066400000000000000000000036341263566244600161650ustar00rootroot00000000000000[/=========================================================================== Copyright (c) 2013-2015 Kyle Lutz Distributed under the Boost Software License, Version 1.0 See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt =============================================================================/] [section:tutorial Tutorial] [section Hello World] The hello world example gives a simple application that prints the name of the default compute device on the system. The [classref boost::compute::system system] class provides access to the OpenCL platforms and devices present on the host system. Compute devices are represented with the [classref boost::compute::device device] class. [import ../example/hello_world.cpp] [hello_world_example] [endsect] [/ hello world] [section Transferring Data] Before any computation occurs, data must be transferred from the host to the compute device. The generic [funcref boost::compute::copy copy()] function provides a simple interface for transferring data and the generic [classref boost::compute::vector vector] class provides a container for storing data on a compute device. The following example shows how to transfer data from an array on the host to a [classref boost::compute::vector vector] on the device and then back to a separate `std::vector` on the host. 
At the end of the example both `host_array` and `host_vector` contain the same values which were copied through the memory on the compute device. [import ../example/copy_data.cpp] [copy_data_example] [endsect] [/ transferring data] [section Transforming Data] The following example shows how to calculate the square-root of a vector of `float`s on a compute device using the [funcref boost::compute::transform transform()] function. [import ../example/transform_sqrt.cpp] [transform_sqrt_example] [endsect] [/ transforming data] [endsect] [/ tutorial ]
compute-0.5/example/000077500000000000000000000000001263566244600145035ustar00rootroot00000000000000
compute-0.5/example/CMakeLists.txt000066400000000000000000000104301263566244600172410ustar00rootroot00000000000000
# ---------------------------------------------------------------------------
# Copyright (c) 2013-2014 Kyle Lutz
#
# Distributed under the Boost Software License, Version 1.0
# See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt
#
# ---------------------------------------------------------------------------

# Build script for the example programs. Every example is a single
# <name>.cpp file compiled into an executable called <name>. Examples that
# need an optional third-party library are only built when the matching
# BOOST_COMPUTE_HAVE_* variable is true.

include_directories(../include)

# examples which only need OpenCL and Boost
set(EXAMPLES
  amd_cpp_kernel
  black_scholes
  copy_data
  fizz_buzz
  hello_world
  host_sort
  inline_ptx
  longest_vector
  list_devices
  mapped_view
  memory_limits
  monte_carlo
  point_centroid
  price_cross
  print_vector
  sort_vector
  simple_kernel
  time_copy
  transform_sqrt
  vector_addition
  simple_moving_average
  matrix_transpose
)

# boost library link dependencies
set(EXAMPLE_BOOST_COMPONENTS program_options)

# the offline cache additionally pulls in Boost.System and Boost.Filesystem
if (${BOOST_COMPUTE_USE_OFFLINE_CACHE})
  set(EXAMPLE_BOOST_COMPONENTS ${EXAMPLE_BOOST_COMPONENTS} system filesystem)
endif()

# Boost.Thread is added when thread-safety is requested without C++11
if(${BOOST_COMPUTE_THREAD_SAFE} AND NOT ${BOOST_COMPUTE_USE_CPP11})
  set(EXAMPLE_BOOST_COMPONENTS ${EXAMPLE_BOOST_COMPONENTS} thread)
endif()

find_package(Boost 1.48 REQUIRED COMPONENTS ${EXAMPLE_BOOST_COMPONENTS})
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})

foreach(EXAMPLE ${EXAMPLES})
  add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
  target_link_libraries(${EXAMPLE} ${OPENCL_LIBRARIES} ${Boost_LIBRARIES})

  # add example program to list of tests (if testing is enabled)
  if(${BOOST_COMPUTE_BUILD_TESTS})
    add_test("example.${EXAMPLE}" ${EXAMPLE})
  endif()
endforeach()

# opencl test example (linked against OpenCL only, no Boost libraries)
add_executable(opencl_test opencl_test.cpp)
target_link_libraries(opencl_test ${OPENCL_LIBRARIES})

# eigen examples
if(${BOOST_COMPUTE_HAVE_EIGEN})
  find_package(Eigen REQUIRED)
  include_directories(SYSTEM ${EIGEN_INCLUDE_DIRS})
  add_executable(batched_determinant batched_determinant.cpp)
  target_link_libraries(batched_determinant ${OPENCL_LIBRARIES} ${Boost_LIBRARIES})
endif()

# opencv examples
if(${BOOST_COMPUTE_HAVE_OPENCV})
  find_package(OpenCV REQUIRED)
  include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})

  set(OPENCV_EXAMPLES
    k_means
    opencv_flip
    random_walk
    opencv_optical_flow
    opencv_convolution
    opencv_sobel_filter
    opencv_histogram
  )

  foreach(EXAMPLE ${OPENCV_EXAMPLES})
    add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
    target_link_libraries(${EXAMPLE} ${OPENCL_LIBRARIES} ${Boost_LIBRARIES} ${OpenCV_LIBS})
  endforeach()
endif()

# opengl/vtk examples
if(${BOOST_COMPUTE_HAVE_VTK})
  find_package(VTK REQUIRED)
  include(${VTK_USE_FILE})
  add_executable(opengl_sphere opengl_sphere.cpp)
  target_link_libraries(opengl_sphere ${OPENCL_LIBRARIES} ${Boost_LIBRARIES} ${VTK_LIBRARIES})

  # the OpenGL library is linked as a framework on Apple and as "GL" on
  # other UNIX systems; no extra OpenGL library is added elsewhere
  if(APPLE)
    target_link_libraries(opengl_sphere "-framework OpenGL")
  elseif(UNIX)
    target_link_libraries(opengl_sphere GL)
  endif()
endif()

# qt examples
if(${BOOST_COMPUTE_HAVE_QT})
  # look for Qt4 in the first place
  find_package(Qt4 QUIET)
  if(${QT4_FOUND})
    # build with Qt4
    find_package(Qt4 REQUIRED COMPONENTS QtCore QtGui QtOpenGL)
    set(QT_USE_QTOPENGL TRUE)
    include(${QT_USE_FILE})
  else()
    # look for Qt5
    find_package(Qt5Widgets QUIET)
    if(${Qt5Widgets_FOUND})
      # build with Qt5
      find_package(Qt5Core REQUIRED)
      find_package(Qt5Widgets REQUIRED)
      find_package(Qt5OpenGL REQUIRED)
      include_directories(${Qt5OpenGL_INCLUDE_DIRS})
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Qt5OpenGL_EXECUTABLE_COMPILE_FLAGS}")
      # QT_LIBRARIES is set here so the link lines below work for both
      # Qt versions
      set(QT_LIBRARIES ${Qt5OpenGL_LIBRARIES})
    else()
      # no valid Qt framework found
      message(FATAL_ERROR "Error: Did not find Qt4 or Qt5")
    endif()
  endif()

  # required by both versions
  set(CMAKE_AUTOMOC TRUE)
  include_directories(${CMAKE_CURRENT_BINARY_DIR})

  # add examples
  add_executable(qimage_blur qimage_blur.cpp)
  target_link_libraries(qimage_blur ${OPENCL_LIBRARIES} ${Boost_LIBRARIES} ${QT_LIBRARIES})

  # examples which additionally link the OpenGL library
  set(QT_OPENGL_EXAMPLES
    mandelbrot
    nbody
    resize_image
  )

  foreach(EXAMPLE ${QT_OPENGL_EXAMPLES})
    add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
    target_link_libraries(${EXAMPLE} ${OPENCL_LIBRARIES} ${Boost_LIBRARIES} ${QT_LIBRARIES})
    if(APPLE)
      target_link_libraries(${EXAMPLE} "-framework OpenGL")
    elseif(UNIX)
      target_link_libraries(${EXAMPLE} GL)
    endif()
  endforeach()
endif()
compute-0.5/example/amd_cpp_kernel.cpp000066400000000000000000000103301263566244600201470ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#include #include #include #include #include #include #include #include

namespace compute = boost::compute;

// this example shows how to use the static c++ kernel language
// extension (currently only supported by AMD) to compile and
// execute a templated c++ kernel.
// Using platform vendor info to decide if this is AMD platform int main() { // get default device and setup context compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); // check the platform vendor string if(device.platform().vendor() != "Advanced Micro Devices, Inc."){ std::cerr << "error: static C++ kernel language is only " << "supported on AMD devices." << std::endl; return 0; } // create input int values and copy them to the device int int_data[] = { 1, 2, 3, 4}; compute::vector int_vector(int_data, int_data + 4, queue); // create input float values and copy them to the device float float_data[] = { 2.0f, 4.0f, 6.0f, 8.0f }; compute::vector float_vector(float_data, float_data + 4, queue); // create kernel source with a templated function and templated kernel const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( // define our templated function which returns the square of its input template inline T square(const T x) { return x * x; } // define our templated kernel which calls square on each value in data template __kernel void square_kernel(__global T *data) { const uint i = get_global_id(0); data[i] = square(data[i]); } // explicitly instantiate the square kernel for int's. this allows // for it to be called from the host with the given mangled name. template __attribute__((mangled_name(square_kernel_int))) __kernel void square_kernel(__global int *data); // also instantiate the square kernel for float's. template __attribute__((mangled_name(square_kernel_float))) __kernel void square_kernel(__global float *data); ); // build the program. must enable the c++ static kernel language // by passing the "-x clc++" compile option. compute::program square_program = compute::program::build_with_source(source, context, "-x clc++"); // create the square kernel for int's by using its mangled name declared // in the explicit template instantiation. 
compute::kernel square_int_kernel(square_program, "square_kernel_int"); square_int_kernel.set_arg(0, int_vector); // execute the square int kernel queue.enqueue_1d_range_kernel(square_int_kernel, 0, int_vector.size(), 4); // print out the squared int values std::cout << "int's: "; compute::copy( int_vector.begin(), int_vector.end(), std::ostream_iterator(std::cout, " "), queue ); std::cout << std::endl; // now create the square kernel for float's compute::kernel square_float_kernel(square_program, "square_kernel_float"); square_float_kernel.set_arg(0, float_vector); // execute the square int kernel queue.enqueue_1d_range_kernel(square_float_kernel, 0, float_vector.size(), 4); // print out the squared float values std::cout << "float's: "; compute::copy( float_vector.begin(), float_vector.end(), std::ostream_iterator(std::cout, " "), queue ); std::cout << std::endl; return 0; } compute-0.5/example/batched_determinant.cpp000066400000000000000000000066251263566244600212040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include namespace compute = boost::compute; // this example shows how to compute the determinant of many 4x4 matrices // using a determinant function and the transform() algorithm. 
in OpenCL the // float16 type can be used to store a 4x4 matrix and the components are laid // out in the following order: // // M = [ s0 s4 s8 sc ] // [ s1 s5 s9 sd ] // [ s2 s6 sa se ] // [ s3 s7 sb sf ] // // the input matrices are created using eigen's random matrix and then // used again at the end to verify the results of the determinant function. int main() { // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); std::cout << "device: " << gpu.name() << std::endl; size_t n = 1000; // create random 4x4 matrices on the host std::vector matrices(n); for(size_t i = 0; i < n; i++){ matrices[i] = Eigen::Matrix4f::Random(); } // copy matrices to the device using compute::float16_; compute::vector input(n, context); compute::copy( matrices.begin(), matrices.end(), input.begin(), queue ); // function returning the determinant of a 4x4 matrix. BOOST_COMPUTE_FUNCTION(float, determinant4x4, (const float16_ m), { return m.s0*m.s5*m.sa*m.sf + m.s0*m.s6*m.sb*m.sd + m.s0*m.s7*m.s9*m.se + m.s1*m.s4*m.sb*m.se + m.s1*m.s6*m.s8*m.sf + m.s1*m.s7*m.sa*m.sc + m.s2*m.s4*m.s9*m.sf + m.s2*m.s5*m.sb*m.sc + m.s2*m.s7*m.s8*m.sd + m.s3*m.s4*m.sa*m.sd + m.s3*m.s5*m.s8*m.se + m.s3*m.s6*m.s9*m.sc - m.s0*m.s5*m.sb*m.se - m.s0*m.s6*m.s9*m.sf - m.s0*m.s7*m.sa*m.sd - m.s1*m.s4*m.sa*m.sf - m.s1*m.s6*m.sb*m.sc - m.s1*m.s7*m.s8*m.se - m.s2*m.s4*m.sb*m.sd - m.s2*m.s5*m.s8*m.sf - m.s2*m.s7*m.s9*m.sc - m.s3*m.s4*m.s9*m.se - m.s3*m.s5*m.sa*m.sc - m.s3*m.s6*m.s8*m.sd; }); // calculate determinants on the gpu compute::vector determinants(n, context); compute::transform( input.begin(), input.end(), determinants.begin(), determinant4x4, queue ); // check determinants std::vector host_determinants(n); compute::copy( determinants.begin(), determinants.end(), host_determinants.begin(), queue ); for(size_t i = 0; i < n; i++){ float det = matrices[i].determinant(); if(std::abs(det - 
host_determinants[i]) > 1e-6){ std::cerr << "error: wrong determinant at " << i << " (" << host_determinants[i] << " != " << det << ")" << std::endl; return -1; } } return 0; } compute-0.5/example/black_scholes.cpp000066400000000000000000000135141263566244600200070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include namespace compute = boost::compute; // return a random float between lo and hi float rand_float(float lo, float hi) { float x = (float) std::rand() / (float) RAND_MAX; return (1.0f - x) * lo + x * hi; } // this example demostrates a black-scholes option pricing kernel. 
int main() { // number of options const int N = 4000000; // black-scholes parameters const float risk_free_rate = 0.02f; const float volatility = 0.30f; // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); std::cout << "device: " << gpu.name() << std::endl; // initialize option data on host std::vector stock_price_data(N); std::vector option_strike_data(N); std::vector option_years_data(N); std::srand(5347); for(int i = 0; i < N; i++){ stock_price_data[i] = rand_float(5.0f, 30.0f); option_strike_data[i] = rand_float(1.0f, 100.0f); option_years_data[i] = rand_float(0.25f, 10.0f); } // create memory buffers on the device compute::vector call_result(N, context); compute::vector put_result(N, context); compute::vector stock_price(N, context); compute::vector option_strike(N, context); compute::vector option_years(N, context); // copy initial values to the device compute::copy_n(stock_price_data.begin(), N, stock_price.begin(), queue); compute::copy_n(option_strike_data.begin(), N, option_strike.begin(), queue); compute::copy_n(option_years_data.begin(), N, option_years.begin(), queue); // source code for black-scholes program const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( // approximation of the cumulative normal distribution function static float cnd(float d) { const float A1 = 0.319381530f; const float A2 = -0.356563782f; const float A3 = 1.781477937f; const float A4 = -1.821255978f; const float A5 = 1.330274429f; const float RSQRT2PI = 0.39894228040143267793994605993438f; float K = 1.0f / (1.0f + 0.2316419f * fabs(d)); float cnd = RSQRT2PI * exp(-0.5f * d * d) * (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5))))); if(d > 0){ cnd = 1.0f - cnd; } return cnd; } // black-scholes option pricing kernel __kernel void black_scholes(__global float *call_result, __global float *put_result, __global const float *stock_price, __global const float 
*option_strike, __global const float *option_years, float risk_free_rate, float volatility) { const uint opt = get_global_id(0); float S = stock_price[opt]; float X = option_strike[opt]; float T = option_years[opt]; float R = risk_free_rate; float V = volatility; float sqrtT = sqrt(T); float d1 = (log(S / X) + (R + 0.5f * V * V) * T) / (V * sqrtT); float d2 = d1 - V * sqrtT; float CNDD1 = cnd(d1); float CNDD2 = cnd(d2); float expRT = exp(-R * T); call_result[opt] = S * CNDD1 - X * expRT * CNDD2; put_result[opt] = X * expRT * (1.0f - CNDD2) - S * (1.0f - CNDD1); } ); // build black-scholes program compute::program program = compute::program::create_with_source(source, context); program.build(); // setup black-scholes kernel compute::kernel kernel(program, "black_scholes"); kernel.set_arg(0, call_result); kernel.set_arg(1, put_result); kernel.set_arg(2, stock_price); kernel.set_arg(3, option_strike); kernel.set_arg(4, option_years); kernel.set_arg(5, risk_free_rate); kernel.set_arg(6, volatility); // execute black-scholes kernel queue.enqueue_1d_range_kernel(kernel, 0, N, 0); // print out the first option's put and call prices float call0, put0; compute::copy_n(put_result.begin(), 1, &put0, queue); compute::copy_n(call_result.begin(), 1, &call0, queue); std::cout << "option 0 call price: " << call0 << std::endl; std::cout << "option 0 put price: " << put0 << std::endl; // due to the differences in the random-number generators between Operating Systems // and/or compilers, we will get different "expected" results for this example #ifdef __APPLE__ double expected_call0 = 0.000249461; double expected_put0 = 26.2798; #elif _MSC_VER double expected_call0 = 8.21412; double expected_put0 = 2.25904; #else double expected_call0 = 0.0999f; double expected_put0 = 43.0524f; #endif // check option prices if(std::abs(call0 - expected_call0) > 1e-4 || std::abs(put0 - expected_put0) > 1e-4){ std::cerr << "error: option prices are wrong" << std::endl; return -1; } return 0; } 
compute-0.5/example/copy_data.cpp000066400000000000000000000024551263566244600171600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// //[copy_data_example #include #include #include namespace compute = boost::compute; int main() { // get default device and setup context compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); // create data array on host int host_data[] = { 1, 3, 5, 7, 9 }; // create vector on device compute::vector device_vector(5, context); // copy from host to device compute::copy( host_data, host_data + 5, device_vector.begin(), queue ); // create vector on host std::vector host_vector(5); // copy data back to host compute::copy( device_vector.begin(), device_vector.end(), host_vector.begin(), queue ); return 0; } //] compute-0.5/example/fizz_buzz.cpp000066400000000000000000000121401263566244600172410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include namespace compute = boost::compute; const char fizz_buzz_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( // returns the length of the string for the number 'n'. 
This is used // during the first pass when we calculate the amount of space needed // for each string in the fizz-buzz sequence. inline uint fizz_buzz_string_length(uint n) { if((n % 5 == 0) && (n % 3 == 0)){ return sizeof("fizzbuzz"); } else if(n % 5 == 0){ return sizeof("fizz"); } else if(n % 3 == 0){ return sizeof("buzz"); } else { uint digits = 0; while(n){ n /= 10; digits++; } return digits + 1; } } // first-pass kernel which calculates the string length for each number // and writes it to the string_lengths array. these will then be passed // to exclusive_scan() to calculate the output offsets for each string. __kernel void fizz_buzz_allocate_strings(__global uint *string_lengths) { const uint i = get_global_id(0); const uint n = i + 1; string_lengths[i] = fizz_buzz_string_length(n); } // copy the string 's' with length 'n' to 'result' (just like strncpy()) inline void copy_string(__constant const char *s, uint n, __global char *result) { while(n--){ result[n] = s[n]; } } // reverse the string [start, end). inline void reverse_string(__global char *start, __global char *end) { while(start < end){ char tmp = *end; *end = *start; *start = tmp; start++; end--; } } // second-pass kernel which copies the fizz-buzz string for each number to // buffer using the previously calculated offsets. 
__kernel void fizz_buzz_copy_strings(__global const uint *offsets, __global char *buffer) { const uint i = get_global_id(0); const uint n = i + 1; const uint offset = offsets[i]; if((n % 5 == 0) && (n % 3 == 0)){ copy_string("fizzbuzz\n", 9, buffer + offset); } else if(n % 5 == 0){ copy_string("fizz\n", 5, buffer + offset); } else if(n % 3 == 0){ copy_string("buzz\n", 5, buffer + offset); } else { // convert number to string and write it to the output __global char *number = buffer + offset; uint n_ = n; while(n_){ *number++ = (n_%10) + '0'; n_ /= 10; } reverse_string(buffer + offset, number - 1); *number = '\n'; } } ); int main() { using compute::dim; using compute::uint_; // fizz-buzz up to 100 size_t n = 100; // get the default device compute::device device = compute::system::default_device(); compute::context ctx(device); compute::command_queue queue(ctx, device); // compile the fizz-buzz program compute::program fizz_buzz_program = compute::program::create_with_source(fizz_buzz_source, ctx); fizz_buzz_program.build(); // create a vector for the output string and computing offsets compute::vector output(ctx); compute::vector offsets(n, ctx); // run the allocate kernel to calculate string lengths compute::kernel allocate_kernel(fizz_buzz_program, "fizz_buzz_allocate_strings"); allocate_kernel.set_arg(0, offsets); queue.enqueue_nd_range_kernel(allocate_kernel, dim(0), dim(n), dim(1)); // allocate space for the output string output.resize( compute::accumulate(offsets.begin(), offsets.end(), 0, queue) ); // scan string lengths for each number to calculate the output offsets compute::exclusive_scan( offsets.begin(), offsets.end(), offsets.begin(), queue ); // run the copy kernel to fill the output buffer compute::kernel copy_kernel(fizz_buzz_program, "fizz_buzz_copy_strings"); copy_kernel.set_arg(0, offsets); copy_kernel.set_arg(1, output); queue.enqueue_nd_range_kernel(copy_kernel, dim(0), dim(n), dim(1)); // copy the string to the host and print it to stdout 
std::string str; str.resize(output.size()); compute::copy(output.begin(), output.end(), str.begin(), queue); std::cout << str; return 0; } compute-0.5/example/hello_world.cpp000066400000000000000000000014131263566244600175200ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// //[hello_world_example #include #include namespace compute = boost::compute; int main() { // get the default device compute::device device = compute::system::default_device(); // print the device's name std::cout << "hello from " << device.name() << std::endl; return 0; } //] compute-0.5/example/host_sort.cpp000066400000000000000000000031351263566244600172350ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include namespace compute = boost::compute; namespace karma = boost::spirit::karma; int rand_int() { return rand() % 100; } // this example demonstrates how to sort a std::vector of ints on the GPU int main() { // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); std::cout << "device: " << gpu.name() << std::endl; // create vector of random values on the host std::vector vector(8); std::generate(vector.begin(), vector.end(), rand_int); // print input vector std::cout << "input: [ " << karma::format(karma::int_ % ", ", vector) << " ]" << std::endl; // sort vector compute::sort(vector.begin(), vector.end(), queue); // print sorted vector std::cout << "output: [ " << karma::format(karma::int_ % ", ", vector) << " ]" << std::endl; return 0; } compute-0.5/example/inline_ptx.cpp000066400000000000000000000046611263566244600173670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include namespace compute = boost::compute; // this example shows how to embed PTX assembly instructions // directly into boost.compute functions and use them with the // transform() algorithm. 
int main() { // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); std::cout << "device: " << gpu.name() << std::endl; // check to ensure we have an nvidia device if(gpu.vendor() != "NVIDIA Corporation"){ std::cerr << "error: inline PTX assembly is only supported " << "on NVIDIA devices." << std::endl; return 0; } // create input values and copy them to the device using compute::uint_; uint_ data[] = { 0x00, 0x01, 0x11, 0xFF }; compute::vector input(data, data + 4, queue); // function returning the number of bits set (aka population count or // popcount) using the "popc" inline ptx assembly instruction. BOOST_COMPUTE_FUNCTION(uint_, nvidia_popc, (uint_ x), { uint count; asm("popc.b32 %0, %1;" : "=r"(count) : "r"(x)); return count; }); // calculate the popcount for each input value compute::vector output(input.size(), context); compute::transform( input.begin(), input.end(), output.begin(), nvidia_popc, queue ); // copy results back to the host and print them out std::vector counts(output.size()); compute::copy(output.begin(), output.end(), counts.begin(), queue); for(size_t i = 0; i < counts.size(); i++){ std::cout << "0x" << std::hex << data[i] << " has " << counts[i] << " bits set" << std::endl; } return 0; } compute-0.5/example/k_means.cpp000066400000000000000000000174631263566244600166370ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include #include #include #include namespace compute = boost::compute; using compute::dim; using compute::int_; using compute::float_; using compute::float2_; // the k-means example implements the k-means clustering algorithm int main() { // number of clusters size_t k = 6; // number of points size_t n_points = 4500; // height and width of image size_t height = 800; size_t width = 800; // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); // generate random, uniformily-distributed points compute::default_random_engine random_engine(queue); compute::uniform_real_distribution uniform_distribution(0, 800); compute::vector points(n_points, context); uniform_distribution.generate( compute::make_buffer_iterator(points.get_buffer(), 0), compute::make_buffer_iterator(points.get_buffer(), n_points * 2), random_engine, queue ); // initialize all points to cluster 0 compute::vector clusters(n_points, context); compute::fill(clusters.begin(), clusters.end(), 0, queue); // create initial means with the first k points compute::vector means(k, context); compute::copy_n(points.begin(), k, means.begin(), queue); // k-means clustering program source const char k_means_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void assign_clusters(__global const float2 *points, __global const float2 *means, const int k, __global int *clusters) { const uint gid = get_global_id(0); const float2 point = points[gid]; // find the closest cluster float current_distance = 0; int closest_cluster = -1; // find closest cluster mean to the point for(int i = 0; i < k; i++){ const float2 mean = means[i]; int distance_to_mean = distance(point, mean); if(closest_cluster == -1 || distance_to_mean < current_distance){ current_distance = 
distance_to_mean; closest_cluster = i; } } // write new cluster clusters[gid] = closest_cluster; } __kernel void update_means(__global const float2 *points, const uint n_points, __global float2 *means, __global const int *clusters) { const uint k = get_global_id(0); float2 sum = { 0, 0 }; float count = 0; for(uint i = 0; i < n_points; i++){ if(clusters[i] == k){ sum += points[i]; count += 1; } } means[k] = sum / count; } ); // build the k-means program compute::program k_means_program = compute::program::build_with_source(k_means_source, context); // setup the k-means kernels compute::kernel assign_clusters_kernel(k_means_program, "assign_clusters"); assign_clusters_kernel.set_arg(0, points); assign_clusters_kernel.set_arg(1, means); assign_clusters_kernel.set_arg(2, int_(k)); assign_clusters_kernel.set_arg(3, clusters); compute::kernel update_means_kernel(k_means_program, "update_means"); update_means_kernel.set_arg(0, points); update_means_kernel.set_arg(1, int_(n_points)); update_means_kernel.set_arg(2, means); update_means_kernel.set_arg(3, clusters); // run the k-means algorithm for(int iteration = 0; iteration < 25; iteration++){ queue.enqueue_1d_range_kernel(assign_clusters_kernel, 0, n_points, 0); queue.enqueue_1d_range_kernel(update_means_kernel, 0, k, 0); } // create output image compute::image2d image( context, width, height, compute::image_format(CL_RGBA, CL_UNSIGNED_INT8) ); // program with two kernels, one to fill the image with white, and then // one the draw to points calculated in coordinates on the image const char draw_walk_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void draw_points(__global const float2 *points, __global const int *clusters, __write_only image2d_t image) { const uint i = get_global_id(0); const float2 coord = points[i]; // map cluster number to color uint4 color = { 0, 0, 0, 0 }; switch(clusters[i]){ case 0: color = (uint4)(255, 0, 0, 255); break; case 1: color = (uint4)(0, 255, 0, 255); break; case 2: color = 
(uint4)(0, 0, 255, 255); break; case 3: color = (uint4)(255, 255, 0, 255); break; case 4: color = (uint4)(255, 0, 255, 255); break; case 5: color = (uint4)(0, 255, 255, 255); break; } // draw a 3x3 pixel point for(int x = -1; x <= 1; x++){ for(int y = -1; y <= 1; y++){ if(coord.x + x > 0 && coord.x + x < get_image_width(image) && coord.y + y > 0 && coord.y + y < get_image_height(image)){ write_imageui(image, (int2)(coord.x, coord.y) + (int2)(x, y), color); } } } } __kernel void fill_gray(__write_only image2d_t image) { const int2 coord = { get_global_id(0), get_global_id(1) }; if(coord.x < get_image_width(image) && coord.y < get_image_height(image)){ uint4 gray = { 15, 15, 15, 15 }; write_imageui(image, coord, gray); } } ); // build the program compute::program draw_program = compute::program::build_with_source(draw_walk_source, context); // fill image with dark gray compute::kernel fill_kernel(draw_program, "fill_gray"); fill_kernel.set_arg(0, image); queue.enqueue_nd_range_kernel( fill_kernel, dim(0, 0), dim(width, height), dim(1, 1) ); // draw points colored according to cluster compute::kernel draw_kernel(draw_program, "draw_points"); draw_kernel.set_arg(0, points); draw_kernel.set_arg(1, clusters); draw_kernel.set_arg(2, image); queue.enqueue_1d_range_kernel(draw_kernel, 0, n_points, 0); // show image compute::opencv_imshow("k-means", image, queue); // wait and return cv::waitKey(0); return 0; } compute-0.5/example/list_devices.cpp000066400000000000000000000027071263566244600176720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include namespace compute = boost::compute; int main() { std::vector platforms = compute::system::platforms(); for(size_t i = 0; i < platforms.size(); i++){ const compute::platform &platform = platforms[i]; std::cout << "Platform '" << platform.name() << "'" << std::endl; std::vector devices = platform.devices(); for(size_t j = 0; j < devices.size(); j++){ const compute::device &device = devices[j]; std::string type; if(device.type() & compute::device::gpu) type = "GPU Device"; else if(device.type() & compute::device::cpu) type = "CPU Device"; else if(device.type() & compute::device::accelerator) type = "Accelerator Device"; else type = "Unknown Device"; std::cout << " " << type << ": " << device.name() << std::endl; } } return 0; } compute-0.5/example/longest_vector.cpp000066400000000000000000000036221263566244600202470ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include namespace compute = boost::compute; // this example shows how to use the max_element() algorithm along with // a transform_iterator and the length() function to find the longest // 4-component vector in an array of vectors int main() { using compute::float4_; // vectors data float data[] = { 1.0f, 2.0f, 3.0f, 0.0f, 4.0f, 5.0f, 6.0f, 0.0f, 7.0f, 8.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; // create device vector with the vector data compute::vector vector( reinterpret_cast(data), reinterpret_cast(data) + 4 ); // find the longest vector compute::vector::const_iterator iter = compute::max_element( compute::make_transform_iterator( vector.begin(), compute::length() ), compute::make_transform_iterator( vector.end(), compute::length() ) ).base(); // print the index of the longest vector std::cout << "longest vector index: " << std::distance(vector.begin(), iter) << std::endl; return 0; } compute-0.5/example/mandelbrot.cpp000066400000000000000000000134711263566244600173440ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #if QT_VERSION >= 0x050000 #include #else #include #endif #include #ifndef Q_MOC_RUN #include #include #include #include #include #include #include #endif // Q_MOC_RUN namespace compute = boost::compute; // opencl source code const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( // map value to color float4 color(uint i) { uchar c = i; uchar x = 35; uchar y = 25; uchar z = 15; uchar max = 255; if(i == 256) return (float4)(0, 0, 0, 255); else return (float4)(max-x*i, max-y*i, max-z*i, max) / 255.f; } __kernel void mandelbrot(__write_only image2d_t image) { const uint x_coord = get_global_id(0); const uint y_coord = get_global_id(1); const uint width = get_global_size(0); const uint height = get_global_size(1); float x_origin = ((float) x_coord / width) * 3.25f - 2.0f; float y_origin = ((float) y_coord / height) * 2.5f - 1.25f; float x = 0.0f; float y = 0.0f; uint i = 0; while(x*x + y*y <= 4.f && i < 256){ float tmp = x*x - y*y + x_origin; y = 2*x*y + y_origin; x = tmp; i++; } int2 coord = { x_coord, y_coord }; write_imagef(image, coord, color(i)); }; ); class MandelbrotWidget : public QGLWidget { Q_OBJECT public: MandelbrotWidget(QWidget *parent = 0); ~MandelbrotWidget(); void initializeGL(); void resizeGL(int width, int height); void paintGL(); void keyPressEvent(QKeyEvent* event); private: compute::context context_; compute::command_queue queue_; compute::program program_; GLuint gl_texture_; compute::opengl_texture cl_texture_; }; MandelbrotWidget::MandelbrotWidget(QWidget *parent) : QGLWidget(parent) { gl_texture_ = 0; } MandelbrotWidget::~MandelbrotWidget() { } void MandelbrotWidget::initializeGL() { // setup opengl glDisable(GL_LIGHTING); // create the OpenGL/OpenCL shared context context_ = compute::opengl_create_shared_context(); // get gpu device compute::device gpu = context_.get_device(); std::cout << "device: " << gpu.name() << std::endl; // setup 
command queue queue_ = compute::command_queue(context_, gpu); // build mandelbrot program program_ = compute::program::create_with_source(source, context_); program_.build(); } void MandelbrotWidget::resizeGL(int width, int height) { #if QT_VERSION >= 0x050000 // scale height/width based on device pixel ratio width /= windowHandle()->devicePixelRatio(); height /= windowHandle()->devicePixelRatio(); #endif // resize viewport glViewport(0, 0, width, height); // delete old texture if(gl_texture_){ glDeleteTextures(1, &gl_texture_); gl_texture_ = 0; } // generate new texture glGenTextures(1, &gl_texture_); glBindTexture(GL_TEXTURE_2D, gl_texture_); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0 ); // create opencl object for the texture cl_texture_ = compute::opengl_texture( context_, GL_TEXTURE_2D, 0, gl_texture_, CL_MEM_WRITE_ONLY ); } void MandelbrotWidget::paintGL() { using compute::dim; float w = width(); float h = height(); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0.0, w, 0.0, h, -1.0, 1.0); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); // setup the mandelbrot kernel compute::kernel kernel(program_, "mandelbrot"); kernel.set_arg(0, cl_texture_); // acquire the opengl texture so it can be used in opencl compute::opengl_enqueue_acquire_gl_objects(1, &cl_texture_.get(), queue_); // execute the mandelbrot kernel queue_.enqueue_nd_range_kernel( kernel, dim(0, 0), dim(width(), height()), dim(1, 1) ); // release the opengl texture so it can be used by opengl compute::opengl_enqueue_release_gl_objects(1, &cl_texture_.get(), queue_); // ensure opencl is finished before rendering in opengl queue_.finish(); // draw a single quad with the mandelbrot image texture 
glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, gl_texture_); glBegin(GL_QUADS); glTexCoord2f(0, 0); glVertex2f(0, 0); glTexCoord2f(0, 1); glVertex2f(0, h); glTexCoord2f(1, 1); glVertex2f(w, h); glTexCoord2f(1, 0); glVertex2f(w, 0); glEnd(); } void MandelbrotWidget::keyPressEvent(QKeyEvent* event) { if(event->key() == Qt::Key_Escape) { this->close(); } } // the mandelbrot example shows how to create a mandelbrot image in // OpenCL and render the image as a texture in OpenGL int main(int argc, char *argv[]) { QApplication app(argc, argv); MandelbrotWidget widget; widget.show(); return app.exec(); } #include "mandelbrot.moc" compute-0.5/example/mapped_view.cpp000066400000000000000000000026731263566244600175170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include namespace compute = boost::compute; // this example demonstrates how to use the mapped_view class to map // an array of numbers to device memory and use the reduce() algorithm // to calculate the sum. 
int main() { // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); std::cout << "device: " << gpu.name() << std::endl; // create data on host int data[] = { 4, 2, 3, 7, 8, 9, 1, 6 }; // create mapped view on device compute::mapped_view view(data, 8, context); // use reduce() to calculate sum on the device int sum = 0; compute::reduce(view.begin(), view.end(), &sum, queue); // print the sum on the host std::cout << "sum: " << sum << std::endl; return 0; } compute-0.5/example/matrix_transpose.cpp000066400000000000000000000307311263566244600206150ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Benoit Dequidt // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include namespace compute = boost::compute; namespace po = boost::program_options; using compute::uint_; const uint_ TILE_DIM = 32; const uint_ BLOCK_ROWS = 8; // generate a copy kernel program compute::kernel make_copy_kernel(const compute::context& context) { // source for the copy_kernel program const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void copy_kernel(__global const float *src, __global float *dst) { uint x = get_group_id(0) * TILE_DIM + get_local_id(0); uint y = get_group_id(1) * TILE_DIM + get_local_id(1); uint width = get_num_groups(0) * TILE_DIM; for(uint i = 0 ; i < TILE_DIM ; i+= BLOCK_ROWS){ dst[(y+i)*width +x] = src[(y+i)*width + x]; } } ); // setup compilation flags for the copy program std::stringstream options; options << "-DTILE_DIM=" << TILE_DIM << " -DBLOCK_ROWS=" << BLOCK_ROWS; // create and build the copy program compute::program program = compute::program::build_with_source(source, context, options.str()); // create and return the copy kernel return program.create_kernel("copy_kernel"); } // generate a naive transpose kernel compute::kernel make_naive_transpose_kernel(const compute::context& context) { // source for the naive_transpose kernel const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void naive_transpose(__global const float *src, __global float *dst) { uint x = get_group_id(0) * TILE_DIM + get_local_id(0); uint y = get_group_id(1) * TILE_DIM + get_local_id(1); uint width = get_num_groups(0) * TILE_DIM; for(uint i = 0 ; i < TILE_DIM; i+= BLOCK_ROWS){ dst[x*width + y+i] = src[(y+i)*width + x]; } } ); // setup compilation flags for the naive_transpose program std::stringstream options; options << "-DTILE_DIM=" << TILE_DIM << " -DBLOCK_ROWS=" << BLOCK_ROWS; // create and build the naive_transpose program compute::program program = 
compute::program::build_with_source(source, context, options.str()); // create and return the naive_transpose kernel return program.create_kernel("naive_transpose"); } // generates a coalesced transpose kernel compute::kernel make_coalesced_transpose_kernel(const compute::context& context) { // source for the coalesced_transpose kernel const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void coalesced_transpose(__global const float *src, __global float *dst) { __local float tile[TILE_DIM][TILE_DIM]; // compute indexes uint x = get_group_id(0) * TILE_DIM + get_local_id(0); uint y = get_group_id(1) * TILE_DIM + get_local_id(1); uint width = get_num_groups(0) * TILE_DIM; // load inside local memory for(uint i = 0 ; i < TILE_DIM; i+= BLOCK_ROWS){ tile[get_local_id(1)+i][get_local_id(0)] = src[(y+i)*width + x]; } barrier(CLK_LOCAL_MEM_FENCE); // transpose indexes x = get_group_id(1) * TILE_DIM + get_local_id(0); y = get_group_id(0) * TILE_DIM + get_local_id(1); // write output from local memory for(uint i = 0 ; i < TILE_DIM ; i+=BLOCK_ROWS){ dst[(y+i)*width + x] = tile[get_local_id(0)][get_local_id(1)+i]; } } ); // setup compilation flags for the coalesced_transpose program std::stringstream options; options << "-DTILE_DIM=" << TILE_DIM << " -DBLOCK_ROWS=" << BLOCK_ROWS; // create and build the coalesced_transpose program compute::program program = compute::program::build_with_source(source, context, options.str()); // create and return coalesced_transpose kernel return program.create_kernel("coalesced_transpose"); } // generate a coalesced withtout bank conflicts kernel compute::kernel make_coalesced_no_bank_conflicts_kernel(const compute::context& context) { const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void coalesced_no_bank_conflicts(__global const float *src, __global float *dst) { // TILE_DIM+1 is here to avoid bank conflicts in local memory __local float tile[TILE_DIM][TILE_DIM+1]; // compute indexes uint x = get_group_id(0) * TILE_DIM 
+ get_local_id(0); uint y = get_group_id(1) * TILE_DIM + get_local_id(1); uint width = get_num_groups(0) * TILE_DIM; // load inside local memory for(uint i = 0 ; i < TILE_DIM; i+= BLOCK_ROWS){ tile[get_local_id(1)+i][get_local_id(0)] = src[(y+i)*width + x]; } barrier(CLK_LOCAL_MEM_FENCE); // transpose indexes x = get_group_id(1) * TILE_DIM + get_local_id(0); y = get_group_id(0) * TILE_DIM + get_local_id(1); // write output from local memory for(uint i = 0 ; i < TILE_DIM ; i+=BLOCK_ROWS){ dst[(y+i)*width + x] = tile[get_local_id(0)][get_local_id(1)+i]; } } ); // setup compilation flags for the coalesced_no_bank_conflicts program std::stringstream options; options << "-DTILE_DIM=" << TILE_DIM << " -DBLOCK_ROWS=" << BLOCK_ROWS; // create and build the coalesced_no_bank_conflicts program compute::program program = compute::program::build_with_source(source, context, options.str()); // create and return the coalesced_no_bank_conflicts kernel return program.create_kernel("coalesced_no_bank_conflicts"); } // compare 'expectedResult' to 'transposedMatrix'. prints an error message if not equal. 
bool check_transposition(const std::vector& expectedResult, uint_ size, const std::vector& transposedMatrix) { for(uint_ i = 0 ; i < size ; ++i){ if(expectedResult[i] != transposedMatrix[i]){ std::cout << "idx = " << i << " , expected " << expectedResult[i] << " , got " << transposedMatrix[i] << std::endl; std::cout << "FAILED" << std::endl; return false; } } return true; } // generate a matrix inside 'in' and do the tranposition inside 'out' void generate_matrix(std::vector& in, std::vector& out, uint_ rows, uint_ cols) { // generate a matrix for(uint_ i = 0 ; i < rows ; ++i){ for(uint_ j = 0 ; j < cols ; ++j){ in[i*cols + j] = i*cols + j; } } // store transposed result for(uint_ j = 0; j < cols ; ++j){ for(uint_ i = 0 ; i < rows ; ++i){ out[j*rows + i] = in[i*cols + j]; } } } // neccessary for 64-bit integer on win32 #ifdef _WIN32 #define uint64_t unsigned __int64 #endif int main(int argc, char *argv[]) { // setup command line arguments po::options_description options("options"); options.add_options() ("help", "show usage instructions") ("rows", po::value()->default_value(4096), "number of matrix rows") ("cols", po::value()->default_value(4096), "number of matrix columns") ; // parse command line po::variables_map vm; po::store(po::parse_command_line(argc, argv, options), vm); po::notify(vm); // check command line arguments if(vm.count("help")){ std::cout << options << std::endl; return 0; } // get number rows and columns for the matrix const uint_ rows = vm["rows"].as(); const uint_ cols = vm["cols"].as(); // get the default device compute::device device = compute::system::default_device(); // print out device name and matrix information std::cout << "Device: " << device.name() << std::endl; std::cout << "Matrix Size: " << rows << "x" << cols << std::endl; std::cout << "Grid Size: " << rows/TILE_DIM << "x" << cols/TILE_DIM << " blocks" << std::endl; std::cout << "Local Size: " << TILE_DIM << "x" << BLOCK_ROWS << " threads" << std::endl; std::cout << std::endl; 
const size_t global_work_size[2] = {rows, cols*BLOCK_ROWS/TILE_DIM}; const size_t local_work_size[2] = {TILE_DIM, BLOCK_ROWS}; // setup input data on the host const uint_ size = rows * cols; std::vector h_input(size); std::vector h_output(size); std::vector expectedResult(size); generate_matrix(h_input, expectedResult, rows, cols); // create a context for the device compute::context context(device); // device vectors compute::vector d_input(size, context); compute::vector d_output(size, context); // command_queue with profiling compute::command_queue queue(context, device, compute::command_queue::enable_profiling); // copy input data compute::copy(h_input.begin(), h_input.end(), d_input.begin(), queue); // simple copy kernel std::cout << "Testing copy_kernel:" << std::endl; compute::kernel kernel = make_copy_kernel(context); kernel.set_arg(0, d_input); kernel.set_arg(1, d_output); compute::event start; start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); uint64_t elapsed = start.duration().count(); std::cout << " Elapsed: " << elapsed << " ns" << std::endl; std::cout << " BandWidth: " << 2*rows*cols*sizeof(float) / elapsed << " GB/s" << std::endl; compute::copy(d_output.begin(), d_output.end(), h_output.begin(), queue); check_transposition(h_input, rows*cols, h_output); std::cout << std::endl; // naive_transpose kernel std::cout << "Testing naive_transpose:" << std::endl; kernel = make_naive_transpose_kernel(context); kernel.set_arg(0, d_input); kernel.set_arg(1, d_output); start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); elapsed = start.duration().count(); std::cout << " Elapsed: " << elapsed << " ns" << std::endl; std::cout << " BandWidth: " << 2*rows*cols*sizeof(float) / elapsed << " GB/s" << std::endl; compute::copy(d_output.begin(), d_output.end(), h_output.begin(), queue); check_transposition(expectedResult, rows*cols, h_output); std::cout << std::endl; 
// coalesced_transpose kernel std::cout << "Testing coalesced_transpose:" << std::endl; kernel = make_coalesced_transpose_kernel(context); kernel.set_arg(0, d_input); kernel.set_arg(1, d_output); start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); elapsed = start.duration().count(); std::cout << " Elapsed: " << elapsed << " ns" << std::endl; std::cout << " BandWidth: " << 2*rows*cols*sizeof(float) / elapsed << " GB/s" << std::endl; compute::copy(d_output.begin(), d_output.end(), h_output.begin(), queue); check_transposition(expectedResult, rows*cols, h_output); std::cout << std::endl; // coalesced_no_bank_conflicts kernel std::cout << "Testing coalesced_no_bank_conflicts:" << std::endl; kernel = make_coalesced_no_bank_conflicts_kernel(context); kernel.set_arg(0, d_input); kernel.set_arg(1, d_output); start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); elapsed = start.duration().count(); std::cout << " Elapsed: " << elapsed << " ns" << std::endl; std::cout << " BandWidth: " << 2*rows*cols*sizeof(float) / elapsed << " GB/s" << std::endl; compute::copy(d_output.begin(), d_output.end(), h_output.begin(), queue); check_transposition(expectedResult, rows*cols, h_output); std::cout << std::endl; return 0; } compute-0.5/example/memory_limits.cpp000066400000000000000000000023401263566244600200770ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include namespace compute = boost::compute; int main() { // get the default device compute::device device = compute::system::default_device(); std::cout << "device: " << device.name() << std::endl; std::cout << " global memory size: " << device.get_info(CL_DEVICE_GLOBAL_MEM_SIZE) / 1024 / 1024 << " MB" << std::endl; std::cout << " local memory size: " << device.get_info(CL_DEVICE_LOCAL_MEM_SIZE) / 1024 << " KB" << std::endl; std::cout << " constant memory size: " << device.get_info(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) / 1024 << " KB" << std::endl; return 0; } compute-0.5/example/monte_carlo.cpp000066400000000000000000000045501263566244600175150ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include namespace compute = boost::compute; int main() { // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); std::cout << "device: " << gpu.name() << std::endl; using compute::uint_; using compute::uint2_; // ten million random points size_t n = 10000000; // generate random numbers compute::default_random_engine rng(queue); compute::vector vector(n * 2, context); rng.generate(vector.begin(), vector.end(), queue); // function returing true if the point is within the unit circle BOOST_COMPUTE_FUNCTION(bool, is_in_unit_circle, (const uint2_ point), { const float x = point.x / (float) UINT_MAX - 1; const float y = point.y / (float) UINT_MAX - 1; return (x*x + y*y) < 1.0f; }); // iterate over vector as vector compute::buffer_iterator start = compute::make_buffer_iterator(vector.get_buffer(), 0); compute::buffer_iterator end = compute::make_buffer_iterator(vector.get_buffer(), vector.size() / 2); // count number of random points within the unit circle size_t count = compute::count_if(start, end, is_in_unit_circle, queue); // print out values float count_f = static_cast(count); std::cout << "count: " << count << " / " << n << std::endl; std::cout << "ratio: " << count_f / float(n) << std::endl; std::cout << "pi = " << (count_f / float(n)) * 4.0f << std::endl; return 0; } compute-0.5/example/nbody.cpp000066400000000000000000000154041263566244600163260ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Fabian Köhler // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #define GL_GLEXT_PROTOTYPES #ifdef __APPLE__ #include #include #else #include #include #endif #include #if QT_VERSION >= 0x050000 #include #else #include #endif #include #include #include #include #include #ifndef Q_MOC_RUN #include #include #include #include #include #endif // Q_MOC_RUN namespace compute = boost::compute; namespace po = boost::program_options; using compute::uint_; using compute::float4_; const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void updateVelocity(__global const float4* position, __global float4* velocity, float dt, uint N) { uint gid = get_global_id(0); float4 r = { 0.0f, 0.0f, 0.0f, 0.0f }; float f = 0.0f; for(uint i = 0; i != gid; i++) { if(i != gid) { r = position[i]-position[gid]; f = length(r)+0.001f; f *= f*f; f = dt/f; velocity[gid] += f*r; } } } __kernel void updatePosition(__global float4* position, __global const float4* velocity, float dt) { uint gid = get_global_id(0); position[gid].xyz += dt*velocity[gid].xyz; } ); class NBodyWidget : public QGLWidget { Q_OBJECT public: NBodyWidget(std::size_t particles, float dt, QWidget* parent = 0); ~NBodyWidget(); void initializeGL(); void resizeGL(int width, int height); void paintGL(); void updateParticles(); void keyPressEvent(QKeyEvent* event); private: QTimer* timer; compute::context m_context; compute::command_queue m_queue; compute::program m_program; compute::opengl_buffer m_position; compute::vector* m_velocity; compute::kernel m_velocity_kernel; compute::kernel m_position_kernel; bool m_initial_draw; const uint_ m_particles; const float m_dt; }; NBodyWidget::NBodyWidget(std::size_t particles, float dt, QWidget* parent) : QGLWidget(parent), m_initial_draw(true), m_particles(particles), m_dt(dt) { // create a timer to redraw as fast as possible timer = new QTimer(this); connect(timer, SIGNAL(timeout()), this, SLOT(updateGL())); timer->start(1); } NBodyWidget::~NBodyWidget() { 
delete m_velocity; // delete the opengl buffer GLuint vbo = m_position.get_opengl_object(); glDeleteBuffers(1, &vbo); } void NBodyWidget::initializeGL() { // create context, command queue and program m_context = compute::opengl_create_shared_context(); m_queue = compute::command_queue(m_context, m_context.get_device()); m_program = compute::program::create_with_source(source, m_context); m_program.build(); // prepare random particle positions that will be transferred to the vbo float4_* temp = new float4_[m_particles]; boost::random::uniform_real_distribution dist(-0.5f, 0.5f); boost::random::mt19937_64 gen; for(size_t i = 0; i < m_particles; i++) { temp[i][0] = dist(gen); temp[i][1] = dist(gen); temp[i][2] = dist(gen); temp[i][3] = 1.0f; } // create an OpenGL vbo GLuint vbo = 0; glGenBuffers(1, &vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, m_particles*sizeof(float4_), temp, GL_DYNAMIC_DRAW); // create a OpenCL buffer from the vbo m_position = compute::opengl_buffer(m_context, vbo); delete[] temp; // create buffer for velocities m_velocity = new compute::vector(m_particles, m_context); compute::fill(m_velocity->begin(), m_velocity->end(), float4_(0.0f, 0.0f, 0.0f, 0.0f), m_queue); // create compute kernels m_velocity_kernel = m_program.create_kernel("updateVelocity"); m_velocity_kernel.set_arg(0, m_position); m_velocity_kernel.set_arg(1, m_velocity->get_buffer()); m_velocity_kernel.set_arg(2, m_dt); m_velocity_kernel.set_arg(3, m_particles); m_position_kernel = m_program.create_kernel("updatePosition"); m_position_kernel.set_arg(0, m_position); m_position_kernel.set_arg(1, m_velocity->get_buffer()); m_position_kernel.set_arg(2, m_dt); } void NBodyWidget::resizeGL(int width, int height) { // update viewport glViewport(0, 0, width, height); } void NBodyWidget::paintGL() { // clear buffer glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // check if this is the first draw if(m_initial_draw) { // do not update particles m_initial_draw = 
false; } else { // update particles updateParticles(); } // draw glVertexPointer(4, GL_FLOAT, 0, 0); glEnableClientState(GL_VERTEX_ARRAY); glDrawArrays(GL_POINTS, 0, m_particles); glFinish(); } void NBodyWidget::updateParticles() { // enqueue kernels to update particles and make sure that the command queue is finished compute::opengl_enqueue_acquire_buffer(m_position, m_queue); m_queue.enqueue_1d_range_kernel(m_velocity_kernel, 0, m_particles, 0).wait(); m_queue.enqueue_1d_range_kernel(m_position_kernel, 0, m_particles, 0).wait(); m_queue.finish(); compute::opengl_enqueue_release_buffer(m_position, m_queue); } void NBodyWidget::keyPressEvent(QKeyEvent* event) { if(event->key() == Qt::Key_Escape) { this->close(); } } int main(int argc, char** argv) { // parse command line arguments po::options_description options("options"); options.add_options() ("help", "show usage") ("particles", po::value()->default_value(1000), "number of particles") ("dt", po::value()->default_value(0.00001f), "width of each integration step"); po::variables_map vm; po::store(po::parse_command_line(argc, argv, options), vm); po::notify(vm); if(vm.count("help") > 0) { std::cout << options << std::endl; return 0; } const uint_ particles = vm["particles"].as(); const float dt = vm["dt"].as(); QApplication app(argc, argv); NBodyWidget nbody(particles, dt); nbody.show(); return app.exec(); } #include "nbody.moc" compute-0.5/example/opencl_test.cpp000066400000000000000000000070431263566244600175320ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include // include the proper opencl header for the system #if defined(__APPLE__) #include #else #include #endif // the opencl_test example displays the opencl platforms and devices found // on the system using the opencl api directly. if this test fails to compile // and/or run, there is a problem with the opencl implementation found on the // system. users should ensure this test runs successfuly before using any of // the boost.compute apis (which depend on a working opencl implementation). int main() { // query number of opencl platforms cl_uint num_platforms = 0; cl_int ret = clGetPlatformIDs(0, NULL, &num_platforms); if(ret != CL_SUCCESS){ std::cerr << "failed to query platforms: " << ret << std::endl; return -1; } // check that at least one platform was found if(num_platforms == 0){ std::cerr << "found 0 platforms" << std::endl; return 0; } // get platform ids cl_platform_id *platforms = new cl_platform_id[num_platforms]; clGetPlatformIDs(num_platforms, platforms, NULL); // iterate through each platform and query its devices for(cl_uint i = 0; i < num_platforms; i++){ cl_platform_id platform = platforms[i]; // query number of opencl devices cl_uint num_devices = 0; ret = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); if(ret != CL_SUCCESS){ std::cerr << "failed to lookup devices for platform " << i << std::endl; continue; } // print number of devices found std::cout << "platform " << i << " has " << num_devices << " devices:" << std::endl; // get device ids for the platform cl_device_id *devices = new cl_device_id[num_devices]; ret = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); if(ret != CL_SUCCESS){ std::cerr << "failed to query platform devices" << std::endl; delete[] devices; continue; } // iterate through each device on the platform and print its name for(cl_uint j = 0; j < num_devices; j++){ cl_device_id device = devices[j]; 
// get length of the device name string size_t name_length = 0; ret = clGetDeviceInfo(device, CL_DEVICE_NAME, 0, NULL, &name_length); if(ret != CL_SUCCESS){ std::cerr << "failed to query device name length for device " << j << std::endl; continue; } // get the device name string char *name = new char[name_length]; ret = clGetDeviceInfo(device, CL_DEVICE_NAME, name_length, name, NULL); if(ret != CL_SUCCESS){ std::cerr << "failed to query device name string for device " << j << std::endl; delete[] name; continue; } // print out the device name std::cout << " device: " << name << std::endl; delete[] name; } delete[] devices; } delete[] platforms; return 0; } compute-0.5/example/opencv_convolution.cpp000066400000000000000000000204051263566244600211410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Mageswaran.D // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include #include namespace compute = boost::compute; namespace po = boost::program_options; // Create convolution program const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE ( __kernel void convolution(__read_only image2d_t sourceImage, __write_only image2d_t outputImage, __constant float* filter, int filterWidth) { const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; // Store each work-item's unique row and column int x = get_global_id(0); int y = get_global_id(1); // Half the width of the filter is needed for indexing // memory later int halfWidth = (int)(filterWidth/2); // All accesses to images return data as four-element vector // (i.e., float4). 
float4 sum = {0.0f, 0.0f, 0.0f, 0.0f}; // Iterator for the filter int filterIdx = 0; // Each work-item iterates around its local area based on the // size of the filter int2 coords; // Coordinates for accessing the image // Iterate the filter rows for(int i = -halfWidth; i <= halfWidth; i++) { coords.y = y + i; // Iterate over the filter columns for(int j = -halfWidth; j <= halfWidth; j++) { coords.x = x + j; float4 pixel; // Read a pixel from the image. // Work on a channel pixel = read_imagef(sourceImage, sampler, coords); sum.x += pixel.x * filter[filterIdx++]; //sum.y += pixel.y * filter[filterIdx++]; //sum.z += pixel.z * filter[filterIdx++]; } } barrier(CLK_GLOBAL_MEM_FENCE); // Copy the data to the output image if the // work-item is in bounds if(y < get_image_height(sourceImage) && x < get_image_width(sourceImage)) { coords.x = x; coords.y = y; //Same channel is copied in all three channels //write_imagef(outputImage, coords, // (float4)(sum.x,sum.x,sum.x,1.0f)); write_imagef(outputImage, coords, sum); } } ); // This example shows how to read two images or use camera // with OpenCV, transfer the frames to the GPU, // and apply a convolution written in OpenCL int main(int argc, char *argv[]) { /////////////////////////////////////////////////////////////////////////// // setup the command line arguments po::options_description desc; desc.add_options() ("help", "show available options") ("camera", po::value()->default_value(-1), "if not default camera, specify a camera id") ("image", po::value(), "path to image file"); // Parse the command lines po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); //check the command line arguments if(vm.count("help")) { std::cout << desc << std::endl; return 0; } /////////////////////////////////////////////////////////////////////////// //OpenCV variables cv::Mat cv_mat; cv::VideoCapture cap; //OpenCV camera handle. 
//Filter Variables float filter[9] = { -1.0, 0.0, 1.0, -2.0, 0.0, 2.0, -1.0, 0.0, 1.0, }; // The convolution filter is 3x3 int filterWidth = 3; //OpenCL variables // Get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); compute::buffer dev_filter(context, sizeof(filter), compute::memory_object::read_only | compute::memory_object::copy_host_ptr, filter); compute::program filter_program = compute::program::create_with_source(source, context); try { filter_program.build(); } catch(compute::opencl_error e) { std::cout<<"Build Error: "<(), CV_LOAD_IMAGE_COLOR); if(!cv_mat.data){ std::cerr << "Failed to load image" << std::endl; return -1; } } else //by default use camera { //open camera cap.open(vm["camera"].as()); // read first frame cap >> cv_mat; if(!cv_mat.data){ std::cerr << "failed to capture frame" << std::endl; return -1; } } // Convert image to BGRA (OpenCL requires 16-byte aligned data) cv::cvtColor(cv_mat, cv_mat, CV_BGR2BGRA); // Transfer image/frame data to gpu compute::image2d dev_input_image = compute::opencv_create_image2d_with_mat( cv_mat, compute::image2d::read_write, queue ); // Create output image // Be sure what will be your ouput image/frame size compute::image2d dev_output_image( context, dev_input_image.width(), dev_input_image.height(), dev_input_image.format(), compute::image2d::write_only ); filter_kernel.set_arg(0, dev_input_image); filter_kernel.set_arg(1, dev_output_image); filter_kernel.set_arg(2, dev_filter); filter_kernel.set_arg(3, filterWidth); // run flip kernel size_t origin[2] = { 0, 0 }; size_t region[2] = { dev_input_image.width(), dev_input_image.height() }; /////////////////////////////////////////////////////////////////////////// queue.enqueue_nd_range_kernel(filter_kernel, 2, origin, region, 0); //check for image paths if(vm.count("image")) { // show host image cv::imshow("Original Image", cv_mat); // show gpu 
image compute::opencv_imshow("Convoluted Image", dev_output_image, queue); // wait and return cv::waitKey(0); } else { char key = '\0'; while(key != 27) //check for escape key { cap >> cv_mat; // Convert image to BGRA (OpenCL requires 16-byte aligned data) cv::cvtColor(cv_mat, cv_mat, CV_BGR2BGRA); // Update the device image memory with current frame data compute::opencv_copy_mat_to_image(cv_mat, dev_input_image,queue); // Run the kernel on the device queue.enqueue_nd_range_kernel(filter_kernel, 2, origin, region, 0); // Show host image cv::imshow("Camera Frame", cv_mat); // Show GPU image compute::opencv_imshow("Convoluted Frame", dev_output_image, queue); // wait key = cv::waitKey(10); } } return 0; } compute-0.5/example/opencv_flip.cpp000066400000000000000000000064761263566244600175300ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include namespace compute = boost::compute; // this example shows how to read an image with OpenCV, transfer the // image to the GPU, and apply a simple flip filter written in OpenCL int main(int argc, char *argv[]) { // check command line if(argc < 2){ std::cerr << "usage: " << argv[0] << " FILENAME" << std::endl; return -1; } // read image with opencv cv::Mat cv_image = cv::imread(argv[1], CV_LOAD_IMAGE_COLOR); if(!cv_image.data){ std::cerr << "failed to load image" << std::endl; return -1; } // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); // convert image to BGRA (OpenCL requires 16-byte aligned data) cv::cvtColor(cv_image, cv_image, CV_BGR2BGRA); // transfer image to gpu compute::image2d input_image = compute::opencv_create_image2d_with_mat( cv_image, compute::image2d::read_write, queue ); // create output image compute::image2d output_image( context, input_image.width(), input_image.height(), input_image.format(), compute::image2d::write_only ); // create flip program const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void flip_kernel(__read_only image2d_t input, __write_only image2d_t output) { const sampler_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int height = get_image_height(input); int2 input_coord = { get_global_id(0), get_global_id(1) }; int2 output_coord = { input_coord.x, height - input_coord.y - 1 }; float4 value = read_imagef(input, sampler, input_coord); write_imagef(output, output_coord, value); } ); compute::program flip_program = compute::program::create_with_source(source, context); flip_program.build(); // create flip kernel and set arguments compute::kernel flip_kernel(flip_program, "flip_kernel"); flip_kernel.set_arg(0, input_image); flip_kernel.set_arg(1, 
output_image); // run flip kernel size_t origin[2] = { 0, 0 }; size_t region[2] = { input_image.width(), input_image.height() }; queue.enqueue_nd_range_kernel(flip_kernel, 2, origin, region, 0); // show host image cv::imshow("opencv image", cv_image); // show gpu image compute::opencv_imshow("filtered image", output_image, queue); // wait and return cv::waitKey(0); return 0; } compute-0.5/example/opencv_histogram.cpp000066400000000000000000000174231263566244600205650ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Mageswaran.D // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// //Code sample for calculating histogram using OpenCL and //displaying image histogram in OpenCV. 
#define BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION #include #include #include #include #include #include #include #include #include #include namespace compute = boost::compute; namespace po = boost::program_options; // number of bins int histSize = 256; // Set the ranges ( for B,G,R) ) // TryOut: consider the range in kernel calculation float range[] = { 0, 256 } ; const float* histRange = { range }; // Create naive histogram program // Needs "cl_khr_local_int32_base_atomics" extension const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE ( __kernel void histogram(read_only image2d_t src_image, __global int* b_hist, __global int* g_hist, __global int* r_hist) { sampler_t sampler =( CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP_TO_EDGE); int image_width = get_image_width(src_image); int image_height = get_image_height(src_image); int2 coords = (int2)(get_global_id(0), get_global_id(1)); float4 pixel = read_imagef(src_image,sampler, coords); //boundary condition if ((coords.x < image_width) && (coords.y < image_height)) { uchar indx_x, indx_y, indx_z; indx_x = convert_uchar_sat(pixel.x * 255.0f); indx_y = convert_uchar_sat(pixel.y * 255.0f); indx_z = convert_uchar_sat(pixel.z * 255.0f); atomic_inc(&b_hist[(uint)indx_z]); atomic_inc(&g_hist[(uint)indx_y]); atomic_inc(&r_hist[(uint)indx_x]); } } ); inline void showHistogramWindow(cv::Mat &b_hist, cv::Mat &g_hist, cv::Mat &r_hist, std::string window_name) { // Draw the histograms for B, G and R int hist_w = 1024; int hist_h = 768; int bin_w = cvRound((double)hist_w/histSize); cv::Mat histImage(hist_h, hist_w, CV_8UC3, cv::Scalar(0,0,0)); // Normalize the result to [ 0, histImage.rows ] cv::normalize(b_hist, b_hist, 0, histImage.rows, cv::NORM_MINMAX, -1, cv::Mat()); cv::normalize(g_hist, g_hist, 0, histImage.rows, cv::NORM_MINMAX, -1, cv::Mat()); cv::normalize(r_hist, r_hist, 0, histImage.rows, cv::NORM_MINMAX, -1, cv::Mat()); // Draw for each channel for (int i = 1; i < histSize; i++ ) { 
cv::line(histImage, cv::Point(bin_w*(i-1), hist_h - cvRound(b_hist.at(i-1))), cv::Point(bin_w*(i), hist_h - cvRound(b_hist.at(i))), cv::Scalar(255, 0, 0), 2, 8, 0); cv::line(histImage, cv::Point(bin_w*(i-1), hist_h - cvRound(g_hist.at(i-1))), cv::Point(bin_w*(i), hist_h - cvRound(g_hist.at(i))), cv::Scalar(0, 255, 0), 2, 8, 0); cv::line(histImage, cv::Point( bin_w*(i-1), hist_h - cvRound(r_hist.at(i-1))), cv::Point( bin_w*(i), hist_h - cvRound(r_hist.at(i)) ), cv::Scalar( 0, 0, 255), 2, 8, 0); } // Display cv::namedWindow(window_name, CV_WINDOW_AUTOSIZE ); cv::imshow(window_name, histImage ); } //Get the device context //Create GPU array/vector //Copy the image & set up the kernel //Execute the kernel //Copy GPU data back to CPU cv::Mat data pointer //OpenCV conversion for convienient display void calculateHistogramUsingCL(cv::Mat src, compute::command_queue &queue) { compute::context context = queue.get_context(); // Convert image to BGRA (OpenCL requires 16-byte aligned data) cv::cvtColor(src, src, CV_BGR2BGRA); //3 channels & 256 bins : alpha channel is ignored compute::vector gpu_b_hist(histSize, context); compute::vector gpu_g_hist(histSize, context); compute::vector gpu_r_hist(histSize, context); // Transfer image to gpu compute::image2d gpu_src = compute::opencv_create_image2d_with_mat( src, compute::image2d::read_only, queue ); compute::program histogram_program = compute::program::create_with_source(source, context); histogram_program.build(); // create histogram kernel and set arguments compute::kernel histogram_kernel(histogram_program, "histogram"); histogram_kernel.set_arg(0, gpu_src); histogram_kernel.set_arg(1, gpu_b_hist.get_buffer()); histogram_kernel.set_arg(2, gpu_g_hist.get_buffer()); histogram_kernel.set_arg(3, gpu_r_hist.get_buffer()); // run histogram kernel // each kernel thread updating red, green & blue bins size_t origin[2] = { 0, 0 }; size_t region[2] = { gpu_src.width(), gpu_src.height() }; 
queue.enqueue_nd_range_kernel(histogram_kernel, 2, origin, region, 0); //Make sure kernel get executed and data copied back queue.finish(); //create Mat and copy GPU bins to CPU memory cv::Mat b_hist(256, 1, CV_32SC1); compute::copy(gpu_b_hist.begin(), gpu_b_hist.end(), b_hist.data, queue); cv::Mat g_hist(256, 1, CV_32SC1); compute::copy(gpu_g_hist.begin(), gpu_g_hist.end(), g_hist.data, queue); cv::Mat r_hist(256, 1, CV_32SC1); compute::copy(gpu_r_hist.begin(), gpu_r_hist.end(), r_hist.data, queue); b_hist.convertTo(b_hist, CV_32FC1); //converted for displaying g_hist.convertTo(g_hist, CV_32FC1); r_hist.convertTo(r_hist, CV_32FC1); showHistogramWindow(b_hist, g_hist, r_hist, "Histogram"); } int main( int argc, char** argv ) { // Get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); cv::Mat src; // setup the command line arguments po::options_description desc; desc.add_options() ("help", "show available options") ("image", po::value(), "path to image file"); // Parse the command lines po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); //check the command line arguments if(vm.count("help")) { std::cout << desc << std::endl; return 0; } //check for image paths if(vm.count("image")) { // Read image with OpenCV src = cv::imread(vm["image"].as(), CV_LOAD_IMAGE_COLOR); if(!src.data){ std::cerr << "Failed to load image" << std::endl; return -1; } calculateHistogramUsingCL(src, queue); cv::imshow("Image", src); cv::waitKey(0); } else { std::cout << desc << std::endl; return 0; } return 0; } compute-0.5/example/opencv_optical_flow.cpp000066400000000000000000000240171263566244600212470ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Mageswaran.D // // Distributed under the Boost Software License, Version 1.0 // See accompanying file 
LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include #include namespace compute = boost::compute; namespace po = boost::program_options; // Create naive optical flow program const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE ( __kernel void optical_flow ( read_only image2d_t current_image, read_only image2d_t previous_image, write_only image2d_t optical_flow, const float scale, const float offset, const float lambda, const float threshold ) { sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE; int2 coords = (int2)(get_global_id(0), get_global_id(1)); float4 current_pixel = read_imagef(current_image, sampler, coords); float4 previous_pixel = read_imagef(previous_image, sampler, coords); int2 x1 = (int2)(offset, 0.f); int2 y1 = (int2)(0.f, offset); //get the difference float4 curdif = previous_pixel - current_pixel; //calculate the gradient //Image 2 first float4 gradx = read_imagef(previous_image, sampler, coords+x1) - read_imagef(previous_image, sampler, coords-x1); //Image 1 gradx += read_imagef(current_image, sampler, coords+x1) - read_imagef(current_image, sampler, coords-x1); //Image 2 first float4 grady = read_imagef(previous_image, sampler, coords+y1) - read_imagef(previous_image, sampler, coords-y1); //Image 1 grady += read_imagef(current_image, sampler, coords+y1) - read_imagef(current_image, sampler, coords-y1); float4 sqr = (gradx*gradx) + (grady*grady) + (float4)(lambda,lambda, lambda, lambda); float4 gradmag = sqrt(sqr); /////////////////////////////////////////////////// float4 vx = curdif * (gradx / gradmag); float vxd = vx.x;//assumes greyscale //format output for flowrepos, out(-x,+x,-y,+y) float2 xout = (float2)(fmax(vxd,0.f),fabs(fmin(vxd,0.f))); xout *= scale; /////////////////////////////////////////////////// 
float4 vy = curdif*(grady/gradmag); float vyd = vy.x;//assumes greyscale //format output for flowrepos, out(-x,+x,-y,+y) float2 yout = (float2)(fmax(vyd,0.f),fabs(fmin(vyd,0.f))); yout *= scale; /////////////////////////////////////////////////// float4 out = (float4)(xout, yout); float cond = (float)isgreaterequal(length(out), threshold); out *= cond; write_imagef(optical_flow, coords, out); } ); // This example shows how to read two images or use camera // with OpenCV, transfer the frames to the GPU, // and apply a naive optical flow algorithm // written in OpenCL int main(int argc, char *argv[]) { // setup the command line arguments po::options_description desc; desc.add_options() ("help", "show available options") ("camera", po::value()->default_value(-1), "if not default camera, specify a camera id") ("image1", po::value(), "path to image file 1") ("image2", po::value(), "path to image file 2"); // Parse the command lines po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); //check the command line arguments if(vm.count("help")) { std::cout << desc << std::endl; return 0; } //OpenCV variables cv::Mat previous_cv_image; cv::Mat current_cv_image; cv::VideoCapture cap; //OpenCV camera handle //check for image paths if(vm.count("image1") && vm.count("image2")) { // Read image 1 with OpenCV previous_cv_image = cv::imread(vm["image1"].as(), CV_LOAD_IMAGE_COLOR); if(!previous_cv_image.data){ std::cerr << "Failed to load image" << std::endl; return -1; } // Read image 2 with opencv current_cv_image = cv::imread(vm["image2"].as(), CV_LOAD_IMAGE_COLOR); if(!current_cv_image.data){ std::cerr << "Failed to load image" << std::endl; return -1; } } else //by default use camera { //open camera cap.open(vm["camera"].as()); // read first frame cap >> previous_cv_image; if(!previous_cv_image.data){ std::cerr << "failed to capture frame" << std::endl; return -1; } // read second frame cap >> current_cv_image; if(!current_cv_image.data){ 
std::cerr << "failed to capture frame" << std::endl; return -1; } } // Get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); // Convert image to BGRA (OpenCL requires 16-byte aligned data) cv::cvtColor(previous_cv_image, previous_cv_image, CV_BGR2BGRA); cv::cvtColor(current_cv_image, current_cv_image, CV_BGR2BGRA); // Transfer image to gpu compute::image2d dev_previous_image = compute::opencv_create_image2d_with_mat( previous_cv_image, compute::image2d::read_write, queue ); // Transfer image to gpu compute::image2d dev_current_image = compute::opencv_create_image2d_with_mat( current_cv_image, compute::image2d::read_write, queue ); // Create output image compute::image2d dev_output_image( context, dev_previous_image.width(), dev_previous_image.height(), dev_previous_image.format(), compute::image2d::write_only ); compute::program optical_program = compute::program::create_with_source(source, context); optical_program.build(); // create flip kernel and set arguments compute::kernel optical_kernel(optical_program, "optical_flow"); float scale = 10; float offset = 1; float lambda = 0.0025; float threshold = 1.0; optical_kernel.set_arg(0, dev_previous_image); optical_kernel.set_arg(1, dev_current_image); optical_kernel.set_arg(2, dev_output_image); optical_kernel.set_arg(3, scale); optical_kernel.set_arg(4, offset); optical_kernel.set_arg(5, lambda); optical_kernel.set_arg(6, threshold); // run flip kernel size_t origin[2] = { 0, 0 }; size_t region[2] = { dev_previous_image.width(), dev_previous_image.height() }; queue.enqueue_nd_range_kernel(optical_kernel, 2, origin, region, 0); //check for image paths if(vm.count("image1") && vm.count("image2")) { // show host image cv::imshow("Previous Frame", previous_cv_image); cv::imshow("Current Frame", current_cv_image); // show gpu image compute::opencv_imshow("filtered image", dev_output_image, queue); // wait 
and return cv::waitKey(0); } else { char key = '\0'; while(key != 27) //check for escape key { cap >> current_cv_image; // Convert image to BGRA (OpenCL requires 16-byte aligned data) cv::cvtColor(current_cv_image, current_cv_image, CV_BGR2BGRA); // Update the device image memory with current frame data compute::opencv_copy_mat_to_image(previous_cv_image, dev_previous_image, queue); compute::opencv_copy_mat_to_image(current_cv_image, dev_current_image, queue); // Run the kernel on the device queue.enqueue_nd_range_kernel(optical_kernel, 2, origin, region, 0); // Show host image cv::imshow("Previous Frame", previous_cv_image); cv::imshow("Current Frame", current_cv_image); // Show GPU image compute::opencv_imshow("filtered image", dev_output_image, queue); // Copy current frame container to previous frame container current_cv_image.copyTo(previous_cv_image); // wait key = cv::waitKey(10); } } return 0; } compute-0.5/example/opencv_sobel_filter.cpp000066400000000000000000000205731263566244600212410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Mageswaran.D // // Book Refered: OpenCL Programming Guide // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// //---------------------------------------------------------------------------// // About Sobel Filter: // * Edge Filter - distinguishes the differrent color region // * Finds the gradient in x and y-axes // * Three step process // -> Find x-axis gradient with kernel/matrix // Gx = [-1 0 +1] // [-2 0 +2] // [-1 0 +1] // -> Find y-axis gradient with kernel/matrix // Gy = [-1 -2 -1] // [ 0 0 0] // [+1 +2 +1] // * Gradient magnitude G = sqrt(Gx^2 + Gy^2) //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include #include namespace compute = boost::compute; namespace po = boost::program_options; // Create sobel filter program const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE ( //For out of boundary pixels, edge pixel // value is returned const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; kernel void sobel_rgb(read_only image2d_t src, write_only image2d_t dst) { int x = (int)get_global_id(0); int y = (int)get_global_id(1); if (x >= get_image_width(src) || y >= get_image_height(src)) return; // [(x-1, y+1), (x, y+1), (x+1, y+1)] // [(x-1, y ), (x, y ), (x+1, y )] // [(x-1, y-1), (x, y-1), (x+1, y-1)] // [p02, p12, p22] // [p01, pixel, p21] // [p00, p10, p20] //Basically finding influence of neighbour pixels on current pixel float4 p00 = read_imagef(src, sampler, (int2)(x - 1, y - 1)); float4 p10 = read_imagef(src, sampler, (int2)(x, y - 1)); float4 p20 = read_imagef(src, sampler, (int2)(x + 1, y - 1)); float4 p01 = read_imagef(src, sampler, (int2)(x - 1, y)); //pixel that we are working on float4 p21 = read_imagef(src, sampler, (int2)(x + 1, y)); float4 p02 = read_imagef(src, sampler, (int2)(x - 1, y + 1)); float4 p12 = read_imagef(src, sampler, (int2)(x, y + 1)); float4 p22 = read_imagef(src, sampler, (int2)(x + 1, y + 1)); //Find Gx = kernel + 3x3 around current 
pixel // Gx = [-1 0 +1] [p02, p12, p22] // [-2 0 +2] + [p01, pixel, p21] // [-1 0 +1] [p00, p10, p20] float3 gx = -p00.xyz + p20.xyz + 2.0f * (p21.xyz - p01.xyz) -p02.xyz + p22.xyz; //Find Gy = kernel + 3x3 around current pixel // Gy = [-1 -2 -1] [p02, p12, p22] // [ 0 0 0] + [p01, pixel, p21] // [+1 +2 +1] [p00, p10, p20] float3 gy = p00.xyz + p20.xyz + 2.0f * (- p12.xyz + p10.xyz) - p02.xyz - p22.xyz; //Find G float3 g = native_sqrt(gx * gx + gy * gy); // we could also approximate this as g = fabs(gx) + fabs(gy) write_imagef(dst, (int2)(x, y), (float4)(g.x, g.y, g.z, 1.0f)); } ); // This example shows how to apply sobel filter on images or on camera frames // with OpenCV, transfer the frames to the GPU, and apply a sobel filter // written in OpenCL int main(int argc, char *argv[]) { /////////////////////////////////////////////////////////////////////////// // setup the command line arguments po::options_description desc; desc.add_options() ("help", "show available options") ("camera", po::value()->default_value(-1), "if not default camera, specify a camera id") ("image", po::value(), "path to image file"); // Parse the command lines po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); //check the command line arguments if(vm.count("help")) { std::cout << desc << std::endl; return 0; } /////////////////////////////////////////////////////////////////////////// //OpenCV variables cv::Mat cv_mat; cv::VideoCapture cap; //OpenCV camera handle. 
//OpenCL variables // Get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); compute::program filter_program = compute::program::create_with_source(source, context); try { filter_program.build(); } catch(compute::opencl_error e) { std::cout<<"Build Error: "<(), CV_LOAD_IMAGE_COLOR); if(!cv_mat.data){ std::cerr << "Failed to load image" << std::endl; return -1; } } else //by default use camera { //open camera cap.open(vm["camera"].as()); // read first frame cap >> cv_mat; if(!cv_mat.data){ std::cerr << "failed to capture frame" << std::endl; return -1; } } // Convert image to BGRA (OpenCL requires 16-byte aligned data) cv::cvtColor(cv_mat, cv_mat, CV_BGR2BGRA); // Transfer image/frame data to gpu compute::image2d dev_input_image = compute::opencv_create_image2d_with_mat( cv_mat, compute::image2d::read_write, queue ); // Create output image // Be sure what will be your ouput image/frame size compute::image2d dev_output_image( context, dev_input_image.width(), dev_input_image.height(), dev_input_image.format(), compute::image2d::write_only ); filter_kernel.set_arg(0, dev_input_image); filter_kernel.set_arg(1, dev_output_image); // run flip kernel size_t origin[2] = { 0, 0 }; size_t region[2] = { dev_input_image.width(), dev_input_image.height() }; /////////////////////////////////////////////////////////////////////////// queue.enqueue_nd_range_kernel(filter_kernel, 2, origin, region, 0); //check for image paths if(vm.count("image")) { // show host image cv::imshow("Original Image", cv_mat); // show gpu image compute::opencv_imshow("Filtered Image", dev_output_image, queue); // wait and return cv::waitKey(0); } else { char key = '\0'; while(key != 27) //check for escape key { cap >> cv_mat; // Convert image to BGRA (OpenCL requires 16-byte aligned data) cv::cvtColor(cv_mat, cv_mat, CV_BGR2BGRA); // Update the device image memory with current frame data 
compute::opencv_copy_mat_to_image(cv_mat, dev_input_image,queue); // Run the kernel on the device queue.enqueue_nd_range_kernel(filter_kernel, 2, origin, region, 0); // Show host image cv::imshow("Camera Frame", cv_mat); // Show GPU image compute::opencv_imshow("Filtered RGB Frame", dev_output_image, queue); // wait key = cv::waitKey(10); } } return 0; } compute-0.5/example/opengl_sphere.cpp000066400000000000000000000173321263566244600200470ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace compute = boost::compute; // tesselates a sphere with radius, phi_slices, and theta_slices. returns // a shared opencl/opengl buffer containing the vertex data. 
compute::opengl_buffer tesselate_sphere(float radius,
                                        size_t phi_slices,
                                        size_t theta_slices,
                                        compute::command_queue &queue)
{
    // Generates phi_slices * theta_slices points on a sphere of the given
    // radius on the OpenCL device and returns them in a buffer shared
    // between OpenCL and OpenGL (one float4 per vertex, w = 1).
    //
    // The kernel is enqueued on `queue` and the function returns without
    // waiting for it; the buffer is released back to OpenGL on the same
    // queue.
    using compute::dim;

    const compute::context &context = queue.get_context();

    const size_t vertex_count = phi_slices * theta_slices;

    // create opengl buffer large enough for one float4 per vertex
    GLuint vbo;
    vtkgl::GenBuffersARB(1, &vbo);
    vtkgl::BindBufferARB(vtkgl::ARRAY_BUFFER, vbo);
    vtkgl::BufferDataARB(vtkgl::ARRAY_BUFFER,
                         sizeof(float) * 4 * vertex_count,
                         NULL,
                         vtkgl::STREAM_DRAW);
    vtkgl::BindBufferARB(vtkgl::ARRAY_BUFFER, 0);

    // create shared opengl/opencl buffer
    compute::opengl_buffer vertex_buffer(context, vbo);

    // tesselate_sphere kernel source
    // The global range is (phi_slices, theta_slices), so the vertices are
    // stored row-major with a row stride of theta_slices.
    const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
        __kernel void tesselate_sphere(float radius,
                                       uint phi_slices,
                                       uint theta_slices,
                                       __global float4 *vertex_buffer)
        {
            const uint phi_i = get_global_id(0);
            const uint theta_i = get_global_id(1);

            const float phi = phi_i * 2.f * M_PI_F / phi_slices;
            const float theta = theta_i * 2.f * M_PI_F / theta_slices;

            float4 v;
            v.x = radius * cos(theta) * cos(phi);
            v.y = radius * cos(theta) * sin(phi);
            v.z = radius * sin(theta);
            v.w = 1.f;

            vertex_buffer[phi_i*theta_slices+theta_i] = v;
        }
    );

    // build tesselate_sphere program
    compute::program program =
        compute::program::create_with_source(source, context);
    program.build();

    // setup tesselate_sphere kernel
    // The slice counts are size_t on the host but uint in the kernel, so
    // they are narrowed explicitly to pass arguments of the declared size.
    compute::kernel kernel(program, "tesselate_sphere");
    kernel.set_arg(0, radius);
    kernel.set_arg(1, static_cast<cl_uint>(phi_slices));
    kernel.set_arg(2, static_cast<cl_uint>(theta_slices));
    kernel.set_arg(3, vertex_buffer);

    // acquire buffer so that it is accessible to OpenCL
    compute::opengl_enqueue_acquire_buffer(vertex_buffer, queue);

    // execute tesselate_sphere kernel
    queue.enqueue_nd_range_kernel(
        kernel, dim(0, 0), dim(phi_slices, theta_slices), dim(1, 1)
    );

    // release buffer so that it is accessible to OpenGL
    compute::opengl_enqueue_release_buffer(vertex_buffer, queue);

    return vertex_buffer;
}

// simple vtkMapper subclass to render the tesselated sphere on the gpu.
class gpu_sphere_mapper : public vtkMapper { public: vtkTypeMacro(gpu_sphere_mapper, vtkMapper) static gpu_sphere_mapper* New() { return new gpu_sphere_mapper; } void Render(vtkRenderer *renderer, vtkActor *actor) { if(!m_initialized){ Initialize(renderer, actor); m_initialized = true; } if(!m_tesselated){ m_vertex_count = m_phi_slices * m_theta_slices; // tesselate sphere m_vertex_buffer = tesselate_sphere( m_radius, m_phi_slices, m_theta_slices, m_command_queue ); // ensure tesselation is finished (seems to be required on AMD) m_command_queue.finish(); // set tesselated flag to true m_tesselated = true; } // draw sphere glEnableClientState(GL_VERTEX_ARRAY); vtkgl::BindBufferARB(vtkgl::ARRAY_BUFFER, m_vertex_buffer.get_opengl_object()); glVertexPointer(4, GL_FLOAT, sizeof(float)*4, 0); glDrawArrays(GL_POINTS, 0, m_vertex_count); } void Initialize(vtkRenderer *renderer, vtkActor *actor) { // initialize opengl extensions vtkOpenGLExtensionManager *extensions = static_cast(renderer->GetRenderWindow()) ->GetExtensionManager(); extensions->LoadExtension("GL_ARB_vertex_buffer_object"); // initialize opencl/opengl shared context m_context = compute::opengl_create_shared_context(); compute::device device = m_context.get_device(); std::cout << "device: " << device.name() << std::endl; // create command queue for the gpu device m_command_queue = compute::command_queue(m_context, device); } double* GetBounds() { static double bounds[6]; bounds[0] = -m_radius; bounds[1] = m_radius; bounds[2] = -m_radius; bounds[3] = m_radius; bounds[4] = -m_radius; bounds[5] = m_radius; return bounds; } protected: gpu_sphere_mapper() { m_radius = 5.0f; m_phi_slices = 100; m_theta_slices = 100; m_initialized = false; m_tesselated = false; } private: float m_radius; int m_phi_slices; int m_theta_slices; int m_vertex_count; bool m_initialized; bool m_tesselated; compute::context m_context; compute::command_queue m_command_queue; compute::opengl_buffer m_vertex_buffer; }; int main(int argc, char 
*argv[]) { // create gpu sphere mapper vtkSmartPointer mapper = vtkSmartPointer::New(); // create actor for gpu sphere mapper vtkSmartPointer actor = vtkSmartPointer::New(); actor->GetProperty()->LightingOff(); actor->GetProperty()->SetInterpolationToFlat(); actor->SetMapper(mapper); // create render window vtkSmartPointer renderer = vtkSmartPointer::New(); renderer->SetBackground(.1, .2, .31); vtkSmartPointer renderWindow = vtkSmartPointer::New(); renderWindow->SetSize(800, 600); renderWindow->AddRenderer(renderer); vtkSmartPointer renderWindowInteractor = vtkSmartPointer::New(); vtkInteractorStyleSwitch *interactorStyle = vtkInteractorStyleSwitch::SafeDownCast( renderWindowInteractor->GetInteractorStyle() ); interactorStyle->SetCurrentStyleToTrackballCamera(); renderWindowInteractor->SetRenderWindow(renderWindow); renderer->AddActor(actor); // render renderer->ResetCamera(); vtkCamera *camera = renderer->GetActiveCamera(); camera->Elevation(-90.0); renderWindowInteractor->Initialize(); renderWindow->Render(); renderWindowInteractor->Start(); return 0; } compute-0.5/example/point_centroid.cpp000066400000000000000000000036711263566244600202360ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// //[point_centroid_example #include #include #include #include #include namespace compute = boost::compute; // the point centroid example calculates and displays the // centroid of a set of 3D points stored as float4's int main() { using compute::float4_; // get default device and setup context compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); // point coordinates float points[] = { 1.0f, 2.0f, 3.0f, 0.0f, -2.0f, -3.0f, 4.0f, 0.0f, 1.0f, -2.0f, 2.5f, 0.0f, -7.0f, -3.0f, -2.0f, 0.0f, 3.0f, 4.0f, -5.0f, 0.0f }; // create vector for five points compute::vector vector(5, context); // copy point data to the device compute::copy( reinterpret_cast(points), reinterpret_cast(points) + 5, vector.begin(), queue ); // calculate sum float4_ sum = compute::accumulate( vector.begin(), vector.end(), float4_(0, 0, 0, 0), queue ); // calculate centroid float4_ centroid; for(size_t i = 0; i < 3; i++){ centroid[i] = sum[i] / 5.0f; } // print centroid std::cout << "centroid: " << centroid << std::endl; return 0; } //] compute-0.5/example/price_cross.cpp000066400000000000000000000055761263566244600175370ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include namespace compute = boost::compute; // this example shows how to use the find_if() algorithm to detect the // point at which two vectors of prices (such as stock prices) cross. 
int main() { // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); // prices #1 (from 10.0 to 11.0) std::vector prices1; for(float i = 10.0; i <= 11.0; i += 0.1){ prices1.push_back(i); } // prices #2 (from 11.0 to 10.0) std::vector prices2; for(float i = 11.0; i >= 10.0; i -= 0.1){ prices2.push_back(i); } // create gpu vectors compute::vector gpu_prices1(prices1.size(), context); compute::vector gpu_prices2(prices2.size(), context); // copy prices to gpu compute::copy(prices1.begin(), prices1.end(), gpu_prices1.begin(), queue); compute::copy(prices2.begin(), prices2.end(), gpu_prices2.begin(), queue); // function returning true if the second price is less than the first price BOOST_COMPUTE_FUNCTION(bool, check_price_cross, (boost::tuple prices), { // first price const float first = boost_tuple_get(prices, 0); // second price const float second = boost_tuple_get(prices, 1); // return true if second price is less than first return second < first; }); // find cross point (should be 10.5) compute::vector::iterator iter = boost::get<0>( compute::find_if( compute::make_zip_iterator( boost::make_tuple(gpu_prices1.begin(), gpu_prices2.begin()) ), compute::make_zip_iterator( boost::make_tuple(gpu_prices1.end(), gpu_prices2.end()) ), check_price_cross, queue ).get_iterator_tuple() ); // print out result int index = std::distance(gpu_prices1.begin(), iter); std::cout << "price cross at index: " << index << std::endl; float value; compute::copy_n(iter, 1, &value, queue); std::cout << "value: " << value << std::endl; return 0; } compute-0.5/example/print_vector.cpp000066400000000000000000000026361263566244600177340ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt 
or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include namespace compute = boost::compute; // this example demonstrates how to print the values in a vector int main() { // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); std::cout << "device: " << gpu.name() << std::endl; // create vector on the device and fill with the sequence 1..10 compute::vector vector(10, context); compute::iota(vector.begin(), vector.end(), 1, queue); //[print_vector_example std::cout << "vector: [ "; boost::compute::copy( vector.begin(), vector.end(), std::ostream_iterator(std::cout, ", "), queue ); std::cout << "]" << std::endl; //] return 0; } compute-0.5/example/qimage_blur.cpp000066400000000000000000000112371263566244600175020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #if QT_VERSION >= 0x050000 #include #else #include #endif #ifndef Q_MOC_RUN #include #include #include #include #include #endif // Q_MOC_RUN namespace compute = boost::compute; inline void box_filter_image(const compute::image2d &input, compute::image2d &output, compute::uint_ box_height, compute::uint_ box_width, compute::command_queue &queue) { using compute::dim; const compute::context &context = queue.get_context(); // simple box filter kernel source const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void box_filter(__read_only image2d_t input, __write_only image2d_t output, uint box_height, uint box_width) { int x = get_global_id(0); int y = get_global_id(1); int h = get_image_height(input); int w = get_image_width(input); int k = box_width; int l = box_height; if(x < k/2 || y < l/2 || x >= w-(k/2) || y >= h-(l/2)){ write_imagef(output, (int2)(x, y), (float4)(0, 0, 0, 1)); } else { const sampler_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; float4 sum = { 0, 0, 0, 0 }; for(int i = 0; i < k; i++){ for(int j = 0; j < l; j++){ sum += read_imagef(input, sampler, (int2)(x+i-k, y+j-l)); } } sum /= (float) k * l; float4 value = (float4)( sum.x, sum.y, sum.z, 1.f ); write_imagef(output, (int2)(x, y), value); } } ); // build box filter program compute::program program = compute::program::create_with_source(source, context); program.build(); // setup box filter kernel compute::kernel kernel(program, "box_filter"); kernel.set_arg(0, input); kernel.set_arg(1, output); kernel.set_arg(2, box_height); kernel.set_arg(3, box_width); // execute the box filter kernel queue.enqueue_nd_range_kernel(kernel, dim(0, 0), input.size(), dim(1, 1)); } // this example shows how to load an image using Qt, apply a simple // box blur filter, and then display it in a Qt window. 
int main(int argc, char *argv[]) { QApplication app(argc, argv); // check command line if(argc < 2){ std::cout << "usage: qimage_blur [FILENAME]" << std::endl; return -1; } // load image using Qt QString fileName = argv[1]; QImage qimage(fileName); size_t height = qimage.height(); size_t width = qimage.width(); size_t bytes_per_line = qimage.bytesPerLine(); qDebug() << "height:" << height << "width:" << width << "bytes per line:" << bytes_per_line << "depth:" << qimage.depth() << "format:" << qimage.format(); // create compute context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); std::cout << "device: " << gpu.name() << std::endl; // get the opencl image format for the qimage compute::image_format format = compute::qt_qimage_format_to_image_format(qimage.format()); // create input and output images on the gpu compute::image2d input_image(context, width, height, format); compute::image2d output_image(context, width, height, format); // copy host qimage to gpu image compute::qt_copy_qimage_to_image2d(qimage, input_image, queue); // apply box filter box_filter_image(input_image, output_image, 7, 7, queue); // copy gpu blurred image from to host qimage compute::qt_copy_image2d_to_qimage(output_image, qimage, queue); // show image as a pixmap QLabel label; label.setPixmap(QPixmap::fromImage(qimage)); label.show(); return app.exec(); } compute-0.5/example/random_walk.cpp000066400000000000000000000117411263566244600175110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include #include #include #include namespace compute = boost::compute; // this example uses the random-number generation functions in Boost.Compute // to calculate a large number of random "steps" and then plots the final // random "walk" in a 2D image on the GPU and displays it with OpenCV int main() { // number of random steps to take size_t steps = 250000; // height and width of image size_t height = 800; size_t width = 800; // get default device and setup context compute::device gpu = compute::system::default_device(); compute::context context(gpu); compute::command_queue queue(context, gpu); using compute::int2_; // calaculate random values for each step compute::vector random_values(steps, context); compute::default_random_engine random_engine(queue); compute::uniform_real_distribution random_distribution(0.f, 4.f); random_distribution.generate( random_values.begin(), random_values.end(), random_engine, queue ); // calaculate coordinates for each step compute::vector coordinates(steps, context); // function to convert random values to random directions (in 2D) BOOST_COMPUTE_FUNCTION(int2_, take_step, (const float x), { if(x < 1.f){ // move right return (int2)(1, 0); } if(x < 2.f){ // move up return (int2)(0, 1); } if(x < 3.f){ // move left return (int2)(-1, 0); } else { // move down return (int2)(0, -1); } }); // transform the random values into random steps compute::transform( random_values.begin(), random_values.end(), coordinates.begin(), take_step, queue ); // set staring position int2_ starting_position(width / 2, height / 2); compute::copy_n(&starting_position, 1, coordinates.begin(), queue); // scan steps to calculate position after each step compute::inclusive_scan( coordinates.begin(), coordinates.end(), coordinates.begin(), queue ); // create output image compute::image2d image( context, width, 
height, compute::image_format(CL_RGBA, CL_UNSIGNED_INT8) ); // program with two kernels, one to fill the image with white, and then // one the draw to points calculated in coordinates on the image const char draw_walk_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void draw_walk(__global const int2 *coordinates, __write_only image2d_t image) { const uint i = get_global_id(0); const int2 coord = coordinates[i]; if(coord.x > 0 && coord.x < get_image_width(image) && coord.y > 0 && coord.y < get_image_height(image)){ uint4 black = { 0, 0, 0, 0 }; write_imageui(image, coord, black); } } __kernel void fill_white(__write_only image2d_t image) { const int2 coord = { get_global_id(0), get_global_id(1) }; if(coord.x < get_image_width(image) && coord.y < get_image_height(image)){ uint4 white = { 255, 255, 255, 255 }; write_imageui(image, coord, white); } } ); // build the program compute::program draw_program = compute::program::build_with_source(draw_walk_source, context); // fill image with white compute::kernel fill_kernel(draw_program, "fill_white"); fill_kernel.set_arg(0, image); const size_t offset[] = { 0, 0 }; const size_t bounds[] = { width, height }; queue.enqueue_nd_range_kernel(fill_kernel, 2, offset, bounds, 0); // draw random walk compute::kernel draw_kernel(draw_program, "draw_walk"); draw_kernel.set_arg(0, coordinates); draw_kernel.set_arg(1, image); queue.enqueue_1d_range_kernel(draw_kernel, 0, coordinates.size(), 0); // show image compute::opencv_imshow("random walk", image, queue); // wait and return cv::waitKey(0); return 0; } compute-0.5/example/resize_image.cpp000066400000000000000000000162411263566244600176560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #if QT_VERSION >= 0x050000 #include #else #include #endif #include #include #ifndef Q_MOC_RUN #include #include #include #include #include #include #include #include #include #endif // Q_MOC_RUN namespace compute = boost::compute; namespace po = boost::program_options; // opencl source code const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void resize_image(__read_only image2d_t input, const sampler_t sampler, __write_only image2d_t output) { const uint x = get_global_id(0); const uint y = get_global_id(1); const float w = get_image_width(output); const float h = get_image_height(output); float2 coord = { ((float) x / w) * get_image_width(input), ((float) y / h) * get_image_height(input) }; float4 pixel = read_imagef(input, sampler, coord); write_imagef(output, (int2)(x, h - y - 1), pixel); }; ); class ImageWidget : public QGLWidget { Q_OBJECT public: ImageWidget(QString fileName, QWidget *parent = 0); ~ImageWidget(); void initializeGL(); void resizeGL(int width, int height); void paintGL(); private: QImage qt_image_; compute::context context_; compute::command_queue queue_; compute::program program_; compute::image2d image_; compute::image_sampler sampler_; GLuint gl_texture_; compute::opengl_texture cl_texture_; }; ImageWidget::ImageWidget(QString fileName, QWidget *parent) : QGLWidget(parent), qt_image_(fileName) { gl_texture_ = 0; } ImageWidget::~ImageWidget() { } void ImageWidget::initializeGL() { // setup opengl glDisable(GL_LIGHTING); // create the OpenGL/OpenCL shared context context_ = compute::opengl_create_shared_context(); // get gpu device compute::device gpu = context_.get_device(); std::cout << "device: " << gpu.name() << std::endl; // setup command queue queue_ = compute::command_queue(context_, gpu); // allocate image on the device compute::image_format format = 
compute::qt_qimage_format_to_image_format(qt_image_.format()); image_ = compute::image2d( context_, qt_image_.width(), qt_image_.height(), format, CL_MEM_READ_ONLY ); // transfer image to the device compute::qt_copy_qimage_to_image2d(qt_image_, image_, queue_); // setup image sampler (use CL_FILTER_NEAREST to disable linear interpolation) sampler_ = compute::image_sampler( context_, false, CL_ADDRESS_NONE, CL_FILTER_LINEAR ); // build resize program program_ = compute::program::build_with_source(source, context_); } void ImageWidget::resizeGL(int width, int height) { #if QT_VERSION >= 0x050000 // scale height/width based on device pixel ratio width /= windowHandle()->devicePixelRatio(); height /= windowHandle()->devicePixelRatio(); #endif // resize viewport glViewport(0, 0, width, height); // delete old texture if(gl_texture_){ glDeleteTextures(1, &gl_texture_); gl_texture_ = 0; } // generate new texture glGenTextures(1, &gl_texture_); glBindTexture(GL_TEXTURE_2D, gl_texture_); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0 ); // create opencl object for the texture cl_texture_ = compute::opengl_texture( context_, GL_TEXTURE_2D, 0, gl_texture_, CL_MEM_WRITE_ONLY ); } void ImageWidget::paintGL() { float w = width(); float h = height(); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0.0, w, 0.0, h, -1.0, 1.0); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); // setup the resize kernel compute::kernel kernel(program_, "resize_image"); kernel.set_arg(0, image_); kernel.set_arg(1, sampler_); kernel.set_arg(2, cl_texture_); // acquire the opengl texture so it can be used in opencl compute::opengl_enqueue_acquire_gl_objects(1, &cl_texture_.get(), queue_); // execute the 
resize kernel const size_t global_work_offset[] = { 0, 0 }; const size_t global_work_size[] = { size_t(width()), size_t(height()) }; queue_.enqueue_nd_range_kernel( kernel, 2, global_work_offset, global_work_size, 0 ); // release the opengl texture so it can be used by opengl compute::opengl_enqueue_release_gl_objects(1, &cl_texture_.get(), queue_); // ensure opencl is finished before rendering in opengl queue_.finish(); // draw a single quad with the resized image texture glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, gl_texture_); glBegin(GL_QUADS); glTexCoord2f(0, 0); glVertex2f(0, 0); glTexCoord2f(0, 1); glVertex2f(0, h); glTexCoord2f(1, 1); glVertex2f(w, h); glTexCoord2f(1, 0); glVertex2f(w, 0); glEnd(); } // the resize image example demonstrates how to interactively resize a // 2D image and display it using OpenGL. a image sampler is used to perform // hardware-accelerated linear interpolation for the resized image. int main(int argc, char *argv[]) { // setup command line arguments po::options_description options("options"); options.add_options() ("help", "show usage instructions") ("file", po::value(), "image file name (e.g. 
/path/to/image.png)") ; po::positional_options_description positional_options; positional_options.add("file", 1); // parse command line po::variables_map vm; po::store( po::command_line_parser(argc, argv) .options(options) .positional(positional_options) .run(), vm ); po::notify(vm); // check for file argument if(vm.count("help") || !vm.count("file")){ std::cout << options << std::endl; return -1; } // get file name std::string file_name = vm["file"].as(); // setup qt application QApplication app(argc, argv); // setup image widget ImageWidget widget(QString::fromStdString(file_name)); widget.show(); // run qt application return app.exec(); } #include "resize_image.moc" compute-0.5/example/simple_kernel.cpp000066400000000000000000000051131263566244600200400ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include namespace compute = boost::compute; // this example demonstrates how to use the Boost.Compute classes to // setup and run a simple vector addition kernel on the GPU int main() { // get the default device compute::device device = compute::system::default_device(); // create a context for the device compute::context context(device); // setup input arrays float a[] = { 1, 2, 3, 4 }; float b[] = { 5, 6, 7, 8 }; // make space for the output float c[] = { 0, 0, 0, 0 }; // create memory buffers for the input and output compute::buffer buffer_a(context, 4 * sizeof(float)); compute::buffer buffer_b(context, 4 * sizeof(float)); compute::buffer buffer_c(context, 4 * sizeof(float)); // source code for the add kernel const char source[] = "__kernel void add(__global const float *a," " __global const float *b," " __global float *c)" "{" " const uint i = get_global_id(0);" " c[i] = a[i] + b[i];" "}"; // create the program with the source compute::program program = compute::program::create_with_source(source, context); // compile the program program.build(); // create the kernel compute::kernel kernel(program, "add"); // set the kernel arguments kernel.set_arg(0, buffer_a); kernel.set_arg(1, buffer_b); kernel.set_arg(2, buffer_c); // create a command queue compute::command_queue queue(context, device); // write the data from 'a' and 'b' to the device queue.enqueue_write_buffer(buffer_a, 0, 4 * sizeof(float), a); queue.enqueue_write_buffer(buffer_b, 0, 4 * sizeof(float), b); // run the add kernel queue.enqueue_1d_range_kernel(kernel, 0, 4, 0); // transfer results back to the host array 'c' queue.enqueue_read_buffer(buffer_c, 0, 4 * sizeof(float), c); // print out results in 'c' std::cout << "c: [" << c[0] << ", " << c[1] << ", " << c[2] << ", " << c[3] << "]" << std::endl; return 0; } 
compute-0.5/example/simple_moving_average.cpp000066400000000000000000000103601263566244600215510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Benoit Dequidt // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include namespace compute = boost::compute; /// warning precision is not precise due /// to the float error accumulation when size is large enough /// for more precision use double /// or a kahan sum else results can diverge /// from the CPU implementation compute::program make_sma_program(const compute::context& context) { const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void SMA(__global const float *scannedValues, int size, __global float *output, int wSize) { const int gid = get_global_id(0); float cumValues = 0.f; int endIdx = gid + wSize/2; int startIdx = gid -1 - wSize/2; if(endIdx > size -1) endIdx = size -1; cumValues += scannedValues[endIdx]; if(startIdx < 0) startIdx = -1; else cumValues -= scannedValues[startIdx]; output[gid] =(float)( cumValues / ( float )(endIdx - startIdx)); } ); // create sma program return compute::program::build_with_source(source,context); } bool check_results(const std::vector& values, const std::vector& smoothValues, unsigned int wSize) { int size = values.size(); if(size != (int)smoothValues.size()) return false; int semiWidth = wSize/2; bool res = true; for(int idx = 0 ; idx < size ; ++idx) { int start = (std::max)(idx - semiWidth,0); int end = (std::min)(idx + semiWidth,size-1); float res = 0; for(int j = start ; j <= end ; ++j) { res+= values[j]; } res /= float(end - start +1); if(std::abs(res-smoothValues[idx]) 
> 1e-3) { std::cout << "idx = " << idx << " -- expected = " << res << " -- result = " << smoothValues[idx] << std::endl; res = false; } } return res; } // generate a uniform law over [0,10] float myRand() { static const double divisor = double(RAND_MAX)+1.; return double(rand())/divisor * 10.; } int main() { unsigned int size = 1024; // wSize must be odd unsigned int wSize = 21; // get the default device compute::device device = compute::system::default_device(); // create a context for the device compute::context context(device); // get the program compute::program program = make_sma_program(context); // create vector of random numbers on the host std::vector host_vector(size); std::vector host_result(size); std::generate(host_vector.begin(), host_vector.end(), myRand); compute::vector a(size,context); compute::vector b(size,context); compute::vector c(size,context); compute::command_queue queue(context, device); compute::copy(host_vector.begin(),host_vector.end(),a.begin(),queue); // scan values compute::inclusive_scan(a.begin(),a.end(),b.begin(),queue); // sma kernel compute::kernel kernel(program, "SMA"); kernel.set_arg(0,b.get_buffer()); kernel.set_arg(1,(int)b.size()); kernel.set_arg(2,c.get_buffer()); kernel.set_arg(3,(int)wSize); using compute::uint_; uint_ tpb = 128; uint_ workSize = size; queue.enqueue_1d_range_kernel(kernel,0,workSize,tpb); compute::copy(c.begin(),c.end(),host_result.begin(),queue); bool res = check_results(host_vector,host_result,wSize); std::string status = res ? 
"results are equivalent" : "GPU results differs from CPU one's"; std::cout << status << std::endl; return 0; } compute-0.5/example/sort_vector.cpp000066400000000000000000000036101263566244600175600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include namespace compute = boost::compute; int rand_int() { return rand() % 100; } // this example demonstrates how to sort a vector of ints on the GPU int main() { // create vector of random values on the host std::vector host_vector(10); std::generate(host_vector.begin(), host_vector.end(), rand_int); // print out input vector std::cout << "input: [ "; for(size_t i = 0; i < host_vector.size(); i++){ std::cout << host_vector[i]; if(i != host_vector.size() - 1){ std::cout << ", "; } } std::cout << " ]" << std::endl; // transfer the values to the device compute::vector device_vector = host_vector; // sort the values on the device compute::sort(device_vector.begin(), device_vector.end()); // transfer the values back to the host compute::copy(device_vector.begin(), device_vector.end(), host_vector.begin()); // print out the sorted vector std::cout << "output: [ "; for(size_t i = 0; i < host_vector.size(); i++){ std::cout << host_vector[i]; if(i != host_vector.size() - 1){ std::cout << ", "; } } std::cout << " ]" << std::endl; return 0; } compute-0.5/example/threefry_engine.cpp000066400000000000000000000027611263566244600203720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Muhammad Junaid Muzammil // // 
Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://kylelutz.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include int main() { using boost::compute::uint_; boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); boost::compute::threefry_engine<> rng(queue); boost::compute::vector vector_ctr(20, context); uint32_t ctr[20]; for(int i = 0; i < 10; i++) { ctr[i*2] = i; ctr[i*2+1] = 0; } boost::compute::copy(ctr, ctr+20, vector_ctr.begin(), queue); rng.generate(vector_ctr.begin(), vector_ctr.end(), queue); boost::compute::copy(vector_ctr.begin(), vector_ctr.end(), ctr, queue); for(int i = 0; i < 10; i++) { std::cout << std::hex << ctr[i*2] << " " << ctr[i*2+1] << std::endl; } return 0; } compute-0.5/example/time_copy.cpp000066400000000000000000000035471263566244600172100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// //[time_copy_example #include #include #include #include #include #include #include #include namespace compute = boost::compute; int main() { // get the default device compute::device gpu = compute::system::default_device(); // create context for default device compute::context context(gpu); // create command queue with profiling enabled compute::command_queue queue( context, gpu, compute::command_queue::enable_profiling ); // generate random data on the host std::vector host_vector(16000000); std::generate(host_vector.begin(), host_vector.end(), rand); // create a vector on the device compute::vector device_vector(host_vector.size(), context); // copy data from the host to the device compute::future future = compute::copy_async( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // wait for copy to finish future.wait(); // get elapsed time from event profiling information boost::chrono::milliseconds duration = future.get_event().duration(); // print elapsed time in milliseconds std::cout << "time: " << duration.count() << " ms" << std::endl; return 0; } //] compute-0.5/example/transform_sqrt.cpp000066400000000000000000000032411263566244600202730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// //[transform_sqrt_example #include #include #include #include #include namespace compute = boost::compute; int main() { // get default device and setup context compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); // generate random data on the host std::vector host_vector(10000); std::generate(host_vector.begin(), host_vector.end(), rand); // create a vector on the device compute::vector device_vector(host_vector.size(), context); // transfer data from the host to the device compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // calculate the square-root of each element in-place compute::transform( device_vector.begin(), device_vector.end(), device_vector.begin(), compute::sqrt(), queue ); // copy values back to the host compute::copy( device_vector.begin(), device_vector.end(), host_vector.begin(), queue ); return 0; } //] compute-0.5/example/vector_addition.cpp000066400000000000000000000033171263566244600203700ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include namespace compute = boost::compute; // this example demonstrates how to use Boost.Compute's STL // implementation to add two vectors on the GPU int main() { // setup input arrays float a[] = { 1, 2, 3, 4 }; float b[] = { 5, 6, 7, 8 }; // make space for the output float c[] = { 0, 0, 0, 0 }; // create vectors and transfer data for the input arrays 'a' and 'b' compute::vector vector_a(a, a + 4); compute::vector vector_b(b, b + 4); // create vector for the output array compute::vector vector_c(4); // add the vectors together compute::transform( vector_a.begin(), vector_a.end(), vector_b.begin(), vector_c.begin(), compute::plus() ); // transfer results back to the host array 'c' compute::copy(vector_c.begin(), vector_c.end(), c); // print out results in 'c' std::cout << "c: [" << c[0] << ", " << c[1] << ", " << c[2] << ", " << c[3] << "]" << std::endl; return 0; } compute-0.5/include/000077500000000000000000000000001263566244600144735ustar00rootroot00000000000000compute-0.5/include/boost/000077500000000000000000000000001263566244600156215ustar00rootroot00000000000000compute-0.5/include/boost/compute.hpp000066400000000000000000000027631263566244600200160ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_HPP #define BOOST_COMPUTE_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef BOOST_COMPUTE_HAVE_HDR_CL_EXT #include #endif #endif // BOOST_COMPUTE_HPP compute-0.5/include/boost/compute/000077500000000000000000000000001263566244600172755ustar00rootroot00000000000000compute-0.5/include/boost/compute/algorithm.hpp000066400000000000000000000103651263566244600220010ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_HPP #define BOOST_COMPUTE_ALGORITHM_HPP /// \file /// /// Meta-header to include all Boost.Compute algorithm headers. 
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif // BOOST_COMPUTE_ALGORITHM_HPP compute-0.5/include/boost/compute/algorithm/000077500000000000000000000000001263566244600212635ustar00rootroot00000000000000compute-0.5/include/boost/compute/algorithm/accumulate.hpp000066400000000000000000000136021263566244600241210ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP #define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline T generic_accumulate(InputIterator first, InputIterator last, T init, BinaryFunction function, command_queue &queue) { const context &context = queue.get_context(); size_t size = iterator_range_size(first, last); if(size == 0){ return init; } // accumulate on device array device_result(context); detail::serial_accumulate( first, last, device_result.begin(), init, function, queue ); // copy result to host T result; ::boost::compute::copy_n(device_result.begin(), 1, &result, queue); return result; } // returns true if we can use reduce() instead of accumulate() when // accumulate() this is true when the function is commutative (such as // addition of integers) and the initial value is the identity value // for the operation (zero for addition, one for multiplication). 
template inline bool can_accumulate_with_reduce(T init, F function) { (void) init; (void) function; return false; } /// \internal_ #define BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE(r, data, type) \ inline bool can_accumulate_with_reduce(type init, plus) \ { \ return init == type(0); \ } \ inline bool can_accumulate_with_reduce(type init, multiplies) \ { \ return init == type(1); \ } BOOST_PP_SEQ_FOR_EACH( BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE, _, (char_)(uchar_)(short_)(ushort_)(int_)(uint_)(long_)(ulong_) ) template inline bool can_accumulate_with_reduce(T init, min) { return init == (std::numeric_limits::max)(); } template inline bool can_accumulate_with_reduce(T init, max) { return init == (std::numeric_limits::min)(); } #undef BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE template inline T dispatch_accumulate(InputIterator first, InputIterator last, T init, BinaryFunction function, command_queue &queue) { size_t size = iterator_range_size(first, last); if(size == 0){ return init; } if(can_accumulate_with_reduce(init, function)){ T result; reduce(first, last, &result, function, queue); return result; } else { return generic_accumulate(first, last, init, function, queue); } } } // end detail namespace /// Returns the result of applying \p function to the elements in the /// range [\p first, \p last) and \p init. /// /// If no function is specified, \c plus will be used. /// /// \param first first element in the input range /// \param last last element in the input range /// \param init initial value /// \param function binary reduction function /// \param queue command queue to perform the operation /// /// \return the accumulated result value /// /// In specific situations the call to \c accumulate() can be automatically /// optimized to a call to the more efficient \c reduce() algorithm. This /// occurs when the binary reduction function is recognized as associative /// (such as the \c plus function). 
/// /// Note that because floating-point addition is not associative, calling /// \c accumulate() with \c plus results in a less efficient serial /// reduction algorithm being executed. If a slight loss in precision is /// acceptable, the more efficient parallel \c reduce() algorithm should be /// used instead. /// /// For example: /// \code /// // with vec = boost::compute::vector /// accumulate(vec.begin(), vec.end(), 0, plus()); // fast /// reduce(vec.begin(), vec.end(), &result, plus()); // fast /// /// // with vec = boost::compute::vector /// accumulate(vec.begin(), vec.end(), 0, plus()); // slow /// reduce(vec.begin(), vec.end(), &result, plus()); // fast /// \endcode /// /// \see reduce() template inline T accumulate(InputIterator first, InputIterator last, T init, BinaryFunction function, command_queue &queue = system::default_queue()) { return detail::dispatch_accumulate(first, last, init, function, queue); } /// \overload template inline T accumulate(InputIterator first, InputIterator last, T init, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type IT; return detail::dispatch_accumulate(first, last, init, plus(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP compute-0.5/include/boost/compute/algorithm/adjacent_difference.hpp000066400000000000000000000064261263566244600257270ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP
#define BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP

// NOTE(review): the targets of the #include directives (and the template
// parameter lists / template arguments further down) are missing in this copy
// of the file; they look stripped by an extraction step and must be restored
// from the upstream sources before this can compile.
#include #include #include #include #include #include #include

namespace boost {
namespace compute {

/// Stores the difference of each pair of consecutive values in the range
/// [\p first, \p last) to the range beginning at \p result. If \p op is not
/// provided, \c minus is used.
///
/// The first output element is a copy of the first input element; every
/// other output element i is \p op applied to input elements i and i-1.
///
/// \param first first element in the input range
/// \param last last element in the input range
/// \param result first element in the output range
/// \param op binary difference function
/// \param queue command queue to perform the operation
///
/// \return \c OutputIterator to the end of the result range
///
/// \see adjacent_find()
template inline OutputIterator
adjacent_difference(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    BinaryFunction op,
                    command_queue &queue = system::default_queue())
{
    // empty input: nothing is written
    if(first == last){
        return result;
    }

    size_t count = detail::iterator_range_size(first, last);

    // NOTE(review): unlike the overload without 'op' below, this overload has
    // no guard for result aliasing first; in-place use would let one
    // work-item read first[i-1] while another may already have overwritten
    // it -- confirm callers never pass overlapping ranges here.
    detail::meta_kernel k("adjacent_difference");
    k << "const uint i = get_global_id(0);\n"
      << "if(i == 0){\n"
      << " " << result[k.var("0")] << " = " << first[k.var("0")] << ";\n"
      << "}\n"
      << "else {\n"
      << " " << result[k.var("i")] << " = "
             << op(first[k.var("i")], first[k.var("i-1")]) << ";\n"
      << "}\n";

    // one work-item per input element
    k.exec_1d(queue, 0, count, 1);

    return result + count;
}

/// \overload
///
/// Uses \c minus as the difference function. When \p result is the same
/// iterator as \p first, the input is first copied to a temporary device
/// vector so that the kernel does not read values it is overwriting.
template inline OutputIterator
adjacent_difference(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue = system::default_queue())
{
    typedef typename std::iterator_traits::value_type value_type;

    if (first == result) {
        // in-place operation: work from a snapshot of the input
        vector temp(detail::iterator_range_size(first, last), queue.get_context());
        copy(first, last, temp.begin(), queue);

        return ::boost::compute::adjacent_difference(
            temp.begin(), temp.end(), result, ::boost::compute::minus(), queue
        );
    } else {
        return ::boost::compute::adjacent_difference(
            first, last, result, ::boost::compute::minus(), queue
        );
    }
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP compute-0.5/include/boost/compute/algorithm/adjacent_find.hpp000066400000000000000000000116251263566244600245520ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP
#define BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP

// NOTE(review): include targets are missing in this copy (see note above).
#include #include #include #include #include #include #include #include #include

namespace boost {
namespace compute {
namespace detail {

// Single work-item implementation: scans the range in order and writes the
// index of the first element for which compare(element, next element) holds,
// or 'size' when there is none, so that the returned iterator equals last.
template inline InputIterator
serial_adjacent_find(InputIterator first,
                     InputIterator last,
                     Compare compare,
                     command_queue &queue)
{
    if(first == last){
        return last;
    }

    const context &context = queue.get_context();

    // device-side scalar receiving the resulting index
    detail::scalar output(context);

    detail::meta_kernel k("serial_adjacent_find");

    size_t size_arg = k.add_arg("size");
    size_t output_arg = k.add_arg(memory_object::global_memory, "output");

    k << k.decl("result") << " = size;\n"
      << "for(uint i = 0; i < size - 1; i++){\n"
      << " if(" << compare(first[k.expr("i")], first[k.expr("i+1")]) << "){\n"
      << " result = i;\n"
      << " break;\n"
      << " }\n"
      << "}\n"
      << "*output = result;\n";

    k.set_arg(size_arg, detail::iterator_range_size(first, last));
    k.set_arg(output_arg, output.get_buffer());

    // a single work-item runs the whole loop
    k.exec_1d(queue, 0, 1, 1);

    return first + output.read(queue);
}

// Parallel implementation: one work-item per adjacent pair; every matching
// work-item lowers the shared output index with atomic_min(), so the smallest
// matching index is the one left in 'output'.
template inline InputIterator
adjacent_find_with_atomics(InputIterator first,
                           InputIterator last,
                           Compare compare,
                           command_queue &queue)
{
    if(first == last){
        return last;
    }

    const context &context = queue.get_context();
    size_t count = detail::iterator_range_size(first, last);

    // initialize output to the last index
    detail::scalar output(context);
    output.write(static_cast(count), queue);

    detail::meta_kernel k("adjacent_find_with_atomics");

    size_t output_arg = k.add_arg(memory_object::global_memory, "output");

    k << "const uint i = get_global_id(0);\n"
      << "if(" << compare(first[k.expr("i")], first[k.expr("i+1")]) << "){\n"
      << " atomic_min(output, i);\n"
      << "}\n";

    k.set_arg(output_arg, output.get_buffer());

    // count - 1 work-items: the last element has no successor to compare with
    k.exec_1d(queue, 0, count - 1, 1);

    return first + output.read(queue);
}

} // end detail namespace

/// Searches the range [\p first, \p last) for two identical adjacent
/// elements and returns an iterator pointing to the first.
///
/// \param first first element in the range to search
/// \param last last element in the range to search
/// \param compare binary comparison function
/// \param queue command queue to perform the operation
///
/// \return \c InputIterator to the first element which compares equal
/// to the following element. If none are equal, returns \c last.
/// /// \see find(), adjacent_difference() template inline InputIterator adjacent_find(InputIterator first, InputIterator last, Compare compare, command_queue &queue = system::default_queue()) { size_t count = detail::iterator_range_size(first, last); if(count < 32){ return detail::serial_adjacent_find(first, last, compare, queue); } else { return detail::adjacent_find_with_atomics(first, last, compare, queue); } } /// \overload template inline InputIterator adjacent_find(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; using ::boost::compute::lambda::_1; using ::boost::compute::lambda::_2; using ::boost::compute::lambda::all; if(vector_size::value == 1){ return ::boost::compute::adjacent_find( first, last, _1 == _2, queue ); } else { return ::boost::compute::adjacent_find( first, last, all(_1 == _2), queue ); } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP compute-0.5/include/boost/compute/algorithm/all_of.hpp000066400000000000000000000023141263566244600232300ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP #define BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP #include #include namespace boost { namespace compute { /// Returns \c true if \p predicate returns \c true for all of the elements in /// the range [\p first, \p last). 
/// /// \see any_of(), none_of() template inline bool all_of(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { return ::boost::compute::find_if_not(first, last, predicate, queue) == last; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP compute-0.5/include/boost/compute/algorithm/any_of.hpp000066400000000000000000000025011263566244600232450ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP #define BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP #include #include namespace boost { namespace compute { /// Returns \c true if \p predicate returns \c true for any of the elements in /// the range [\p first, \p last). 
/// /// For example, to test if a vector contains any negative values: /// /// \snippet test/test_any_all_none_of.cpp any_of /// /// \see all_of(), none_of() template inline bool any_of(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { return ::boost::compute::find_if(first, last, predicate, queue) != last; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP compute-0.5/include/boost/compute/algorithm/binary_search.hpp000066400000000000000000000024011263566244600246020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP #define BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP #include #include #include namespace boost { namespace compute { /// Returns \c true if \p value is in the sorted range [\p first, /// \p last). 
// Implemented with lower_bound(): the value is present when the returned
// position is inside the range and the element read back from that position
// compares equal to 'value'.
// NOTE(review): the template parameter list is missing in this copy of the
// file (it looks stripped by an extraction step).
template inline bool binary_search(InputIterator first,
                                   InputIterator last,
                                   const T &value,
                                   command_queue &queue = system::default_queue())
{
    InputIterator position = lower_bound(first, last, value, queue);

    // check the range bound first so that 'last' is never read
    return position != last && position.read(queue) == value;
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP compute-0.5/include/boost/compute/algorithm/copy.hpp000066400000000000000000000306001263566244600227450ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP
#define BOOST_COMPUTE_ALGORITHM_COPY_HPP

// NOTE(review): include targets are missing in this copy of the file.
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

namespace boost {
namespace compute {
namespace detail {

namespace mpl = boost::mpl;

// meta-function returning true if copy() between InputIterator and
// OutputIterator can be implemented with clEnqueueCopyBuffer().
// True when both iterators are buffer_iterator and their value types match.
// NOTE(review): template parameter lists and several template arguments
// (e.g. of buffer_iterator, is_device_iterator, is_contiguous_iterator,
// std::iterator_traits, std::vector, future, static_cast) are missing
// throughout this section; they look stripped by an extraction step and must
// be restored from the upstream sources. The overloads below are selected by
// their enable_if conditions, so the code is left untouched here.
template struct can_copy_with_copy_buffer :
    mpl::and_<
        boost::is_same<
            InputIterator,
            buffer_iterator
        >,
        boost::is_same<
            OutputIterator,
            buffer_iterator
        >,
        boost::is_same<
            typename InputIterator::value_type,
            typename OutputIterator::value_type
        >
    >::type {};

// host -> device
template inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if_c<
                  !is_device_iterator::value &&
                  is_device_iterator::value
              >::type* = 0)
{
    if(is_contiguous_iterator::value){
        return copy_to_device(first, last, result, queue);
    }
    else {
        // for non-contiguous input we first copy the values to
        // a temporary std::vector and then copy from there
        typedef typename std::iterator_traits::value_type T;
        std::vector vector(first, last);
        return copy_to_device(vector.begin(), vector.end(), result, queue);
    }
}

// host -> device (async)
// Only contiguous host ranges are accepted (enforced at compile time by the
// static assertion below).
template inline future
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if_c<
                        !is_device_iterator::value &&
                        is_device_iterator::value
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    return copy_to_device_async(first, last, result, queue);
}

// device -> host
template inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if_c<
                  is_device_iterator::value &&
                  !is_device_iterator::value
              >::type* = 0)
{
    if(is_contiguous_iterator::value){
        return copy_to_host(first, last, result, queue);
    }
    else {
        // for non-contiguous input we first copy the values to
        // a temporary std::vector and then copy from there
        typedef typename std::iterator_traits::value_type T;
        std::vector vector(iterator_range_size(first, last));
        copy_to_host(first, last, vector.begin(), queue);
        return std::copy(vector.begin(), vector.end(), result);
    }
}

// device -> host (async)
// Only contiguous host ranges are accepted (enforced at compile time by the
// static assertion below).
template inline future
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if_c<
                        is_device_iterator::value &&
                        !is_device_iterator::value
                    >::type* = 0)
{
    BOOST_STATIC_ASSERT_MSG(
        is_contiguous_iterator::value,
        "copy_async() is only supported for contiguous host iterators"
    );

    return copy_to_host_async(first, last, result, queue);
}

// device -> device
template inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator,
                      is_device_iterator,
                      mpl::not_<
                          can_copy_with_copy_buffer<
                              InputIterator, OutputIterator
                          >
                      >
                  >
              >::type* = 0)
{
    return copy_on_device(first, last, result, queue);
}

// device -> device (specialization for buffer iterators)
// Enqueues a buffer-to-buffer copy; offsets and size are converted from
// element counts to bytes with sizeof(value_type).
template inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if<
                  mpl::and_<
                      is_device_iterator,
                      is_device_iterator,
                      can_copy_with_copy_buffer<
                          InputIterator, OutputIterator
                      >
                  >
              >::type* = 0)
{
    typedef typename std::iterator_traits::value_type value_type;
    typedef typename std::iterator_traits::difference_type difference_type;

    difference_type n = std::distance(first, last);
    if(n < 1){
        // nothing to copy
        return result;
    }

    queue.enqueue_copy_buffer(first.get_buffer(),
                              result.get_buffer(),
                              first.get_index() * sizeof(value_type),
                              result.get_index() * sizeof(value_type),
                              static_cast(n) * sizeof(value_type));
    return result + n;
}

// device -> device (async)
template inline future
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator,
                            is_device_iterator,
                            mpl::not_<
                                can_copy_with_copy_buffer<
                                    InputIterator, OutputIterator
                                >
                            >
                        >
                    >::type* = 0)
{
    return copy_on_device_async(first, last, result, queue);
}

// device -> device (async, specialization for buffer iterators)
// For an empty range a future holding 'result' and a default-constructed
// event is returned without enqueuing anything.
template inline future
dispatch_copy_async(InputIterator first,
                    InputIterator last,
                    OutputIterator result,
                    command_queue &queue,
                    typename boost::enable_if<
                        mpl::and_<
                            is_device_iterator,
                            is_device_iterator,
                            can_copy_with_copy_buffer<
                                InputIterator, OutputIterator
                            >
                        >
                    >::type* = 0)
{
    typedef typename std::iterator_traits::value_type value_type;
    typedef typename std::iterator_traits::difference_type difference_type;

    difference_type n = std::distance(first, last);
    if(n < 1){
        // nothing to copy
        return make_future(result, event());
    }

    event event_ =
        queue.enqueue_copy_buffer(
            first.get_buffer(),
            result.get_buffer(),
            first.get_index() * sizeof(value_type),
            result.get_index() * sizeof(value_type),
            static_cast(n) * sizeof(value_type)
        );

    return make_future(result + n, event_);
}

// host -> host
// No device work is involved, so the queue is unused and std::copy is called.
template inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if_c<
                  !is_device_iterator::value &&
                  !is_device_iterator::value
              >::type* = 0)
{
    (void) queue;

    return std::copy(first, last, result);
}

} // end detail namespace

/// Copies the values in the range [\p first, \p last) to the range
/// beginning at \p result.
///
/// The generic copy() function can be used for a variety of data
/// transfer tasks and provides a standard interface to the following
/// OpenCL functions:
///
/// \li \c clEnqueueReadBuffer()
/// \li \c clEnqueueWriteBuffer()
/// \li \c clEnqueueCopyBuffer()
///
/// Unlike the aforementioned OpenCL functions, copy() will also work
/// with non-contiguous data-structures (e.g. \c std::list) as
/// well as with "fancy" iterators (e.g. transform_iterator).
/// /// \param first first element in the range to copy /// \param last last element in the range to copy /// \param result first element in the result range /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// For example, to copy an array of \c int values on the host to a vector on /// the device: /// \code /// // array on the host /// int data[] = { 1, 2, 3, 4 }; /// /// // vector on the device /// boost::compute::vector vec(4, context); /// /// // copy values to the device vector /// boost::compute::copy(data, data + 4, vec.begin(), queue); /// \endcode /// /// The copy algorithm can also be used with standard containers such as /// \c std::vector: /// \code /// std::vector host_vector = ... /// boost::compute::vector device_vector = ... /// /// // copy from the host to the device /// boost::compute::copy( /// host_vector.begin(), host_vector.end(), device_vector.begin(), queue /// ); /// /// // copy from the device to the host /// boost::compute::copy( /// device_vector.begin(), device_vector.end(), host_vector.begin(), queue /// ); /// \endcode /// /// \see copy_n(), copy_if(), copy_async() template inline OutputIterator copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { return detail::dispatch_copy(first, last, result, queue); } /// Copies the values in the range [\p first, \p last) to the range /// beginning at \p result. The copy is performed asynchronously. 
/// /// \see copy() template inline future copy_async(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { return detail::dispatch_copy_async(first, last, result, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_COPY_HPP compute-0.5/include/boost/compute/algorithm/copy_if.hpp000066400000000000000000000041731263566244600234310ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP #define BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP #include #include namespace boost { namespace compute { namespace detail { // like the copy_if() algorithm but writes the indices of the values for which // predicate returns true. template inline OutputIterator copy_index_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type T; return detail::transform_if_impl( first, last, result, identity(), predicate, true, queue ); } } // end detail namespace /// Copies each element in the range [\p first, \p last) for which /// \p predicate returns \c true to the range beginning at \p result. 
// Implemented as transform_if() with an identity transform, so selected
// elements are written unchanged.
// NOTE(review): template parameter lists / template arguments are missing in
// this copy of the file (they look stripped by an extraction step).
template inline OutputIterator
copy_if(InputIterator first,
        InputIterator last,
        OutputIterator result,
        Predicate predicate,
        command_queue &queue = system::default_queue())
{
    typedef typename std::iterator_traits::value_type T;

    return ::boost::compute::transform_if(
        first, last, result, identity(), predicate, queue
    );
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP compute-0.5/include/boost/compute/algorithm/copy_n.hpp000066400000000000000000000034021263566244600232620ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_COPY_N_HPP
#define BOOST_COMPUTE_ALGORITHM_COPY_N_HPP

// NOTE(review): include targets are missing in this copy of the file.
#include #include #include

namespace boost {
namespace compute {

/// Copies \p count elements from \p first to \p result.
/// /// For example, to copy four values from the host to the device: /// \code /// // values on the host and vector on the device /// float values[4] = { 1.f, 2.f, 3.f, 4.f }; /// boost::compute::vector vec(4, context); /// /// // copy from the host to the device /// boost::compute::copy_n(values, 4, vec.begin(), queue); /// \endcode /// /// \see copy() template inline OutputIterator copy_n(InputIterator first, Size count, OutputIterator result, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::difference_type difference_type; return ::boost::compute::copy(first, first + static_cast(count), result, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_COPY_N_HPP compute-0.5/include/boost/compute/algorithm/count.hpp000066400000000000000000000034741263566244600231340ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_COUNT_HPP #define BOOST_COMPUTE_ALGORITHM_COUNT_HPP #include #include #include #include #include namespace boost { namespace compute { /// Returns the number of occurrences of \p value in the range /// [\p first, \p last). 
/// /// \see count_if() template inline size_t count(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; using ::boost::compute::_1; using ::boost::compute::lambda::all; if(vector_size::value == 1){ return ::boost::compute::count_if(first, last, _1 == value, queue); } else { return ::boost::compute::count_if(first, last, all(_1 == value), queue); } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_COUNT_HPP compute-0.5/include/boost/compute/algorithm/count_if.hpp000066400000000000000000000041311263566244600236010ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP #define BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// Returns the number of elements in the range [\p first, \p last) /// for which \p predicate returns \c true. 
// Selects an implementation from the device type and the input size:
//   - empty range: returns 0 without enqueuing any work
//   - CPU device: serial below 1024 elements, otherwise count_if_with_threads
//   - other devices: serial below 32 elements, otherwise count_if_with_reduce
// NOTE(review): the template parameter list is missing in this copy of the
// file (it looks stripped by an extraction step).
template inline size_t count_if(InputIterator first,
                                InputIterator last,
                                Predicate predicate,
                                command_queue &queue = system::default_queue())
{
    const device &device = queue.get_device();

    size_t input_size = detail::iterator_range_size(first, last);
    if(input_size == 0){
        return 0;
    }

    if(device.type() & device::cpu){
        if(input_size < 1024){
            return detail::serial_count_if(first, last, predicate, queue);
        }
        else {
            return detail::count_if_with_threads(first, last, predicate, queue);
        }
    }
    else {
        if(input_size < 32){
            return detail::serial_count_if(first, last, predicate, queue);
        }
        else {
            return detail::count_if_with_reduce(first, last, predicate, queue);
        }
    }
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP compute-0.5/include/boost/compute/algorithm/detail/000077500000000000000000000000001263566244600225255ustar00rootroot00000000000000compute-0.5/include/boost/compute/algorithm/detail/balanced_path.hpp000066400000000000000000000141731263566244600260110ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP
#define BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP

// NOTE(review): include targets are missing in this copy of the file.
#include #include #include #include #include #include #include

namespace boost {
namespace compute {
namespace detail {

///
/// \brief Balanced Path kernel class
///
/// Subclass of meta_kernel to break two sets into tiles according
/// to their balanced path.
///
/// Usage as visible in this class: call set_range() to generate the kernel
/// source for a pair of input ranges, then exec() to run it. One work-item is
/// launched per tile of \c tile_size elements of the combined input, and each
/// work-item writes one index into \c result_a and one into \c result_b.
///
// NOTE(review): this copy of the class is damaged. Template parameter lists
// and template arguments are missing, and the kernel source assembled in
// set_range() is truncated in three places (the "while(start(",
// "while(a_start(" and "while(b_start(" fragments have lost the loop
// condition, the midpoint computation and the opening of the comp() call).
// The code below is kept exactly as found; restore it from the upstream
// sources before use.
class balanced_path_kernel : public meta_kernel
{
public:
    // number of elements of the combined input handled per work-item
    unsigned int tile_size;

    balanced_path_kernel() : meta_kernel("balanced_path")
    {
        tile_size = 4;
    }

    // Records the sizes of both input ranges, declares the "a_count" and
    // "b_count" kernel arguments and generates the kernel source using
    // 'comp' as the ordering.
    template void
    set_range(InputIterator1 first1,
              InputIterator1 last1,
              InputIterator2 first2,
              InputIterator2 last2,
              OutputIterator1 result_a,
              OutputIterator2 result_b,
              Compare comp)
    {
        typedef typename std::iterator_traits::value_type value_type;

        m_a_count = iterator_range_size(first1, last1);
        m_a_count_arg = add_arg("a_count");

        m_b_count = iterator_range_size(first2, last2);
        m_b_count_arg = add_arg("b_count");

        *this <<
            "uint i = get_global_id(0);\n" <<
            "uint target = (i+1)*" << tile_size << ";\n" <<
            "uint start = max(convert_int(0),convert_int(target)-convert_int(b_count));\n" <<
            "uint end = min(target,a_count);\n" <<
            "uint a_index, b_index;\n" <<
            "while(start("b_index")], first1[expr("a_index")]) << "))\n" <<
            " start = a_index + 1;\n" <<
            " else end = a_index;\n" <<
            "}\n" <<
            "a_index = start;\n" <<
            "b_index = target - start;\n" <<
            "if(b_index < b_count)\n" <<
            "{\n" <<
            " " << decl("x") << " = " << first2[expr("b_index")] << ";\n" <<
            " uint a_start = 0, a_end = a_index, a_mid;\n" <<
            " uint b_start = 0, b_end = b_index, b_mid;\n" <<
            " while(a_start("a_mid")], expr("x")) << ")\n" <<
            " a_start = a_mid+1;\n" <<
            " else a_end = a_mid;\n" <<
            " }\n" <<
            " while(b_start("b_mid")], expr("x")) << ")\n" <<
            " b_start = b_mid+1;\n" <<
            " else b_end = b_mid;\n" <<
            " }\n" <<
            " uint a_run = a_index - a_start;\n" <<
            " uint b_run = b_index - b_start;\n" <<
            " uint x_count = a_run + b_run;\n" <<
            " uint b_advance = max(x_count / 2, x_count - a_run);\n" <<
            " b_end = min(b_count, b_start + b_advance + 1);\n" <<
            " uint temp_start = b_index, temp_end = b_end, temp_mid;" <<
            " while(temp_start < temp_end)\n" <<
            " {\n" <<
            " temp_mid = (temp_start + temp_end + 1)/2;\n" <<
            " if(" << comp(expr("x"), first2[expr("temp_mid")]) << ")\n" <<
            " temp_end = temp_mid-1;\n" <<
            " else temp_start = temp_mid;\n" <<
            " }\n" <<
            " b_run = temp_start - b_start + 1;\n" <<
            " b_advance = min(b_advance, b_run);\n" <<
            " uint a_advance = x_count - b_advance;\n" <<
            " uint star = convert_uint((a_advance == b_advance + 1) " <<
            "&& (b_advance < b_run));\n" <<
            " a_index = a_start + a_advance;\n" <<
            " b_index = target - a_index + star;\n" <<
            "}\n" <<
            result_a[expr("i")] << " = a_index;\n" <<
            result_b[expr("i")] << " = b_index;\n";
    }

    // Convenience overload: same as above with ::boost::compute::less as the
    // ordering.
    template void
    set_range(InputIterator1 first1,
              InputIterator1 last1,
              InputIterator2 first2,
              InputIterator2 last2,
              OutputIterator1 result_a,
              OutputIterator2 result_b)
    {
        typedef typename std::iterator_traits::value_type value_type;
        ::boost::compute::less less_than;
        set_range(first1, last1, first2, last2, result_a, result_b, less_than);
    }

    // Runs the generated kernel with one work-item per complete tile.
    // Returns a default-constructed event without enqueuing anything when
    // the combined input is smaller than one tile.
    event exec(command_queue &queue)
    {
        if((m_a_count + m_b_count)/tile_size == 0) {
            return event();
        }

        set_arg(m_a_count_arg, uint_(m_a_count));
        set_arg(m_b_count_arg, uint_(m_b_count));

        return exec_1d(queue, 0, (m_a_count + m_b_count)/tile_size);
    }

private:
    size_t m_a_count;      // number of elements in the first input range
    size_t m_a_count_arg;  // kernel argument index of "a_count"
    size_t m_b_count;      // number of elements in the second input range
    size_t m_b_count_arg;  // kernel argument index of "b_count"
};

} //end detail namespace
} //end compute namespace
} //end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP compute-0.5/include/boost/compute/algorithm/detail/binary_find.hpp000066400000000000000000000107761263566244600255350ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP #include #include #include #include #include namespace boost { namespace compute { namespace detail{ /// /// \brief Binary find kernel class /// /// Subclass of meta_kernel to perform single step in binary find. /// template class binary_find_kernel : public meta_kernel { public: binary_find_kernel(InputIterator first, InputIterator last, UnaryPredicate predicate) : meta_kernel("binary_find") { typedef typename std::iterator_traits::value_type value_type; m_index_arg = add_arg(memory_object::global_memory, "index"); m_block_arg = add_arg("block"); atomic_min atomic_min_uint; *this << "uint i = get_global_id(0) * block;\n" << decl("value") << "=" << first[var("i")] << ";\n" << "if(" << predicate(var("value")) << ") {\n" << atomic_min_uint(var("index"), var("i")) << ";\n" << "}\n"; } size_t m_index_arg; size_t m_block_arg; }; /// /// \brief Binary find algorithm /// /// Finds the end of true values in the partitioned range [first, last). 
/// \return Iterator pointing to end of true values /// /// \param first Iterator pointing to start of range /// \param last Iterator pointing to end of range /// \param predicate Predicate according to which the range is partitioned /// \param queue Queue on which to execute /// template inline InputIterator binary_find(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { const device &device = queue.get_device(); boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); const std::string cache_key = "__boost_binary_find"; size_t find_if_limit = 128; size_t threads = parameters->get(cache_key, "tpb", 128); size_t count = iterator_range_size(first, last); InputIterator search_first = first; InputIterator search_last = last; scalar index(queue.get_context()); // construct and compile binary_find kernel binary_find_kernel binary_find_kernel(search_first, search_last, predicate); ::boost::compute::kernel kernel = binary_find_kernel.compile(queue.get_context()); // set buffer for index kernel.set_arg(binary_find_kernel.m_index_arg, index.get_buffer()); while(count > find_if_limit) { index.write(static_cast(count), queue); // set block and run binary_find kernel uint_ block = (count - 1)/(threads - 1); kernel.set_arg(binary_find_kernel.m_block_arg, block); queue.enqueue_1d_range_kernel(kernel, 0, threads, 0); size_t i = index.read(queue); if(i == count) { search_first = search_last - ((count - 1)%(threads - 1)); break; } else { search_last = search_first + i; search_first = search_last - ((count - 1)/(threads - 1)); } // Make sure that first and last stay within the input range search_last = (std::min)(search_last, last); search_last = (std::max)(search_last, first); search_first = (std::max)(search_first, first); search_first = (std::min)(search_first, last); count = iterator_range_size(search_first, search_last); } return find_if(search_first, search_last, predicate, queue); } } // 
end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP compute-0.5/include/boost/compute/algorithm/detail/compact.hpp000066400000000000000000000042141263566244600246650ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// /// \brief Compact kernel class /// /// Subclass of meta_kernel to compact the result of set kernels to /// get actual sets /// class compact_kernel : public meta_kernel { public: unsigned int tile_size; compact_kernel() : meta_kernel("compact") { tile_size = 4; } template void set_range(InputIterator1 start, InputIterator2 counts_begin, InputIterator2 counts_end, OutputIterator result) { m_count = iterator_range_size(counts_begin, counts_end) - 1; *this << "uint i = get_global_id(0);\n" << "uint count = i*" << tile_size << ";\n" << "for(uint j = " << counts_begin[expr("i")] << "; j<" << counts_begin[expr("i+1")] << "; j++, count++)\n" << "{\n" << result[expr("j")] << " = " << start[expr("count")] << ";\n" << "}\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; } //end detail namespace } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP 
compute-0.5/include/boost/compute/algorithm/detail/copy_on_device.hpp000066400000000000000000000133151263566244600262260ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { inline size_t pick_copy_work_group_size(size_t n, const device &device) { (void) device; if(n % 32 == 0) return 32; else if(n % 16 == 0) return 16; else if(n % 8 == 0) return 8; else if(n % 4 == 0) return 4; else if(n % 2 == 0) return 2; else return 1; } template class copy_kernel : public meta_kernel { public: copy_kernel(const device &device) : meta_kernel("copy") { m_count = 0; typedef typename std::iterator_traits::value_type input_type; boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); std::string cache_key = "__boost_copy_kernel_" + boost::lexical_cast(sizeof(input_type)); m_vpt = parameters->get(cache_key, "vpt", 4); m_tpb = parameters->get(cache_key, "tpb", 128); } void set_range(InputIterator first, InputIterator last, OutputIterator result) { m_count_arg = add_arg("count"); *this << "uint index = get_local_id(0) + " << "(" << m_vpt * m_tpb << " * get_group_id(0));\n" << "for(uint i = 0; i < " << m_vpt << "; i++){\n" << " if(index < count){\n" << result[expr("index")] << '=' << first[expr("index")] << ";\n" << " index += " << m_tpb << ";\n" " }\n" "}\n"; m_count = detail::iterator_range_size(first, last); } event 
exec(command_queue &queue) { if(m_count == 0){ // nothing to do return event(); } size_t global_work_size = calculate_work_size(m_count, m_vpt, m_tpb); set_arg(m_count_arg, uint_(m_count)); return exec_1d(queue, 0, global_work_size, m_tpb); } private: size_t m_count; size_t m_count_arg; uint_ m_vpt; uint_ m_tpb; }; template inline OutputIterator copy_on_device(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue) { const device &device = queue.get_device(); copy_kernel kernel(device); kernel.set_range(first, last, result); kernel.exec(queue); return result + std::distance(first, last); } template inline discard_iterator copy_on_device(InputIterator first, InputIterator last, discard_iterator result, command_queue &queue) { (void) queue; return result + std::distance(first, last); } template inline future copy_on_device_async(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue) { const device &device = queue.get_device(); copy_kernel kernel(device); kernel.set_range(first, last, result); event event_ = kernel.exec(queue); return make_future(result + std::distance(first, last), event_); } #ifdef CL_VERSION_2_0 // copy_on_device() specialization for svm_ptr template inline svm_ptr copy_on_device(svm_ptr first, svm_ptr last, svm_ptr result, command_queue &queue) { size_t count = iterator_range_size(first, last); if(count == 0){ return result; } queue.enqueue_svm_memcpy( result.get(), first.get(), count * sizeof(T) ); return result + count; } template inline future > copy_on_device_async(svm_ptr first, svm_ptr last, svm_ptr result, command_queue &queue) { size_t count = iterator_range_size(first, last); if(count == 0){ return result; } event event_ = queue.enqueue_svm_memcpy_async( result.get(), first.get(), count * sizeof(T) ); return make_future(result + count, event_); } #endif // CL_VERSION_2_0 } // end detail namespace } // end compute namespace } // end boost namespace #endif // 
BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP compute-0.5/include/boost/compute/algorithm/detail/copy_to_device.hpp000066400000000000000000000101331263566244600262270ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline DeviceIterator copy_to_device(HostIterator first, HostIterator last, DeviceIterator result, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::difference_type difference_type; size_t count = iterator_range_size(first, last); if(count == 0){ return result; } size_t offset = result.get_index(); queue.enqueue_write_buffer(result.get_buffer(), offset * sizeof(value_type), count * sizeof(value_type), ::boost::addressof(*first)); return result + static_cast(count); } template inline future copy_to_device_async(HostIterator first, HostIterator last, DeviceIterator result, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::difference_type difference_type; size_t count = iterator_range_size(first, last); if(count == 0){ return future(); } size_t offset = result.get_index(); event event_ = queue.enqueue_write_buffer_async(result.get_buffer(), offset * sizeof(value_type), count * sizeof(value_type), ::boost::addressof(*first)); return make_future(result + static_cast(count), event_); } 
#ifdef CL_VERSION_2_0 // copy_to_device() specialization for svm_ptr template inline svm_ptr copy_to_device(HostIterator first, HostIterator last, svm_ptr result, command_queue &queue) { size_t count = iterator_range_size(first, last); if(count == 0){ return result; } queue.enqueue_svm_memcpy( result.get(), ::boost::addressof(*first), count * sizeof(T) ); return result + count; } template inline future > copy_to_device_async(HostIterator first, HostIterator last, svm_ptr result, command_queue &queue) { size_t count = iterator_range_size(first, last); if(count == 0){ return result; } event event_ = queue.enqueue_svm_memcpy_async( result.get(), ::boost::addressof(*first), count * sizeof(T) ); return make_future(result + count, event_); } #endif // CL_VERSION_2_0 } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP compute-0.5/include/boost/compute/algorithm/detail/copy_to_host.hpp000066400000000000000000000106231263566244600257510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline HostIterator copy_to_host(DeviceIterator first, DeviceIterator last, HostIterator result, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; size_t count = iterator_range_size(first, last); if(count == 0){ return result; } const buffer &buffer = first.get_buffer(); size_t offset = first.get_index(); queue.enqueue_read_buffer(buffer, offset * sizeof(value_type), count * sizeof(value_type), ::boost::addressof(*result)); return iterator_plus_distance(result, count); } // copy_to_host() specialization for std::vector template inline std::vector::iterator copy_to_host(DeviceIterator first, DeviceIterator last, std::vector::iterator result, command_queue &queue) { std::vector temp(std::distance(first, last)); copy_to_host(first, last, temp.begin(), queue); return std::copy(temp.begin(), temp.end(), result); } template inline future copy_to_host_async(DeviceIterator first, DeviceIterator last, HostIterator result, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; size_t count = iterator_range_size(first, last); if(count == 0){ return future(); } const buffer &buffer = first.get_buffer(); size_t offset = first.get_index(); event event_ = queue.enqueue_read_buffer_async(buffer, offset * sizeof(value_type), count * sizeof(value_type), ::boost::addressof(*result)); return make_future(iterator_plus_distance(result, count), event_); } #ifdef CL_VERSION_2_0 // copy_to_host() specialization for svm_ptr template inline HostIterator copy_to_host(svm_ptr first, svm_ptr last, HostIterator result, command_queue &queue) { size_t count = iterator_range_size(first, last); if(count == 0){ return 
result; } queue.enqueue_svm_memcpy( ::boost::addressof(*result), first.get(), count * sizeof(T) ); return result + count; } template inline future copy_to_host_async(svm_ptr first, svm_ptr last, HostIterator result, command_queue &queue) { size_t count = iterator_range_size(first, last); if(count == 0){ return result; } event event_ = queue.enqueue_svm_memcpy_async( ::boost::addressof(*result), first.get(), count * sizeof(T) ); return make_future(iterator_plus_distance(result, count), event_); } #endif // CL_VERSION_2_0 } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP compute-0.5/include/boost/compute/algorithm/detail/count_if_with_ballot.hpp000066400000000000000000000050551263566244600274410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline size_t count_if_with_ballot(InputIterator first, InputIterator last, Predicate predicate, command_queue &queue) { size_t count = iterator_range_size(first, last); size_t block_size = 32; size_t block_count = count / block_size; if(block_count * block_size != count){ block_count++; } const ::boost::compute::context &context = queue.get_context(); ::boost::compute::vector counts(block_count, context); ::boost::compute::detail::nvidia_popcount popc; ::boost::compute::detail::nvidia_ballot ballot; meta_kernel k("count_if_with_ballot"); k << "const uint gid = get_global_id(0);\n" << "bool value = false;\n" << "if(gid < count)\n" << " value = " << predicate(first[k.var("gid")]) << ";\n" << "uint bits = " << ballot(k.var("value")) << ";\n" << "if(get_local_id(0) == 0)\n" << counts.begin()[k.var("get_group_id(0)") ] << " = " << popc(k.var("bits")) << ";\n"; k.add_set_arg("count", count); k.exec_1d(queue, 0, block_size * block_count, block_size); uint_ result; ::boost::compute::reduce( counts.begin(), counts.end(), &result, queue ); return result; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP compute-0.5/include/boost/compute/algorithm/detail/count_if_with_reduce.hpp000066400000000000000000000051301263566244600274250ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP #include #include #include namespace boost { namespace compute { namespace detail { template struct invoked_countable_predicate { invoked_countable_predicate(Predicate p, Arg a) : predicate(p), arg(a) { } Predicate predicate; Arg arg; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_countable_predicate &expr) { return kernel << "(" << expr.predicate(expr.arg) << " ? 1 : 0)"; } // the countable_predicate wraps Predicate and converts its result from // bool to ulong so that it can be used with reduce() template struct countable_predicate { typedef ulong_ result_type; countable_predicate(Predicate predicate) : m_predicate(predicate) { } template invoked_countable_predicate operator()(const Arg &arg) const { return invoked_countable_predicate(m_predicate, arg); } Predicate m_predicate; }; // counts the number of elements matching predicate using reduce() template inline size_t count_if_with_reduce(InputIterator first, InputIterator last, Predicate predicate, command_queue &queue) { countable_predicate reduce_predicate(predicate); ulong_ count = 0; ::boost::compute::reduce( ::boost::compute::make_transform_iterator(first, reduce_predicate), ::boost::compute::make_transform_iterator(last, reduce_predicate), &count, ::boost::compute::plus(), queue ); return static_cast(count); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP compute-0.5/include/boost/compute/algorithm/detail/count_if_with_threads.hpp000066400000000000000000000102211263566244600276050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz 
// // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP #include #include #include namespace boost { namespace compute { namespace detail { template class count_if_with_threads_kernel : meta_kernel { public: typedef typename std::iterator_traits::value_type value_type; count_if_with_threads_kernel() : meta_kernel("count_if_with_threads") { } void set_args(InputIterator first, InputIterator last, Predicate predicate) { typedef typename std::iterator_traits::value_type T; m_size = detail::iterator_range_size(first, last); m_size_arg = add_arg("size"); m_counts_arg = add_arg(memory_object::global_memory, "counts"); *this << // thread parameters "const uint gid = get_global_id(0);\n" << "const uint block_size = size / get_global_size(0);\n" << "const uint start = block_size * gid;\n" << "uint end = 0;\n" << "if(gid == get_global_size(0) - 1)\n" << " end = size;\n" << "else\n" << " end = block_size * gid + block_size;\n" << // count values "uint count = 0;\n" << "for(uint i = start; i < end; i++){\n" << decl("value") << "=" << first[expr("i")] << ";\n" << if_(predicate(var("value"))) << "{\n" << "count++;\n" << "}\n" << "}\n" << // write count "counts[gid] = count;\n"; } size_t exec(command_queue &queue) { const device &device = queue.get_device(); const context &context = queue.get_context(); size_t threads = device.compute_units(); const size_t minimum_block_size = 2048; if(m_size / threads < minimum_block_size){ threads = static_cast( (std::max)( std::ceil(float(m_size) / minimum_block_size), 1.0f ) ); } // storage for counts ::boost::compute::vector counts(threads, context); // exec kernel 
set_arg(m_size_arg, static_cast(m_size)); set_arg(m_counts_arg, counts.get_buffer()); exec_1d(queue, 0, threads, 1); // copy counts to the host std::vector host_counts(threads); ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue); // return sum of counts return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0)); } private: size_t m_size; size_t m_size_arg; size_t m_counts_arg; }; // counts values that match the predicate using one thread per block. this is // optimized for cpu-type devices with a small number of compute units. template inline size_t count_if_with_threads(InputIterator first, InputIterator last, Predicate predicate, command_queue &queue) { count_if_with_threads_kernel kernel; kernel.set_args(first, last, predicate); return kernel.exec(queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP compute-0.5/include/boost/compute/algorithm/detail/find_extrema.hpp000066400000000000000000000043441263566244600257100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP #include #include #include #include namespace boost { namespace compute { namespace detail { template inline InputIterator find_extrema(InputIterator first, InputIterator last, Compare compare, const bool find_minimum, command_queue &queue) { size_t count = iterator_range_size(first, last); // handle trivial cases if(count == 0 || count == 1){ return first; } const device &device = queue.get_device(); // use serial method for small inputs // and when device is a CPU if(count < 512 || (device.type() & device::cpu)){ return serial_find_extrema(first, last, compare, find_minimum, queue); } // find_extrema_with_reduce() is used only if requirements are met if(find_extrema_with_reduce_requirements_met(first, last, queue)) { return find_extrema_with_reduce(first, last, compare, find_minimum, queue); } // use serial method for OpenCL version 1.0 due to // problems with atomic_cmpxchg() #ifndef CL_VERSION_1_1 return serial_find_extrema(first, last, compare, find_minimum, queue); #endif return find_extrema_with_atomics(first, last, compare, find_minimum, queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP compute-0.5/include/boost/compute/algorithm/detail/find_extrema_with_atomics.hpp000066400000000000000000000102331263566244600304540ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_ATOMICS_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_ATOMICS_HPP #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline InputIterator find_extrema_with_atomics(InputIterator first, InputIterator last, Compare compare, const bool find_minimum, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::difference_type difference_type; const context &context = queue.get_context(); meta_kernel k("find_extrema"); atomic_cmpxchg atomic_cmpxchg_uint; k << "const uint gid = get_global_id(0);\n" << "uint old_index = *index;\n" << k.decl("old") << " = " << first[k.var("old_index")] << ";\n" << k.decl("new") << " = " << first[k.var("gid")] << ";\n" << k.decl("compare_result") << ";\n" << "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" << "while(" << "(compare_result = " << compare(k.var("old"), k.var("new")) << ")" << " || (!(compare_result" << " || " << compare(k.var("new"), k.var("old")) << ") " "&& gid < old_index)){\n" << "#else\n" << // while condition explained for minimum case with less (<) // as comparison function: // while(new_value < old_value // OR (new_value == old_value AND new_index < old_index)) "while(" << "(compare_result = " << compare(k.var("new"), k.var("old")) << ")" << " || (!(compare_result" << " || " << compare(k.var("old"), k.var("new")) << ") " "&& gid < old_index)){\n" << "#endif\n" << " if(" << atomic_cmpxchg_uint(k.var("index"), k.var("old_index"), k.var("gid")) << " == old_index)\n" << " break;\n" << " else\n" << " old_index = *index;\n" << "old = " << first[k.var("old_index")] << ";\n" << "}\n"; size_t index_arg_index = k.add_arg(memory_object::global_memory, "index"); std::string options; if(!find_minimum){ options = "-DBOOST_COMPUTE_FIND_MAXIMUM"; } kernel kernel = 
k.compile(context, options); // setup index buffer scalar index(context); kernel.set_arg(index_arg_index, index.get_buffer()); // initialize index index.write(0, queue); // run kernel size_t count = iterator_range_size(first, last); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); // read index and return iterator return first + static_cast(index.read(queue)); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_ATOMICS_HPP compute-0.5/include/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp000066400000000000000000000441331263566244600302720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template bool find_extrema_with_reduce_requirements_met(InputIterator first, InputIterator last, command_queue &queue) { typedef typename std::iterator_traits::value_type input_type; const device &device = queue.get_device(); // device must have dedicated local memory storage // otherwise reduction would be highly inefficient if(device.get_info() != CL_LOCAL) { return false; } const size_t max_work_group_size = device.get_info(); // local memory size in bytes (per compute unit) const size_t local_mem_size = device.get_info(); std::string cache_key = std::string("__boost_find_extrema_reduce_") + type_name(); // load parameters boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); // Get preferred work group size size_t work_group_size = parameters->get(cache_key, "wgsize", 256); work_group_size = (std::min)(max_work_group_size, work_group_size); // local memory size needed to perform parallel reduction size_t required_local_mem_size = 0; // indices size required_local_mem_size += sizeof(uint_) * work_group_size; // values size required_local_mem_size += sizeof(input_type) * work_group_size; // at least 4 work groups per compute unit otherwise reduction // would be highly inefficient return ((required_local_mem_size * 4) <= local_mem_size); } /// \internal_ /// Algorithm finds the first extremum in given range, i.e., with the lowest /// index. /// /// If \p use_input_idx is false, it's assumed that input data is ordered by /// increasing index and \p input_idx is not used in the algorithm. 
template inline void find_extrema_with_reduce(InputIterator input, vector::iterator input_idx, size_t count, ResultIterator result, vector::iterator result_idx, size_t work_groups_no, size_t work_group_size, Compare compare, const bool find_minimum, const bool use_input_idx, command_queue &queue) { typedef typename std::iterator_traits::value_type input_type; const context &context = queue.get_context(); meta_kernel k("find_extrema_reduce"); size_t count_arg = k.add_arg("count"); size_t block_arg = k.add_arg(memory_object::local_memory, "block"); size_t block_idx_arg = k.add_arg(memory_object::local_memory, "block_idx"); k << // Work item global id k.decl("gid") << " = get_global_id(0);\n" << "if(gid >= count) {\n return;\n}\n" << // Index of element that will be read from input buffer k.decl("idx") << " = gid;\n" << k.decl("acc") << ";\n" << // Real index of currently best element "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << k.decl("acc_idx") << " = " << input_idx[k.var("idx")] << ";\n" << "#else\n" << k.decl("acc_idx") << " = idx;\n" << "#endif\n" << // Init accumulator with first[get_global_id(0)] "acc = " << input[k.var("idx")] << ";\n" << "idx += get_global_size(0);\n" << k.decl("compare_result") << ";\n" << k.decl("equal") << ";\n\n" << "while( idx < count ){\n" << // Next element k.decl("next") << " = " << input[k.var("idx")] << ";\n" << "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << k.decl("next_idx") << " = " << input_idx[k.var("idx")] << ";\n" << "#endif\n" << // Comparison between currently best element (acc) and next element "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" << "compare_result = " << compare(k.var("next"), k.var("acc")) << ";\n" << "# ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << "equal = !compare_result && !" << compare(k.var("acc"), k.var("next")) << ";\n" << "# endif\n" << "#else\n" << "compare_result = " << compare(k.var("acc"), k.var("next")) << ";\n" << "# ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << "equal = !compare_result && !" 
<< compare(k.var("next"), k.var("acc")) << ";\n" << "# endif\n" << "#endif\n" << // save the winner "acc = compare_result ? acc : next;\n" << "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << "acc_idx = compare_result ? " << "acc_idx : " << "(equal ? min(acc_idx, next_idx) : next_idx);\n" << "#else\n" << "acc_idx = compare_result ? acc_idx : idx;\n" << "#endif\n" << "idx += get_global_size(0);\n" << "}\n\n" << // Work item local id k.decl("lid") << " = get_local_id(0);\n" << "block[lid] = acc;\n" << "block_idx[lid] = acc_idx;\n" << "barrier(CLK_LOCAL_MEM_FENCE);\n" << k.decl("group_offset") << " = count - (get_local_size(0) * get_group_id(0));\n\n"; k << "#pragma unroll\n" "for(" << k.decl("offset") << " = " << uint_(work_group_size) << " / 2; offset > 0; " << "offset = offset / 2) {\n" << "if((lid < offset) && ((lid + offset) < group_offset)) { \n" << k.decl("mine") << " = block[lid];\n" << k.decl("other") << " = block[lid+offset];\n" << "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" << "compare_result = " << compare(k.var("other"), k.var("mine")) << ";\n" << "equal = !compare_result && !" << compare(k.var("mine"), k.var("other")) << ";\n" << "#else\n" << "compare_result = " << compare(k.var("mine"), k.var("other")) << ";\n" << "equal = !compare_result && !" << compare(k.var("other"), k.var("mine")) << ";\n" << "#endif\n" << "block[lid] = compare_result ? mine : other;\n" << k.decl("mine_idx") << " = block_idx[lid];\n" << k.decl("other_idx") << " = block_idx[lid+offset];\n" << "block_idx[lid] = compare_result ? " << "mine_idx : " << "(equal ? 
min(mine_idx, other_idx) : other_idx);\n" << "}\n" "barrier(CLK_LOCAL_MEM_FENCE);\n" << "}\n\n" << // write block result to global output "if(lid == 0){\n" << result[k.var("get_group_id(0)")] << " = block[0];\n" << result_idx[k.var("get_group_id(0)")] << " = block_idx[0];\n" << "}"; std::string options; if(!find_minimum){ options = "-DBOOST_COMPUTE_FIND_MAXIMUM"; } if(use_input_idx){ options += " -DBOOST_COMPUTE_USE_INPUT_IDX"; } kernel kernel = k.compile(context, options); kernel.set_arg(count_arg, static_cast(count)); kernel.set_arg(block_arg, local_buffer(work_group_size)); kernel.set_arg(block_idx_arg, local_buffer(work_group_size)); queue.enqueue_1d_range_kernel(kernel, 0, work_groups_no * work_group_size, work_group_size); } template inline void find_extrema_with_reduce(InputIterator input, size_t count, ResultIterator result, vector::iterator result_idx, size_t work_groups_no, size_t work_group_size, Compare compare, const bool find_minimum, command_queue &queue) { // dummy will not be used buffer_iterator dummy = result_idx; return find_extrema_with_reduce( input, dummy, count, result, result_idx, work_groups_no, work_group_size, compare, find_minimum, false, queue ); } template InputIterator find_extrema_with_reduce(InputIterator first, InputIterator last, Compare compare, const bool find_minimum, command_queue &queue) { typedef typename std::iterator_traits::difference_type difference_type; typedef typename std::iterator_traits::value_type input_type; const context &context = queue.get_context(); const device &device = queue.get_device(); // Getting information about used queue and device const size_t compute_units_no = device.get_info(); const size_t max_work_group_size = device.get_info(); const size_t count = detail::iterator_range_size(first, last); std::string cache_key = std::string("__boost_find_extrema_with_reduce_") + type_name(); // load parameters boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); // get preferred 
work group size and preferred number // of work groups per compute unit size_t work_group_size = parameters->get(cache_key, "wgsize", 256); size_t work_groups_per_cu = parameters->get(cache_key, "wgpcu", 100); // calculate work group size and number of work groups work_group_size = (std::min)(max_work_group_size, work_group_size); size_t work_groups_no = compute_units_no * work_groups_per_cu; work_groups_no = (std::min)( work_groups_no, static_cast(std::ceil(float(count) / work_group_size)) ); // phase I: finding candidates for extremum // device buffors for extremum candidates and their indices // each work-group computes its candidate vector candidates(work_groups_no, context); vector candidates_idx(work_groups_no, context); // finding candidates for first extremum and their indices find_extrema_with_reduce( first, count, candidates.begin(), candidates_idx.begin(), work_groups_no, work_group_size, compare, find_minimum, queue ); // phase II: finding extremum from among the candidates // zero-copy buffers for final result (value and index) vector > result(1, context); vector > result_idx(1, context); // get extremum from among the candidates find_extrema_with_reduce( candidates.begin(), candidates_idx.begin(), work_groups_no, result.begin(), result_idx.begin(), 1, work_group_size, compare, find_minimum, true, queue ); // mapping extremum index to host uint_* result_idx_host_ptr = static_cast( queue.enqueue_map_buffer( result_idx.get_buffer(), command_queue::map_read, 0, sizeof(uint_) ) ); return first + static_cast(*result_idx_host_ptr); } template InputIterator find_extrema_with_reduce(InputIterator first, InputIterator last, ::boost::compute::less< typename std::iterator_traits< InputIterator >::value_type > compare, const bool find_minimum, command_queue &queue) { typedef typename std::iterator_traits::difference_type difference_type; typedef typename std::iterator_traits::value_type input_type; const context &context = queue.get_context(); const device &device 
= queue.get_device(); // Getting information about used queue and device const size_t compute_units_no = device.get_info(); const size_t max_work_group_size = device.get_info(); const size_t count = detail::iterator_range_size(first, last); std::string cache_key = std::string("__boost_find_extrema_with_reduce_") + type_name(); // load parameters boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); // get preferred work group size and preferred number // of work groups per compute unit size_t work_group_size = parameters->get(cache_key, "wgsize", 256); size_t work_groups_per_cu = parameters->get(cache_key, "wgpcu", 64); // calculate work group size and number of work groups work_group_size = (std::min)(max_work_group_size, work_group_size); size_t work_groups_no = compute_units_no * work_groups_per_cu; work_groups_no = (std::min)( work_groups_no, static_cast(std::ceil(float(count) / work_group_size)) ); // phase I: finding candidates for extremum // device buffors for extremum candidates and their indices // each work-group computes its candidate // zero-copy buffers are used to eliminate copying data back to host vector > candidates(work_groups_no, context); vector > candidates_idx(work_groups_no, context); // finding candidates for first extremum and their indices find_extrema_with_reduce( first, count, candidates.begin(), candidates_idx.begin(), work_groups_no, work_group_size, compare, find_minimum, queue ); // phase II: finding extremum from among the candidates // mapping candidates and their indices to host input_type* candidates_host_ptr = static_cast( queue.enqueue_map_buffer( candidates.get_buffer(), command_queue::map_read, 0, work_groups_no * sizeof(input_type) ) ); uint_* candidates_idx_host_ptr = static_cast( queue.enqueue_map_buffer( candidates_idx.get_buffer(), command_queue::map_read, 0, work_groups_no * sizeof(uint_) ) ); input_type* i = candidates_host_ptr; uint_* idx = candidates_idx_host_ptr; uint_* extremum_idx = 
idx; input_type extremum = *candidates_host_ptr; i++; idx++; // find extremum (serial) from among the candidates on host if(!find_minimum) { while(idx != (candidates_idx_host_ptr + work_groups_no)) { input_type next = *i; bool compare_result = next > extremum; bool equal = next == extremum; extremum = compare_result ? next : extremum; extremum_idx = compare_result ? idx : extremum_idx; extremum_idx = equal ? ((*extremum_idx < *idx) ? extremum_idx : idx) : extremum_idx; idx++, i++; } } else { while(idx != (candidates_idx_host_ptr + work_groups_no)) { input_type next = *i; bool compare_result = next < extremum; bool equal = next == extremum; extremum = compare_result ? next : extremum; extremum_idx = compare_result ? idx : extremum_idx; extremum_idx = equal ? ((*extremum_idx < *idx) ? extremum_idx : idx) : extremum_idx; idx++, i++; } } return first + static_cast(*extremum_idx); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP compute-0.5/include/boost/compute/algorithm/detail/find_if_with_atomics.hpp000066400000000000000000000204421263566244600274100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline InputIterator find_if_with_atomics_one_vpt(InputIterator first, InputIterator last, UnaryPredicate predicate, const size_t count, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::difference_type difference_type; const context &context = queue.get_context(); detail::meta_kernel k("find_if"); size_t index_arg = k.add_arg(memory_object::global_memory, "index"); atomic_min atomic_min_uint; k << k.decl("i") << " = get_global_id(0);\n" << k.decl("value") << "=" << first[k.var("i")] << ";\n" << "if(" << predicate(k.var("value")) << "){\n" << " " << atomic_min_uint(k.var("index"), k.var("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar index(context); kernel.set_arg(index_arg, index.get_buffer()); // initialize index to the last iterator's index index.write(static_cast(count), queue); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); // read index and return iterator return first + static_cast(index.read(queue)); } template inline InputIterator find_if_with_atomics_multiple_vpt(InputIterator first, InputIterator last, UnaryPredicate predicate, const size_t count, const uint_ vpt, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::difference_type difference_type; const context &context = queue.get_context(); const device &device = queue.get_device(); detail::meta_kernel k("find_if"); size_t index_arg = k.add_arg(memory_object::global_memory, "index"); size_t count_arg = k.add_arg("count"); size_t vpt_arg = k.add_arg("vpt"); atomic_min 
atomic_min_uint; // for GPUs reads from global memory are coalesced if(device.type() & device::gpu) { k << k.decl("lsize") << " = get_local_size(0);\n" << k.decl("id") << " = get_local_id(0) + get_group_id(0) * lsize * vpt;\n" << k.decl("end") << " = min(" << "id + (lsize *" << k.var("vpt") << ")," << "count" << ");\n" << // checking if the index is already found "__local uint local_index;\n" << "if(get_local_id(0) == 0){\n" << " local_index = *index;\n " << "};\n" << "barrier(CLK_LOCAL_MEM_FENCE);\n" << "if(local_index < id){\n" << " return;\n" << "}\n" << "while(id < end){\n" << " " << k.decl("value") << " = " << first[k.var("id")] << ";\n" " if(" << predicate(k.var("value")) << "){\n" << " " << atomic_min_uint(k.var("index"), k.var("id")) << ";\n" << " return;\n" " }\n" << " id+=lsize;\n" << "}\n"; // for CPUs (and other devices) reads are ordered so the big cache is // efficiently used. } else { k << k.decl("id") << " = get_global_id(0) * " << k.var("vpt") << ";\n" << k.decl("end") << " = min(" << "id + " << k.var("vpt") << "," << "count" << ");\n" << "while(id < end && (*index) > id){\n" << " " << k.decl("value") << " = " << first[k.var("id")] << ";\n" " if(" << predicate(k.var("value")) << "){\n" << " " << atomic_min_uint(k.var("index"), k.var("id")) << ";\n" << " return;\n" << " }\n" << " id++;\n" << "}\n"; } kernel kernel = k.compile(context); scalar index(context); kernel.set_arg(index_arg, index.get_buffer()); kernel.set_arg(count_arg, static_cast(count)); kernel.set_arg(vpt_arg, static_cast(vpt)); // initialize index to the last iterator's index index.write(static_cast(count), queue); const size_t global_wg_size = static_cast( std::ceil(float(count) / vpt) ); queue.enqueue_1d_range_kernel(kernel, 0, global_wg_size, 0); // read index and return iterator return first + static_cast(index.read(queue)); } template inline InputIterator find_if_with_atomics(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue) { typedef 
typename std::iterator_traits::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0){ return last; } const device &device = queue.get_device(); // load cached parameters std::string cache_key = std::string("__boost_find_if_with_atomics_") + type_name(); boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); // for relatively small inputs on GPUs kernel checking one value per thread // (work-item) is more efficient than its multiple values per thread version if(device.type() & device::gpu){ const size_t one_vpt_threshold = parameters->get(cache_key, "one_vpt_threshold", 1048576); if(count <= one_vpt_threshold){ return find_if_with_atomics_one_vpt( first, last, predicate, count, queue ); } } // values per thread size_t vpt; if(device.type() & device::gpu){ // get vpt parameter vpt = parameters->get(cache_key, "vpt", 32); } else { // for CPUs work is split equally between compute units const size_t max_compute_units = device.get_info(); vpt = static_cast( std::ceil(float(count) / max_compute_units) ); } return find_if_with_atomics_multiple_vpt( first, last, predicate, count, vpt, queue ); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP compute-0.5/include/boost/compute/algorithm/detail/inplace_reduce.hpp000066400000000000000000000113471263566244600262060ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline void inplace_reduce(Iterator first, Iterator last, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; size_t input_size = iterator_range_size(first, last); if(input_size < 2){ return; } const context &context = queue.get_context(); size_t block_size = 64; size_t values_per_thread = 8; size_t block_count = input_size / (block_size * values_per_thread); if(block_count * block_size * values_per_thread != input_size) block_count++; vector output(block_count, context); meta_kernel k("inplace_reduce"); size_t input_arg = k.add_arg(memory_object::global_memory, "input"); size_t input_size_arg = k.add_arg("input_size"); size_t output_arg = k.add_arg(memory_object::global_memory, "output"); size_t scratch_arg = k.add_arg(memory_object::local_memory, "scratch"); k << "const uint gid = get_global_id(0);\n" << "const uint lid = get_local_id(0);\n" << "const uint values_per_thread =\n" << uint_(values_per_thread) << ";\n" << // thread reduce "const uint index = gid * values_per_thread;\n" << "if(index < input_size){\n" << k.decl("sum") << " = input[index];\n" << "for(uint i = 1;\n" << "i < values_per_thread && (index + i) < input_size;\n" << "i++){\n" << " sum = " << function(k.var("sum"), k.var("input[index+i]")) << ";\n" << "}\n" << "scratch[lid] = sum;\n" << "}\n" << // local reduce "for(uint i = 1; i < get_local_size(0); i <<= 1){\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " uint mask = (i << 1) - 1;\n" << " uint next_index = (gid + i) * values_per_thread;\n" " if((lid & mask) == 0 && next_index < input_size){\n" << " scratch[lid] = " << function(k.var("scratch[lid]"), k.var("scratch[lid+i]")) << 
";\n" << " }\n" << "}\n" << // write output for block "if(lid == 0){\n" << " output[get_group_id(0)] = scratch[0];\n" << "}\n" ; const buffer *input_buffer = &first.get_buffer(); const buffer *output_buffer = &output.get_buffer(); kernel kernel = k.compile(context); while(input_size > 1){ kernel.set_arg(input_arg, *input_buffer); kernel.set_arg(input_size_arg, static_cast(input_size)); kernel.set_arg(output_arg, *output_buffer); kernel.set_arg(scratch_arg, local_buffer(block_size)); queue.enqueue_1d_range_kernel(kernel, 0, block_count * block_size, block_size); input_size = static_cast( std::ceil(float(input_size) / (block_size * values_per_thread) ) ); block_count = input_size / (block_size * values_per_thread); if(block_count * block_size * values_per_thread != input_size) block_count++; std::swap(input_buffer, output_buffer); } if(input_buffer != &first.get_buffer()){ ::boost::compute::copy(output.begin(), output.begin() + 1, first, queue); } } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP compute-0.5/include/boost/compute/algorithm/detail/insertion_sort.hpp000066400000000000000000000136161263566244600263260ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline void serial_insertion_sort(Iterator first, Iterator last, Compare compare, command_queue &queue) { typedef typename std::iterator_traits::value_type T; size_t count = iterator_range_size(first, last); if(count < 2){ return; } meta_kernel k("serial_insertion_sort"); size_t local_data_arg = k.add_arg(memory_object::local_memory, "data"); size_t count_arg = k.add_arg("n"); k << // copy data to local memory "for(uint i = 0; i < n; i++){\n" << " data[i] = " << first[k.var("i")] << ";\n" "}\n" // sort data in local memory "for(uint i = 1; i < n; i++){\n" << " " << k.decl("value") << " = data[i];\n" << " uint pos = i;\n" << " while(pos > 0 && " << compare(k.var("value"), k.var("data[pos-1]")) << "){\n" << " data[pos] = data[pos-1];\n" << " pos--;\n" << " }\n" << " data[pos] = value;\n" << "}\n" << // copy sorted data to output "for(uint i = 0; i < n; i++){\n" << " " << first[k.var("i")] << " = data[i];\n" "}\n"; const context &context = queue.get_context(); ::boost::compute::kernel kernel = k.compile(context); kernel.set_arg(local_data_arg, local_buffer(count)); kernel.set_arg(count_arg, static_cast(count)); queue.enqueue_task(kernel); } template inline void serial_insertion_sort(Iterator first, Iterator last, command_queue &queue) { typedef typename std::iterator_traits::value_type T; ::boost::compute::less less; return serial_insertion_sort(first, last, less, queue); } template inline void serial_insertion_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue) { typedef typename std::iterator_traits::value_type key_type; typedef typename std::iterator_traits::value_type value_type; 
size_t count = iterator_range_size(keys_first, keys_last); if(count < 2){ return; } meta_kernel k("serial_insertion_sort_by_key"); size_t local_keys_arg = k.add_arg(memory_object::local_memory, "keys"); size_t local_data_arg = k.add_arg(memory_object::local_memory, "data"); size_t count_arg = k.add_arg("n"); k << // copy data to local memory "for(uint i = 0; i < n; i++){\n" << " keys[i] = " << keys_first[k.var("i")] << ";\n" " data[i] = " << values_first[k.var("i")] << ";\n" "}\n" // sort data in local memory "for(uint i = 1; i < n; i++){\n" << " " << k.decl("key") << " = keys[i];\n" << " " << k.decl("value") << " = data[i];\n" << " uint pos = i;\n" << " while(pos > 0 && " << compare(k.var("key"), k.var("keys[pos-1]")) << "){\n" << " keys[pos] = keys[pos-1];\n" << " data[pos] = data[pos-1];\n" << " pos--;\n" << " }\n" << " keys[pos] = key;\n" << " data[pos] = value;\n" << "}\n" << // copy sorted data to output "for(uint i = 0; i < n; i++){\n" << " " << keys_first[k.var("i")] << " = keys[i];\n" " " << values_first[k.var("i")] << " = data[i];\n" "}\n"; const context &context = queue.get_context(); ::boost::compute::kernel kernel = k.compile(context); kernel.set_arg(local_keys_arg, static_cast(count * sizeof(key_type)), 0); kernel.set_arg(local_data_arg, static_cast(count * sizeof(value_type)), 0); kernel.set_arg(count_arg, static_cast(count)); queue.enqueue_task(kernel); } template inline void serial_insertion_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, command_queue &queue) { typedef typename std::iterator_traits::value_type key_type; serial_insertion_sort_by_key( keys_first, keys_last, values_first, boost::compute::less(), queue ); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP 
compute-0.5/include/boost/compute/algorithm/detail/merge_path.hpp000066400000000000000000000074421263566244600253600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// /// \brief Merge Path kernel class /// /// Subclass of meta_kernel to break two sets into tiles according /// to their merge path /// class merge_path_kernel : public meta_kernel { public: unsigned int tile_size; merge_path_kernel() : meta_kernel("merge_path") { tile_size = 4; } template void set_range(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator1 result_a, OutputIterator2 result_b, Compare comp) { m_a_count = iterator_range_size(first1, last1); m_a_count_arg = add_arg("a_count"); m_b_count = iterator_range_size(first2, last2); m_b_count_arg = add_arg("b_count"); *this << "uint i = get_global_id(0);\n" << "uint target = (i+1)*" << tile_size << ";\n" << "uint start = max(convert_int(0),convert_int(target)-convert_int(b_count));\n" << "uint end = min(target,a_count);\n" << "uint a_index, b_index;\n" << "while(start("b_index")], first1[expr("a_index")]) << "))\n" << " start = a_index + 1;\n" << " else end = a_index;\n" << "}\n" << result_a[expr("i")] << " = start;\n" << result_b[expr("i")] << " = target - start;\n"; } template void set_range(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, 
OutputIterator1 result_a, OutputIterator2 result_b) { typedef typename std::iterator_traits::value_type value_type; ::boost::compute::less less_than; set_range(first1, last1, first2, last2, result_a, result_b, less_than); } event exec(command_queue &queue) { if((m_a_count + m_b_count)/tile_size == 0) { return event(); } set_arg(m_a_count_arg, uint_(m_a_count)); set_arg(m_b_count_arg, uint_(m_b_count)); return exec_1d(queue, 0, (m_a_count + m_b_count)/tile_size); } private: size_t m_a_count; size_t m_a_count_arg; size_t m_b_count; size_t m_b_count_arg; }; } //end detail namespace } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP compute-0.5/include/boost/compute/algorithm/detail/merge_sort_on_cpu.hpp000066400000000000000000000344761263566244600267650ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline void merge_blocks(KeyIterator keys_first, ValueIterator values_first, KeyIterator keys_result, ValueIterator values_result, Compare compare, size_t count, const size_t block_size, const bool sort_by_key, command_queue &queue) { (void) values_result; (void) values_first; meta_kernel k("merge_sort_on_cpu_merge_blocks"); size_t count_arg = k.add_arg("count"); size_t block_size_arg = k.add_arg("block_size"); k << k.decl("b1_start") << " = get_global_id(0) * block_size * 2;\n" << k.decl("b1_end") << " = min(count, b1_start + block_size);\n" << k.decl("b2_start") << " = min(count, b1_start + block_size);\n" << k.decl("b2_end") << " = min(count, b2_start + block_size);\n" << k.decl("result_idx") << " = b1_start;\n" << // merging block 1 and block 2 (stable) "while(b1_start < b1_end && b2_start < b2_end){\n" << " if( " << compare(keys_first[k.var("b2_start")], keys_first[k.var("b1_start")]) << "){\n" << " " << keys_result[k.var("result_idx")] << " = " << keys_first[k.var("b2_start")] << ";\n"; if(sort_by_key){ k << " " << values_result[k.var("result_idx")] << " = " << values_first[k.var("b2_start")] << ";\n"; } k << " b2_start++;\n" << " }\n" << " else {\n" << " " << keys_result[k.var("result_idx")] << " = " << keys_first[k.var("b1_start")] << ";\n"; if(sort_by_key){ k << " " << values_result[k.var("result_idx")] << " = " << values_first[k.var("b1_start")] << ";\n"; } k << " b1_start++;\n" << " }\n" << " result_idx++;\n" << "}\n" << "while(b1_start < b1_end){\n" << " " << keys_result[k.var("result_idx")] << " = " << keys_first[k.var("b1_start")] << ";\n"; if(sort_by_key){ k << " " << values_result[k.var("result_idx")] << " = " << 
values_first[k.var("b1_start")] << ";\n"; } k << " b1_start++;\n" << " result_idx++;\n" << "}\n" << "while(b2_start < b2_end){\n" << " " << keys_result[k.var("result_idx")] << " = " << keys_first[k.var("b2_start")] << ";\n"; if(sort_by_key){ k << " " << values_result[k.var("result_idx")] << " = " << values_first[k.var("b2_start")] << ";\n"; } k << " b2_start++;\n" << " result_idx++;\n" << "}\n"; const context &context = queue.get_context(); ::boost::compute::kernel kernel = k.compile(context); kernel.set_arg(count_arg, static_cast(count)); kernel.set_arg(block_size_arg, static_cast(block_size)); const size_t global_size = static_cast( std::ceil(float(count) / (2 * block_size)) ); queue.enqueue_1d_range_kernel(kernel, 0, global_size, 0); } template inline void merge_blocks(Iterator first, Iterator result, Compare compare, size_t count, const size_t block_size, const bool sort_by_key, command_queue &queue) { // dummy iterator as it's not sort by key Iterator dummy; merge_blocks(first, dummy, result, dummy, compare, count, block_size, false, queue); } template inline void dispatch_merge_blocks(Iterator first, Iterator result, Compare compare, size_t count, const size_t block_size, const size_t input_size_threshold, const size_t blocks_no_threshold, command_queue &queue) { const size_t blocks_no = static_cast( std::ceil(float(count) / block_size) ); // merge with merge path should used only for the large arrays and at the // end of merging part when there are only a few big blocks left to be merged if(blocks_no <= blocks_no_threshold && count >= input_size_threshold){ Iterator last = first + count; for(size_t i = 0; i < count; i+= 2*block_size) { Iterator first1 = (std::min)(first + i, last); Iterator last1 = (std::min)(first1 + block_size, last); Iterator first2 = last1; Iterator last2 = (std::min)(first2 + block_size, last); Iterator block_result = (std::min)(result + i, result + count); merge_with_merge_path(first1, last1, first2, last2, block_result, compare, 
queue); } } else { merge_blocks(first, result, compare, count, block_size, false, queue); } } template inline void block_insertion_sort(KeyIterator keys_first, ValueIterator values_first, Compare compare, const size_t count, const size_t block_size, const bool sort_by_key, command_queue &queue) { (void) values_first; typedef typename std::iterator_traits::value_type K; typedef typename std::iterator_traits::value_type T; meta_kernel k("merge_sort_on_cpu_block_insertion_sort"); size_t count_arg = k.add_arg("count"); size_t block_size_arg = k.add_arg("block_size"); k << k.decl("start") << " = get_global_id(0) * block_size;\n" << k.decl("end") << " = min(count, start + block_size);\n" << // block insertion sort (stable) "for(uint i = start+1; i < end; i++){\n" << " " << k.decl("key") << " = " << keys_first[k.var("i")] << ";\n"; if(sort_by_key){ k << " " << k.decl("value") << " = " << values_first[k.var("i")] << ";\n"; } k << " uint pos = i;\n" << " while(pos > start && " << compare(k.var("key"), keys_first[k.var("pos-1")]) << "){\n" << " " << keys_first[k.var("pos")] << " = " << keys_first[k.var("pos-1")] << ";\n"; if(sort_by_key){ k << " " << values_first[k.var("pos")] << " = " << values_first[k.var("pos-1")] << ";\n"; } k << " pos--;\n" << " }\n" << " " << keys_first[k.var("pos")] << " = key;\n"; if(sort_by_key) { k << " " << values_first[k.var("pos")] << " = value;\n"; } k << "}\n"; // block insertion sort const context &context = queue.get_context(); ::boost::compute::kernel kernel = k.compile(context); kernel.set_arg(count_arg, static_cast(count)); kernel.set_arg(block_size_arg, static_cast(block_size)); const size_t global_size = static_cast(std::ceil(float(count) / block_size)); queue.enqueue_1d_range_kernel(kernel, 0, global_size, 0); } template inline void block_insertion_sort(Iterator first, Compare compare, const size_t count, const size_t block_size, command_queue &queue) { // dummy iterator as it's not sort by key Iterator dummy; 
block_insertion_sort(first, dummy, compare, count, block_size, false, queue); } // This sort is stable. template inline void merge_sort_on_cpu(Iterator first, Iterator last, Compare compare, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; size_t count = iterator_range_size(first, last); if(count < 2){ return; } // for small input size only insertion sort is performed else if(count <= 512){ block_insertion_sort(first, compare, count, count, queue); return; } const context &context = queue.get_context(); const device &device = queue.get_device(); // loading parameters std::string cache_key = std::string("__boost_merge_sort_on_cpu_") + type_name(); boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); // When there is merge_with_path_blocks_no_threshold or less blocks left to // merge AND input size is merge_with_merge_path_input_size_threshold or more // merge_with_merge_path() algorithm is used to merge sorted blocks; // otherwise merge_blocks() is used. 
const size_t merge_with_path_blocks_no_threshold = parameters->get(cache_key, "merge_with_merge_path_blocks_no_threshold", 8); const size_t merge_with_path_input_size_threshold = parameters->get(cache_key, "merge_with_merge_path_input_size_threshold", 2097152); const size_t block_size = parameters->get(cache_key, "insertion_sort_block_size", 64); block_insertion_sort(first, compare, count, block_size, queue); // temporary buffer for merge result vector temp(count, context); bool result_in_temporary_buffer = false; for(size_t i = block_size; i < count; i *= 2){ result_in_temporary_buffer = !result_in_temporary_buffer; if(result_in_temporary_buffer) { dispatch_merge_blocks(first, temp.begin(), compare, count, i, merge_with_path_input_size_threshold, merge_with_path_blocks_no_threshold, queue); } else { dispatch_merge_blocks(temp.begin(), first, compare, count, i, merge_with_path_input_size_threshold, merge_with_path_blocks_no_threshold, queue); } } if(result_in_temporary_buffer) { copy(temp.begin(), temp.end(), first, queue); } } // This sort is stable. 
template inline void merge_sort_by_key_on_cpu(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, Compare compare, command_queue &queue) { typedef typename std::iterator_traits::value_type key_type; typedef typename std::iterator_traits::value_type value_type; size_t count = iterator_range_size(keys_first, keys_last); if(count < 2){ return; } // for small input size only insertion sort is performed else if(count <= 512){ block_insertion_sort(keys_first, values_first, compare, count, count, true, queue); return; } const context &context = queue.get_context(); const device &device = queue.get_device(); // loading parameters std::string cache_key = std::string("__boost_merge_sort_by_key_on_cpu_") + type_name() + "_with_" + type_name(); boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); const size_t block_size = parameters->get(cache_key, "insertion_sort_by_key_block_size", 64); block_insertion_sort(keys_first, values_first, compare, count, block_size, true, queue); // temporary buffer for merge results vector values_temp(count, context); vector keys_temp(count, context); bool result_in_temporary_buffer = false; for(size_t i = block_size; i < count; i *= 2){ result_in_temporary_buffer = !result_in_temporary_buffer; if(result_in_temporary_buffer) { merge_blocks(keys_first, values_first, keys_temp.begin(), values_temp.begin(), compare, count, i, true, queue); } else { merge_blocks(keys_temp.begin(), values_temp.begin(), keys_first, values_first, compare, count, i, true, queue); } } if(result_in_temporary_buffer) { copy(keys_temp.begin(), keys_temp.end(), keys_first, queue); copy(values_temp.begin(), values_temp.end(), values_first, queue); } } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP 
compute-0.5/include/boost/compute/algorithm/detail/merge_with_merge_path.hpp000066400000000000000000000160521263566244600275670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// /// \brief Serial merge kernel class /// /// Subclass of meta_kernel to perform serial merge after tiling /// class serial_merge_kernel : meta_kernel { public: unsigned int tile_size; serial_merge_kernel() : meta_kernel("merge") { tile_size = 4; } template void set_range(InputIterator1 first1, InputIterator2 first2, InputIterator3 tile_first1, InputIterator3 tile_last1, InputIterator4 tile_first2, OutputIterator result, Compare comp) { m_count = iterator_range_size(tile_first1, tile_last1) - 1; *this << "uint i = get_global_id(0);\n" << "uint start1 = " << tile_first1[expr("i")] << ";\n" << "uint end1 = " << tile_first1[expr("i+1")] << ";\n" << "uint start2 = " << tile_first2[expr("i")] << ";\n" << "uint end2 = " << tile_first2[expr("i+1")] << ";\n" << "uint index = i*" << tile_size << ";\n" << "while(start1("start2")], first1[expr("start1")]) << "))\n" << " {\n" << result[expr("index")] << " = " << first1[expr("start1")] << ";\n" << " index++;\n" << " start1++;\n" << " }\n" << " else\n" << " {\n" << result[expr("index")] << " = " << first2[expr("start2")] << ";\n" << " index++;\n" << " start2++;\n" << " }\n" << "}\n" << "while(start1("index")] << " = " << 
first1[expr("start1")] << ";\n" << " index++;\n" << " start1++;\n" << "}\n" << "while(start2("index")] << " = " << first2[expr("start2")] << ";\n" << " index++;\n" << " start2++;\n" << "}\n"; } template void set_range(InputIterator1 first1, InputIterator2 first2, InputIterator3 tile_first1, InputIterator3 tile_last1, InputIterator4 tile_first2, OutputIterator result) { typedef typename std::iterator_traits::value_type value_type; ::boost::compute::less less_than; set_range(first1, first2, tile_first1, tile_last1, tile_first2, result, less_than); } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; /// /// \brief Merge algorithm with merge path /// /// Merges the sorted values in the range [\p first1, \p last1) with /// the sorted values in the range [\p first2, last2) and stores the /// result in the range beginning at \p result /// /// \param first1 Iterator pointing to start of first set /// \param last1 Iterator pointing to end of first set /// \param first2 Iterator pointing to start of second set /// \param last2 Iterator pointing to end of second set /// \param result Iterator pointing to start of range in which the result /// will be stored /// \param comp Comparator which performs less than function /// \param queue Queue on which to execute /// template inline OutputIterator merge_with_merge_path(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::difference_type result_difference_type; int tile_size = 1024; int count1 = iterator_range_size(first1, last1); int count2 = iterator_range_size(first2, last2); vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets merge_path_kernel tiling_kernel; 
tiling_kernel.tile_size = 1024; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1, comp); fill_n(tile_a.begin(), 1, 0, queue); fill_n(tile_b.begin(), 1, 0, queue); tiling_kernel.exec(queue); fill_n(tile_a.end()-1, 1, count1, queue); fill_n(tile_b.end()-1, 1, count2, queue); // Merge serial_merge_kernel merge_kernel; merge_kernel.tile_size = 1024; merge_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), result, comp); merge_kernel.exec(queue); return result + static_cast(count1 + count2); } /// \overload template inline OutputIterator merge_with_merge_path(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; ::boost::compute::less less_than; return merge_with_merge_path(first1, last1, first2, last2, result, less_than, queue); } } //end detail namespace } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP compute-0.5/include/boost/compute/algorithm/detail/radix_sort.hpp000066400000000000000000000324561263566244600254260ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_RADIX_SORT_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_RADIX_SORT_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { // meta-function returning true if type T is radix-sortable template struct is_radix_sortable : boost::mpl::and_< typename ::boost::compute::is_fundamental::type, typename boost::mpl::not_::type>::type > { }; template struct radix_sort_value_type { }; template<> struct radix_sort_value_type<1> { typedef uchar_ type; }; template<> struct radix_sort_value_type<2> { typedef ushort_ type; }; template<> struct radix_sort_value_type<4> { typedef uint_ type; }; template<> struct radix_sort_value_type<8> { typedef ulong_ type; }; template inline const char* enable_double() { return " -DT2_double=0"; } template<> inline const char* enable_double() { return " -DT2_double=1"; } const char radix_sort_source[] = "#if T2_double\n" "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" "#endif\n" "#define K2_BITS (1 << K_BITS)\n" "#define RADIX_MASK ((((T)(1)) << K_BITS) - 1)\n" "#define SIGN_BIT ((sizeof(T) * CHAR_BIT) - 1)\n" "inline uint radix(const T x, const uint low_bit)\n" "{\n" "#if defined(IS_FLOATING_POINT)\n" " const T mask = -(x >> SIGN_BIT) | (((T)(1)) << SIGN_BIT);\n" " return ((x ^ mask) >> low_bit) & RADIX_MASK;\n" "#elif defined(IS_SIGNED)\n" " return ((x ^ (((T)(1)) << SIGN_BIT)) >> low_bit) & RADIX_MASK;\n" "#else\n" " return (x >> low_bit) & RADIX_MASK;\n" "#endif\n" "}\n" "__kernel void count(__global const T *input,\n" " const uint input_offset,\n" " const uint input_size,\n" " __global uint *global_counts,\n" " __global uint *global_offsets,\n" " __local uint *local_counts,\n" " const uint low_bit)\n" "{\n" // work-item parameters " const uint gid = get_global_id(0);\n" " const uint lid = 
get_local_id(0);\n" // zero local counts " if(lid < K2_BITS){\n" " local_counts[lid] = 0;\n" " }\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" // reduce local counts " if(gid < input_size){\n" " T value = input[input_offset+gid];\n" " uint bucket = radix(value, low_bit);\n" " atomic_inc(local_counts + bucket);\n" " }\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" // write block-relative offsets " if(lid < K2_BITS){\n" " global_counts[K2_BITS*get_group_id(0) + lid] = local_counts[lid];\n" // write global offsets " if(get_group_id(0) == (get_num_groups(0) - 1)){\n" " global_offsets[lid] = local_counts[lid];\n" " }\n" " }\n" "}\n" "__kernel void scan(__global const uint *block_offsets,\n" " __global uint *global_offsets,\n" " const uint block_count)\n" "{\n" " __global const uint *last_block_offsets =\n" " block_offsets + K2_BITS * (block_count - 1);\n" // calculate and scan global_offsets " uint sum = 0;\n" " for(uint i = 0; i < K2_BITS; i++){\n" " uint x = global_offsets[i] + last_block_offsets[i];\n" " global_offsets[i] = sum;\n" " sum += x;\n" " }\n" "}\n" "__kernel void scatter(__global const T *input,\n" " const uint input_offset,\n" " const uint input_size,\n" " const uint low_bit,\n" " __global const uint *counts,\n" " __global const uint *global_offsets,\n" "#ifndef SORT_BY_KEY\n" " __global T *output,\n" " const uint output_offset)\n" "#else\n" " __global T *keys_output,\n" " const uint keys_output_offset,\n" " __global T2 *values_input,\n" " const uint values_input_offset,\n" " __global T2 *values_output,\n" " const uint values_output_offset)\n" "#endif\n" "{\n" // work-item parameters " const uint gid = get_global_id(0);\n" " const uint lid = get_local_id(0);\n" // copy input to local memory " T value;\n" " uint bucket;\n" " __local uint local_input[BLOCK_SIZE];\n" " if(gid < input_size){\n" " value = input[input_offset+gid];\n" " bucket = radix(value, low_bit);\n" " local_input[lid] = bucket;\n" " }\n" // copy block counts to local memory " __local uint local_counts[(1 << 
K_BITS)];\n" " if(lid < K2_BITS){\n" " local_counts[lid] = counts[get_group_id(0) * K2_BITS + lid];\n" " }\n" // wait until local memory is ready " barrier(CLK_LOCAL_MEM_FENCE);\n" " if(gid >= input_size){\n" " return;\n" " }\n" // get global offset " uint offset = global_offsets[bucket] + local_counts[bucket];\n" // calculate local offset " uint local_offset = 0;\n" " for(uint i = 0; i < lid; i++){\n" " if(local_input[i] == bucket)\n" " local_offset++;\n" " }\n" "#ifndef SORT_BY_KEY\n" // write value to output " output[output_offset + offset + local_offset] = value;\n" "#else\n" // write key and value if doing sort_by_key " keys_output[keys_output_offset+offset + local_offset] = value;\n" " values_output[values_output_offset+offset + local_offset] =\n" " values_input[values_input_offset+gid];\n" "#endif\n" "}\n"; template inline void radix_sort_impl(const buffer_iterator first, const buffer_iterator last, const buffer_iterator values_first, command_queue &queue) { typedef T value_type; typedef typename radix_sort_value_type::type sort_type; const device &device = queue.get_device(); const context &context = queue.get_context(); // if we have a valid values iterator then we are doing a // sort by key and have to set up the values buffer bool sort_by_key = (values_first.get_buffer().get() != 0); // load (or create) radix sort program std::string cache_key = std::string("__boost_radix_sort_") + type_name(); if(sort_by_key){ cache_key += std::string("_with_") + type_name(); } boost::shared_ptr cache = program_cache::get_global_cache(context); boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); // sort parameters const uint_ k = parameters->get(cache_key, "k", 4); const uint_ k2 = 1 << k; const uint_ block_size = parameters->get(cache_key, "tpb", 128); // sort program compiler options std::stringstream options; options << "-DK_BITS=" << k; options << " -DT=" << type_name(); options << " -DBLOCK_SIZE=" << block_size; 
if(boost::is_floating_point::value){ options << " -DIS_FLOATING_POINT"; } if(boost::is_signed::value){ options << " -DIS_SIGNED"; } if(sort_by_key){ options << " -DSORT_BY_KEY"; options << " -DT2=" << type_name(); options << enable_double(); } // load radix sort program program radix_sort_program = cache->get_or_build( cache_key, options.str(), radix_sort_source, context ); kernel count_kernel(radix_sort_program, "count"); kernel scan_kernel(radix_sort_program, "scan"); kernel scatter_kernel(radix_sort_program, "scatter"); size_t count = detail::iterator_range_size(first, last); uint_ block_count = static_cast(count / block_size); if(block_count * block_size != count){ block_count++; } // setup temporary buffers vector output(count, context); vector values_output(sort_by_key ? count : 0, context); vector offsets(k2, context); vector counts(block_count * k2, context); const buffer *input_buffer = &first.get_buffer(); uint_ input_offset = first.get_index(); const buffer *output_buffer = &output.get_buffer(); uint_ output_offset = 0; const buffer *values_input_buffer = &values_first.get_buffer(); uint_ values_input_offset = values_first.get_index(); const buffer *values_output_buffer = &values_output.get_buffer(); uint_ values_output_offset = 0; for(uint_ i = 0; i < sizeof(sort_type) * CHAR_BIT / k; i++){ // write counts count_kernel.set_arg(0, *input_buffer); count_kernel.set_arg(1, input_offset); count_kernel.set_arg(2, static_cast(count)); count_kernel.set_arg(3, counts); count_kernel.set_arg(4, offsets); count_kernel.set_arg(5, block_size * sizeof(uint_), 0); count_kernel.set_arg(6, i * k); queue.enqueue_1d_range_kernel(count_kernel, 0, block_count * block_size, block_size); // scan counts if(k == 1){ typedef uint2_ counter_type; ::boost::compute::exclusive_scan( make_buffer_iterator(counts.get_buffer(), 0), make_buffer_iterator(counts.get_buffer(), counts.size() / 2), make_buffer_iterator(counts.get_buffer()), queue ); } else if(k == 2){ typedef uint4_ 
counter_type; ::boost::compute::exclusive_scan( make_buffer_iterator(counts.get_buffer(), 0), make_buffer_iterator(counts.get_buffer(), counts.size() / 4), make_buffer_iterator(counts.get_buffer()), queue ); } else if(k == 4){ typedef uint16_ counter_type; ::boost::compute::exclusive_scan( make_buffer_iterator(counts.get_buffer(), 0), make_buffer_iterator(counts.get_buffer(), counts.size() / 16), make_buffer_iterator(counts.get_buffer()), queue ); } else { BOOST_ASSERT(false && "unknown k"); break; } // scan global offsets scan_kernel.set_arg(0, counts); scan_kernel.set_arg(1, offsets); scan_kernel.set_arg(2, block_count); queue.enqueue_task(scan_kernel); // scatter values scatter_kernel.set_arg(0, *input_buffer); scatter_kernel.set_arg(1, input_offset); scatter_kernel.set_arg(2, static_cast(count)); scatter_kernel.set_arg(3, i * k); scatter_kernel.set_arg(4, counts); scatter_kernel.set_arg(5, offsets); scatter_kernel.set_arg(6, *output_buffer); scatter_kernel.set_arg(7, output_offset); if(sort_by_key){ scatter_kernel.set_arg(8, *values_input_buffer); scatter_kernel.set_arg(9, values_input_offset); scatter_kernel.set_arg(10, *values_output_buffer); scatter_kernel.set_arg(11, values_output_offset); } queue.enqueue_1d_range_kernel(scatter_kernel, 0, block_count * block_size, block_size); // swap buffers std::swap(input_buffer, output_buffer); std::swap(values_input_buffer, values_output_buffer); std::swap(input_offset, output_offset); std::swap(values_input_offset, values_output_offset); } } template inline void radix_sort(Iterator first, Iterator last, command_queue &queue) { radix_sort_impl(first, last, buffer_iterator(), queue); } template inline void radix_sort_by_key(KeyIterator keys_first, KeyIterator keys_last, ValueIterator values_first, command_queue &queue) { radix_sort_impl(keys_first, keys_last, values_first, queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_RADIX_SORT_HPP 
compute-0.5/include/boost/compute/algorithm/detail/random_fill.hpp000066400000000000000000000035301263566244600255250ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP #include #include #include #include namespace boost { namespace compute { namespace detail { template inline void random_fill(OutputIterator first, OutputIterator last, Generator &g, command_queue &queue) { g.fill(first, last, queue); } template inline void random_fill(OutputIterator first, OutputIterator last, typename std::iterator_traits::value_type lo, typename std::iterator_traits::value_type hi, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; typedef typename boost::compute::default_random_engine engine_type; typedef typename boost::compute::uniform_real_distribution distribution_type; engine_type engine(queue); distribution_type generator(lo, hi); generator.fill(first, last, engine, queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP compute-0.5/include/boost/compute/algorithm/detail/reduce_by_key.hpp000066400000000000000000000121731263566244600260530ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template size_t reduce_by_key_on_gpu(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, BinaryPredicate predicate, command_queue &queue) { return detail::reduce_by_key_with_scan(keys_first, keys_last, values_first, keys_result, values_result, function, predicate, queue); } template bool reduce_by_key_on_gpu_requirements_met(InputKeyIterator keys_first, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, const size_t count, command_queue &queue) { const device &device = queue.get_device(); return (count > 256) && !(device.type() & device::cpu) && reduce_by_key_with_scan_requirements_met(keys_first, values_first, keys_result,values_result, count, queue); return true; } template inline std::pair dispatch_reduce_by_key(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, BinaryPredicate predicate, command_queue &queue) { typedef typename std::iterator_traits::difference_type key_difference_type; typedef typename std::iterator_traits::difference_type value_difference_type; const size_t count = detail::iterator_range_size(keys_first, keys_last); if (count < 2) { boost::compute::copy_n(keys_first, count, keys_result, queue); boost::compute::copy_n(values_first, count, values_result, queue); return std::make_pair( keys_result + static_cast(count), values_result + static_cast(count) ); } 
size_t result_size = 0; if(reduce_by_key_on_gpu_requirements_met(keys_first, values_first, keys_result, values_result, count, queue)){ result_size = detail::reduce_by_key_on_gpu(keys_first, keys_last, values_first, keys_result, values_result, function, predicate, queue); } else { result_size = detail::serial_reduce_by_key(keys_first, keys_last, values_first, keys_result, values_result, function, predicate, queue); } return std::make_pair( keys_result + static_cast(result_size), values_result + static_cast(result_size) ); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP compute-0.5/include/boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp000066400000000000000000000547311263566244600301200ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// \internal_ /// /// Fills \p new_keys_first with unsigned integer keys generated from vector /// of original keys \p keys_first. New keys can be distinguish by simple equality /// predicate. 
/// /// \param keys_first iterator pointing to the first key /// \param number_of_keys number of keys /// \param predicate binary predicate for key comparison /// \param new_keys_first iterator pointing to the new keys vector /// \param preferred_work_group_size preferred work group size /// \param queue command queue to perform the operation /// /// Binary function \p predicate must take two keys as arguments and /// return true only if they are considered the same. /// /// The first new key equals zero and the last equals number of unique keys /// minus one. /// /// No local memory usage. template inline void generate_uint_keys(InputKeyIterator keys_first, size_t number_of_keys, BinaryPredicate predicate, vector::iterator new_keys_first, size_t preferred_work_group_size, command_queue &queue) { typedef typename std::iterator_traits::value_type key_type; detail::meta_kernel k("reduce_by_key_new_key_flags"); k.add_set_arg("count", uint_(number_of_keys)); k << k.decl("gid") << " = get_global_id(0);\n" << k.decl("value") << " = 0;\n" << "if(gid >= count){\n return;\n}\n" << "if(gid > 0){ \n" << k.decl("key") << " = " << keys_first[k.var("gid")] << ";\n" << k.decl("previous_key") << " = " << keys_first[k.var("gid - 1")] << ";\n" << " value = " << predicate(k.var("previous_key"), k.var("key")) << " ? 0 : 1;\n" << "}\n else {\n" << " value = 0;\n" << "}\n" << new_keys_first[k.var("gid")] << " = value;\n"; const context &context = queue.get_context(); kernel kernel = k.compile(context); size_t work_group_size = preferred_work_group_size; size_t work_groups_no = static_cast( std::ceil(float(number_of_keys) / work_group_size) ); queue.enqueue_1d_range_kernel(kernel, 0, work_groups_no * work_group_size, work_group_size); inclusive_scan(new_keys_first, new_keys_first + number_of_keys, new_keys_first, queue); } /// \internal_ /// Calculate carry-out for each work group. 
/// Carry-out is a pair of the last key processed by a work group and sum of all /// values under this key in this work group. template inline void carry_outs(vector::iterator keys_first, InputValueIterator values_first, size_t count, vector::iterator carry_out_keys_first, OutputValueIterator carry_out_values_first, BinaryFunction function, size_t work_group_size, command_queue &queue) { typedef typename std::iterator_traits::value_type value_out_type; detail::meta_kernel k("reduce_by_key_with_scan_carry_outs"); k.add_set_arg("count", uint_(count)); size_t local_keys_arg = k.add_arg(memory_object::local_memory, "lkeys"); size_t local_vals_arg = k.add_arg(memory_object::local_memory, "lvals"); k << k.decl("gid") << " = get_global_id(0);\n" << k.decl("wg_size") << " = get_local_size(0);\n" << k.decl("lid") << " = get_local_id(0);\n" << k.decl("group_id") << " = get_group_id(0);\n" << "if(gid >= count){\n return;\n}\n" << k.decl("key") << " = " << keys_first[k.var("gid")] << ";\n" << k.decl("value") << " = " << values_first[k.var("gid")] << ";\n" << "lkeys[lid] = key;\n" << "lvals[lid] = value;\n" << // Calculate carry out for each work group by performing Hillis/Steele scan // where only last element (key-value pair) is saved k.decl("result") << " = value;\n" << k.decl("other_key") << ";\n" << k.decl("other_value") << ";\n" << "for(" << k.decl("offset") << " = 1; " << "offset < wg_size && lid >= offset; offset *= 2){\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" << " other_key = lkeys[lid - offset];\n" << " if(other_key == key){\n" << " other_value = lvals[lid - offset];\n" << " result = " << function(k.var("result"), k.var("other_value")) << ";\n" << " }\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " lvals[lid] = result;\n" << "}\n" << // save carry out "if(lid == (wg_size - 1)){\n" << carry_out_keys_first[k.var("group_id")] << " = key;\n" << carry_out_values_first[k.var("group_id")] << " = result;\n" << "}\n"; size_t work_groups_no = static_cast( std::ceil(float(count) / 
work_group_size) ); const context &context = queue.get_context(); kernel kernel = k.compile(context); kernel.set_arg(local_keys_arg, local_buffer(work_group_size)); kernel.set_arg(local_vals_arg, local_buffer(work_group_size)); queue.enqueue_1d_range_kernel(kernel, 0, work_groups_no * work_group_size, work_group_size); } /// \internal_ /// Calculate carry-in by performing inclusive scan by key on carry-outs vector. template inline void carry_ins(vector::iterator carry_out_keys_first, OutputValueIterator carry_out_values_first, OutputValueIterator carry_in_values_first, size_t carry_out_size, BinaryFunction function, size_t work_group_size, command_queue &queue) { typedef typename std::iterator_traits::value_type value_out_type; uint_ values_pre_work_item = static_cast( std::ceil(float(carry_out_size) / work_group_size) ); detail::meta_kernel k("reduce_by_key_with_scan_carry_ins"); k.add_set_arg("carry_out_size", uint_(carry_out_size)); k.add_set_arg("values_per_work_item", values_pre_work_item); size_t local_keys_arg = k.add_arg(memory_object::local_memory, "lkeys"); size_t local_vals_arg = k.add_arg(memory_object::local_memory, "lvals"); k << k.decl("id") << " = get_global_id(0) * values_per_work_item;\n" << k.decl("idx") << " = id;\n" << k.decl("wg_size") << " = get_local_size(0);\n" << k.decl("lid") << " = get_local_id(0);\n" << k.decl("group_id") << " = get_group_id(0);\n" << "if(id >= carry_out_size){\n return;\n}\n" << k.decl("key") << ";\n" << k.decl("value") << ";\n" << k.decl("previous_key") << " = " << carry_out_keys_first[k.var("idx")] << ";\n" << k.decl("result") << " = " << carry_out_values_first[k.var("idx")] << ";\n" << carry_in_values_first[k.var("idx")] << " = result;\n" << k.decl("end") << " = (id + values_per_work_item) <= carry_out_size" << " ? 
(values_per_work_item + id) : carry_out_size;\n" << "for(idx = idx + 1; idx < end; idx += 1){\n" << " key = " << carry_out_keys_first[k.var("idx")] << ";\n" << " value = " << carry_out_values_first[k.var("idx")] << ";\n" << " if(previous_key == key){\n" << " result = " << function(k.var("result"), k.var("value")) << ";\n" << " }\n else { \n" << " result = value;\n" " }\n" << " " << carry_in_values_first[k.var("idx")] << " = result;\n" << " previous_key = key;\n" "}\n" << // save the last key and result to local memory "lkeys[lid] = previous_key;\n" << "lvals[lid] = result;\n" << // Hillis/Steele scan "for(" << k.decl("offset") << " = 1; " << "offset < wg_size && lid >= offset; offset *= 2){\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" << " key = lkeys[lid - offset];\n" << " if(previous_key == key){\n" << " value = lvals[lid - offset];\n" << " result = " << function(k.var("result"), k.var("value")) << ";\n" << " }\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " lvals[lid] = result;\n" << "}\n" << "barrier(CLK_LOCAL_MEM_FENCE);\n" << // first in the group has nothing to do "if(lid == 0){\n return;\n}\n" << // load key-value reduced by previous work item "previous_key = lkeys[lid - 1];\n" << "result = lvals[lid - 1];\n" << // make sure all carry-ins are saved in global memory "barrier( CLK_GLOBAL_MEM_FENCE );\n" << // add key-value reduced by previous work item "for(idx = id; idx < end; idx += 1){\n" << " key = " << carry_out_keys_first[k.var("idx")] << ";\n" << " value = " << carry_in_values_first[k.var("idx")] << ";\n" << " if(previous_key == key){\n" << " value = " << function(k.var("result"), k.var("value")) << ";\n" << " }\n" << " " << carry_in_values_first[k.var("idx")] << " = value;\n" << "}\n"; const context &context = queue.get_context(); kernel kernel = k.compile(context); kernel.set_arg(local_keys_arg, local_buffer(work_group_size)); kernel.set_arg(local_vals_arg, local_buffer(work_group_size)); queue.enqueue_1d_range_kernel(kernel, 0, work_group_size, 
work_group_size); } /// \internal_ /// /// Perform final reduction by key. Each work item: /// 1. Perform local work-group reduction (Hillis/Steele scan) /// 2. Add carry-in (if keys are right) /// 3. Save reduced value if next key is different than processed one template inline void final_reduction(InputKeyIterator keys_first, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, size_t count, BinaryFunction function, vector::iterator new_keys_first, vector::iterator carry_in_keys_first, OutputValueIterator carry_in_values_first, size_t carry_in_size, size_t work_group_size, command_queue &queue) { typedef typename std::iterator_traits::value_type value_out_type; detail::meta_kernel k("reduce_by_key_with_scan_final_reduction"); k.add_set_arg("count", uint_(count)); size_t local_keys_arg = k.add_arg(memory_object::local_memory, "lkeys"); size_t local_vals_arg = k.add_arg(memory_object::local_memory, "lvals"); k << k.decl("gid") << " = get_global_id(0);\n" << k.decl("wg_size") << " = get_local_size(0);\n" << k.decl("lid") << " = get_local_id(0);\n" << k.decl("group_id") << " = get_group_id(0);\n" << "if(gid >= count){\n return;\n}\n" << k.decl("key") << " = " << new_keys_first[k.var("gid")] << ";\n" << k.decl("value") << " = " << values_first[k.var("gid")] << ";\n" << "lkeys[lid] = key;\n" << "lvals[lid] = value;\n" << // Hillis/Steele scan k.decl("result") << " = value;\n" << k.decl("other_key") << ";\n" << k.decl("other_value") << ";\n" << "for(" << k.decl("offset") << " = 1; " << "offset < wg_size && lid >= offset; offset *= 2){\n" " barrier(CLK_LOCAL_MEM_FENCE);\n" << " other_key = lkeys[lid - offset];\n" << " if(other_key == key){\n" << " other_value = lvals[lid - offset];\n" << " result = " << function(k.var("result"), k.var("other_value")) << ";\n" << " }\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " lvals[lid] = result;\n" << "}\n" << k.decl("save") << " = (gid < (count - 1)) ?" 
<< new_keys_first[k.var("gid + 1")] << " != key" << ": true;\n" << // Add carry in k.decl("carry_in_key") << ";\n" << "if(group_id > 0 && save) {\n" << " carry_in_key = " << carry_in_keys_first[k.var("group_id - 1")] << ";\n" << " if(key == carry_in_key){\n" << " other_value = " << carry_in_values_first[k.var("group_id - 1")] << ";\n" << " result = " << function(k.var("result"), k.var("other_value")) << ";\n" << " }\n" << "}\n" << // Save result only if the next key is different or it's the last element. "if(save){\n" << keys_result[k.var("key")] << " = " << keys_first[k.var("gid")] << ";\n" << values_result[k.var("key")] << " = result;\n" << "}\n" ; size_t work_groups_no = static_cast( std::ceil(float(count) / work_group_size) ); const context &context = queue.get_context(); kernel kernel = k.compile(context); kernel.set_arg(local_keys_arg, local_buffer(work_group_size)); kernel.set_arg(local_vals_arg, local_buffer(work_group_size)); queue.enqueue_1d_range_kernel(kernel, 0, work_groups_no * work_group_size, work_group_size); } /// \internal_ /// Returns preferred work group size for reduce by key with scan algorithm. template inline size_t get_work_group_size(const device& device) { std::string cache_key = std::string("__boost_reduce_by_key_with_scan") + "k_" + type_name() + "_v_" + type_name(); // load parameters boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); return (std::max)( static_cast(parameters->get(cache_key, "wgsize", 256)), static_cast(device.get_info()) ); } /// \internal_ /// /// 1. For each work group carry-out value is calculated (it's done by key-oriented /// Hillis/Steele scan). Carry-out is a pair of the last key processed by work /// group and sum of all values under this key in work group. /// 2. From every carry-out carry-in is calculated by performing inclusive scan /// by key. /// 3. Final reduction by key is performed (key-oriented Hillis/Steele scan), /// carry-in values are added where needed. 
template inline size_t reduce_by_key_with_scan(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, BinaryPredicate predicate, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::value_type key_type; typedef typename std::iterator_traits::value_type value_out_type; const context &context = queue.get_context(); size_t count = detail::iterator_range_size(keys_first, keys_last); if(count == 0){ return size_t(0); } const device &device = queue.get_device(); size_t work_group_size = get_work_group_size(device); // Replace original key with unsigned integer keys generated based on given // predicate. New key is also an index for keys_result and values_result vectors, // which points to place where reduced value should be saved. vector new_keys(count, context); vector::iterator new_keys_first = new_keys.begin(); generate_uint_keys(keys_first, count, predicate, new_keys_first, work_group_size, queue); // Calculate carry-out and carry-in vectors size const size_t carry_out_size = static_cast( std::ceil(float(count) / work_group_size) ); vector carry_out_keys(carry_out_size, context); vector carry_out_values(carry_out_size, context); carry_outs(new_keys_first, values_first, count, carry_out_keys.begin(), carry_out_values.begin(), function, work_group_size, queue); vector carry_in_values(carry_out_size, context); carry_ins(carry_out_keys.begin(), carry_out_values.begin(), carry_in_values.begin(), carry_out_size, function, work_group_size, queue); final_reduction(keys_first, values_first, keys_result, values_result, count, function, new_keys_first, carry_out_keys.begin(), carry_in_values.begin(), carry_out_size, work_group_size, queue); const size_t result = read_single_value(new_keys.get_buffer(), count - 1, queue); return result + 1; } /// \internal_ /// Return true if 
requirements for running reduce by key with scan on given /// device are met (at least one work group of preferred size can be run). template bool reduce_by_key_with_scan_requirements_met(InputKeyIterator keys_first, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, const size_t count, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::value_type key_type; typedef typename std::iterator_traits::value_type value_out_type; (void) keys_first; (void) values_first; (void) keys_result; (void) values_result; const device &device = queue.get_device(); // device must have dedicated local memory storage if(device.get_info() != CL_LOCAL) { return false; } // local memory size in bytes (per compute unit) const size_t local_mem_size = device.get_info(); // preferred work group size size_t work_group_size = get_work_group_size(device); // local memory size needed to perform parallel reduction size_t required_local_mem_size = 0; // keys size required_local_mem_size += sizeof(uint_) * work_group_size; // reduced values size required_local_mem_size += sizeof(value_out_type) * work_group_size; return (required_local_mem_size <= local_mem_size); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP compute-0.5/include/boost/compute/algorithm/detail/reduce_on_gpu.hpp000066400000000000000000000241031263566244600260540ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_GPU_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_GPU_HPP #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// \internal /// body reduction inside a warp template struct ReduceBody { static std::string body() { std::stringstream k; // local reduction k << "for(int i = 1; i < TPB; i <<= 1){\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " uint mask = (i << 1) - 1;\n" << " if((lid & mask) == 0){\n" << " scratch[lid] += scratch[lid+i];\n" << " }\n" << "}\n"; return k.str(); } }; /// \internal /// body reduction inside a warp /// for nvidia device we can use the "unsafe" /// memory optimisation template struct ReduceBody { static std::string body() { std::stringstream k; // local reduction // we use TPB to compile only useful instruction // local reduction when size is greater than warp size k << "barrier(CLK_LOCAL_MEM_FENCE);\n" << "if(TPB >= 1024){\n" << "if(lid < 512) { sum += scratch[lid + 512]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n" << "if(TPB >= 512){\n" << "if(lid < 256) { sum += scratch[lid + 256]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n" << "if(TPB >= 256){\n" << "if(lid < 128) { sum += scratch[lid + 128]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n" << "if(TPB >= 128){\n" << "if(lid < 64) { sum += scratch[lid + 64]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);} \n" << // warp reduction "if(lid < 32){\n" << // volatile this way we don't need any barrier "volatile __local " << type_name() << " *lmem = scratch;\n" << "if(TPB >= 64) { lmem[lid] = sum = sum + lmem[lid+32];} \n" << "if(TPB >= 32) { lmem[lid] = sum = sum + lmem[lid+16];} \n" << "if(TPB >= 16) { lmem[lid] = sum = sum + lmem[lid+ 8];} \n" << "if(TPB >= 8) { lmem[lid] = sum = sum + lmem[lid+ 4];} \n" << "if(TPB >= 4) { lmem[lid] = 
sum = sum + lmem[lid+ 2];} \n" << "if(TPB >= 2) { lmem[lid] = sum = sum + lmem[lid+ 1];} \n" << "}\n"; return k.str(); } }; template inline void initial_reduce(InputIterator first, InputIterator last, buffer result, const Function &function, kernel &reduce_kernel, const uint_ vpt, const uint_ tpb, command_queue &queue) { (void) function; (void) reduce_kernel; typedef typename std::iterator_traits::value_type Arg; typedef typename boost::tr1_result_of::type T; size_t count = std::distance(first, last); detail::meta_kernel k("initial_reduce"); k.add_set_arg("count", uint_(count)); size_t output_arg = k.add_arg(memory_object::global_memory, "output"); k << k.decl("offset") << " = get_group_id(0) * VPT * TPB;\n" << k.decl("lid") << " = get_local_id(0);\n" << "__local " << type_name() << " scratch[TPB];\n" << // private reduction k.decl("sum") << " = 0;\n" << "for(uint i = 0; i < VPT; i++){\n" << " if(offset + lid + i*TPB < count){\n" << " sum = sum + " << first[k.var("offset+lid+i*TPB")] << ";\n" << " }\n" << "}\n" << "scratch[lid] = sum;\n" << // local reduction ReduceBody::body() << // write sum to output "if(lid == 0){\n" << " output[get_group_id(0)] = scratch[0];\n" << "}\n"; const context &context = queue.get_context(); std::stringstream options; options << "-DVPT=" << vpt << " -DTPB=" << tpb; kernel generic_reduce_kernel = k.compile(context, options.str()); generic_reduce_kernel.set_arg(output_arg, result); size_t work_size = calculate_work_size(count, vpt, tpb); queue.enqueue_1d_range_kernel(generic_reduce_kernel, 0, work_size, tpb); } template inline void initial_reduce(const buffer_iterator &first, const buffer_iterator &last, const buffer &result, const plus &function, kernel &reduce_kernel, const uint_ vpt, const uint_ tpb, command_queue &queue) { (void) function; size_t count = std::distance(first, last); reduce_kernel.set_arg(0, first.get_buffer()); reduce_kernel.set_arg(1, uint_(first.get_index())); reduce_kernel.set_arg(2, uint_(count)); 
reduce_kernel.set_arg(3, result); reduce_kernel.set_arg(4, uint_(0)); size_t work_size = calculate_work_size(count, vpt, tpb); queue.enqueue_1d_range_kernel(reduce_kernel, 0, work_size, tpb); } template inline void reduce_on_gpu(InputIterator first, InputIterator last, buffer_iterator result, Function function, command_queue &queue) { const device &device = queue.get_device(); const context &context = queue.get_context(); detail::meta_kernel k("reduce"); k.add_arg(memory_object::global_memory, "input"); k.add_arg("offset"); k.add_arg("count"); k.add_arg(memory_object::global_memory, "output"); k.add_arg("output_offset"); k << k.decl("block_offset") << " = get_group_id(0) * VPT * TPB;\n" << "__global const " << type_name() << " *block = input + offset + block_offset;\n" << k.decl("lid") << " = get_local_id(0);\n" << "__local " << type_name() << " scratch[TPB];\n" << // private reduction k.decl("sum") << " = 0;\n" << "for(uint i = 0; i < VPT; i++){\n" << " if(block_offset + lid + i*TPB < count){\n" << " sum = sum + block[lid+i*TPB]; \n" << " }\n" << "}\n" << "scratch[lid] = sum;\n"; // discrimination on vendor name if(is_nvidia_device(device)) k << ReduceBody::body(); else k << ReduceBody::body(); k << // write sum to output "if(lid == 0){\n" << " output[output_offset + get_group_id(0)] = scratch[0];\n" << "}\n"; std::string cache_key = std::string("__boost_reduce_on_gpu_") + type_name(); // load parameters boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); uint_ vpt = parameters->get(cache_key, "vpt", 8); uint_ tpb = parameters->get(cache_key, "tpb", 128); // reduce program compiler flags std::stringstream options; options << "-DT=" << type_name() << " -DVPT=" << vpt << " -DTPB=" << tpb; // load program boost::shared_ptr cache = program_cache::get_global_cache(context); program reduce_program = cache->get_or_build( cache_key, options.str(), k.source(), context ); // create reduce kernel kernel reduce_kernel(reduce_program, "reduce"); 
size_t count = std::distance(first, last); // first pass, reduce from input to ping buffer ping(context, std::ceil(float(count) / vpt / tpb) * sizeof(T)); initial_reduce(first, last, ping, function, reduce_kernel, vpt, tpb, queue); // update count after initial reduce count = std::ceil(float(count) / vpt / tpb); // middle pass(es), reduce between ping and pong const buffer *input_buffer = &ping; buffer pong(context, count / vpt / tpb * sizeof(T)); const buffer *output_buffer = &pong; if(count > vpt * tpb){ while(count > vpt * tpb){ reduce_kernel.set_arg(0, *input_buffer); reduce_kernel.set_arg(1, uint_(0)); reduce_kernel.set_arg(2, uint_(count)); reduce_kernel.set_arg(3, *output_buffer); reduce_kernel.set_arg(4, uint_(0)); size_t work_size = std::ceil(float(count) / vpt); if(work_size % tpb != 0){ work_size += tpb - work_size % tpb; } queue.enqueue_1d_range_kernel(reduce_kernel, 0, work_size, tpb); std::swap(input_buffer, output_buffer); count = std::ceil(float(count) / vpt / tpb); } } // final pass, reduce from ping/pong to result reduce_kernel.set_arg(0, *input_buffer); reduce_kernel.set_arg(1, uint_(0)); reduce_kernel.set_arg(2, uint_(count)); reduce_kernel.set_arg(3, result.get_buffer()); reduce_kernel.set_arg(4, uint_(result.get_index())); queue.enqueue_1d_range_kernel(reduce_kernel, 0, tpb, tpb); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_GPU_HPP compute-0.5/include/boost/compute/algorithm/detail/scan.hpp000066400000000000000000000030111263566244600241550ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP #include #include #include namespace boost { namespace compute { namespace detail { template inline OutputIterator scan(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { const device &device = queue.get_device(); if(device.type() & device::cpu){ return scan_on_cpu(first, last, result, exclusive, init, op, queue); } else { return scan_on_gpu(first, last, result, exclusive, init, op, queue); } } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP compute-0.5/include/boost/compute/algorithm/detail/scan_on_cpu.hpp000066400000000000000000000061601263566244600255300ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline OutputIterator scan_on_cpu(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { if(first == last){ return result; } typedef typename std::iterator_traits::value_type input_type; typedef typename std::iterator_traits::value_type output_type; const context &context = queue.get_context(); // create scan kernel meta_kernel k("scan_on_cpu"); // Arguments size_t n_arg = k.add_arg("n"); size_t init_arg = k.add_arg("initial_value"); if(!exclusive){ k << k.decl("start_idx") << " = 1;\n" << k.decl("sum") << " = " << first[0] << ";\n" << result[0] << " = sum;\n"; } else { k << k.decl("start_idx") << " = 0;\n" << k.decl("sum") << " = initial_value;\n"; } k << "for(ulong i = start_idx; i < n; i++){\n" << k.decl("x") << " = " << first[k.var("i")] << ";\n"; if(exclusive){ k << result[k.var("i")] << " = sum;\n"; } k << " sum = " << op(k.var("sum"), k.var("x")) << ";\n"; if(!exclusive){ k << result[k.var("i")] << " = sum;\n"; } k << "}\n"; // compile scan kernel kernel scan_kernel = k.compile(context); // setup kernel arguments size_t n = detail::iterator_range_size(first, last); scan_kernel.set_arg(n_arg, n); scan_kernel.set_arg(init_arg, static_cast(init)); // execute the kernel queue.enqueue_1d_range_kernel(scan_kernel, 0, 1, 1); // return iterator pointing to the end of the result range return result + n; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP 
compute-0.5/include/boost/compute/algorithm/detail/scan_on_gpu.hpp000066400000000000000000000261421263566244600255360ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template class local_scan_kernel : public meta_kernel { public: local_scan_kernel(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, BinaryOperator op) : meta_kernel("local_scan") { typedef typename std::iterator_traits::value_type T; (void) last; bool checked = true; m_block_sums_arg = add_arg(memory_object::global_memory, "block_sums"); m_scratch_arg = add_arg(memory_object::local_memory, "scratch"); m_block_size_arg = add_arg("block_size"); m_count_arg = add_arg("count"); m_init_value_arg = add_arg("init"); // work-item parameters *this << "const uint gid = get_global_id(0);\n" << "const uint lid = get_local_id(0);\n"; // check against data size if(checked){ *this << "if(gid < count){\n"; } // copy values from input to local memory if(exclusive){ *this << decl("local_init") << "= (gid == 0) ? 
init : 0;\n" << "if(lid == 0){ scratch[lid] = local_init; }\n" << "else { scratch[lid] = " << first[expr("gid-1")] << "; }\n"; } else{ *this << "scratch[lid] = " << first[expr("gid")] << ";\n"; } if(checked){ *this << "}\n" "else {\n" << " scratch[lid] = 0;\n" << "}\n"; } // wait for all threads to read from input *this << "barrier(CLK_LOCAL_MEM_FENCE);\n"; // perform scan *this << "for(uint i = 1; i < block_size; i <<= 1){\n" << " " << decl("x") << " = lid >= i ? scratch[lid-i] : 0;\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " if(lid >= i){\n" << " scratch[lid] = " << op(var("scratch[lid]"), var("x")) << ";\n" << " }\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << "}\n"; // copy results to output if(checked){ *this << "if(gid < count){\n"; } *this << result[expr("gid")] << " = scratch[lid];\n"; if(checked){ *this << "}\n"; } // store sum for the block if(exclusive){ *this << "if(lid == block_size - 1){\n" << " block_sums[get_group_id(0)] = " << op(first[expr("gid")], var("scratch[lid]")) << ";\n" << "}\n"; } else { *this << "if(lid == block_size - 1){\n" << " block_sums[get_group_id(0)] = scratch[lid];\n" << "}\n"; } } size_t m_block_sums_arg; size_t m_scratch_arg; size_t m_block_size_arg; size_t m_count_arg; size_t m_init_value_arg; }; template class write_scanned_output_kernel : public meta_kernel { public: write_scanned_output_kernel(BinaryOperator op) : meta_kernel("write_scanned_output") { bool checked = true; m_output_arg = add_arg(memory_object::global_memory, "output"); m_block_sums_arg = add_arg(memory_object::global_memory, "block_sums"); m_count_arg = add_arg("count"); // work-item parameters *this << "const uint gid = get_global_id(0);\n" << "const uint block_id = get_group_id(0);\n"; // check against data size if(checked){ *this << "if(gid < count){\n"; } // write output *this << "output[gid] = " << op(var("block_sums[block_id]"), var("output[gid] ")) << ";\n"; if(checked){ *this << "}\n"; } } size_t m_output_arg; size_t m_block_sums_arg; size_t 
m_count_arg; }; template inline size_t pick_scan_block_size(InputIterator first, InputIterator last) { size_t count = iterator_range_size(first, last); if(count == 0) { return 0; } else if(count <= 1) { return 1; } else if(count <= 2) { return 2; } else if(count <= 4) { return 4; } else if(count <= 8) { return 8; } else if(count <= 16) { return 16; } else if(count <= 32) { return 32; } else if(count <= 64) { return 64; } else if(count <= 128) { return 128; } else { return 256; } } template inline OutputIterator scan_impl(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { typedef typename std::iterator_traits::value_type input_type; typedef typename std::iterator_traits::difference_type difference_type; typedef typename std::iterator_traits::value_type output_type; const context &context = queue.get_context(); const size_t count = detail::iterator_range_size(first, last); size_t block_size = pick_scan_block_size(first, last); size_t block_count = count / block_size; if(block_count * block_size < count){ block_count++; } ::boost::compute::vector block_sums(block_count, context); // zero block sums input_type zero; std::memset(&zero, 0, sizeof(input_type)); ::boost::compute::fill(block_sums.begin(), block_sums.end(), zero, queue); // local scan local_scan_kernel local_scan_kernel(first, last, result, exclusive, op); ::boost::compute::kernel kernel = local_scan_kernel.compile(context); kernel.set_arg(local_scan_kernel.m_scratch_arg, local_buffer(block_size)); kernel.set_arg(local_scan_kernel.m_block_sums_arg, block_sums); kernel.set_arg(local_scan_kernel.m_block_size_arg, static_cast(block_size)); kernel.set_arg(local_scan_kernel.m_count_arg, static_cast(count)); kernel.set_arg(local_scan_kernel.m_init_value_arg, static_cast(init)); queue.enqueue_1d_range_kernel(kernel, 0, block_count * block_size, block_size); // inclusive scan block sums if(block_count > 1){ scan_impl(block_sums.begin(), 
block_sums.end(), block_sums.begin(), false, init, op, queue ); } // add block sums to each block if(block_count > 1){ write_scanned_output_kernel write_output_kernel(op); kernel = write_output_kernel.compile(context); kernel.set_arg(write_output_kernel.m_output_arg, result.get_buffer()); kernel.set_arg(write_output_kernel.m_block_sums_arg, block_sums); kernel.set_arg(write_output_kernel.m_count_arg, static_cast(count)); queue.enqueue_1d_range_kernel(kernel, block_size, block_count * block_size, block_size); } return result + static_cast(count); } template inline OutputIterator dispatch_scan(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { return scan_impl(first, last, result, exclusive, init, op, queue); } template inline InputIterator dispatch_scan(InputIterator first, InputIterator last, InputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; if(first == result){ // scan input in-place const context &context = queue.get_context(); // make a temporary copy the input size_t count = iterator_range_size(first, last); vector tmp(count, context); copy(first, last, tmp.begin(), queue); // scan from temporary values return scan_impl(tmp.begin(), tmp.end(), first, exclusive, init, op, queue); } else { // scan input to output return scan_impl(first, last, result, exclusive, init, op, queue); } } template inline OutputIterator scan_on_gpu(InputIterator first, InputIterator last, OutputIterator result, bool exclusive, T init, BinaryOperator op, command_queue &queue) { if(first == last){ return result; } return dispatch_scan(first, last, result, exclusive, init, op, queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP 
compute-0.5/include/boost/compute/algorithm/detail/search_all.hpp000066400000000000000000000050151263566244600253340ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// /// \brief Search kernel class /// /// Subclass of meta_kernel which is capable of performing pattern matching /// template class search_kernel : public meta_kernel { public: search_kernel() : meta_kernel("search") {} void set_range(PatternIterator p_first, PatternIterator p_last, TextIterator t_first, TextIterator t_last, OutputIterator result) { m_p_count = iterator_range_size(p_first, p_last); m_p_count_arg = add_arg("p_count"); m_count = iterator_range_size(t_first, t_last); m_count = m_count + 1 - m_p_count; *this << "uint i = get_global_id(0);\n" << "uint i1 = i;\n" << "uint j;\n" << "for(j = 0; j("j")] << " != " << t_first[expr("i")] << ")\n" << " j = p_count + 1;\n" << "}\n" << "if(j == p_count)\n" << result[expr("i1")] << " = 1;\n" << "else\n" << result[expr("i1")] << " = 0;\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } set_arg(m_p_count_arg, uint_(m_p_count)); return exec_1d(queue, 0, m_count); } private: size_t m_p_count; size_t m_p_count_arg; size_t m_count; }; } //end detail namespace } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP 
compute-0.5/include/boost/compute/algorithm/detail/serial_accumulate.hpp000066400000000000000000000037061263566244600267260ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_ACCUMULATE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_ACCUMULATE_HPP #include #include #include namespace boost { namespace compute { namespace detail { template inline void serial_accumulate(InputIterator first, InputIterator last, OutputIterator result, T init, BinaryFunction function, command_queue &queue) { const context &context = queue.get_context(); size_t count = detail::iterator_range_size(first, last); meta_kernel k("serial_accumulate"); size_t init_arg = k.add_arg("init"); size_t count_arg = k.add_arg("count"); k << k.decl("result") << " = init;\n" << "for(uint i = 0; i < count; i++)\n" << " result = " << function(k.var("result"), first[k.var("i")]) << ";\n" << result[0] << " = result;\n"; kernel kernel = k.compile(context); kernel.set_arg(init_arg, init); kernel.set_arg(count_arg, static_cast(count)); queue.enqueue_task(kernel); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_ACCUMULATE_HPP compute-0.5/include/boost/compute/algorithm/detail/serial_count_if.hpp000066400000000000000000000043031263566244600264030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // 
http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP #include #include #include #include namespace boost { namespace compute { namespace detail { // counts values that match the predicate using a single thread template inline size_t serial_count_if(InputIterator first, InputIterator last, Predicate predicate, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; const context &context = queue.get_context(); size_t size = iterator_range_size(first, last); meta_kernel k("serial_count_if"); k.add_set_arg("size", static_cast(size)); size_t result_arg = k.add_arg(memory_object::global_memory, "result"); k << "uint count = 0;\n" << "for(uint i = 0; i < size; i++){\n" << k.decl("value") << "=" << first[k.var("i")] << ";\n" << "if(" << predicate(k.var("value")) << "){\n" << "count++;\n" << "}\n" "}\n" "*result = count;\n"; kernel kernel = k.compile(context); // setup result buffer scalar result(context); kernel.set_arg(result_arg, result.get_buffer()); // run kernel queue.enqueue_task(kernel); // read index return result.read(queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP compute-0.5/include/boost/compute/algorithm/detail/serial_find_extrema.hpp000066400000000000000000000061171263566244600272470ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_FIND_EXTREMA_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_FIND_EXTREMA_HPP #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline InputIterator serial_find_extrema(InputIterator first, InputIterator last, Compare compare, const bool find_minimum, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::difference_type difference_type; const context &context = queue.get_context(); meta_kernel k("serial_find_extrema"); k << k.decl("value") << " = " << first[k.expr("0")] << ";\n" << k.decl("value_index") << " = 0;\n" << "for(uint i = 1; i < size; i++){\n" << " " << k.decl("candidate") << "=" << first[k.expr("i")] << ";\n" << "#ifndef BOOST_COMPUTE_FIND_MAXIMUM\n" << " if(" << compare(k.var("candidate"), k.var("value")) << "){\n" << "#else\n" << " if(" << compare(k.var("value"), k.var("candidate")) << "){\n" << "#endif\n" << " value = candidate;\n" << " value_index = i;\n" << " }\n" << "}\n" << "*index = value_index;\n"; size_t index_arg_index = k.add_arg(memory_object::global_memory, "index"); size_t size_arg_index = k.add_arg("size"); std::string options; if(!find_minimum){ options = "-DBOOST_COMPUTE_FIND_MAXIMUM"; } kernel kernel = k.compile(context, options); // setup index buffer scalar index(context); kernel.set_arg(index_arg_index, index.get_buffer()); // setup count size_t count = iterator_range_size(first, last); kernel.set_arg(size_arg_index, static_cast(count)); // run kernel queue.enqueue_task(kernel); // read index and return iterator return first + static_cast(index.read(queue)); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_FIND_EXTREMA_HPP 
compute-0.5/include/boost/compute/algorithm/detail/serial_merge.hpp000066400000000000000000000066601263566244600257040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP #define BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP #include #include #include #include namespace boost { namespace compute { namespace detail { template inline OutputIterator serial_merge(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, command_queue &queue) { typedef typename std::iterator_traits::value_type input_type1; typedef typename std::iterator_traits::value_type input_type2; typedef typename std::iterator_traits::difference_type result_difference_type; std::ptrdiff_t size1 = std::distance(first1, last1); std::ptrdiff_t size2 = std::distance(first2, last2); meta_kernel k("serial_merge"); k.add_set_arg("size1", static_cast(size1)); k.add_set_arg("size2", static_cast(size2)); k << "uint i = 0;\n" << // index in result range "uint j = 0;\n" << // index in first input range "uint k = 0;\n" << // index in second input range // fetch initial values from each range k.decl("j_value") << " = " << first1[0] << ";\n" << k.decl("k_value") << " = " << first2[0] << ";\n" << // merge values from both input ranges to the result range "while(j < size1 && k < size2){\n" << " if(" << comp(k.var("j_value"), k.var("k_value")) << "){\n" << " " << result[k.var("i++")] << " = j_value;\n" << " j_value = " << first1[k.var("++j")] << ";\n" << " }\n" << " else{\n" " " << result[k.var("i++")] << " = k_value;\n" 
" k_value = " << first2[k.var("++k")] << ";\n" << " }\n" "}\n" // copy any remaining values from first range "while(j < size1){\n" << result[k.var("i++")] << " = " << first1[k.var("j++")] << ";\n" << "}\n" // copy any remaining values from second range "while(k < size2){\n" << result[k.var("i++")] << " = " << first2[k.var("k++")] << ";\n" << "}\n"; // run kernel k.exec(queue); return result + static_cast(size1 + size2); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP compute-0.5/include/boost/compute/algorithm/detail/serial_reduce.hpp000066400000000000000000000041071263566244600260460ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP #include #include #include #include namespace boost { namespace compute { namespace detail { // Reduces [first, last) with function inside a generated "serial_reduce" kernel: result starts as first[0], a for loop folds in elements 1 .. count - 1 in index order, and the final value is stored to result[0]. Returns without enqueuing anything when the range is empty; otherwise the compiled kernel is submitted with queue.enqueue_task(). template inline void serial_reduce(InputIterator first, InputIterator last, OutputIterator result, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits::value_type T; typedef typename ::boost::compute::result_of::type result_type; const context &context = queue.get_context(); size_t count = detail::iterator_range_size(first, last); if(count == 0){ return; } meta_kernel k("serial_reduce"); size_t count_arg = k.add_arg("count"); k << k.decl("result") << " = " << first[0] << ";\n" << "for(uint i = 1; i < count; i++)\n" << " result = " << function(k.var("result"), first[k.var("i")]) << ";\n" << result[0] << " = result;\n"; kernel kernel = k.compile(context); kernel.set_arg(count_arg, static_cast(count)); queue.enqueue_task(kernel); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP compute-0.5/include/boost/compute/algorithm/detail/serial_reduce_by_key.hpp000066400000000000000000000103321263566244600274050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { // Reduces runs of consecutive keys in [keys_first, keys_last) with a generated "serial_reduce_by_key" kernel. While predicate(previous_key, key) holds, the current value is folded into the running result with function; otherwise the finished (previous_key, result) pair is stored at index size - 1 of keys_result / values_result and a new run is started. The final run is stored after the loop, the run count is written through result_size, and that count is returned after result_size.read(queue). Returns count (which is then 0) without enqueuing anything when the key range is empty. template inline size_t serial_reduce_by_key(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, BinaryPredicate predicate, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::value_type key_type; typedef typename ::boost::compute::result_of::type result_type; const context &context = queue.get_context(); size_t count = detail::iterator_range_size(keys_first, keys_last); if(count < 1){ return count; } meta_kernel k("serial_reduce_by_key"); size_t count_arg = k.add_arg("count"); size_t result_size_arg = k.add_arg(memory_object::global_memory, "result_size"); convert to_result_type; k << k.decl("result") << " = " << to_result_type(values_first[0]) << ";\n" << k.decl("previous_key") << " = " << keys_first[0] << ";\n" << k.decl("value") << ";\n" << k.decl("key") << ";\n" << k.decl("size") << " = 1;\n" << keys_result[0] << " = previous_key;\n" << values_result[0] << " = result;\n" << "for(ulong i = 1; i < count; i++) {\n" << " value = " << to_result_type(values_first[k.var("i")]) << ";\n" << " key = " << keys_first[k.var("i")] << ";\n" << " if (" << predicate(k.var("previous_key"), k.var("key")) << ") {\n" << " result = " << function(k.var("result"), k.var("value")) << ";\n" << " }\n " << " else { \n" << keys_result[k.var("size - 1")] << " = previous_key;\n" << values_result[k.var("size - 1")] << " = result;\n" << " result = value;\n" << " size++;\n" << " } \n" << " previous_key = key;\n" << "}\n" << keys_result[k.var("size - 1")] << " = 
previous_key;\n" << values_result[k.var("size - 1")] << " = result;\n" << "*result_size = size;"; kernel kernel = k.compile(context); scalar result_size(context); kernel.set_arg(result_size_arg, result_size.get_buffer()); kernel.set_arg(count_arg, static_cast(count)); queue.enqueue_task(kernel); return static_cast(result_size.read(queue)); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP compute-0.5/include/boost/compute/algorithm/equal.hpp000066400000000000000000000034151263566244600231060ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_EQUAL_HPP #define BOOST_COMPUTE_ALGORITHM_EQUAL_HPP #include #include #include namespace boost { namespace compute { /// Returns \c true if the range [\p first1, \p last1) and the range /// beginning at \p first2 are equal. /// /// The result is \c true when the first iterator returned by mismatch() /// is \p last1. 
template inline bool equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, command_queue &queue = system::default_queue()) { return ::boost::compute::mismatch(first1, last1, first2, queue).first == last1; } /// \overload /// /// Returns \c false straight away when std::distance() reports different /// lengths for the two ranges; otherwise defers to the three-iterator /// overload. template inline bool equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { if(std::distance(first1, last1) != std::distance(first2, last2)){ return false; } return ::boost::compute::equal(first1, last1, first2, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_EQUAL_HPP compute-0.5/include/boost/compute/algorithm/equal_range.hpp000066400000000000000000000026351263566244600242650ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP #define BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP #include #include #include #include #include namespace boost { namespace compute { /// Returns a pair of iterators containing the range of values equal /// to \p value in the sorted range [\p first, \p last). /// /// The pair holds the results of lower_bound() and upper_bound() for /// \p value, in that order. 
template inline std::pair equal_range(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { return std::make_pair( ::boost::compute::lower_bound(first, last, value, queue), ::boost::compute::upper_bound(first, last, value, queue) ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP compute-0.5/include/boost/compute/algorithm/exclusive_scan.hpp000066400000000000000000000064651263566244600250220ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP #define BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP #include #include #include #include namespace boost { namespace compute { /// Performs an exclusive scan of the elements in the range [\p first, \p last) /// and stores the results in the range beginning at \p result. /// /// Each element in the output is assigned to the sum of all the previous /// values in the input. /// /// \param first first element in the range to scan /// \param last last element in the range to scan /// \param result first element in the result range /// \param init value used to initialize the scan sequence /// \param binary_op associative binary operator /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// The default operation is to add the elements up. /// /// \snippet test/test_scan.cpp exclusive_scan_int /// /// But different associative operation can be specified as \p binary_op /// instead (e.g., multiplication, maximum, minimum). 
Also value used to /// initialize the scan sequence can be specified. /// /// \snippet test/test_scan.cpp exclusive_scan_int_multiplies /// /// \see inclusive_scan() template inline OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T init, BinaryOperator binary_op, command_queue &queue = system::default_queue()) { return detail::scan(first, last, result, true, init, binary_op, queue); } /// \overload /// /// Scans with boost::compute::plus as the operator and \p init as the /// initial value. template inline OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T init, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type output_type; return detail::scan(first, last, result, true, init, boost::compute::plus(), queue); } /// \overload /// /// Scans with boost::compute::plus as the operator and output_type(0) as /// the initial value. template inline OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type output_type; return detail::scan(first, last, result, true, output_type(0), boost::compute::plus(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP compute-0.5/include/boost/compute/algorithm/fill.hpp000066400000000000000000000227421263566244600227310ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP #define BOOST_COMPUTE_ALGORITHM_FILL_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { namespace mpl = boost::mpl; // fills the range [first, first + count) with value using copy() from a pair of constant iterators positioned at 0 and count template inline void fill_with_copy(BufferIterator first, size_t count, const T &value, command_queue &queue) { ::boost::compute::copy( ::boost::compute::make_constant_iterator(value, 0), ::boost::compute::make_constant_iterator(value, count), first, queue ); } // fills the range [first, first + count) with value using copy_async() from the same pair of constant iterators and returns the future produced by copy_async() template inline future fill_async_with_copy(BufferIterator first, size_t count, const T &value, command_queue &queue) { return ::boost::compute::copy_async( ::boost::compute::make_constant_iterator(value, 0), ::boost::compute::make_constant_iterator(value, count), first, queue ); } #if defined(CL_VERSION_1_2) // meta-function returning true if Iterator points to a range of values // that can be filled using clEnqueueFillBuffer(). to meet this criterion // it must have a buffer accessible through iter.get_buffer() and the // size of its value_type must be in {1, 2, 4, 8, 16, 32, 64, 128}. 
template struct is_valid_fill_buffer_iterator : public mpl::and_< is_buffer_iterator, mpl::contains< mpl::vector< mpl::int_<1>, mpl::int_<2>, mpl::int_<4>, mpl::int_<8>, mpl::int_<16>, mpl::int_<32>, mpl::int_<64>, mpl::int_<128> >, mpl::int_< sizeof(typename std::iterator_traits::value_type) > > >::type { }; template<> struct is_valid_fill_buffer_iterator : public boost::false_type {}; // specialization which uses clEnqueueFillBuffer for buffer iterators (it falls back to fill_with_copy() when the queue's device does not pass check_device_version(1, 2), and writes a single element with enqueue_write_buffer() instead of filling) template inline void dispatch_fill(BufferIterator first, size_t count, const T &value, command_queue &queue, typename boost::enable_if< is_valid_fill_buffer_iterator >::type* = 0) { typedef typename std::iterator_traits::value_type value_type; if(count == 0){ // nothing to do return; } // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer) if(!queue.check_device_version(1, 2)){ return fill_with_copy(first, count, value, queue); } value_type pattern = static_cast(value); size_t offset = static_cast(first.get_index()); if(count == 1){ // use clEnqueueWriteBuffer() directly when writing a single value // to the device buffer. this is potentially more efficient and also // works around a bug in the intel opencl driver. 
queue.enqueue_write_buffer( first.get_buffer(), offset * sizeof(value_type), sizeof(value_type), &pattern ); } else { queue.enqueue_fill_buffer( first.get_buffer(), &pattern, sizeof(value_type), offset * sizeof(value_type), count * sizeof(value_type) ); } } template inline future dispatch_fill_async(BufferIterator first, size_t count, const T &value, command_queue &queue, typename boost::enable_if< is_valid_fill_buffer_iterator >::type* = 0) { typedef typename std::iterator_traits::value_type value_type; // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer). NOTE(review): unlike the synchronous dispatch_fill() for buffer iterators, this overload has no count == 0 early-out of its own; the public fill_async() wrapper filters empty ranges before calling it. if(!queue.check_device_version(1, 2)){ return fill_async_with_copy(first, count, value, queue); } value_type pattern = static_cast(value); size_t offset = static_cast(first.get_index()); event event_ = queue.enqueue_fill_buffer(first.get_buffer(), &pattern, sizeof(value_type), offset * sizeof(value_type), count * sizeof(value_type)); return future(event_); } #ifdef CL_VERSION_2_0 // specializations for svm_ptr (filled with queue.enqueue_svm_fill(); an empty range returns without enqueuing anything) template inline void dispatch_fill(svm_ptr first, size_t count, const T &value, command_queue &queue) { if(count == 0){ return; } queue.enqueue_svm_fill( first.get(), &value, sizeof(T), count * sizeof(T) ); } template inline future dispatch_fill_async(svm_ptr first, size_t count, const T &value, command_queue &queue) { if(count == 0){ return future(); } event event_ = queue.enqueue_svm_fill( first.get(), &value, sizeof(T), count * sizeof(T) ); return future(event_); } #endif // CL_VERSION_2_0 // default implementations for iterators that do not satisfy is_valid_fill_buffer_iterator: fill through copy() / copy_async() template inline void dispatch_fill(BufferIterator first, size_t count, const T &value, command_queue &queue, typename boost::disable_if< is_valid_fill_buffer_iterator >::type* = 0) { fill_with_copy(first, count, value, queue); } template inline future dispatch_fill_async(BufferIterator first, size_t count, const T &value, command_queue &queue, typename boost::disable_if< is_valid_fill_buffer_iterator >::type* = 0) { return fill_async_with_copy(first, count, value, 
queue); } #else template inline void dispatch_fill(BufferIterator first, size_t count, const T &value, command_queue &queue) { fill_with_copy(first, count, value, queue); } template inline future dispatch_fill_async(BufferIterator first, size_t count, const T &value, command_queue &queue) { return fill_async_with_copy(first, count, value, queue); } #endif // !defined(CL_VERSION_1_2) } // end detail namespace /// Fills the range [\p first, \p last) with \p value. /// /// \param first first element in the range to fill /// \param last last element in the range to fill /// \param value value to copy to each element /// \param queue command queue to perform the operation /// /// For example, to fill a vector on the device with sevens: /// \code /// // vector on the device /// boost::compute::vector vec(10, context); /// /// // fill vector with sevens /// boost::compute::fill(vec.begin(), vec.end(), 7, queue); /// \endcode /// /// \see boost::compute::fill_n() template inline void fill(BufferIterator first, BufferIterator last, const T &value, command_queue &queue = system::default_queue()) { size_t count = detail::iterator_range_size(first, last); if(count == 0){ return; } detail::dispatch_fill(first, count, value, queue); } /// Asynchronous counterpart of fill(). /// /// Returns the future produced by the underlying fill operation, or a /// default-constructed future when the range is empty. /// /// \see boost::compute::fill() template inline future fill_async(BufferIterator first, BufferIterator last, const T &value, command_queue &queue = system::default_queue()) { size_t count = detail::iterator_range_size(first, last); if(count == 0){ return future(); } return detail::dispatch_fill_async(first, count, value, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP compute-0.5/include/boost/compute/algorithm/fill_n.hpp000066400000000000000000000022171263566244600232410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at 
// http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FILL_N_HPP #define BOOST_COMPUTE_ALGORITHM_FILL_N_HPP #include #include #include namespace boost { namespace compute { /// Fills the range [\p first, \p first + \p count) with \p value. /// /// Equivalent to calling fill() with \p first and \p first + \p count. /// /// \see fill() template inline void fill_n(BufferIterator first, Size count, const T &value, command_queue &queue = system::default_queue()) { ::boost::compute::fill(first, first + count, value, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FILL_N_HPP compute-0.5/include/boost/compute/algorithm/find.hpp000066400000000000000000000034151263566244600227170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FIND_HPP #define BOOST_COMPUTE_ALGORITHM_FIND_HPP #include #include #include #include #include namespace boost { namespace compute { /// Returns an iterator pointing to the first element in the range /// [\p first, \p last) that equals \p value. /// /// Implemented with find_if(); when the value type's vector_size is not 1 /// the comparison is wrapped in lambda::all(). 
template inline InputIterator find(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; using ::boost::compute::_1; using ::boost::compute::lambda::all; if(vector_size::value == 1){ return ::boost::compute::find_if( first, last, _1 == value, queue ); } else { return ::boost::compute::find_if( first, last, all(_1 == value), queue ); } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FIND_HPP compute-0.5/include/boost/compute/algorithm/find_end.hpp000066400000000000000000000101131263566244600235360ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FIND_END_HPP #define BOOST_COMPUTE_ALGORITHM_FIND_END_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// /// \brief Helper function for find_end /// /// Basically a copy of find_if which returns last occurrence /// instead of first occurrence /// /// The kernel is enqueued over \c count global ids; each one reads the /// element at its get_global_id(0) and, when \p predicate holds, applies /// atomic_max to a device-side index that is initialised to -1. \p last is /// returned when the index read back is still -1 (or the range is empty), /// otherwise \p first advanced by that index. /// template inline InputIterator find_end_helper(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0){ return last; } const context &context = queue.get_context(); detail::meta_kernel k("find_end"); size_t index_arg = k.add_arg(memory_object::global_memory, "index"); atomic_max atomic_max_int; k << k.decl("i") << " = get_global_id(0);\n" << k.decl("value") << "=" << first[k.var("i")] << ";\n" << "if(" << predicate(k.var("value")) << "){\n" << " " << atomic_max_int(k.var("index"), k.var("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar index(context); kernel.set_arg(index_arg, index.get_buffer()); index.write(static_cast(-1), queue); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); int result = static_cast(index.read(queue)); if(result == -1) return last; else return first + result; } } // end detail namespace /// /// \brief Substring matching algorithm /// /// Searches for the last match of the pattern [p_first, p_last) /// in text [t_first, t_last). 
/// \return Iterator pointing to beginning of last occurrence (\p t_last is returned when the helper finds no entry equal to 1 in the table of matching indices) /// /// \param t_first Iterator pointing to start of text /// \param t_last Iterator pointing to end of text /// \param p_first Iterator pointing to start of pattern /// \param p_last Iterator pointing to end of pattern /// \param queue Queue on which to execute /// template inline TextIterator find_end(TextIterator t_first, TextIterator t_last, PatternIterator p_first, PatternIterator p_last, command_queue &queue = system::default_queue()) { const context &context = queue.get_context(); vector matching_indices(detail::iterator_range_size(t_first, t_last), context); detail::search_kernel::iterator> kernel; kernel.set_range(p_first, p_last, t_first, t_last, matching_indices.begin()); kernel.exec(queue); using boost::compute::_1; vector::iterator index = detail::find_end_helper(matching_indices.begin(), matching_indices.end(), _1 == 1, queue); return t_first + detail::iterator_range_size(matching_indices.begin(), index); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FIND_END_HPP compute-0.5/include/boost/compute/algorithm/find_if.hpp000066400000000000000000000024311263566244600233720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP #define BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP #include #include #include namespace boost { namespace compute { /// Returns an iterator pointing to the first element in the range /// [\p first, \p last) for which \p predicate returns \c true. 
template inline InputIterator find_if(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { return detail::find_if_with_atomics(first, last, predicate, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP compute-0.5/include/boost/compute/algorithm/find_if_not.hpp000066400000000000000000000026551263566244600242620ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP #define BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP #include #include #include #include namespace boost { namespace compute { /// Returns an iterator pointing to the first element in the range /// [\p first, \p last) for which \p predicate returns \c false. 
/// /// Implemented as find_if() with the negated predicate not1(\p predicate). /// /// \see find_if() template inline InputIterator find_if_not(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { return ::boost::compute::find_if( first, last, not1(predicate), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP compute-0.5/include/boost/compute/algorithm/for_each.hpp000066400000000000000000000036131263566244600235450ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP #define BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP #include #include #include #include namespace boost { namespace compute { namespace detail { // meta_kernel whose source applies function to first[get_global_id(0)]; exec() runs it with exec_1d() at offset 0 over m_count global ids, where m_count is the size of [first, last) template struct for_each_kernel : public meta_kernel { for_each_kernel(InputIterator first, InputIterator last, Function function) : meta_kernel("for_each") { // store range size m_count = detail::iterator_range_size(first, last); // setup kernel source *this << function(first[get_global_id(0)]) << ";\n"; } void exec(command_queue &queue) { exec_1d(queue, 0, m_count); } size_t m_count; }; } // end detail namespace /// Calls \p function on each element in the range [\p first, \p last). 
/// /// \return \p function, unchanged /// /// \see transform() template inline UnaryFunction for_each(InputIterator first, InputIterator last, UnaryFunction function, command_queue &queue = system::default_queue()) { detail::for_each_kernel kernel(first, last, function); kernel.exec(queue); return function; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP compute-0.5/include/boost/compute/algorithm/for_each_n.hpp000066400000000000000000000023101263566244600240530ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP #define BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP #include namespace boost { namespace compute { /// Calls \p function on each element in the range [\p first, \p first /// \c + \p count). /// /// \return the value returned by for_each() /// /// \see for_each() template inline UnaryFunction for_each_n(InputIterator first, Size count, UnaryFunction function, command_queue &queue = system::default_queue()) { return ::boost::compute::for_each(first, first + count, function, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP compute-0.5/include/boost/compute/algorithm/gather.hpp000066400000000000000000000047411263566244600232540ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_GATHER_HPP #define BOOST_COMPUTE_ALGORITHM_GATHER_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { // meta_kernel that writes result[i] = input[first[i]] with i = get_global_id(0). set_range() records the size of [first, last) in m_count and first.get_index() in m_offset; exec() returns a default-constructed event for an empty range and otherwise calls exec_1d(queue, m_offset, m_count). NOTE(review): m_offset is passed as the second exec_1d() argument while the map iterator is also indexed with i -- confirm the map iterator's own index is not applied a second time when first does not start at index 0. template class gather_kernel : public meta_kernel { public: gather_kernel() : meta_kernel("gather") {} void set_range(MapIterator first, MapIterator last, InputIterator input, OutputIterator result) { m_count = iterator_range_size(first, last); m_offset = first.get_index(); *this << "const uint i = get_global_id(0);\n" << result[expr("i")] << "=" << input[first[expr("i")]] << ";\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, m_offset, m_count); } private: size_t m_count; size_t m_offset; }; } // end detail namespace /// Copies the elements using the indices from the range [\p first, \p last) /// to the range beginning at \p result using the input values from the range /// beginning at \p input. /// /// \see scatter() template inline void gather(MapIterator first, MapIterator last, InputIterator input, OutputIterator result, command_queue &queue = system::default_queue()) { detail::gather_kernel kernel; kernel.set_range(first, last, input, result); kernel.exec(queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_GATHER_HPP compute-0.5/include/boost/compute/algorithm/generate.hpp000066400000000000000000000032471263566244600235740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_GENERATE_HPP #define BOOST_COMPUTE_ALGORITHM_GENERATE_HPP #include #include #include #include #include namespace boost { namespace compute { /// Stores the result of \p generator for each element in the range /// [\p first, \p last). /// /// Implemented as a copy() from a pair of function input iterators built /// from \p generator at first.get_index() and last.get_index(); an empty /// range returns without calling copy(). template inline void generate(OutputIterator first, OutputIterator last, Generator generator, command_queue &queue = system::default_queue()) { size_t count = detail::iterator_range_size(first, last); if(count == 0){ return; } ::boost::compute::copy( ::boost::compute::make_function_input_iterator(generator, first.get_index()), ::boost::compute::make_function_input_iterator(generator, last.get_index()), first, queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_GENERATE_HPP compute-0.5/include/boost/compute/algorithm/generate_n.hpp000066400000000000000000000023311263566244600241020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP #define BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP #include #include #include namespace boost { namespace compute { /// Stores the result of \p generator for each element in the range /// [\p first, \p first + \p count). /// /// Equivalent to calling generate() with \p first and \p first + \p count. 
template inline void generate_n(OutputIterator first, Size count, Generator generator, command_queue &queue = system::default_queue()) { ::boost::compute::generate(first, first + count, generator, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP compute-0.5/include/boost/compute/algorithm/includes.hpp000066400000000000000000000122571263566244600236110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_INCLUDES_HPP #define BOOST_COMPUTE_ALGORITHM_INCLUDES_HPP #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// /// \brief Serial includes kernel class /// /// Subclass of meta_kernel to perform includes operation after tiling /// class serial_includes_kernel : meta_kernel { public: unsigned int tile_size; serial_includes_kernel() : meta_kernel("includes") { tile_size = 4; } template void set_range(InputIterator1 first1, InputIterator2 first2, InputIterator3 tile_first1, InputIterator3 tile_last1, InputIterator4 tile_first2, OutputIterator result) { m_count = iterator_range_size(tile_first1, tile_last1) - 1; *this << "uint i = get_global_id(0);\n" << "uint start1 = " << tile_first1[expr("i")] << ";\n" << "uint end1 = " << tile_first1[expr("i+1")] << ";\n" << "uint start2 = " << tile_first2[expr("i")] << ";\n" << "uint end2 = " << tile_first2[expr("i+1")] << ";\n" << "uint includes = 1;\n" << "while(start1("start1")] << " == " << first2[expr("start2")] << ")\n" << " {\n" << " start1++; start2++;\n" << " }\n" 
<< " else if(" << first1[expr("start1")] << " < " << first2[expr("start2")] << ")\n" << " start1++;\n" << " else\n" << " {\n" << " includes = 0;\n" << " break;\n" << " }\n" << "}\n" << "if(start2("i")] << " = includes;\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; } //end detail namespace /// /// \brief Includes algorithm /// /// Finds if the sorted range [first1, last1) includes the sorted /// range [first2, last2). In other words, it checks if [first1, last1) is /// a superset of [first2, last2). /// /// \return True, if [first1, last1) includes [first2, last2). False otherwise. /// /// \param first1 Iterator pointing to start of first set /// \param last1 Iterator pointing to end of first set /// \param first2 Iterator pointing to start of second set /// \param last2 Iterator pointing to end of second set /// \param queue Queue on which to execute /// template inline bool includes(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { int tile_size = 1024; int count1 = detail::iterator_range_size(first1, last1); int count2 = detail::iterator_range_size(first2, last2); vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::balanced_path_kernel tiling_kernel; tiling_kernel.tile_size = tile_size; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, 0, queue); fill_n(tile_b.begin(), 1, 0, queue); tiling_kernel.exec(queue); fill_n(tile_a.end()-1, 1, count1, queue); fill_n(tile_b.end()-1, 1, count2, queue); vector result((count1+count2+tile_size-1)/tile_size, queue.get_context()); // Find individually detail::serial_includes_kernel includes_kernel; includes_kernel.tile_size = tile_size; 
includes_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), result.begin()); includes_kernel.exec(queue); return find(result.begin(), result.end(), 0, queue) == result.end(); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP compute-0.5/include/boost/compute/algorithm/inclusive_scan.hpp000066400000000000000000000054741263566244600250130ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP #define BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP #include #include #include #include namespace boost { namespace compute { /// Performs an inclusive scan of the elements in the range [\p first, \p last) /// and stores the results in the range beginning at \p result. /// /// Each element in the output is assigned to the sum of the current value in /// the input with the sum of every previous value in the input. /// /// \param first first element in the range to scan /// \param last last element in the range to scan /// \param result first element in the result range /// \param binary_op associative binary operator /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// The default operation is to add the elements up. /// /// \snippet test/test_scan.cpp inclusive_scan_int /// /// But different associative operation can be specified as \p binary_op /// instead (e.g., multiplication, maximum, minimum). 
/// /// \snippet test/test_scan.cpp inclusive_scan_int_multiplies /// /// \see exclusive_scan() template inline OutputIterator inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, BinaryOperator binary_op, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type output_type; return detail::scan(first, last, result, false, output_type(0), binary_op, queue); } /// \overload template inline OutputIterator inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type output_type; return detail::scan(first, last, result, false, output_type(0), boost::compute::plus(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP compute-0.5/include/boost/compute/algorithm/inner_product.hpp000066400000000000000000000064371263566244600246610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP #define BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// Returns the inner product of the elements in the range /// [\p first1, \p last1) with the elements in the range beginning /// at \p first2. 
template inline T inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type input_type; ptrdiff_t n = std::distance(first1, last1); return ::boost::compute::accumulate( ::boost::compute::make_transform_iterator( ::boost::compute::make_zip_iterator( boost::make_tuple(first1, first2) ), detail::unpack(multiplies()) ), ::boost::compute::make_transform_iterator( ::boost::compute::make_zip_iterator( boost::make_tuple(last1, first2 + n) ), detail::unpack(multiplies()) ), init, queue ); } /// \overload template inline T inner_product(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, T init, BinaryAccumulateFunction accumulate_function, BinaryTransformFunction transform_function, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; size_t count = detail::iterator_range_size(first1, last1); vector result(count, queue.get_context()); transform(first1, last1, first2, result.begin(), transform_function, queue); return ::boost::compute::accumulate(result.begin(), result.end(), init, accumulate_function, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP compute-0.5/include/boost/compute/algorithm/inplace_merge.hpp000066400000000000000000000034751263566244600245770ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP #define BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP #include #include #include #include #include namespace boost { namespace compute { /// Merges the sorted values in the range [\p first, \p middle) with /// the sorted values in the range [\p middle, \p last) in-place. template inline void inplace_merge(Iterator first, Iterator middle, Iterator last, command_queue &queue = system::default_queue()) { BOOST_ASSERT(first < middle && middle < last); typedef typename std::iterator_traits::value_type T; const context &context = queue.get_context(); ptrdiff_t left_size = std::distance(first, middle); ptrdiff_t right_size = std::distance(middle, last); vector left(left_size, context); vector right(right_size, context); copy(first, middle, left.begin(), queue); copy(middle, last, right.begin(), queue); ::boost::compute::merge( left.begin(), left.end(), right.begin(), right.end(), first, queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP compute-0.5/include/boost/compute/algorithm/iota.hpp000066400000000000000000000027771263566244600227450ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_IOTA_HPP #define BOOST_COMPUTE_ALGORITHM_IOTA_HPP #include #include #include #include namespace boost { namespace compute { /// Fills the range [\p first, \p last) with sequential values starting at /// \p value. 
/// /// For example, the following code: /// \snippet test/test_iota.cpp iota /// /// Will fill \c vec with the values (\c 0, \c 1, \c 2, \c ...). template inline void iota(BufferIterator first, BufferIterator last, const T &value, command_queue &queue = system::default_queue()) { T count = static_cast(detail::iterator_range_size(first, last)); copy( ::boost::compute::make_counting_iterator(value), ::boost::compute::make_counting_iterator(value + count), first, queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_IOTA_HPP compute-0.5/include/boost/compute/algorithm/is_partitioned.hpp000066400000000000000000000030621263566244600250120ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_IS_PARTITIONED_HPP #define BOOST_COMPUTE_ALGORITHM_IS_PARTITIONED_HPP #include #include #include #include namespace boost { namespace compute { /// Returns \c true if the values in the range [\p first, \p last) /// are partitioned according to \p predicate. 
template inline bool is_partitioned(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { return ::boost::compute::find_if( ::boost::compute::find_if_not(first, last, predicate, queue), last, predicate, queue) == last; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_PARTITION_HPP compute-0.5/include/boost/compute/algorithm/is_permutation.hpp000066400000000000000000000045641263566244600250470ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP #define BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { /// /// \brief Permutation checking algorithm /// /// Checks if the range [first1, last1) can be permuted into the /// range [first2, last2) /// \return True, if it can be permuted. False, otherwise. 
/// /// \param first1 Iterator pointing to start of first range /// \param last1 Iterator pointing to end of first range /// \param first2 Iterator pointing to start of second range /// \param last2 Iterator pointing to end of second range /// \param queue Queue on which to execute /// template inline bool is_permutation(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type1; typedef typename std::iterator_traits::value_type value_type2; size_t count1 = detail::iterator_range_size(first1, last1); size_t count2 = detail::iterator_range_size(first2, last2); if(count1 != count2) return false; vector temp1(first1, last1, queue); vector temp2(first2, last2, queue); sort(temp1.begin(), temp1.end(), queue); sort(temp2.begin(), temp2.end(), queue); return equal(temp1.begin(), temp1.end(), temp2.begin(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP compute-0.5/include/boost/compute/algorithm/is_sorted.hpp000066400000000000000000000041721263566244600237730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP #define BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP #include #include #include #include #include namespace boost { namespace compute { /// Returns \c true if the values in the range [\p first, \p last) /// are in sorted order. 
/// /// \param first first element in the range to check /// \param last last element in the range to check /// \param compare comparison function (by default \c less) /// \param queue command queue to perform the operation /// /// \return \c true if the range [\p first, \p last) is sorted /// /// \see sort() template inline bool is_sorted(InputIterator first, InputIterator last, Compare compare, command_queue &queue = system::default_queue()) { using ::boost::compute::placeholders::_1; using ::boost::compute::placeholders::_2; return ::boost::compute::adjacent_find( first, last, ::boost::compute::bind(compare, _2, _1), queue ) == last; } /// \overload template inline bool is_sorted(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; return ::boost::compute::is_sorted( first, last, ::boost::compute::less(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP compute-0.5/include/boost/compute/algorithm/lexicographical_compare.hpp000066400000000000000000000107451263566244600266470ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Mageswaran.D // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { const char lexicographical_compare_source[] = "__kernel void lexicographical_compare(const uint size1,\n" " const uint size2,\n" " __global const T1 *range1,\n" " __global const T2 *range2,\n" " __global bool *result_buf)\n" "{\n" " const uint i = get_global_id(0);\n" " if((i != size1) && (i != size2)){\n" //Individual elements are compared and results are stored in parallel. //0 is true " if(range1[i] < range2[i])\n" " result_buf[i] = 0;\n" " else\n" " result_buf[i] = 1;\n" " }\n" " else\n" " result_buf[i] = !((i == size1) && (i != size2));\n" "}\n"; template inline bool dispatch_lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue) { const boost::compute::context &context = queue.get_context(); boost::shared_ptr cache = program_cache::get_global_cache(context); size_t iterator_size1 = iterator_range_size(first1, last1); size_t iterator_size2 = iterator_range_size(first2, last2); size_t max_size = (std::max)(iterator_size1, iterator_size2); if(max_size == 0){ return false; } boost::compute::vector result_vector(max_size, context); typedef typename std::iterator_traits::value_type value_type1; typedef typename std::iterator_traits::value_type value_type2; // load (or create) lexicographical compare program std::string cache_key = std::string("__boost_lexicographical_compare") + type_name() + type_name(); std::stringstream options; options << " -DT1=" << type_name(); options << " -DT2=" << type_name(); program lexicographical_compare_program = cache->get_or_build( cache_key, options.str(), lexicographical_compare_source, context ); kernel lexicographical_compare_kernel(lexicographical_compare_program, "lexicographical_compare"); lexicographical_compare_kernel.set_arg(0, iterator_size1); 
lexicographical_compare_kernel.set_arg(1, iterator_size2); lexicographical_compare_kernel.set_arg(2, first1.get_buffer()); lexicographical_compare_kernel.set_arg(3, first2.get_buffer()); lexicographical_compare_kernel.set_arg(4, result_vector.get_buffer()); queue.enqueue_1d_range_kernel(lexicographical_compare_kernel, 0, max_size, 0); return boost::compute::any_of(result_vector.begin(), result_vector.end(), _1 == 0, queue); } } // end detail namespace /// Checks if the first range [first1, last1) is lexicographically /// less than the second range [first2, last2). template inline bool lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { return detail::dispatch_lexicographical_compare(first1, last1, first2, last2, queue); } } // end compute namespace } // end boost namespac compute-0.5/include/boost/compute/algorithm/lower_bound.hpp000066400000000000000000000025361263566244600243210ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP #define BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP #include #include #include #include namespace boost { namespace compute { /// Returns an iterator pointing to the first element in the sorted /// range [\p first, \p last) that is not less than \p value. 
/// /// \see upper_bound() template inline InputIterator lower_bound(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { using ::boost::compute::_1; InputIterator position = detail::binary_find(first, last, _1 >= value, queue); return position; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP compute-0.5/include/boost/compute/algorithm/max_element.hpp000066400000000000000000000046371263566244600243040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP #define BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP #include #include #include #include namespace boost { namespace compute { /// Returns an iterator pointing to the element in the range /// [\p first, \p last) with the maximum value. /// /// \param first first element in the input range /// \param last last element in the input range /// \param compare comparison function object which returns true if the first /// argument is less than (i.e. is ordered before) the second. /// \param queue command queue to perform the operation /// /// For example, to find \c int2 value with maximum first component in given vector: /// \code /// // comparison function object /// BOOST_COMPUTE_FUNCTION(bool, compare_first, (const int2_ &a, const int2_ &b), /// { /// return a.x < b.x; /// }); /// /// // create vector /// boost::compute::vector data = ... 
/// /// boost::compute::vector::iterator max = /// boost::compute::max_element(data.begin(), data.end(), compare_first, queue); /// \endcode /// /// \see min_element() template inline InputIterator max_element(InputIterator first, InputIterator last, Compare compare, command_queue &queue = system::default_queue()) { return detail::find_extrema(first, last, compare, false, queue); } ///\overload template inline InputIterator max_element(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; return ::boost::compute::max_element( first, last, ::boost::compute::less(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP compute-0.5/include/boost/compute/algorithm/merge.hpp000066400000000000000000000105271263566244600231000ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_MERGE_HPP #define BOOST_COMPUTE_ALGORITHM_MERGE_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { /// Merges the sorted values in the range [\p first1, \p last1) with the sorted /// values in the range [\p first2, last2) and stores the result in the range /// beginning at \p result. Values are compared using the \p comp function. If /// no comparision function is given, \c less is used. 
/// /// \param first1 first element in the first range to merge /// \param last1 last element in the first range to merge /// \param first2 first element in the second range to merge /// \param last2 last element in the second range to merge /// \param result first element in the result range /// \param comp comparison function (by default \c less) /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// \see inplace_merge() template inline OutputIterator merge(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type input1_type; typedef typename std::iterator_traits::value_type input2_type; typedef typename std::iterator_traits::value_type output_type; const device &device = queue.get_device(); std::string cache_key = std::string("__boost_merge_") + type_name() + "_" + type_name() + "_" + type_name(); boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); // default serial merge threshold depends on device type size_t default_serial_merge_threshold = 32768; if(device.type() & device::gpu) { default_serial_merge_threshold = 2048; } // loading serial merge threshold parameter const size_t serial_merge_threshold = parameters->get(cache_key, "serial_merge_threshold", default_serial_merge_threshold); // choosing merge algorithm const size_t total_count = detail::iterator_range_size(first1, last1) + detail::iterator_range_size(first2, last2); // for small inputs serial merge turns out to outperform // merge with merge path algorithm if(total_count <= serial_merge_threshold){ return detail::serial_merge(first1, last1, first2, last2, result, comp, queue); } return detail::merge_with_merge_path(first1, last1, first2, last2, result, comp, queue); } /// \overload template inline OutputIterator 
merge(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; less less_than; return merge(first1, last1, first2, last2, result, less_than, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_MERGE_HPP compute-0.5/include/boost/compute/algorithm/min_element.hpp000066400000000000000000000046361263566244600243010ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP #define BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP #include #include #include #include namespace boost { namespace compute { /// Returns an iterator pointing to the element in range /// [\p first, \p last) with the minimum value. /// /// \param first first element in the input range /// \param last last element in the input range /// \param compare comparison function object which returns true if the first /// argument is less than (i.e. is ordered before) the second. /// \param queue command queue to perform the operation /// /// For example, to find \c int2 value with minimum first component in given vector: /// \code /// // comparison function object /// BOOST_COMPUTE_FUNCTION(bool, compare_first, (const int2_ &a, const int2_ &b), /// { /// return a.x < b.x; /// }); /// /// // create vector /// boost::compute::vector data = ... 
/// /// boost::compute::vector::iterator min = /// boost::compute::min_element(data.begin(), data.end(), compare_first, queue); /// \endcode /// /// \see max_element() template inline InputIterator min_element(InputIterator first, InputIterator last, Compare compare, command_queue &queue = system::default_queue()) { return detail::find_extrema(first, last, compare, true, queue); } ///\overload template inline InputIterator min_element(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; return ::boost::compute::min_element( first, last, ::boost::compute::less(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP compute-0.5/include/boost/compute/algorithm/minmax_element.hpp000066400000000000000000000044751263566244600250100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP #define BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP #include #include #include #include #include namespace boost { namespace compute { /// Returns a pair of iterators with the first pointing to the minimum /// element and the second pointing to the maximum element in the range /// [\p first, \p last). /// /// \param first first element in the input range /// \param last last element in the input range /// \param compare comparison function object which returns true if the first /// argument is less than (i.e. is ordered before) the second. 
/// \param queue command queue to perform the operation /// /// \see max_element(), min_element() template inline std::pair minmax_element(InputIterator first, InputIterator last, Compare compare, command_queue &queue = system::default_queue()) { if(first == last){ // empty range return std::make_pair(first, first); } return std::make_pair(min_element(first, last, compare, queue), max_element(first, last, compare, queue)); } ///\overload template inline std::pair minmax_element(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { if(first == last){ // empty range return std::make_pair(first, first); } return std::make_pair(min_element(first, last, queue), max_element(first, last, queue)); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP compute-0.5/include/boost/compute/algorithm/mismatch.hpp000066400000000000000000000056751263566244600236160ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP #define BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// Returns a pair of iterators pointing to the first position where the /// range [\p first1, \p last1) and the range starting at \p first2 /// differ. 
template inline std::pair mismatch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; ::boost::compute::equal_to op; InputIterator2 last2 = first2 + std::distance(first1, last1); InputIterator1 iter = boost::get<0>( ::boost::compute::find( ::boost::compute::make_transform_iterator( ::boost::compute::make_zip_iterator( boost::make_tuple(first1, first2) ), detail::unpack(op) ), ::boost::compute::make_transform_iterator( ::boost::compute::make_zip_iterator( boost::make_tuple(last1, last2) ), detail::unpack(op) ), false, queue ).base().get_iterator_tuple() ); return std::make_pair(iter, first2 + std::distance(first1, iter)); } /// \overload template inline std::pair mismatch(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { if(std::distance(first1, last1) < std::distance(first2, last2)){ return ::boost::compute::mismatch(first1, last1, first2, queue); } else { return ::boost::compute::mismatch( first1, first1 + std::distance(first2, last2), first2, queue ); } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP compute-0.5/include/boost/compute/algorithm/next_permutation.hpp000066400000000000000000000126041263566244600254040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP #define BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// /// \brief Helper function for next_permutation /// /// To find rightmost element which is smaller /// than its next element /// template inline InputIterator next_permutation_helper(InputIterator first, InputIterator last, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0 || count == 1){ return last; } count = count - 1; const context &context = queue.get_context(); detail::meta_kernel k("next_permutation"); size_t index_arg = k.add_arg(memory_object::global_memory, "index"); atomic_max atomic_max_int; k << k.decl("i") << " = get_global_id(0);\n" << k.decl("cur_value") << "=" << first[k.var("i")] << ";\n" << k.decl("next_value") << "=" << first[k.expr("i+1")] << ";\n" << "if(cur_value < next_value){\n" << " " << atomic_max_int(k.var("index"), k.var("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar index(context); kernel.set_arg(index_arg, index.get_buffer()); index.write(static_cast(-1), queue); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); int result = static_cast(index.read(queue)); if(result == -1) return last; else return first + result; } /// /// \brief Helper function for next_permutation /// /// To find the smallest element to the right of the element found above /// that is greater than it /// template inline InputIterator np_ceiling(InputIterator first, InputIterator last, ValueType value, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0){ return last; } const context &context = queue.get_context(); detail::meta_kernel 
k("np_ceiling"); size_t index_arg = k.add_arg(memory_object::global_memory, "index"); size_t value_arg = k.add_arg(memory_object::private_memory, "value"); atomic_max atomic_max_int; k << k.decl("i") << " = get_global_id(0);\n" << k.decl("cur_value") << "=" << first[k.var("i")] << ";\n" << "if(cur_value <= " << first[k.expr("*index")] << " && cur_value > value){\n" << " " << atomic_max_int(k.var("index"), k.var("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar index(context); kernel.set_arg(index_arg, index.get_buffer()); index.write(static_cast(0), queue); kernel.set_arg(value_arg, value); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); int result = static_cast(index.read(queue)); return first + result; } } // end detail namespace /// /// \brief Permutation generating algorithm /// /// Transforms the range [first, last) into the next permutation from the /// set of all permutations arranged in lexicographic order /// \return Boolean value signifying if the last permutation was crossed /// and the range was reset /// /// \param first Iterator pointing to start of range /// \param last Iterator pointing to end of range /// \param queue Queue on which to execute /// template inline bool next_permutation(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; if(first == last) return false; InputIterator first_element = detail::next_permutation_helper(first, last, queue); if(first_element == last) { reverse(first, last, queue); return false; } value_type first_value = first_element.read(queue); InputIterator ceiling_element = detail::np_ceiling(first_element + 1, last, first_value, queue); value_type ceiling_value = ceiling_element.read(queue); first_element.write(ceiling_value, queue); ceiling_element.write(first_value, queue); reverse(first_element + 1, last, queue); return true; } } // end compute namespace } // end boost namespace #endif // 
BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP compute-0.5/include/boost/compute/algorithm/none_of.hpp000066400000000000000000000023131263566244600234160ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP #define BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP #include #include namespace boost { namespace compute { /// Returns \c true if \p predicate returns \c true for none of the elements in /// the range [\p first, \p last). /// /// \see all_of(), any_of() template inline bool none_of(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { return ::boost::compute::find_if(first, last, predicate, queue) == last; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP compute-0.5/include/boost/compute/algorithm/nth_element.hpp000066400000000000000000000050631263566244600243020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP #define BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP #include #include #include #include #include #include namespace boost { namespace compute { /// Rearranges the elements in the range [\p first, \p last) such that /// the \p nth element would be in that position in a sorted sequence. template inline void nth_element(Iterator first, Iterator nth, Iterator last, Compare compare, command_queue &queue = system::default_queue()) { if(nth == last) return; typedef typename std::iterator_traits::value_type value_type; while(1) { value_type value = nth.read(queue); using boost::compute::placeholders::_1; Iterator new_nth = partition( first, last, ::boost::compute::bind(compare, _1, value), queue ); Iterator old_nth = find(new_nth, last, value, queue); value_type new_value = new_nth.read(queue); fill_n(new_nth, 1, value, queue); fill_n(old_nth, 1, new_value, queue); new_value = nth.read(queue); if(value == new_value) break; if(std::distance(first, nth) < std::distance(first, new_nth)) { last = new_nth; } else { first = new_nth; } } } /// \overload template inline void nth_element(Iterator first, Iterator nth, Iterator last, command_queue &queue = system::default_queue()) { if(nth == last) return; typedef typename std::iterator_traits::value_type value_type; less less_than; return nth_element(first, nth, last, less_than, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP compute-0.5/include/boost/compute/algorithm/partial_sum.hpp000066400000000000000000000024051263566244600243150ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP #define BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP #include #include #include namespace boost { namespace compute { /// Calculates the cumulative sum of the elements in the range [\p first, /// \p last) and writes the resulting values to the range beginning at /// \p result. template inline OutputIterator partial_sum(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { return ::boost::compute::inclusive_scan(first, last, result, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP compute-0.5/include/boost/compute/algorithm/partition.hpp000066400000000000000000000024601263566244600240070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_HPP #define BOOST_COMPUTE_ALGORITHM_PARTITION_HPP #include #include #include namespace boost { namespace compute { /// /// Partitions the elements in the range [\p first, \p last) according to /// \p predicate. Order of the elements need not be preserved. 
/// /// \see is_partitioned() and stable_partition() /// template inline Iterator partition(Iterator first, Iterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { return stable_partition(first, last, predicate, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_PARTITION_HPP compute-0.5/include/boost/compute/algorithm/partition_copy.hpp000066400000000000000000000043151263566244600250420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP #define BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP #include #include #include #include namespace boost { namespace compute { /// Copies all of the elements in the range [\p first, \p last) for which /// \p predicate returns \c true to the range beginning at \p first_true /// and all of the elements for which \p predicate returns \c false to /// the range beginning at \p first_false. 
/// /// \see partition() template inline std::pair partition_copy(InputIterator first, InputIterator last, OutputIterator1 first_true, OutputIterator2 first_false, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { // copy true values OutputIterator1 last_true = ::boost::compute::copy_if(first, last, first_true, predicate, queue); // copy false values OutputIterator2 last_false = ::boost::compute::copy_if(first, last, first_false, not1(predicate), queue); // return iterators to the end of the true and the false ranges return std::make_pair(last_true, last_false); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP compute-0.5/include/boost/compute/algorithm/partition_point.hpp000066400000000000000000000031701263566244600252170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP #define BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP #include #include #include namespace boost { namespace compute { /// /// \brief Partition point algorithm /// /// Finds the end of true values in the partitioned range [first, last) /// \return Iterator pointing to end of true values /// /// \param first Iterator pointing to start of range /// \param last Iterator pointing to end of range /// \param predicate Unary predicate to be applied on each element /// \param queue Queue on which to execute /// /// \see partition() and stable_partition() /// template inline InputIterator partition_point(InputIterator first, InputIterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { return detail::binary_find(first, last, not1(predicate), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP compute-0.5/include/boost/compute/algorithm/prev_permutation.hpp000066400000000000000000000125741263566244600254100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP #define BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// /// \brief Helper function for prev_permutation /// /// To find rightmost element which is greater /// than its next element /// template inline InputIterator prev_permutation_helper(InputIterator first, InputIterator last, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0 || count == 1){ return last; } count = count - 1; const context &context = queue.get_context(); detail::meta_kernel k("prev_permutation"); size_t index_arg = k.add_arg(memory_object::global_memory, "index"); atomic_max atomic_max_int; k << k.decl("i") << " = get_global_id(0);\n" << k.decl("cur_value") << "=" << first[k.var("i")] << ";\n" << k.decl("next_value") << "=" << first[k.expr("i+1")] << ";\n" << "if(cur_value > next_value){\n" << " " << atomic_max_int(k.var("index"), k.var("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar index(context); kernel.set_arg(index_arg, index.get_buffer()); index.write(static_cast(-1), queue); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); int result = static_cast(index.read(queue)); if(result == -1) return last; else return first + result; } /// /// \brief Helper function for prev_permutation /// /// To find the largest element to the right of the element found above /// that is smaller than it /// template inline InputIterator pp_floor(InputIterator first, InputIterator last, ValueType value, command_queue &queue) { typedef typename std::iterator_traits::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0){ return last; } const context &context = queue.get_context(); detail::meta_kernel k("pp_floor"); 
size_t index_arg = k.add_arg(memory_object::global_memory, "index"); size_t value_arg = k.add_arg(memory_object::private_memory, "value"); atomic_max atomic_max_int; k << k.decl("i") << " = get_global_id(0);\n" << k.decl("cur_value") << "=" << first[k.var("i")] << ";\n" << "if(cur_value >= " << first[k.expr("*index")] << " && cur_value < value){\n" << " " << atomic_max_int(k.var("index"), k.var("i")) << ";\n" << "}\n"; kernel kernel = k.compile(context); scalar index(context); kernel.set_arg(index_arg, index.get_buffer()); index.write(static_cast(0), queue); kernel.set_arg(value_arg, value); queue.enqueue_1d_range_kernel(kernel, 0, count, 0); int result = static_cast(index.read(queue)); return first + result; } } // end detail namespace /// /// \brief Permutation generating algorithm /// /// Transforms the range [first, last) into the previous permutation from /// the set of all permutations arranged in lexicographic order /// \return Boolean value signifying if the first permutation was crossed /// and the range was reset /// /// \param first Iterator pointing to start of range /// \param last Iterator pointing to end of range /// \param queue Queue on which to execute /// template inline bool prev_permutation(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; if(first == last) return false; InputIterator first_element = detail::prev_permutation_helper(first, last, queue); if(first_element == last) { reverse(first, last, queue); return false; } value_type first_value = first_element.read(queue); InputIterator ceiling_element = detail::pp_floor(first_element + 1, last, first_value, queue); value_type ceiling_value = ceiling_element.read(queue); first_element.write(ceiling_value, queue); ceiling_element.write(first_value, queue); reverse(first_element + 1, last, queue); return true; } } // end compute namespace } // end boost namespace #endif // 
BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP compute-0.5/include/boost/compute/algorithm/random_shuffle.hpp000066400000000000000000000047131263566244600247750ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP #define BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// Randomly shuffles the elements in the range [\p first, \p last). /// /// \see scatter() template inline void random_shuffle(Iterator first, Iterator last, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; size_t count = detail::iterator_range_size(first, last); if(count == 0){ return; } // generate shuffled indices on the host std::vector random_indices(count); boost::iota(random_indices, 0); std::random_shuffle(random_indices.begin(), random_indices.end()); // copy random indices to the device const context &context = queue.get_context(); vector indices(count, context); ::boost::compute::copy(random_indices.begin(), random_indices.end(), indices.begin(), queue); // make a copy of the values on the device vector tmp(count, context); ::boost::compute::copy(first, last, tmp.begin(), queue); // write values to their new locations ::boost::compute::scatter(tmp.begin(), tmp.end(), indices.begin(), first, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP 
compute-0.5/include/boost/compute/algorithm/reduce.hpp000066400000000000000000000255351263566244600232550ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REDUCE_HPP #define BOOST_COMPUTE_ALGORITHM_REDUCE_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template size_t reduce(InputIterator first, size_t count, OutputIterator result, size_t block_size, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits::value_type input_type; typedef typename boost::compute::result_of::type result_type; const context &context = queue.get_context(); size_t block_count = count / 2 / block_size; size_t total_block_count = static_cast(std::ceil(float(count) / 2.f / float(block_size))); if(block_count != 0){ meta_kernel k("block_reduce"); size_t output_arg = k.add_arg(memory_object::global_memory, "output"); size_t block_arg = k.add_arg(memory_object::local_memory, "block"); k << "const uint gid = get_global_id(0);\n" << "const uint lid = get_local_id(0);\n" << // copy values to local memory "block[lid] = " << function(first[k.make_var("gid*2+0")], first[k.make_var("gid*2+1")]) << ";\n" << // perform reduction "for(uint i = 1; i < " << uint_(block_size) << "; i <<= 1){\n" << " barrier(CLK_LOCAL_MEM_FENCE);\n" << " uint mask = (i << 1) - 1;\n" << " if((lid & mask) == 0){\n" << " block[lid] = " << function(k.expr("block[lid]"), k.expr("block[lid+i]")) << ";\n" << " }\n" << "}\n" << // write block 
result to global output "if(lid == 0)\n" << " output[get_group_id(0)] = block[0];\n"; kernel kernel = k.compile(context); kernel.set_arg(output_arg, result.get_buffer()); kernel.set_arg(block_arg, local_buffer(block_size)); queue.enqueue_1d_range_kernel(kernel, 0, block_count * block_size, block_size); } // serially reduce any leftovers if(block_count * block_size * 2 < count){ size_t last_block_start = block_count * block_size * 2; meta_kernel k("extra_serial_reduce"); size_t count_arg = k.add_arg("count"); size_t offset_arg = k.add_arg("offset"); size_t output_arg = k.add_arg(memory_object::global_memory, "output"); size_t output_offset_arg = k.add_arg("output_offset"); k << k.decl("result") << " = \n" << first[k.expr("offset")] << ";\n" << "for(uint i = offset + 1; i < count; i++)\n" << " result = " << function(k.var("result"), first[k.var("i")]) << ";\n" << "output[output_offset] = result;\n"; kernel kernel = k.compile(context); kernel.set_arg(count_arg, static_cast(count)); kernel.set_arg(offset_arg, static_cast(last_block_start)); kernel.set_arg(output_arg, result.get_buffer()); kernel.set_arg(output_offset_arg, static_cast(block_count)); queue.enqueue_task(kernel); } return total_block_count; } template inline vector< typename boost::compute::result_of< BinaryFunction( typename std::iterator_traits::value_type, typename std::iterator_traits::value_type ) >::type > block_reduce(InputIterator first, size_t count, size_t block_size, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits::value_type input_type; typedef typename boost::compute::result_of::type result_type; const context &context = queue.get_context(); size_t total_block_count = static_cast(std::ceil(float(count) / 2.f / float(block_size))); vector result_vector(total_block_count, context); reduce(first, count, result_vector.begin(), block_size, function, queue); return result_vector; } template inline void generic_reduce(InputIterator first, InputIterator last, 
OutputIterator result, BinaryFunction function, command_queue &queue) { typedef typename std::iterator_traits::value_type input_type; typedef typename boost::compute::result_of::type result_type; const device &device = queue.get_device(); const context &context = queue.get_context(); size_t count = detail::iterator_range_size(first, last); if(device.type() & device::cpu){ boost::compute::vector value(1, context); detail::serial_reduce(first, last, value.begin(), function, queue); boost::compute::copy_n(value.begin(), 1, result, queue); } else { size_t block_size = 256; // first pass vector results = detail::block_reduce(first, count, block_size, function, queue); if(results.size() > 1){ detail::inplace_reduce(results.begin(), results.end(), function, queue); } boost::compute::copy_n(results.begin(), 1, result, queue); } } template inline void dispatch_reduce(InputIterator first, InputIterator last, OutputIterator result, const plus &function, command_queue &queue) { const context &context = queue.get_context(); const device &device = queue.get_device(); // reduce to temporary buffer on device array tmp(context); if(device.type() & device::cpu){ detail::serial_reduce(first, last, tmp.begin(), function, queue); } else { reduce_on_gpu(first, last, tmp.begin(), function, queue); } // copy to result iterator copy_n(tmp.begin(), 1, result, queue); } template inline void dispatch_reduce(InputIterator first, InputIterator last, OutputIterator result, BinaryFunction function, command_queue &queue) { generic_reduce(first, last, result, function, queue); } } // end detail namespace /// Returns the result of applying \p function to the elements in the /// range [\p first, \p last). /// /// If no function is specified, \c plus will be used. 
/// /// \param first first element in the input range /// \param last last element in the input range /// \param result iterator pointing to the output /// \param function binary reduction function /// \param queue command queue to perform the operation /// /// The \c reduce() algorithm assumes that the binary reduction function is /// associative. When used with non-associative functions the result may /// be non-deterministic and vary in precision. Notably this affects the /// \c plus() function as floating-point addition is not associative /// and may produce slightly different results than a serial algorithm. /// /// This algorithm supports both host and device iterators for the /// result argument. This allows for values to be reduced and copied /// to the host all with a single function call. /// /// For example, to calculate the sum of the values in a device vector and /// copy the result to a value on the host: /// /// \snippet test/test_reduce.cpp sum_int /// /// Note that while the the \c reduce() algorithm is conceptually identical to /// the \c accumulate() algorithm, its implementation is substantially more /// efficient on parallel hardware. For more information, see the documentation /// on the \c accumulate() algorithm. 
/// /// \see accumulate() template inline void reduce(InputIterator first, InputIterator last, OutputIterator result, BinaryFunction function, command_queue &queue = system::default_queue()) { if(first == last){ return; } detail::dispatch_reduce(first, last, result, function, queue); } /// \overload template inline void reduce(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type T; if(first == last){ return; } detail::dispatch_reduce(first, last, result, plus(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REDUCE_HPP compute-0.5/include/boost/compute/algorithm/reduce_by_key.hpp000066400000000000000000000114311263566244600246050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP #define BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { /// The \c reduce_by_key() algorithm performs reduction for each contiguous /// subsequence of values determinate by equivalent keys. /// /// Returns a pair of iterators at the end of the ranges [\p keys_result, keys_result_last) /// and [\p values_result, \p values_result_last). /// /// If no function is specified, \c plus will be used. /// If no predicate is specified, \c equal_to will be used. 
/// /// \param keys_first the first key /// \param keys_last the last key /// \param values_first the first input value /// \param keys_result iterator pointing to the key output /// \param values_result iterator pointing to the reduced value output /// \param function binary reduction function /// \param predicate binary predicate which returns true only if two keys are equal /// \param queue command queue to perform the operation /// /// The \c reduce_by_key() algorithm assumes that the binary reduction function /// is associative. When used with non-associative functions the result may /// be non-deterministic and vary in precision. Notably this affects the /// \c plus() function as floating-point addition is not associative /// and may produce slightly different results than a serial algorithm. /// /// For example, to calculate the sum of the values for each key: /// /// \snippet test/test_reduce_by_key.cpp reduce_by_key_int /// /// \see reduce() template inline std::pair reduce_by_key(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, BinaryPredicate predicate, command_queue &queue = system::default_queue()) { return detail::dispatch_reduce_by_key(keys_first, keys_last, values_first, keys_result, values_result, function, predicate, queue); } /// \overload template inline std::pair reduce_by_key(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, BinaryFunction function, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type key_type; return reduce_by_key(keys_first, keys_last, values_first, keys_result, values_result, function, equal_to(), queue); } /// \overload template inline std::pair reduce_by_key(InputKeyIterator keys_first, InputKeyIterator keys_last, InputValueIterator 
values_first, OutputKeyIterator keys_result, OutputValueIterator values_result, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type key_type; typedef typename std::iterator_traits::value_type value_type; return reduce_by_key(keys_first, keys_last, values_first, keys_result, values_result, plus(), equal_to(), queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP compute-0.5/include/boost/compute/algorithm/remove.hpp000066400000000000000000000034171263566244600232760ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REMOVE_HPP #define BOOST_COMPUTE_ALGORITHM_REMOVE_HPP #include #include #include #include namespace boost { namespace compute { /// Removes each element equal to \p value in the range [\p first, /// \p last). 
/// /// \see remove_if() template inline Iterator remove(Iterator first, Iterator last, const T &value, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; using ::boost::compute::_1; using ::boost::compute::lambda::all; if(vector_size::value == 1){ return ::boost::compute::remove_if(first, last, _1 == value, queue); } else { return ::boost::compute::remove_if(first, last, all(_1 == value), queue); } } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REMOVE_HPP compute-0.5/include/boost/compute/algorithm/remove_if.hpp000066400000000000000000000031671263566244600237560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP #define BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP #include #include #include #include namespace boost { namespace compute { /// Removes each element for which \p predicate returns \c true in the /// range [\p first, \p last). 
///
/// The input is first copied into a temporary vector and then copied back
/// over [\p first, \p last) with copy_if() using the negated predicate.
///
/// \return the iterator returned by copy_if()
///
/// \see remove()
template inline Iterator remove_if(Iterator first,
                                   Iterator last,
                                   Predicate predicate,
                                   command_queue &queue = system::default_queue())
{
    // NOTE(review): the template parameter list and the template arguments
    // of std::iterator_traits / vector are missing from this text - restore
    // them from the upstream header before compiling.
    typedef typename std::iterator_traits::value_type value_type;

    // temporary storage for the input data
    ::boost::compute::vector tmp(first, last, queue);

    // keep only the elements for which the predicate is false
    return ::boost::compute::copy_if(tmp.begin(),
                                     tmp.end(),
                                     first,
                                     not1(predicate),
                                     queue);
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP
compute-0.5/include/boost/compute/algorithm/replace.hpp000066400000000000000000000045321263566244600234130ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_REPLACE_HPP
#define BOOST_COMPUTE_ALGORITHM_REPLACE_HPP

// NOTE(review): the header names of these #include directives are missing
// from this text.
#include
#include
#include
#include

namespace boost {
namespace compute {
namespace detail {

// Kernel that overwrites every element equal to "old_value" with
// "new_value", one work-item per element.
//
// NOTE(review): the template parameter list and the template arguments of
// var / add_set_arg are missing from this text.
template class replace_kernel : public meta_kernel
{
public:
    replace_kernel()
        : meta_kernel("replace")
    {
        m_count = 0;
    }

    // Records the range size and emits the kernel source for the range.
    void set_range(Iterator first, Iterator last)
    {
        m_count = detail::iterator_range_size(first, last);

        *this <<
            "const uint i = get_global_id(0);\n" <<
            "if(" << first[var("i")] << " == " << var("old_value") << ")\n" <<
            " " << first[var("i")] << '=' << var("new_value") << ";\n";
    }

    // Binds the value to search for to the "old_value" kernel argument.
    void set_old_value(const T &old_value)
    {
        add_set_arg("old_value", old_value);
    }

    // Binds the replacement value to the "new_value" kernel argument.
    void set_new_value(const T &new_value)
    {
        add_set_arg("new_value", new_value);
    }

    // Runs the kernel over the recorded range; empty ranges are skipped.
    void exec(command_queue &queue)
    {
        if(m_count == 0){
            // nothing to do
            return;
        }

        exec_1d(queue, 0, m_count);
    }

private:
    size_t m_count; // number of elements in the range given to set_range()
};

} // end detail namespace

/// Replaces each instance of \p old_value in the range [\p first,
/// \p last) with \p new_value.
template inline void replace(Iterator first,
                             Iterator last,
                             const T &old_value,
                             const T &new_value,
                             command_queue &queue = system::default_queue())
{
    // NOTE(review): template arguments of detail::replace_kernel are
    // missing from this text.
    detail::replace_kernel kernel;

    kernel.set_range(first, last);
    kernel.set_old_value(old_value);
    kernel.set_new_value(new_value);

    kernel.exec(queue);
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_REPLACE_HPP
compute-0.5/include/boost/compute/algorithm/replace_copy.hpp000066400000000000000000000036521263566244600244470ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP
#define BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP

// NOTE(review): the header names of these #include directives are missing
// from this text.
#include
#include
#include
#include
#include

namespace boost {
namespace compute {

/// Copies the value in the range [\p first, \p last) to the range
/// beginning at \p result while replacing each instance of \p old_value
/// with \p new_value.
/// /// \see replace() template inline OutputIterator replace_copy(InputIterator first, InputIterator last, OutputIterator result, const T &old_value, const T &new_value, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::difference_type difference_type; difference_type count = std::distance(first, last); if(count == 0){ return result; } // copy data to result ::boost::compute::copy(first, last, result, queue); // replace in result ::boost::compute::replace(result, result + count, old_value, new_value, queue); // return iterator to the end of result return result + count; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP compute-0.5/include/boost/compute/algorithm/reverse.hpp000066400000000000000000000043041263566244600234500ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REVERSE_HPP #define BOOST_COMPUTE_ALGORITHM_REVERSE_HPP #include #include #include #include namespace boost { namespace compute { namespace detail { template struct reverse_kernel : public meta_kernel { reverse_kernel(Iterator first, Iterator last) : meta_kernel("reverse") { typedef typename std::iterator_traits::value_type value_type; // store size of the range m_size = detail::iterator_range_size(first, last); add_set_arg("size", static_cast(m_size)); *this << decl("i") << " = get_global_id(0);\n" << decl("j") << " = size - get_global_id(0) - 1;\n" << decl("tmp") << "=" << first[var("i")] << ";\n" << first[var("i")] << "=" << first[var("j")] << ";\n" << first[var("j")] << "= tmp;\n"; } void exec(command_queue &queue) { exec_1d(queue, 0, m_size / 2); } size_t m_size; }; } // end detail namespace /// Reverses the elements in the range [\p first, \p last). /// /// \see reverse_copy() template inline void reverse(Iterator first, Iterator last, command_queue &queue = system::default_queue()) { size_t count = detail::iterator_range_size(first, last); if(count < 2){ return; } detail::reverse_kernel kernel(first, last); kernel.exec(queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REVERSE_HPP compute-0.5/include/boost/compute/algorithm/reverse_copy.hpp000066400000000000000000000045521263566244600245070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP #define BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP #include #include #include #include #include namespace boost { namespace compute { namespace detail { template struct reverse_copy_kernel : public meta_kernel { reverse_copy_kernel(Iterator first, Iterator last, OutputIterator result) : meta_kernel("reverse_copy") { // store size of the range m_size = detail::iterator_range_size(first, last); add_set_arg("size", static_cast(m_size)); *this << decl("i") << " = get_global_id(0);\n" << decl("j") << " = size - get_global_id(0) - 1;\n" << result[var("j")] << "=" << first[var("i")] << ";\n"; } void exec(command_queue &queue) { exec_1d(queue, 0, m_size); } size_t m_size; }; } // end detail namespace /// Copies the elements in the range [\p first, \p last) in reversed /// order to the range beginning at \p result. /// /// \see reverse() template inline OutputIterator reverse_copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::difference_type difference_type; difference_type count = std::distance(first, last); detail::reverse_copy_kernel kernel(first, last, result); // run kernel kernel.exec(queue); // return iterator to the end of result return result + count; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP compute-0.5/include/boost/compute/algorithm/rotate.hpp000066400000000000000000000033271263566244600232770ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ROTATE_HPP #define BOOST_COMPUTE_ALGORITHM_ROTATE_HPP #include #include #include namespace boost { namespace compute { /// Performs left rotation such that element at \p n_first comes to the /// beginning. /// /// \see rotate_copy() template inline void rotate(InputIterator first, InputIterator n_first, InputIterator last, command_queue &queue = system::default_queue()) { //Handle trivial cases if (n_first==first || n_first==last) { return; } //Handle others typedef typename std::iterator_traits::value_type T; size_t count = detail::iterator_range_size(first, n_first); size_t count2 = detail::iterator_range_size(first, last); const context &context = queue.get_context(); vector temp(count2, context); ::boost::compute::copy(first, last, temp.begin(), queue); ::boost::compute::copy(temp.begin()+count, temp.end(), first, queue); ::boost::compute::copy(temp.begin(), temp.begin()+count, last-count, queue); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ROTATE_HPP compute-0.5/include/boost/compute/algorithm/rotate_copy.hpp000066400000000000000000000027251263566244600243320ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP #define BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP #include #include namespace boost { namespace compute { /// Performs left rotation such that element at n_first comes to the /// beginning and the output is stored in range starting at result. 
/// /// \see rotate() template inline void rotate_copy(InputIterator first, InputIterator n_first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { size_t count = detail::iterator_range_size(first, n_first); size_t count2 = detail::iterator_range_size(n_first, last); ::boost::compute::copy(first+count, last, result, queue); ::boost::compute::copy(first, first+count, result+count2, queue); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP compute-0.5/include/boost/compute/algorithm/scatter.hpp000066400000000000000000000060371263566244600234470ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SCATTER_HPP #define BOOST_COMPUTE_ALGORITHM_SCATTER_HPP #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template class scatter_kernel : meta_kernel { public: scatter_kernel() : meta_kernel("scatter") {} void set_range(InputIterator first, InputIterator last, MapIterator map, OutputIterator result) { m_count = iterator_range_size(first, last); m_input_offset = first.get_index(); m_output_offset = result.get_index(); m_input_offset_arg = add_arg("input_offset"); m_output_offset_arg = add_arg("output_offset"); *this << "const uint i = get_global_id(0);\n" << "uint i1 = " << map[expr("i")] << " + output_offset;\n" << "uint i2 = i + input_offset;\n" << result[expr("i1")] << "=" << first[expr("i2")] << ";\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } set_arg(m_input_offset_arg, uint_(m_input_offset)); set_arg(m_output_offset_arg, uint_(m_output_offset)); return exec_1d(queue, 0, m_count); } private: size_t m_count; size_t m_input_offset; size_t m_input_offset_arg; size_t m_output_offset; size_t m_output_offset_arg; }; } // end detail namespace /// Copies the elements from the range [\p first, \p last) to the range /// beginning at \p result using the output indices from the range beginning /// at \p map. 
///
/// \see gather()
template inline void scatter(InputIterator first,
                             InputIterator last,
                             MapIterator map,
                             OutputIterator result,
                             command_queue &queue = system::default_queue())
{
    // NOTE(review): the template parameter list and the template arguments
    // of detail::scatter_kernel are missing from this text - restore them
    // from the upstream header before compiling.
    detail::scatter_kernel kernel;

    kernel.set_range(first, last, map, result);
    kernel.exec(queue);
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_SCATTER_HPP
compute-0.5/include/boost/compute/algorithm/scatter_if.hpp000066400000000000000000000077431263566244600241320ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Pola
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP
#define BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP

// NOTE(review): the header names of these #include directives are missing
// from this text.
#include
#include
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {
namespace detail {

// Kernel that writes input element i to the output position given by
// map[i], but only when predicate(stencil[i]) holds.
//
// NOTE(review): the template parameter list and the template arguments of
// add_arg / expr are missing from this text.
template class scatter_if_kernel : meta_kernel
{
public:
    scatter_if_kernel() : meta_kernel("scatter_if")
    {}

    // Records the range size and iterator offsets, declares the two offset
    // arguments and emits the kernel source.
    void set_range(InputIterator first,
                   InputIterator last,
                   MapIterator map,
                   StencilIterator stencil,
                   OutputIterator result,
                   Predicate predicate)
    {
        m_count = iterator_range_size(first, last);
        m_input_offset = first.get_index();
        m_output_offset = result.get_index();

        m_input_offset_arg = add_arg("input_offset");
        m_output_offset_arg = add_arg("output_offset");

        *this <<
            "const uint i = get_global_id(0);\n" <<
            "uint i1 = " << map[expr("i")] <<
                " + output_offset;\n" <<
            "uint i2 = i + input_offset;\n" <<
            if_(predicate(stencil[expr("i")])) << "\n" <<
                result[expr("i1")] << "=" <<
                    first[expr("i2")] << ";\n";
    }

    // Binds the offsets and runs the kernel; returns a default-constructed
    // event without launching anything when the range is empty.
    event exec(command_queue &queue)
    {
        if(m_count == 0) {
            return event();
        }

        set_arg(m_input_offset_arg, uint_(m_input_offset));
        set_arg(m_output_offset_arg, uint_(m_output_offset));

        return exec_1d(queue, 0, m_count);
    }

private:
    size_t m_count;             // number of input elements
    size_t m_input_offset;      // first.get_index()
    size_t m_input_offset_arg;  // value returned by add_arg("input_offset")
    size_t m_output_offset;     // result.get_index()
    size_t m_output_offset_arg; // value returned by add_arg("output_offset")
};

} // end detail namespace

/// Copies the elements from the range [\p first, \p last) to the range
/// beginning at \p result using the output indices from the range beginning
/// at \p map, but only for the positions where \p predicate applied to the
/// corresponding element of \p stencil is true. The overload without a
/// predicate uses \c identity().
///
/// \see scatter()
template inline void scatter_if(InputIterator first,
                                InputIterator last,
                                MapIterator map,
                                StencilIterator stencil,
                                OutputIterator result,
                                Predicate predicate,
                                command_queue &queue = system::default_queue())
{
    // NOTE(review): the template parameter list and the template arguments
    // of detail::scatter_if_kernel are missing from this text.
    detail::scatter_if_kernel kernel;

    kernel.set_range(first, last, map, stencil, result, predicate);
    kernel.exec(queue);
}

/// \overload
template inline void scatter_if(InputIterator first,
                                InputIterator last,
                                MapIterator map,
                                StencilIterator stencil,
                                OutputIterator result,
                                command_queue &queue = system::default_queue())
{
    // NOTE(review): the template arguments of std::iterator_traits and
    // identity are missing from this text.
    typedef typename std::iterator_traits::value_type T;

    scatter_if(first, last, map, stencil, result, identity(), queue);
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP
compute-0.5/include/boost/compute/algorithm/search.hpp000066400000000000000000000051611263566244600232440ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SEARCH_HPP #define BOOST_COMPUTE_ALGORITHM_SEARCH_HPP #include #include #include #include #include #include namespace boost { namespace compute { /// /// \brief Substring matching algorithm /// /// Searches for the first match of the pattern [p_first, p_last) /// in text [t_first, t_last). /// \return Iterator pointing to beginning of first occurrence /// /// \param t_first Iterator pointing to start of text /// \param t_last Iterator pointing to end of text /// \param p_first Iterator pointing to start of pattern /// \param p_last Iterator pointing to end of pattern /// \param queue Queue on which to execute /// template inline TextIterator search(TextIterator t_first, TextIterator t_last, PatternIterator p_first, PatternIterator p_last, command_queue &queue = system::default_queue()) { // there is no need to check if pattern starts at last n - 1 indices vector matching_indices( detail::iterator_range_size(t_first, t_last) - detail::iterator_range_size(p_first, p_last) + 1, queue.get_context() ); // search_kernel puts value 1 at every index in vector where pattern starts at detail::search_kernel::iterator> kernel; kernel.set_range(p_first, p_last, t_first, t_last, matching_indices.begin()); kernel.exec(queue); vector::iterator index = ::boost::compute::find( matching_indices.begin(), matching_indices.end(), uint_(1), queue ); // pattern was not found if(index == matching_indices.end()) return t_last; return t_first + detail::iterator_range_size(matching_indices.begin(), index); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_SEARCH_HPP compute-0.5/include/boost/compute/algorithm/search_n.hpp000066400000000000000000000101531263566244600235560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost 
Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

// NOTE(review): the guard name says DETAIL although the archive path of this
// file is algorithm/search_n.hpp - confirm this is intentional.
#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP
#define BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP

// NOTE(review): the header names of these #include directives are missing
// from this text.
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {
namespace detail {

///
/// \brief Search kernel class
///
/// Subclass of meta_kernel which is capable of performing search_n
///
// NOTE(review): the template parameter list and the template arguments of
// std::iterator_traits / add_arg / expr are missing from this text.
template class search_n_kernel : public meta_kernel
{
public:
    typedef typename std::iterator_traits::value_type value_type;

    search_n_kernel() : meta_kernel("search_n")
    {}

    // Stores n and value, declares the "n" and "value" kernel arguments and
    // emits the kernel source. One work-item is launched per candidate start
    // position, i.e. (text length + 1 - n) of them.
    void set_range(TextIterator t_first,
                   TextIterator t_last,
                   value_type value,
                   size_t n,
                   OutputIterator result)
    {
        m_n = n;
        m_n_arg = add_arg("n");

        m_value = value;
        m_value_arg = add_arg("value");

        m_count = iterator_range_size(t_first, t_last);
        // NOTE(review): unsigned arithmetic - this wraps around if n is
        // larger than the text length plus one.
        m_count = m_count + 1 - m_n;

        // NOTE(review): part of the kernel source between "for(j = 0; j" and
        // the first result[...] write is missing from this text (the loop
        // condition, loop body and the following test were lost) - restore
        // from the upstream header.
        *this <<
            "uint i = get_global_id(0);\n" <<
            "uint i1 = i;\n" <<
            "uint j;\n" <<
            "for(j = 0; j("i1")] << " = 1;\n" <<
            "else\n" <<
            result[expr("i1")] << " = 0;\n";
    }

    // Binds n and value and runs the kernel; returns a default-constructed
    // event without launching anything when there are no start positions.
    event exec(command_queue &queue)
    {
        if(m_count == 0) {
            return event();
        }

        set_arg(m_n_arg, uint_(m_n));
        set_arg(m_value_arg, m_value);

        return exec_1d(queue, 0, m_count);
    }

private:
    size_t m_n;          // required run length
    size_t m_n_arg;      // value returned by add_arg("n")
    size_t m_count;      // number of candidate start positions
    value_type m_value;  // value to look for
    size_t m_value_arg;  // value returned by add_arg("value")
};

} //end detail namespace

///
/// \brief Substring matching algorithm
///
/// Searches for the first occurrence of n consecutive occurrences of
/// value in text [t_first, t_last).
/// \return Iterator pointing to beginning of first occurrence /// /// \param t_first Iterator pointing to start of text /// \param t_last Iterator pointing to end of text /// \param n Number of times value repeats /// \param value Value which repeats /// \param queue Queue on which to execute /// template inline TextIterator search_n(TextIterator t_first, TextIterator t_last, size_t n, ValueType value, command_queue &queue = system::default_queue()) { // there is no need to check if pattern starts at last n - 1 indices vector matching_indices( detail::iterator_range_size(t_first, t_last) + 1 - n, queue.get_context() ); // search_n_kernel puts value 1 at every index in vector where pattern // of n values starts at detail::search_n_kernel::iterator> kernel; kernel.set_range(t_first, t_last, value, n, matching_indices.begin()); kernel.exec(queue); vector::iterator index = ::boost::compute::find( matching_indices.begin(), matching_indices.end(), uint_(1), queue ); // pattern was not found if(index == matching_indices.end()) return t_last; return t_first + detail::iterator_range_size(matching_indices.begin(), index); } } //end compute namespace } //end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP compute-0.5/include/boost/compute/algorithm/set_difference.hpp000066400000000000000000000146151263566244600247500ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP #define BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { /// /// \brief Serial set difference kernel class /// /// Subclass of meta_kernel to perform serial set difference after tiling /// class serial_set_difference_kernel : meta_kernel { public: unsigned int tile_size; serial_set_difference_kernel() : meta_kernel("set_difference") { tile_size = 4; } template void set_range(InputIterator1 first1, InputIterator2 first2, InputIterator3 tile_first1, InputIterator3 tile_last1, InputIterator4 tile_first2, OutputIterator1 result, OutputIterator2 counts) { m_count = iterator_range_size(tile_first1, tile_last1) - 1; *this << "uint i = get_global_id(0);\n" << "uint start1 = " << tile_first1[expr("i")] << ";\n" << "uint end1 = " << tile_first1[expr("i+1")] << ";\n" << "uint start2 = " << tile_first2[expr("i")] << ";\n" << "uint end2 = " << tile_first2[expr("i+1")] << ";\n" << "uint index = i*" << tile_size << ";\n" << "uint count = 0;\n" << "while(start1("start1")] << " == " << first2[expr("start2")] << ")\n" << " {\n" << " start1++; start2++;\n" << " }\n" << " else if(" << first1[expr("start1")] << " < " << first2[expr("start2")] << ")\n" << " {\n" << result[expr("index")] << " = " << first1[expr("start1")] << ";\n" << " index++; count++;\n" << " start1++;\n" << " }\n" << " else\n" << " {\n" << " start2++;\n" << " }\n" << "}\n" << "while(start1("index")] << " = " << first1[expr("start1")] << ";\n" << " index++; count++;\n" << " start1++;\n" << "}\n" << counts[expr("i")] << " = count;\n"; } event exec(command_queue &queue) { if(m_count == 0) { return event(); } return exec_1d(queue, 0, m_count); } private: size_t m_count; }; } //end detail namespace /// /// \brief Set difference algorithm /// /// Finds the 
difference of the sorted range [first2, last2) from the sorted
/// range [first1, last1) and stores it in range starting at result
/// \return Iterator pointing to end of difference
///
/// \param first1 Iterator pointing to start of first set
/// \param last1 Iterator pointing to end of first set
/// \param first2 Iterator pointing to start of second set
/// \param last2 Iterator pointing to end of second set
/// \param result Iterator pointing to start of range in which the difference
/// will be stored
/// \param queue Queue on which to execute
///
template inline OutputIterator set_difference(InputIterator1 first1,
                                              InputIterator1 last1,
                                              InputIterator2 first2,
                                              InputIterator2 last2,
                                              OutputIterator result,
                                              command_queue &queue = system::default_queue())
{
    // NOTE(review): the template parameter list and the template arguments
    // of std::iterator_traits / vector are missing from this text - restore
    // them from the upstream header before compiling.
    typedef typename std::iterator_traits::value_type value_type;

    int tile_size = 1024;

    // NOTE(review): range sizes are stored in int here - confirm the
    // narrowing is acceptable for the supported input sizes.
    int count1 = detail::iterator_range_size(first1, last1);
    int count2 = detail::iterator_range_size(first2, last2);

    // tile boundaries into each input: one entry per tile plus a final one
    vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
    vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());

    // Tile the sets
    detail::balanced_path_kernel tiling_kernel;
    tiling_kernel.tile_size = tile_size;
    tiling_kernel.set_range(first1, last1, first2, last2,
                            tile_a.begin()+1, tile_b.begin()+1);
    // first boundary is 0, last boundary is the input size
    fill_n(tile_a.begin(), 1, 0, queue);
    fill_n(tile_b.begin(), 1, 0, queue);
    tiling_kernel.exec(queue);

    fill_n(tile_a.end()-1, 1, count1, queue);
    fill_n(tile_b.end()-1, 1, count2, queue);

    vector temp_result(count1+count2, queue.get_context());
    vector counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
    // the extra last slot starts at 0 so that it holds the total after the
    // exclusive scan below
    fill_n(counts.end()-1, 1, 0, queue);

    // Find individual differences
    detail::serial_set_difference_kernel difference_kernel;
    difference_kernel.tile_size = tile_size;
    difference_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
                                tile_b.begin(), temp_result.begin(), counts.begin());

    difference_kernel.exec(queue);

    // per-tile counts become per-tile output offsets (in place)
    exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue);

    // Compact the results
    detail::compact_kernel compact_kernel;
    compact_kernel.tile_size = tile_size;
    compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);

    compact_kernel.exec(queue);

    // last scanned entry is the total number of output elements
    return result + (counts.end() - 1).read(queue);
}

} //end compute namespace
} //end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP
compute-0.5/include/boost/compute/algorithm/set_intersection.hpp000066400000000000000000000141461263566244600253630ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP
#define BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP

// NOTE(review): the header names of these #include directives are missing
// from this text.
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {
namespace detail {

///
/// \brief Serial set intersection kernel class
///
/// Subclass of meta_kernel to perform serial set intersection after tiling
///
class serial_set_intersection_kernel : meta_kernel
{
public:
    // Output slots reserved per tile; work-item i starts writing at
    // i * tile_size. Defaults to 4 and may be overwritten before set_range().
    unsigned int tile_size;

    serial_set_intersection_kernel() : meta_kernel("set_intersection")
    {
        tile_size = 4;
    }

    // Emits the kernel source. Work-item i reads its tile bounds from
    // tile_first1[i], tile_first1[i+1], tile_first2[i] and tile_first2[i+1],
    // writes its results starting at result[i * tile_size] and stores the
    // number of elements written in counts[i].
    //
    // NOTE(review): the template parameter list and the template arguments
    // of expr are missing from this text.
    template void set_range(InputIterator1 first1,
                            InputIterator2 first2,
                            InputIterator3 tile_first1,
                            InputIterator3 tile_last1,
                            InputIterator4 tile_first2,
                            OutputIterator1 result,
                            OutputIterator2 counts)
    {
        // one work-item per tile; the tile array holds one more boundary
        // than there are tiles
        m_count = iterator_range_size(tile_first1, tile_last1) - 1;

        // NOTE(review): part of the kernel source is missing from this text
        // (the while condition and the lines that followed it were lost) -
        // restore from the upstream header.
        *this <<
            "uint i = get_global_id(0);\n" <<
            "uint start1 = " << tile_first1[expr("i")] << ";\n" <<
            "uint end1 = " << tile_first1[expr("i+1")] << ";\n" <<
            "uint start2 = " << tile_first2[expr("i")] << ";\n" <<
            "uint end2 = " << tile_first2[expr("i+1")] << ";\n" <<
            "uint index = i*" << tile_size << ";\n" <<
            "uint count = 0;\n" <<
            "while(start1("start1")] << " == " <<
                        first2[expr("start2")] << ")\n" <<
            " {\n" <<
                    result[expr("index")] <<
                        " = " << first1[expr("start1")] << ";\n" <<
            " index++; count++;\n" <<
            " start1++; start2++;\n" <<
            " }\n" <<
            " else if(" << first1[expr("start1")] << " < " <<
                        first2[expr("start2")] << ")\n" <<
            " start1++;\n" <<
            " else start2++;\n" <<
            "}\n" <<
            counts[expr("i")] << " = count;\n";
    }

    // Runs the kernel; returns a default-constructed event without
    // launching anything when there are no tiles.
    event exec(command_queue &queue)
    {
        if(m_count == 0) {
            return event();
        }

        return exec_1d(queue, 0, m_count);
    }

private:
    size_t m_count; // number of tiles (work-items)
};

} //end detail namespace

///
/// \brief Set intersection algorithm
///
/// Finds the intersection of the sorted range [first1, last1) with the sorted
/// range [first2, last2) and stores it in range starting at result
/// \return Iterator pointing to end of intersection
///
/// \param first1 Iterator pointing to start of first set
/// \param last1 Iterator pointing to end of first set
/// \param first2 Iterator pointing to start of second set
/// \param last2 Iterator pointing to end of second set
/// \param result Iterator pointing to start of range in which the intersection
/// will be stored
/// \param queue Queue on which to execute
///
template inline OutputIterator set_intersection(InputIterator1 first1,
                                                InputIterator1 last1,
                                                InputIterator2 first2,
                                                InputIterator2 last2,
                                                OutputIterator result,
                                                command_queue &queue = system::default_queue())
{
    // NOTE(review): the template parameter list and the template arguments
    // of std::iterator_traits / vector are missing from this text - restore
    // them from the upstream header before compiling.
    typedef typename std::iterator_traits::value_type value_type;

    int tile_size = 1024;

    // NOTE(review): range sizes are stored in int here - confirm the
    // narrowing is acceptable for the supported input sizes.
    int count1 = detail::iterator_range_size(first1, last1);
    int count2 = detail::iterator_range_size(first2, last2);

    // tile boundaries into each input: one entry per tile plus a final one
    vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
    vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());

    // Tile the sets
    detail::balanced_path_kernel tiling_kernel;
    tiling_kernel.tile_size = tile_size;
    tiling_kernel.set_range(first1, last1, first2, last2,
                            tile_a.begin()+1, tile_b.begin()+1);
    // first boundary is 0, last boundary is the input size
    fill_n(tile_a.begin(), 1, 0, queue);
    fill_n(tile_b.begin(), 1, 0, queue);
    tiling_kernel.exec(queue);

    fill_n(tile_a.end()-1, 1, count1, queue);
    fill_n(tile_b.end()-1, 1, count2, queue);

    vector temp_result(count1+count2, queue.get_context());
    vector counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
    // the extra last slot starts at 0 so that it holds the total after the
    // exclusive scan below
    fill_n(counts.end()-1, 1, 0, queue);

    // Find individual intersections
    detail::serial_set_intersection_kernel intersection_kernel;
    intersection_kernel.tile_size = tile_size;
    intersection_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
                                  tile_b.begin(), temp_result.begin(), counts.begin());

    intersection_kernel.exec(queue);

    // per-tile counts become per-tile output offsets (in place)
    exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue);

    // Compact the results
    detail::compact_kernel compact_kernel;
    compact_kernel.tile_size = tile_size;
    compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);

    compact_kernel.exec(queue);

    // last scanned entry is the total number of output elements
    return result + (counts.end() - 1).read(queue);
}

} //end compute namespace
} //end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP
compute-0.5/include/boost/compute/algorithm/set_symmetric_difference.hpp000066400000000000000000000160311263566244600270360ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP
#define BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP

// NOTE(review): in this copy the text between '<' and '>' (include targets,
// template parameter/argument lists, parts of the kernel text) appears to
// have been dropped -- TODO restore from the upstream header before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {
namespace detail {

///
/// \brief Serial set symmetric difference kernel class
///
/// Subclass of meta_kernel to perform serial set symmetric
/// difference after tiling
///
/// Work-item \c i reads its start/end offsets for both inputs from the tile
/// arrays, walks the two sub-ranges, skips elements that compare equal,
/// writes the others to \c result starting at slot \c i*tile_size and stores
/// the number of elements it wrote in \c counts[i].
///
class serial_set_symmetric_difference_kernel : meta_kernel
{
public:
    /// Distance between the first output slots of two consecutive tiles.
    unsigned int tile_size;

    serial_set_symmetric_difference_kernel() : meta_kernel("set_symmetric_difference")
    {
        tile_size = 4;
    }

    /// Emits the kernel source for the given ranges and records the number
    /// of tiles (one less than the number of tile boundaries).
    ///
    /// \param first1 start of the first input set
    /// \param first2 start of the second input set
    /// \param tile_first1 start of the tile boundaries into the first set
    /// \param tile_last1 end of the tile boundaries into the first set
    /// \param tile_first2 start of the tile boundaries into the second set
    /// \param result start of the per-tile output storage
    /// \param counts start of the per-tile output counts
    template
    void set_range(InputIterator1 first1,
                   InputIterator2 first2,
                   InputIterator3 tile_first1,
                   InputIterator3 tile_last1,
                   InputIterator4 tile_first2,
                   OutputIterator1 result,
                   OutputIterator2 counts)
    {
        // n boundaries delimit n-1 tiles
        m_count = iterator_range_size(tile_first1, tile_last1) - 1;

        *this <<
        "uint i = get_global_id(0);\n" <<
        "uint start1 = " << tile_first1[expr("i")] << ";\n" <<
        "uint end1 = " << tile_first1[expr("i+1")] << ";\n" <<
        "uint start2 = " << tile_first2[expr("i")] << ";\n" <<
        "uint end2 = " << tile_first2[expr("i+1")] << ";\n" <<
        "uint index = i*" << tile_size << ";\n" <<
        "uint count = 0;\n" <<
        // NOTE(review): the loop header below looks truncated -- end1/end2
        // are declared above but no loop condition or opening "if(" is left;
        // TODO restore from upstream.
        "while(start1("start1")] << " == " << first2[expr("start2")] << ")\n" <<
        " {\n" <<
        " start1++; start2++;\n" <<
        " }\n" <<
        " else if(" << first1[expr("start1")] << " < " << first2[expr("start2")] << ")\n" <<
        " {\n" <<
        result[expr("index")] << " = " << first1[expr("start1")] << ";\n" <<
        " index++; count++;\n" <<
        " start1++;\n" <<
        " }\n" <<
        " else\n" <<
        " {\n" <<
        result[expr("index")] << " = " << first2[expr("start2")] << ";\n" <<
        " index++; count++;\n" <<
        " start2++;\n" <<
        " }\n" <<
        "}\n" <<
        // NOTE(review): the two tail loops below are truncated the same way;
        // they copy what is left of the first and of the second input.
        "while(start1("index")] << " = " << first1[expr("start1")] << ";\n" <<
        " index++; count++;\n" <<
        " start1++;\n" <<
        "}\n" <<
        "while(start2("index")] << " = " << first2[expr("start2")] << ";\n" <<
        " index++; count++;\n" <<
        " start2++;\n" <<
        "}\n" <<
        counts[expr("i")] << " = count;\n";
    }

    /// Runs the kernel via exec_1d(queue, 0, m_count); when there are no
    /// tiles nothing is launched and a default-constructed event is returned.
    event exec(command_queue &queue)
    {
        if(m_count == 0) {
            return event();
        }

        return exec_1d(queue, 0, m_count);
    }

private:
    // number of tiles, set by set_range()
    size_t m_count;
};

} //end detail namespace

///
/// \brief Set symmetric difference algorithm
///
/// Finds the symmetric difference of the sorted range [first2, last2) from
/// the sorted range [first1, last1) and stores it in range starting at result
/// \return Iterator pointing to end of symmetric difference
///
/// \param first1 Iterator pointing to start of first set
/// \param last1 Iterator pointing to end of first set
/// \param first2 Iterator pointing to start of second set
/// \param last2 Iterator pointing to end of second set
/// \param result Iterator pointing to start of range in which the symmetric
/// difference will be stored
/// \param queue Queue on which to execute
///
template
inline OutputIterator set_symmetric_difference(InputIterator1 first1,
                                               InputIterator1 last1,
                                               InputIterator2 first2,
                                               InputIterator2 last2,
                                               OutputIterator result,
                                               command_queue &queue = system::default_queue())
{
    typedef typename std::iterator_traits::value_type value_type;

    // tile size handed to all three kernels below
    int tile_size = 1024;

    int count1 = detail::iterator_range_size(first1, last1);
    int count2 = detail::iterator_range_size(first2, last2);

    // ceil((count1+count2)/tile_size) tiles need one more boundary than tiles
    vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
    vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());

    // Tile the sets
    detail::balanced_path_kernel tiling_kernel;
    tiling_kernel.tile_size = tile_size;
    // the tiling kernel is given the boundary arrays from slot 1 onwards;
    // slot 0 and the last slot are set explicitly to 0 and the input sizes
    tiling_kernel.set_range(first1, last1, first2, last2,
                            tile_a.begin()+1, tile_b.begin()+1);
    fill_n(tile_a.begin(), 1, 0, queue);
    fill_n(tile_b.begin(), 1, 0, queue);
    tiling_kernel.exec(queue);

    fill_n(tile_a.end()-1, 1, count1, queue);
    fill_n(tile_b.end()-1, 1, count2, queue);

    // scratch output large enough for both inputs, plus one count per tile
    vector temp_result(count1+count2, queue.get_context());
    vector counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
    // the extra trailing slot is zeroed so that, after the exclusive scan
    // below, it holds the total number of output elements
    fill_n(counts.end()-1, 1, 0, queue);

    // Find individual symmetric differences
    detail::serial_set_symmetric_difference_kernel symmetric_difference_kernel;
    symmetric_difference_kernel.tile_size = tile_size;
    symmetric_difference_kernel.set_range(first1, first2,
                                          tile_a.begin(), tile_a.end(),
                                          tile_b.begin(), temp_result.begin(),
                                          counts.begin());
    symmetric_difference_kernel.exec(queue);

    // turn per-tile counts into per-tile output offsets (in place)
    exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue);

    // Compact the results
    detail::compact_kernel compact_kernel;
    compact_kernel.tile_size = tile_size;
    compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);
    compact_kernel.exec(queue);

    // last scanned entry == total number of elements written
    return result + (counts.end() - 1).read(queue);
}

} //end compute namespace
} //end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP compute-0.5/include/boost/compute/algorithm/set_union.hpp000066400000000000000000000155101263566244600240010ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP
#define BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP

// NOTE(review): in this copy the text between '<' and '>' (include targets,
// template parameter/argument lists, parts of the kernel text) appears to
// have been dropped -- TODO restore from the upstream header before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {
namespace detail {

///
/// \brief Serial set union kernel class
///
/// Subclass of meta_kernel to perform serial set union after tiling
///
/// Work-item \c i reads its start/end offsets for both inputs from the tile
/// arrays, walks the two sub-ranges, writes one copy of elements that
/// compare equal and every other element to \c result starting at slot
/// \c i*tile_size, and stores the number of elements written in \c counts[i].
///
class serial_set_union_kernel : meta_kernel
{
public:
    /// Distance between the first output slots of two consecutive tiles.
    unsigned int tile_size;

    serial_set_union_kernel() : meta_kernel("set_union")
    {
        tile_size = 4;
    }

    /// Emits the kernel source for the given ranges and records the number
    /// of tiles (one less than the number of tile boundaries).
    ///
    /// \param first1 start of the first input set
    /// \param first2 start of the second input set
    /// \param tile_first1 start of the tile boundaries into the first set
    /// \param tile_last1 end of the tile boundaries into the first set
    /// \param tile_first2 start of the tile boundaries into the second set
    /// \param result start of the per-tile output storage
    /// \param counts start of the per-tile output counts
    template
    void set_range(InputIterator1 first1,
                   InputIterator2 first2,
                   InputIterator3 tile_first1,
                   InputIterator3 tile_last1,
                   InputIterator4 tile_first2,
                   OutputIterator1 result,
                   OutputIterator2 counts)
    {
        // n boundaries delimit n-1 tiles
        m_count = iterator_range_size(tile_first1, tile_last1) - 1;

        *this <<
        "uint i = get_global_id(0);\n" <<
        "uint start1 = " << tile_first1[expr("i")] << ";\n" <<
        "uint end1 = " << tile_first1[expr("i+1")] << ";\n" <<
        "uint start2 = " << tile_first2[expr("i")] << ";\n" <<
        "uint end2 = " << tile_first2[expr("i+1")] << ";\n" <<
        "uint index = i*" << tile_size << ";\n" <<
        "uint count = 0;\n" <<
        // NOTE(review): the loop header below looks truncated -- end1/end2
        // are declared above but no loop condition or opening "if(" is left;
        // TODO restore from upstream.
        "while(start1("start1")] << " == " << first2[expr("start2")] << ")\n" <<
        " {\n" <<
        result[expr("index")] << " = " << first1[expr("start1")] << ";\n" <<
        " index++; count++;\n" <<
        " start1++; start2++;\n" <<
        " }\n" <<
        " else if(" << first1[expr("start1")] << " < " << first2[expr("start2")] << ")\n" <<
        " {\n" <<
        result[expr("index")] << " = " << first1[expr("start1")] << ";\n" <<
        " index++; count++;\n" <<
        " start1++;\n" <<
        " }\n" <<
        " else\n" <<
        " {\n" <<
        result[expr("index")] << " = " << first2[expr("start2")] << ";\n" <<
        " index++; count++;\n" <<
        " start2++;\n" <<
        " }\n" <<
        "}\n" <<
        // NOTE(review): the two tail loops below are truncated the same way;
        // they copy what is left of the first and of the second input.
        "while(start1("index")] << " = " << first1[expr("start1")] << ";\n" <<
        " index++; count++;\n" <<
        " start1++;\n" <<
        "}\n" <<
        "while(start2("index")] << " = " << first2[expr("start2")] << ";\n" <<
        " index++; count++;\n" <<
        " start2++;\n" <<
        "}\n" <<
        counts[expr("i")] << " = count;\n";
    }

    /// Runs the kernel via exec_1d(queue, 0, m_count); when there are no
    /// tiles nothing is launched and a default-constructed event is returned.
    event exec(command_queue &queue)
    {
        if(m_count == 0) {
            return event();
        }

        return exec_1d(queue, 0, m_count);
    }

private:
    // number of tiles, set by set_range()
    size_t m_count;
};

} //end detail namespace

///
/// \brief Set union algorithm
///
/// Finds the union of the sorted range [first1, last1) with the sorted
/// range [first2, last2) and stores it in range starting at result
/// \return Iterator pointing to end of union
///
/// \param first1 Iterator pointing to start of first set
/// \param last1 Iterator pointing to end of first set
/// \param first2 Iterator pointing to start of second set
/// \param last2 Iterator pointing to end of second set
/// \param result Iterator pointing to start of range in which the union
/// will be stored
/// \param queue Queue on which to execute
///
template
inline OutputIterator set_union(InputIterator1 first1,
                                InputIterator1 last1,
                                InputIterator2 first2,
                                InputIterator2 last2,
                                OutputIterator result,
                                command_queue &queue = system::default_queue())
{
    typedef typename std::iterator_traits::value_type value_type;

    // tile size handed to all three kernels below
    int tile_size = 1024;

    int count1 = detail::iterator_range_size(first1, last1);
    int count2 = detail::iterator_range_size(first2, last2);

    // ceil((count1+count2)/tile_size) tiles need one more boundary than tiles
    vector tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
    vector tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());

    // Tile the sets
    detail::balanced_path_kernel tiling_kernel;
    tiling_kernel.tile_size = tile_size;
    // the tiling kernel is given the boundary arrays from slot 1 onwards;
    // slot 0 and the last slot are set explicitly to 0 and the input sizes
    tiling_kernel.set_range(first1, last1, first2, last2,
                            tile_a.begin()+1, tile_b.begin()+1);
    fill_n(tile_a.begin(), 1, 0, queue);
    fill_n(tile_b.begin(), 1, 0, queue);
    tiling_kernel.exec(queue);

    fill_n(tile_a.end()-1, 1, count1, queue);
    fill_n(tile_b.end()-1, 1, count2, queue);

    // scratch output large enough for both inputs, plus one count per tile
    vector temp_result(count1+count2, queue.get_context());
    vector counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context());
    // the extra trailing slot is zeroed so that, after the exclusive scan
    // below, it holds the total number of output elements
    fill_n(counts.end()-1, 1, 0, queue);

    // Find individual unions
    detail::serial_set_union_kernel union_kernel;
    union_kernel.tile_size = tile_size;
    union_kernel.set_range(first1, first2,
                           tile_a.begin(), tile_a.end(),
                           tile_b.begin(), temp_result.begin(),
                           counts.begin());
    union_kernel.exec(queue);

    // turn per-tile counts into per-tile output offsets (in place)
    exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue);

    // Compact the results
    detail::compact_kernel compact_kernel;
    compact_kernel.tile_size = tile_size;
    compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result);
    compact_kernel.exec(queue);

    // last scanned entry == total number of elements written
    return result + (counts.end() - 1).read(queue);
}

} //end compute namespace
} //end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP compute-0.5/include/boost/compute/algorithm/sort.hpp000066400000000000000000000141741263566244600227720ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_SORT_HPP
#define BOOST_COMPUTE_ALGORITHM_SORT_HPP

// NOTE(review): in this copy the text between '<' and '>' (include targets,
// template parameter/argument lists) appears to have been dropped -- TODO
// restore from the upstream header before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {
namespace detail {

// GPU sort, ascending order, for buffer iterators whose value type satisfies
// is_radix_sortable: fewer than two elements are left alone, up to 32 go to
// the serial insertion sort, anything larger to the radix sort.
template
inline void dispatch_gpu_sort(buffer_iterator first,
                              buffer_iterator last,
                              less,
                              command_queue &queue,
                              typename boost::enable_if_c<
                                  is_radix_sortable::value
                              >::type* = 0)
{
    size_t count = detail::iterator_range_size(first, last);

    if(count < 2){
        // nothing to do
        return;
    }
    else if(count <= 32){
        ::boost::compute::detail::serial_insertion_sort(first, last, queue);
    }
    else {
        ::boost::compute::detail::radix_sort(first, last, queue);
    }
}

// GPU sort, descending order, for radix-sortable value types: same size
// thresholds as above; large ranges are radix-sorted ascending and reversed.
template
inline void dispatch_gpu_sort(buffer_iterator first,
                              buffer_iterator last,
                              greater compare,
                              command_queue &queue,
                              typename boost::enable_if_c<
                                  is_radix_sortable::value
                              >::type* = 0)
{
    size_t count = detail::iterator_range_size(first, last);

    if(count < 2){
        // nothing to do
        return;
    }
    else if(count <= 32){
        ::boost::compute::detail::serial_insertion_sort(
            first, last, compare, queue
        );
    }
    else {
        // radix sort in ascending order
        ::boost::compute::detail::radix_sort(first, last, queue);

        // reverse range to descending order
        ::boost::compute::reverse(first, last, queue);
    }
}

// GPU sort fallback for every other iterator/comparator combination:
// always the serial insertion sort, whatever the range size.
template
inline void dispatch_gpu_sort(Iterator first,
                              Iterator last,
                              Compare compare,
                              command_queue &queue)
{
    ::boost::compute::detail::serial_insertion_sort(
        first, last, compare, queue
    );
}

// sort() for device iterators
// GPU devices go through dispatch_gpu_sort(); all other device types use
// merge_sort_on_cpu().
template
inline void dispatch_sort(Iterator first,
                          Iterator last,
                          Compare compare,
                          command_queue &queue,
                          typename boost::enable_if<
                              is_device_iterator
                          >::type* = 0)
{
    if(queue.get_device().type() & device::gpu) {
        dispatch_gpu_sort(first, last, compare, queue);
        return;
    }
    ::boost::compute::detail::merge_sort_on_cpu(first, last, compare, queue);
}

// sort() for host iterators
// Wraps the host memory starting at *first in a mapped_view, sorts the view
// with the device-iterator overload and maps it back to the host.
template
inline void dispatch_sort(Iterator first,
                          Iterator last,
                          Compare compare,
                          command_queue &queue,
                          typename boost::disable_if<
                              is_device_iterator
                          >::type* = 0)
{
    typedef typename std::iterator_traits::value_type T;

    size_t size = static_cast(std::distance(first, last));

    // create mapped buffer
    mapped_view view(
        boost::addressof(*first), size, queue.get_context()
    );

    // sort mapped buffer
    dispatch_sort(view.begin(), view.end(), compare, queue);

    // return results to host
    view.map(queue);
}

} // end detail namespace

/// Sorts the values in the range [\p first, \p last) according to
/// \p compare.
///
/// \param first first element in the range to sort
/// \param last last element in the range to sort
/// \param compare comparison function (by default \c less)
/// \param queue command queue to perform the operation
///
/// For example, to sort a vector on the device:
/// \code
/// // create vector on the device with data
/// float data[] = { 2.f, 4.f, 1.f, 3.f };
/// boost::compute::vector<float> vec(data, data + 4, queue);
///
/// // sort the vector on the device
/// boost::compute::sort(vec.begin(), vec.end(), queue);
/// \endcode
///
/// The sort() algorithm can also be directly used with host iterators. This
/// example will automatically transfer the data to the device, sort it, and
/// then transfer the data back to the host:
/// \code
/// std::vector<int> data = { 9, 3, 2, 5, 1, 4, 6, 7 };
///
/// boost::compute::sort(data.begin(), data.end(), queue);
/// \endcode
///
/// \see is_sorted()
template
inline void sort(Iterator first,
                 Iterator last,
                 Compare compare,
                 command_queue &queue = system::default_queue())
{
    ::boost::compute::detail::dispatch_sort(first, last, compare, queue);
}

/// \overload
template
inline void sort(Iterator first,
                 Iterator last,
                 command_queue &queue = system::default_queue())
{
    typedef typename std::iterator_traits::value_type value_type;

    // no comparator given: forward with a default-constructed less
    ::boost::compute::sort(
        first, last, ::boost::compute::less(), queue
    );
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_SORT_HPP compute-0.5/include/boost/compute/algorithm/sort_by_key.hpp000066400000000000000000000130071263566244600243260ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP
#define BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP

// NOTE(review): in this copy the text between '<' and '>' (include targets,
// template parameter/argument lists) appears to have been dropped -- TODO
// restore from the upstream header before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {
namespace detail {

// GPU key-value sort, ascending order, for radix-sortable key types:
// fewer than 32 keys use the serial insertion sort, otherwise the radix sort.
template
inline void dispatch_gpu_sort_by_key(KeyIterator keys_first,
                                     KeyIterator keys_last,
                                     ValueIterator values_first,
                                     less::value_type> compare,
                                     command_queue &queue,
                                     typename boost::enable_if_c<
                                         is_radix_sortable<
                                             typename std::iterator_traits::value_type
                                         >::value
                                     >::type* = 0)
{
    size_t count = detail::iterator_range_size(keys_first, keys_last);

    if(count < 32){
        detail::serial_insertion_sort_by_key(
            keys_first, keys_last, values_first, compare, queue
        );
    }
    else {
        detail::radix_sort_by_key(
            keys_first, keys_last, values_first, queue
        );
    }
}

// GPU key-value sort, descending order, for radix-sortable key types:
// large inputs are radix-sorted ascending, then keys and values are reversed.
template
inline void dispatch_gpu_sort_by_key(KeyIterator keys_first,
                                     KeyIterator keys_last,
                                     ValueIterator values_first,
                                     greater::value_type> compare,
                                     command_queue &queue,
                                     typename boost::enable_if_c<
                                         is_radix_sortable<
                                             typename std::iterator_traits::value_type
                                         >::value
                                     >::type* = 0)
{
    size_t count = detail::iterator_range_size(keys_first, keys_last);

    if(count < 32){
        detail::serial_insertion_sort_by_key(
            keys_first, keys_last, values_first, compare, queue
        );
    }
    else {
        // radix sorts in ascending order
        detail::radix_sort_by_key(
            keys_first, keys_last, values_first, queue
        );

        // Reverse keys, values for descending order
        ::boost::compute::reverse(keys_first, keys_last, queue);
        ::boost::compute::reverse(values_first, values_first + count, queue);
    }
}

// GPU key-value sort fallback for any other comparator or key type:
// always the serial insertion sort.
template
inline void dispatch_gpu_sort_by_key(KeyIterator keys_first,
                                     KeyIterator keys_last,
                                     ValueIterator values_first,
                                     Compare compare,
                                     command_queue &queue)
{
    detail::serial_insertion_sort_by_key(
        keys_first, keys_last, values_first, compare, queue
    );
}

// Picks the implementation by device type: GPU devices go through
// dispatch_gpu_sort_by_key(), everything else uses merge_sort_by_key_on_cpu().
template
inline void dispatch_sort_by_key(KeyIterator keys_first,
                                 KeyIterator keys_last,
                                 ValueIterator values_first,
                                 Compare compare,
                                 command_queue &queue)
{
    if(queue.get_device().type() & device::gpu) {
        dispatch_gpu_sort_by_key(keys_first, keys_last, values_first, compare, queue);
        return;
    }
    ::boost::compute::detail::merge_sort_by_key_on_cpu(
        keys_first, keys_last, values_first, compare, queue
    );
}

}

/// Performs a key-value sort using the keys in the range [\p keys_first,
/// \p keys_last) on the values in the range [\p values_first,
/// \p values_first \c + (\p keys_last \c - \p keys_first)) using \p compare.
///
/// If no compare function is specified, \c less is used.
///
/// \see sort()
template
inline void sort_by_key(KeyIterator keys_first,
                        KeyIterator keys_last,
                        ValueIterator values_first,
                        Compare compare,
                        command_queue &queue = system::default_queue())
{
    ::boost::compute::detail::dispatch_sort_by_key(
        keys_first, keys_last, values_first, compare, queue
    );
}

/// \overload
template
inline void sort_by_key(KeyIterator keys_first,
                        KeyIterator keys_last,
                        ValueIterator values_first,
                        command_queue &queue = system::default_queue())
{
    typedef typename std::iterator_traits::value_type key_type;

    // no comparator given: forward with a default-constructed less
    ::boost::compute::sort_by_key(
        keys_first, keys_last, values_first, less(), queue
    );
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP compute-0.5/include/boost/compute/algorithm/stable_partition.hpp000066400000000000000000000047561263566244600253530ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP #define BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP #include #include #include #include #include #include namespace boost { namespace compute { /// /// \brief Partitioning algorithm /// /// Partitions the elements in the range [\p first, \p last) according to /// \p predicate. The order of the elements is preserved. /// \return Iterator pointing to end of true values /// /// \param first Iterator pointing to start of range /// \param last Iterator pointing to end of range /// \param predicate Unary predicate to be applied on each element /// \param queue Queue on which to execute /// /// \see is_partitioned() and partition() /// template inline Iterator stable_partition(Iterator first, Iterator last, UnaryPredicate predicate, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; // make temporary copy of the input ::boost::compute::vector tmp(first, last, queue); // copy true values Iterator last_true = ::boost::compute::copy_if(tmp.begin(), tmp.end(), first, predicate, queue); // copy false values Iterator last_false = ::boost::compute::copy_if(tmp.begin(), tmp.end(), last_true, not1(predicate), queue); // return iterator pointing to the last true value return last_true; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP compute-0.5/include/boost/compute/algorithm/stable_sort.hpp000066400000000000000000000063441263566244600243240ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP #define BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline void dispatch_gpu_stable_sort(Iterator first, Iterator last, Compare compare, command_queue &queue) { ::boost::compute::detail::serial_insertion_sort( first, last, compare, queue ); } template inline typename boost::enable_if_c::value>::type dispatch_gpu_stable_sort(buffer_iterator first, buffer_iterator last, less, command_queue &queue) { ::boost::compute::detail::radix_sort(first, last, queue); } template inline typename boost::enable_if_c::value>::type dispatch_gpu_stable_sort(buffer_iterator first, buffer_iterator last, greater, command_queue &queue) { // radix sort in ascending order ::boost::compute::detail::radix_sort(first, last, queue); // reverse range to descending order ::boost::compute::reverse(first, last, queue); } } // end detail namespace /// Sorts the values in the range [\p first, \p last) according to /// \p compare. The relative order of identical values is preserved. 
/// /// \see sort(), is_sorted() template inline void stable_sort(Iterator first, Iterator last, Compare compare, command_queue &queue = system::default_queue()) { if(queue.get_device().type() & device::gpu) { ::boost::compute::detail::dispatch_gpu_stable_sort( first, last, compare, queue ); } ::boost::compute::detail::merge_sort_on_cpu(first, last, compare, queue); } /// \overload template inline void stable_sort(Iterator first, Iterator last, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; ::boost::compute::less less; ::boost::compute::stable_sort(first, last, less, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP compute-0.5/include/boost/compute/algorithm/swap_ranges.hpp000066400000000000000000000030571263566244600243120ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP #define BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP #include #include #include #include namespace boost { namespace compute { /// Swaps the elements in the range [\p first1, \p last1) with the /// elements in the range beginning at \p first2. 
// NOTE(review): the template parameter list and the template arguments of
// iterator_traits/vector appear to have been dropped from this copy -- TODO
// restore from the upstream header.
template
inline Iterator2 swap_ranges(Iterator1 first1,
                             Iterator1 last1,
                             Iterator2 first2,
                             command_queue &queue = system::default_queue())
{
    typedef typename std::iterator_traits::value_type value_type;

    // the second range has the same length as the first
    Iterator2 last2 = first2 + std::distance(first1, last1);

    // keep a copy of the first range, overwrite it with the second range,
    // then write the saved copy over the second range
    ::boost::compute::vector tmp(first1, last1, queue);
    ::boost::compute::copy(first2, last2, first1, queue);
    ::boost::compute::copy(tmp.begin(), tmp.end(), first2, queue);

    // end of the second range
    return last2;
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP compute-0.5/include/boost/compute/algorithm/transform.hpp000066400000000000000000000052661263566244600240200ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP
#define BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP

// NOTE(review): the include targets are blank in this copy -- TODO restore
// from the upstream header.
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {

/// Transforms the elements in the range [\p first, \p last) using
/// \p op and stores the results in the range beginning at
/// \p result.
/// /// For example, to calculate the absolute value for each element in a vector: /// /// \snippet test/test_transform.cpp transform_abs /// /// \see copy() template inline OutputIterator transform(InputIterator first, InputIterator last, OutputIterator result, UnaryOperator op, command_queue &queue = system::default_queue()) { return copy( ::boost::compute::make_transform_iterator(first, op), ::boost::compute::make_transform_iterator(last, op), result, queue ); } /// \overload template inline OutputIterator transform(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryOperator op, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::difference_type difference_type; difference_type n = std::distance(first1, last1); return transform( make_zip_iterator(boost::make_tuple(first1, first2)), make_zip_iterator(boost::make_tuple(last1, first2 + n)), result, detail::unpack(op), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP compute-0.5/include/boost/compute/algorithm/transform_if.hpp000066400000000000000000000105141263566244600244660ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP
#define BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP

// NOTE(review): in this copy the text between '<' and '>' (include targets,
// template parameter/argument lists) appears to have been dropped -- TODO
// restore from the upstream header before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {
namespace detail {

// Writes, for every input element that satisfies predicate, either
// function(element) or (when copyIndex is true) the element's index to
// consecutive slots starting at result. Returns result advanced by the
// number of elements written; an empty input returns result unchanged.
template
inline OutputIterator transform_if_impl(InputIterator first,
                                        InputIterator last,
                                        OutputIterator result,
                                        UnaryFunction function,
                                        Predicate predicate,
                                        bool copyIndex,
                                        command_queue &queue)
{
    typedef typename std::iterator_traits::difference_type difference_type;

    size_t count = detail::iterator_range_size(first, last);
    if(count == 0){
        return result;
    }

    const context &context = queue.get_context();

    // storage for destination indices
    ::boost::compute::vector indices(count, context);

    // write counts
    // (1 for every element that satisfies the predicate, 0 otherwise)
    ::boost::compute::detail::meta_kernel k1("transform_if_write_counts");
    k1 << indices.begin()[k1.get_global_id(0)] << " = "
           << predicate(first[k1.get_global_id(0)]) << " ? 1 : 0;\n";
    k1.exec_1d(queue, 0, count);

    // count number of elements to be copied
    // (done before the scan below overwrites the 0/1 flags)
    size_t copied_element_count =
        ::boost::compute::count(indices.begin(), indices.end(), 1, queue);

    // scan indices
    // (in place: each flag becomes the output position of its element)
    ::boost::compute::exclusive_scan(
        indices.begin(), indices.end(), indices.begin(), queue
    );

    // copy values
    ::boost::compute::detail::meta_kernel k2("transform_if_do_copy");
    k2 << "if(" << predicate(first[k2.get_global_id(0)]) << ")" <<
          " " << result[indices.begin()[k2.get_global_id(0)]] << "=";

    if(copyIndex){
        k2 << k2.get_global_id(0) << ";\n";
    }
    else {
        k2 << function(first[k2.get_global_id(0)]) << ";\n";
    }

    k2.exec_1d(queue, 0, count);

    return result + static_cast(copied_element_count);
}

// Overload for a discard_iterator destination: nothing is written, only the
// number of elements satisfying predicate is counted and added to result.
template
inline discard_iterator transform_if_impl(InputIterator first,
                                          InputIterator last,
                                          discard_iterator result,
                                          UnaryFunction function,
                                          Predicate predicate,
                                          bool copyIndex,
                                          command_queue &queue)
{
    (void) function;
    (void) copyIndex;

    return result + count_if(first, last, predicate, queue);
}

} // end detail namespace

/// Applies \p function to each element in the range [\p first, \p last) for
/// which \p predicate returns \c true and stores the results consecutively
/// in the range beginning at \p result.
// NOTE(review): the template parameter list appears to have been dropped
// from this copy -- TODO restore from the upstream header.
template
inline OutputIterator transform_if(InputIterator first,
                                   InputIterator last,
                                   OutputIterator result,
                                   UnaryFunction function,
                                   Predicate predicate,
                                   command_queue &queue = system::default_queue())
{
    // copyIndex = false: write function(element), not the element's index
    return detail::transform_if_impl(
        first, last, result, function, predicate, false, queue
    );
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP compute-0.5/include/boost/compute/algorithm/transform_reduce.hpp000066400000000000000000000062331263566244600253420ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP
#define BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP

// NOTE(review): the include targets are blank in this copy -- TODO restore
// from the upstream header.
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {

/// Transforms each value in the range [\p first, \p last) with the unary
/// \p transform_function and then reduces each transformed value with
/// \p reduce_function.
/// /// For example, to calculate the sum of the absolute values of a vector /// of integers: /// /// \snippet test/test_transform_reduce.cpp sum_abs_int /// /// \see reduce(), inner_product() template inline void transform_reduce(InputIterator first, InputIterator last, OutputIterator result, UnaryTransformFunction transform_function, BinaryReduceFunction reduce_function, command_queue &queue = system::default_queue()) { ::boost::compute::reduce( ::boost::compute::make_transform_iterator(first, transform_function), ::boost::compute::make_transform_iterator(last, transform_function), result, reduce_function, queue ); } /// \overload template inline void transform_reduce(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryTransformFunction transform_function, BinaryReduceFunction reduce_function, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::difference_type difference_type; difference_type n = std::distance(first1, last1); ::boost::compute::transform_reduce( ::boost::compute::make_zip_iterator( boost::make_tuple(first1, first2) ), ::boost::compute::make_zip_iterator( boost::make_tuple(last1, first2 + n) ), result, detail::unpack(transform_function), reduce_function, queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP compute-0.5/include/boost/compute/algorithm/unique.hpp000066400000000000000000000043721263566244600233100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP #define BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP #include #include #include #include #include namespace boost { namespace compute { /// Removes all consecutive duplicate elements (determined by \p op) from the /// range [first, last). If \p op is not provided, the equality operator is /// used. /// /// \param first first element in the input range /// \param last last element in the input range /// \param op binary operator used to check for uniqueness /// \param queue command queue to perform the operation /// /// \return \c InputIterator to the new logical end of the range /// /// \see unique_copy() template inline InputIterator unique(InputIterator first, InputIterator last, BinaryPredicate op, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; vector temp(first, last, queue); return ::boost::compute::unique_copy( temp.begin(), temp.end(), first, op, queue ); } /// \overload template inline InputIterator unique(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; return ::boost::compute::unique( first, last, ::boost::compute::equal_to(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP compute-0.5/include/boost/compute/algorithm/unique_copy.hpp000066400000000000000000000136101263566244600243350ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_UNIQUE_COPY_HPP #define BOOST_COMPUTE_ALGORITHM_UNIQUE_COPY_HPP #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline OutputIterator serial_unique_copy(InputIterator first, InputIterator last, OutputIterator result, BinaryPredicate op, command_queue &queue) { if(first == last){ return result; } typedef typename std::iterator_traits::value_type value_type; const context &context = queue.get_context(); size_t count = detail::iterator_range_size(first, last); detail::meta_kernel k("serial_unique_copy"); vector unique_count_vector(1, context); size_t size_arg = k.add_arg("size"); size_t unique_count_arg = k.add_arg(memory_object::global_memory, "unique_count"); k << k.decl("index") << " = 0;\n" << k.decl("current") << " = " << first[k.var("0")] << ";\n" << result[k.var("0")] << " = current;\n" << "for(uint i = 1; i < size; i++){\n" << " " << k.decl("next") << " = " << first[k.var("i")] << ";\n" << " if(!" 
<< op(k.var("current"), k.var("next")) << "){\n" << " " << result[k.var("++index")] << " = next;\n" << " " << "current = next;\n" << " }\n" << "}\n" << "*unique_count = index + 1;\n"; k.set_arg(size_arg, count); k.set_arg(unique_count_arg, unique_count_vector.get_buffer()); k.exec_1d(queue, 0, 1, 1); uint_ unique_count; copy_n(unique_count_vector.begin(), 1, &unique_count, queue); return result + unique_count; } template inline OutputIterator unique_copy(InputIterator first, InputIterator last, OutputIterator result, BinaryPredicate op, command_queue &queue) { if(first == last){ return result; } const context &context = queue.get_context(); size_t count = detail::iterator_range_size(first, last); // flags marking unique elements vector flags(count, context); // find each unique element and mark it with a one transform( first, last - 1, first + 1, flags.begin() + 1, not2(op), queue ); // first element is always unique fill_n(flags.begin(), 1, 1, queue); // storage for desination indices vector indices(count, context); // copy indices for each unique element vector::iterator last_index = detail::copy_index_if( flags.begin(), flags.end(), indices.begin(), lambda::_1 == 1, queue ); // copy unique values from input to output using the computed indices gather(indices.begin(), last_index, first, result, queue); // return an iterator to the end of the unique output range return result + std::distance(indices.begin(), last_index); } } // end detail namespace /// Makes a copy of the range [first, last) and removes all consecutive /// duplicate elements (determined by \p op) from the copy. If \p op is not /// provided, the equality operator is used. 
/// /// \param first first element in the input range /// \param last last element in the input range /// \param result first element in the result range /// \param op binary operator used to check for uniqueness /// \param queue command queue to perform the operation /// /// \return \c OutputIterator to the end of the result range /// /// \see unique() template inline OutputIterator unique_copy(InputIterator first, InputIterator last, OutputIterator result, BinaryPredicate op, command_queue &queue = system::default_queue()) { size_t count = detail::iterator_range_size(first, last); if(count < 32){ return detail::serial_unique_copy(first, last, result, op, queue); } else { return detail::unique_copy(first, last, result, op, queue); } } /// \overload template inline OutputIterator unique_copy(InputIterator first, InputIterator last, OutputIterator result, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; return ::boost::compute::unique_copy( first, last, result, ::boost::compute::equal_to(), queue ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_UNIQUE_COPY_HPP compute-0.5/include/boost/compute/algorithm/upper_bound.hpp000066400000000000000000000025221263566244600243170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP #define BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP #include #include #include #include namespace boost { namespace compute { /// Returns an iterator pointing to the first element in the sorted /// range [\p first, \p last) that is not less than or equal to /// \p value. template inline InputIterator upper_bound(InputIterator first, InputIterator last, const T &value, command_queue &queue = system::default_queue()) { using ::boost::compute::_1; InputIterator position = detail::binary_find(first, last, _1 > value, queue); return position; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP compute-0.5/include/boost/compute/allocator.hpp000066400000000000000000000013541263566244600217710ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALLOCATOR_HPP #define BOOST_COMPUTE_ALLOCATOR_HPP /// \file /// /// Meta-header to include all Boost.Compute allocator headers. 
#include #include #endif // BOOST_COMPUTE_ALLOCATOR_HPP compute-0.5/include/boost/compute/allocator/000077500000000000000000000000001263566244600212555ustar00rootroot00000000000000compute-0.5/include/boost/compute/allocator/buffer_allocator.hpp000066400000000000000000000056561263566244600253130ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP #define BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP #include #include #include #include namespace boost { namespace compute { /// \class buffer_allocator /// \brief The buffer_allocator class allocates memory with \ref buffer objects /// /// \see buffer template class buffer_allocator { public: typedef T value_type; typedef detail::device_ptr pointer; typedef const detail::device_ptr const_pointer; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; explicit buffer_allocator(const context &context) : m_context(context), m_mem_flags(buffer::read_write) { } buffer_allocator(const buffer_allocator &other) : m_context(other.m_context), m_mem_flags(other.m_mem_flags) { } buffer_allocator& operator=(const buffer_allocator &other) { if(this != &other){ m_context = other.m_context; m_mem_flags = other.m_mem_flags; } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES buffer_allocator(buffer_allocator&& other) BOOST_NOEXCEPT : m_context(std::move(other.m_context)), m_mem_flags(other.m_mem_flags) { } buffer_allocator& operator=(buffer_allocator&& other) BOOST_NOEXCEPT { m_context = std::move(other.m_context); m_mem_flags = other.m_mem_flags; return *this; } #endif // 
BOOST_COMPUTE_NO_RVALUE_REFERENCES ~buffer_allocator() { } pointer allocate(size_type n) { buffer buf(m_context, n * sizeof(T), m_mem_flags); clRetainMemObject(buf.get()); return detail::device_ptr(buf); } void deallocate(pointer p, size_type n) { BOOST_ASSERT(p.get_buffer().get_context() == m_context); (void) n; clReleaseMemObject(p.get_buffer().get()); } size_type max_size() const { return m_context.get_device().max_memory_alloc_size() / sizeof(T); } context get_context() const { return m_context; } protected: void set_mem_flags(cl_mem_flags flags) { m_mem_flags = flags; } private: context m_context; cl_mem_flags m_mem_flags; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP compute-0.5/include/boost/compute/allocator/pinned_allocator.hpp000066400000000000000000000025601263566244600253060ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP #define BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP #include namespace boost { namespace compute { template class pinned_allocator : public buffer_allocator { public: explicit pinned_allocator(const context &context) : buffer_allocator(context) { buffer_allocator::set_mem_flags( buffer::read_write | buffer::alloc_host_ptr ); } pinned_allocator(const pinned_allocator &other) : buffer_allocator(other) { } pinned_allocator& operator=(const pinned_allocator &other) { if(this != &other){ buffer_allocator::operator=(other); } return *this; } ~pinned_allocator() { } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP compute-0.5/include/boost/compute/async.hpp000066400000000000000000000012771263566244600211320ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ASYNC_HPP #define BOOST_COMPUTE_ASYNC_HPP /// \file /// /// Meta-header to include all Boost.Compute async headers. 
#include #include #endif // BOOST_COMPUTE_ASYNC_HPP compute-0.5/include/boost/compute/async/000077500000000000000000000000001263566244600204125ustar00rootroot00000000000000compute-0.5/include/boost/compute/async/future.hpp000066400000000000000000000055701263566244600224440ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ASYNC_FUTURE_HPP #define BOOST_COMPUTE_ASYNC_FUTURE_HPP #include namespace boost { namespace compute { /// \class future /// \brief Holds the result of an asynchronous computation. /// /// \see event, wait_list template class future { public: future() : m_event(0) { } future(const T &result, const event &event) : m_result(result), m_event(event) { } future(const future &other) : m_result(other.m_result), m_event(other.m_event) { } future& operator=(const future &other) { if(this != &other){ m_result = other.m_result; m_event = other.m_event; } return *this; } ~future() { } /// Returns the result of the computation. This will block until /// the result is ready. T get() { wait(); return m_result; } /// Returns \c true if the future is valid. bool valid() const { return m_event != 0; } /// Blocks until the computation is complete. void wait() const { m_event.wait(); } /// Returns the underlying event object. 
event get_event() const { return m_event; } private: T m_result; event m_event; }; /// \internal_ template<> class future { public: future() : m_event(0) { } template future(const future &other) : m_event(other.get_event()) { } explicit future(const event &event) : m_event(event) { } template future &operator=(const future &other) { m_event = other.get_event(); return *this; } future &operator=(const future &other) { if(this != &other){ m_event = other.m_event; } return *this; } ~future() { } void get() { wait(); } bool valid() const { return m_event != 0; } void wait() const { m_event.wait(); } event get_event() const { return m_event; } private: event m_event; }; /// \internal_ template inline future make_future(const Result &result, const event &event) { return future(result, event); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ASYNC_FUTURE_HPP compute-0.5/include/boost/compute/async/wait.hpp000066400000000000000000000031641263566244600220730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ASYNC_WAIT_HPP #define BOOST_COMPUTE_ASYNC_WAIT_HPP #include #include namespace boost { namespace compute { namespace detail { #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES template inline void insert_events_variadic(wait_list &l, Event&& event) { l.insert(std::forward(event)); } template inline void insert_events_variadic(wait_list &l, Event&& event, Rest&&... 
rest) { l.insert(std::forward(event)); insert_events_variadic(l, std::forward(rest)...); } #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES } // end detail namespace #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES /// Blocks until all events have completed. Events can either be \ref event /// objects or \ref future "future" objects. /// /// \see event, wait_list template inline void wait_for_all(Events&&... events) { wait_list l; detail::insert_events_variadic(l, std::forward(events)...); l.wait(); } #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ASYNC_WAIT_HPP compute-0.5/include/boost/compute/async/wait_guard.hpp000066400000000000000000000035061263566244600232550ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP #define BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP #include namespace boost { namespace compute { /// \class wait_guard /// \brief A guard object for synchronizing an operation on the device /// /// The wait_guard class stores a waitable object representing an operation /// on a compute device (e.g. \ref event, \ref future "future") and calls /// its \c wait() method when the guard object goes out of scope. /// /// This is useful for ensuring that an OpenCL operation completes before /// leaving the current scope and cleaning up any resources. 
/// /// For example: /// \code /// // enqueue a compute kernel for execution /// event e = queue.enqueue_nd_range_kernel(...); /// /// // call e.wait() upon exiting the current scope /// wait_guard guard(e); /// \endcode /// /// \ref wait_list, wait_for_all() template class wait_guard : boost::noncopyable { public: /// Creates a new wait_guard object for \p waitable. wait_guard(const Waitable &waitable) : m_waitable(waitable) { } /// Destroys the wait_guard object. The default implementation will call /// \c wait() on the stored waitable object. ~wait_guard() { m_waitable.wait(); } private: Waitable m_waitable; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP compute-0.5/include/boost/compute/buffer.hpp000066400000000000000000000151071263566244600212630ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_BUFFER_HPP #define BOOST_COMPUTE_BUFFER_HPP #include #include #include #include #include namespace boost { namespace compute { // forward declarations class command_queue; /// \class buffer /// \brief A memory buffer on a compute device. /// /// The buffer class represents a memory buffer on a compute device. /// /// Buffers are allocated within a compute context. For example, to allocate /// a memory buffer for 32 float's: /// /// \snippet test/test_buffer.cpp constructor /// /// Once created, data can be copied to and from the buffer using the /// \c enqueue_*_buffer() methods in the command_queue class. 
For example, to /// copy a set of \c int values from the host to the device: /// \code /// int data[] = { 1, 2, 3, 4 }; /// /// queue.enqueue_write_buffer(buf, 0, 4 * sizeof(int), data); /// \endcode /// /// Also see the copy() algorithm for a higher-level interface to copying data /// between the host and the device. For a higher-level, dynamically-resizable, /// type-safe container for data on a compute device, use the vector class. /// /// Buffer objects have reference semantics. Creating a copy of a buffer /// object simply creates another reference to the underlying OpenCL memory /// object. To create an actual copy use the buffer::clone() method. /// /// \see context, command_queue class buffer : public memory_object { public: /// Creates a null buffer object. buffer() : memory_object() { } /// Creates a buffer object for \p mem. If \p retain is \c true, the /// reference count for \p mem will be incremented. explicit buffer(cl_mem mem, bool retain = true) : memory_object(mem, retain) { } /// Create a new memory buffer in of \p size with \p flags in /// \p context. /// /// \see_opencl_ref{clCreateBuffer} buffer(const context &context, size_t size, cl_mem_flags flags = read_write, void *host_ptr = 0) { cl_int error = 0; m_mem = clCreateBuffer(context, flags, (std::max)(size, size_t(1)), host_ptr, &error); if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new buffer object as a copy of \p other. buffer(const buffer &other) : memory_object(other) { } /// Copies the buffer object from \p other to \c *this. buffer& operator=(const buffer &other) { if(this != &other){ memory_object::operator=(other); } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new buffer object from \p other. buffer(buffer&& other) BOOST_NOEXCEPT : memory_object(std::move(other)) { } /// Move-assigns the buffer from \p other to \c *this. 
buffer& operator=(buffer&& other) BOOST_NOEXCEPT { memory_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the buffer object. ~buffer() { } /// Returns the size of the buffer in bytes. size_t size() const { return get_memory_size(); } /// \internal_ size_t max_size() const { return get_context().get_device().max_memory_alloc_size(); } /// Returns information about the buffer. /// /// \see_opencl_ref{clGetMemObjectInfo} template T get_info(cl_mem_info info) const { return get_memory_info(info); } /// \overload template typename detail::get_object_info_type::type get_info() const; /// Creates a new buffer with a copy of the data in \c *this. Uses /// \p queue to perform the copy. buffer clone(command_queue &queue) const; #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Creates a new buffer out of this buffer. /// The new buffer is a sub region of this buffer. /// \p flags The mem_flags which should be used to create the new buffer /// \p origin The start index in this buffer /// \p size The size of the new sub buffer /// /// \see_opencl_ref{clCreateSubBuffer} /// /// \opencl_version_warning{1,1} buffer create_subbuffer(cl_mem_flags flags, size_t origin, size_t size) { BOOST_ASSERT(origin + size <= this->size()); BOOST_ASSERT(origin % (get_context(). get_device(). 
get_info() / 8) == 0); cl_int error = 0; cl_buffer_region region = { origin, size }; cl_mem mem = clCreateSubBuffer(m_mem, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); if(!mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return buffer(mem, false); } #endif // CL_VERSION_1_1 }; /// \internal_ define get_info() specializations for buffer BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer, ((cl_mem_object_type, CL_MEM_TYPE)) ((cl_mem_flags, CL_MEM_FLAGS)) ((size_t, CL_MEM_SIZE)) ((void *, CL_MEM_HOST_PTR)) ((cl_uint, CL_MEM_MAP_COUNT)) ((cl_uint, CL_MEM_REFERENCE_COUNT)) ((cl_context, CL_MEM_CONTEXT)) ) #ifdef CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer, ((cl_mem, CL_MEM_ASSOCIATED_MEMOBJECT)) ((size_t, CL_MEM_OFFSET)) ) #endif // CL_VERSION_1_1 namespace detail { // set_kernel_arg specialization for buffer template<> struct set_kernel_arg { void operator()(kernel &kernel_, size_t index, const buffer &buffer_) { kernel_.set_arg(index, buffer_.get()); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_BUFFER_HPP compute-0.5/include/boost/compute/cl.hpp000066400000000000000000000011411263566244600204010ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CL_HPP #define BOOST_COMPUTE_CL_HPP #if defined(__APPLE__) #include #else #include #endif #endif // BOOST_COMPUTE_CL_HPP compute-0.5/include/boost/compute/cl_ext.hpp000066400000000000000000000011651263566244600212670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CL_EXT_HPP #define BOOST_COMPUTE_CL_EXT_HPP #if defined(__APPLE__) #include #else #include #endif #endif // BOOST_COMPUTE_CL_EXT_HPP compute-0.5/include/boost/compute/closure.hpp000066400000000000000000000235771263566244600215000ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CLOSURE_HPP #define BOOST_COMPUTE_CLOSURE_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template class invoked_closure { public: typedef ResultType result_type; BOOST_STATIC_CONSTANT( size_t, arity = boost::tuples::length::value ); invoked_closure(const std::string &name, const std::string &source, const std::map &definitions, const ArgTuple &args, const CaptureTuple &capture) : m_name(name), m_source(source), m_definitions(definitions), m_args(args), m_capture(capture) { } std::string name() const { return m_name; } std::string source() const { return m_source; } const std::map& definitions() const { return m_definitions; } const ArgTuple& args() const { return m_args; } const CaptureTuple& capture() const { return m_capture; } private: std::string m_name; std::string m_source; std::map m_definitions; ArgTuple m_args; CaptureTuple m_capture; }; } // end detail namespace /// \internal_ template class closure { public: typedef typename boost::function_traits::result_type result_type; BOOST_STATIC_CONSTANT( size_t, arity = boost::function_traits::arity ); closure(const std::string &name, const CaptureTuple &capture, const std::string &source) : m_name(name), m_source(source), m_capture(capture) { } ~closure() { } std::string name() const { return m_name; } /// \internal_ std::string source() const { return m_source; } /// \internal_ void define(std::string name, std::string value = std::string()) { m_definitions[name] = value; } /// \internal_ detail::invoked_closure, CaptureTuple> operator()() const { BOOST_STATIC_ASSERT_MSG( arity == 0, "Non-nullary closure function invoked with zero arguments" ); return detail::invoked_closure, CaptureTuple>( m_name, m_source, m_definitions, boost::make_tuple(), m_capture ); 
} /// \internal_ template detail::invoked_closure, CaptureTuple> operator()(const Arg1 &arg1) const { BOOST_STATIC_ASSERT_MSG( arity == 1, "Non-unary closure function invoked with one argument" ); return detail::invoked_closure, CaptureTuple>( m_name, m_source, m_definitions, boost::make_tuple(arg1), m_capture ); } /// \internal_ template detail::invoked_closure, CaptureTuple> operator()(const Arg1 &arg1, const Arg2 &arg2) const { BOOST_STATIC_ASSERT_MSG( arity == 2, "Non-binary closure function invoked with two arguments" ); return detail::invoked_closure, CaptureTuple>( m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2), m_capture ); } /// \internal_ template detail::invoked_closure, CaptureTuple> operator()(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) const { BOOST_STATIC_ASSERT_MSG( arity == 3, "Non-ternary closure function invoked with three arguments" ); return detail::invoked_closure, CaptureTuple>( m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2, arg3), m_capture ); } private: std::string m_name; std::string m_source; std::map m_definitions; CaptureTuple m_capture; }; namespace detail { struct closure_signature_argument_inserter { closure_signature_argument_inserter(std::stringstream &s_, const char *capture_string, size_t last) : s(s_) { n = 0; m_last = last; size_t capture_string_length = std::strlen(capture_string); BOOST_ASSERT(capture_string[0] == '(' && capture_string[capture_string_length-1] == ')'); std::string capture_string_(capture_string + 1, capture_string_length - 2); boost::split(m_capture_names, capture_string_ , boost::is_any_of(",")); } template void operator()(const T&) const { BOOST_ASSERT(n < m_capture_names.size()); // get captured variable name std::string variable_name = m_capture_names[n]; // remove leading and trailing whitespace from variable name boost::trim(variable_name); s << capture_traits::type_name() << " " << variable_name; if(n+1 < m_last){ s << ", "; } n++; } mutable size_t n; size_t 
m_last; std::vector m_capture_names; std::stringstream &s; }; template inline std::string make_closure_declaration(const char *name, const char *arguments, const CaptureTuple &capture_tuple, const char *capture_string) { typedef typename boost::function_traits::result_type result_type; typedef typename boost::function_types::parameter_types::type parameter_types; typedef typename mpl::size::type arity_type; std::stringstream s; s << "inline " << type_name() << " " << name; s << "("; // insert function arguments signature_argument_inserter i(s, arguments, arity_type::value); mpl::for_each< typename mpl::transform >::type>(i); s << ", "; // insert capture arguments closure_signature_argument_inserter j( s, capture_string, boost::tuples::length::value ); fusion::for_each(capture_tuple, j); s << ")"; return s.str(); } // used by the BOOST_COMPUTE_CLOSURE() macro to create a closure // function with the given signature, name, capture, and source. template inline closure make_closure_impl(const char *name, const char *arguments, const CaptureTuple &capture, const char *capture_string, const std::string &source) { std::stringstream s; s << make_closure_declaration(name, arguments, capture, capture_string); s << source; return closure(name, capture, s.str()); } } // end detail namespace } // end compute namespace } // end boost namespace /// Creates a closure function object with \p name and \p source. /// /// \param return_type The return type for the function. /// \param name The name of the function. /// \param arguments A list of arguments for the function. /// \param capture A list of variables to capture. /// \param source The OpenCL C source code for the function. 
/// /// For example, to create a function which checks if a 2D point is /// contained in a circle of a given radius: /// \code /// // radius variable declared in C++ /// float radius = 1.5f; /// /// // create a closure function which returns true if the 2D point /// // argument is contained within a circle of the given radius /// BOOST_COMPUTE_CLOSURE(bool, is_in_circle, (const float2_ p), (radius), /// { /// return sqrt(p.x*p.x + p.y*p.y) < radius; /// }); /// /// // vector of 2D points /// boost::compute::vector points = ... /// /// // count number of points in the circle /// size_t count = boost::compute::count_if( /// points.begin(), points.end(), is_in_circle, queue /// ); /// \endcode /// /// \see BOOST_COMPUTE_FUNCTION() #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED #define BOOST_COMPUTE_CLOSURE(return_type, name, arguments, capture, source) #else #define BOOST_COMPUTE_CLOSURE(return_type, name, arguments, capture, ...) \ ::boost::compute::closure< \ return_type arguments, BOOST_TYPEOF(boost::tie capture) \ > name = \ ::boost::compute::detail::make_closure_impl< \ return_type arguments \ >( \ #name, #arguments, boost::tie capture, #capture, #__VA_ARGS__ \ ) #endif #endif // BOOST_COMPUTE_CLOSURE_HPP compute-0.5/include/boost/compute/command_queue.hpp000066400000000000000000001572751263566244600226510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_COMMAND_QUEUE_HPP #define BOOST_COMPUTE_COMMAND_QUEUE_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { inline void BOOST_COMPUTE_CL_CALLBACK nullary_native_kernel_trampoline(void *user_func_ptr) { void (*user_func)(); std::memcpy(&user_func, user_func_ptr, sizeof(user_func)); user_func(); } } // end detail namespace /// \class command_queue /// \brief A command queue. /// /// Command queues provide the interface for interacting with compute /// devices. The command_queue class provides methods to copy data to /// and from a compute device as well as execute compute kernels. /// /// Command queues are created for a compute device within a compute /// context. /// /// For example, to create a context and command queue for the default device /// on the system (this is the normal set up code used by almost all OpenCL /// programs): /// \code /// #include /// /// // get the default compute device /// boost::compute::device device = boost::compute::system::default_device(); /// /// // set up a compute context and command queue /// boost::compute::context context(device); /// boost::compute::command_queue queue(context, device); /// \endcode /// /// The default command queue for the system can be obtained with the /// system::default_queue() method. /// /// \see buffer, context, kernel class command_queue { public: enum properties { enable_profiling = CL_QUEUE_PROFILING_ENABLE, enable_out_of_order_execution = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE }; enum map_flags { map_read = CL_MAP_READ, map_write = CL_MAP_WRITE #ifdef CL_VERSION_1_2 , map_write_invalidate_region = CL_MAP_WRITE_INVALIDATE_REGION #endif }; /// Creates a null command queue. 
command_queue() : m_queue(0) { } explicit command_queue(cl_command_queue queue, bool retain = true) : m_queue(queue) { if(m_queue && retain){ clRetainCommandQueue(m_queue); } } /// Creates a command queue in \p context for \p device with /// \p properties. /// /// \see_opencl_ref{clCreateCommandQueue} command_queue(const context &context, const device &device, cl_command_queue_properties properties = 0) { BOOST_ASSERT(device.id() != 0); cl_int error = 0; #ifdef CL_VERSION_2_0 if (device.check_version(2, 0)){ std::vector queue_properties; if(properties){ queue_properties.push_back(CL_QUEUE_PROPERTIES); queue_properties.push_back(cl_queue_properties(properties)); queue_properties.push_back(cl_queue_properties(0)); } const cl_queue_properties *queue_properties_ptr = queue_properties.empty() ? 0 : &queue_properties[0]; m_queue = clCreateCommandQueueWithProperties( context, device.id(), queue_properties_ptr, &error ); } else #endif { m_queue = clCreateCommandQueue( context, device.id(), properties, &error ); } if(!m_queue){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new command queue object as a copy of \p other. command_queue(const command_queue &other) : m_queue(other.m_queue) { if(m_queue){ clRetainCommandQueue(m_queue); } } /// Copies the command queue object from \p other to \c *this. command_queue& operator=(const command_queue &other) { if(this != &other){ if(m_queue){ clReleaseCommandQueue(m_queue); } m_queue = other.m_queue; if(m_queue){ clRetainCommandQueue(m_queue); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new command queue object from \p other. command_queue(command_queue&& other) BOOST_NOEXCEPT : m_queue(other.m_queue) { other.m_queue = 0; } /// Move-assigns the command queue from \p other to \c *this. 
command_queue& operator=(command_queue&& other) BOOST_NOEXCEPT { if(m_queue){ clReleaseCommandQueue(m_queue); } m_queue = other.m_queue; other.m_queue = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the command queue. /// /// \see_opencl_ref{clReleaseCommandQueue} ~command_queue() { if(m_queue){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseCommandQueue(m_queue) ); } } /// Returns the underlying OpenCL command queue. cl_command_queue& get() const { return const_cast(m_queue); } /// Returns the device that the command queue issues commands to. device get_device() const { return device(get_info(CL_QUEUE_DEVICE)); } /// Returns the context for the command queue. context get_context() const { return context(get_info(CL_QUEUE_CONTEXT)); } /// Returns information about the command queue. /// /// \see_opencl_ref{clGetCommandQueueInfo} template T get_info(cl_command_queue_info info) const { return detail::get_object_info(clGetCommandQueueInfo, m_queue, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; /// Returns the properties for the command queue. cl_command_queue_properties get_properties() const { return get_info(CL_QUEUE_PROPERTIES); } /// Enqueues a command to read data from \p buffer to host memory. /// /// \see_opencl_ref{clEnqueueReadBuffer} /// /// \see copy() event enqueue_read_buffer(const buffer &buffer, size_t offset, size_t size, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBuffer( m_queue, buffer.get(), CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to read data from \p buffer to host memory. The /// copy will be performed asynchronously. 
/// /// \see_opencl_ref{clEnqueueReadBuffer} /// /// \see copy_async() event enqueue_read_buffer_async(const buffer &buffer, size_t offset, size_t size, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBuffer( m_queue, buffer.get(), CL_FALSE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to read a rectangular region from \p buffer to /// host memory. /// /// \see_opencl_ref{clEnqueueReadBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_read_buffer_rect(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBufferRect( m_queue, buffer.get(), CL_TRUE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to read a rectangular region from \p buffer to /// host memory. The copy will be performed asynchronously. 
/// /// \see_opencl_ref{clEnqueueReadBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_read_buffer_rect_async(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueReadBufferRect( m_queue, buffer.get(), CL_FALSE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // CL_VERSION_1_1 /// Enqueues a command to write data from host memory to \p buffer. /// /// \see_opencl_ref{clEnqueueWriteBuffer} /// /// \see copy() event enqueue_write_buffer(const buffer &buffer, size_t offset, size_t size, const void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBuffer( m_queue, buffer.get(), CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to write data from host memory to \p buffer. /// The copy is performed asynchronously. 
/// /// \see_opencl_ref{clEnqueueWriteBuffer} /// /// \see copy_async() event enqueue_write_buffer_async(const buffer &buffer, size_t offset, size_t size, const void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBuffer( m_queue, buffer.get(), CL_FALSE, offset, size, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to write a rectangular region from host memory /// to \p buffer. /// /// \see_opencl_ref{clEnqueueWriteBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_write_buffer_rect(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBufferRect( m_queue, buffer.get(), CL_TRUE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to write a rectangular region from host memory /// to \p buffer. The copy is performed asynchronously. 
/// /// \see_opencl_ref{clEnqueueWriteBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_write_buffer_rect_async(const buffer &buffer, const size_t buffer_origin[3], const size_t host_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); event event_; cl_int ret = clEnqueueWriteBufferRect( m_queue, buffer.get(), CL_FALSE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // CL_VERSION_1_1 /// Enqueues a command to copy data from \p src_buffer to /// \p dst_buffer. /// /// \see_opencl_ref{clEnqueueCopyBuffer} /// /// \see copy() event enqueue_copy_buffer(const buffer &src_buffer, const buffer &dst_buffer, size_t src_offset, size_t dst_offset, size_t size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_offset + size <= src_buffer.size()); BOOST_ASSERT(dst_offset + size <= dst_buffer.size()); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueCopyBuffer( m_queue, src_buffer.get(), dst_buffer.get(), src_offset, dst_offset, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to copy a rectangular region from /// \p src_buffer to \p dst_buffer. 
/// /// \see_opencl_ref{clEnqueueCopyBufferRect} /// /// \opencl_version_warning{1,1} event enqueue_copy_buffer_rect(const buffer &src_buffer, const buffer &dst_buffer, const size_t src_origin[3], const size_t dst_origin[3], const size_t region[3], size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueCopyBufferRect( m_queue, src_buffer.get(), dst_buffer.get(), src_origin, dst_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // CL_VERSION_1_1 #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to fill \p buffer with \p pattern. /// /// \see_opencl_ref{clEnqueueFillBuffer} /// /// \opencl_version_warning{1,2} /// /// \see fill() event enqueue_fill_buffer(const buffer &buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueFillBuffer( m_queue, buffer.get(), pattern, pattern_size, offset, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // CL_VERSION_1_2 /// Enqueues a command to map \p buffer into the host address space. /// Event associated with map operation is returned through /// \p map_buffer_event parameter. 
/// /// \see_opencl_ref{clEnqueueMapBuffer} void* enqueue_map_buffer(const buffer &buffer, cl_map_flags flags, size_t offset, size_t size, event &map_buffer_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapBuffer( m_queue, buffer.get(), CL_TRUE, flags, offset, size, events.size(), events.get_event_ptr(), &map_buffer_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// \overload void* enqueue_map_buffer(const buffer &buffer, cl_map_flags flags, size_t offset, size_t size, const wait_list &events = wait_list()) { event event_; return enqueue_map_buffer(buffer, flags, offset, size, event_, events); } /// Enqueues a command to map \p buffer into the host address space. /// Map operation is performed asynchronously. The pointer to the mapped /// region cannot be used until the map operation has completed. /// /// Event associated with map operation is returned through /// \p map_buffer_event parameter. /// /// \see_opencl_ref{clEnqueueMapBuffer} void* enqueue_map_buffer_async(const buffer &buffer, cl_map_flags flags, size_t offset, size_t size, event &map_buffer_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapBuffer( m_queue, buffer.get(), CL_FALSE, flags, offset, size, events.size(), events.get_event_ptr(), &map_buffer_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// Enqueues a command to unmap \p buffer from the host memory space. 
/// /// \see_opencl_ref{clEnqueueUnmapMemObject} event enqueue_unmap_buffer(const buffer &buffer, void *mapped_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(buffer.get_context() == this->get_context()); return enqueue_unmap_mem_object(buffer.get(), mapped_ptr, events); } /// Enqueues a command to unmap \p mem from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} event enqueue_unmap_mem_object(cl_mem mem, void *mapped_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueUnmapMemObject( m_queue, mem, mapped_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to read data from \p image to host memory. /// /// \see_opencl_ref{clEnqueueReadImage} event enqueue_read_image(const image_object& image, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *host_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueReadImage( m_queue, image.get(), CL_TRUE, origin, region, row_pitch, slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template event enqueue_read_image(const image_object& image, const extents origin, const extents region, void *host_ptr, size_t row_pitch = 0, size_t slice_pitch = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_read_image( image, origin3, region3, row_pitch, slice_pitch, host_ptr, events ); } /// Enqueues a command to write data from host memory to \p image. 
/// /// \see_opencl_ref{clEnqueueWriteImage} event enqueue_write_image(image_object& image, const size_t *origin, const size_t *region, const void *host_ptr, size_t input_row_pitch = 0, size_t input_slice_pitch = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueWriteImage( m_queue, image.get(), CL_TRUE, origin, region, input_row_pitch, input_slice_pitch, host_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template event enqueue_write_image(image_object& image, const extents origin, const extents region, const void *host_ptr, const size_t input_row_pitch = 0, const size_t input_slice_pitch = 0, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_write_image( image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events ); } /// Enqueues a command to map \p image into the host address space. /// /// Event associated with map operation is returned through /// \p map_image_event parameter. 
/// /// \see_opencl_ref{clEnqueueMapImage} void* enqueue_map_image(const image_object &image, cl_map_flags flags, const size_t *origin, const size_t *region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapImage( m_queue, image.get(), CL_TRUE, flags, origin, region, &output_row_pitch, &output_slice_pitch, events.size(), events.get_event_ptr(), &map_image_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// \overload void* enqueue_map_image(const image_object &image, cl_map_flags flags, const size_t *origin, const size_t *region, size_t &output_row_pitch, size_t &output_slice_pitch, const wait_list &events = wait_list()) { event event_; return enqueue_map_image( image, flags, origin, region, output_row_pitch, output_slice_pitch, event_, events ); } /// \overload template void* enqueue_map_image(image_object& image, cl_map_flags flags, const extents origin, const extents region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_map_image( image, flags, origin3, region3, output_row_pitch, output_slice_pitch, map_image_event, events ); } /// \overload template void* enqueue_map_image(image_object& image, cl_map_flags flags, const extents origin, const extents region, size_t &output_row_pitch, size_t &output_slice_pitch, const wait_list &events = wait_list()) { event event_; return enqueue_map_image( image, flags, origin, region, output_row_pitch, output_slice_pitch, event_, events ); } /// Enqueues a 
command to map \p image into the host address space. /// Map operation is performed asynchronously. The pointer to the mapped /// region cannot be used until the map operation has completed. /// /// Event associated with map operation is returned through /// \p map_image_event parameter. /// /// \see_opencl_ref{clEnqueueMapImage} void* enqueue_map_image_async(const image_object &image, cl_map_flags flags, const size_t *origin, const size_t *region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapImage( m_queue, image.get(), CL_FALSE, flags, origin, region, &output_row_pitch, &output_slice_pitch, events.size(), events.get_event_ptr(), &map_image_event.get(), &ret ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return pointer; } /// \overload template void* enqueue_map_image_async(image_object& image, cl_map_flags flags, const extents origin, const extents region, size_t &output_row_pitch, size_t &output_slice_pitch, event &map_image_event, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_map_image_async( image, flags, origin3, region3, output_row_pitch, output_slice_pitch, map_image_event, events ); } /// Enqueues a command to unmap \p image from the host memory space. 
/// /// \see_opencl_ref{clEnqueueUnmapMemObject} event enqueue_unmap_image(const image_object &image, void *mapped_ptr, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); return enqueue_unmap_mem_object(image.get(), mapped_ptr, events); } /// Enqueues a command to copy data from \p src_image to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyImage} event enqueue_copy_image(const image_object& src_image, image_object& dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueCopyImage( m_queue, src_image.get(), dst_image.get(), src_origin, dst_origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template event enqueue_copy_image(const image_object& src_image, image_object& dst_image, const extents src_origin, const extents dst_origin, const extents region, const wait_list &events = wait_list()) { BOOST_ASSERT(src_image.get_context() == this->get_context()); BOOST_ASSERT(dst_image.get_context() == this->get_context()); BOOST_ASSERT_MSG(src_image.format() == dst_image.format(), "Source and destination image formats must match."); size_t src_origin3[3] = { 0, 0, 0 }; size_t dst_origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(src_origin.data(), src_origin.data() + N, src_origin3); std::copy(dst_origin.data(), dst_origin.data() + N, dst_origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_copy_image( src_image, dst_image, src_origin3, dst_origin3, region3, events ); } /// Enqueues a command to copy data from \p src_image to \p dst_buffer. 
/// /// \see_opencl_ref{clEnqueueCopyImageToBuffer} event enqueue_copy_image_to_buffer(const image_object& src_image, memory_object& dst_buffer, const size_t *src_origin, const size_t *region, size_t dst_offset, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueCopyImageToBuffer( m_queue, src_image.get(), dst_buffer.get(), src_origin, region, dst_offset, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to copy data from \p src_buffer to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyBufferToImage} event enqueue_copy_buffer_to_image(const memory_object& src_buffer, image_object& dst_image, size_t src_offset, const size_t *dst_origin, const size_t *region, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueCopyBufferToImage( m_queue, src_buffer.get(), dst_image.get(), src_offset, dst_origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to fill \p image with \p fill_color. 
/// /// \see_opencl_ref{clEnqueueFillImage} /// /// \opencl_version_warning{1,2} event enqueue_fill_image(image_object& image, const void *fill_color, const size_t *origin, const size_t *region, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueFillImage( m_queue, image.get(), fill_color, origin, region, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template event enqueue_fill_image(image_object& image, const void *fill_color, const extents origin, const extents region, const wait_list &events = wait_list()) { BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; size_t region3[3] = { 1, 1, 1 }; std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); return enqueue_fill_image( image, fill_color, origin3, region3, events ); } /// Enqueues a command to migrate \p mem_objects. /// /// \see_opencl_ref{clEnqueueMigrateMemObjects} /// /// \opencl_version_warning{1,2} event enqueue_migrate_memory_objects(uint_ num_mem_objects, const cl_mem *mem_objects, cl_mem_migration_flags flags, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueMigrateMemObjects( m_queue, num_mem_objects, mem_objects, flags, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // CL_VERSION_1_2 /// Enqueues a kernel for execution. 
/// /// \see_opencl_ref{clEnqueueNDRangeKernel} event enqueue_nd_range_kernel(const kernel &kernel, size_t work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); event event_; cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, static_cast(work_dim), global_work_offset, global_work_size, local_work_size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// \overload template event enqueue_nd_range_kernel(const kernel &kernel, const extents &global_work_offset, const extents &global_work_size, const extents &local_work_size, const wait_list &events = wait_list()) { return enqueue_nd_range_kernel( kernel, N, global_work_offset.data(), global_work_size.data(), local_work_size.data(), events ); } /// Convenience method which calls enqueue_nd_range_kernel() with a /// one-dimensional range. event enqueue_1d_range_kernel(const kernel &kernel, size_t global_work_offset, size_t global_work_size, size_t local_work_size, const wait_list &events = wait_list()) { return enqueue_nd_range_kernel( kernel, 1, &global_work_offset, &global_work_size, local_work_size ? &local_work_size : 0, events ); } /// Enqueues a kernel to execute using a single work-item. /// /// \see_opencl_ref{clEnqueueTask} event enqueue_task(const kernel &kernel, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); event event_; // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we // just forward to the equivalent clEnqueueNDRangeKernel() call. 
#ifdef CL_VERSION_2_0 size_t one = 1; cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, 1, 0, &one, &one, events.size(), events.get_event_ptr(), &event_.get() ); #else cl_int ret = clEnqueueTask( m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get() ); #endif if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a function to execute on the host. event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *), void *args, size_t cb_args, uint_ num_mem_objects, const cl_mem *mem_list, const void **args_mem_loc, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueNativeKernel( m_queue, user_func, args, cb_args, num_mem_objects, mem_list, args_mem_loc, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Convenience overload for enqueue_native_kernel() which enqueues a /// native kernel on the host with a nullary function. event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void), const wait_list &events = wait_list()) { return enqueue_native_kernel( detail::nullary_native_kernel_trampoline, reinterpret_cast(&user_func), sizeof(user_func), 0, 0, 0, events ); } /// Flushes the command queue. /// /// \see_opencl_ref{clFlush} void flush() { BOOST_ASSERT(m_queue != 0); clFlush(m_queue); } /// Blocks until all outstanding commands in the queue have finished. /// /// \see_opencl_ref{clFinish} void finish() { BOOST_ASSERT(m_queue != 0); clFinish(m_queue); } /// Enqueues a barrier in the queue. 
void enqueue_barrier() { BOOST_ASSERT(m_queue != 0); cl_int ret = CL_SUCCESS; #ifdef CL_VERSION_1_2 if(get_device().check_version(1, 2)){ ret = clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0); } else #endif // CL_VERSION_1_2 { ret = clEnqueueBarrier(m_queue); } if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a barrier in the queue after \p events. /// /// \opencl_version_warning{1,2} event enqueue_barrier(const wait_list &events) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = CL_SUCCESS; ret = clEnqueueBarrierWithWaitList( m_queue, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // CL_VERSION_1_2 /// Enqueues a marker in the queue and returns an event that can be /// used to track its progress. event enqueue_marker() { event event_; cl_int ret = CL_SUCCESS; #ifdef CL_VERSION_1_2 if(get_device().check_version(1, 2)){ ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, &event_.get()); } else #endif { ret = clEnqueueMarker(m_queue, &event_.get()); } if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a marker after \p events in the queue and returns an /// event that can be used to track its progress. /// /// \opencl_version_warning{1,2} event enqueue_marker(const wait_list &events) { event event_; cl_int ret = clEnqueueMarkerWithWaitList( m_queue, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // CL_VERSION_1_2 #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to copy \p size bytes of data from \p src_ptr to /// \p dst_ptr. 
/// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemcpy} event enqueue_svm_memcpy(void *dst_ptr, const void *src_ptr, size_t size, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMemcpy( m_queue, CL_TRUE, dst_ptr, src_ptr, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to copy \p size bytes of data from \p src_ptr to /// \p dst_ptr. The operation is performed asynchronously. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemcpy} event enqueue_svm_memcpy_async(void *dst_ptr, const void *src_ptr, size_t size, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMemcpy( m_queue, CL_FALSE, dst_ptr, src_ptr, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to fill \p size bytes of data at \p svm_ptr with /// \p pattern. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemFill} event enqueue_svm_fill(void *svm_ptr, const void *pattern, size_t pattern_size, size_t size, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMemFill( m_queue, svm_ptr, pattern, pattern_size, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to free \p svm_ptr. 
/// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMFree} /// /// \see svm_free() event enqueue_svm_free(void *svm_ptr, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMFree( m_queue, 1, &svm_ptr, 0, 0, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to map \p svm_ptr to the host memory space. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMap} event enqueue_svm_map(void *svm_ptr, size_t size, cl_map_flags flags, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMMap( m_queue, CL_TRUE, flags, svm_ptr, size, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to unmap \p svm_ptr from the host memory space. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMUnmap} event enqueue_svm_unmap(void *svm_ptr, const wait_list &events = wait_list()) { event event_; cl_int ret = clEnqueueSVMUnmap( m_queue, svm_ptr, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } #endif // CL_VERSION_2_0 /// Returns \c true if the command queue is the same at \p other. bool operator==(const command_queue &other) const { return m_queue == other.m_queue; } /// Returns \c true if the command queue is different from \p other. 
bool operator!=(const command_queue &other) const { return m_queue != other.m_queue; } /// \internal_ operator cl_command_queue() const { return m_queue; } /// \internal_ bool check_device_version(int major, int minor) const { return get_device().check_version(major, minor); } private: cl_command_queue m_queue; }; inline buffer buffer::clone(command_queue &queue) const { buffer copy(get_context(), size(), get_memory_flags()); queue.enqueue_copy_buffer(*this, copy, 0, 0, size()); return copy; } inline image1d image1d::clone(command_queue &queue) const { image1d copy( get_context(), width(), format(), get_memory_flags() ); queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); return copy; } inline image2d image2d::clone(command_queue &queue) const { image2d copy( get_context(), width(), height(), format(), get_memory_flags() ); queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); return copy; } inline image3d image3d::clone(command_queue &queue) const { image3d copy( get_context(), width(), height(), depth(), format(), get_memory_flags() ); queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); return copy; } /// \internal_ define get_info() specializations for command_queue BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(command_queue, ((cl_context, CL_QUEUE_CONTEXT)) ((cl_device_id, CL_QUEUE_DEVICE)) ((uint_, CL_QUEUE_REFERENCE_COUNT)) ((cl_command_queue_properties, CL_QUEUE_PROPERTIES)) ) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_COMMAND_QUEUE_HPP compute-0.5/include/boost/compute/config.hpp000066400000000000000000000050221263566244600212520ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONFIG_HPP #define BOOST_COMPUTE_CONFIG_HPP #include #include #include // check for minimum required boost version #if BOOST_VERSION < 104800 #error Boost.Compute requires Boost version 1.48 or later #endif // the BOOST_COMPUTE_NO_VARIADIC_TEMPLATES macro is defined // if the compiler does not *fully* support variadic templates #if defined(BOOST_NO_CXX11_VARIADIC_TEMPLATES) || \ defined(BOOST_NO_VARIADIC_TEMPLATES) || \ (defined(__GNUC__) && !defined(__clang__) && \ __GNUC__ == 4 && __GNUC_MINOR__ <= 6) #define BOOST_COMPUTE_NO_VARIADIC_TEMPLATES #endif // BOOST_NO_CXX11_VARIADIC_TEMPLATES // the BOOST_COMPUTE_NO_STD_TUPLE macro is defined if the // compiler/stdlib does not support std::tuple #if defined(BOOST_NO_CXX11_HDR_TUPLE) || \ defined(BOOST_NO_0X_HDR_TUPLE) || \ defined(BOOST_COMPUTE_NO_VARIADIC_TEMPLATES) #define BOOST_COMPUTE_NO_STD_TUPLE #endif // BOOST_NO_CXX11_HDR_TUPLE // defines BOOST_COMPUTE_CL_CALLBACK to the value of CL_CALLBACK // if it is defined (it was added in OpenCL 1.1). 
this is used to // annotate certain callback functions registered with OpenCL #ifdef CL_CALLBACK # define BOOST_COMPUTE_CL_CALLBACK CL_CALLBACK #else # define BOOST_COMPUTE_CL_CALLBACK #endif // Maximum number of iterators acceptable for make_zip_iterator #ifndef BOOST_COMPUTE_MAX_ARITY // should be no more than max boost::tuple size (10 by default) # define BOOST_COMPUTE_MAX_ARITY 10 #endif #if !defined(BOOST_COMPUTE_DOXYGEN_INVOKED) && \ (defined(BOOST_NO_CXX11_RVALUE_REFERENCES) || defined(BOOST_NO_RVALUE_REFERENCES)) # define BOOST_COMPUTE_NO_RVALUE_REFERENCES #endif // BOOST_NO_CXX11_RVALUE_REFERENCES #if defined(BOOST_NO_CXX11_HDR_INITIALIZER_LIST) || \ defined(BOOST_NO_0X_HDR_INITIALIZER_LIST) # define BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST #endif // BOOST_NO_CXX11_HDR_INITIALIZER_LIST #if defined(BOOST_NO_CXX11_HDR_CHRONO) || \ defined(BOOST_NO_0X_HDR_CHRONO) # define BOOST_COMPUTE_NO_HDR_CHRONO #endif // BOOST_NO_CXX11_HDR_CHRONO #endif // BOOST_COMPUTE_CONFIG_HPP compute-0.5/include/boost/compute/container.hpp000066400000000000000000000017751263566244600220020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_HPP #define BOOST_COMPUTE_CONTAINER_HPP /// \file /// /// Meta-header to include all Boost.Compute container headers. 
#include #include #include #include #include #include #include #include #endif // BOOST_COMPUTE_CONTAINER_HPP compute-0.5/include/boost/compute/container/000077500000000000000000000000001263566244600212575ustar00rootroot00000000000000compute-0.5/include/boost/compute/container/array.hpp000066400000000000000000000145231263566244600231130ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_ARRAY_HPP #define BOOST_COMPUTE_CONTAINER_ARRAY_HPP #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class array /// \brief A fixed-size container. /// /// The array container is very similar to the \ref vector container except /// its size is fixed at compile-time rather than being dynamically resizable /// at run-time. /// /// For example, to create a fixed-size array with eight values on the device: /// \code /// boost::compute::array values(context); /// \endcode /// /// The Boost.Compute \c array class provides a STL-like API and is modeled /// after the \c std::array class from the C++ standard library. 
/// /// \see \ref vector "vector" template class array { public: typedef T value_type; typedef std::size_t size_type; typedef ptrdiff_t difference_type; typedef detail::buffer_value reference; typedef const detail::buffer_value const_reference; typedef T* pointer; typedef const T* const_pointer; typedef buffer_iterator iterator; typedef buffer_iterator const_iterator; typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; enum { static_size = N }; explicit array(const context &context = system::default_context()) : m_buffer(context, sizeof(T) * N) { } array(const array &other) : m_buffer(other.m_buffer.get_context(), sizeof(T) * N) { boost::compute::copy(other.begin(), other.end(), begin()); } array(const boost::array &array, const context &context = system::default_context()) : m_buffer(context, sizeof(T) * N) { boost::compute::copy(array.begin(), array.end(), begin()); } array& operator=(const array &other) { if(this != &other){ boost::compute::copy(other.begin(), other.end(), begin()); } return *this; } array& operator=(const boost::array &array) { boost::compute::copy(array.begin(), array.end(), begin()); return *this; } ~array() { } iterator begin() { return buffer_iterator(m_buffer, 0); } const_iterator begin() const { return buffer_iterator(m_buffer, 0); } const_iterator cbegin() const { return begin(); } iterator end() { return buffer_iterator(m_buffer, N); } const_iterator end() const { return buffer_iterator(m_buffer, N); } const_iterator cend() const { return end(); } reverse_iterator rbegin() { return reverse_iterator(end() - 1); } const_reverse_iterator rbegin() const { return reverse_iterator(end() - 1); } const_reverse_iterator crbegin() const { return rbegin(); } reverse_iterator rend() { return reverse_iterator(begin() - 1); } const_reverse_iterator rend() const { return reverse_iterator(begin() - 1); } const_reverse_iterator crend() const { return rend(); } size_type size() const { return N; } bool 
empty() const { return N == 0; } size_type max_size() const { return N; } reference operator[](size_type index) { return *(begin() + static_cast(index)); } const_reference operator[](size_type index) const { return *(begin() + static_cast(index)); } reference at(size_type index) { if(index >= N){ BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); } return operator[](index); } const_reference at(size_type index) const { if(index >= N){ BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); } return operator[](index); } reference front() { return *begin(); } const_reference front() const { return *begin(); } reference back() { return *(end() - static_cast(1)); } const_reference back() const { return *(end() - static_cast(1)); } void fill(const value_type &value) { ::boost::compute::fill(begin(), end(), value); } void swap(array &other) { ::boost::compute::swap_ranges(begin(), end(), other.begin()); } const buffer& get_buffer() const { return m_buffer; } private: buffer m_buffer; }; namespace detail { // set_kernel_arg specialization for array template struct set_kernel_arg > { void operator()(kernel &kernel_, size_t index, const array &array) { kernel_.set_arg(index, array.get_buffer()); } }; // for capturing array with BOOST_COMPUTE_CLOSURE() template struct capture_traits > { static std::string type_name() { return std::string("__global ") + ::boost::compute::type_name() + "*"; } }; // meta_kernel streaming operator for array template meta_kernel& operator<<(meta_kernel &k, const array &array) { return k << k.get_buffer_identifier(array.get_buffer()); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_ARRAY_HPP compute-0.5/include/boost/compute/container/basic_string.hpp000066400000000000000000000172561263566244600244520ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under 
the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP #define BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class basic_string /// \brief A template for a dynamically-sized character sequence. /// /// The \c basic_string class provides a generic template for a dynamically- /// sized character sequence. This is most commonly used through the \c string /// typedef (for \c basic_string). /// /// For example, to create a string on the device with its contents copied /// from a C-string on the host: /// \code /// boost::compute::string str("hello, world!"); /// \endcode /// /// \see \ref vector "vector" template > class basic_string { public: typedef Traits traits_type; typedef typename Traits::char_type value_type; typedef size_t size_type; static const size_type npos = size_type(-1); typedef typename ::boost::compute::vector::reference reference; typedef typename ::boost::compute::vector::const_reference const_reference; typedef typename ::boost::compute::vector::iterator iterator; typedef typename ::boost::compute::vector::const_iterator const_iterator; typedef typename ::boost::compute::vector::reverse_iterator reverse_iterator; typedef typename ::boost::compute::vector::const_reverse_iterator const_reverse_iterator; basic_string() { } basic_string(size_type count, CharT ch) : m_data(count) { std::fill(m_data.begin(), m_data.end(), ch); } basic_string(const basic_string &other, size_type pos, size_type count = npos) : m_data(other.begin() + pos, other.begin() + (std::min)(other.size(), count)) { } basic_string(const char *s, size_type count) : m_data(s, s + count) { } 
basic_string(const char *s) : m_data(s, s + std::strlen(s)) { } template basic_string(InputIterator first, InputIterator last) : m_data(first, last) { } basic_string(const basic_string &other) : m_data(other.m_data) { } basic_string& operator=(const basic_string &other) { if(this != &other){ m_data = other.m_data; } return *this; } ~basic_string() { } reference at(size_type pos) { return m_data.at(pos); } const_reference at(size_type pos) const { return m_data.at(pos); } reference operator[](size_type pos) { return m_data[pos]; } const_reference operator[](size_type pos) const { return m_data[pos]; } reference front() { return m_data.front(); } const_reference front() const { return m_data.front(); } reference back() { return m_data.back(); } const_reference back() const { return m_data.back(); } iterator begin() { return m_data.begin(); } const_iterator begin() const { return m_data.begin(); } const_iterator cbegin() const { return m_data.cbegin(); } iterator end() { return m_data.end(); } const_iterator end() const { return m_data.end(); } const_iterator cend() const { return m_data.cend(); } reverse_iterator rbegin() { return m_data.rbegin(); } const_reverse_iterator rbegin() const { return m_data.rbegin(); } const_reverse_iterator crbegin() const { return m_data.crbegin(); } reverse_iterator rend() { return m_data.rend(); } const_reverse_iterator rend() const { return m_data.rend(); } const_reverse_iterator crend() const { return m_data.crend(); } bool empty() const { return m_data.empty(); } size_type size() const { return m_data.size(); } size_type length() const { return m_data.size(); } size_type max_size() const { return m_data.max_size(); } void reserve(size_type size) { m_data.reserve(size); } size_type capacity() const { return m_data.capacity(); } void shrink_to_fit() { m_data.shrink_to_fit(); } void clear() { m_data.clear(); } void swap(basic_string &other) { if(this != &other) { ::boost::compute::vector temp_data(other.m_data); other.m_data = m_data; 
m_data = temp_data; } } basic_string substr(size_type pos = 0, size_type count = npos) const { return basic_string(*this, pos, count); } /// Finds the first character \p ch size_type find(CharT ch, size_type pos = 0) const { const_iterator iter = ::boost::compute::find(begin() + pos, end(), ch); if(iter == end()){ return npos; } else { return static_cast(std::distance(begin(), iter)); } } /// Finds the first substring equal to \p str size_type find(basic_string& str, size_type pos = 0) const { const_iterator iter = ::boost::compute::search(begin() + pos, end(), str.begin(), str.end()); if(iter == end()){ return npos; } else { return static_cast(std::distance(begin(), iter)); } } /// Finds the first substring equal to the character string /// pointed to by \p s. /// The length of the string is determined by the first null character. /// /// For example, the following code /// \snippet test/test_string.cpp string_find /// /// will return 5 as position. size_type find(const char* s, size_type pos = 0) const { basic_string str(s); const_iterator iter = ::boost::compute::search(begin() + pos, end(), str.begin(), str.end()); if(iter == end()){ return npos; } else { return static_cast(std::distance(begin(), iter)); } } private: ::boost::compute::vector m_data; }; template std::ostream& operator<<(std::ostream& stream, boost::compute::basic_stringconst& outStr) { command_queue queue = ::boost::compute::system::default_queue(); boost::compute::copy(outStr.begin(), outStr.end(), std::ostream_iterator(stream), queue); return stream; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP 
compute-0.5/include/boost/compute/container/detail/000077500000000000000000000000001263566244600225215ustar00rootroot00000000000000compute-0.5/include/boost/compute/container/detail/scalar.hpp000066400000000000000000000026701263566244600245040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP #define BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP #include #include namespace boost { namespace compute { namespace detail { // scalar provides a trivial "container" that stores a // single value in a memory buffer on a compute device template class scalar { public: typedef T value_type; scalar(const context &context) : m_buffer(context, sizeof(T)) { } ~scalar() { } T read(command_queue &queue) const { return read_single_value(m_buffer, 0, queue); } void write(const T &value, command_queue &queue) { write_single_value(value, m_buffer, 0, queue); } const buffer& get_buffer() const { return m_buffer; } private: buffer m_buffer; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP compute-0.5/include/boost/compute/container/dynamic_bitset.hpp000066400000000000000000000153041263566244600247710ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP #define BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class dynamic_bitset /// \brief The dynamic_bitset class contains a resizable bit array. /// /// For example, to create a dynamic-bitset with space for 1000 bits on the /// device: /// \code /// boost::compute::dynamic_bitset<> bits(1000, queue); /// \endcode /// /// The Boost.Compute \c dynamic_bitset class provides a STL-like API and is /// modeled after the \c boost::dynamic_bitset class from Boost. /// /// \see \ref vector "vector" template > class dynamic_bitset { public: typedef Block block_type; typedef Alloc allocator_type; typedef vector container_type; typedef typename container_type::size_type size_type; BOOST_STATIC_CONSTANT(size_type, bits_per_block = sizeof(block_type) * CHAR_BIT); BOOST_STATIC_CONSTANT(size_type, npos = static_cast(-1)); /// Creates a new dynamic bitset with storage for \p size bits. Initializes /// all bits to zero. dynamic_bitset(size_type size, command_queue &queue) : m_bits(size / sizeof(block_type), queue.get_context()), m_size(size) { // initialize all bits to zero reset(queue); } /// Creates a new dynamic bitset as a copy of \p other. dynamic_bitset(const dynamic_bitset &other) : m_bits(other.m_bits), m_size(other.m_size) { } /// Copies the data from \p other to \c *this. dynamic_bitset& operator=(const dynamic_bitset &other) { if(this != &other){ m_bits = other.m_bits; m_size = other.m_size; } return *this; } /// Destroys the dynamic bitset. ~dynamic_bitset() { } /// Returns the size of the dynamic bitset. size_type size() const { return m_size; } /// Returns the number of blocks to store the bits in the dynamic bitset. size_type num_blocks() const { return m_bits.size(); } /// Returns the maximum possible size for the dynamic bitset. 
size_type max_size() const { return m_bits.max_size() * bits_per_block; } /// Returns \c true if the dynamic bitset is empty (i.e. \c size() == \c 0). bool empty() const { return size() == 0; } /// Returns the number of set bits (i.e. '1') in the bitset. size_type count(command_queue &queue) const { ulong_ count = 0; transform_reduce( m_bits.begin(), m_bits.end(), &count, popcount(), plus(), queue ); return static_cast(count); } /// Resizes the bitset to contain \p num_bits. If the new size is greater /// than the current size the new bits are set to zero. void resize(size_type num_bits, command_queue &queue) { // resize bits const size_type current_block_count = m_bits.size(); m_bits.resize(num_bits * bits_per_block, queue); // fill new block with zeros (if new blocks were added) const size_type new_block_count = m_bits.size(); if(new_block_count > current_block_count){ fill_n( m_bits.begin() + current_block_count, new_block_count - current_block_count, block_type(0), queue ); } // store new size m_size = num_bits; } /// Sets the bit at position \p n to \c true. void set(size_type n, command_queue &queue) { set(n, true, queue); } /// Sets the bit at position \p n to \p value. void set(size_type n, bool value, command_queue &queue) { const size_type bit = n % bits_per_block; const size_type block = n / bits_per_block; // load current block block_type block_value; copy_n(m_bits.begin() + block, 1, &block_value, queue); // update block value if(value){ block_value |= (size_type(1) << bit); } else { block_value &= ~(size_type(1) << bit); } // store new block copy_n(&block_value, 1, m_bits.begin() + block, queue); } /// Returns \c true if the bit at position \p n is set (i.e. '1'). 
bool test(size_type n, command_queue &queue) { const size_type bit = n % (sizeof(block_type) * CHAR_BIT); const size_type block = n / (sizeof(block_type) * CHAR_BIT); block_type block_value; copy_n(m_bits.begin() + block, 1, &block_value, queue); return block_value & (size_type(1) << bit); } /// Flips the value of the bit at position \p n. void flip(size_type n, command_queue &queue) { set(n, !test(n, queue), queue); } /// Returns \c true if any bit in the bitset is set (i.e. '1'). bool any(command_queue &queue) const { return any_of( m_bits.begin(), m_bits.end(), lambda::_1 != block_type(0), queue ); } /// Returns \c true if all of the bits in the bitset are set to zero. bool none(command_queue &queue) const { return !any(queue); } /// Sets all of the bits in the bitset to zero. void reset(command_queue &queue) { fill(m_bits.begin(), m_bits.end(), block_type(0), queue); } /// Sets the bit at position \p n to zero. void reset(size_type n, command_queue &queue) { set(n, false, queue); } /// Empties the bitset (e.g. \c resize(0)). void clear() { m_bits.clear(); } /// Returns the allocator used to allocate storage for the bitset. allocator_type get_allocator() const { return m_bits.get_allocator(); } private: container_type m_bits; size_type m_size; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP compute-0.5/include/boost/compute/container/flat_map.hpp000066400000000000000000000250531263566244600235600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP #define BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { template class flat_map { public: typedef Key key_type; typedef T mapped_type; typedef typename ::boost::compute::vector > vector_type; typedef typename vector_type::value_type value_type; typedef typename vector_type::size_type size_type; typedef typename vector_type::difference_type difference_type; typedef typename vector_type::reference reference; typedef typename vector_type::const_reference const_reference; typedef typename vector_type::pointer pointer; typedef typename vector_type::const_pointer const_pointer; typedef typename vector_type::iterator iterator; typedef typename vector_type::const_iterator const_iterator; typedef typename vector_type::reverse_iterator reverse_iterator; typedef typename vector_type::const_reverse_iterator const_reverse_iterator; explicit flat_map(const context &context = system::default_context()) : m_vector(context) { } flat_map(const flat_map &other) : m_vector(other.m_vector) { } flat_map& operator=(const flat_map &other) { if(this != &other){ m_vector = other.m_vector; } return *this; } ~flat_map() { } iterator begin() { return m_vector.begin(); } const_iterator begin() const { return m_vector.begin(); } const_iterator cbegin() const { return m_vector.cbegin(); } iterator end() { return m_vector.end(); } const_iterator end() const { return m_vector.end(); } const_iterator cend() const { return m_vector.cend(); } reverse_iterator rbegin() { return m_vector.rbegin(); } const_reverse_iterator rbegin() const { return m_vector.rbegin(); } const_reverse_iterator crbegin() const { return m_vector.crbegin(); } reverse_iterator rend() { return m_vector.rend(); } const_reverse_iterator rend() const { return 
m_vector.rend(); } const_reverse_iterator crend() const { return m_vector.crend(); } size_type size() const { return m_vector.size(); } size_type max_size() const { return m_vector.max_size(); } bool empty() const { return m_vector.empty(); } size_type capacity() const { return m_vector.capacity(); } void reserve(size_type size, command_queue &queue) { m_vector.reserve(size, queue); } void reserve(size_type size) { command_queue queue = m_vector.default_queue(); reserve(size, queue); queue.finish(); } void shrink_to_fit() { m_vector.shrink_to_fit(); } void clear() { m_vector.clear(); } std::pair insert(const value_type &value, command_queue &queue) { iterator location = upper_bound(value.first, queue); if(location != begin()){ value_type current_value; ::boost::compute::copy_n(location - 1, 1, ¤t_value, queue); if(value.first == current_value.first){ return std::make_pair(location - 1, false); } } m_vector.insert(location, value); return std::make_pair(location, true); } std::pair insert(const value_type &value) { command_queue queue = m_vector.default_queue(); std::pair result = insert(value, queue); queue.finish(); return result; } iterator erase(const const_iterator &position, command_queue &queue) { return erase(position, position + 1, queue); } iterator erase(const const_iterator &position) { command_queue queue = m_vector.default_queue(); iterator iter = erase(position, queue); queue.finish(); return iter; } iterator erase(const const_iterator &first, const const_iterator &last, command_queue &queue) { return m_vector.erase(first, last, queue); } iterator erase(const const_iterator &first, const const_iterator &last) { command_queue queue = m_vector.default_queue(); iterator iter = erase(first, last, queue); queue.finish(); return iter; } size_type erase(const key_type &value, command_queue &queue) { iterator position = find(value, queue); if(position == end()){ return 0; } else { erase(position, queue); return 1; } } iterator find(const key_type &value, 
command_queue &queue) { ::boost::compute::get<0> get_key; return ::boost::compute::find( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } iterator find(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = find(value, queue); queue.finish(); return iter; } const_iterator find(const key_type &value, command_queue &queue) const { ::boost::compute::get<0> get_key; return ::boost::compute::find( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } const_iterator find(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = find(value, queue); queue.finish(); return iter; } size_type count(const key_type &value, command_queue &queue) const { return find(value, queue) != end() ? 1 : 0; } size_type count(const key_type &value) const { command_queue queue = m_vector.default_queue(); size_type result = count(value, queue); queue.finish(); return result; } iterator lower_bound(const key_type &value, command_queue &queue) { ::boost::compute::get<0> get_key; return ::boost::compute::lower_bound( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } iterator lower_bound(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = lower_bound(value, queue); queue.finish(); return iter; } const_iterator lower_bound(const key_type &value, command_queue &queue) const { ::boost::compute::get<0> get_key; return ::boost::compute::lower_bound( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } const_iterator lower_bound(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator 
iter = lower_bound(value, queue); queue.finish(); return iter; } iterator upper_bound(const key_type &value, command_queue &queue) { ::boost::compute::get<0> get_key; return ::boost::compute::upper_bound( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } iterator upper_bound(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = upper_bound(value, queue); queue.finish(); return iter; } const_iterator upper_bound(const key_type &value, command_queue &queue) const { ::boost::compute::get<0> get_key; return ::boost::compute::upper_bound( ::boost::compute::make_transform_iterator(begin(), get_key), ::boost::compute::make_transform_iterator(end(), get_key), value, queue ).base(); } const_iterator upper_bound(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = upper_bound(value, queue); queue.finish(); return iter; } const mapped_type at(const key_type &key) const { const_iterator iter = find(key); if(iter == end()){ BOOST_THROW_EXCEPTION(std::out_of_range("key not found")); } return value_type(*iter).second; } detail::buffer_value operator[](const key_type &key) { iterator iter = find(key); if(iter == end()){ iter = insert(std::make_pair(key, mapped_type())).first; } size_t index = iter.get_index() * sizeof(value_type) + sizeof(key_type); return detail::buffer_value(m_vector.get_buffer(), index); } private: ::boost::compute::vector > m_vector; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP compute-0.5/include/boost/compute/container/flat_set.hpp000066400000000000000000000202471263566244600235760ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or 
copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP #define BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP #include #include #include #include #include #include namespace boost { namespace compute { template class flat_set { public: typedef T key_type; typedef typename vector::value_type value_type; typedef typename vector::size_type size_type; typedef typename vector::difference_type difference_type; typedef typename vector::reference reference; typedef typename vector::const_reference const_reference; typedef typename vector::pointer pointer; typedef typename vector::const_pointer const_pointer; typedef typename vector::iterator iterator; typedef typename vector::const_iterator const_iterator; typedef typename vector::reverse_iterator reverse_iterator; typedef typename vector::const_reverse_iterator const_reverse_iterator; explicit flat_set(const context &context = system::default_context()) : m_vector(context) { } flat_set(const flat_set &other) : m_vector(other.m_vector) { } flat_set& operator=(const flat_set &other) { if(this != &other){ m_vector = other.m_vector; } return *this; } ~flat_set() { } iterator begin() { return m_vector.begin(); } const_iterator begin() const { return m_vector.begin(); } const_iterator cbegin() const { return m_vector.cbegin(); } iterator end() { return m_vector.end(); } const_iterator end() const { return m_vector.end(); } const_iterator cend() const { return m_vector.cend(); } reverse_iterator rbegin() { return m_vector.rbegin(); } const_reverse_iterator rbegin() const { return m_vector.rbegin(); } const_reverse_iterator crbegin() const { return m_vector.crbegin(); } reverse_iterator rend() { return m_vector.rend(); } const_reverse_iterator rend() const { return m_vector.rend(); } const_reverse_iterator crend() const { return m_vector.crend(); } size_type 
size() const { return m_vector.size(); } size_type max_size() const { return m_vector.max_size(); } bool empty() const { return m_vector.empty(); } size_type capacity() const { return m_vector.capacity(); } void reserve(size_type size, command_queue &queue) { m_vector.reserve(size, queue); } void reserve(size_type size) { command_queue queue = m_vector.default_queue(); reserve(size, queue); queue.finish(); } void shrink_to_fit() { m_vector.shrink_to_fit(); } void clear() { m_vector.clear(); } std::pair insert(const value_type &value, command_queue &queue) { iterator location = upper_bound(value, queue); if(location != begin()){ value_type current_value; ::boost::compute::copy_n(location - 1, 1, ¤t_value, queue); if(value == current_value){ return std::make_pair(location - 1, false); } } m_vector.insert(location, value, queue); return std::make_pair(location, true); } std::pair insert(const value_type &value) { command_queue queue = m_vector.default_queue(); std::pair result = insert(value, queue); queue.finish(); return result; } iterator erase(const const_iterator &position, command_queue &queue) { return erase(position, position + 1, queue); } iterator erase(const const_iterator &position) { command_queue queue = m_vector.default_queue(); iterator iter = erase(position, queue); queue.finish(); return iter; } iterator erase(const const_iterator &first, const const_iterator &last, command_queue &queue) { return m_vector.erase(first, last, queue); } iterator erase(const const_iterator &first, const const_iterator &last) { command_queue queue = m_vector.default_queue(); iterator iter = erase(first, last, queue); queue.finish(); return iter; } size_type erase(const key_type &value, command_queue &queue) { iterator position = find(value, queue); if(position == end()){ return 0; } else { erase(position, queue); return 1; } } size_type erase(const key_type &value) { command_queue queue = m_vector.default_queue(); size_type result = erase(value, queue); queue.finish(); 
return result; } iterator find(const key_type &value, command_queue &queue) { return ::boost::compute::find(begin(), end(), value, queue); } iterator find(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = find(value, queue); queue.finish(); return iter; } const_iterator find(const key_type &value, command_queue &queue) const { return ::boost::compute::find(begin(), end(), value, queue); } const_iterator find(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = find(value, queue); queue.finish(); return iter; } size_type count(const key_type &value, command_queue &queue) const { return find(value, queue) != end() ? 1 : 0; } size_type count(const key_type &value) const { command_queue queue = m_vector.default_queue(); size_type result = count(value, queue); queue.finish(); return result; } iterator lower_bound(const key_type &value, command_queue &queue) { return ::boost::compute::lower_bound(begin(), end(), value, queue); } iterator lower_bound(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = lower_bound(value, queue); queue.finish(); return iter; } const_iterator lower_bound(const key_type &value, command_queue &queue) const { return ::boost::compute::lower_bound(begin(), end(), value, queue); } const_iterator lower_bound(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = lower_bound(value, queue); queue.finish(); return iter; } iterator upper_bound(const key_type &value, command_queue &queue) { return ::boost::compute::upper_bound(begin(), end(), value, queue); } iterator upper_bound(const key_type &value) { command_queue queue = m_vector.default_queue(); iterator iter = upper_bound(value, queue); queue.finish(); return iter; } const_iterator upper_bound(const key_type &value, command_queue &queue) const { return ::boost::compute::upper_bound(begin(), end(), value, queue); } const_iterator 
upper_bound(const key_type &value) const { command_queue queue = m_vector.default_queue(); const_iterator iter = upper_bound(value, queue); queue.finish(); return iter; } private: vector m_vector; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP compute-0.5/include/boost/compute/container/mapped_view.hpp000066400000000000000000000153761263566244600243040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP #define BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class mapped_view /// \brief A mapped view of host memory. /// /// The mapped_view class simplifies mapping host-memory to a compute /// device. This allows for host-allocated memory to be used with the /// Boost.Compute algorithms. /// /// The following example shows how to map a simple C-array containing /// data on the host to the device and run the reduce() algorithm to /// calculate the sum: /// /// \snippet test/test_mapped_view.cpp reduce /// /// \see buffer template class mapped_view { public: typedef T value_type; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef buffer_iterator iterator; typedef buffer_iterator const_iterator; /// Creates a null mapped_view object. mapped_view() { m_mapped_ptr = 0; } /// Creates a mapped_view for \p host_ptr with \p n elements. After /// constructing a mapped_view the data is available for use by a /// compute device. 
Use the \p unmap() method to make the updated data /// available to the host. mapped_view(T *host_ptr, size_type n, const context &context = system::default_context()) : m_buffer(_make_mapped_buffer(host_ptr, n, context)) { m_mapped_ptr = 0; } /// Creates a read-only mapped_view for \p host_ptr with \p n elements. /// After constructing a mapped_view the data is available for use by a /// compute device. Use the \p unmap() method to make the updated data /// available to the host. mapped_view(const T *host_ptr, size_type n, const context &context = system::default_context()) : m_buffer(_make_mapped_buffer(host_ptr, n, context)) { m_mapped_ptr = 0; } /// Creates a copy of \p other. mapped_view(const mapped_view &other) : m_buffer(other.m_buffer) { m_mapped_ptr = 0; } /// Copies the mapped buffer from \p other. mapped_view& operator=(const mapped_view &other) { if(this != &other){ m_buffer = other.m_buffer; m_mapped_ptr = 0; } return *this; } /// Destroys the mapped_view object. ~mapped_view() { } /// Returns an iterator to the first element in the mapped_view. iterator begin() { return ::boost::compute::make_buffer_iterator(m_buffer, 0); } /// Returns a const_iterator to the first element in the mapped_view. const_iterator begin() const { return ::boost::compute::make_buffer_iterator(m_buffer, 0); } /// Returns a const_iterator to the first element in the mapped_view. const_iterator cbegin() const { return begin(); } /// Returns an iterator to one past the last element in the mapped_view. iterator end() { return ::boost::compute::make_buffer_iterator(m_buffer, size()); } /// Returns a const_iterator to one past the last element in the mapped_view. const_iterator end() const { return ::boost::compute::make_buffer_iterator(m_buffer, size()); } /// Returns a const_iterator to one past the last element in the mapped_view. const_iterator cend() const { return end(); } /// Returns the number of elements in the mapped_view. 
size_type size() const { return m_buffer.size() / sizeof(T); } /// Returns the host data pointer. T* get_host_ptr() { return static_cast(m_buffer.get_info(CL_MEM_HOST_PTR)); } /// Returns the host data pointer. const T* get_host_ptr() const { return static_cast(m_buffer.get_info(CL_MEM_HOST_PTR)); } /// Resizes the mapped_view to \p size elements. void resize(size_type size) { T *old_ptr = get_host_ptr(); m_buffer = _make_mapped_buffer(old_ptr, size, m_buffer.get_context()); } /// Returns \c true if the mapped_view is empty. bool empty() const { return size() == 0; } /// Returns the mapped buffer. const buffer& get_buffer() const { return m_buffer; } /// Maps the buffer into the host address space. /// /// \see_opencl_ref{clEnqueueMapBuffer} void map(cl_map_flags flags, command_queue &queue) { BOOST_ASSERT(m_mapped_ptr == 0); m_mapped_ptr = queue.enqueue_map_buffer( m_buffer, flags, 0, m_buffer.size() ); } /// Maps the buffer into the host address space for reading and writing. /// /// Equivalent to: /// \code /// map(CL_MAP_READ | CL_MAP_WRITE, queue); /// \endcode void map(command_queue &queue) { map(CL_MAP_READ | CL_MAP_WRITE, queue); } /// Unmaps the buffer from the host address space. 
/// /// \see_opencl_ref{clEnqueueUnmapMemObject} void unmap(command_queue &queue) { BOOST_ASSERT(m_mapped_ptr != 0); queue.enqueue_unmap_buffer(m_buffer, m_mapped_ptr); m_mapped_ptr = 0; } private: /// \internal_ static buffer _make_mapped_buffer(T *host_ptr, size_t n, const context &context) { return buffer( context, n * sizeof(T), buffer::read_write | buffer::use_host_ptr, host_ptr ); } /// \internal_ static buffer _make_mapped_buffer(const T *host_ptr, size_t n, const context &context) { return buffer( context, n * sizeof(T), buffer::read_only | buffer::use_host_ptr, const_cast(static_cast(host_ptr)) ); } private: buffer m_buffer; void *m_mapped_ptr; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP compute-0.5/include/boost/compute/container/stack.hpp000066400000000000000000000030631263566244600230770ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_STACK_HPP #define BOOST_COMPUTE_CONTAINER_STACK_HPP #include namespace boost { namespace compute { template class stack { public: typedef vector container_type; typedef typename container_type::size_type size_type; typedef typename container_type::value_type value_type; stack() { } stack(const stack &other) : m_vector(other.m_vector) { } stack& operator=(const stack &other) { if(this != &other){ m_vector = other.m_vector; } return *this; } ~stack() { } bool empty() const { return m_vector.empty(); } size_type size() const { return m_vector.size(); } value_type top() const { return m_vector.back(); } void push(const T &value) { m_vector.push_back(value); } void pop() { m_vector.pop_back(); } private: container_type m_vector; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_STACK_HPP compute-0.5/include/boost/compute/container/string.hpp000066400000000000000000000014411263566244600232760ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_STRING_HPP #define BOOST_COMPUTE_CONTAINER_STRING_HPP #include #include namespace boost { namespace compute { typedef basic_string string; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_STRING_HPP compute-0.5/include/boost/compute/container/valarray.hpp000066400000000000000000000362641263566244600236240ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_VALARRAY_HPP #define BOOST_COMPUTE_CONTAINER_VALARRAY_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { template class valarray { public: explicit valarray(const context &context = system::default_context()) : m_buffer(context, 0) { } explicit valarray(size_t size, const context &context = system::default_context()) : m_buffer(context, size * sizeof(T)) { } valarray(const T &value, size_t size, const context &context = system::default_context()) : m_buffer(context, size * sizeof(T)) { fill(begin(), end(), value); } valarray(const T *values, size_t size, const context &context = system::default_context()) : m_buffer(context, size * sizeof(T)) { copy(values, values + size, begin()); } valarray(const valarray &other) : m_buffer(other.m_buffer.get_context(), other.size() * sizeof(T)) { } valarray(const std::valarray &valarray, const context &context = system::default_context()) : m_buffer(context, valarray.size() * 
sizeof(T)) { copy(&valarray[0], &valarray[valarray.size()], begin()); } valarray& operator=(const valarray &other) { if(this != &other){ resize(other.size()); copy(other.begin(), other.end(), begin()); } return *this; } valarray& operator=(const std::valarray &valarray) { resize(valarray.size()); copy(&valarray[0], &valarray[valarray.size()], begin()); return *this; } valarray& operator*=(const T&); valarray& operator/=(const T&); valarray& operator%=(const T& val); valarray operator+() const { // This operator can be used with any type. valarray result(size()); copy(begin(), end(), result.begin()); return result; } valarray operator-() const { BOOST_STATIC_ASSERT_MSG( is_fundamental::value, "This operator can be used with all OpenCL built-in scalar" " and vector types" ); valarray result(size()); BOOST_COMPUTE_FUNCTION(T, unary_minus, (T x), { return -x; }); transform(begin(), end(), result.begin(), unary_minus); return result; } valarray operator~() const { BOOST_STATIC_ASSERT_MSG( is_fundamental::value && !is_floating_point::type>::value, "This operator can be used with all OpenCL built-in scalar" " and vector types except the built-in scalar and vector float types" ); valarray result(size()); BOOST_COMPUTE_FUNCTION(T, bitwise_not, (T x), { return ~x; }); transform(begin(), end(), result.begin(), bitwise_not); return result; } /// In OpenCL there cannot be memory buffer with bool type, for /// this reason return type is valarray instead of valarray. /// 1 means true, 0 means false. 
valarray operator!() const { BOOST_STATIC_ASSERT_MSG( is_fundamental::value, "This operator can be used with all OpenCL built-in scalar" " and vector types" ); valarray result(size()); BOOST_COMPUTE_FUNCTION(char, logical_not, (T x), { return !x; }); transform(begin(), end(), &result[0], logical_not); return result; } valarray& operator+=(const T&); valarray& operator-=(const T&); valarray& operator^=(const T&); valarray& operator&=(const T&); valarray& operator|=(const T&); valarray& operator<<=(const T&); valarray& operator>>=(const T&); valarray& operator*=(const valarray&); valarray& operator/=(const valarray&); valarray& operator%=(const valarray&); valarray& operator+=(const valarray&); valarray& operator-=(const valarray&); valarray& operator^=(const valarray&); valarray& operator&=(const valarray&); valarray& operator|=(const valarray&); valarray& operator<<=(const valarray&); valarray& operator>>=(const valarray&); ~valarray() { } size_t size() const { return m_buffer.size() / sizeof(T); } void resize(size_t size, T value = T()) { m_buffer = buffer(m_buffer.get_context(), size * sizeof(T)); fill(begin(), end(), value); } detail::buffer_value operator[](size_t index) { return *(begin() + static_cast(index)); } const detail::buffer_value operator[](size_t index) const { return *(begin() + static_cast(index)); } T (min)() const { return *(boost::compute::min_element(begin(), end())); } T (max)() const { return *(boost::compute::max_element(begin(), end())); } T sum() const { return boost::compute::accumulate(begin(), end(), T(0)); } template valarray apply(UnaryFunction function) const { valarray result(size()); transform(begin(), end(), result.begin(), function); return result; } const buffer& get_buffer() const { return m_buffer; } private: buffer_iterator begin() const { return buffer_iterator(m_buffer, 0); } buffer_iterator end() const { return buffer_iterator(m_buffer, size()); } private: buffer m_buffer; }; /// \internal_ #define 
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, assert) \ template \ inline valarray& \ valarray::operator op##=(const T& val) \ { \ assert \ transform(begin(), end(), begin(), \ ::boost::compute::bind(op_name(), placeholders::_1, val)); \ return *this; \ } \ \ template \ inline valarray& \ valarray::operator op##=(const valarray &rhs) \ { \ assert \ transform(begin(), end(), rhs.begin(), begin(), op_name()); \ return *this; \ } /// \internal_ #define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(op, op_name) \ BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, \ BOOST_STATIC_ASSERT_MSG( \ is_fundamental::value, \ "This operator can be used with all OpenCL built-in scalar" \ " and vector types" \ ); \ ) /// \internal_ /// For some operators class T can't be floating point type. /// See OpenCL specification, operators chapter. #define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(op, op_name) \ BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, \ BOOST_STATIC_ASSERT_MSG( \ is_fundamental::value && \ !is_floating_point::type>::value, \ "This operator can be used with all OpenCL built-in scalar" \ " and vector types except the built-in scalar and vector float types" \ ); \ ) // defining operators BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(+, plus) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(-, minus) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(*, multiplies) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(/, divides) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(^, bit_xor) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(&, bit_and) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(|, bit_or) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(<<, shift_left) BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(>>, shift_right) // The remainder (%) operates on // integer scalar and integer vector data types only. // See OpenCL specification. 
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(%, modulus,
    BOOST_STATIC_ASSERT_MSG(
        is_integral<typename scalar_type<T>::type>::value,
        "This operator can be used only with OpenCL built-in integer types"
    );
)

#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY
#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP

#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT

/// \internal_
/// Macro for defining binary operators for valarray
///
/// Three overloads are generated: valarray op valarray, scalar op valarray
/// and valarray op scalar. \p assert is a static assertion placed at the
/// top of each definition.
#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, assert) \
    template<class T> \
    valarray<T> operator op (const valarray<T>& lhs, const valarray<T>& rhs) \
    { \
        assert \
        valarray<T> result(lhs.size()); \
        transform(&lhs[0], &lhs[lhs.size()], &rhs[0], \
            &result[0], op_name<T>()); \
        return result; \
    } \
    \
    template<class T> \
    valarray<T> operator op (const T& val, const valarray<T>& rhs) \
    { \
        assert \
        valarray<T> result(rhs.size()); \
        transform(&rhs[0], &rhs[rhs.size()], &result[0], \
            ::boost::compute::bind(op_name<T>(), val, placeholders::_1)); \
        return result; \
    } \
    \
    template<class T> \
    valarray<T> operator op (const valarray<T>& lhs, const T& val) \
    { \
        assert \
        valarray<T> result(lhs.size()); \
        transform(&lhs[0], &lhs[lhs.size()], &result[0], \
            ::boost::compute::bind(op_name<T>(), placeholders::_1, val)); \
        return result; \
    }

/// \internal_
#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(op, op_name) \
    BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types" \
        ); \
    )

/// \internal_
/// For some operators class T can't be floating point type.
/// See OpenCL specification, operators chapter.
#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(op, op_name) \ BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, \ BOOST_STATIC_ASSERT_MSG( \ is_fundamental::value && \ !is_floating_point::type>::value, \ "This operator can be used with all OpenCL built-in scalar" \ " and vector types except the built-in scalar and vector float types" \ ); \ ) // defining binary operators for valarray BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(+, plus) BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(-, minus) BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(*, multiplies) BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(/, divides) BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(^, bit_xor) BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(&, bit_and) BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(|, bit_or) BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(<<, shift_left) BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(>>, shift_right) #undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY #undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP #undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR /// \internal_ /// Macro for defining valarray comparison operators. /// For return type valarray is used instead of valarray because /// in OpenCL there cannot be memory buffer with bool type. 
/// /// Note it's also used for defining binary logical operators (==, &&) #define BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(op, op_name) \ template \ valarray operator op (const valarray& lhs, const valarray& rhs) \ { \ BOOST_STATIC_ASSERT_MSG( \ is_fundamental::value, \ "This operator can be used with all OpenCL built-in scalar" \ " and vector types" \ ); \ valarray result(lhs.size()); \ transform(&lhs[0], &lhs[lhs.size()], &rhs[0], \ &result[0], op_name()); \ return result; \ } \ \ template \ valarray operator op (const T& val, const valarray& rhs) \ { \ BOOST_STATIC_ASSERT_MSG( \ is_fundamental::value, \ "This operator can be used with all OpenCL built-in scalar" \ " and vector types" \ ); \ valarray result(rhs.size()); \ transform(&rhs[0], &rhs[rhs.size()], &result[0], \ ::boost::compute::bind(op_name(), val, placeholders::_1)); \ return result; \ } \ \ template \ valarray operator op (const valarray& lhs, const T& val) \ { \ BOOST_STATIC_ASSERT_MSG( \ is_fundamental::value, \ "This operator can be used with all OpenCL built-in scalar" \ " and vector types" \ ); \ valarray result(lhs.size()); \ transform(&lhs[0], &lhs[lhs.size()], &result[0], \ ::boost::compute::bind(op_name(), placeholders::_1, val)); \ return result; \ } BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(==, equal_to) BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(!=, not_equal_to) BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(>, greater) BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(<, less) BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(>=, greater_equal) BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(<=, less_equal) /// \internal_ /// Macro for defining binary logical operators for valarray. /// /// For return type valarray is used instead of valarray because /// in OpenCL there cannot be memory buffer with bool type. /// 1 means true, 0 means false. 
#define BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(op, op_name) \ BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(op, op_name) BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(&&, logical_and) BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(||, logical_or) #undef BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR #undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_VALARRAY_HPP compute-0.5/include/boost/compute/container/vector.hpp000066400000000000000000000527011263566244600232770ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTAINER_VECTOR_HPP #define BOOST_COMPUTE_CONTAINER_VECTOR_HPP #include #include #include #include #include #include #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class vector /// \brief A resizable array of values. /// /// The vector class stores a dynamic array of values. Internally, the data /// is stored in an OpenCL buffer object. /// /// The vector class is the prefered container for storing and accessing data /// on a compute device. In most cases it should be used instead of directly /// dealing with buffer objects. If the undelying buffer is needed, it can be /// accessed with the get_buffer() method. /// /// The internal storage is allocated in a specific OpenCL context which is /// passed as an argument to the constructor when the vector is created. 
/// /// For example, to create a vector on the device containing space for ten /// \c int values: /// \code /// boost::compute::vector vec(10, context); /// \endcode /// /// Allocation and data transfer can also be performed in a single step: /// \code /// // values on the host /// int data[] = { 1, 2, 3, 4 }; /// /// // create a vector of size four and copy the values from data /// boost::compute::vector vec(data, data + 4, queue); /// \endcode /// /// The Boost.Compute \c vector class provides a STL-like API and is modeled /// after the \c std::vector class from the C++ standard library. It can be /// used with any of the STL-like algorithms provided by Boost.Compute /// including \c copy(), \c transform(), and \c sort() (among many others). /// /// For example: /// \code /// // a vector on a compute device /// boost::compute::vector vec = ... /// /// // copy data to the vector from a host std:vector /// boost::compute::copy(host_vec.begin(), host_vec.end(), vec.begin(), queue); /// /// // copy data from the vector to a host std::vector /// boost::compute::copy(vec.begin(), vec.end(), host_vec.begin(), queue); /// /// // sort the values in the vector /// boost::compute::sort(vec.begin(), vec.end(), queue); /// /// // calculate the sum of the values in the vector (also see reduce()) /// float sum = boost::compute::accumulate(vec.begin(), vec.end(), 0, queue); /// /// // reverse the values in the vector /// boost::compute::reverse(vec.begin(), vec.end(), queue); /// /// // fill the vector with ones /// boost::compute::fill(vec.begin(), vec.end(), 1, queue); /// \endcode /// /// \see \ref array "array", buffer template > class vector { public: typedef T value_type; typedef Alloc allocator_type; typedef typename allocator_type::size_type size_type; typedef typename allocator_type::difference_type difference_type; typedef detail::buffer_value reference; typedef const detail::buffer_value const_reference; typedef typename allocator_type::pointer pointer; typedef 
typename allocator_type::const_pointer const_pointer; typedef buffer_iterator iterator; typedef buffer_iterator const_iterator; typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; /// Creates an empty vector in \p context. explicit vector(const context &context = system::default_context()) : m_size(0), m_allocator(context) { m_data = m_allocator.allocate(_minimum_capacity()); } /// Creates a vector with space for \p count elements in \p context. /// /// Note that unlike \c std::vector's constructor, this will not initialize /// the values in the container. Either call the vector constructor which /// takes a value to initialize with or use the fill() algorithm to set /// the initial values. /// /// For example: /// \code /// // create a vector on the device with space for ten ints /// boost::compute::vector vec(10, context); /// \endcode explicit vector(size_type count, const context &context = system::default_context()) : m_size(count), m_allocator(context) { m_data = m_allocator.allocate((std::max)(count, _minimum_capacity())); } /// Creates a vector with space for \p count elements and sets each equal /// to \p value. /// /// For example: /// \code /// // creates a vector with four values set to nine (e.g. [9, 9, 9, 9]). /// boost::compute::vector vec(4, 9, queue); /// \endcode vector(size_type count, const T &value, command_queue &queue = system::default_queue()) : m_size(count), m_allocator(queue.get_context()) { m_data = m_allocator.allocate((std::max)(count, _minimum_capacity())); ::boost::compute::fill_n(begin(), count, value, queue); } /// Creates a vector with space for the values in the range [\p first, /// \p last) and copies them into the vector with \p queue. 
/// /// For example: /// \code /// // values on the host /// int data[] = { 1, 2, 3, 4 }; /// /// // create a vector of size four and copy the values from data /// boost::compute::vector vec(data, data + 4, queue); /// \endcode template vector(InputIterator first, InputIterator last, command_queue &queue = system::default_queue()) : m_size(detail::iterator_range_size(first, last)), m_allocator(queue.get_context()) { m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); ::boost::compute::copy(first, last, begin(), queue); } /// Creates a new vector and copies the values from \p other. vector(const vector &other, command_queue &queue = system::default_queue()) : m_size(other.m_size), m_allocator(other.m_allocator) { m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); if(!other.empty()){ ::boost::compute::copy(other.begin(), other.end(), begin(), queue); queue.finish(); } } /// Creates a new vector and copies the values from \p other. template vector(const vector &other, command_queue &queue = system::default_queue()) : m_size(other.size()), m_allocator(queue.get_context()) { m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); if(!other.empty()){ ::boost::compute::copy(other.begin(), other.end(), begin(), queue); queue.finish(); } } /// Creates a new vector and copies the values from \p vector. 
template vector(const std::vector &vector, command_queue &queue = system::default_queue()) : m_size(vector.size()), m_allocator(queue.get_context()) { m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); ::boost::compute::copy(vector.begin(), vector.end(), begin(), queue); } #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST vector(std::initializer_list list, command_queue &queue = system::default_queue()) : m_size(list.size()), m_allocator(queue.get_context()) { m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); ::boost::compute::copy(list.begin(), list.end(), begin(), queue); } #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST vector& operator=(const vector &other) { if(this != &other){ command_queue queue = default_queue(); resize(other.size(), queue); ::boost::compute::copy(other.begin(), other.end(), begin(), queue); queue.finish(); } return *this; } template vector& operator=(const std::vector &vector) { command_queue queue = default_queue(); resize(vector.size(), queue); ::boost::compute::copy(vector.begin(), vector.end(), begin(), queue); queue.finish(); return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new vector from \p other. vector(vector&& other) : m_data(std::move(other.m_data)), m_size(other.m_size), m_allocator(std::move(other.m_allocator)) { other.m_size = 0; } /// Move-assigns the data from \p other to \c *this. vector& operator=(vector&& other) { if(m_size){ m_allocator.deallocate(m_data, m_size); } m_data = std::move(other.m_data); m_size = other.m_size; m_allocator = std::move(other.m_allocator); other.m_size = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the vector object. 
~vector() { if(m_size){ m_allocator.deallocate(m_data, m_size); } } iterator begin() { return ::boost::compute::make_buffer_iterator(m_data.get_buffer(), 0); } const_iterator begin() const { return ::boost::compute::make_buffer_iterator(m_data.get_buffer(), 0); } const_iterator cbegin() const { return begin(); } iterator end() { return ::boost::compute::make_buffer_iterator(m_data.get_buffer(), m_size); } const_iterator end() const { return ::boost::compute::make_buffer_iterator(m_data.get_buffer(), m_size); } const_iterator cend() const { return end(); } reverse_iterator rbegin() { return reverse_iterator(end() - 1); } const_reverse_iterator rbegin() const { return reverse_iterator(end() - 1); } const_reverse_iterator crbegin() const { return rbegin(); } reverse_iterator rend() { return reverse_iterator(begin() - 1); } const_reverse_iterator rend() const { return reverse_iterator(begin() - 1); } const_reverse_iterator crend() const { return rend(); } /// Returns the number of elements in the vector. size_type size() const { return m_size; } size_type max_size() const { return m_allocator.max_size(); } /// Resizes the vector to \p size. void resize(size_type size, command_queue &queue) { if(size < capacity()){ m_size = size; } else { // allocate new buffer pointer new_data = m_allocator.allocate( static_cast( static_cast(size) * _growth_factor() ) ); // copy old values to the new buffer ::boost::compute::copy(m_data, m_data + m_size, new_data, queue); // free old memory m_allocator.deallocate(m_data, m_size); // set new data and size m_data = new_data; m_size = size; } } /// \overload void resize(size_type size) { command_queue queue = default_queue(); resize(size, queue); queue.finish(); } /// Returns \c true if the vector is empty. bool empty() const { return m_size == 0; } /// Returns the capacity of the vector. 
size_type capacity() const { return m_data.get_buffer().size() / sizeof(T); } void reserve(size_type size, command_queue &queue) { (void) size; (void) queue; } void reserve(size_type size) { command_queue queue = default_queue(); reserve(size, queue); queue.finish(); } void shrink_to_fit(command_queue &queue) { (void) queue; } void shrink_to_fit() { command_queue queue = default_queue(); shrink_to_fit(queue); queue.finish(); } reference operator[](size_type index) { return *(begin() + static_cast(index)); } const_reference operator[](size_type index) const { return *(begin() + static_cast(index)); } reference at(size_type index) { if(index >= size()){ BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); } return operator[](index); } const_reference at(size_type index) const { if(index >= size()){ BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); } return operator[](index); } reference front() { return *begin(); } const_reference front() const { return *begin(); } reference back() { return *(end() - static_cast(1)); } const_reference back() const { return *(end() - static_cast(1)); } template void assign(InputIterator first, InputIterator last, command_queue &queue) { // resize vector for new contents resize(detail::iterator_range_size(first, last), queue); // copy values into the vector ::boost::compute::copy(first, last, begin(), queue); } template void assign(InputIterator first, InputIterator last) { command_queue queue = default_queue(); assign(first, last, queue); queue.finish(); } void assign(size_type n, const T &value, command_queue &queue) { // resize vector for new contents resize(n, queue); // fill vector with value ::boost::compute::fill_n(begin(), n, value, queue); } void assign(size_type n, const T &value) { command_queue queue = default_queue(); assign(n, value, queue); queue.finish(); } /// Inserts \p value at the end of the vector (resizing if neccessary). 
/// /// Note that calling \c push_back() to insert data values one at a time /// is inefficient as there is a non-trivial overhead in performing a data /// transfer to the device. It is usually better to store a set of values /// on the host (for example, in a \c std::vector) and then transfer them /// in bulk using the \c insert() method or the copy() algorithm. void push_back(const T &value, command_queue &queue) { insert(end(), value, queue); } /// \overload void push_back(const T &value) { command_queue queue = default_queue(); push_back(value, queue); queue.finish(); } void pop_back(command_queue &queue) { resize(size() - 1, queue); } void pop_back() { command_queue queue = default_queue(); pop_back(queue); queue.finish(); } iterator insert(iterator position, const T &value, command_queue &queue) { if(position == end()){ resize(m_size + 1, queue); position = begin() + position.get_index(); ::boost::compute::copy_n(&value, 1, position, queue); } else { ::boost::compute::vector tmp(position, end(), queue); resize(m_size + 1, queue); position = begin() + position.get_index(); ::boost::compute::copy_n(&value, 1, position, queue); ::boost::compute::copy(tmp.begin(), tmp.end(), position + 1, queue); } return position + 1; } iterator insert(iterator position, const T &value) { command_queue queue = default_queue(); iterator iter = insert(position, value, queue); queue.finish(); return iter; } void insert(iterator position, size_type count, const T &value, command_queue &queue) { ::boost::compute::vector tmp(position, end(), queue); resize(size() + count, queue); position = begin() + position.get_index(); ::boost::compute::fill_n(position, count, value, queue); ::boost::compute::copy( tmp.begin(), tmp.end(), position + static_cast(count), queue ); } void insert(iterator position, size_type count, const T &value) { command_queue queue = default_queue(); insert(position, count, value, queue); queue.finish(); } /// Inserts the values in the range [\p first, \p last) into 
the vector at /// \p position using \p queue. template void insert(iterator position, InputIterator first, InputIterator last, command_queue &queue) { ::boost::compute::vector tmp(position, end(), queue); size_type count = detail::iterator_range_size(first, last); resize(size() + count, queue); position = begin() + position.get_index(); ::boost::compute::copy(first, last, position, queue); ::boost::compute::copy( tmp.begin(), tmp.end(), position + static_cast(count), queue ); } /// \overload template void insert(iterator position, InputIterator first, InputIterator last) { command_queue queue = default_queue(); insert(position, first, last, queue); queue.finish(); } iterator erase(iterator position, command_queue &queue) { return erase(position, position + 1, queue); } iterator erase(iterator position) { command_queue queue = default_queue(); iterator iter = erase(position, queue); queue.finish(); return iter; } iterator erase(iterator first, iterator last, command_queue &queue) { if(last != end()){ ::boost::compute::vector tmp(last, end(), queue); ::boost::compute::copy(tmp.begin(), tmp.end(), first, queue); } difference_type count = std::distance(first, last); resize(size() - static_cast(count), queue); return begin() + first.get_index() + count; } iterator erase(iterator first, iterator last) { command_queue queue = default_queue(); iterator iter = erase(first, last, queue); queue.finish(); return iter; } /// Swaps the contents of \c *this with \p other. void swap(vector &other) { std::swap(m_data, other.m_data); std::swap(m_size, other.m_size); std::swap(m_allocator, other.m_allocator); } /// Removes all elements from the vector. void clear() { m_size = 0; } allocator_type get_allocator() const { return m_allocator; } /// Returns the underlying buffer. const buffer& get_buffer() const { return m_data.get_buffer(); } /// \internal_ /// /// Returns a command queue usable to issue commands for the vector's /// memory buffer. 
This is used when a member function is called without /// specifying an existing command queue to use. command_queue default_queue() const { const context &context = m_allocator.get_context(); command_queue queue(context, context.get_device()); return queue; } private: /// \internal_ BOOST_CONSTEXPR size_type _minimum_capacity() const { return 4; } /// \internal_ BOOST_CONSTEXPR float _growth_factor() const { return 1.5; } private: pointer m_data; size_type m_size; allocator_type m_allocator; }; namespace detail { // set_kernel_arg specialization for vector template struct set_kernel_arg > { void operator()(kernel &kernel_, size_t index, const vector &vector) { kernel_.set_arg(index, vector.get_buffer()); } }; // for capturing vector with BOOST_COMPUTE_CLOSURE() template struct capture_traits > { static std::string type_name() { return std::string("__global ") + ::boost::compute::type_name() + "*"; } }; // meta_kernel streaming operator for vector template meta_kernel& operator<<(meta_kernel &k, const vector &vector) { return k << k.get_buffer_identifier(vector.get_buffer()); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTAINER_VECTOR_HPP compute-0.5/include/boost/compute/context.hpp000066400000000000000000000146461263566244600215050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CONTEXT_HPP #define BOOST_COMPUTE_CONTEXT_HPP #include #include #include #include #include #include namespace boost { namespace compute { /// \class context /// \brief A compute context. 
/// /// The context class represents a compute context. /// /// A context object manages a set of OpenCL resources including memory /// buffers and program objects. Before allocating memory on the device or /// executing kernels you must set up a context object. /// /// To create a context for the default device on the system: /// \code /// // get the default compute device /// boost::compute::device gpu = boost::compute::system::default_device(); /// /// // create a context for the device /// boost::compute::context context(gpu); /// \endcode /// /// Once a context is created, memory can be allocated using the buffer class /// and kernels can be executed using the command_queue class. /// /// \see device, command_queue class context { public: /// Create a null context object. context() : m_context(0) { } /// Creates a new context for \p device with \p properties. /// /// \see_opencl_ref{clCreateContext} explicit context(const device &device, const cl_context_properties *properties = 0) { BOOST_ASSERT(device.id() != 0); cl_device_id device_id = device.id(); cl_int error = 0; m_context = clCreateContext(properties, 1, &device_id, 0, 0, &error); if(!m_context){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new context for \p devices with \p properties. /// /// \see_opencl_ref{clCreateContext} explicit context(const std::vector &devices, const cl_context_properties *properties = 0) { BOOST_ASSERT(!devices.empty()); cl_int error = 0; m_context = clCreateContext( properties, static_cast(devices.size()), reinterpret_cast(&devices[0]), 0, 0, &error ); if(!m_context){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new context object for \p context. If \p retain is /// \c true, the reference count for \p context will be incremented. explicit context(cl_context context, bool retain = true) : m_context(context) { if(m_context && retain){ clRetainContext(m_context); } } /// Creates a new context object as a copy of \p other. 
context(const context &other) : m_context(other.m_context) { if(m_context){ clRetainContext(m_context); } } /// Copies the context object from \p other to \c *this. context& operator=(const context &other) { if(this != &other){ if(m_context){ clReleaseContext(m_context); } m_context = other.m_context; if(m_context){ clRetainContext(m_context); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new context object from \p other. context(context&& other) BOOST_NOEXCEPT : m_context(other.m_context) { other.m_context = 0; } /// Move-assigns the context from \p other to \c *this. context& operator=(context&& other) BOOST_NOEXCEPT { if(m_context){ clReleaseContext(m_context); } m_context = other.m_context; other.m_context = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the context object. ~context() { if(m_context){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseContext(m_context) ); } } /// Returns the underlying OpenCL context. cl_context& get() const { return const_cast(m_context); } /// Returns the device for the context. If the context contains multiple /// devices, the first is returned. device get_device() const { std::vector devices = get_devices(); if(devices.empty()) { return device(); } return devices.front(); } /// Returns a vector of devices for the context. std::vector get_devices() const { return get_info >(CL_CONTEXT_DEVICES); } /// Returns information about the context. /// /// \see_opencl_ref{clGetContextInfo} template T get_info(cl_context_info info) const { return detail::get_object_info(clGetContextInfo, m_context, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; /// Returns \c true if the context is the same at \p other. bool operator==(const context &other) const { return m_context == other.m_context; } /// Returns \c true if the context is different from \p other. 
bool operator!=(const context &other) const { return m_context != other.m_context; } /// \internal_ operator cl_context() const { return m_context; } private: cl_context m_context; }; /// \internal_ define get_info() specializations for context BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context, ((cl_uint, CL_CONTEXT_REFERENCE_COUNT)) ((std::vector, CL_CONTEXT_DEVICES)) ((std::vector, CL_CONTEXT_PROPERTIES)) ) #ifdef CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context, ((cl_uint, CL_CONTEXT_NUM_DEVICES)) ) #endif // CL_VERSION_1_1 } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_CONTEXT_HPP compute-0.5/include/boost/compute/core.hpp000066400000000000000000000021221263566244600207330ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_CORE_HPP #define BOOST_COMPUTE_CORE_HPP /// \file /// /// Meta-header to include all Boost.Compute core headers. 
#include #include #include #include #include #include #include #include #include #include #include #include #include #endif // BOOST_COMPUTE_CORE_HPP compute-0.5/include/boost/compute/detail/000077500000000000000000000000001263566244600205375ustar00rootroot00000000000000compute-0.5/include/boost/compute/detail/assert_cl_success.hpp000066400000000000000000000015231263566244600247600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP #define BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP #include #if defined(BOOST_DISABLE_ASSERTS) || defined(NDEBUG) #define BOOST_COMPUTE_ASSERT_CL_SUCCESS(function) \ function #else #define BOOST_COMPUTE_ASSERT_CL_SUCCESS(function) \ BOOST_ASSERT(function == CL_SUCCESS) #endif #endif // BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP compute-0.5/include/boost/compute/detail/buffer_value.hpp000066400000000000000000000076711263566244600237300ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_BUFFER_VALUE_HPP #define BOOST_COMPUTE_DETAIL_BUFFER_VALUE_HPP #include #include #include #include namespace boost { namespace compute { namespace detail { template class buffer_value { public: typedef T value_type; buffer_value() { } buffer_value(const value_type &value) : m_value(value) { } // creates a reference for the value in buffer at index (in bytes). buffer_value(const buffer &buffer, size_t index) : m_buffer(buffer.get(), false), m_index(index) { } buffer_value(const buffer_value &other) : m_buffer(other.m_buffer.get(), false), m_index(other.m_index) { } ~buffer_value() { // set buffer to null so that its reference count will // not be decremented when its destructor is called m_buffer.get() = 0; } operator value_type() const { if(m_buffer.get()){ const context &context = m_buffer.get_context(); const device &device = context.get_device(); command_queue queue(context, device); return detail::read_single_value(m_buffer, m_index / sizeof(T), queue); } else { return m_value; } } buffer_value operator-() const { return -T(*this); } bool operator<(const T &value) const { return T(*this) < value; } bool operator>(const T &value) const { return T(*this) > value; } bool operator<=(const T &value) const { return T(*this) <= value; } bool operator>=(const T &value) const { return T(*this) <= value; } bool operator==(const T &value) const { return T(*this) == value; } bool operator==(const buffer_value &other) const { if(m_buffer.get() != other.m_buffer.get()){ return false; } if(m_buffer.get()){ return m_index == other.m_index; } else { return m_value == other.m_value; } } bool operator!=(const T &value) const { return T(*this) != value; } buffer_value& operator=(const T &value) { if(m_buffer.get()){ const context &context = m_buffer.get_context(); command_queue queue(context, context.get_device()); detail::write_single_value(value, m_buffer, m_index / 
sizeof(T), queue); return *this; } else { m_value = value; return *this; } } buffer_value& operator=(const buffer_value &value) { return operator=(T(value)); } detail::device_ptr operator&() const { return detail::device_ptr(m_buffer, m_index); } buffer_value& operator++() { if(m_buffer.get()){ T value = T(*this); value++; *this = value; } else { m_value++; } return *this; } buffer_value operator++(int) { buffer_value result(*this); ++(*this); return result; } private: const buffer m_buffer; size_t m_index; value_type m_value; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_BUFFER_VALUE_HPP compute-0.5/include/boost/compute/detail/device_ptr.hpp000066400000000000000000000124221263566244600233750ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DEVICE_PTR_HPP #define BOOST_COMPUTE_DEVICE_PTR_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template struct device_ptr_index_expr { typedef T result_type; device_ptr_index_expr(const buffer &buffer, uint_ index, const IndexExpr &expr) : m_buffer(buffer), m_index(index), m_expr(expr) { } operator T() const { BOOST_STATIC_ASSERT_MSG(boost::is_integral::value, "Index expression must be integral"); BOOST_ASSERT(m_buffer.get()); const context &context = m_buffer.get_context(); const device &device = context.get_device(); command_queue queue(context, device); return detail::read_single_value(m_buffer, m_expr, queue); } const buffer &m_buffer; uint_ m_index; IndexExpr m_expr; }; template class device_ptr { public: typedef T value_type; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; typedef std::random_access_iterator_tag iterator_category; typedef T* pointer; typedef T& reference; device_ptr() : m_index(0) { } device_ptr(const buffer &buffer, size_t index = 0) : m_buffer(buffer.get(), false), m_index(index) { } device_ptr(const device_ptr &other) : m_buffer(other.m_buffer.get(), false), m_index(other.m_index) { } device_ptr& operator=(const device_ptr &other) { if(this != &other){ m_buffer.get() = other.m_buffer.get(); m_index = other.m_index; } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES device_ptr(device_ptr&& other) BOOST_NOEXCEPT : m_buffer(other.m_buffer.get(), false), m_index(other.m_index) { other.m_buffer.get() = 0; } device_ptr& operator=(device_ptr&& other) BOOST_NOEXCEPT { m_buffer.get() = other.m_buffer.get(); m_index = other.m_index; other.m_buffer.get() = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES ~device_ptr() { // set buffer to null so that its reference count will // not be decremented when its destructor is called 
m_buffer.get() = 0; } size_type get_index() const { return m_index; } const buffer& get_buffer() const { return m_buffer; } template device_ptr cast() const { return device_ptr(m_buffer, m_index); } device_ptr operator+(difference_type n) const { return device_ptr(m_buffer, m_index + n); } device_ptr operator+(const device_ptr &other) const { return device_ptr(m_buffer, m_index + other.m_index); } device_ptr& operator+=(difference_type n) { m_index += static_cast(n); return *this; } difference_type operator-(const device_ptr &other) const { return static_cast(m_index - other.m_index); } device_ptr& operator-=(difference_type n) { m_index -= n; return *this; } bool operator==(const device_ptr &other) const { return m_buffer.get() == other.m_buffer.get() && m_index == other.m_index; } bool operator!=(const device_ptr &other) const { return !(*this == other); } template detail::device_ptr_index_expr operator[](const Expr &expr) const { BOOST_ASSERT(m_buffer.get()); return detail::device_ptr_index_expr(m_buffer, uint_(m_index), expr); } private: const buffer m_buffer; size_t m_index; }; // is_buffer_iterator specialization for device_ptr template struct is_buffer_iterator< Iterator, typename boost::enable_if< boost::is_same< device_ptr, typename boost::remove_const::type > >::type > : public boost::true_type {}; } // end detail namespace // is_device_iterator specialization for device_ptr template struct is_device_iterator > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DEVICE_PTR_HPP compute-0.5/include/boost/compute/detail/duration.hpp000066400000000000000000000027561263566244600231070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_DURATION_HPP #define BOOST_COMPUTE_DETAIL_DURATION_HPP #include #ifndef BOOST_COMPUTE_NO_HDR_CHRONO #include #endif #include namespace boost { namespace compute { namespace detail { #ifndef BOOST_COMPUTE_NO_HDR_CHRONO template inline std::chrono::duration make_duration_from_nanoseconds(std::chrono::duration, size_t nanoseconds) { return std::chrono::duration_cast >( std::chrono::nanoseconds(nanoseconds) ); } #endif // BOOST_COMPUTE_NO_HDR_CHRONO template inline boost::chrono::duration make_duration_from_nanoseconds(boost::chrono::duration, size_t nanoseconds) { return boost::chrono::duration_cast >( boost::chrono::nanoseconds(nanoseconds) ); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_DURATION_HPP compute-0.5/include/boost/compute/detail/get_object_info.hpp000066400000000000000000000137061263566244600243770ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP #define BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template struct bound_info_function { bound_info_function(Function function, Object object, AuxInfo aux_info) : m_function(function), m_object(object), m_aux_info(aux_info) { } template cl_int operator()(Info info, size_t size, void *value, size_t *size_ret) const { return m_function(m_object, m_aux_info, info, size, value, size_ret); } Function m_function; Object m_object; AuxInfo m_aux_info; }; template struct bound_info_function { bound_info_function(Function function, Object object) : m_function(function), m_object(object) { } template cl_int operator()(Info info, size_t size, void *value, size_t *size_ret) const { return m_function(m_object, info, size, value, size_ret); } Function m_function; Object m_object; }; template inline bound_info_function bind_info_function(Function f, Object o) { return bound_info_function(f, o); } template inline bound_info_function bind_info_function(Function f, Object o, AuxInfo j) { return bound_info_function(f, o, j); } // default implementation template struct get_object_info_impl { template T operator()(Function function, Info info) const { T value; cl_int ret = function(info, sizeof(T), &value, 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return value; } }; // specialization for bool template<> struct get_object_info_impl { template bool operator()(Function function, Info info) const { cl_bool value; cl_int ret = function(info, sizeof(cl_bool), &value, 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return value == CL_TRUE; } }; // specialization for std::string template<> struct get_object_info_impl { template std::string operator()(Function function, Info info) const { size_t size = 
0; cl_int ret = function(info, 0, 0, &size); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } if(size == 0){ return std::string(); } std::string value(size - 1, 0); ret = function(info, size, &value[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return value; } }; // specialization for std::vector template struct get_object_info_impl > { template std::vector operator()(Function function, Info info) const { size_t size = 0; cl_int ret = function(info, 0, 0, &size); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } std::vector vector(size / sizeof(T)); ret = function(info, size, &vector[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return vector; } }; // returns the value (of type T) from the given clGet*Info() function call. template inline T get_object_info(Function f, Object o, Info i) { return get_object_info_impl()(bind_info_function(f, o), i); } template inline T get_object_info(Function f, Object o, Info i, AuxInfo j) { return get_object_info_impl()(bind_info_function(f, o, j), i); } // returns the value type for the clGet*Info() call on Object with Enum. template struct get_object_info_type; // defines the object::get_info() specialization #define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATION(object_type, result_type, value) \ namespace detail { \ template<> struct get_object_info_type { typedef result_type type; }; \ } \ template<> inline result_type object_type::get_info() const \ { \ return get_info(value); \ } // used by BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS() #define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_IMPL(r, data, elem) \ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATION( \ data, BOOST_PP_TUPLE_ELEM(2, 0, elem), BOOST_PP_TUPLE_ELEM(2, 1, elem) \ ) // defines the object::get_info() specialization for each // (result_type, value) tuple in seq for object_type. 
#define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(object_type, seq) \ BOOST_PP_SEQ_FOR_EACH( \ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_IMPL, object_type, seq \ ) } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP compute-0.5/include/boost/compute/detail/getenv.hpp000066400000000000000000000016501263566244600225420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_GETENV_HPP #define BOOST_COMPUTE_DETAIL_GETENV_HPP #include namespace boost { namespace compute { namespace detail { inline const char* getenv(const char *env_var) { #ifdef _MSC_VER # pragma warning(push) # pragma warning(disable: 4996) #endif return std::getenv(env_var); #ifdef _MSC_VER # pragma warning(pop) #endif } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_GETENV_HPP compute-0.5/include/boost/compute/detail/global_static.hpp000066400000000000000000000026351263566244600240650ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP #define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP #include #ifdef BOOST_COMPUTE_THREAD_SAFE # ifdef BOOST_COMPUTE_HAVE_THREAD_LOCAL // use c++11 thread local storage # define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \ thread_local type name ctor; # else // use thread_specific_ptr from boost.thread # include # define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \ static ::boost::thread_specific_ptr< type > BOOST_PP_CAT(name, _tls_ptr_); \ if(!BOOST_PP_CAT(name, _tls_ptr_).get()){ \ BOOST_PP_CAT(name, _tls_ptr_).reset(new type ctor); \ } \ static type &name = *BOOST_PP_CAT(name, _tls_ptr_); # endif #else // no thread-safety, just use static # define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \ static type name ctor; #endif #endif // BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP compute-0.5/include/boost/compute/detail/is_buffer_iterator.hpp000066400000000000000000000017001263566244600251230ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP #define BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP #include #include #include namespace boost { namespace compute { namespace detail { // default = false template struct is_buffer_iterator : public boost::false_type {}; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP compute-0.5/include/boost/compute/detail/is_contiguous_iterator.hpp000066400000000000000000000072101263566244600260530ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_IS_CONTIGUOUS_ITERATOR_HPP #define BOOST_COMPUTE_DETAIL_IS_CONTIGUOUS_ITERATOR_HPP #include #include #include #include #include namespace boost { namespace compute { namespace detail { // default = false template struct _is_contiguous_iterator : public boost::false_type {}; // std::vector::iterator = true template struct _is_contiguous_iterator< Iterator, typename boost::enable_if< typename boost::is_same< Iterator, typename std::vector::iterator >::type >::type > : public boost::true_type {}; // std::vector::const_iterator = true template struct _is_contiguous_iterator< Iterator, typename boost::enable_if< typename boost::is_same< Iterator, typename std::vector::const_iterator >::type >::type > : public boost::true_type {}; // std::valarray::iterator = true template struct _is_contiguous_iterator< Iterator, typename boost::enable_if< typename boost::is_same< Iterator, typename std::valarray::iterator >::type 
>::type > : public boost::true_type {}; // std::valarray::const_iterator = true template struct _is_contiguous_iterator< Iterator, typename boost::enable_if< typename boost::is_same< Iterator, typename std::valarray::const_iterator >::type >::type > : public boost::true_type {}; // T* = true template struct _is_contiguous_iterator< Iterator, typename boost::enable_if< boost::is_pointer >::type > : public boost::true_type {}; // the is_contiguous_iterator meta-function returns true if Iterator points // to a range of contiguous values. examples of contiguous iterators are // std::vector<>::iterator and float*. examples of non-contiguous iterators // are std::set<>::iterator and std::insert_iterator<>. // // the implementation consists of two phases. the first checks that value_type // for the iterator is not void. this must be done as for many containers void // is not a valid value_type (ex. std::vector::iterator is not valid). // after ensuring a non-void value_type, the _is_contiguous_iterator function // is invoked. it has specializations retuning true for all (known) contiguous // iterators types and a default value of false. 
template struct is_contiguous_iterator : public _is_contiguous_iterator< typename boost::remove_cv::type > {}; // value_type of void = false template struct is_contiguous_iterator< Iterator, typename boost::enable_if< typename boost::is_void< typename Iterator::value_type >::type >::type > : public boost::false_type {}; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_IS_CONTIGUOUS_ITERATOR_HPP compute-0.5/include/boost/compute/detail/iterator_plus_distance.hpp000066400000000000000000000031761263566244600260250ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_ITERATOR_PLUS_DISTANCE_HPP #define BOOST_COMPUTE_DETAIL_ITERATOR_PLUS_DISTANCE_HPP #include namespace boost { namespace compute { namespace detail { template inline Iterator iterator_plus_distance(Iterator i, Distance n, Tag) { while(n--){ i++; } return i; } template inline Iterator iterator_plus_distance(Iterator i, Distance n, std::random_access_iterator_tag) { typedef typename std::iterator_traits::difference_type difference_type; return i + static_cast(n); } // similar to std::advance() except returns the advanced iterator and // also works with iterators that don't define difference_type template inline Iterator iterator_plus_distance(Iterator i, Distance n) { typedef typename std::iterator_traits::iterator_category tag; return iterator_plus_distance(i, n, tag()); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_ITERATOR_PLUS_DISTANCE_HPP 
compute-0.5/include/boost/compute/detail/iterator_range_size.hpp000066400000000000000000000026061263566244600253130ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_ITERATOR_RANGE_SIZE_H #define BOOST_COMPUTE_DETAIL_ITERATOR_RANGE_SIZE_H #include #include #include namespace boost { namespace compute { namespace detail { // This is a convenience function which returns the size of a range // bounded by two iterators. This function has two differences from // the std::distance() function: 1) the return type (size_t) is // unsigned, and 2) the return value is always positive. template inline size_t iterator_range_size(Iterator first, Iterator last) { typedef typename std::iterator_traits::difference_type difference_type; difference_type difference = std::distance(first, last); return static_cast( (std::max)(difference, static_cast(0)) ); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_ITERATOR_RANGE_SIZE_H compute-0.5/include/boost/compute/detail/iterator_traits.hpp000066400000000000000000000022061263566244600244670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_ITERATOR_TRAITS_HPP #define BOOST_COMPUTE_DETAIL_ITERATOR_TRAITS_HPP #include #include #include namespace boost { namespace compute { namespace detail { template struct iterator_traits : public std::iterator_traits { static const bool is_contiguous = is_contiguous_iterator::value; static const bool is_on_device = is_device_iterator::value; static const bool is_on_host = !is_on_device; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_TRAITS_HPP compute-0.5/include/boost/compute/detail/literal.hpp000066400000000000000000000022021263566244600227000ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_LITERAL_HPP #define BOOST_COMPUTE_DETAIL_LITERAL_HPP #include #include #include #include #include namespace boost { namespace compute { namespace detail { template std::string make_literal(T x) { std::stringstream s; s << std::setprecision(std::numeric_limits::digits10) << std::scientific << x; if(boost::is_same::value || boost::is_same::value){ s << "f"; } return s.str(); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_LITERAL_HPP compute-0.5/include/boost/compute/detail/lru_cache.hpp000066400000000000000000000066131263566244600232030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP #define BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP #include #include #include #include namespace boost { namespace compute { namespace detail { // a cache which evicts the least recently used item when it is full template class lru_cache { public: typedef Key key_type; typedef Value value_type; typedef std::list list_type; typedef std::map< key_type, std::pair > map_type; lru_cache(size_t capacity) : m_capacity(capacity) { } ~lru_cache() { } size_t size() const { return m_map.size(); } size_t capacity() const { return m_capacity; } bool empty() const { return m_map.empty(); } bool contains(const key_type &key) { return m_map.find(key) != m_map.end(); } void insert(const key_type &key, const value_type &value) { typename map_type::iterator i = m_map.find(key); if(i == m_map.end()){ // insert item into the cache, but first check if it is full if(size() >= m_capacity){ // cache is full, evict the least recently used item evict(); } // insert the new item m_list.push_front(key); m_map[key] = std::make_pair(value, m_list.begin()); } } boost::optional get(const key_type &key) { // lookup value in the cache typename map_type::iterator i = m_map.find(key); if(i == m_map.end()){ // value not in cache return boost::none; } // return the value, but first update its place in the most // recently used list typename list_type::iterator j = i->second.second; if(j != m_list.begin()){ // move item to the front of the most recently used list m_list.erase(j); m_list.push_front(key); // update iterator in map j = m_list.begin(); const value_type &value = i->second.first; m_map[key] = std::make_pair(value, j); // return the value return value; } else { // the item is already at the front of the most recently // used list so just return it return i->second.first; } } void clear() { m_map.clear(); m_list.clear(); } private: void evict() { // evict item from the end of most 
recently used list typename list_type::iterator i = --m_list.end(); m_map.erase(*i); m_list.erase(i); } private: map_type m_map; list_type m_list; size_t m_capacity; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP compute-0.5/include/boost/compute/detail/meta_kernel.hpp000066400000000000000000000707741263566244600235550ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_META_KERNEL_HPP #define BOOST_COMPUTE_DETAIL_META_KERNEL_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template class meta_kernel_variable { public: typedef T result_type; meta_kernel_variable(const std::string &name) : m_name(name) { } meta_kernel_variable(const meta_kernel_variable &other) : m_name(other.m_name) { } meta_kernel_variable& operator=(const meta_kernel_variable &other) { if(this != &other){ m_name = other.m_name; } return *this; } ~meta_kernel_variable() { } std::string name() const { return m_name; } private: std::string m_name; }; template class meta_kernel_literal { public: typedef T result_type; meta_kernel_literal(const T &value) : m_value(value) { } meta_kernel_literal(const meta_kernel_literal &other) : m_value(other.m_value) { } meta_kernel_literal& operator=(const meta_kernel_literal &other) { if(this != &other){ m_value = other.m_value; } return *this; } 
~meta_kernel_literal() { } const T& value() const { return m_value; } private: T m_value; }; struct meta_kernel_stored_arg { meta_kernel_stored_arg() : m_size(0), m_value(0) { } meta_kernel_stored_arg(const meta_kernel_stored_arg &other) : m_size(0), m_value(0) { set_value(other.m_size, other.m_value); } meta_kernel_stored_arg& operator=(const meta_kernel_stored_arg &other) { if(this != &other){ set_value(other.m_size, other.m_value); } return *this; } template meta_kernel_stored_arg(const T &value) : m_size(0), m_value(0) { set_value(value); } ~meta_kernel_stored_arg() { if(m_value){ std::free(m_value); } } void set_value(size_t size, const void *value) { if(m_value){ std::free(m_value); } m_size = size; if(value){ m_value = std::malloc(size); std::memcpy(m_value, value, size); } else { m_value = 0; } } template void set_value(const T &value) { set_value(sizeof(T), boost::addressof(value)); } size_t m_size; void *m_value; }; struct meta_kernel_buffer_info { meta_kernel_buffer_info(const buffer &buffer, const std::string &id, memory_object::address_space addr_space, size_t i) : m_mem(buffer.get()), identifier(id), address_space(addr_space), index(i) { } cl_mem m_mem; std::string identifier; memory_object::address_space address_space; size_t index; }; class meta_kernel; template struct inject_type_impl { void operator()(meta_kernel &) { // default implementation does nothing } }; #define BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(type) \ meta_kernel& operator<<(const type &x) \ { \ m_source << x; \ return *this; \ } #define BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(type) \ meta_kernel& operator<<(const type &x) \ { \ m_source << "(" << type_name() << ")"; \ m_source << "("; \ for(size_t i = 0; i < vector_size::value; i++){ \ *this << lit(x[i]); \ \ if(i != vector_size::value - 1){ \ m_source << ","; \ } \ } \ m_source << ")"; \ return *this; \ } #define BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(type) \ 
BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(type, _)) \ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)) \ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)) \ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)) \ BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _)) class meta_kernel { public: template class argument { public: argument(const std::string &name, size_t index) : m_name(name), m_index(index) { } const std::string &name() const { return m_name; } size_t index() const { return m_index; } private: std::string m_name; size_t m_index; }; explicit meta_kernel(const std::string &name) : m_name(name) { } meta_kernel(const meta_kernel &other) { m_source.str(other.m_source.str()); } meta_kernel& operator=(const meta_kernel &other) { if(this != &other){ m_source.str(other.m_source.str()); } return *this; } ~meta_kernel() { } std::string name() const { return m_name; } std::string source() const { std::stringstream stream; // add pragmas if(!m_pragmas.empty()){ stream << m_pragmas << "\n"; } // add macros stream << "#define boost_pair_type(t1, t2) _pair_ ## t1 ## _ ## t2 ## _t\n"; stream << "#define boost_pair_get(x, n) (n == 0 ? 
x.first ## x.second)\n"; stream << "#define boost_make_pair(t1, x, t2, y) (boost_pair_type(t1, t2)) { x, y }\n"; stream << "#define boost_tuple_get(x, n) (x.v ## n)\n"; // add type declaration source stream << m_type_declaration_source.str() << "\n"; // add external function source stream << m_external_function_source.str() << "\n"; // add kernel source stream << "__kernel void " << m_name << "(" << boost::join(m_args, ", ") << ")\n" << "{\n" << m_source.str() << "\n}\n"; return stream.str(); } kernel compile(const context &context, const std::string &options = std::string()) { // generate the program source std::string source = this->source(); // generate cache key std::string cache_key = "__boost_meta_kernel_" + static_cast(detail::sha1(source)); // load program cache boost::shared_ptr cache = program_cache::get_global_cache(context); // load (or build) program from cache ::boost::compute::program program = cache->get_or_build(cache_key, options, source, context); // create kernel ::boost::compute::kernel kernel = program.create_kernel(name()); // bind stored args for(size_t i = 0; i < m_stored_args.size(); i++){ const detail::meta_kernel_stored_arg &arg = m_stored_args[i]; if(arg.m_size != 0){ kernel.set_arg(i, arg.m_size, arg.m_value); } } // bind buffer args for(size_t i = 0; i < m_stored_buffers.size(); i++){ const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i]; kernel.set_arg(bi.index, bi.m_mem); } return kernel; } template size_t add_arg(const std::string &name) { std::stringstream stream; stream << type() << " " << name; // add argument to list m_args.push_back(stream.str()); // return index return m_args.size() - 1; } template size_t add_arg(memory_object::address_space address_space, const std::string &name) { return add_arg_with_qualifiers(address_space_prefix(address_space), name); } template void set_arg(size_t index, const T &value) { if(index >= m_stored_args.size()){ m_stored_args.resize(index + 1); } m_stored_args[index] = 
detail::meta_kernel_stored_arg(value); } void set_arg(size_t index, const memory_object &mem) { set_arg(index, mem.get()); } void set_arg(size_t index, const image_sampler &sampler) { set_arg(index, cl_sampler(sampler)); } template size_t add_set_arg(const std::string &name, const T &value) { size_t index = add_arg(name); set_arg(index, value); return index; } void add_extension_pragma(const std::string &extension, const std::string &value = "enable") { m_pragmas += "#pragma OPENCL EXTENSION " + extension + " : " + value + "\n"; } void add_extension_pragma(const std::string &extension, const std::string &value) const { return const_cast(this)->add_extension_pragma(extension, value); } template std::string type() const { std::stringstream stream; // const qualifier if(boost::is_const::value){ stream << "const "; } // volatile qualifier if(boost::is_volatile::value){ stream << "volatile "; } // type typedef typename boost::remove_cv< typename boost::remove_pointer::type >::type Type; stream << type_name(); // pointer if(boost::is_pointer::value){ stream << "*"; } // inject type pragmas and/or definitions inject_type(); return stream.str(); } template std::string decl(const std::string &name) const { return type() + " " + name; } template std::string decl(const std::string &name, const Expr &init) const { meta_kernel tmp((std::string())); tmp << tmp.decl(name) << " = " << init; return tmp.m_source.str(); } template detail::meta_kernel_variable var(const std::string &name) const { type(); return make_var(name); } template detail::meta_kernel_literal lit(const T &value) const { type(); return detail::meta_kernel_literal(value); } template detail::meta_kernel_variable expr(const std::string &expr) const { type(); return detail::meta_kernel_variable(expr); } // define stream operators for scalar and vector types BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(char) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uchar) 
BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(short) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ushort) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(int) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uint) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(long) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ulong) BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(double) // define stream operators for float scalar and vector types meta_kernel& operator<<(const float &x) { m_source << std::showpoint << x << 'f'; return *this; } BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float2_) BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float4_) BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float8_) BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float16_) // define stream operators for variable types template meta_kernel& operator<<(const meta_kernel_variable &variable) { return *this << variable.name(); } // define stream operators for literal types template meta_kernel& operator<<(const meta_kernel_literal &literal) { return *this << literal.value(); } meta_kernel& operator<<(const meta_kernel_literal &literal) { return *this << (literal.value() ? 
"true" : "false"); } meta_kernel& operator<<(const meta_kernel_literal &literal) { const char c = literal.value(); switch(c){ // control characters case '\0': return *this << "'\\0'"; case '\a': return *this << "'\\a'"; case '\b': return *this << "'\\b'"; case '\t': return *this << "'\\t'"; case '\n': return *this << "'\\n'"; case '\v': return *this << "'\\v'"; case '\f': return *this << "'\\f'"; case '\r': return *this << "'\\r'"; // characters which need escaping case '\"': case '\'': case '\?': case '\\': return *this << "'\\" << c << "'"; // all other characters default: return *this << "'" << c << "'"; } } meta_kernel& operator<<(const meta_kernel_literal &literal) { return *this << lit(literal.value()); } meta_kernel& operator<<(const meta_kernel_literal &literal) { return *this << uint_(literal.value()); } // define stream operators for strings meta_kernel& operator<<(char ch) { m_source << ch; return *this; } meta_kernel& operator<<(const char *string) { m_source << string; return *this; } meta_kernel& operator<<(const std::string &string) { m_source << string; return *this; } template static detail::meta_kernel_variable make_var(const std::string &name) { return detail::meta_kernel_variable(name); } template static detail::meta_kernel_literal make_lit(const T &value) { return detail::meta_kernel_literal(value); } template static detail::meta_kernel_variable make_expr(const std::string &expr) { return detail::meta_kernel_variable(expr); } event exec(command_queue &queue) { return exec_1d(queue, 0, 1); } event exec_1d(command_queue &queue, size_t global_work_offset, size_t global_work_size) { const context &context = queue.get_context(); ::boost::compute::kernel kernel = compile(context); return queue.enqueue_1d_range_kernel( kernel, global_work_offset, global_work_size, 0 ); } event exec_1d(command_queue &queue, size_t global_work_offset, size_t global_work_size, size_t local_work_size) { const context &context = queue.get_context(); 
::boost::compute::kernel kernel = compile(context); return queue.enqueue_1d_range_kernel( kernel, global_work_offset, global_work_size, local_work_size ); } template std::string get_buffer_identifier(const buffer &buffer, const memory_object::address_space address_space = memory_object::global_memory) { // check if we've already seen buffer for(size_t i = 0; i < m_stored_buffers.size(); i++){ const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i]; if(bi.m_mem == buffer.get() && bi.address_space == address_space){ return bi.identifier; } } // create a new binding std::string identifier = "_buf" + lexical_cast(m_stored_buffers.size()); size_t index = add_arg(address_space, identifier); // store new buffer info m_stored_buffers.push_back( detail::meta_kernel_buffer_info(buffer, identifier, address_space, index)); return identifier; } std::string get_image_identifier(const char *qualifiers, const image2d &image) { size_t index = add_arg_with_qualifiers(qualifiers, "image"); set_arg(index, image); return "image"; } std::string get_sampler_identifier(bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode) { (void) normalized_coords; (void) addressing_mode; (void) filter_mode; m_pragmas += "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n" " CLK_ADDRESS_NONE |\n" " CLK_FILTER_NEAREST;\n"; return "sampler"; } template static std::string expr_to_string(const Expr &expr) { meta_kernel tmp((std::string())); tmp << expr; return tmp.m_source.str(); } template detail::invoked_function > if_(Predicate pred) const { return detail::invoked_function >( "if", std::string(), boost::make_tuple(pred) ); } template detail::invoked_function > else_if_(Predicate pred) const { return detail::invoked_function >( "else if", std::string(), boost::make_tuple(pred) ); } detail::meta_kernel_variable get_global_id(size_t dim) const { return expr("get_global_id(" + lexical_cast(dim) + ")"); } void add_function(const std::string &name, const 
std::string &source) { if(m_external_function_names.count(name)){ return; } m_external_function_names.insert(name); m_external_function_source << source << "\n"; } void add_function(const std::string &name, const std::string &source, const std::map &definitions) { typedef std::map::const_iterator iter; std::stringstream s; // add #define's for(iter i = definitions.begin(); i != definitions.end(); i++){ s << "#define " << i->first; if(!i->second.empty()){ s << " " << i->second; } s << "\n"; } s << source << "\n"; // add #undef's for(iter i = definitions.begin(); i != definitions.end(); i++){ s << "#undef " << i->first << "\n"; } add_function(name, s.str()); } template void add_type_declaration(const std::string &declaration) { const char *name = type_name(); // check if the type has already been declared std::string source = m_type_declaration_source.str(); if(source.find(name) != std::string::npos){ return; } m_type_declaration_source << declaration; } template void inject_type() const { inject_type_impl()(const_cast(*this)); } // the insert_function_call() method inserts a call to a function with // the given name tuple of argument values. template void insert_function_call(const std::string &name, const ArgTuple &args) { *this << name << '('; insert_function_call_args(args); *this << ')'; } // the insert_function_call_args() method takes a tuple of argument values // and inserts them into the source string with a comma in-between each. // this is useful for creating function calls given a tuple of values. 
void insert_function_call_args(const boost::tuple<>&) { } #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE(z, n, unused) \ inject_type(); #define BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG(z, n, unused) \ << boost::get(args) << ", " #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS(z, n, unused) \ template \ void insert_function_call_args( \ const boost::tuple &args \ ) \ { \ BOOST_PP_REPEAT_FROM_TO( \ 0, n, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE, ~ \ ) \ *this \ BOOST_PP_REPEAT_FROM_TO( \ 1, n, BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG, ~ \ ) \ << boost::get(args); \ } BOOST_PP_REPEAT_FROM_TO( 1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS, ~ ) #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE #undef BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS static const char* address_space_prefix(const memory_object::address_space value) { switch(value){ case memory_object::global_memory: return "__global"; case memory_object::local_memory: return "__local"; case memory_object::private_memory: return "__private"; case memory_object::constant_memory: return "__constant"; }; return 0; // unreachable } private: template size_t add_arg_with_qualifiers(const char *qualifiers, const std::string &name) { size_t index = add_arg(name); // update argument type declaration with qualifiers std::stringstream s; s << qualifiers << " " << m_args[index]; m_args[index] = s.str(); return index; } private: std::string m_name; std::stringstream m_source; std::stringstream m_external_function_source; std::stringstream m_type_declaration_source; std::set m_external_function_names; std::vector m_args; std::string m_pragmas; std::vector m_stored_args; std::vector m_stored_buffers; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_function &expr) { if(!expr.source().empty()){ kernel.add_function(expr.name(), expr.source(), expr.definitions()); } 
kernel.insert_function_call(expr.name(), expr.args()); return kernel; } template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_closure &expr) { if(!expr.source().empty()){ kernel.add_function(expr.name(), expr.source(), expr.definitions()); } kernel << expr.name() << '('; kernel.insert_function_call_args(expr.args()); kernel << ", "; kernel.insert_function_call_args(expr.capture()); kernel << ')'; return kernel; } template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_binary_operator &expr) { return kernel << "((" << expr.arg1() << ")" << expr.op() << "(" << expr.arg2() << "))"; } template inline meta_kernel& operator<<(meta_kernel &kernel, const detail::device_ptr_index_expr &expr) { if(expr.m_index == 0){ return kernel << kernel.get_buffer_identifier(expr.m_buffer) << '[' << expr.m_expr << ']'; } else { return kernel << kernel.get_buffer_identifier(expr.m_buffer) << '[' << expr.m_index << "+(" << expr.m_expr << ")]"; } } template inline meta_kernel& operator<<(meta_kernel &kernel, const detail::device_ptr_index_expr, IndexExpr> &expr) { typedef std::pair T; if(expr.m_index == 0){ return kernel << kernel.get_buffer_identifier(expr.m_buffer) << '[' << expr.m_expr << ']'; } else { return kernel << kernel.get_buffer_identifier(expr.m_buffer) << '[' << expr.m_index << "+(" << expr.m_expr << ")]"; } } template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_unary_negate_function &expr) { return kernel << "!(" << expr.pred()(expr.expr()) << ')'; } template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_binary_negate_function &expr) { return kernel << "!(" << expr.pred()(expr.expr1(), expr.expr2()) << ')'; } // get() for vector types template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_get &expr) { BOOST_STATIC_ASSERT(N < 16); if(N < 10){ return kernel << expr.m_arg << ".s" << uint_(N); } else if(N < 16){ #ifdef _MSC_VER # pragma warning(push) # pragma warning(disable: 
4307) #endif return kernel << expr.m_arg << ".s" << char('a' + (N - 10)); #ifdef _MSC_VER # pragma warning(pop) #endif } return kernel; } template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_field &expr) { return kernel << expr.m_arg << "." << expr.m_field; } template inline meta_kernel& operator<<(meta_kernel &k, const invoked_as &expr) { return k << "as_" << type_name() << "(" << expr.m_arg << ")"; } template inline meta_kernel& operator<<(meta_kernel &k, const invoked_convert &expr) { return k << "convert_" << type_name() << "(" << expr.m_arg << ")"; } template inline meta_kernel& operator<<(meta_kernel &k, const invoked_identity &expr) { return k << expr.m_arg; } template<> struct inject_type_impl { void operator()(meta_kernel &kernel) { kernel.add_extension_pragma("cl_khr_fp64", "enable"); } }; template struct inject_type_impl > { void operator()(meta_kernel &kernel) { kernel.inject_type(); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_META_KERNEL_HPP compute-0.5/include/boost/compute/detail/mpl_vector_to_tuple.hpp000066400000000000000000000043361263566244600253430ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP #define BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { namespace mpl = boost::mpl; template struct mpl_vector_to_tuple_impl; #define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \ typename mpl::at_c::type #define BOOST_COMPUTE_VEC2TUP(z, n, unused) \ template \ struct mpl_vector_to_tuple_impl \ { \ typedef typename \ boost::tuple< \ BOOST_PP_ENUM(n, BOOST_COMPUTE_PRINT_ELEM, ~) \ > type; \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_VEC2TUP, ~) #undef BOOST_COMPUTE_VEC2TUP #undef BOOST_COMPUTE_PRINT_ELEM // meta-function which converts a mpl::vector to a boost::tuple template struct mpl_vector_to_tuple { typedef typename mpl_vector_to_tuple_impl< Vector, mpl::size::value >::type type; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP compute-0.5/include/boost/compute/detail/nvidia_compute_capability.hpp000066400000000000000000000037431263566244600264660ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP #define BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP #include #ifdef BOOST_COMPUTE_HAVE_HDR_CL_EXT #include #endif namespace boost { namespace compute { namespace detail { #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV #else #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 #endif #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV #else #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 #endif inline void get_nvidia_compute_capability(const device &device, int &major, int &minor) { if(!device.supports_extension("cl_nv_device_attribute_query")){ major = minor = 0; return; } major = device.get_info(BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV); minor = device.get_info(BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV); } inline bool check_nvidia_compute_capability(const device &device, int major, int minor) { int actual_major, actual_minor; get_nvidia_compute_capability(device, actual_major, actual_minor); return actual_major > major || (actual_major == major && actual_minor >= minor); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP compute-0.5/include/boost/compute/detail/parameter_cache.hpp000066400000000000000000000153351263566244600243620ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP #define BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP #include #include #include #include #include #include #include #include #include #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE #include #include #include #include #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE namespace boost { namespace compute { namespace detail { class parameter_cache : boost::noncopyable { public: parameter_cache(const device &device) : m_dirty(false), m_device_name(device.name()) { #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // get offline cache file name (e.g. /home/user/.boost_compute/tune/device.json) m_file_name = make_file_name(); // load parameters from offline cache file (if it exists) if(boost::filesystem::exists(m_file_name)){ read_from_disk(); } #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE } ~parameter_cache() { #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE write_to_disk(); #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE } void set(const std::string &object, const std::string ¶meter, uint_ value) { m_cache[std::make_pair(object, parameter)] = value; // set the dirty flag to true. this will cause the updated parameters // to be stored to disk. 
m_dirty = true; } uint_ get(const std::string &object, const std::string ¶meter, uint_ default_value) { std::map, uint_>::iterator iter = m_cache.find(std::make_pair(object, parameter)); if(iter != m_cache.end()){ return iter->second; } else { return default_value; } } static boost::shared_ptr get_global_cache(const device &device) { // device name -> parameter cache typedef std::map > cache_map; BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(cache_map, caches, ((std::less()))); cache_map::iterator iter = caches.find(device.name()); if(iter == caches.end()){ boost::shared_ptr cache = boost::make_shared(device); caches.insert(iter, std::make_pair(device.name(), cache)); return cache; } else { return iter->second; } } private: #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // returns a string containing a cannoical device name static std::string cannonical_device_name(std::string name) { boost::algorithm::trim(name); std::replace(name.begin(), name.end(), ' ', '_'); std::replace(name.begin(), name.end(), '(', '_'); std::replace(name.begin(), name.end(), ')', '_'); return name; } // returns the boost.compute version string static std::string version_string() { char buf[32]; std::snprintf(buf, sizeof(buf), "%d.%d.%d", BOOST_COMPUTE_VERSION_MAJOR, BOOST_COMPUTE_VERSION_MINOR, BOOST_COMPUTE_VERSION_PATCH); return buf; } // returns the file path for the cached parameters std::string make_file_name() const { return detail::parameter_cache_path(true) + cannonical_device_name(m_device_name) + ".json"; } // store current parameters to disk void write_to_disk() { BOOST_ASSERT(!m_file_name.empty()); if(m_dirty){ // save current parameters to disk boost::property_tree::ptree pt; pt.put("header.device", m_device_name); pt.put("header.version", version_string()); typedef std::map, uint_> map_type; for(map_type::const_iterator iter = m_cache.begin(); iter != m_cache.end(); ++iter){ const std::pair &key = iter->first; pt.add(key.first + "." 
+ key.second, iter->second); } write_json(m_file_name, pt); m_dirty = false; } } // load stored parameters from disk void read_from_disk() { BOOST_ASSERT(!m_file_name.empty()); m_cache.clear(); boost::property_tree::ptree pt; try { read_json(m_file_name, pt); } catch(boost::property_tree::json_parser::json_parser_error &e){ // no saved cache file, ignore return; } std::string stored_device; try { stored_device = pt.get("header.device"); } catch(boost::property_tree::ptree_bad_path&){ return; } std::string stored_version; try { stored_version = pt.get("header.version"); } catch(boost::property_tree::ptree_bad_path&){ return; } if(stored_device == m_device_name && stored_version == version_string()){ typedef boost::property_tree::ptree::const_iterator pt_iter; for(pt_iter iter = pt.begin(); iter != pt.end(); ++iter){ if(iter->first == "header"){ // skip header continue; } boost::property_tree::ptree child_pt = pt.get_child(iter->first); for(pt_iter child_iter = child_pt.begin(); child_iter != child_pt.end(); ++child_iter){ set(iter->first, child_iter->first, boost::lexical_cast(child_iter->second.data())); } } } m_dirty = false; } #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE private: bool m_dirty; std::string m_device_name; std::string m_file_name; std::map, uint_> m_cache; }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP compute-0.5/include/boost/compute/detail/path.hpp000066400000000000000000000040631263566244600222070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_PATH_HPP #define BOOST_COMPUTE_DETAIL_PATH_HPP #include #include #include namespace boost { namespace compute { namespace detail { // Path delimiter symbol for the current OS. static const std::string& path_delim() { static const std::string delim = boost::filesystem::path("/").make_preferred().string(); return delim; } // Path to appdata folder. inline const std::string& appdata_path() { #ifdef WIN32 static const std::string appdata = detail::getenv("APPDATA") + path_delim() + "boost_compute"; #else static const std::string appdata = detail::getenv("HOME") + path_delim() + ".boost_compute"; #endif return appdata; } // Path to cached binaries. inline std::string program_binary_path(const std::string &hash, bool create = false) { std::string dir = detail::appdata_path() + path_delim() + hash.substr(0, 2) + path_delim() + hash.substr(2); if(create && !boost::filesystem::exists(dir)){ boost::filesystem::create_directories(dir); } return dir + path_delim(); } // Path to parameter caches. inline std::string parameter_cache_path(bool create = false) { const static std::string dir = appdata_path() + path_delim() + "tune"; if(create && !boost::filesystem::exists(dir)){ boost::filesystem::create_directories(dir); } return dir + path_delim(); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_PATH_HPP compute-0.5/include/boost/compute/detail/print_range.hpp000066400000000000000000000050341263566244600235620ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP #define BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline void print_range(InputIterator first, InputIterator last, command_queue &queue, typename boost::enable_if< is_buffer_iterator >::type* = 0) { typedef typename std::iterator_traits::value_type value_type; const size_t size = iterator_range_size(first, last); // copy values to temporary vector on the host std::vector tmp(size); ::boost::compute::copy(first, last, tmp.begin(), queue); // print values std::cout << "[ "; for(size_t i = 0; i < size; i++){ std::cout << tmp[i]; if(i != size - 1){ std::cout << ", "; } } std::cout << " ]" << std::endl; } template inline void print_range(InputIterator first, InputIterator last, command_queue &queue, typename boost::enable_if_c< !is_buffer_iterator::value >::type* = 0) { typedef typename std::iterator_traits::value_type value_type; const context &context = queue.get_context(); const size_t size = iterator_range_size(first, last); // copy values to temporary vector on the device ::boost::compute::vector tmp(size, context); ::boost::compute::copy(first, last, tmp.begin(), queue); print_range(tmp.begin(), tmp.end(), queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP compute-0.5/include/boost/compute/detail/read_write_single_value.hpp000066400000000000000000000046531263566244600261420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP #define BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP #include #include #include #include namespace boost { namespace compute { namespace detail { // reads and returns a single value at index in the buffer template inline T read_single_value(const buffer &buffer, size_t index, command_queue &queue) { BOOST_ASSERT(index < buffer.size() / sizeof(T)); BOOST_ASSERT(buffer.get_context() == queue.get_context()); T value; queue.enqueue_read_buffer(buffer, sizeof(T) * index, sizeof(T), &value); return value; } // reads and returns a the first value in the buffer template inline T read_single_value(const buffer &buffer, command_queue &queue) { return read_single_value(buffer, 0, queue); } // writes a single value at index to the buffer template inline void write_single_value(const T &value, const buffer &buffer, size_t index, command_queue &queue) { BOOST_ASSERT(index < buffer.size() / sizeof(T)); BOOST_ASSERT(buffer.get_context() == queue.get_context()); queue.enqueue_write_buffer(buffer, index * sizeof(T), sizeof(T), &value); } // writes value to the first location in buffer template inline void write_single_value(const T &value, const buffer &buffer, command_queue &queue) { write_single_value(value, buffer, 0, queue); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP compute-0.5/include/boost/compute/detail/sha1.hpp000066400000000000000000000026311263566244600221060ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_DETAIL_SHA1_HPP
#define BOOST_COMPUTE_DETAIL_SHA1_HPP

// NOTE(review): the #include targets are missing from this copy of the
// header -- restore them from the upstream sources before compiling.
#include
#include
#include

namespace boost {
namespace compute {
namespace detail {

// Accumulates SHA1 hash of the passed strings.
//
// The digest is obtained by converting the object to std::string, which
// yields 40 lower-case hexadecimal characters (five zero-padded 32-bit
// words).
class sha1 {
    public:
        // starts a new hash and feeds s into it (nothing is fed for an
        // empty string)
        sha1(const std::string &s = "") {
            if (!s.empty()) this->process(s);
        }

        // feeds the bytes of s into the hash; returns *this so that calls
        // can be chained
        sha1& process(const std::string &s) {
            h.process_bytes(s.c_str(), s.size());
            return *this;
        }

        // returns the digest as a hexadecimal string
        operator std::string() {
            unsigned int digest[5];
            h.get_digest(digest);

            std::ostringstream buf;
            // each word is written as exactly eight hex digits
            for(int i = 0; i < 5; ++i)
                buf << std::hex << std::setfill('0') << std::setw(8) << digest[i];

            return buf.str();
        }
    private:
        boost::uuids::detail::sha1 h;
};

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_DETAIL_SHA1_HPP
compute-0.5/include/boost/compute/detail/variadic_macros.hpp000066400000000000000000000034541263566244600244040ustar00rootroot00000000000000//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP
#define BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP

// NOTE(review): the #include targets are missing from this copy of the
// header -- restore them from the upstream sources before compiling.
#include
#include
#include

#if BOOST_PP_VARIADICS == 1
#  include
#endif

// BOOST_COMPUTE_PP_VARIADIC_SIZE(...) expands to the number of arguments
// passed to it. BOOST_PP_VARIADIC_SIZE is used when it is available;
// otherwise the arguments are placed in front of the descending list
// 64..1 so that the matching count ends up in the 'size' slot of
// BOOST_COMPUTE_PP_VARIADIC_SIZE_I (this fallback handles 1 to 64
// arguments).
#ifdef BOOST_PP_VARIADIC_SIZE
#  define BOOST_COMPUTE_PP_VARIADIC_SIZE BOOST_PP_VARIADIC_SIZE
#else
#  define BOOST_COMPUTE_PP_VARIADIC_SIZE(...) BOOST_COMPUTE_PP_VARIADIC_SIZE_I(__VA_ARGS__, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,)
#  define BOOST_COMPUTE_PP_VARIADIC_SIZE_I(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63, size, ...) size
#endif

// expands to the number of elements of a parenthesized tuple
#define BOOST_COMPUTE_PP_TUPLE_SIZE(tuple) \
    BOOST_COMPUTE_PP_VARIADIC_SIZE tuple

// converts a parenthesized tuple to a preprocessor sequence without the
// caller having to pass the tuple size
#define BOOST_COMPUTE_PP_TUPLE_TO_SEQ(tuple) \
    BOOST_PP_TUPLE_TO_SEQ(BOOST_COMPUTE_PP_TUPLE_SIZE(tuple), tuple)

#endif // BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP
compute-0.5/include/boost/compute/detail/vendor.hpp000066400000000000000000000022111263566244600225410ustar00rootroot00000000000000//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_VENDOR_HPP #define BOOST_COMPUTE_DETAIL_VENDOR_HPP #include #include namespace boost { namespace compute { namespace detail { // returns true if the device is an nvidia gpu inline bool is_nvidia_device(const device &device) { std::string nvidia("NVIDIA"); return device.vendor().compare(0, nvidia.size(), nvidia) == 0; } // returns true if the device is an amd cpu or gpu inline bool is_amd_device(const device &device) { return device.platform().vendor() == "Advanced Micro Devices, Inc."; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DETAIL_VENDOR_HPP compute-0.5/include/boost/compute/detail/work_size.hpp000066400000000000000000000023051263566244600232640ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP #define BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP #include namespace boost { namespace compute { namespace detail { // Given a total number of values (count), a number of values to // process per thread (vtp), and a number of threads to execute per // block (tpb), this function returns the global work size to be // passed to clEnqueueNDRangeKernel() for a 1D algorithm. 
// Returns the global work size to pass to clEnqueueNDRangeKernel() for a
// 1D algorithm: the number of threads needed to cover count values at vpt
// values per thread, rounded up to a whole multiple of tpb threads per
// block.
//
// The thread count is computed with exact integer arithmetic. A float
// division cannot represent counts above 2^24 exactly and could return a
// work size that is too small to cover the input.
//
// Both vpt and tpb must be non-zero.
inline size_t calculate_work_size(size_t count, size_t vpt, size_t tpb)
{
    // ceil(count / vpt), written so that it cannot overflow for counts
    // close to the top of the size_t range
    size_t work_size = count / vpt + (count % vpt != 0 ? 1 : 0);

    // pad up to the next multiple of the block size
    const size_t remainder = work_size % tpb;
    if(remainder != 0){
        work_size += tpb - remainder;
    }

    return work_size;
}
explicit device(cl_device_id id, bool retain = true) : m_id(id) { #ifdef CL_VERSION_1_2 if(m_id && retain && is_subdevice()){ clRetainDevice(m_id); } #else (void) retain; #endif } /// Creates a new device object as a copy of \p other. device(const device &other) : m_id(other.m_id) { #ifdef CL_VERSION_1_2 if(m_id && is_subdevice()){ clRetainDevice(m_id); } #endif } /// Copies the device from \p other to \c *this. device& operator=(const device &other) { if(this != &other){ #ifdef CL_VERSION_1_2 if(m_id && is_subdevice()){ clReleaseDevice(m_id); } #endif m_id = other.m_id; #ifdef CL_VERSION_1_2 if(m_id && is_subdevice()){ clRetainDevice(m_id); } #endif } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new device object from \p other. device(device&& other) BOOST_NOEXCEPT : m_id(other.m_id) { other.m_id = 0; } /// Move-assigns the device from \p other to \c *this. device& operator=(device&& other) BOOST_NOEXCEPT { #ifdef CL_VERSION_1_2 if(m_id && is_subdevice()){ clReleaseDevice(m_id); } #endif m_id = other.m_id; other.m_id = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the device object. ~device() { #ifdef CL_VERSION_1_2 if(m_id && is_subdevice()){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseDevice(m_id) ); } #endif } /// Returns the ID of the device. cl_device_id id() const { return m_id; } /// Returns a reference to the underlying OpenCL device id. cl_device_id& get() const { return const_cast(m_id); } /// Returns the type of the device. cl_device_type type() const { return get_info(CL_DEVICE_TYPE); } #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED /// Returns the platform for the device. platform platform() const; #else boost::compute::platform platform() const; #endif /// Returns the name of the device. std::string name() const { return get_info(CL_DEVICE_NAME); } /// Returns the name of the vendor for the device. 
std::string vendor() const { return get_info(CL_DEVICE_VENDOR); } /// Returns the device profile string. std::string profile() const { return get_info(CL_DEVICE_PROFILE); } /// Returns the device version string. std::string version() const { return get_info(CL_DEVICE_VERSION); } /// Returns the driver version string. std::string driver_version() const { return get_info(CL_DRIVER_VERSION); } /// Returns a list of extensions supported by the device. std::vector extensions() const { std::string extensions_string = get_info(CL_DEVICE_EXTENSIONS); std::vector extensions_vector; boost::split(extensions_vector, extensions_string, boost::is_any_of("\t "), boost::token_compress_on); return extensions_vector; } /// Returns \c true if the device supports the extension with /// \p name. bool supports_extension(const std::string &name) const { const std::vector extensions = this->extensions(); return std::find( extensions.begin(), extensions.end(), name) != extensions.end(); } /// Returns the number of address bits. uint_ address_bits() const { return get_info(CL_DEVICE_ADDRESS_BITS); } /// Returns the global memory size in bytes. ulong_ global_memory_size() const { return get_info(CL_DEVICE_GLOBAL_MEM_SIZE); } /// Returns the local memory size in bytes. ulong_ local_memory_size() const { return get_info(CL_DEVICE_LOCAL_MEM_SIZE); } /// Returns the clock frequency for the device's compute units. uint_ clock_frequency() const { return get_info(CL_DEVICE_MAX_CLOCK_FREQUENCY); } /// Returns the number of compute units in the device. uint_ compute_units() const { return get_info(CL_DEVICE_MAX_COMPUTE_UNITS); } /// \internal_ ulong_ max_memory_alloc_size() const { return get_info(CL_DEVICE_MAX_MEM_ALLOC_SIZE); } /// \internal_ size_t max_work_group_size() const { return get_info(CL_DEVICE_MAX_WORK_GROUP_SIZE); } /// \internal_ uint_ max_work_item_dimensions() const { return get_info(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS); } /// Returns the preferred vector width for type \c T. 
template uint_ preferred_vector_width() const { return 0; } /// Returns the profiling timer resolution in nanoseconds. size_t profiling_timer_resolution() const { return get_info(CL_DEVICE_PROFILING_TIMER_RESOLUTION); } /// Returns \c true if the device is a sub-device. bool is_subdevice() const { #if defined(CL_VERSION_1_2) try { return get_info(CL_DEVICE_PARENT_DEVICE) != 0; } catch(opencl_error&){ // the get_info() call above will throw if the device's opencl version // is less than 1.2 (in which case it can't be a sub-device). return false; } #else return false; #endif } /// Returns information about the device. /// /// For example, to get the number of compute units: /// \code /// device.get_info(CL_DEVICE_MAX_COMPUTE_UNITS); /// \endcode /// /// Alternatively, the template-specialized version can be used which /// automatically determines the result type: /// \code /// device.get_info(); /// \endcode /// /// \see_opencl_ref{clGetDeviceInfo} template T get_info(cl_device_info info) const { return detail::get_object_info(clGetDeviceInfo, m_id, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Partitions the device into multiple sub-devices according to /// \p properties. 
/// /// \opencl_version_warning{1,2} std::vector partition(const cl_device_partition_property *properties) const { // get sub-device count uint_ count = 0; int_ ret = clCreateSubDevices(m_id, properties, 0, 0, &count); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } // get sub-device ids std::vector ids(count); ret = clCreateSubDevices(m_id, properties, count, &ids[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } // convert ids to device objects std::vector devices(count); for(size_t i = 0; i < count; i++){ devices[i] = device(ids[i], false); } return devices; } /// \opencl_version_warning{1,2} std::vector partition_equally(size_t count) const { cl_device_partition_property properties[] = { CL_DEVICE_PARTITION_EQUALLY, static_cast(count), 0 }; return partition(properties); } /// \opencl_version_warning{1,2} std::vector partition_by_counts(const std::vector &counts) const { std::vector properties; properties.push_back(CL_DEVICE_PARTITION_BY_COUNTS); for(size_t i = 0; i < counts.size(); i++){ properties.push_back( static_cast(counts[i])); } properties.push_back(CL_DEVICE_PARTITION_BY_COUNTS_LIST_END); properties.push_back(0); return partition(&properties[0]); } /// \opencl_version_warning{1,2} std::vector partition_by_affinity_domain(cl_device_affinity_domain domain) const { cl_device_partition_property properties[] = { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, static_cast(domain), 0 }; return partition(properties); } #endif // CL_VERSION_1_2 /// Returns \c true if the device is the same at \p other. bool operator==(const device &other) const { return m_id == other.m_id; } /// Returns \c true if the device is different from \p other. bool operator!=(const device &other) const { return m_id != other.m_id; } /// \internal_ bool check_version(int major, int minor) const { std::stringstream stream; stream << version(); int actual_major, actual_minor; stream.ignore(7); // 'OpenCL ' stream >> actual_major; stream.ignore(1); // '.' 
stream >> actual_minor; return actual_major > major || (actual_major == major && actual_minor >= minor); } private: cl_device_id m_id; }; /// \internal_ template<> inline uint_ device::preferred_vector_width() const { return get_info(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT); } /// \internal_ template<> inline uint_ device::preferred_vector_width() const { return get_info(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT); } /// \internal_ template<> inline uint_ device::preferred_vector_width() const { return get_info(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG); } /// \internal_ template<> inline uint_ device::preferred_vector_width() const { return get_info(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT); } /// \internal_ template<> inline uint_ device::preferred_vector_width() const { return get_info(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE); } /// \internal_ define get_info() specializations for device BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((cl_uint, CL_DEVICE_ADDRESS_BITS)) ((bool, CL_DEVICE_AVAILABLE)) ((bool, CL_DEVICE_COMPILER_AVAILABLE)) ((bool, CL_DEVICE_ENDIAN_LITTLE)) ((bool, CL_DEVICE_ERROR_CORRECTION_SUPPORT)) ((cl_device_exec_capabilities, CL_DEVICE_EXECUTION_CAPABILITIES)) ((std::string, CL_DEVICE_EXTENSIONS)) ((cl_ulong, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE)) ((cl_device_mem_cache_type, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE)) ((cl_ulong, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE)) ((cl_ulong, CL_DEVICE_GLOBAL_MEM_SIZE)) ((bool, CL_DEVICE_IMAGE_SUPPORT)) ((size_t, CL_DEVICE_IMAGE2D_MAX_HEIGHT)) ((size_t, CL_DEVICE_IMAGE2D_MAX_WIDTH)) ((size_t, CL_DEVICE_IMAGE3D_MAX_DEPTH)) ((size_t, CL_DEVICE_IMAGE3D_MAX_HEIGHT)) ((size_t, CL_DEVICE_IMAGE3D_MAX_WIDTH)) ((cl_ulong, CL_DEVICE_LOCAL_MEM_SIZE)) ((cl_device_local_mem_type, CL_DEVICE_LOCAL_MEM_TYPE)) ((cl_uint, CL_DEVICE_MAX_CLOCK_FREQUENCY)) ((cl_uint, CL_DEVICE_MAX_COMPUTE_UNITS)) ((cl_uint, CL_DEVICE_MAX_CONSTANT_ARGS)) ((cl_ulong, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE)) ((cl_ulong, CL_DEVICE_MAX_MEM_ALLOC_SIZE)) ((size_t, 
CL_DEVICE_MAX_PARAMETER_SIZE)) ((cl_uint, CL_DEVICE_MAX_READ_IMAGE_ARGS)) ((cl_uint, CL_DEVICE_MAX_SAMPLERS)) ((size_t, CL_DEVICE_MAX_WORK_GROUP_SIZE)) ((cl_uint, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)) ((std::vector, CL_DEVICE_MAX_WORK_ITEM_SIZES)) ((cl_uint, CL_DEVICE_MAX_WRITE_IMAGE_ARGS)) ((cl_uint, CL_DEVICE_MEM_BASE_ADDR_ALIGN)) ((cl_uint, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE)) ((std::string, CL_DEVICE_NAME)) ((cl_platform_id, CL_DEVICE_PLATFORM)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT)) ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE)) ((std::string, CL_DEVICE_PROFILE)) ((size_t, CL_DEVICE_PROFILING_TIMER_RESOLUTION)) ((cl_command_queue_properties, CL_DEVICE_QUEUE_PROPERTIES)) ((cl_device_fp_config, CL_DEVICE_SINGLE_FP_CONFIG)) ((cl_device_type, CL_DEVICE_TYPE)) ((std::string, CL_DEVICE_VENDOR)) ((cl_uint, CL_DEVICE_VENDOR_ID)) ((std::string, CL_DEVICE_VERSION)) ((std::string, CL_DRIVER_VERSION)) ) #ifdef CL_DEVICE_DOUBLE_FP_CONFIG BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((cl_device_fp_config, CL_DEVICE_DOUBLE_FP_CONFIG)) ) #endif #ifdef CL_DEVICE_HALF_FP_CONFIG BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((cl_device_fp_config, CL_DEVICE_HALF_FP_CONFIG)) ) #endif #ifdef CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((bool, CL_DEVICE_HOST_UNIFIED_MEMORY)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE)) ((std::string, CL_DEVICE_OPENCL_C_VERSION)) ) #endif // CL_VERSION_1_1 #ifdef CL_VERSION_1_2 
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((std::string, CL_DEVICE_BUILT_IN_KERNELS)) ((bool, CL_DEVICE_LINKER_AVAILABLE)) ((cl_device_id, CL_DEVICE_PARENT_DEVICE)) ((cl_uint, CL_DEVICE_PARTITION_MAX_SUB_DEVICES)) ((cl_device_partition_property, CL_DEVICE_PARTITION_PROPERTIES)) ((cl_device_affinity_domain, CL_DEVICE_PARTITION_AFFINITY_DOMAIN)) ((cl_device_partition_property, CL_DEVICE_PARTITION_TYPE)) ((size_t, CL_DEVICE_PRINTF_BUFFER_SIZE)) ((bool, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC)) ((cl_uint, CL_DEVICE_REFERENCE_COUNT)) ) #endif // CL_VERSION_1_2 #ifdef CL_VERSION_2_0 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((size_t, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE)) ((size_t, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE)) ((cl_uint, CL_DEVICE_MAX_ON_DEVICE_EVENTS)) ((cl_uint, CL_DEVICE_MAX_ON_DEVICE_QUEUES)) ((cl_uint, CL_DEVICE_MAX_PIPE_ARGS)) ((cl_uint, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS)) ((cl_uint, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS)) ((cl_uint, CL_DEVICE_PIPE_MAX_PACKET_SIZE)) ((cl_uint, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT)) ((cl_uint, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT)) ((cl_uint, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT)) ((cl_uint, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE)) ((cl_uint, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE)) ((cl_command_queue_properties, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES)) ((cl_device_svm_capabilities, CL_DEVICE_SVM_CAPABILITIES)) ((cl_uint, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT)) ((cl_uint, CL_DEVICE_IMAGE_PITCH_ALIGNMENT)) ) #endif // CL_VERSION_2_0 } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_DEVICE_HPP compute-0.5/include/boost/compute/event.hpp000066400000000000000000000237241263566244600211370ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy 
at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EVENT_HPP #define BOOST_COMPUTE_EVENT_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class event /// \brief An event corresponding to an operation on a compute device /// /// Event objects are used to track operations running on the device (such as /// kernel executions and memory transfers). Event objects are returned by the /// various \c enqueue_* methods of the command_queue class. /// /// Events can be used to synchronize operations between the host and the /// device. The \c wait() method will block execution on the host until the /// operation corresponding to the event on the device has completed. The /// status of the operation can also be polled with the \c status() method. /// /// Event objects can also be used for performance profiling. In order to use /// events for profiling, the command queue must be constructed with the /// \c CL_QUEUE_PROFILING_ENABLE flag. 
Then the \c duration() method can be /// used to retrieve the total duration of the operation on the device: /// \code /// std::cout << "time = " << e.duration().count() << "ms\n"; /// \endcode /// /// \see \ref future "future", wait_list class event { public: /// \internal_ enum execution_status { complete = CL_COMPLETE, running = CL_RUNNING, submitted = CL_SUBMITTED, queued = CL_QUEUED }; /// \internal_ enum command_type { ndrange_kernel = CL_COMMAND_NDRANGE_KERNEL, task = CL_COMMAND_TASK, native_kernel = CL_COMMAND_NATIVE_KERNEL, read_buffer = CL_COMMAND_READ_BUFFER, write_buffer = CL_COMMAND_WRITE_BUFFER, copy_buffer = CL_COMMAND_COPY_BUFFER, read_image = CL_COMMAND_READ_IMAGE, write_image = CL_COMMAND_WRITE_IMAGE, copy_image = CL_COMMAND_COPY_IMAGE, copy_image_to_buffer = CL_COMMAND_COPY_IMAGE_TO_BUFFER, copy_buffer_to_image = CL_COMMAND_COPY_BUFFER_TO_IMAGE, map_buffer = CL_COMMAND_MAP_BUFFER, map_image = CL_COMMAND_MAP_IMAGE, unmap_mem_object = CL_COMMAND_UNMAP_MEM_OBJECT, marker = CL_COMMAND_MARKER, aquire_gl_objects = CL_COMMAND_ACQUIRE_GL_OBJECTS, release_gl_object = CL_COMMAND_RELEASE_GL_OBJECTS #if defined(CL_VERSION_1_1) , read_buffer_rect = CL_COMMAND_READ_BUFFER_RECT, write_buffer_rect = CL_COMMAND_WRITE_BUFFER_RECT, copy_buffer_rect = CL_COMMAND_COPY_BUFFER_RECT #endif }; /// \internal_ enum profiling_info { profiling_command_queued = CL_PROFILING_COMMAND_QUEUED, profiling_command_submit = CL_PROFILING_COMMAND_SUBMIT, profiling_command_start = CL_PROFILING_COMMAND_START, profiling_command_end = CL_PROFILING_COMMAND_END }; /// Creates a null event object. event() : m_event(0) { } explicit event(cl_event event, bool retain = true) : m_event(event) { if(m_event && retain){ clRetainEvent(event); } } /// Makes a new event as a copy of \p other. event(const event &other) : m_event(other.m_event) { if(m_event){ clRetainEvent(m_event); } } /// Copies the event object from \p other to \c *this. 
/// Copies the event object from \p other to \c *this.
///
/// The event currently held (if any) is released and the event taken from
/// \p other (if any) is retained. Self-assignment does nothing.
event& operator=(const event &other)
{
    if(this == &other){
        return *this;
    }

    if(m_event){
        clReleaseEvent(m_event);
    }

    m_event = other.m_event;

    if(m_event){
        clRetainEvent(m_event);
    }

    return *this;
}
void wait() const { cl_int ret = clWaitForEvents(1, &m_event); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Registers a function to be called when the event status changes to /// \p status (by default CL_COMPLETE). The callback is passed the OpenCL /// event object, the event status, and a pointer to arbitrary user data. /// /// \see_opencl_ref{clSetEventCallback} /// /// \opencl_version_warning{1,1} void set_callback(void (BOOST_COMPUTE_CL_CALLBACK *callback)( cl_event event, cl_int status, void *user_data ), cl_int status = CL_COMPLETE, void *user_data = 0) { cl_int ret = clSetEventCallback(m_event, status, callback, user_data); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Registers a generic function to be called when the event status /// changes to \p status (by default \c CL_COMPLETE). /// /// The function specified by \p callback must be invokable with zero /// arguments (e.g. \c callback()). /// /// \opencl_version_warning{1,1} template void set_callback(Function callback, cl_int status = CL_COMPLETE) { set_callback( event_callback_invoker, status, new boost::function(callback) ); } #endif // CL_VERSION_1_1 /// Returns the total duration of the event from \p start to \p end. /// /// For example, to print the number of milliseconds the event took to /// execute: /// \code /// std::cout << event.duration().count() << " ms" << std::endl; /// \endcode /// /// \see event::get_profiling_info() template Duration duration(cl_profiling_info start = CL_PROFILING_COMMAND_START, cl_profiling_info end = CL_PROFILING_COMMAND_END) const { const ulong_ nanoseconds = get_profiling_info(end) - get_profiling_info(start); return detail::make_duration_from_nanoseconds(Duration(), nanoseconds); } /// Returns \c true if the event is the same as \p other. 
bool operator==(const event &other) const { return m_event == other.m_event; } /// Returns \c true if the event is different from \p other. bool operator!=(const event &other) const { return m_event != other.m_event; } /// \internal_ operator cl_event() const { return m_event; } /// \internal_ (deprecated) cl_int get_status() const { return status(); } private: #ifdef CL_VERSION_1_1 /// \internal_ static void BOOST_COMPUTE_CL_CALLBACK event_callback_invoker(cl_event, cl_int, void *user_data) { boost::function *callback = static_cast *>(user_data); (*callback)(); delete callback; } #endif // CL_VERSION_1_1 protected: cl_event m_event; }; /// \internal_ define get_info() specializations for event BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event, ((cl_command_queue, CL_EVENT_COMMAND_QUEUE)) ((cl_command_type, CL_EVENT_COMMAND_TYPE)) ((cl_int, CL_EVENT_COMMAND_EXECUTION_STATUS)) ((cl_uint, CL_EVENT_REFERENCE_COUNT)) ) #ifdef CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event, ((cl_context, CL_EVENT_CONTEXT)) ) #endif } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EVENT_HPP compute-0.5/include/boost/compute/exception.hpp000066400000000000000000000015321263566244600220050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXCEPTION_HPP #define BOOST_COMPUTE_EXCEPTION_HPP /// \file /// /// Meta-header to include all Boost.Compute exception headers. 
// Only pointers to context are stored, so a forward declaration suffices.
class context;

/// \class context_error
/// \brief A run-time OpenCL context error.
///
/// The context_error exception is thrown when the OpenCL context encounters
/// an error condition. Boost.Compute is notified of these error conditions by
/// registering an error handler when creating context objects (via the
/// \c pfn_notify argument to the \c clCreateContext() function).
///
/// This exception is different than the opencl_error exception which is thrown
/// as a result of error caused when calling a single OpenCL API function.
///
/// \see opencl_error
class context_error : public std::exception
{
public:
    /// Creates a new context error exception object.
    ///
    /// Only the pointers are stored; neither the error string nor the
    /// private info block is copied.
    context_error(const context *context,
                  const char *errinfo,
                  const void *private_info,
                  size_t private_info_size) throw()
        : m_context(context),
          m_errinfo(errinfo),
          m_private_info(private_info),
          m_private_info_size(private_info_size)
    {
    }

    /// Destroys the context error object.
    ~context_error() throw()
    {
    }

    /// Returns a string with a description of the error.
    ///
    /// Never returns a null pointer: if the object was created without an
    /// error string a generic description is returned instead, so the
    /// result can always be printed or compared safely.
    const char* what() const throw()
    {
        return m_errinfo ? m_errinfo : "OpenCL context error";
    }

    /// Returns a pointer to the context object which generated the error
    /// notification.
    const context* get_context_ptr() const throw()
    {
        return m_context;
    }

    /// Returns a pointer to the private info memory block.
    const void* get_private_info_ptr() const throw()
    {
        return m_private_info;
    }

    /// Returns the size of the private info memory block.
    size_t get_private_info_size() const throw()
    {
        return m_private_info_size;
    }

private:
    const context *m_context;
    const char *m_errinfo;
    const void *m_private_info;
    size_t m_private_info_size;
};
const char* what() const throw() { return "No OpenCL device found"; } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXCEPTION_NO_DEVICE_FOUND_HPP compute-0.5/include/boost/compute/exception/opencl_error.hpp000066400000000000000000000150111263566244600244730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP #define BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP #include #include #include #include namespace boost { namespace compute { /// \class opencl_error /// \brief A run-time OpenCL error. /// /// The opencl_error class represents an error returned from an OpenCL /// function. /// /// \see context_error class opencl_error : public std::exception { public: /// Creates a new opencl_error exception object for \p error. explicit opencl_error(cl_int error) throw() : m_error(error), m_error_string(to_string(error)) { } /// Destroys the opencl_error object. ~opencl_error() throw() { } /// Returns the numeric error code. cl_int error_code() const throw() { return m_error; } /// Returns a string description of the error. std::string error_string() const throw() { return m_error_string; } /// Returns a C-string description of the error. const char* what() const throw() { return m_error_string.c_str(); } /// Static function which converts the numeric OpenCL error code \p error /// to a human-readable string. /// /// For example: /// \code /// std::cout << opencl_error::to_string(CL_INVALID_KERNEL_ARGS) << std::endl; /// \endcode /// /// Will print "Invalid Kernel Arguments". 
/// /// If the error code is unknown (e.g. not a valid OpenCL error), a string /// containing "Unknown OpenCL Error" along with the error number will be /// returned. static std::string to_string(cl_int error) { switch(error){ case CL_SUCCESS: return "Success"; case CL_DEVICE_NOT_FOUND: return "Device Not Found"; case CL_DEVICE_NOT_AVAILABLE: return "Device Not Available"; case CL_COMPILER_NOT_AVAILABLE: return "Compiler Not Available"; case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory Object Allocation Failure"; case CL_OUT_OF_RESOURCES: return "Out of Resources"; case CL_OUT_OF_HOST_MEMORY: return "Out of Host Memory"; case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling Information Not Available"; case CL_MEM_COPY_OVERLAP: return "Memory Copy Overlap"; case CL_IMAGE_FORMAT_MISMATCH: return "Image Format Mismatch"; case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image Format Not Supported"; case CL_BUILD_PROGRAM_FAILURE: return "Build Program Failure"; case CL_MAP_FAILURE: return "Map Failure"; case CL_INVALID_VALUE: return "Invalid Value"; case CL_INVALID_DEVICE_TYPE: return "Invalid Device Type"; case CL_INVALID_PLATFORM: return "Invalid Platform"; case CL_INVALID_DEVICE: return "Invalid Device"; case CL_INVALID_CONTEXT: return "Invalid Context"; case CL_INVALID_QUEUE_PROPERTIES: return "Invalid Queue Properties"; case CL_INVALID_COMMAND_QUEUE: return "Invalid Command Queue"; case CL_INVALID_HOST_PTR: return "Invalid Host Pointer"; case CL_INVALID_MEM_OBJECT: return "Invalid Memory Object"; case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid Image Format Descriptor"; case CL_INVALID_IMAGE_SIZE: return "Invalid Image Size"; case CL_INVALID_SAMPLER: return "Invalid Sampler"; case CL_INVALID_BINARY: return "Invalid Binary"; case CL_INVALID_BUILD_OPTIONS: return "Invalid Build Options"; case CL_INVALID_PROGRAM: return "Invalid Program"; case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid Program Executable"; case CL_INVALID_KERNEL_NAME: return "Invalid 
Kernel Name"; case CL_INVALID_KERNEL_DEFINITION: return "Invalid Kernel Definition"; case CL_INVALID_KERNEL: return "Invalid Kernel"; case CL_INVALID_ARG_INDEX: return "Invalid Argument Index"; case CL_INVALID_ARG_VALUE: return "Invalid Argument Value"; case CL_INVALID_ARG_SIZE: return "Invalid Argument Size"; case CL_INVALID_KERNEL_ARGS: return "Invalid Kernel Arguments"; case CL_INVALID_WORK_DIMENSION: return "Invalid Work Dimension"; case CL_INVALID_WORK_GROUP_SIZE: return "Invalid Work Group Size"; case CL_INVALID_WORK_ITEM_SIZE: return "Invalid Work Item Size"; case CL_INVALID_GLOBAL_OFFSET: return "Invalid Global Offset"; case CL_INVALID_EVENT_WAIT_LIST: return "Invalid Event Wait List"; case CL_INVALID_EVENT: return "Invalid Event"; case CL_INVALID_OPERATION: return "Invalid Operation"; case CL_INVALID_GL_OBJECT: return "Invalid GL Object"; case CL_INVALID_BUFFER_SIZE: return "Invalid Buffer Size"; case CL_INVALID_MIP_LEVEL: return "Invalid MIP Level"; case CL_INVALID_GLOBAL_WORK_SIZE: return "Invalid Global Work Size"; #ifdef CL_VERSION_1_2 case CL_COMPILE_PROGRAM_FAILURE: return "Compile Program Failure"; case CL_LINKER_NOT_AVAILABLE: return "Linker Not Available"; case CL_LINK_PROGRAM_FAILURE: return "Link Program Failure"; case CL_DEVICE_PARTITION_FAILED: return "Device Partition Failed"; case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "Kernel Argument Info Not Available"; case CL_INVALID_PROPERTY: return "Invalid Property"; case CL_INVALID_IMAGE_DESCRIPTOR: return "Invalid Image Descriptor"; case CL_INVALID_COMPILER_OPTIONS: return "Invalid Compiler Options"; case CL_INVALID_LINKER_OPTIONS: return "Invalid Linker Options"; case CL_INVALID_DEVICE_PARTITION_COUNT: return "Invalid Device Partition Count"; #endif // CL_VERSION_1_2 #ifdef CL_VERSION_2_0 case CL_INVALID_PIPE_SIZE: return "Invalid Pipe Size"; case CL_INVALID_DEVICE_QUEUE: return "Invalid Device Queue"; #endif default: { std::stringstream s; s << "Unknown OpenCL Error (" << error << ")"; return 
s.str(); } } } private: cl_int m_error; std::string m_error_string; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP compute-0.5/include/boost/compute/exception/unsupported_extension_error.hpp000066400000000000000000000042301263566244600277000ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP #define BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP #include #include #include namespace boost { namespace compute { /// \class unsupported_extension_error /// \brief Exception thrown when attempting to use an unsupported /// OpenCL extension. /// /// This exception is thrown when the user attempts to use an OpenCL /// extension which is not supported on the platform and/or device. /// /// An example of this is attempting to use CL-GL sharing on a non-GPU /// device. /// /// \see opencl_error class unsupported_extension_error : public std::exception { public: /// Creates a new unsupported extension error exception object indicating /// that \p extension is not supported by the OpenCL platform or device. explicit unsupported_extension_error(const char *extension) throw() : m_extension(extension) { std::stringstream msg; msg << "OpenCL extension " << extension << " not supported"; m_error_string = msg.str(); } /// Destroys the unsupported extension error object. ~unsupported_extension_error() throw() { } /// Returns the name of the unsupported extension. 
std::string extension_name() const throw() { return m_extension; } /// Returns a string containing a human-readable error message containing /// the name of the unsupported exception. const char* what() const throw() { return m_error_string.c_str(); } private: std::string m_extension; std::string m_error_string; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP compute-0.5/include/boost/compute/experimental/000077500000000000000000000000001263566244600217725ustar00rootroot00000000000000compute-0.5/include/boost/compute/experimental/clamp_range.hpp000066400000000000000000000026741263566244600247640ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP #define BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP #include #include #include namespace boost { namespace compute { namespace experimental { template inline OutputIterator clamp_range(InputIterator first, InputIterator last, OutputIterator result, typename std::iterator_traits::value_type lo, typename std::iterator_traits::value_type hi, command_queue &queue) { using ::boost::compute::lambda::_1; using ::boost::compute::lambda::_2; using ::boost::compute::lambda::clamp; return ::boost::compute::transform( first, last, result, clamp(_1, lo, hi), queue ); } } // end experimental namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP compute-0.5/include/boost/compute/experimental/malloc.hpp000066400000000000000000000026511263566244600237560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP #define BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP #include #include #include #include namespace boost { namespace compute { namespace experimental { // bring device_ptr into the experimental namespace using detail::device_ptr; template inline device_ptr malloc(std::size_t size, const context &context = system::default_context()) { buffer buf(context, size * sizeof(T)); clRetainMemObject(buf.get()); return device_ptr(buf); } inline device_ptr malloc(std::size_t size, const context &context = system::default_context()) { return malloc(size, context); } template inline void free(device_ptr &ptr) { clReleaseMemObject(ptr.get_buffer().get()); } } // end experimental namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP compute-0.5/include/boost/compute/experimental/sort_by_transform.hpp000066400000000000000000000037051263566244600262640ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP #define BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP #include #include #include #include #include #include namespace boost { namespace compute { namespace experimental { template inline void sort_by_transform(Iterator first, Iterator last, Transform transform, Compare compare, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type value_type; typedef typename boost::compute::result_of::type key_type; size_t n = detail::iterator_range_size(first, last); if(n < 2){ return; } const context &context = queue.get_context(); ::boost::compute::vector keys(n, context); ::boost::compute::transform( first, last, keys.begin(), transform, queue ); ::boost::compute::sort_by_key( keys.begin(), keys.end(), first, compare, queue ); } } // end experimental namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP compute-0.5/include/boost/compute/experimental/tabulate.hpp000066400000000000000000000024701263566244600243070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP #define BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP #include #include #include namespace boost { namespace compute { namespace experimental { template inline void tabulate(Iterator first, Iterator last, UnaryFunction function, command_queue &queue) { size_t n = detail::iterator_range_size(first, last); ::boost::compute::transform( ::boost::compute::make_counting_iterator(0), ::boost::compute::make_counting_iterator(n), first, function, queue ); } } // end experimental namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP compute-0.5/include/boost/compute/function.hpp000066400000000000000000000270251263566244600216410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTION_HPP #define BOOST_COMPUTE_FUNCTION_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template class invoked_function { public: typedef ResultType result_type; BOOST_STATIC_CONSTANT( size_t, arity = boost::tuples::length::value ); invoked_function(const std::string &name, const std::string &source) : m_name(name), m_source(source) { } invoked_function(const std::string &name, const std::string &source, const std::map &definitions) : m_name(name), m_source(source), m_definitions(definitions) { } invoked_function(const std::string &name, const std::string &source, const ArgTuple &args) : m_name(name), m_source(source), m_args(args) { } invoked_function(const std::string &name, const std::string &source, const std::map &definitions, const ArgTuple &args) : m_name(name), m_source(source), m_definitions(definitions), m_args(args) { } std::string name() const { return m_name; } std::string source() const { return m_source; } const std::map& definitions() const { return m_definitions; } const ArgTuple& args() const { return m_args; } private: std::string m_name; std::string m_source; std::map m_definitions; ArgTuple m_args; }; } // end detail namespace /// \class function /// \brief A function object. template class function { public: /// \internal_ typedef typename boost::function_traits::result_type result_type; /// \internal_ BOOST_STATIC_CONSTANT( size_t, arity = boost::function_traits::arity ); /// \internal_ typedef Signature signature; /// Creates a new function object with \p name. function(const std::string &name) : m_name(name) { } /// Destroys the function object. 
~function() { } /// \internal_ std::string name() const { return m_name; } /// \internal_ void set_source(const std::string &source) { m_source = source; } /// \internal_ std::string source() const { return m_source; } /// \internal_ void define(std::string name, std::string value = std::string()) { m_definitions[name] = value; } /// \internal_ detail::invoked_function > operator()() const { BOOST_STATIC_ASSERT_MSG( arity == 0, "Non-nullary function invoked with zero arguments" ); return detail::invoked_function >( m_name, m_source, m_definitions ); } /// \internal_ template detail::invoked_function > operator()(const Arg1 &arg1) const { BOOST_STATIC_ASSERT_MSG( arity == 1, "Non-unary function invoked one argument" ); return detail::invoked_function >( m_name, m_source, m_definitions, boost::make_tuple(arg1) ); } /// \internal_ template detail::invoked_function > operator()(const Arg1 &arg1, const Arg2 &arg2) const { BOOST_STATIC_ASSERT_MSG( arity == 2, "Non-binary function invoked with two arguments" ); return detail::invoked_function >( m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2) ); } /// \internal_ template detail::invoked_function > operator()(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) const { BOOST_STATIC_ASSERT_MSG( arity == 3, "Non-ternary function invoked with two arguments" ); return detail::invoked_function >( m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2, arg3) ); } private: std::string m_name; std::string m_source; std::map m_definitions; }; /// Creates a function object given its \p name and \p source. /// /// \param name The function name. /// \param source The function source code. 
/// /// \see BOOST_COMPUTE_FUNCTION() template inline function make_function_from_source(const std::string &name, const std::string &source) { function f(name); f.set_source(source); return f; } namespace detail { // given a string containing the arguments declaration for a function // like: "(int a, const float b)", returns a vector containing the name // of each argument (e.g. ["a", "b"]). inline std::vector parse_argument_names(const char *arguments) { BOOST_ASSERT_MSG( arguments[0] == '(' && arguments[std::strlen(arguments)-1] == ')', "Arguments should start and end with parentheses" ); std::vector args; size_t last_space = 0; size_t skip_comma = 0; for(size_t i = 1; i < std::strlen(arguments) - 2; i++){ const char c = arguments[i]; if(c == ' '){ last_space = i; } else if(c == ',' && !skip_comma){ std::string name( arguments + last_space + 1, i - last_space - 1 ); args.push_back(name); } else if(c == '<'){ skip_comma++; } else if(c == '>'){ skip_comma--; } } std::string last_argument( arguments + last_space + 1, std::strlen(arguments) - last_space - 2 ); args.push_back(last_argument); return args; } struct signature_argument_inserter { signature_argument_inserter(std::stringstream &s_, const char *arguments, size_t last) : s(s_) { n = 0; m_last = last; m_argument_names = parse_argument_names(arguments); BOOST_ASSERT_MSG( m_argument_names.size() == last, "Wrong number of arguments" ); } template void operator()(const T*) { s << type_name() << " " << m_argument_names[n]; if(n+1 < m_last){ s << ", "; } n++; } size_t n; size_t m_last; std::stringstream &s; std::vector m_argument_names; }; template inline std::string make_function_declaration(const char *name, const char *arguments) { typedef typename boost::function_traits::result_type result_type; typedef typename boost::function_types::parameter_types::type parameter_types; typedef typename mpl::size::type arity_type; std::stringstream s; s << "inline " << type_name() << " " << name; s << "("; 
if(arity_type::value > 0){ signature_argument_inserter i(s, arguments, arity_type::value); mpl::for_each< typename mpl::transform >::type>(i); } s << ")"; return s.str(); } struct argument_list_inserter { argument_list_inserter(std::stringstream &s_, const char first, size_t last) : s(s_) { n = 0; m_last = last; m_name = first; } template void operator()(const T*) { s << type_name() << " " << m_name++; if(n+1 < m_last){ s << ", "; } n++; } size_t n; size_t m_last; char m_name; std::stringstream &s; }; template inline std::string generate_argument_list(const char first = 'a') { typedef typename boost::function_types::parameter_types::type parameter_types; typedef typename mpl::size::type arity_type; std::stringstream s; s << '('; if(arity_type::value > 0){ argument_list_inserter i(s, first, arity_type::value); mpl::for_each< typename mpl::transform >::type>(i); } s << ')'; return s.str(); } // used by the BOOST_COMPUTE_FUNCTION() macro to create a function // with the given signature, name, arguments, and source. template inline function make_function_impl(const char *name, const char *arguments, const char *source) { std::stringstream s; s << make_function_declaration(name, arguments); s << source; return make_function_from_source(name, s.str()); } } // end detail namespace } // end compute namespace } // end boost namespace /// Creates a function object with \p name and \p source. /// /// \param return_type The return type for the function. /// \param name The name of the function. /// \param arguments A list of arguments for the function. /// \param source The OpenCL C source code for the function. /// /// The function declaration and signature are automatically created using /// the \p return_type, \p name, and \p arguments macro parameters. /// /// The source code for the function is interpreted as OpenCL C99 source code /// which is stringified and passed to the OpenCL compiler when the function /// is invoked. 
/// /// For example, to create a function which squares a number: /// \code /// BOOST_COMPUTE_FUNCTION(float, square, (float x), /// { /// return x * x; /// }); /// \endcode /// /// And to create a function which sums two numbers: /// \code /// BOOST_COMPUTE_FUNCTION(int, sum_two, (int x, int y), /// { /// return x + y; /// }); /// \endcode /// /// \see BOOST_COMPUTE_CLOSURE() #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED #define BOOST_COMPUTE_FUNCTION(return_type, name, arguments, source) #else #define BOOST_COMPUTE_FUNCTION(return_type, name, arguments, ...) \ ::boost::compute::function name = \ ::boost::compute::detail::make_function_impl( \ #name, #arguments, #__VA_ARGS__ \ ) #endif #endif // BOOST_COMPUTE_FUNCTION_HPP compute-0.5/include/boost/compute/functional.hpp000066400000000000000000000024771263566244600221620ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_HPP #define BOOST_COMPUTE_FUNCTIONAL_HPP /// \file /// /// Meta-header to include all Boost.Compute functional headers. 
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif // BOOST_COMPUTE_FUNCTIONAL_HPP compute-0.5/include/boost/compute/functional/000077500000000000000000000000001263566244600214375ustar00rootroot00000000000000compute-0.5/include/boost/compute/functional/as.hpp000066400000000000000000000022601263566244600225530ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_AS_HPP #define BOOST_COMPUTE_FUNCTIONAL_AS_HPP namespace boost { namespace compute { namespace detail { template struct invoked_as { invoked_as(const Arg &arg) : m_arg(arg) { } Arg m_arg; }; } // end detail namespace /// The \ref as function converts its argument to type \c T (similar to /// reinterpret_cast). /// /// \see \ref convert "convert" template struct as { typedef T result_type; /// \internal_ template detail::invoked_as operator()(const Arg &arg) const { return detail::invoked_as(arg); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_AS_HPP compute-0.5/include/boost/compute/functional/atomic.hpp000066400000000000000000000055671263566244600234410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP #define BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP #include #include #ifndef BOOST_COMPUTE_DOXYGEN_INVOKED #ifdef CL_VERSION_1_1 #define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atomic_" #else #define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atom_" #endif #endif // BOOST_COMPUTE_DOXYGEN_INVOKED namespace boost { namespace compute { template class atomic_add : public function { public: atomic_add() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "add") { } }; template class atomic_sub : public function { public: atomic_sub() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "sub") { } }; template class atomic_xchg : public function { public: atomic_xchg() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "xchg") { } }; template class atomic_inc : public function { public: atomic_inc() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "inc") { } }; template class atomic_dec : public function { public: atomic_dec() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "dec") { } }; template class atomic_cmpxchg : public function { public: atomic_cmpxchg() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "cmpxchg") { } }; template class atomic_max : public function { public: atomic_max() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "max") { } }; template class atomic_min : public function { public: atomic_min() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "min") { } }; template class atomic_and : public function { public: atomic_and() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "and") { } }; template class atomic_or : public function { public: atomic_or() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "or") { } }; template class atomic_xor : public function { public: atomic_xor() : function(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "xor") { } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP 
compute-0.5/include/boost/compute/functional/bind.hpp000066400000000000000000000160761263566244600230760ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_BIND_HPP #define BOOST_COMPUTE_FUNCTIONAL_BIND_HPP #include #include #include #include #include namespace boost { namespace compute { namespace placeholders { /// \internal_ template struct placeholder : boost::integral_constant { placeholder() { } }; placeholder<0> const _1; placeholder<1> const _2; } // end placeholders namespace /// Meta-function returning \c true if \c T is a placeholder type. template struct is_placeholder : boost::false_type { }; /// \internal_ template struct is_placeholder > : boost::true_type { }; namespace detail { template struct invoked_bound_function { invoked_bound_function(Function f, BoundArgs bound_args, Args args) : m_function(f), m_bound_args(bound_args), m_args(args) { } // meta-function returning true if the N'th argument is a placeholder template struct is_placeholder_arg { typedef typename boost::tuples::element::type nth_bound_arg; typedef typename is_placeholder::type type; static const bool value = is_placeholder::value; }; template struct get_arg_type { typedef Arg type; }; template struct get_arg_type > { typedef typename boost::tuples::element::type type; }; // meta-function returning the type of the N'th argument when invoked template struct get_nth_arg_type { typedef typename boost::tuples::element::type nth_bound_arg; typedef typename get_arg_type::type type; }; template typename get_nth_arg_type::type get_nth_arg( typename 
boost::enable_if_c::value>::type* = 0 ) const { typedef typename boost::tuples::element::type nth_bound_arg; return boost::get(m_args); } template typename get_nth_arg_type::type get_nth_arg( typename boost::disable_if_c::value>::type* = 0 ) const { return boost::get(m_bound_args); } Function m_function; BoundArgs m_bound_args; Args m_args; }; template inline meta_kernel& apply_invoked_bound_function( meta_kernel &k, const invoked_bound_function &expr, typename boost::enable_if_c< boost::tuples::length::value == 1 >::type* = 0 ) { return k << expr.m_function(expr.template get_nth_arg<0>()); } template inline meta_kernel& apply_invoked_bound_function( meta_kernel &k, const invoked_bound_function &expr, typename boost::enable_if_c< boost::tuples::length::value == 2 >::type* = 0 ) { return k << expr.m_function(expr.template get_nth_arg<0>(), expr.template get_nth_arg<1>()); } template inline meta_kernel& apply_invoked_bound_function( meta_kernel &k, const invoked_bound_function &expr, typename boost::enable_if_c< boost::tuples::length::value == 3 >::type* = 0 ) { return k << expr.m_function(expr.template get_nth_arg<0>(), expr.template get_nth_arg<1>(), expr.template get_nth_arg<2>()); } template inline meta_kernel& operator<<( meta_kernel &k, const invoked_bound_function &expr ) { return apply_invoked_bound_function(k, expr); } template struct bound_function { typedef int result_type; bound_function(Function f, BoundArgs args) : m_function(f), m_args(args) { } template detail::invoked_bound_function< Function, BoundArgs, boost::tuple > operator()(const Arg1 &arg1) const { return detail::invoked_bound_function< Function, BoundArgs, boost::tuple >(m_function, m_args, boost::make_tuple(arg1)); } template detail::invoked_bound_function< Function, BoundArgs, boost::tuple > operator()(const Arg1 &arg1, const Arg2 &arg2) const { return detail::invoked_bound_function< Function, BoundArgs, boost::tuple >(m_function, m_args, boost::make_tuple(arg1, arg2)); } Function 
m_function; BoundArgs m_args; }; } // end detail namespace #if !defined(BOOST_COMPUTE_NO_VARIADIC_TEMPLATES) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Returns a function wrapper which invokes \p f with \p args when called. /// /// For example, to generate a unary function object which returns \c true /// when its argument is less than \c 7: /// \code /// using boost::compute::less; /// using boost::compute::placeholders::_1; /// /// auto less_than_seven = boost::compute::bind(less(), _1, 7); /// \endcode template inline detail::bound_function > bind(F f, Args... args) { typedef typename boost::tuple ArgsTuple; return detail::bound_function(f, boost::make_tuple(args...)); } #else template inline detail::bound_function > bind(F f, A1 a1) { typedef typename boost::tuple Args; return detail::bound_function(f, boost::make_tuple(a1)); } template inline detail::bound_function > bind(F f, A1 a1, A2 a2) { typedef typename boost::tuple Args; return detail::bound_function(f, boost::make_tuple(a1, a2)); } template inline detail::bound_function > bind(F f, A1 a1, A2 a2, A3 a3) { typedef typename boost::tuple Args; return detail::bound_function(f, boost::make_tuple(a1, a2, a3)); } #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_BIND_HPP compute-0.5/include/boost/compute/functional/common.hpp000066400000000000000000000021351263566244600234410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP #define BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP #include namespace boost { namespace compute { BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(clamp, T (T, T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(degrees, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(radians, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sign, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(smoothstep, T (T, T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(step, T (T, T), class T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP compute-0.5/include/boost/compute/functional/convert.hpp000066400000000000000000000023161263566244600236320ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP #define BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP namespace boost { namespace compute { namespace detail { template struct invoked_convert { invoked_convert(const Arg &arg) : m_arg(arg) { } Arg m_arg; }; } // end detail namespace /// The \ref convert function converts its argument to type \c T (similar to /// static_cast). 
/// /// \see \ref as "as" template struct convert { typedef T result_type; /// \internal_ template detail::invoked_convert operator()(const Arg &arg) const { return detail::invoked_convert(arg); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP compute-0.5/include/boost/compute/functional/detail/000077500000000000000000000000001263566244600227015ustar00rootroot00000000000000compute-0.5/include/boost/compute/functional/detail/macros.hpp000066400000000000000000000023501263566244600246760ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP #define BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP #include #include #include #define BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(name, signature, template_args) \ template \ class name : public function \ { \ public: \ (name)() : function(BOOST_PP_STRINGIZE(name)) { } \ }; #define BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(name, signature, template_args) \ template \ class BOOST_PP_CAT(name, _) : public function \ { \ public: \ BOOST_PP_CAT(name, _)() : function(BOOST_PP_STRINGIZE(name)) { } \ }; #endif // BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP compute-0.5/include/boost/compute/functional/detail/nvidia_ballot.hpp000066400000000000000000000026331263566244600262250ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // 
See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP #define BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP #include #include namespace boost { namespace compute { namespace detail { template class nvidia_ballot : public function { public: nvidia_ballot() : function("nvidia_ballot") { this->set_source( "inline uint nvidia_ballot(const uint x)\n" "{\n" " uint result;\n" " asm volatile(\n" " \"setp.ne.u32 %%p1, %1, 0;\"\n" " \"vote.ballot.b32 %0, %%p1;\"\n" " : \"=r\"(result)\n" " : \"r\"(x)\n" " );\n" " return result;\n" "}\n" ); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP compute-0.5/include/boost/compute/functional/detail/nvidia_popcount.hpp000066400000000000000000000023121263566244600266110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP #define BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP #include namespace boost { namespace compute { namespace detail { template class nvidia_popcount : public function { public: nvidia_popcount() : function("nvidia_popcount") { this->set_source( "inline uint nvidia_popcount(const uint x)\n" "{\n" " uint count;\n" " asm(\"popc.b32 %0, %1;\" : \"=r\"(count) : \"r\"(x));\n" " return count;\n" "}\n" ); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP compute-0.5/include/boost/compute/functional/detail/unpack.hpp000066400000000000000000000101561263566244600246760ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP #define BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP #include #include #include #include #include namespace boost { namespace compute { namespace detail { template struct invoked_unpacked { invoked_unpacked(const Function &f, const Arg &arg) : m_function(f), m_arg(arg) { } Function m_function; Arg m_arg; }; template inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked &expr); template inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked &expr) { return k << expr.m_function(get<0>()(expr.m_arg)); } template inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked &expr) { return k << expr.m_function(get<0>()(expr.m_arg), get<1>()(expr.m_arg)); } template inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked &expr) { return k << expr.m_function(get<0>()(expr.m_arg), get<1>()(expr.m_arg), get<2>()(expr.m_arg)); } template struct unpacked { template struct aggregate_length { BOOST_STATIC_CONSTANT(size_t, value = boost::tuples::length::value); }; template struct aggregate_length >::type> { BOOST_STATIC_CONSTANT(size_t, value = vector_size::value); }; template struct result_impl {}; template struct result_impl { typedef typename detail::get_result_type<0, TupleArg>::type T1; typedef typename boost::compute::result_of::type type; }; template struct result_impl { typedef typename detail::get_result_type<0, TupleArg>::type T1; typedef typename detail::get_result_type<1, TupleArg>::type T2; typedef typename boost::compute::result_of::type type; }; template struct result_impl { typedef typename detail::get_result_type<0, TupleArg>::type T1; typedef typename detail::get_result_type<1, TupleArg>::type T2; typedef typename detail::get_result_type<2, TupleArg>::type T3; typedef typename boost::compute::result_of::type type; }; template struct result {}; template struct result { typedef 
typename result_impl::value>::type type; }; unpacked(const Function &f) : m_function(f) { } template detail::invoked_unpacked< Function, Arg, aggregate_length::value > operator()(const Arg &arg) const { return detail::invoked_unpacked< Function, Arg, aggregate_length::value >(m_function, arg); } Function m_function; }; template inline unpacked unpack(const Function &f) { return unpacked(f); } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP compute-0.5/include/boost/compute/functional/field.hpp000066400000000000000000000040611263566244600232340ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP #define BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP #include namespace boost { namespace compute { namespace detail { template struct invoked_field { typedef T result_type; invoked_field(const Arg &arg, const std::string &field) : m_arg(arg), m_field(field) { } Arg m_arg; std::string m_field; }; } // end detail namespace /// Returns the named field from a value. /// /// The template-type \c T specifies the field's value type. Note /// that the value type must match the actual type of the field /// otherwise runtime compilation or logic errors may occur. /// /// For example, to access the \c second field in a /// \c std::pair object: /// \code /// field("second"); /// \endcode /// /// This can also be used with vector types to access individual /// components as well as perform swizzle operations. 
/// /// For example, to access the first and third components of an /// \c int vector type (e.g. \c int4): /// \code /// field("xz"); /// \endcode /// /// \see \ref get "get" template class field { public: /// Result type. typedef T result_type; /// Creates a new field functor with \p field. field(const std::string &field) : m_field(field) { } /// \internal_ template detail::invoked_field operator()(const Arg &arg) const { return detail::invoked_field(arg, m_field); } private: std::string m_field; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP compute-0.5/include/boost/compute/functional/geometry.hpp000066400000000000000000000026471263566244600240140ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP #define BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP #include #include namespace boost { namespace compute { BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cross, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(dot, typename scalar_type::type (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(distance, typename scalar_type::type (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_distance, typename scalar_type::type (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(length, typename scalar_type::type (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_length, typename scalar_type::type (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(normalize, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_normalize, T (T), class T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP compute-0.5/include/boost/compute/functional/get.hpp000066400000000000000000000035331263566244600227330ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_GET_HPP #define BOOST_COMPUTE_FUNCTIONAL_GET_HPP #include #include #include namespace boost { namespace compute { namespace detail { // meta-function returning the result type for get() template struct get_result_type { typedef typename scalar_type::type type; }; template struct invoked_get { typedef typename get_result_type::type result_type; invoked_get(const Arg &arg) : m_arg(arg) { } Arg m_arg; }; } // end detail namespace /// Returns the \c N'th element of an aggregate type (e.g. scalarN, /// pair, tuple, etc.). /// /// \see \ref field "field" template struct get { /// \internal_ template struct result; /// \internal_ template struct result { typedef typename detail::get_result_type::type type; }; template detail::invoked_get< N, Arg, typename boost::remove_cv::type > operator()(const Arg &arg) const { typedef typename boost::remove_cv::type T; return detail::invoked_get(arg); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_GET_HPP compute-0.5/include/boost/compute/functional/hash.hpp000066400000000000000000000050151263566244600230740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_HASH_HPP #define BOOST_COMPUTE_FUNCTIONAL_HASH_HPP #include #include namespace boost { namespace compute { namespace detail { template std::string make_hash_function_name() { return std::string("boost_hash_") + type_name(); } template inline std::string make_hash_function_source() { std::stringstream source; source << "inline ulong " << make_hash_function_name() << "(const " << type_name() << " x)\n" << "{\n" // note we reinterpret the argument as a 32-bit uint and // then promote it to a 64-bit ulong for the result type << " ulong a = as_uint(x);\n" << " a = (a ^ 61) ^ (a >> 16);\n" << " a = a + (a << 3);\n" << " a = a ^ (a >> 4);\n" << " a = a * 0x27d4eb2d;\n" << " a = a ^ (a >> 15);\n" << " return a;\n" << "}\n"; return source.str(); } template struct hash_impl { typedef Key argument_type; typedef ulong_ result_type; hash_impl() : m_function("") { m_function = make_function_from_source( make_hash_function_name(), make_hash_function_source() ); } template invoked_function > operator()(const Arg &arg) const { return m_function(arg); } function m_function; }; } // end detail namespace /// The hash function returns a hash value for the input value. /// /// The return type is \c ulong_ (the OpenCL unsigned long type). 
template struct hash; /// \internal_ template<> struct hash : detail::hash_impl { }; /// \internal_ template<> struct hash : detail::hash_impl { }; /// \internal_ template<> struct hash : detail::hash_impl { }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_HASH_HPP compute-0.5/include/boost/compute/functional/identity.hpp000066400000000000000000000030211263566244600237750ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP #define BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP namespace boost { namespace compute { namespace detail { template struct invoked_identity { typedef T result_type; invoked_identity(const Arg &arg) : m_arg(arg) { } Arg m_arg; }; } // end detail namespace /// Identity function which simply returns its input. /// /// For example, to directly copy values using the transform() algorithm: /// \code /// transform(input.begin(), input.end(), output.begin(), identity(), queue); /// \endcode /// /// \see \ref as "as", \ref convert "convert" template class identity { public: /// Identity function result type. typedef T result_type; /// Creates a new identity function. 
identity() { } /// \internal_ template detail::invoked_identity operator()(const Arg &arg) const { return detail::invoked_identity(arg); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP compute-0.5/include/boost/compute/functional/integer.hpp000066400000000000000000000022301263566244600236020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP #define BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP #include namespace boost { namespace compute { BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(abs, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(abs_diff, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(add_sat, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(hadd, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rhadd, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(max, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(min, T (T, T), class T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP compute-0.5/include/boost/compute/functional/logical.hpp000066400000000000000000000111631263566244600235640ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP #define BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP namespace boost { namespace compute { namespace detail { template class invoked_unary_negate_function { public: typedef int result_type; invoked_unary_negate_function(const Predicate &pred, const Expr &expr) : m_pred(pred), m_expr(expr) { } Predicate pred() const { return m_pred; } Expr expr() const { return m_expr; } private: Predicate m_pred; Expr m_expr; }; template class invoked_binary_negate_function { public: typedef int result_type; invoked_binary_negate_function(const Predicate &pred, const Expr1 &expr1, const Expr2 &expr2) : m_pred(pred), m_expr1(expr1), m_expr2(expr2) { } Predicate pred() const { return m_pred; } Expr1 expr1() const { return m_expr1; } Expr2 expr2() const { return m_expr2; } private: Predicate m_pred; Expr1 m_expr1; Expr2 m_expr2; }; } // end detail namespace /// \internal_ template struct unary_function { typedef Arg argument_type; typedef Result result_type; }; /// \internal_ template struct binary_function { typedef Arg1 first_argument_type; typedef Arg2 second_argument_type; typedef Result result_type; }; /// \internal_ template struct ternary_function { typedef Arg1 first_argument_type; typedef Arg2 second_argument_type; typedef Arg3 third_argument_type; typedef Result result_type; }; /// The unary_negate function adaptor negates a unary function. /// /// \see not1() template class unary_negate : public unary_function { public: explicit unary_negate(Predicate pred) : m_pred(pred) { } /// \internal_ template detail::invoked_unary_negate_function operator()(const Arg &arg) const { return detail::invoked_unary_negate_function< Predicate, Arg >(m_pred, arg); } private: Predicate m_pred; }; /// The binnary_negate function adaptor negates a binary function. 
/// /// \see not2() template class binary_negate : public binary_function { public: explicit binary_negate(Predicate pred) : m_pred(pred) { } /// \internal_ template detail::invoked_binary_negate_function operator()(const Arg1 &arg1, const Arg2 &arg2) const { return detail::invoked_binary_negate_function< Predicate, Arg1, Arg2 >(m_pred, arg1, arg2); } private: Predicate m_pred; }; /// Returns a unary_negate adaptor around \p predicate. /// /// \param predicate the unary function to wrap /// /// \return a unary_negate wrapper around \p predicate template inline unary_negate not1(const Predicate &predicate) { return unary_negate(predicate); } /// Returns a binary_negate adaptor around \p predicate. /// /// \param predicate the binary function to wrap /// /// \return a binary_negate wrapper around \p predicate template inline binary_negate not2(const Predicate &predicate) { return binary_negate(predicate); } /// The logical_not function negates its argument and returns it. /// /// \see not1(), not2() template struct logical_not : public unary_function { /// \internal_ template detail::invoked_function > operator()(const Expr &expr) const { return detail::invoked_function >( "!", std::string(), boost::make_tuple(expr) ); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP compute-0.5/include/boost/compute/functional/math.hpp000066400000000000000000000102711263566244600231020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_MATH_HPP #define BOOST_COMPUTE_FUNCTIONAL_MATH_HPP #include namespace boost { namespace compute { BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acos, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acosh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acospi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asin, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asinh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asinpi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan2, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atanh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atanpi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan2pi, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cbrt, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(ceil, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(copysign, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cos, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cosh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cospi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(erf, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(erfc, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp2, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp10, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(expm1, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fabs, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fdim, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(floor, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fma, T (T, T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmax, T (T, T), class T) 
BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmin, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmod, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(hypot, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(ilogb, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(lgamma, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log2, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log10, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log1p, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(logb, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(mad, T (T, T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(nextafter, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(pow, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(pown, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(powr, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(remainder, T (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rint, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rootn, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(round, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rsqrt, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sin, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sinh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sinpi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sqrt, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tan, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tanh, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tanpi, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tgamma, T (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(trunc, T (T), class T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_MATH_HPP 
compute-0.5/include/boost/compute/functional/operator.hpp000066400000000000000000000060011263566244600240000ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP #define BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP #include namespace boost { namespace compute { namespace detail { template struct invoked_binary_operator { typedef Result result_type; invoked_binary_operator(const std::string &op, const Expr1 &arg1, const Expr2 &arg2) : m_op(op), m_expr1(arg1), m_expr2(arg2) { } std::string op() const { return m_op; } Expr1 arg1() const { return m_expr1; } Expr2 arg2() const { return m_expr2; } std::string m_op; Expr1 m_expr1; Expr2 m_expr2; }; } // end detail namespace /// \internal_ #define BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(name, op, return_type, arg_type) \ template \ class name : public function \ { \ public: \ name() : function(BOOST_PP_STRINGIZE(name)) { } \ \ template \ detail::invoked_binary_operator \ operator()(const Arg1 &x, const Arg2 &y) const \ { \ return detail::invoked_binary_operator(op, x, y); \ } \ }; // arithmetic operations BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(plus, "+", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(minus, "-", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(multiplies, "*", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(divides, "/", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(modulus, "%", T, T) // comparisons BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(equal_to, "==", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(not_equal_to, "!=", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(greater, ">", T, T) 
BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(less, "<", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(greater_equal, ">=", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(less_equal, "<=", T, T) // logical operators BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(logical_and, "&&", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(logical_or, "||", T, T) // bitwise operations BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_and, "&", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_or, "|", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_xor, "^", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(shift_left, "<<", T, T) BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(shift_right, ">>", T, T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP compute-0.5/include/boost/compute/functional/popcount.hpp000066400000000000000000000034121263566244600240170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP #define BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP #include #include namespace boost { namespace compute { /// Returns the number of non-zero bits in \p x. 
/// /// \see_opencl_ref{popcount} template class popcount : public function { public: popcount() : function("boost_popcount") { std::stringstream s; s << "inline " << type_name() << " boost_popcount" << "(const " << type_name() << " x)\n" << "{\n" // use built-in popcount if opencl 1.2 is supported << "#if __OPENCL_VERSION__ >= 120\n" << " return popcount(x);\n" // fallback to generic popcount() implementation << "#else\n" << " " << type_name() << " count = 0;\n" << " for(" << type_name() << " i = 0; i < sizeof(i) * CHAR_BIT; i++){\n" << " if(x & (" << type_name() << ") 1 << i){\n" << " count++;\n" << " }\n" << " }\n" << " return count;\n" << "#endif\n" << "}\n"; this->set_source(s.str()); } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP compute-0.5/include/boost/compute/functional/relational.hpp000066400000000000000000000036771263566244600243170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP #define BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP #include namespace boost { namespace compute { BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isequal, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnotequal, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isgreater, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isgreaterequal, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isless, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(islessequal, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(islessgreater, int (T, T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isfinite, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isinf, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnan, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnormal, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isordered, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isunordered, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(signbit, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(any, int (T), class T) BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(all, int (T), class T) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP compute-0.5/include/boost/compute/image.hpp000066400000000000000000000015761263566244600211010ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_HPP #define BOOST_COMPUTE_IMAGE_HPP /// \file /// /// Meta-header to include all Boost.Compute image headers. #include #include #include #include #include #include #endif // BOOST_COMPUTE_IMAGE_HPP compute-0.5/include/boost/compute/image/000077500000000000000000000000001263566244600203575ustar00rootroot00000000000000compute-0.5/include/boost/compute/image/image1d.hpp000066400000000000000000000127371263566244600224110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE1D_HPP #define BOOST_COMPUTE_IMAGE_IMAGE1D_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { // forward declarations class command_queue; /// \class image1d /// \brief An OpenCL 1D image object /// /// \opencl_version_warning{1,2} /// /// \see image_format, image2d class image1d : public image_object { public: /// Creates a null image1d object. image1d() : image_object() { } /// Creates a new image1d object. 
/// /// \see_opencl_ref{clCreateImage} image1d(const context &context, size_t image_width, const image_format &format, cl_mem_flags flags = read_write, void *host_ptr = 0) { #ifdef CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE1D; desc.image_width = image_width; desc.image_height = 1; desc.image_depth = 1; desc.image_array_size = 0; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; #ifdef CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; #endif cl_int error = 0; m_mem = clCreateImage( context, flags, format.get_format_ptr(), &desc, host_ptr, &error ); if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } #else // image1d objects are only supported in OpenCL 1.2 and later BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); #endif } /// Creates a new image1d as a copy of \p other. image1d(const image1d &other) : image_object(other) { } /// Copies the image1d from \p other. image1d& operator=(const image1d &other) { image_object::operator=(other); return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new image object from \p other. image1d(image1d&& other) BOOST_NOEXCEPT : image_object(std::move(other)) { } /// Move-assigns the image from \p other to \c *this. image1d& operator=(image1d&& other) BOOST_NOEXCEPT { image_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the image1d object. ~image1d() { } /// Returns the size (width) of the image. extents<1> size() const { extents<1> size; size[0] = get_info(CL_IMAGE_WIDTH); return size; } /// Returns the origin of the image (\c 0). extents<1> origin() const { return extents<1>(); } /// Returns information about the image. 
/// /// \see_opencl_ref{clGetImageInfo} template T get_info(cl_image_info info) const { return get_image_info(info); } /// \overload template typename detail::get_object_info_type::type get_info() const; /// Returns the supported image formats for the context. /// /// \see_opencl_ref{clGetSupportedImageFormats} static std::vector get_supported_formats(const context &context, cl_mem_flags flags = read_write) { #ifdef CL_VERSION_1_2 return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE1D, flags); #else return std::vector(); #endif } /// Returns \c true if \p format is a supported 1D image format for /// \p context. static bool is_supported_format(const image_format &format, const context &context, cl_mem_flags flags = read_write) { #ifdef CL_VERSION_1_2 return image_object::is_supported_format( format, context, CL_MEM_OBJECT_IMAGE1D, flags ); #else return false; #endif } /// Creates a new image with a copy of the data in \c *this. Uses \p queue /// to perform the copy operation. 
image1d clone(command_queue &queue) const; }; /// \internal_ define get_info() specializations for image1d BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image1d, ((cl_image_format, CL_IMAGE_FORMAT)) ((size_t, CL_IMAGE_ELEMENT_SIZE)) ((size_t, CL_IMAGE_ROW_PITCH)) ((size_t, CL_IMAGE_SLICE_PITCH)) ((size_t, CL_IMAGE_WIDTH)) ((size_t, CL_IMAGE_HEIGHT)) ((size_t, CL_IMAGE_DEPTH)) ) namespace detail { // set_kernel_arg() specialization for image1d template<> struct set_kernel_arg : public set_kernel_arg { }; } // end detail namespace } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(boost::compute::image1d, image1d_t) #endif // BOOST_COMPUTE_IMAGE_IMAGE1D_HPP compute-0.5/include/boost/compute/image/image2d.hpp000066400000000000000000000170501263566244600224030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE2D_HPP #define BOOST_COMPUTE_IMAGE_IMAGE2D_HPP #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { // forward declarations class command_queue; /// \class image2d /// \brief An OpenCL 2D image object /// /// For example, to create a 640x480 8-bit RGBA image: /// /// \snippet test/test_image2d.cpp create_image /// /// \see image_format, image3d class image2d : public image_object { public: /// Creates a null image2d object. image2d() : image_object() { } /// Creates a new image2d object. 
/// /// \see_opencl_ref{clCreateImage} image2d(const context &context, size_t image_width, size_t image_height, const image_format &format, cl_mem_flags flags = read_write, void *host_ptr = 0, size_t image_row_pitch = 0) { cl_int error = 0; #ifdef CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = image_width; desc.image_height = image_height; desc.image_depth = 1; desc.image_array_size = 0; desc.image_row_pitch = image_row_pitch; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; #ifdef CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; #endif m_mem = clCreateImage(context, flags, format.get_format_ptr(), &desc, host_ptr, &error); #else m_mem = clCreateImage2D(context, flags, format.get_format_ptr(), image_width, image_height, image_row_pitch, host_ptr, &error); #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// \internal_ (deprecated) image2d(const context &context, cl_mem_flags flags, const image_format &format, size_t image_width, size_t image_height, size_t image_row_pitch = 0, void *host_ptr = 0) { cl_int error = 0; #ifdef CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = image_width; desc.image_height = image_height; desc.image_depth = 1; desc.image_array_size = 0; desc.image_row_pitch = image_row_pitch; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; #ifdef CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; #endif m_mem = clCreateImage(context, flags, format.get_format_ptr(), &desc, host_ptr, &error); #else m_mem = clCreateImage2D(context, flags, format.get_format_ptr(), image_width, image_height, image_row_pitch, host_ptr, &error); #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new image2d as a copy of \p other. image2d(const image2d &other) : image_object(other) { } /// Copies the image2d from \p other. 
image2d& operator=(const image2d &other) { image_object::operator=(other); return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new image object from \p other. image2d(image2d&& other) BOOST_NOEXCEPT : image_object(std::move(other)) { } /// Move-assigns the image from \p other to \c *this. image2d& operator=(image2d&& other) BOOST_NOEXCEPT { image_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the image2d object. ~image2d() { } /// Returns the size (width, height) of the image. extents<2> size() const { extents<2> size; size[0] = get_info(CL_IMAGE_WIDTH); size[1] = get_info(CL_IMAGE_HEIGHT); return size; } /// Returns the origin of the image (\c 0, \c 0). extents<2> origin() const { return extents<2>(); } /// Returns information about the image. /// /// \see_opencl_ref{clGetImageInfo} template T get_info(cl_image_info info) const { return detail::get_object_info(clGetImageInfo, m_mem, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; /// Returns the supported image formats for the context. /// /// \see_opencl_ref{clGetSupportedImageFormats} static std::vector get_supported_formats(const context &context, cl_mem_flags flags = read_write) { return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE2D, flags); } /// Returns \c true if \p format is a supported 2D image format for /// \p context. static bool is_supported_format(const image_format &format, const context &context, cl_mem_flags flags = read_write) { return image_object::is_supported_format( format, context, CL_MEM_OBJECT_IMAGE2D, flags ); } /// Creates a new image with a copy of the data in \c *this. Uses \p queue /// to perform the copy operation. 
image2d clone(command_queue &queue) const; }; /// \internal_ define get_info() specializations for image2d BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image2d, ((cl_image_format, CL_IMAGE_FORMAT)) ((size_t, CL_IMAGE_ELEMENT_SIZE)) ((size_t, CL_IMAGE_ROW_PITCH)) ((size_t, CL_IMAGE_SLICE_PITCH)) ((size_t, CL_IMAGE_WIDTH)) ((size_t, CL_IMAGE_HEIGHT)) ((size_t, CL_IMAGE_DEPTH)) ) namespace detail { // set_kernel_arg() specialization for image2d template<> struct set_kernel_arg : public set_kernel_arg { }; } // end detail namespace } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(boost::compute::image2d, image2d_t) #endif // BOOST_COMPUTE_IMAGE_IMAGE2D_HPP compute-0.5/include/boost/compute/image/image3d.hpp000066400000000000000000000174141263566244600224100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE3D_HPP #define BOOST_COMPUTE_IMAGE_IMAGE3D_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class image3d /// \brief An OpenCL 3D image object /// /// \see image_format, image2d class image3d : public image_object { public: /// Creates a null image3d object. image3d() : image_object() { } /// Creates a new image3d object. 
/// /// \see_opencl_ref{clCreateImage} image3d(const context &context, size_t image_width, size_t image_height, size_t image_depth, const image_format &format, cl_mem_flags flags = read_write, void *host_ptr = 0, size_t image_row_pitch = 0, size_t image_slice_pitch = 0) { cl_int error = 0; #ifdef CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_width = image_width; desc.image_height = image_height; desc.image_depth = image_depth; desc.image_array_size = 0; desc.image_row_pitch = image_row_pitch; desc.image_slice_pitch = image_slice_pitch; desc.num_mip_levels = 0; desc.num_samples = 0; #ifdef CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; #endif m_mem = clCreateImage(context, flags, format.get_format_ptr(), &desc, host_ptr, &error); #else m_mem = clCreateImage3D(context, flags, format.get_format_ptr(), image_width, image_height, image_depth, image_row_pitch, image_slice_pitch, host_ptr, &error); #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// \internal_ (deprecated) image3d(const context &context, cl_mem_flags flags, const image_format &format, size_t image_width, size_t image_height, size_t image_depth, size_t image_row_pitch, size_t image_slice_pitch = 0, void *host_ptr = 0) { cl_int error = 0; #ifdef CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_width = image_width; desc.image_height = image_height; desc.image_depth = image_depth; desc.image_array_size = 0; desc.image_row_pitch = image_row_pitch; desc.image_slice_pitch = image_slice_pitch; desc.num_mip_levels = 0; desc.num_samples = 0; #ifdef CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; #endif m_mem = clCreateImage(context, flags, format.get_format_ptr(), &desc, host_ptr, &error); #else m_mem = clCreateImage3D(context, flags, format.get_format_ptr(), image_width, image_height, image_depth, image_row_pitch, image_slice_pitch, host_ptr, &error); #endif if(!m_mem){ 
BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new image3d as a copy of \p other. image3d(const image3d &other) : image_object(other) { } /// Copies the image3d from \p other. image3d& operator=(const image3d &other) { image_object::operator=(other); return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new image object from \p other. image3d(image3d&& other) BOOST_NOEXCEPT : image_object(std::move(other)) { } /// Move-assigns the image from \p other to \c *this. image3d& operator=(image3d&& other) BOOST_NOEXCEPT { image_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the image3d object. ~image3d() { } /// Returns the size (width, height, depth) of the image. extents<3> size() const { extents<3> size; size[0] = get_info(CL_IMAGE_WIDTH); size[1] = get_info(CL_IMAGE_HEIGHT); size[2] = get_info(CL_IMAGE_DEPTH); return size; } /// Returns the origin of the image (\c 0, \c 0, \c 0). extents<3> origin() const { return extents<3>(); } /// Returns information about the image. /// /// \see_opencl_ref{clGetImageInfo} template T get_info(cl_image_info info) const { return detail::get_object_info(clGetImageInfo, m_mem, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; /// Returns the supported 3D image formats for the context. /// /// \see_opencl_ref{clGetSupportedImageFormats} static std::vector get_supported_formats(const context &context, cl_mem_flags flags = read_write) { return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE3D, flags); } /// Returns \c true if \p format is a supported 3D image format for /// \p context. static bool is_supported_format(const image_format &format, const context &context, cl_mem_flags flags = read_write) { return image_object::is_supported_format( format, context, CL_MEM_OBJECT_IMAGE3D, flags ); } /// Creates a new image with a copy of the data in \c *this. 
Uses \p queue /// to perform the copy operation. image3d clone(command_queue &queue) const; }; /// \internal_ define get_info() specializations for image3d BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image3d, ((cl_image_format, CL_IMAGE_FORMAT)) ((size_t, CL_IMAGE_ELEMENT_SIZE)) ((size_t, CL_IMAGE_ROW_PITCH)) ((size_t, CL_IMAGE_SLICE_PITCH)) ((size_t, CL_IMAGE_WIDTH)) ((size_t, CL_IMAGE_HEIGHT)) ((size_t, CL_IMAGE_DEPTH)) ) namespace detail { // set_kernel_arg() specialization for image3d template<> struct set_kernel_arg : public set_kernel_arg { }; } // end detail namespace } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(boost::compute::image3d, image3d_t) #endif // BOOST_COMPUTE_IMAGE_IMAGE3D_HPP compute-0.5/include/boost/compute/image/image_format.hpp000066400000000000000000000076261263566244600235350ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP #define BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP #include namespace boost { namespace compute { /// \class image_format /// \brief A OpenCL image format /// /// For example, to create a format for a 8-bit RGBA image: /// \code /// boost::compute::image_format rgba8(CL_RGBA, CL_UNSIGNED_INT8); /// \endcode /// /// After being constructed, image_format objects are usually passed to the /// constructor of the various image classes (e.g. \ref image2d, \ref image3d) /// to create an image object on a compute device. /// /// Image formats supported by a context can be queried with the static /// get_supported_formats() in each image class. 
For example: /// \code /// std::vector formats = image2d::get_supported_formats(ctx); /// \endcode /// /// \see image2d class image_format { public: enum channel_order { r = CL_R, a = CL_A, intensity = CL_INTENSITY, luminance = CL_LUMINANCE, rg = CL_RG, ra = CL_RA, rgb = CL_RGB, rgba = CL_RGBA, argb = CL_ARGB, bgra = CL_BGRA }; enum channel_data_type { snorm_int8 = CL_SNORM_INT8, snorm_int16 = CL_SNORM_INT16, unorm_int8 = CL_UNORM_INT8, unorm_int16 = CL_UNORM_INT16, unorm_short_565 = CL_UNORM_SHORT_565, unorm_short_555 = CL_UNORM_SHORT_555, unorm_int_101010 = CL_UNORM_INT_101010, signed_int8 = CL_SIGNED_INT8, signed_int16 = CL_SIGNED_INT16, signed_int32 = CL_SIGNED_INT32, unsigned_int8 = CL_UNSIGNED_INT8, unsigned_int16 = CL_UNSIGNED_INT16, unsigned_int32 = CL_UNSIGNED_INT32, float16 = CL_HALF_FLOAT, float32 = CL_FLOAT }; /// Creates a new image format object with \p order and \p type. explicit image_format(cl_channel_order order, cl_channel_type type) { m_format.image_channel_order = order; m_format.image_channel_data_type = type; } /// Creates a new image format object from \p format. explicit image_format(const cl_image_format &format) { m_format.image_channel_order = format.image_channel_order; m_format.image_channel_data_type = format.image_channel_data_type; } /// Creates a new image format object as a copy of \p other. image_format(const image_format &other) : m_format(other.m_format) { } /// Copies the format from \p other to \c *this. image_format& operator=(const image_format &other) { if(this != &other){ m_format = other.m_format; } return *this; } /// Destroys the image format object. ~image_format() { } /// Returns a pointer to the \c cl_image_format object. const cl_image_format* get_format_ptr() const { return &m_format; } /// Returns \c true if \c *this is the same as \p other. 
bool operator==(const image_format &other) const { return m_format.image_channel_order == other.m_format.image_channel_order && m_format.image_channel_data_type == other.m_format.image_channel_data_type; } /// Returns \c true if \c *this is not the same as \p other. bool operator!=(const image_format &other) const { return !(*this == other); } private: cl_image_format m_format; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP compute-0.5/include/boost/compute/image/image_object.hpp000066400000000000000000000110401263566244600234740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP #define BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP #include #include #include #include #include #include namespace boost { namespace compute { /// \class image_object /// \brief Base-class for image objects. /// /// The image_object class is the base-class for image objects on compute /// devices. 
/// /// \see image1d, image2d, image3d class image_object : public memory_object { public: image_object() : memory_object() { } explicit image_object(cl_mem mem, bool retain = true) : memory_object(mem, retain) { } image_object(const image_object &other) : memory_object(other) { } image_object& operator=(const image_object &other) { if(this != &other){ memory_object::operator=(other); } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES image_object(image_object&& other) BOOST_NOEXCEPT : memory_object(std::move(other)) { } /// \internal_ image_object& operator=(image_object&& other) BOOST_NOEXCEPT { memory_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the image object. ~image_object() { } /// Returns information about the image object. /// /// \see_opencl_ref{clGetImageInfo} template T get_image_info(cl_mem_info info) const { return detail::get_object_info(clGetImageInfo, m_mem, info); } /// Returns the format for the image. image_format format() const { return image_format(get_image_info(CL_IMAGE_FORMAT)); } /// \internal_ (deprecated) image_format get_format() const { return format(); } /// Returns the width of the image. size_t width() const { return get_image_info(CL_IMAGE_WIDTH); } /// Returns the height of the image. /// /// For 1D images, this function will return \c 1. size_t height() const { return get_image_info(CL_IMAGE_HEIGHT); } /// Returns the depth of the image. /// /// For 1D and 2D images, this function will return \c 1. size_t depth() const { return get_image_info(CL_IMAGE_DEPTH); } /// Returns the supported image formats for the \p type in \p context. 
/// /// \see_opencl_ref{clGetSupportedImageFormats} static std::vector get_supported_formats(const context &context, cl_mem_object_type type, cl_mem_flags flags = read_write) { cl_uint count = 0; clGetSupportedImageFormats(context, flags, type, 0, 0, &count); std::vector cl_formats(count); clGetSupportedImageFormats(context, flags, type, count, &cl_formats[0], 0); std::vector formats; formats.reserve(count); for(cl_uint i = 0; i < count; i++){ formats.push_back(image_format(cl_formats[i])); } return formats; } /// Returns \c true if \p format is a supported image format for /// \p type in \p context with \p flags. static bool is_supported_format(const image_format &format, const context &context, cl_mem_object_type type, cl_mem_flags flags = read_write) { const std::vector formats = get_supported_formats(context, type, flags); return std::find(formats.begin(), formats.end(), format) != formats.end(); } }; namespace detail { // set_kernel_arg() specialization for image_object template<> struct set_kernel_arg : public set_kernel_arg { }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP compute-0.5/include/boost/compute/image/image_sampler.hpp000066400000000000000000000137231263566244600237030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP #define BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class image_sampler /// \brief An OpenCL image sampler object /// /// \see image2d, image_format class image_sampler { public: enum addressing_mode { none = CL_ADDRESS_NONE, clamp_to_edge = CL_ADDRESS_CLAMP_TO_EDGE, clamp = CL_ADDRESS_CLAMP, repeat = CL_ADDRESS_REPEAT }; enum filter_mode { nearest = CL_FILTER_NEAREST, linear = CL_FILTER_LINEAR }; image_sampler() : m_sampler(0) { } image_sampler(const context &context, bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode) { cl_int error = 0; #ifdef CL_VERSION_2_0 std::vector sampler_properties; sampler_properties.push_back(CL_SAMPLER_NORMALIZED_COORDS); sampler_properties.push_back(cl_sampler_properties(normalized_coords)); sampler_properties.push_back(CL_SAMPLER_ADDRESSING_MODE); sampler_properties.push_back(cl_sampler_properties(addressing_mode)); sampler_properties.push_back(CL_SAMPLER_FILTER_MODE); sampler_properties.push_back(cl_sampler_properties(filter_mode)); sampler_properties.push_back(cl_sampler_properties(0)); m_sampler = clCreateSamplerWithProperties( context, &sampler_properties[0], &error ); #else m_sampler = clCreateSampler( context, normalized_coords, addressing_mode, filter_mode, &error ); #endif if(!m_sampler){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } explicit image_sampler(cl_sampler sampler, bool retain = true) : m_sampler(sampler) { if(m_sampler && retain){ clRetainSampler(m_sampler); } } /// Creates a new image sampler object as a copy of \p other. image_sampler(const image_sampler &other) : m_sampler(other.m_sampler) { if(m_sampler){ clRetainSampler(m_sampler); } } /// Copies the image sampler object from \p other to \c *this. 
image_sampler& operator=(const image_sampler &other) { if(this != &other){ if(m_sampler){ clReleaseSampler(m_sampler); } m_sampler = other.m_sampler; if(m_sampler){ clRetainSampler(m_sampler); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES image_sampler(image_sampler&& other) BOOST_NOEXCEPT : m_sampler(other.m_sampler) { other.m_sampler = 0; } image_sampler& operator=(image_sampler&& other) BOOST_NOEXCEPT { if(m_sampler){ clReleaseSampler(m_sampler); } m_sampler = other.m_sampler; other.m_sampler = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the image sampler object. ~image_sampler() { if(m_sampler){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseSampler(m_sampler) ); } } /// Returns the underlying \c cl_sampler object. cl_sampler& get() const { return const_cast(m_sampler); } /// Returns the context for the image sampler object. context get_context() const { return context(get_info(CL_SAMPLER_CONTEXT)); } /// Returns information about the sampler. /// /// \see_opencl_ref{clGetSamplerInfo} template T get_info(cl_sampler_info info) const { return detail::get_object_info(clGetSamplerInfo, m_sampler, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; /// Returns \c true if the sampler is the same at \p other. bool operator==(const image_sampler &other) const { return m_sampler == other.m_sampler; } /// Returns \c true if the sampler is different from \p other. 
bool operator!=(const image_sampler &other) const { return m_sampler != other.m_sampler; } operator cl_sampler() const { return m_sampler; } private: cl_sampler m_sampler; }; /// \internal_ define get_info() specializations for image_sampler BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image_sampler, ((cl_uint, CL_SAMPLER_REFERENCE_COUNT)) ((cl_context, CL_SAMPLER_CONTEXT)) ((cl_addressing_mode, CL_SAMPLER_ADDRESSING_MODE)) ((cl_filter_mode, CL_SAMPLER_FILTER_MODE)) ((bool, CL_SAMPLER_NORMALIZED_COORDS)) ) namespace detail { // set_kernel_arg specialization for image samplers template<> struct set_kernel_arg { void operator()(kernel &kernel_, size_t index, const image_sampler &sampler) { kernel_.set_arg(index, sampler.get()); } }; } // end detail namespace } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(boost::compute::image_sampler, sampler_t) #endif // BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP compute-0.5/include/boost/compute/image2d.hpp000066400000000000000000000010451263566244600213160ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// // deprecated, use instead #include compute-0.5/include/boost/compute/image3d.hpp000066400000000000000000000010451263566244600213170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// // deprecated, use instead #include compute-0.5/include/boost/compute/image_format.hpp000066400000000000000000000010571263566244600224430ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// // deprecated, use instead #include compute-0.5/include/boost/compute/image_sampler.hpp000066400000000000000000000010611263566244600226110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// // deprecated, use instead #include compute-0.5/include/boost/compute/interop/000077500000000000000000000000001263566244600207555ustar00rootroot00000000000000compute-0.5/include/boost/compute/interop/eigen.hpp000066400000000000000000000011511263566244600225530ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_EIGEN_HPP #define BOOST_COMPUTE_INTEROP_EIGEN_HPP #include #endif // BOOST_COMPUTE_INTEROP_EIGEN_HPP compute-0.5/include/boost/compute/interop/eigen/000077500000000000000000000000001263566244600220445ustar00rootroot00000000000000compute-0.5/include/boost/compute/interop/eigen/core.hpp000066400000000000000000000051521263566244600235100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_EIGEN_EIGEN_HPP #define BOOST_COMPUTE_INTEROP_EIGEN_EIGEN_HPP #include #include #include #include #include namespace boost { namespace compute { /// Copies \p matrix to \p buffer. template inline void eigen_copy_matrix_to_buffer(const Eigen::PlainObjectBase &matrix, buffer_iterator buffer, command_queue &queue = system::default_queue()) { ::boost::compute::copy_n(matrix.data(), matrix.size(), buffer, queue); } /// Copies \p buffer to \p matrix. template inline void eigen_copy_buffer_to_matrix(const buffer_iterator buffer, Eigen::PlainObjectBase &matrix, command_queue &queue = system::default_queue()) { ::boost::compute::copy_n(buffer, matrix.size(), matrix.data(), queue); } /// Converts an \c Eigen::Matrix4f to a \c float16_. inline float16_ eigen_matrix4f_to_float16(const Eigen::Matrix4f &matrix) { float16_ result; std::memcpy(&result, matrix.data(), 16 * sizeof(float)); return result; } /// Converts an \c Eigen::Matrix4d to a \c double16_. 
inline double16_ eigen_matrix4d_to_double16(const Eigen::Matrix4d &matrix) { double16_ result; std::memcpy(&result, matrix.data(), 16 * sizeof(double)); return result; } } // end compute namespace } // end boost namespace BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2i, int2) BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4i, int4) BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2f, float2) BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4f, float4) BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix2f, float8) BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix4f, float16) BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2d, double2) BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4d, double4) BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix2d, double8) BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix4d, double16) #endif // BOOST_COMPUTE_INTEROP_EIGEN_EIGEN_HPP compute-0.5/include/boost/compute/interop/opencv.hpp000066400000000000000000000012411263566244600227560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENCV_HPP #define BOOST_COMPUTE_INTEROP_OPENCV_HPP #include #include #endif // BOOST_COMPUTE_INTEROP_OPENCV_HPP compute-0.5/include/boost/compute/interop/opencv/000077500000000000000000000000001263566244600222475ustar00rootroot00000000000000compute-0.5/include/boost/compute/interop/opencv/core.hpp000066400000000000000000000112111263566244600237040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP #define BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { template inline void opencv_copy_mat_to_buffer(const cv::Mat &mat, buffer_iterator buffer, command_queue &queue = system::default_queue()) { BOOST_ASSERT(mat.isContinuous()); ::boost::compute::copy_n( reinterpret_cast(mat.data), mat.rows * mat.cols, buffer, queue ); } template inline void opencv_copy_buffer_to_mat(const buffer_iterator buffer, cv::Mat &mat, command_queue &queue = system::default_queue()) { BOOST_ASSERT(mat.isContinuous()); ::boost::compute::copy_n( buffer, mat.cols * mat.rows, reinterpret_cast(mat.data), queue ); } inline void opencv_copy_mat_to_image(const cv::Mat &mat, image2d &image, command_queue &queue = system::default_queue()) { BOOST_ASSERT(mat.data != 0); BOOST_ASSERT(mat.isContinuous()); BOOST_ASSERT(image.get_context() == queue.get_context()); queue.enqueue_write_image(image, image.origin(), image.size(), mat.data); } inline void 
opencv_copy_image_to_mat(const image2d &image, cv::Mat &mat, command_queue &queue = system::default_queue()) { BOOST_ASSERT(mat.isContinuous()); BOOST_ASSERT(image.get_context() == queue.get_context()); queue.enqueue_read_image(image, image.origin(), image.size(), mat.data); } inline image_format opencv_get_mat_image_format(const cv::Mat &mat) { switch(mat.type()){ case CV_8UC4: return image_format(CL_BGRA, CL_UNORM_INT8); case CV_16UC4: return image_format(CL_BGRA, CL_UNORM_INT16); case CV_32F: return image_format(CL_INTENSITY, CL_FLOAT); case CV_32FC4: return image_format(CL_RGBA, CL_FLOAT); case CV_8UC1: return image_format(CL_INTENSITY, CL_UNORM_INT8); } BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); } inline cv::Mat opencv_create_mat_with_image2d(const image2d &image, command_queue &queue = system::default_queue()) { BOOST_ASSERT(image.get_context() == queue.get_context()); cv::Mat mat; image_format format = image.get_format(); const cl_image_format *cl_image_format = format.get_format_ptr(); if(cl_image_format->image_channel_data_type == CL_UNORM_INT8 && cl_image_format->image_channel_order == CL_BGRA) { mat = cv::Mat(image.height(), image.width(), CV_8UC4); } else if(cl_image_format->image_channel_data_type == CL_UNORM_INT16 && cl_image_format->image_channel_order == CL_BGRA) { mat = cv::Mat(image.height(), image.width(), CV_16UC4); } else if(cl_image_format->image_channel_data_type == CL_FLOAT && cl_image_format->image_channel_order == CL_INTENSITY) { mat = cv::Mat(image.height(), image.width(), CV_32FC1); } else { mat = cv::Mat(image.height(), image.width(), CV_8UC1); } opencv_copy_image_to_mat(image, mat, queue); return mat; } inline image2d opencv_create_image2d_with_mat(const cv::Mat &mat, cl_mem_flags flags, command_queue &queue = system::default_queue()) { const context &context = queue.get_context(); const image_format format = opencv_get_mat_image_format(mat); image2d image(context, mat.cols, mat.rows, format, flags); 
opencv_copy_mat_to_image(mat, image, queue); return image; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP compute-0.5/include/boost/compute/interop/opencv/highgui.hpp000066400000000000000000000020431263566244600244030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP #define BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP #include #include namespace boost { namespace compute { inline void opencv_imshow(const std::string &winname, const image2d &image, command_queue &queue = system::default_queue()) { const cv::Mat mat = opencv_create_mat_with_image2d(image, queue); cv::imshow(winname, mat); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP compute-0.5/include/boost/compute/interop/opencv/ocl.hpp000066400000000000000000000025461263566244600235440ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP #define BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP #include #include #include #include namespace boost { namespace compute { context opencv_ocl_get_context() { void *ocl_context = cv::ocl::getoclContext(); if(!ocl_context){ return context(); } return context(*(static_cast(ocl_context))); } command_queue opencv_ocl_get_command_queue() { void *ocl_queue = cv::ocl::getoclCommandQueue(); if(!ocl_queue){ return command_queue(); } return command_queue(*(static_cast(ocl_queue))); } buffer opencv_ocl_get_buffer(const cv::ocl::oclMat &mat) { return buffer(reinterpret_cast(mat.data)); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP compute-0.5/include/boost/compute/interop/opengl.hpp000066400000000000000000000016551263566244600227610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_HPP /// \file /// /// Meta-header to include all Boost.Compute OpenGL interop headers. 
#include #include #include #include #include #endif // BOOST_COMPUTE_INTEROP_OPENGL_HPP compute-0.5/include/boost/compute/interop/opengl/000077500000000000000000000000001263566244600222415ustar00rootroot00000000000000compute-0.5/include/boost/compute/interop/opengl/acquire.hpp000066400000000000000000000071561263566244600244140ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP #include #include #include #include namespace boost { namespace compute { /// Enqueues a command to acquire the specified OpenGL memory objects. /// /// \see_opencl_ref{clEnqueueAcquireGLObjects} inline event opengl_enqueue_acquire_gl_objects(size_t num_objects, const cl_mem *mem_objects, command_queue &queue, const wait_list &events = wait_list()) { BOOST_ASSERT(queue != 0); event event_; cl_int ret = clEnqueueAcquireGLObjects(queue.get(), num_objects, mem_objects, events.size(), events.get_event_ptr(), &event_.get()); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to release the specified OpenGL memory objects. 
/// /// \see_opencl_ref{clEnqueueReleaseGLObjects} inline event opengl_enqueue_release_gl_objects(size_t num_objects, const cl_mem *mem_objects, command_queue &queue, const wait_list &events = wait_list()) { BOOST_ASSERT(queue != 0); event event_; cl_int ret = clEnqueueReleaseGLObjects(queue.get(), num_objects, mem_objects, events.size(), events.get_event_ptr(), &event_.get()); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; } /// Enqueues a command to acquire the specified OpenGL buffer. /// /// \see_opencl_ref{clEnqueueAcquireGLObjects} inline event opengl_enqueue_acquire_buffer(const opengl_buffer &buffer, command_queue &queue, const wait_list &events = wait_list()) { BOOST_ASSERT(buffer.get_context() == queue.get_context()); return opengl_enqueue_acquire_gl_objects(1, &buffer.get(), queue, events); } /// Enqueues a command to release the specified OpenGL buffer. /// /// \see_opencl_ref{clEnqueueReleaseGLObjects} inline event opengl_enqueue_release_buffer(const opengl_buffer &buffer, command_queue &queue, const wait_list &events = wait_list()) { BOOST_ASSERT(buffer.get_context() == queue.get_context()); return opengl_enqueue_release_gl_objects(1, &buffer.get(), queue, events); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP compute-0.5/include/boost/compute/interop/opengl/cl_gl.hpp000066400000000000000000000012421263566244600240310ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP #if defined(__APPLE__) #include #else #include #endif #endif // BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP compute-0.5/include/boost/compute/interop/opengl/cl_gl_ext.hpp000066400000000000000000000012661263566244600247170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP #if defined(__APPLE__) #include #else #include #endif #endif // BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP compute-0.5/include/boost/compute/interop/opengl/context.hpp000066400000000000000000000104761263566244600244460ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_CONTEXT_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_CONTEXT_HPP #include #include #include #include #include #include #ifdef __APPLE__ #include #include #endif #ifdef __linux__ #include #endif namespace boost { namespace compute { /// Creates a shared OpenCL/OpenGL context for the currently active /// OpenGL context. 
/// /// Once created, the shared context can be used to create OpenCL memory /// objects which can interact with OpenGL memory objects (e.g. VBOs). /// /// \throws unsupported_extension_error if no CL-GL sharing capable devices /// are found. inline context opengl_create_shared_context() { // name of the OpenGL sharing extension for the system #if defined(__APPLE__) const char *cl_gl_sharing_extension = "cl_APPLE_gl_sharing"; #else const char *cl_gl_sharing_extension = "cl_khr_gl_sharing"; #endif #if defined(__APPLE__) // get OpenGL share group CGLContextObj cgl_current_context = CGLGetCurrentContext(); CGLShareGroupObj cgl_share_group = CGLGetShareGroup(cgl_current_context); cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties) cgl_share_group, 0 }; cl_int error = 0; cl_context cl_gl_context = clCreateContext(properties, 0, 0, 0, 0, &error); if(!cl_gl_context){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return context(cl_gl_context, false); #else typedef cl_int(*GetGLContextInfoKHRFunction)( const cl_context_properties*, cl_gl_context_info, size_t, void *, size_t * ); std::vector platforms = system::platforms(); for(size_t i = 0; i < platforms.size(); i++){ const platform &platform = platforms[i]; // load clGetGLContextInfoKHR() extension function GetGLContextInfoKHRFunction GetGLContextInfoKHR = reinterpret_cast( reinterpret_cast( platform.get_extension_function_address("clGetGLContextInfoKHR") ) ); if(!GetGLContextInfoKHR){ continue; } // create context properties listing the platform and current OpenGL display cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform.id(), #if defined(__linux__) CL_GL_CONTEXT_KHR, (cl_context_properties) glXGetCurrentContext(), CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(), #elif defined(WIN32) CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(), CL_WGL_HDC_KHR, (cl_context_properties) 
wglGetCurrentDC(), #endif 0 }; // lookup current OpenCL device for current OpenGL context cl_device_id gpu_id; cl_int ret = GetGLContextInfoKHR( properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &gpu_id, 0 ); if(ret != CL_SUCCESS){ continue; } // create device object for the GPU and ensure it supports CL-GL sharing device gpu(gpu_id, false); if(!gpu.supports_extension(cl_gl_sharing_extension)){ continue; } // return CL-GL sharing context return context(gpu, properties); } #endif // no CL-GL sharing capable devices found BOOST_THROW_EXCEPTION( unsupported_extension_error(cl_gl_sharing_extension) ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENGL_CONTEXT_HPP compute-0.5/include/boost/compute/interop/opengl/gl.hpp000066400000000000000000000012231263566244600233520ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP #if defined(__APPLE__) #include #else #include #endif #endif // BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP compute-0.5/include/boost/compute/interop/opengl/opengl_buffer.hpp000066400000000000000000000054141263566244600255730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP #include #include #include namespace boost { namespace compute { /// \class opengl_buffer /// /// A OpenCL buffer for accessing an OpenGL memory object. class opengl_buffer : public buffer { public: /// Creates a null OpenGL buffer object. opengl_buffer() : buffer() { } /// Creates a new OpenGL buffer object for \p mem. explicit opengl_buffer(cl_mem mem, bool retain = true) : buffer(mem, retain) { } /// Creates a new OpenGL buffer object in \p context for \p bufobj /// with \p flags. /// /// \see_opencl_ref{clCreateFromGLBuffer} opengl_buffer(const context &context, GLuint bufobj, cl_mem_flags flags = read_write) { cl_int error = 0; m_mem = clCreateFromGLBuffer(context, flags, bufobj, &error); if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new OpenGL buffer object as a copy of \p other. opengl_buffer(const opengl_buffer &other) : buffer(other) { } /// Copies the OpenGL buffer object from \p other. opengl_buffer& operator=(const opengl_buffer &other) { if(this != &other){ buffer::operator=(other); } return *this; } /// Destroys the OpenGL buffer object. ~opengl_buffer() { } /// Returns the OpenGL memory object ID. /// /// \see_opencl_ref{clGetGLObjectInfo} GLuint get_opengl_object() const { GLuint object = 0; clGetGLObjectInfo(m_mem, 0, &object); return object; } /// Returns the OpenGL memory object type. 
/// /// \see_opencl_ref{clGetGLObjectInfo} cl_gl_object_type get_opengl_type() const { cl_gl_object_type type; clGetGLObjectInfo(m_mem, &type, 0); return type; } }; namespace detail { // set_kernel_arg specialization for opengl_buffer template<> struct set_kernel_arg : set_kernel_arg { }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP compute-0.5/include/boost/compute/interop/opengl/opengl_renderbuffer.hpp000066400000000000000000000066721263566244600270020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP #include #include #include #include namespace boost { namespace compute { /// \class opengl_renderbuffer /// /// A OpenCL buffer for accessing an OpenGL renderbuffer object. class opengl_renderbuffer : public image_object { public: /// Creates a null OpenGL renderbuffer object. opengl_renderbuffer() : image_object() { } /// Creates a new OpenGL renderbuffer object for \p mem. explicit opengl_renderbuffer(cl_mem mem, bool retain = true) : image_object(mem, retain) { } /// Creates a new OpenGL renderbuffer object in \p context for /// \p renderbuffer with \p flags. 
/// /// \see_opencl_ref{clCreateFromGLRenderbuffer} opengl_renderbuffer(const context &context, GLuint renderbuffer, cl_mem_flags flags = read_write) { cl_int error = 0; m_mem = clCreateFromGLRenderbuffer( context, flags, renderbuffer, &error ); if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new OpenGL renderbuffer object as a copy of \p other. opengl_renderbuffer(const opengl_renderbuffer &other) : image_object(other) { } /// Copies the OpenGL renderbuffer object from \p other. opengl_renderbuffer& operator=(const opengl_renderbuffer &other) { if(this != &other){ image_object::operator=(other); } return *this; } /// Destroys the OpenGL buffer object. ~opengl_renderbuffer() { } /// Returns the size (width, height) of the renderbuffer. extents<2> size() const { extents<2> size; size[0] = get_image_info(CL_IMAGE_WIDTH); size[1] = get_image_info(CL_IMAGE_HEIGHT); return size; } /// Returns the origin of the renderbuffer (\c 0, \c 0). extents<2> origin() const { return extents<2>(); } /// Returns the OpenGL memory object ID. /// /// \see_opencl_ref{clGetGLObjectInfo} GLuint get_opengl_object() const { GLuint object = 0; clGetGLObjectInfo(m_mem, 0, &object); return object; } /// Returns the OpenGL memory object type. 
/// /// \see_opencl_ref{clGetGLObjectInfo} cl_gl_object_type get_opengl_type() const { cl_gl_object_type type; clGetGLObjectInfo(m_mem, &type, 0); return type; } }; namespace detail { // set_kernel_arg() specialization for opengl_renderbuffer template<> struct set_kernel_arg : public set_kernel_arg { }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP compute-0.5/include/boost/compute/interop/opengl/opengl_texture.hpp000066400000000000000000000073411263566244600260230ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP #define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP #include #include #include #include #include namespace boost { namespace compute { /// \class opengl_texture /// /// A OpenCL image2d for accessing an OpenGL texture object. class opengl_texture : public image_object { public: /// Creates a null OpenGL texture object. opengl_texture() : image_object() { } /// Creates a new OpenGL texture object for \p mem. explicit opengl_texture(cl_mem mem, bool retain = true) : image_object(mem, retain) { } /// Creates a new OpenGL texture object in \p context for \p texture /// with \p flags. 
/// /// \see_opencl_ref{clCreateFromGLTexture} opengl_texture(const context &context, GLenum texture_target, GLint miplevel, GLuint texture, cl_mem_flags flags = read_write) { cl_int error = 0; #ifdef CL_VERSION_1_2 m_mem = clCreateFromGLTexture(context, flags, texture_target, miplevel, texture, &error); #else m_mem = clCreateFromGLTexture2D(context, flags, texture_target, miplevel, texture, &error); #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new OpenGL texture object as a copy of \p other. opengl_texture(const opengl_texture &other) : image_object(other) { } /// Copies the OpenGL texture object from \p other. opengl_texture& operator=(const opengl_texture &other) { if(this != &other){ image_object::operator=(other); } return *this; } /// Destroys the texture object. ~opengl_texture() { } /// Returns the size (width, height) of the texture. extents<2> size() const { extents<2> size; size[0] = get_image_info(CL_IMAGE_WIDTH); size[1] = get_image_info(CL_IMAGE_HEIGHT); return size; } /// Returns the origin of the texture (\c 0, \c 0). extents<2> origin() const { return extents<2>(); } /// Returns information about the texture. 
/// /// \see_opencl_ref{clGetGLTextureInfo} template T get_texture_info(cl_gl_texture_info info) const { return detail::get_object_info(clGetGLTextureInfo, m_mem, info); } }; namespace detail { // set_kernel_arg() specialization for opengl_texture template<> struct set_kernel_arg : public set_kernel_arg { }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP compute-0.5/include/boost/compute/interop/qt.hpp000066400000000000000000000012151263566244600221110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_HPP #define BOOST_COMPUTE_INTEROP_QT_HPP #include #include #endif // BOOST_COMPUTE_INTEROP_QT_HPP compute-0.5/include/boost/compute/interop/qt/000077500000000000000000000000001263566244600214015ustar00rootroot00000000000000compute-0.5/include/boost/compute/interop/qt/qimage.hpp000066400000000000000000000041751263566244600233640ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP #define BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { inline image_format qt_qimage_format_to_image_format(const QImage::Format &format) { if(format == QImage::Format_RGB32){ return image_format(image_format::bgra, image_format::unorm_int8); } BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); } inline QImage::Format qt_image_format_to_qimage_format(const image_format &format) { if(format == image_format(image_format::bgra, image_format::unorm_int8)){ return QImage::Format_RGB32; } return QImage::Format_Invalid; } inline image_format qt_qimage_get_format(const QImage &image) { return qt_qimage_format_to_image_format(image.format()); } inline void qt_copy_qimage_to_image2d(const QImage &qimage, image2d &image, command_queue &queue) { queue.enqueue_write_image(image, image.origin(), image.size(), qimage.constBits()); } inline void qt_copy_image2d_to_qimage(const image2d &image, QImage &qimage, command_queue &queue) { queue.enqueue_read_image( image, dim(0, 0), dim(qimage.width(), qimage.height()), qimage.bits() ); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP compute-0.5/include/boost/compute/interop/qt/qpoint.hpp000066400000000000000000000012641263566244600234270ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP #define BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP #include #include BOOST_COMPUTE_TYPE_NAME(QPoint, "int2") #endif // BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP compute-0.5/include/boost/compute/interop/qt/qpointf.hpp000066400000000000000000000012731263566244600235750ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP #define BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP #include #include BOOST_COMPUTE_TYPE_NAME(QPointF, "float2") #endif // BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP compute-0.5/include/boost/compute/interop/qt/qtcore.hpp000066400000000000000000000013241263566244600234070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP #define BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP #include #include #include #endif // BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP compute-0.5/include/boost/compute/interop/qt/qtgui.hpp000066400000000000000000000011611263566244600232420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP #define BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP #include #endif // BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP compute-0.5/include/boost/compute/interop/qt/qvector.hpp000066400000000000000000000025461263566244600236040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP #define BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP #include #include namespace boost { namespace compute { namespace detail { template struct _is_contiguous_iterator< Iterator, typename boost::enable_if< typename boost::is_same< Iterator, typename QVector::iterator >::type >::type > : public boost::true_type {}; template struct _is_contiguous_iterator< Iterator, typename boost::enable_if< typename boost::is_same< Iterator, typename QVector::const_iterator >::type >::type > : public boost::true_type {}; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP compute-0.5/include/boost/compute/interop/vtk.hpp000066400000000000000000000013721263566244600222750ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_VTK_HPP #define BOOST_COMPUTE_INTEROP_VTK_HPP #include #include #include #include #endif // BOOST_COMPUTE_INTEROP_VTK_HPP compute-0.5/include/boost/compute/interop/vtk/000077500000000000000000000000001263566244600215615ustar00rootroot00000000000000compute-0.5/include/boost/compute/interop/vtk/bounds.hpp000066400000000000000000000040061263566244600235640ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP #define BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { /// Calculates the bounds for the points in the range [\p first, \p last) and /// stores the result in \p bounds. /// /// For example, this can be used to implement the GetBounds() method for a /// vtkMapper subclass. 
template inline void vtk_compute_bounds(PointIterator first, PointIterator last, double bounds[6], command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits::value_type T; const context &context = queue.get_context(); // compute min and max point array extrema(context); reduce(first, last, extrema.begin() + 0, min(), queue); reduce(first, last, extrema.begin() + 1, max(), queue); // copy results to host buffer std::vector buffer(2); copy_n(extrema.begin(), 2, buffer.begin(), queue); // copy to vtk-style bounds bounds[0] = buffer[0][0]; bounds[1] = buffer[1][0]; bounds[2] = buffer[0][1]; bounds[3] = buffer[1][1]; bounds[4] = buffer[0][2]; bounds[5] = buffer[1][2]; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP compute-0.5/include/boost/compute/interop/vtk/data_array.hpp000066400000000000000000000051651263566244600244100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP #define BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { /// Copies the values in \p data to \p buffer. 
template inline void vtk_copy_data_array_to_buffer(const vtkDataArray *data, buffer_iterator buffer, command_queue &queue = system::default_queue()); /// \internal_ template inline void vtk_copy_data_array_to_buffer(const vtkDataArrayTemplate *data, buffer_iterator buffer, command_queue &queue = system::default_queue()) { vtkDataArrayTemplate *data_ = const_cast *>(data); const T *data_ptr = static_cast(data_->GetVoidPointer(0)); size_t data_size = data_->GetNumberOfComponents() * data_->GetNumberOfTuples(); ::boost::compute::copy_n(data_ptr, data_size, buffer, queue); } /// Copies the values in the range [\p first, \p last) to \p data. template inline void vtk_copy_buffer_to_data_array(buffer_iterator first, buffer_iterator last, vtkDataArray *data, command_queue &queue = system::default_queue()); /// \internal_ template inline void vtk_copy_buffer_to_data_array(buffer_iterator first, buffer_iterator last, vtkDataArrayTemplate *data, command_queue &queue = system::default_queue()) { T *data_ptr = static_cast(data->GetVoidPointer(0)); ::boost::compute::copy(first, last, data_ptr, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP compute-0.5/include/boost/compute/interop/vtk/matrix4x4.hpp000066400000000000000000000024151263566244600241400ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP #define BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP #include #include namespace boost { namespace compute { /// Converts a \c vtkMatrix4x4 to a \c float16_. 
inline float16_ vtk_matrix4x4_to_float16(const vtkMatrix4x4 *matrix) { float16_ result; for(int i = 0; i < 4; i++){ for(int j = 0; j < 4; j++){ result[i*4+j] = matrix->GetElement(i, j); } } return result; } /// Converts a \c vtkMatrix4x4 to a \c double16_; inline double16_ vtk_matrix4x4_to_double16(const vtkMatrix4x4 *matrix) { double16_ result; std::memcpy(&result, matrix->Element, 16 * sizeof(double)); return result; } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP compute-0.5/include/boost/compute/interop/vtk/points.hpp000066400000000000000000000034611263566244600236120ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP #define BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP #include #include #include #include #include #include namespace boost { namespace compute { /// Copies \p points to \p buffer. /// /// For example, to copy from a \c vtkPoints object to a \c vector: /// \code /// vtkPoints *points = ... 
/// vector vector(points->GetNumberOfPoints(), context); /// vtk_copy_points_to_buffer(points, vector.begin(), queue); /// \endcode template inline void vtk_copy_points_to_buffer(const vtkPoints *points, buffer_iterator buffer, command_queue &queue = system::default_queue()) { vtkPoints *points_ = const_cast(points); // copy points to aligned buffer std::vector tmp(points_->GetNumberOfPoints()); for(vtkIdType i = 0; i < points_->GetNumberOfPoints(); i++){ double *p = points_->GetPoint(i); tmp[i] = PointType(p[0], p[1], p[2], 1); } // copy data to device copy(tmp.begin(), tmp.end(), buffer, queue); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP compute-0.5/include/boost/compute/iterator.hpp000066400000000000000000000021641263566244600216420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_HPP /// \file /// /// Meta-header to include all Boost.Compute iterator headers. 
#include #include #include #include #include #include #include #include #include #endif // BOOST_COMPUTE_ITERATOR_HPP compute-0.5/include/boost/compute/iterator/000077500000000000000000000000001263566244600211265ustar00rootroot00000000000000compute-0.5/include/boost/compute/iterator/buffer_iterator.hpp000066400000000000000000000171611263566244600250270ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { // forward declaration for buffer_iterator template class buffer_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for buffer_iterator template class buffer_iterator_base { public: typedef ::boost::iterator_facade< ::boost::compute::buffer_iterator, T, ::std::random_access_iterator_tag, ::boost::compute::detail::buffer_value > type; }; template struct buffer_iterator_index_expr { typedef T result_type; buffer_iterator_index_expr(const buffer &buffer, size_t index, const memory_object::address_space address_space, const IndexExpr &expr) : m_buffer(buffer), m_index(index), m_address_space(address_space), m_expr(expr) { } operator T() const { BOOST_STATIC_ASSERT_MSG(boost::is_integral::value, "Index expression must be integral"); return buffer_value(m_buffer, size_t(m_expr) * sizeof(T)); } const buffer &m_buffer; size_t m_index; memory_object::address_space m_address_space; IndexExpr 
m_expr; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const buffer_iterator_index_expr &expr) { if(expr.m_index == 0){ return kernel << kernel.get_buffer_identifier(expr.m_buffer, expr.m_address_space) << '[' << expr.m_expr << ']'; } else { return kernel << kernel.get_buffer_identifier(expr.m_buffer, expr.m_address_space) << '[' << uint_(expr.m_index) << "+(" << expr.m_expr << ")]"; } } } // end detail namespace /// \class buffer_iterator /// \brief An iterator for values in a buffer. /// /// The buffer_iterator class iterates over values in a memory buffer on a /// compute device. It is the most commonly used iterator in Boost.Compute /// and is used by the \ref vector "vector" and \ref array "array" /// container classes. /// /// Buffer iterators store a reference to a memory buffer along with an index /// into that memory buffer. /// /// The buffer_iterator class allows for arbitrary OpenCL memory objects /// (including those created outside of Boost.Compute) to be used with the /// Boost.Compute algorithms (such as transform() and sort()). 
For example, /// to reverse the contents of an OpenCL memory buffer containing a set of /// integers: /// /// \snippet test/test_buffer_iterator.cpp reverse_external_buffer /// /// \see buffer, make_buffer_iterator() template class buffer_iterator : public detail::buffer_iterator_base::type { public: typedef typename detail::buffer_iterator_base::type super_type; typedef typename super_type::reference reference; typedef typename super_type::difference_type difference_type; buffer_iterator() : m_index(0) { } buffer_iterator(const buffer &buffer, size_t index) : m_buffer(buffer.get(), false), m_index(index) { } buffer_iterator(const buffer_iterator &other) : m_buffer(other.m_buffer.get(), false), m_index(other.m_index) { } buffer_iterator& operator=(const buffer_iterator &other) { if(this != &other){ m_buffer.get() = other.m_buffer.get(); m_index = other.m_index; } return *this; } ~buffer_iterator() { // set buffer to null so that its reference count will // not be decremented when its destructor is called m_buffer.get() = 0; } const buffer& get_buffer() const { return m_buffer; } size_t get_index() const { return m_index; } T read(command_queue &queue) const { BOOST_ASSERT(m_buffer.get()); BOOST_ASSERT(m_index < m_buffer.size() / sizeof(T)); return detail::read_single_value(m_buffer, m_index, queue); } void write(const T &value, command_queue &queue) { BOOST_ASSERT(m_buffer.get()); BOOST_ASSERT(m_index < m_buffer.size() / sizeof(T)); detail::write_single_value(value, m_buffer, m_index, queue); } /// \internal_ template detail::buffer_iterator_index_expr operator[](const Expr &expr) const { BOOST_ASSERT(m_buffer.get()); return detail::buffer_iterator_index_expr( m_buffer, m_index, memory_object::global_memory, expr ); } private: friend class ::boost::iterator_core_access; /// \internal_ reference dereference() const { return detail::buffer_value(m_buffer, m_index * sizeof(T)); } /// \internal_ bool equal(const buffer_iterator &other) const { return m_buffer.get() == 
other.m_buffer.get() && m_index == other.m_index; } /// \internal_ void increment() { m_index++; } /// \internal_ void decrement() { m_index--; } /// \internal_ void advance(difference_type n) { m_index = static_cast(static_cast(m_index) + n); } /// \internal_ difference_type distance_to(const buffer_iterator &other) const { return static_cast(other.m_index - m_index); } private: const buffer m_buffer; size_t m_index; }; /// Creates a new \ref buffer_iterator for \p buffer at \p index. /// /// \param buffer the \ref buffer object /// \param index the index in the buffer /// /// \return a \c buffer_iterator for \p buffer at \p index template inline buffer_iterator make_buffer_iterator(const buffer &buffer, size_t index = 0) { return buffer_iterator(buffer, index); } /// \internal_ (is_device_iterator specialization for buffer_iterator) template struct is_device_iterator > : boost::true_type {}; namespace detail { // is_buffer_iterator specialization for buffer_iterator template struct is_buffer_iterator< Iterator, typename boost::enable_if< boost::is_same< buffer_iterator, typename boost::remove_const::type > >::type > : public boost::true_type {}; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP compute-0.5/include/boost/compute/iterator/constant_buffer_iterator.hpp000066400000000000000000000127101263566244600267330ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP #include #include #include #include #include #include namespace boost { namespace compute { // forward declaration for constant_buffer_iterator template class constant_buffer_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for constant_buffer_iterator template class constant_buffer_iterator_base { public: typedef ::boost::iterator_facade< ::boost::compute::constant_buffer_iterator, T, ::std::random_access_iterator_tag, ::boost::compute::detail::buffer_value > type; }; } // end detail namespace /// \class constant_buffer_iterator /// \brief An iterator for a buffer in the \c constant memory space. /// /// The constant_buffer_iterator class provides an iterator for values in a /// buffer in the \c constant memory space. /// /// For iterating over values in the \c global memory space (the most common /// case), use the buffer_iterator class. 
/// /// \see buffer_iterator template class constant_buffer_iterator : public detail::constant_buffer_iterator_base::type { public: typedef typename detail::constant_buffer_iterator_base::type super_type; typedef typename super_type::reference reference; typedef typename super_type::difference_type difference_type; constant_buffer_iterator() : m_buffer(0), m_index(0) { } constant_buffer_iterator(const buffer &buffer, size_t index) : m_buffer(&buffer), m_index(index) { } constant_buffer_iterator(const constant_buffer_iterator &other) : m_buffer(other.m_buffer), m_index(other.m_index) { } constant_buffer_iterator& operator=(const constant_buffer_iterator &other) { if(this != &other){ m_buffer = other.m_buffer; m_index = other.m_index; } return *this; } ~constant_buffer_iterator() { } const buffer& get_buffer() const { return *m_buffer; } size_t get_index() const { return m_index; } T read(command_queue &queue) const { BOOST_ASSERT(m_buffer && m_buffer->get()); BOOST_ASSERT(m_index < m_buffer->size() / sizeof(T)); return detail::read_single_value(m_buffer, m_index, queue); } void write(const T &value, command_queue &queue) { BOOST_ASSERT(m_buffer && m_buffer->get()); BOOST_ASSERT(m_index < m_buffer->size() / sizeof(T)); detail::write_single_value(m_buffer, m_index, queue); } template detail::buffer_iterator_index_expr operator[](const Expr &expr) const { BOOST_ASSERT(m_buffer); BOOST_ASSERT(m_buffer->get()); return detail::buffer_iterator_index_expr( *m_buffer, m_index, memory_object::constant_memory, expr ); } private: friend class ::boost::iterator_core_access; reference dereference() const { return detail::buffer_value(*m_buffer, m_index); } bool equal(const constant_buffer_iterator &other) const { return m_buffer == other.m_buffer && m_index == other.m_index; } void increment() { m_index++; } void decrement() { m_index--; } void advance(difference_type n) { m_index = static_cast(static_cast(m_index) + n); } difference_type distance_to(const 
constant_buffer_iterator &other) const { return static_cast(other.m_index - m_index); } private: const buffer *m_buffer; size_t m_index; }; /// Creates a new constant_buffer_iterator for \p buffer at \p index. /// /// \param buffer the \ref buffer object /// \param index the index in the buffer /// /// \return a \c constant_buffer_iterator for \p buffer at \p index template inline constant_buffer_iterator make_constant_buffer_iterator(const buffer &buffer, size_t index = 0) { return constant_buffer_iterator(buffer, index); } /// \internal_ (is_device_iterator specialization for constant_buffer_iterator) template struct is_device_iterator > : boost::true_type {}; namespace detail { // is_buffer_iterator specialization for constant_buffer_iterator template struct is_buffer_iterator< Iterator, typename boost::enable_if< boost::is_same< constant_buffer_iterator, typename boost::remove_const::type > >::type > : public boost::true_type {}; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP compute-0.5/include/boost/compute/iterator/constant_iterator.hpp000066400000000000000000000102241263566244600254000ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { // forward declaration for constant_iterator template class constant_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for constant_iterator template class constant_iterator_base { public: typedef ::boost::iterator_facade< ::boost::compute::constant_iterator, T, ::std::random_access_iterator_tag > type; }; } // end detail namespace /// \class constant_iterator /// \brief An iterator with a constant value. /// /// The constant_iterator class provides an iterator which returns a constant /// value when dereferenced. /// /// For example, this could be used to implement the fill() algorithm in terms /// of the copy() algorithm by copying from a range of constant iterators: /// /// \snippet test/test_constant_iterator.cpp fill_with_copy /// /// \see make_constant_iterator() template class constant_iterator : public detail::constant_iterator_base::type { public: typedef typename detail::constant_iterator_base::type super_type; typedef typename super_type::reference reference; typedef typename super_type::difference_type difference_type; constant_iterator(const T &value, size_t index = 0) : m_value(value), m_index(index) { } constant_iterator(const constant_iterator &other) : m_value(other.m_value), m_index(other.m_index) { } constant_iterator& operator=(const constant_iterator &other) { if(this != &other){ m_value = other.m_value; m_index = other.m_index; } return *this; } ~constant_iterator() { } size_t get_index() const { return m_index; } /// \internal_ template detail::meta_kernel_literal operator[](const Expr &expr) const { (void) expr; return detail::meta_kernel::make_lit(m_value); } private: friend class ::boost::iterator_core_access; 
/// \internal_ reference dereference() const { return m_value; } /// \internal_ bool equal(const constant_iterator &other) const { return m_value == other.m_value && m_index == other.m_index; } /// \internal_ void increment() { m_index++; } /// \internal_ void decrement() { m_index--; } /// \internal_ void advance(difference_type n) { m_index = static_cast(static_cast(m_index) + n); } /// \internal_ difference_type distance_to(const constant_iterator &other) const { return static_cast(other.m_index - m_index); } private: T m_value; size_t m_index; }; /// Returns a new constant_iterator with \p value at \p index. /// /// \param value the constant value /// \param index the iterators index /// /// \return a \c constant_iterator with \p value template inline constant_iterator make_constant_iterator(const T &value, size_t index = 0) { return constant_iterator(value, index); } /// \internal_ (is_device_iterator specialization for constant_iterator) template struct is_device_iterator > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP compute-0.5/include/boost/compute/iterator/counting_iterator.hpp000066400000000000000000000107241263566244600254020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { // forward declaration for counting_iterator template class counting_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for counting_iterator template class counting_iterator_base { public: typedef ::boost::iterator_facade< ::boost::compute::counting_iterator, T, ::std::random_access_iterator_tag > type; }; template struct counting_iterator_index_expr { typedef T result_type; counting_iterator_index_expr(const T &init, const IndexExpr &expr) : m_init(init), m_expr(expr) { } const T &m_init; IndexExpr m_expr; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const counting_iterator_index_expr &expr) { return kernel << '(' << expr.m_init << '+' << expr.m_expr << ')'; } } // end detail namespace /// \class counting_iterator /// \brief The counting_iterator class implements a counting iterator. /// /// A counting iterator returns an internal value (initialized with \p init) /// which is incremented each time the iterator is incremented. 
/// /// For example, this could be used to implement the iota() algorithm in terms /// of the copy() algorithm by copying from a range of counting iterators: /// /// \snippet test/test_counting_iterator.cpp iota_with_copy /// /// \see make_counting_iterator() template class counting_iterator : public detail::counting_iterator_base::type { public: typedef typename detail::counting_iterator_base::type super_type; typedef typename super_type::reference reference; typedef typename super_type::difference_type difference_type; counting_iterator(const T &init) : m_init(init) { } counting_iterator(const counting_iterator &other) : m_init(other.m_init) { } counting_iterator& operator=(const counting_iterator &other) { if(this != &other){ m_init = other.m_init; } return *this; } ~counting_iterator() { } size_t get_index() const { return 0; } template detail::counting_iterator_index_expr operator[](const Expr &expr) const { return detail::counting_iterator_index_expr(m_init, expr); } private: friend class ::boost::iterator_core_access; reference dereference() const { return m_init; } bool equal(const counting_iterator &other) const { return m_init == other.m_init; } void increment() { m_init++; } void decrement() { m_init--; } void advance(difference_type n) { m_init += static_cast(n); } difference_type distance_to(const counting_iterator &other) const { return other.m_init - m_init; } private: T m_init; }; /// Returns a new counting_iterator starting at \p init. /// /// \param init the initial value /// /// \return a counting_iterator with \p init. 
/// /// For example, to create a counting iterator which returns unsigned integers /// and increments from one: /// \code /// auto iter = make_counting_iterator(1); /// \endcode template inline counting_iterator make_counting_iterator(const T &init) { return counting_iterator(init); } /// \internal_ (is_device_iterator specialization for counting_iterator) template struct is_device_iterator > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP compute-0.5/include/boost/compute/iterator/detail/000077500000000000000000000000001263566244600223705ustar00rootroot00000000000000compute-0.5/include/boost/compute/iterator/detail/get_base_iterator_buffer.hpp000066400000000000000000000032121263566244600301120ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP #define BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP namespace boost { namespace compute { namespace detail { // returns the buffer for an iterator adaptor's base iterator if // it exists, otherwise returns a null buffer object. 
template inline const buffer& get_base_iterator_buffer(const Iterator &iter, typename boost::enable_if< is_buffer_iterator< typename Iterator::base_type > >::type* = 0) { return iter.base().get_buffer(); } template inline const buffer& get_base_iterator_buffer(const Iterator &iter, typename boost::disable_if< is_buffer_iterator< typename Iterator::base_type > >::type* = 0) { (void) iter; static buffer null_buffer; return null_buffer; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP compute-0.5/include/boost/compute/iterator/detail/swizzle_iterator.hpp000066400000000000000000000134001263566244600265170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_DETAIL_SWIZZLE_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_DETAIL_SWIZZLE_ITERATOR_HPP #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { // forward declaration for swizzle_iterator template class swizzle_iterator; // meta-function returing the value_type for a swizzle_iterator template struct make_swizzle_iterator_value_type { typedef typename make_vector_type< typename scalar_type< typename std::iterator_traits::value_type >::type, Size >::type type; }; // helper class which defines the iterator_adaptor super-class // type for swizzle_iterator template class swizzle_iterator_base { public: typedef ::boost::iterator_adaptor< swizzle_iterator, InputIterator, typename make_swizzle_iterator_value_type::type, typename std::iterator_traits::iterator_category, typename make_swizzle_iterator_value_type::type > type; }; template struct swizzle_iterator_index_expr { typedef typename make_swizzle_iterator_value_type::type result_type; swizzle_iterator_index_expr(const InputIterator &input_iter, const IndexExpr &index_expr, const std::string &components) : m_input_iter(input_iter), m_index_expr(index_expr), m_components(components) { } InputIterator m_input_iter; IndexExpr m_index_expr; std::string m_components; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const swizzle_iterator_index_expr &expr) { return kernel << expr.m_input_iter[expr.m_index_expr] << "." 
<< expr.m_components; } template class swizzle_iterator : public swizzle_iterator_base::type { public: typedef typename swizzle_iterator_base::type super_type; typedef typename super_type::value_type value_type; typedef typename super_type::reference reference; typedef typename super_type::base_type base_type; typedef typename super_type::difference_type difference_type; BOOST_STATIC_CONSTANT(size_t, vector_size = Size); swizzle_iterator(InputIterator iterator, const std::string &components) : super_type(iterator), m_components(components) { BOOST_ASSERT(components.size() == Size); } swizzle_iterator(const swizzle_iterator &other) : super_type(other.base()), m_components(other.m_components) { BOOST_ASSERT(m_components.size() == Size); } swizzle_iterator& operator=(const swizzle_iterator &other) { if(this != &other){ super_type::operator=(other); m_components = other.m_components; } return *this; } ~swizzle_iterator() { } size_t get_index() const { return super_type::base().get_index(); } const buffer& get_buffer() const { return get_base_iterator_buffer(*this); } template swizzle_iterator_index_expr operator[](const IndexExpression &expr) const { return swizzle_iterator_index_expr(super_type::base(), expr, m_components); } private: friend class ::boost::iterator_core_access; reference dereference() const { return reference(); } private: std::string m_components; }; template inline swizzle_iterator make_swizzle_iterator(InputIterator iterator, const std::string &components) { return swizzle_iterator(iterator, components); } } // end detail namespace // is_device_iterator specialization for swizzle_iterator template struct is_device_iterator > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_SWIZZLE_ITERATOR_HPP 
compute-0.5/include/boost/compute/iterator/discard_iterator.hpp000066400000000000000000000075041263566244600251670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { // forward declaration for discard_iterator class discard_iterator; namespace detail { // helper class which defines the iterator_facade super-class // type for discard_iterator struct discard_iterator_base { typedef ::boost::iterator_facade< ::boost::compute::discard_iterator, void, ::std::random_access_iterator_tag, void * > type; }; template struct discard_iterator_index_expr { typedef void result_type; discard_iterator_index_expr(const IndexExpr &expr) : m_expr(expr) { } IndexExpr m_expr; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const discard_iterator_index_expr &expr) { (void) expr; return kernel; } } // end detail namespace /// \class discard_iterator /// \brief An iterator which discards all values written to it. 
/// /// \see make_discard_iterator(), constant_iterator class discard_iterator : public detail::discard_iterator_base::type { public: typedef detail::discard_iterator_base::type super_type; typedef super_type::reference reference; typedef super_type::difference_type difference_type; discard_iterator(size_t index = 0) : m_index(index) { } discard_iterator(const discard_iterator &other) : m_index(other.m_index) { } discard_iterator& operator=(const discard_iterator &other) { if(this != &other){ m_index = other.m_index; } return *this; } ~discard_iterator() { } /// \internal_ template detail::discard_iterator_index_expr operator[](const Expr &expr) const { return detail::discard_iterator_index_expr(expr); } private: friend class ::boost::iterator_core_access; /// \internal_ reference dereference() const { return 0; } /// \internal_ bool equal(const discard_iterator &other) const { return m_index == other.m_index; } /// \internal_ void increment() { m_index++; } /// \internal_ void decrement() { m_index--; } /// \internal_ void advance(difference_type n) { m_index = static_cast(static_cast(m_index) + n); } /// \internal_ difference_type distance_to(const discard_iterator &other) const { return static_cast(other.m_index - m_index); } private: size_t m_index; }; /// Returns a new discard_iterator with \p index. 
///
/// \param index the index of the iterator
///
/// \return a \c discard_iterator at \p index
inline discard_iterator make_discard_iterator(size_t index = 0)
{
    return discard_iterator(index);
}

/// \internal_ (is_device_iterator specialization for discard_iterator)
template<>
struct is_device_iterator : boost::true_type {};

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP
compute-0.5/include/boost/compute/iterator/function_input_iterator.hpp000066400000000000000000000115611263566244600266200ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP
#define BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP

// NOTE(review): include targets and template parameter lists are absent from
// this text (angle-bracketed spans appear to have been stripped); restore
// them from the upstream header before compiling.
#include
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {

// forward declaration for function_input_iterator
template class function_input_iterator;

namespace detail {

// helper class which defines the iterator_facade super-class
// type for function_input_iterator
//
// Both the value type and the reference type of the facade are the
// result_of type, and the category is random access.
template
class function_input_iterator_base
{
public:
    typedef ::boost::iterator_facade<
        ::boost::compute::function_input_iterator,
        typename ::boost::compute::result_of::type,
        ::std::random_access_iterator_tag,
        typename ::boost::compute::result_of::type
    > type;
};

// expression object produced by function_input_iterator::operator[];
// it stores a copy of the function only
template
struct function_input_iterator_expr
{
    typedef typename ::boost::compute::result_of::type result_type;

    function_input_iterator_expr(const Function &function)
        : m_function(function)
    {
    }

    Function m_function;
};

// streams the result of invoking the stored function with no arguments
template
inline meta_kernel& operator<<(meta_kernel &kernel,
                               const function_input_iterator_expr &expr)
{
    return kernel << expr.m_function();
}

} // end detail namespace

/// \class function_input_iterator
/// \brief Iterator which returns the result of a function when dereferenced
///
/// For example:
///
/// \snippet test/test_function_input_iterator.cpp generate_42
///
/// \see make_function_input_iterator()
template
class function_input_iterator :
    public detail::function_input_iterator_base::type
{
public:
    typedef typename detail::function_input_iterator_base::type super_type;
    typedef typename super_type::reference reference;
    typedef typename super_type::difference_type difference_type;
    typedef Function function;

    /// Creates an iterator for \p function positioned at \p index.
    function_input_iterator(const Function &function, size_t index = 0)
        : m_function(function),
          m_index(index)
    {
    }

    /// Creates a copy of \p other (function and position).
    function_input_iterator(const function_input_iterator &other)
        : m_function(other.m_function),
          m_index(other.m_index)
    {
    }

    /// Copies the function and position of \p other to \c *this.
    function_input_iterator&
    operator=(const function_input_iterator &other)
    {
        if(this != &other){
            m_function = other.m_function;
            m_index = other.m_index;
        }

        return *this;
    }

    ~function_input_iterator()
    {
    }

    /// Returns the current position of the iterator.
    size_t get_index() const
    {
        return m_index;
    }

    /// \internal_
    ///
    /// The index expression is ignored; the returned expression carries
    /// only the stored function.
    template
    detail::function_input_iterator_expr
    operator[](const Expr &expr) const
    {
        (void) expr;

        return detail::function_input_iterator_expr(m_function);
    }

private:
    friend class ::boost::iterator_core_access;

    // returns a value-initialized reference; the function is not invoked here
    reference dereference() const
    {
        return reference();
    }

    // iterators are equal when both the function and the position match
    bool equal(const function_input_iterator &other) const
    {
        return m_function == other.m_function &&
               m_index == other.m_index;
    }

    void increment()
    {
        m_index++;
    }

    void decrement()
    {
        m_index--;
    }

    void advance(difference_type n)
    {
        m_index = static_cast(static_cast(m_index) + n);
    }

    difference_type
    distance_to(const function_input_iterator &other) const
    {
        return static_cast(other.m_index - m_index);
    }

private:
    Function m_function;
    size_t m_index;
};

/// Returns a function_input_iterator with \p function.
/// /// \param function function to execute when dereferenced /// \param index index of the iterator /// /// \return a \c function_input_iterator with \p function template inline function_input_iterator make_function_input_iterator(const Function &function, size_t index = 0) { return function_input_iterator(function, index); } /// \internal_ (is_device_iterator specialization for function_input_iterator) template struct is_device_iterator > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP compute-0.5/include/boost/compute/iterator/permutation_iterator.hpp000066400000000000000000000142271263566244600261250ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { // forward declaration for transform_iterator template class permutation_iterator; namespace detail { // helper class which defines the iterator_adaptor super-class // type for permutation_iterator template class permutation_iterator_base { public: typedef ::boost::iterator_adaptor< ::boost::compute::permutation_iterator, ElementIterator > type; }; template struct permutation_iterator_access_expr { typedef typename std::iterator_traits::value_type result_type; permutation_iterator_access_expr(const ElementIterator &e, const IndexIterator &i, const IndexExpr &expr) : m_element_iter(e), m_index_iter(i), m_expr(expr) { } ElementIterator m_element_iter; IndexIterator m_index_iter; IndexExpr m_expr; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const permutation_iterator_access_expr &expr) { return kernel << expr.m_element_iter[expr.m_index_iter[expr.m_expr]]; } } // end detail namespace /// \class permutation_iterator /// \brief The permutation_iterator class provides a permuation iterator /// /// A permutation iterator iterates over a value range and an index range. When /// dereferenced, it returns the value from the value range using the current /// index from the index range. 
/// /// For example, to reverse a range using the copy() algorithm and a permutation /// sequence: /// /// \snippet test/test_permutation_iterator.cpp reverse_range /// /// \see make_permutation_iterator() template class permutation_iterator : public detail::permutation_iterator_base::type { public: typedef typename detail::permutation_iterator_base::type super_type; typedef typename super_type::value_type value_type; typedef typename super_type::reference reference; typedef typename super_type::base_type base_type; typedef typename super_type::difference_type difference_type; typedef IndexIterator index_iterator; permutation_iterator(ElementIterator e, IndexIterator i) : super_type(e), m_map(i) { } permutation_iterator(const permutation_iterator &other) : super_type(other), m_map(other.m_map) { } permutation_iterator& operator=(const permutation_iterator &other) { if(this != &other){ super_type::operator=(other); m_map = other.m_map; } return *this; } ~permutation_iterator() { } size_t get_index() const { return super_type::base().get_index(); } const buffer& get_buffer() const { return detail::get_base_iterator_buffer(*this); } template detail::permutation_iterator_access_expr operator[](const IndexExpr &expr) const { return detail::permutation_iterator_access_expr(super_type::base(), m_map, expr); } private: friend class ::boost::iterator_core_access; reference dereference() const { return reference(); } private: IndexIterator m_map; }; /// Returns a permutation_iterator for \p e using indices from \p i. 
/// /// \param e the element range iterator /// \param i the index range iterator /// /// \return a \c permutation_iterator for \p e using \p i template inline permutation_iterator make_permutation_iterator(ElementIterator e, IndexIterator i) { return permutation_iterator(e, i); } /// \internal_ (is_device_iterator specialization for permutation_iterator) template struct is_device_iterator< permutation_iterator > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP compute-0.5/include/boost/compute/iterator/strided_iterator.hpp000066400000000000000000000223051263566244600252100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { // forward declaration for strided_iterator template class strided_iterator; namespace detail { // helper class which defines the iterator_adaptor super-class // type for strided_iterator template class strided_iterator_base { public: typedef ::boost::iterator_adaptor< ::boost::compute::strided_iterator, Iterator > type; }; // helper class for including stride value in index expression template struct stride_expr { stride_expr(const IndexExpr &expr, Stride stride) : m_index_expr(expr), m_stride(stride) { } IndexExpr m_index_expr; Stride m_stride; }; template inline stride_expr make_stride_expr(const IndexExpr &expr, Stride stride) { return stride_expr(expr, stride); } template inline meta_kernel& operator<<(meta_kernel &kernel, const stride_expr &expr) { return kernel << "(" << kernel.lit(expr.m_stride) << " * (" << expr.m_index_expr << "))"; } template struct strided_iterator_index_expr { typedef typename std::iterator_traits::value_type result_type; strided_iterator_index_expr(const Iterator &input_iter, const Stride &stride, const IndexExpr &index_expr) : m_input_iter(input_iter), m_stride(stride), m_index_expr(index_expr) { } Iterator m_input_iter; const Stride& m_stride; IndexExpr m_index_expr; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const strided_iterator_index_expr &expr) { return kernel << expr.m_input_iter[make_stride_expr(expr.m_index_expr, expr.m_stride)]; } } // end detail namespace /// \class strided_iterator /// \brief An iterator adaptor with adjustable iteration step. /// /// The strided iterator adaptor skips over multiple elements each time /// it is incremented or decremented. 
/// /// \see buffer_iterator, make_strided_iterator(), make_strided_iterator_end() template class strided_iterator : public detail::strided_iterator_base::type { public: typedef typename detail::strided_iterator_base::type super_type; typedef typename super_type::value_type value_type; typedef typename super_type::reference reference; typedef typename super_type::base_type base_type; typedef typename super_type::difference_type difference_type; strided_iterator(Iterator iterator, difference_type stride) : super_type(iterator), m_stride(static_cast(stride)) { // stride must be greater than zero BOOST_ASSERT_MSG(stride > 0, "Stride must be greater than zero"); } strided_iterator(const strided_iterator &other) : super_type(other.base()), m_stride(other.m_stride) { } strided_iterator& operator=(const strided_iterator &other) { if(this != &other){ super_type::operator=(other); m_stride = other.m_stride; } return *this; } ~strided_iterator() { } size_t get_index() const { return super_type::base().get_index(); } const buffer& get_buffer() const { return detail::get_base_iterator_buffer(*this); } template detail::strided_iterator_index_expr operator[](const IndexExpression &expr) const { return detail::strided_iterator_index_expr(super_type::base(), m_stride, expr); } private: friend class ::boost::iterator_core_access; reference dereference() const { return reference(); } bool equal(const strided_iterator &other) const { return (other.m_stride == m_stride) && (other.base_reference() == this->base_reference()); } void increment() { std::advance(super_type::base_reference(), m_stride); } void decrement() { std::advance(super_type::base_reference(),-m_stride); } void advance(typename super_type::difference_type n) { std::advance(super_type::base_reference(), n * m_stride); } difference_type distance_to(const strided_iterator &other) const { return std::distance(this->base_reference(), other.base_reference()) / m_stride; } private: difference_type m_stride; }; /// Returns a 
strided_iterator for \p iterator with \p stride. /// /// \param iterator the underlying iterator /// \param stride the iteration step for strided_iterator /// /// \return a \c strided_iterator for \p iterator with \p stride. /// /// For example, to create an iterator which iterates over every other /// element in a \c vector: /// \code /// auto strided_iterator = make_strided_iterator(vec.begin(), 2); /// \endcode template inline strided_iterator make_strided_iterator(Iterator iterator, typename std::iterator_traits::difference_type stride) { return strided_iterator(iterator, stride); } /// Returns a strided_iterator which refers to element that would follow /// the last element accessible through strided_iterator for \p first iterator /// with \p stride. /// /// Parameter \p stride must be greater than zero. /// /// \param first the iterator referring to the first element accessible /// through strided_iterator for \p first with \p stride /// \param last the iterator referring to the last element that may be //// accessible through strided_iterator for \p first with \p stride /// \param stride the iteration step /// /// \return a \c strided_iterator referring to element that would follow /// the last element accessible through strided_iterator for \p first /// iterator with \p stride. /// /// It can be helpful when iterating over strided_iterator: /// \code /// // vec.size() may not be divisible by 3 /// auto strided_iterator_begin = make_strided_iterator(vec.begin(), 3); /// auto strided_iterator_end = make_strided_iterator_end(vec.begin(), vec.end(), 3); /// /// // copy every 3rd element to result /// boost::compute::copy( /// strided_iterator_begin, /// strided_iterator_end,ided_iterator referring to element that would follow /// the last element accessible through strided_iterator for \p first /// iterator with \p stride. 
/// result.begin(), /// queue /// ); /// \endcode template strided_iterator make_strided_iterator_end(Iterator first, Iterator last, typename std::iterator_traits::difference_type stride) { typedef typename std::iterator_traits::difference_type difference_type; // calculate distance from end to the last element that would be // accessible through strided_iterator. difference_type range = std::distance(first, last); difference_type d = (range - 1) / stride; d *= stride; d -= range; // advance from end to the element that would follow the last // accessible element Iterator end_for_strided_iterator = last; std::advance(end_for_strided_iterator, d + stride); return strided_iterator(end_for_strided_iterator, stride); } /// \internal_ (is_device_iterator specialization for strided_iterator) template struct is_device_iterator > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP compute-0.5/include/boost/compute/iterator/transform_iterator.hpp000066400000000000000000000170661263566244600255750ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP #define BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { // forward declaration for transform_iterator template class transform_iterator; namespace detail { // meta-function returning the value_type for a transform_iterator template struct make_transform_iterator_value_type { typedef typename std::iterator_traits::value_type value_type; typedef typename boost::compute::result_of::type type; }; // helper class which defines the iterator_adaptor super-class // type for transform_iterator template class transform_iterator_base { public: typedef ::boost::iterator_adaptor< ::boost::compute::transform_iterator, InputIterator, typename make_transform_iterator_value_type::type, typename std::iterator_traits::iterator_category, typename make_transform_iterator_value_type::type > type; }; template struct transform_iterator_index_expr { typedef typename make_transform_iterator_value_type< InputIterator, UnaryFunction >::type result_type; transform_iterator_index_expr(const InputIterator &input_iter, const UnaryFunction &transform_expr, const IndexExpr &index_expr) : m_input_iter(input_iter), m_transform_expr(transform_expr), m_index_expr(index_expr) { } InputIterator m_input_iter; UnaryFunction m_transform_expr; IndexExpr m_index_expr; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const transform_iterator_index_expr &expr) { return kernel << expr.m_transform_expr(expr.m_input_iter[expr.m_index_expr]); } } // end detail namespace /// \class transform_iterator /// \brief A transform iterator adaptor. /// /// The transform_iterator adaptor applies a unary function to each element /// produced from the underlying iterator when dereferenced. 
/// /// For example, to copy from an input range to an output range while taking /// the absolute value of each element: /// /// \snippet test/test_transform_iterator.cpp copy_abs /// /// \see buffer_iterator, make_transform_iterator() template class transform_iterator : public detail::transform_iterator_base::type { public: typedef typename detail::transform_iterator_base::type super_type; typedef typename super_type::value_type value_type; typedef typename super_type::reference reference; typedef typename super_type::base_type base_type; typedef typename super_type::difference_type difference_type; typedef UnaryFunction unary_function; transform_iterator(InputIterator iterator, UnaryFunction transform) : super_type(iterator), m_transform(transform) { } transform_iterator(const transform_iterator &other) : super_type(other.base()), m_transform(other.m_transform) { } transform_iterator& operator=(const transform_iterator &other) { if(this != &other){ super_type::operator=(other); m_transform = other.m_transform; } return *this; } ~transform_iterator() { } size_t get_index() const { return super_type::base().get_index(); } const buffer& get_buffer() const { return detail::get_base_iterator_buffer(*this); } template detail::transform_iterator_index_expr operator[](const IndexExpression &expr) const { return detail::transform_iterator_index_expr(super_type::base(), m_transform, expr); } private: friend class ::boost::iterator_core_access; reference dereference() const { const context &context = super_type::base().get_buffer().get_context(); command_queue queue(context, context.get_device()); detail::meta_kernel k("read"); size_t output_arg = k.add_arg(memory_object::global_memory, "output"); k << "*output = " << m_transform(super_type::base()[k.lit(0)]) << ";"; kernel kernel = k.compile(context); buffer output_buffer(context, sizeof(value_type)); kernel.set_arg(output_arg, output_buffer); queue.enqueue_task(kernel); return detail::read_single_value(output_buffer, 
queue); } private: UnaryFunction m_transform; }; /// Returns a transform_iterator for \p iterator with \p transform. /// /// \param iterator the underlying iterator /// \param transform the unary transform function /// /// \return a \c transform_iterator for \p iterator with \p transform /// /// For example, to create an iterator which returns the square-root of each /// value in a \c vector: /// \code /// auto sqrt_iterator = make_transform_iterator(vec.begin(), sqrt()); /// \endcode template inline transform_iterator make_transform_iterator(InputIterator iterator, UnaryFunction transform) { return transform_iterator(iterator, transform); } /// \internal_ (is_device_iterator specialization for transform_iterator) template struct is_device_iterator< transform_iterator > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP compute-0.5/include/boost/compute/iterator/zip_iterator.hpp000066400000000000000000000242671263566244600243650ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP
#define BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP

// NOTE(review): include targets and template parameter lists are absent from
// this text (angle-bracketed spans appear to have been stripped); restore
// them from the upstream header before compiling.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace boost {
namespace compute {

// forward declaration for zip_iterator
template
class zip_iterator;

namespace detail {

namespace mpl = boost::mpl;

// meta-function returning the value_type for an iterator
template
struct make_iterator_value_type
{
    typedef typename std::iterator_traits::value_type type;
};

// meta-function returning the value_type for a zip_iterator
template
struct make_zip_iterator_value_type
{
    typedef typename
        detail::mpl_vector_to_tuple<
            typename mpl::transform<
                IteratorTuple,
                make_iterator_value_type,
                mpl::back_inserter >
            >::type
        >::type type;
};

// helper class which defines the iterator_facade super-class
// type for zip_iterator
template
class zip_iterator_base
{
public:
    typedef ::boost::iterator_facade<
        ::boost::compute::zip_iterator,
        typename make_zip_iterator_value_type::type,
        ::std::random_access_iterator_tag,
        typename make_zip_iterator_value_type::type
    > type;
};

// expression object produced by zip_iterator::operator[]; it stores copies
// of the iterator tuple and the index expression
template
struct zip_iterator_index_expr
{
    typedef typename
        make_zip_iterator_value_type::type
        result_type;

    zip_iterator_index_expr(const IteratorTuple &iterators,
                            const IndexExpr &index_expr)
        : m_iterators(iterators),
          m_index_expr(index_expr)
    {
    }

    IteratorTuple m_iterators;
    IndexExpr m_index_expr;
};

// Streams the n-th tuple element indexed with the index expression, preceded
// by ", " for every element but the first.
/// \internal_
#define BOOST_COMPUTE_PRINT_ELEM(z, n, unused)                                 \
        BOOST_PP_EXPR_IF(n, << ", ")                                           \
        << boost::get(expr.m_iterators)[expr.m_index_expr]

// Defines operator<< for a zip_iterator_index_expr over an n-element tuple:
// injects the tuple value type into the kernel and streams a
// "(type){ e0, e1, ... }" literal built from the indexed elements.
/// \internal_
#define BOOST_COMPUTE_PRINT_ZIP_IDX(z, n, unused)                              \
template                                                                       \
inline meta_kernel& operator<<(                                                \
        meta_kernel &kernel,                                                   \
        const zip_iterator_index_expr<                                         \
                   boost::tuple,                                               \
                   IndexExpr                                                   \
              > &expr)                                                         \
{                                                                              \
    typedef typename                                                           \
        boost::tuple                                                           \
        tuple_type;                                                            \
    typedef typename                                                           \
        make_zip_iterator_value_type::type                                     \
        value_type;                                                            \
    kernel.inject_type();                                                      \
    return kernel                                                              \
        << "(" << type_name() << ")"                                           \
        << "{ "                                                                \
        BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_ELEM, ~)                        \
        << "}";                                                                \
}

// instantiate the operator for tuple sizes 1 .. BOOST_COMPUTE_MAX_ARITY - 1
BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_ZIP_IDX, ~)

#undef BOOST_COMPUTE_PRINT_ZIP_IDX
#undef BOOST_COMPUTE_PRINT_ELEM

// functor which advances an iterator by a fixed distance
//
// NOTE(review): the distance is stored as size_t while zip_iterator::advance()
// passes a difference_type; a negative distance therefore relies on the
// unsigned value converting back inside std::advance -- confirm this is
// intended.
struct iterator_advancer
{
    iterator_advancer(size_t n)
        : m_distance(n)
    {
    }

    template
    void operator()(Iterator &i) const
    {
        std::advance(i, m_distance);
    }

    size_t m_distance;
};

// post-increments the iterator in place
template
void increment_iterator(Iterator &i)
{
    i++;
}

// post-decrements the iterator in place
template
void decrement_iterator(Iterator &i)
{
    i--;
}

} // end detail namespace

/// \class zip_iterator
/// \brief A zip iterator adaptor.
///
/// The zip_iterator class combines values from multiple input iterators. When
/// dereferenced it returns a tuple containing each value at the current
/// position in each input range.
///
/// \see make_zip_iterator()
template
class zip_iterator : public detail::zip_iterator_base::type
{
public:
    typedef typename detail::zip_iterator_base::type super_type;
    typedef typename super_type::value_type value_type;
    typedef typename super_type::reference reference;
    typedef typename super_type::difference_type difference_type;
    typedef IteratorTuple iterator_tuple;

    /// Creates a zip iterator over the tuple \p iterators.
    zip_iterator(IteratorTuple iterators)
        : m_iterators(iterators)
    {
    }

    /// Creates a copy of \p other.
    zip_iterator(const zip_iterator &other)
        : m_iterators(other.m_iterators)
    {
    }

    /// Copies the iterator tuple of \p other to \c *this.
    zip_iterator& operator=(const zip_iterator &other)
    {
        if(this != &other){
            super_type::operator=(other);

            m_iterators = other.m_iterators;
        }

        return *this;
    }

    ~zip_iterator()
    {
    }

    /// Returns the tuple of underlying iterators.
    const IteratorTuple& get_iterator_tuple() const
    {
        return m_iterators;
    }

    /// \internal_
    ///
    /// Builds an index expression from the iterator tuple and \p expr.
    template
    detail::zip_iterator_index_expr
    operator[](const IndexExpression &expr) const
    {
        return detail::zip_iterator_index_expr(m_iterators,
                                                                                expr);
    }

private:
    friend class ::boost::iterator_core_access;

    // returns a value-initialized reference; no device read is performed here
    reference dereference() const
    {
        return reference();
    }

    bool equal(const zip_iterator &other) const
    {
        return m_iterators == other.m_iterators;
    }

    // the three movement operations apply the same step to every iterator
    // in the tuple
    void increment()
    {
        boost::fusion::for_each(m_iterators, detail::increment_iterator);
    }

    void decrement()
    {
        boost::fusion::for_each(m_iterators, detail::decrement_iterator);
    }

    void advance(difference_type n)
    {
        boost::fusion::for_each(m_iterators, detail::iterator_advancer(n));
    }

    // the distance is measured on the first iterator of each tuple only
    difference_type distance_to(const zip_iterator &other) const
    {
        return std::distance(boost::get<0>(m_iterators),
                             boost::get<0>(other.m_iterators));
    }

private:
    IteratorTuple m_iterators;
};

/// Creates a zip_iterator for \p iterators.
///
/// \param iterators a tuple of input iterators to zip together
///
/// \return a \c zip_iterator for \p iterators
///
/// For example, to zip together iterators from three vectors (\c a, \c b, and
/// \p c):
/// \code
/// auto zipped = boost::compute::make_zip_iterator(
///     boost::make_tuple(a.begin(), b.begin(), c.begin())
/// );
/// \endcode
template
inline zip_iterator
make_zip_iterator(IteratorTuple iterators)
{
    return zip_iterator(iterators);
}

/// \internal_ (is_device_iterator specialization for zip_iterator)
template
struct is_device_iterator > : boost::true_type {};

namespace detail {

// get() specialization for zip_iterator
//
// Defines operator<< for an invoked_get over a zip_iterator_index_expr:
// statically checks N against the tuple length, injects the element type and
// streams the selected tuple element indexed with the index expression.
/// \internal_
#define BOOST_COMPUTE_ZIP_GET_N(z, n, unused)                                  \
template                                                                       \
inline meta_kernel&                                                            \
operator<<(meta_kernel &kernel,                                                \
           const invoked_get<                                                  \
               N,                                                              \
               zip_iterator_index_expr,                                        \
               boost::tuple                                                    \
            > &expr)                                                           \
{                                                                              \
    typedef typename boost::tuple Tuple;                                       \
    typedef typename boost::tuples::element::type T;                           \
    BOOST_STATIC_ASSERT(N < size_t(boost::tuples::length::value));             \
    kernel.inject_type();                                                      \
    return kernel                                                              \
        << boost::get(expr.m_arg.m_iterators)[expr.m_arg.m_index_expr];        \
}

// instantiate the operator for tuple sizes 1 .. BOOST_COMPUTE_MAX_ARITY - 1
BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_ZIP_GET_N, ~)

#undef BOOST_COMPUTE_ZIP_GET_N

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP
compute-0.5/include/boost/compute/kernel.hpp000066400000000000000000000256611263566244600213000ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_KERNEL_HPP #define BOOST_COMPUTE_KERNEL_HPP #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template struct set_kernel_arg; } // end detail namespace /// \class kernel /// \brief A compute kernel. /// /// \see command_queue, program class kernel { public: /// Creates a null kernel object. kernel() : m_kernel(0) { } /// Creates a new kernel object for \p kernel. If \p retain is /// \c true, the reference count for \p kernel will be incremented. explicit kernel(cl_kernel kernel, bool retain = true) : m_kernel(kernel) { if(m_kernel && retain){ clRetainKernel(m_kernel); } } /// Creates a new kernel object with \p name from \p program. kernel(const program &program, const std::string &name) { cl_int error = 0; m_kernel = clCreateKernel(program.get(), name.c_str(), &error); if(!m_kernel){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new kernel object as a copy of \p other. kernel(const kernel &other) : m_kernel(other.m_kernel) { if(m_kernel){ clRetainKernel(m_kernel); } } /// Copies the kernel object from \p other to \c *this. 
kernel& operator=(const kernel &other) { if(this != &other){ if(m_kernel){ clReleaseKernel(m_kernel); } m_kernel = other.m_kernel; if(m_kernel){ clRetainKernel(m_kernel); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new kernel object from \p other. kernel(kernel&& other) BOOST_NOEXCEPT : m_kernel(other.m_kernel) { other.m_kernel = 0; } /// Move-assigns the kernel from \p other to \c *this. kernel& operator=(kernel&& other) BOOST_NOEXCEPT { if(m_kernel){ clReleaseKernel(m_kernel); } m_kernel = other.m_kernel; other.m_kernel = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the kernel object. ~kernel() { if(m_kernel){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseKernel(m_kernel) ); } } /// Returns a reference to the underlying OpenCL kernel object. cl_kernel& get() const { return const_cast(m_kernel); } /// Returns the function name for the kernel. std::string name() const { return get_info(CL_KERNEL_FUNCTION_NAME); } /// Returns the number of arguments for the kernel. size_t arity() const { return get_info(CL_KERNEL_NUM_ARGS); } /// Returns the program for the kernel. program get_program() const { return program(get_info(CL_KERNEL_PROGRAM)); } /// Returns the context for the kernel. context get_context() const { return context(get_info(CL_KERNEL_CONTEXT)); } /// Returns information about the kernel. /// /// \see_opencl_ref{clGetKernelInfo} template T get_info(cl_kernel_info info) const { return detail::get_object_info(clGetKernelInfo, m_kernel, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Returns information about the argument at \p index. 
/// /// For example, to get the name of the first argument: /// \code /// std::string arg = kernel.get_arg_info(0, CL_KERNEL_ARG_NAME); /// \endcode /// /// Note, this function requires that the program be compiled with the /// \c "-cl-kernel-arg-info" flag. For example: /// \code /// program.build("-cl-kernel-arg-info"); /// \endcode /// /// \opencl_version_warning{1,2} /// /// \see_opencl_ref{clGetKernelArgInfo} template T get_arg_info(size_t index, cl_kernel_arg_info info) const { return detail::get_object_info(clGetKernelArgInfo, m_kernel, info, index); } #endif // CL_VERSION_1_2 /// Returns work-group information for the kernel with \p device. /// /// \see_opencl_ref{clGetKernelWorkGroupInfo} template T get_work_group_info(const device &device, cl_kernel_work_group_info info) const { return detail::get_object_info(clGetKernelWorkGroupInfo, m_kernel, info, device.id()); } /// Sets the argument at \p index to \p value with \p size. /// /// \see_opencl_ref{clSetKernelArg} void set_arg(size_t index, size_t size, const void *value) { BOOST_ASSERT(index < arity()); cl_int ret = clSetKernelArg(m_kernel, static_cast(index), size, value); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Sets the argument at \p index to \p value. /// /// For built-in types (e.g. \c float, \c int4_), this is equivalent to /// calling set_arg(index, sizeof(type), &value). /// /// Additionally, this method is specialized for device memory objects /// such as buffer and image2d. This allows for them to be passed directly /// without having to extract their underlying cl_mem object. /// /// This method is also specialized for device container types such as /// vector and array. This allows for them to be passed directly /// as kernel arguments without having to extract their underlying buffer. /// /// For setting local memory arguments (e.g. 
"__local float *buf"), the /// local_buffer class may be used: /// \code /// // set argument to a local buffer with storage for 32 float's /// kernel.set_arg(0, local_buffer(32)); /// \endcode template void set_arg(size_t index, const T &value) { // if you get a compilation error pointing here it means you // attempted to set a kernel argument from an invalid type. detail::set_kernel_arg()(*this, index, value); } /// \internal_ void set_arg(size_t index, const cl_mem mem) { set_arg(index, sizeof(cl_mem), static_cast(&mem)); } /// \internal_ void set_arg(size_t index, const cl_sampler sampler) { set_arg(index, sizeof(cl_sampler), static_cast(&sampler)); } /// \internal_ template void set_arg(size_t index, const svm_ptr ptr) { #ifdef CL_VERSION_2_0 cl_int ret = clSetKernelArgSVMPointer(m_kernel, index, ptr.get()); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } #else BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE)); #endif } #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES /// Sets the arguments for the kernel to \p args. template void set_args(T&&... args) { BOOST_ASSERT(sizeof...(T) <= arity()); _set_args<0>(args...); } #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Sets additional execution information for the kernel. /// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clSetKernelExecInfo} void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value) { cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } #endif // CL_VERSION_2_0 /// Returns \c true if the kernel is the same at \p other. bool operator==(const kernel &other) const { return m_kernel == other.m_kernel; } /// Returns \c true if the kernel is different from \p other. 
bool operator!=(const kernel &other) const { return m_kernel != other.m_kernel; } /// \internal_ operator cl_kernel() const { return m_kernel; } /// \internal_ static kernel create_with_source(const std::string &source, const std::string &name, const context &context) { return program::build_with_source(source, context).create_kernel(name); } private: #ifndef BOOST_NO_VARIADIC_TEMPLATES /// \internal_ template void _set_args() { } /// \internal_ template void _set_args(T&& arg, Args&&... rest) { set_arg(N, arg); _set_args(rest...); } #endif // BOOST_NO_VARIADIC_TEMPLATES private: cl_kernel m_kernel; }; inline kernel program::create_kernel(const std::string &name) const { return kernel(*this, name); } /// \internal_ define get_info() specializations for kernel BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel, ((std::string, CL_KERNEL_FUNCTION_NAME)) ((cl_uint, CL_KERNEL_NUM_ARGS)) ((cl_uint, CL_KERNEL_REFERENCE_COUNT)) ((cl_context, CL_KERNEL_CONTEXT)) ((cl_program, CL_KERNEL_PROGRAM)) ) #ifdef CL_VERSION_1_2 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel, ((std::string, CL_KERNEL_ATTRIBUTES)) ) #endif // CL_VERSION_1_2 namespace detail { // set_kernel_arg implementation for built-in types template struct set_kernel_arg { typename boost::enable_if >::type operator()(kernel &kernel_, size_t index, const T &value) { kernel_.set_arg(index, sizeof(T), &value); } }; // set_kernel_arg specialization for char (different from built-in cl_char) template<> struct set_kernel_arg { void operator()(kernel &kernel_, size_t index, const char c) { kernel_.set_arg(index, sizeof(char), &c); } }; } // end detail namespace } // end namespace compute } // end namespace boost #endif // BOOST_COMPUTE_KERNEL_HPP compute-0.5/include/boost/compute/lambda.hpp000066400000000000000000000015361263566244600212330ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed 
under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_HPP #define BOOST_COMPUTE_LAMBDA_HPP #include #include #include #include #include #include #include #endif // BOOST_COMPUTE_LAMBDA_HPP compute-0.5/include/boost/compute/lambda/000077500000000000000000000000001263566244600205155ustar00rootroot00000000000000compute-0.5/include/boost/compute/lambda/context.hpp000066400000000000000000000234561263566244600227240ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_CONTEXT_HPP #define BOOST_COMPUTE_LAMBDA_CONTEXT_HPP #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace lambda { namespace mpl = boost::mpl; namespace proto = boost::proto; #define BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(tag, op) \ template \ void operator()(tag, const LHS &lhs, const RHS &rhs) \ { \ if(proto::arity_of::value > 0){ \ stream << '('; \ proto::eval(lhs, *this); \ stream << ')'; \ } \ else { \ proto::eval(lhs, *this); \ } \ \ stream << op; \ \ if(proto::arity_of::value > 0){ \ stream << '('; \ proto::eval(rhs, *this); \ stream << ')'; \ } \ else { \ proto::eval(rhs, *this); \ } \ } // lambda expression context template struct context : proto::callable_context > { typedef void result_type; typedef Args args_tuple; // create a lambda context for kernel with args context(boost::compute::detail::meta_kernel &kernel, const Args &args_) : stream(kernel), args(args_) { } // handle terminals template void operator()(proto::tag::terminal, const T &x) { // terminal values in lambda expressions are always literals stream << stream.lit(x); } // handle placeholders template void operator()(proto::tag::terminal, placeholder) { stream << boost::get(args); } // handle functions #define BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION_ARG(z, n, unused) \ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n) #define BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION(z, n, unused) \ template \ void operator()( \ proto::tag::function, \ const F &function, \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION_ARG, ~) \ ) \ { \ proto::value(function).apply(*this, BOOST_PP_ENUM_PARAMS(n, arg)); \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION, ~) #undef BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION // operators 
BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::plus, '+') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::minus, '-') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::multiplies, '*') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::divides, '/') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::modulus, '%') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::less, '<') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::greater, '>') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::less_equal, "<=") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::greater_equal, ">=") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::equal_to, "==") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::not_equal_to, "!=") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::logical_and, "&&") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::logical_or, "||") BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_and, '&') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_or, '|') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_xor, '^') BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::assign, '=') // subscript operator template void operator()(proto::tag::subscript, const LHS &lhs, const RHS &rhs) { proto::eval(lhs, *this); stream << '['; proto::eval(rhs, *this); stream << ']'; } // ternary conditional operator template void operator()(proto::tag::if_else_, const Pred &p, const Arg1 &x, const Arg2 &y) { proto::eval(p, *this); stream << '?'; proto::eval(x, *this); stream << ':'; proto::eval(y, *this); } boost::compute::detail::meta_kernel &stream; Args args; }; namespace detail { template struct invoked_unary_expression { typedef typename ::boost::compute::result_of::type result_type; invoked_unary_expression(const Expr &expr, 
const Arg &arg) : m_expr(expr), m_arg(arg) { } Expr m_expr; Arg m_arg; }; template boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel &kernel, const invoked_unary_expression &expr) { context > ctx(kernel, boost::make_tuple(expr.m_arg)); proto::eval(expr.m_expr, ctx); return kernel; } template struct invoked_binary_expression { typedef typename ::boost::compute::result_of::type result_type; invoked_binary_expression(const Expr &expr, const Arg1 &arg1, const Arg2 &arg2) : m_expr(expr), m_arg1(arg1), m_arg2(arg2) { } Expr m_expr; Arg1 m_arg1; Arg2 m_arg2; }; template boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel &kernel, const invoked_binary_expression &expr) { context > ctx( kernel, boost::make_tuple(expr.m_arg1, expr.m_arg2) ); proto::eval(expr.m_expr, ctx); return kernel; } } // end detail namespace // forward declare domain struct domain; // lambda expression wrapper template struct expression : proto::extends, domain> { typedef proto::extends, domain> base_type; BOOST_PROTO_EXTENDS_USING_ASSIGN(expression) expression(const Expr &expr = Expr()) : base_type(expr) { } // result_of protocol template struct result { }; template struct result { typedef typename ::boost::compute::lambda::result_of::type type; }; template struct result { typedef typename ::boost::compute::lambda::result_of< Expr, typename boost::tuple >::type type; }; template struct result { typedef typename ::boost::compute::lambda::result_of< Expr, typename boost::tuple >::type type; }; template detail::invoked_unary_expression, Arg> operator()(const Arg &x) const { return detail::invoked_unary_expression, Arg>(*this, x); } template detail::invoked_binary_expression, Arg1, Arg2> operator()(const Arg1 &x, const Arg2 &y) const { return detail::invoked_binary_expression< expression, Arg1, Arg2 >(*this, x, y); } // function<> conversion operator template operator function() const { using ::boost::compute::detail::meta_kernel; 
std::stringstream source; ::boost::compute::detail::meta_kernel_variable arg1("x"); source << "inline " << type_name() << " lambda" << ::boost::compute::detail::generate_argument_list('x') << "{\n" << " return " << meta_kernel::expr_to_string((*this)(arg1)) << ";\n" << "}\n"; return make_function_from_source("lambda", source.str()); } template operator function() const { using ::boost::compute::detail::meta_kernel; std::stringstream source; ::boost::compute::detail::meta_kernel_variable arg1("x"); ::boost::compute::detail::meta_kernel_variable arg2("y"); source << "inline " << type_name() << " lambda" << ::boost::compute::detail::generate_argument_list('x') << "{\n" << " return " << meta_kernel::expr_to_string((*this)(arg1, arg2)) << ";\n" << "}\n"; return make_function_from_source("lambda", source.str()); } }; // lambda expression domain struct domain : proto::domain > { }; } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_CONTEXT_HPP compute-0.5/include/boost/compute/lambda/functional.hpp000066400000000000000000000216021263566244600233710ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP #define BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace lambda { namespace mpl = boost::mpl; namespace proto = boost::proto; // wraps a unary boolean function #define BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template \ struct lambda_result \ { \ typedef int type; \ }; \ \ template \ static void apply(Context &ctx, const Arg &arg) \ { \ ctx.stream << #name << "("; \ proto::eval(arg, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \ >::type const \ name(const Arg &arg) \ { \ return proto::make_expr( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \ ); \ } // wraps a unary function who's return type is the same as the argument type #define BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template \ struct lambda_result \ { \ typedef typename proto::result_of::child_c::type Arg1; \ typedef typename ::boost::compute::lambda::result_of::type type; \ }; \ \ template \ static void apply(Context &ctx, const Arg &arg) \ { \ ctx.stream << #name << "("; \ proto::eval(arg, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \ >::type const \ name(const Arg &arg) \ { \ return proto::make_expr( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \ ); \ } // wraps a binary function #define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template \ struct lambda_result \ { \ typedef typename 
proto::result_of::child_c::type Arg1; \ typedef typename ::boost::compute::lambda::result_of::type type; \ }; \ \ template \ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", "; \ proto::eval(arg2, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2) \ { \ return proto::make_expr( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ ); \ } // wraps a binary function who's result type is the scalar type of the first argument #define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template \ struct lambda_result \ { \ typedef typename proto::result_of::child_c::type Arg1; \ typedef typename ::boost::compute::lambda::result_of::type result_type; \ typedef typename ::boost::compute::scalar_type::type type; \ }; \ \ template \ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", "; \ proto::eval(arg2, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2) \ { \ return proto::make_expr( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ ); \ } // wraps a ternary function #define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ { \ template \ struct lambda_result \ { \ typedef typename proto::result_of::child_c::type Arg1; \ typedef typename ::boost::compute::lambda::result_of::type type; \ }; \ \ template \ static void apply(Context &ctx, 
const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ { \ ctx.stream << #name << "("; \ proto::eval(arg1, ctx); \ ctx.stream << ", "; \ proto::eval(arg2, ctx); \ ctx.stream << ", "; \ proto::eval(arg3, ctx); \ ctx.stream << ")"; \ } \ }; \ } \ template \ inline typename proto::result_of::make_expr< \ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2&, const Arg3& \ >::type const \ name(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ { \ return proto::make_expr( \ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2), ::boost::ref(arg3) \ ); \ } BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(all) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(any) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isinf) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnan) BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isfinite) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(abs) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cos) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acos) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sin) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asin) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tan) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atan) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sqrt) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rsqrt) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp2) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp10) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log2) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log10) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(round) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(length) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(cross) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pow) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pown) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(powr) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(dot) 
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(distance) BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(clamp) BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(fma) BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad) BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(smoothstep) } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP compute-0.5/include/boost/compute/lambda/get.hpp000066400000000000000000000102011263566244600217770ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_GET_HPP #define BOOST_COMPUTE_LAMBDA_GET_HPP #include #include #include #include namespace boost { namespace compute { namespace lambda { namespace detail { // function wrapper for get() in lambda expressions template struct get_func { template struct lambda_result { typedef typename proto::result_of::child_c::type Arg; typedef typename ::boost::compute::lambda::result_of::type T; typedef typename ::boost::compute::detail::get_result_type::type type; }; template struct make_get_result_type { typedef typename boost::remove_cv< typename boost::compute::lambda::result_of< Arg, typename Context::args_tuple >::type >::type type; }; // returns the suffix string for get() in lambda expressions // (e.g. 
".x" for get<0>() with float4) template struct make_get_suffix { static std::string value() { BOOST_STATIC_ASSERT(N < 16); std::stringstream stream; if(N < 10){ stream << ".s" << uint_(N); } else if(N < 16){ stream << ".s" << char('a' + (N - 10)); } return stream.str(); } }; // get() specialization for std::pair template struct make_get_suffix > { static std::string value() { BOOST_STATIC_ASSERT(N < 2); if(N == 0){ return ".first"; } else { return ".second"; } }; }; // get() specialization for boost::tuple #define BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX(z, n, unused) \ template \ struct make_get_suffix > \ { \ static std::string value() \ { \ BOOST_STATIC_ASSERT(N < n); \ return ".v" + boost::lexical_cast(N); \ } \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX, ~) #undef BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX template static void dispatch_apply_terminal(Context &ctx, const Arg &arg) { typedef typename make_get_result_type::type T; proto::eval(arg, ctx); ctx.stream << make_get_suffix::value(); } template static void dispatch_apply_terminal(Context &ctx, placeholder) { ctx.stream << ::boost::compute::get()(::boost::get(ctx.args)); } template static void dispatch_apply(Context &ctx, const Arg &arg, proto::tag::terminal) { dispatch_apply_terminal(ctx, proto::value(arg)); } template static void apply(Context &ctx, const Arg &arg) { dispatch_apply(ctx, arg, typename proto::tag_of::type()); } }; } // end detail namespace // get() template inline typename proto::result_of::make_expr< proto::tag::function, detail::get_func, const Arg& >::type const get(const Arg &arg) { return proto::make_expr( detail::get_func(), ::boost::ref(arg) ); } } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_GET_HPP 
compute-0.5/include/boost/compute/lambda/make_pair.hpp000066400000000000000000000042701263566244600231610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP #define BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP #include namespace boost { namespace compute { namespace lambda { namespace detail { // function wrapper for make_pair() in lambda expressions struct make_pair_func { template struct lambda_result { typedef typename proto::result_of::child_c::type Arg1; typedef typename proto::result_of::child_c::type Arg2; typedef typename lambda::result_of::type T1; typedef typename lambda::result_of::type T2; typedef std::pair type; }; template static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) { typedef typename lambda::result_of::type T1; typedef typename lambda::result_of::type T2; ctx.stream << "boost_make_pair("; ctx.stream << type_name() << ", "; proto::eval(arg1, ctx); ctx.stream << ", "; ctx.stream << type_name() << ", "; proto::eval(arg2, ctx); ctx.stream << ")"; } }; } // end detail namespace // make_pair(first, second) template inline typename proto::result_of::make_expr< proto::tag::function, detail::make_pair_func, const Arg1&, const Arg2& >::type const make_pair(const Arg1 &first, const Arg2 &second) { return proto::make_expr( detail::make_pair_func(), ::boost::ref(first), ::boost::ref(second) ); } } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP 
compute-0.5/include/boost/compute/lambda/make_tuple.hpp000066400000000000000000000114621263566244600233600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP #include #include #include namespace boost { namespace compute { namespace lambda { namespace detail { // function wrapper for make_tuple() in lambda expressions struct make_tuple_func { template struct make_tuple_result_type; #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG(z, n, unused) \ typedef typename proto::result_of::child_c::type BOOST_PP_CAT(Arg, n); #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE(z, n, unused) \ typedef typename lambda::result_of::type BOOST_PP_CAT(T, n); #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE(z, n, unused) \ template \ struct make_tuple_result_type \ { \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG, ~) \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE, ~) \ typedef boost::tuple type; \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE, ~) #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE template struct lambda_result { typedef typename make_tuple_result_type< Expr, Args, proto::arity_of::value - 1 >::type type; }; #define BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE(z, n, unused) \ typedef typename lambda::result_of< \ BOOST_PP_CAT(Arg, n), typename Context::args_tuple \ >::type BOOST_PP_CAT(T, n); #define 
BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG(z, n, unused) \ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n) #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG(z, n, unused) \ BOOST_PP_EXPR_IF(n, ctx.stream << ", ";) proto::eval(BOOST_PP_CAT(arg, n), ctx); #define BOOST_COMPUTE_MAKE_TUPLE_APPLY(z, n, unused) \ template \ static void apply(Context &ctx, BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG, ~)) \ { \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE, ~) \ typedef typename boost::tuple tuple_type; \ ctx.stream.template inject_type(); \ ctx.stream << "((" << type_name() << "){"; \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG, ~) \ ctx.stream << "})"; \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_MAKE_TUPLE_APPLY, ~) #undef BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY }; } // end detail namespace #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG(z, n, unused) \ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n) #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE(z, n, unused) \ BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) & #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG(z, n, unused) \ BOOST_PP_COMMA_IF(n) ::boost::ref(BOOST_PP_CAT(arg, n)) #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE(z, n, unused) \ template \ inline typename proto::result_of::make_expr< \ proto::tag::function, \ detail::make_tuple_func, \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE, ~) \ >::type \ make_tuple(BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG, ~)) \ { \ return proto::make_expr( \ detail::make_tuple_func(), \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG, ~) \ ); \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE, ~) #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG #undef 
BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP compute-0.5/include/boost/compute/lambda/placeholder.hpp000066400000000000000000000014241263566244600235110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP #define BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP namespace boost { namespace compute { namespace lambda { // lambda placeholder type template struct placeholder { }; } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP compute-0.5/include/boost/compute/lambda/placeholders.hpp000066400000000000000000000047001263566244600236740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP #define BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP #include #include #include namespace boost { namespace compute { namespace lambda { namespace mpl = boost::mpl; namespace proto = boost::proto; // lambda placeholders expression >::type> const _1; expression >::type> const _2; expression >::type> const _3; namespace detail { BOOST_MPL_HAS_XXX_TRAIT_DEF(result_type) template struct terminal_type_impl; template struct terminal_type_impl { typedef typename T::result_type type; }; template struct terminal_type_impl { typedef T type; }; template struct terminal_type { typedef typename terminal_type_impl::value>::type type; }; } // end detail namespace // result_of placeholders template struct result_of >::type>, Args, proto::tag::terminal> { typedef typename boost::tuples::element<0, Args>::type arg_type; typedef typename detail::terminal_type::type type; }; template struct result_of >::type>, Args, proto::tag::terminal> { typedef typename boost::tuples::element<1, Args>::type arg_type; typedef typename detail::terminal_type::type type; }; template struct result_of >::type>, Args, proto::tag::terminal> { typedef typename boost::tuples::element<2, Args>::type arg_type; typedef typename detail::terminal_type::type type; }; } // end lambda namespace // lift lambda placeholders up to the boost::compute namespace using lambda::_1; using lambda::_2; using lambda::_3; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP compute-0.5/include/boost/compute/lambda/result_of.hpp000066400000000000000000000100271263566244600232300ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // 
http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP #define BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP #include #include #include namespace boost { namespace compute { namespace lambda { namespace mpl = boost::mpl; namespace proto = boost::proto; // meta-function returning the result type of a lambda expression template::type> struct result_of { }; // terminals template struct result_of { typedef typename proto::result_of::value::type type; }; // binary operators #define BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(tag) \ template \ struct result_of \ { \ typedef typename proto::result_of::child_c::type left; \ typedef typename proto::result_of::child_c::type right; \ \ typedef typename boost::common_type< \ typename ::boost::compute::lambda::result_of< \ left, \ Args, \ typename proto::tag_of::type>::type, \ typename ::boost::compute::lambda::result_of< \ right, \ Args, \ typename proto::tag_of::type>::type \ >::type type; \ }; BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::plus) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::minus) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::multiplies) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::divides) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::modulus) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_and) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_or) BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_xor) // comparision operators #define BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(tag) \ template \ struct result_of \ { \ typedef bool type; \ }; BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::less) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::greater) 
BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::less_equal) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::greater_equal) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::equal_to) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::not_equal_to) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::logical_and) BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::logical_or) // assignment operator template struct result_of { typedef typename proto::result_of::child_c::type left; typedef typename proto::result_of::child_c::type right; typedef typename ::boost::compute::lambda::result_of< right, Args, typename proto::tag_of::type >::type type; }; // functions template struct result_of { typedef typename proto::result_of::child_c::type func_expr; typedef typename proto::result_of::value::type func; typedef typename func::template lambda_result::type type; }; } // end lambda namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP compute-0.5/include/boost/compute/memory.hpp000066400000000000000000000013151263566244600213160ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_MEMORY_HPP #define BOOST_COMPUTE_MEMORY_HPP /// \file /// /// Meta-header to include all Boost.Compute memory headers. 
#include #include #endif // BOOST_COMPUTE_MEMORY_HPP compute-0.5/include/boost/compute/memory/000077500000000000000000000000001263566244600206055ustar00rootroot00000000000000compute-0.5/include/boost/compute/memory/local_buffer.hpp000066400000000000000000000043141263566244600237430ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP #define BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP #include #include namespace boost { namespace compute { /// \class local_buffer /// \brief Represents a local memory buffer on the device. /// /// The local_buffer class represents a block of local memory on a compute /// device. /// /// This class is most commonly used to set local memory arguments for compute /// kernels: /// \code /// // set argument to a local buffer with storage for 32 float's /// kernel.set_arg(0, local_buffer(32)); /// \endcode /// /// \see buffer, kernel template class local_buffer { public: /// Creates a local buffer object for \p size elements. local_buffer(const size_t size) : m_size(size) { } /// Creates a local buffer object as a copy of \p other. local_buffer(const local_buffer &other) : m_size(other.m_size) { } /// Copies \p other to \c *this. local_buffer& operator=(const local_buffer &other) { if(this != &other){ m_size = other.m_size; } return *this; } /// Destroys the local memory object. ~local_buffer() { } /// Returns the number of elements in the local buffer. 
size_t size() const { return m_size; } private: size_t m_size; }; namespace detail { // set_kernel_arg specialization for local_buffer template struct set_kernel_arg > { void operator()(kernel &kernel_, size_t index, const local_buffer &buffer) { kernel_.set_arg(index, buffer.size() * sizeof(T), 0); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_MEMORY_SVM_PTR_HPP compute-0.5/include/boost/compute/memory/svm_ptr.hpp000066400000000000000000000033001263566244600230040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_MEMORY_SVM_PTR_HPP #define BOOST_COMPUTE_MEMORY_SVM_PTR_HPP #include #include namespace boost { namespace compute { template class svm_ptr { public: typedef T value_type; typedef std::ptrdiff_t difference_type; typedef T* pointer; typedef T& reference; typedef std::random_access_iterator_tag iterator_category; svm_ptr() : m_ptr(0) { } explicit svm_ptr(void *ptr) : m_ptr(static_cast(ptr)) { } svm_ptr(const svm_ptr &other) : m_ptr(other.m_ptr) { } svm_ptr& operator=(const svm_ptr &other) { m_ptr = other.m_ptr; } ~svm_ptr() { } void* get() const { return m_ptr; } svm_ptr operator+(difference_type n) { return svm_ptr(m_ptr + n); } difference_type operator-(svm_ptr other) { return m_ptr - other.m_ptr; } private: T *m_ptr; }; /// \internal_ (is_device_iterator specialization for svm_ptr) template struct is_device_iterator > : boost::true_type {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_MEMORY_SVM_PTR_HPP 
compute-0.5/include/boost/compute/memory_object.hpp000066400000000000000000000154141263566244600226510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_MEMORY_OBJECT_HPP #define BOOST_COMPUTE_MEMORY_OBJECT_HPP #include #include #include #include #include namespace boost { namespace compute { /// \class memory_object /// \brief Base-class for memory objects. /// /// The memory_object class is the base-class for memory objects on /// compute devices. /// /// \see buffer, vector class memory_object { public: /// Flags for the creation of memory objects. enum mem_flags { read_write = CL_MEM_READ_WRITE, read_only = CL_MEM_READ_ONLY, write_only = CL_MEM_WRITE_ONLY, use_host_ptr = CL_MEM_USE_HOST_PTR, alloc_host_ptr = CL_MEM_ALLOC_HOST_PTR, copy_host_ptr = CL_MEM_COPY_HOST_PTR #ifdef CL_VERSION_1_2 , host_write_only = CL_MEM_HOST_WRITE_ONLY, host_read_only = CL_MEM_HOST_READ_ONLY, host_no_access = CL_MEM_HOST_NO_ACCESS #endif }; /// Symbolic names for the OpenCL address spaces. enum address_space { global_memory, local_memory, private_memory, constant_memory }; /// Returns the underlying OpenCL memory object. cl_mem& get() const { return const_cast(m_mem); } /// Returns the size of the memory object in bytes. size_t get_memory_size() const { return get_memory_info(CL_MEM_SIZE); } /// Returns the type for the memory object. cl_mem_object_type get_memory_type() const { return get_memory_info(CL_MEM_TYPE); } /// Returns the flags for the memory object. 
cl_mem_flags get_memory_flags() const { return get_memory_info(CL_MEM_FLAGS); } /// Returns the context for the memory object. context get_context() const { return context(get_memory_info(CL_MEM_CONTEXT)); } /// Returns the host pointer associated with the memory object. void* get_host_ptr() const { return get_memory_info(CL_MEM_HOST_PTR); } /// Returns the reference count for the memory object. uint_ reference_count() const { return get_memory_info(CL_MEM_REFERENCE_COUNT); } /// Returns information about the memory object. /// /// \see_opencl_ref{clGetMemObjectInfo} template T get_memory_info(cl_mem_info info) const { return detail::get_object_info(clGetMemObjectInfo, m_mem, info); } #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Registers a function to be called when the memory object is deleted /// and its resources freed. /// /// \see_opencl_ref{clSetMemObjectDestructorCallback} /// /// \opencl_version_warning{1,1} void set_destructor_callback(void (BOOST_COMPUTE_CL_CALLBACK *callback)( cl_mem memobj, void *user_data ), void *user_data = 0) { cl_int ret = clSetMemObjectDestructorCallback(m_mem, callback, user_data); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Registers a function to be called when the memory object is deleted /// and its resources freed. /// /// The function specified by \p callback must be invokable with zero /// arguments (e.g. \c callback()). /// /// \opencl_version_warning{1,1} template void set_destructor_callback(Function callback) { set_destructor_callback( destructor_callback_invoker, new boost::function(callback) ); } #endif // CL_VERSION_1_1 /// Returns \c true if the memory object is the same as \p other. bool operator==(const memory_object &other) const { return m_mem == other.m_mem; } /// Returns \c true if the memory object is different from \p other. 
bool operator!=(const memory_object &other) const { return m_mem != other.m_mem; } private: #ifdef CL_VERSION_1_1 /// \internal_ static void BOOST_COMPUTE_CL_CALLBACK destructor_callback_invoker(cl_mem, void *user_data) { boost::function *callback = static_cast *>(user_data); (*callback)(); delete callback; } #endif // CL_VERSION_1_1 protected: /// \internal_ memory_object() : m_mem(0) { } /// \internal_ explicit memory_object(cl_mem mem, bool retain = true) : m_mem(mem) { if(m_mem && retain){ clRetainMemObject(m_mem); } } /// \internal_ memory_object(const memory_object &other) : m_mem(other.m_mem) { if(m_mem){ clRetainMemObject(m_mem); } } /// \internal_ memory_object& operator=(const memory_object &other) { if(this != &other){ if(m_mem){ clReleaseMemObject(m_mem); } m_mem = other.m_mem; if(m_mem){ clRetainMemObject(m_mem); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// \internal_ memory_object(memory_object&& other) BOOST_NOEXCEPT : m_mem(other.m_mem) { other.m_mem = 0; } /// \internal_ memory_object& operator=(memory_object&& other) BOOST_NOEXCEPT { if(m_mem){ clReleaseMemObject(m_mem); } m_mem = other.m_mem; other.m_mem = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// \internal_ ~memory_object() { if(m_mem){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseMemObject(m_mem) ); } } protected: cl_mem m_mem; }; namespace detail { // set_kernel_arg specialization for memory_object template<> struct set_kernel_arg { void operator()(kernel &kernel_, size_t index, const memory_object &mem) { kernel_.set_arg(index, mem.get()); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_MEMORY_OBJECT_HPP compute-0.5/include/boost/compute/pipe.hpp000066400000000000000000000076141263566244600207530ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, 
Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_PIPE_HPP #define BOOST_COMPUTE_PIPE_HPP #include #include #include #include #include // pipe objects require opencl 2.0 #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) namespace boost { namespace compute { /// \class pipe /// \brief A FIFO data pipe /// /// \opencl_version_warning{2,0} /// /// \see memory_object class pipe : public memory_object { public: /// Creates a null pipe object. pipe() : memory_object() { } /// Creates a pipe object for \p mem. If \p retain is \c true, the /// reference count for \p mem will be incremented. explicit pipe(cl_mem mem, bool retain = true) : memory_object(mem, retain) { } /// Creates a new pipe in \p context. pipe(const context &context, uint_ pipe_packet_size, uint_ pipe_max_packets, cl_mem_flags flags = read_write, const cl_pipe_properties *properties = 0) { cl_int error = 0; m_mem = clCreatePipe(context, flags, pipe_packet_size, pipe_max_packets, properties, &error); if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new pipe object as a copy of \p other. pipe(const pipe &other) : memory_object(other) { } /// Copies the pipe object from \p other to \c *this. pipe& operator=(const pipe &other) { if(this != &other){ memory_object::operator=(other); } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new pipe object from \p other. pipe(pipe&& other) BOOST_NOEXCEPT : memory_object(std::move(other)) { } /// Move-assigns the pipe from \p other to \c *this. pipe& operator=(pipe&& other) BOOST_NOEXCEPT { memory_object::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the pipe object. ~pipe() { } /// Returns the packet size. 
uint_ packet_size() const { return get_info(CL_PIPE_PACKET_SIZE); } /// Returns the max number of packets. uint_ max_packets() const { return get_info(CL_PIPE_MAX_PACKETS); } /// Returns information about the pipe. /// /// \see_opencl2_ref{clGetPipeInfo} template T get_info(cl_pipe_info info) const { return detail::get_object_info(clGetPipeInfo, m_mem, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; }; /// \internal_ define get_info() specializations for pipe BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(pipe, ((cl_uint, CL_PIPE_PACKET_SIZE)) ((cl_uint, CL_PIPE_MAX_PACKETS)) ) namespace detail { // set_kernel_arg specialization for pipe template<> struct set_kernel_arg { void operator()(kernel &kernel_, size_t index, const pipe &pipe_) { kernel_.set_arg(index, pipe_.get()); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // CL_VERSION_2_0 #endif // BOOST_COMPUTE_PIPE_HPP compute-0.5/include/boost/compute/platform.hpp000066400000000000000000000150221263566244600216320ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_PLATFORM_HPP #define BOOST_COMPUTE_PLATFORM_HPP #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class platform /// \brief A compute platform. /// /// The platform class provides an interface to an OpenCL platform. /// /// To obtain a list of all platforms on the system use the /// system::platforms() method. 
/// /// \see device, context class platform { public: /// Creates a new platform object for \p id. explicit platform(cl_platform_id id) : m_platform(id) { } /// Creates a new platform as a copy of \p other. platform(const platform &other) : m_platform(other.m_platform) { } /// Copies the platform id from \p other. platform& operator=(const platform &other) { if(this != &other){ m_platform = other.m_platform; } return *this; } /// Destroys the platform object. ~platform() { } /// Returns the ID of the platform. cl_platform_id id() const { return m_platform; } /// Returns the name of the platform. std::string name() const { return get_info(CL_PLATFORM_NAME); } /// Returns the name of the vendor for the platform. std::string vendor() const { return get_info(CL_PLATFORM_VENDOR); } /// Returns the profile string for the platform. std::string profile() const { return get_info(CL_PLATFORM_PROFILE); } /// Returns the version string for the platform. std::string version() const { return get_info(CL_PLATFORM_VERSION); } /// Returns a list of extensions supported by the platform. std::vector extensions() const { std::string extensions_string = get_info(CL_PLATFORM_EXTENSIONS); std::vector extensions_vector; boost::split(extensions_vector, extensions_string, boost::is_any_of("\t "), boost::token_compress_on); return extensions_vector; } /// Returns \c true if the platform supports the extension with /// \p name. bool supports_extension(const std::string &name) const { const std::vector extensions = this->extensions(); return std::find( extensions.begin(), extensions.end(), name) != extensions.end(); } /// Returns a list of devices on the platform. 
std::vector devices(cl_device_type type = CL_DEVICE_TYPE_ALL) const { size_t count = device_count(type); if(count == 0){ // no devices for this platform return std::vector(); } std::vector device_ids(count); cl_int ret = clGetDeviceIDs(m_platform, type, static_cast(count), &device_ids[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } std::vector devices; for(cl_uint i = 0; i < count; i++){ devices.push_back(device(device_ids[i])); } return devices; } /// Returns the number of devices on the platform. size_t device_count(cl_device_type type = CL_DEVICE_TYPE_ALL) const { cl_uint count = 0; cl_int ret = clGetDeviceIDs(m_platform, type, 0, 0, &count); if(ret != CL_SUCCESS){ if(ret == CL_DEVICE_NOT_FOUND){ // no devices for this platform return 0; } else { // something else went wrong BOOST_THROW_EXCEPTION(opencl_error(ret)); } } return count; } /// Returns information about the platform. /// /// \see_opencl_ref{clGetPlatformInfo} template T get_info(cl_platform_info info) const { return detail::get_object_info(clGetPlatformInfo, m_platform, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; /// Returns the address of the \p function_name extension /// function. Returns \c 0 if \p function_name is invalid. void* get_extension_function_address(const char *function_name) const { #ifdef CL_VERSION_1_2 return clGetExtensionFunctionAddressForPlatform(m_platform, function_name); #else return clGetExtensionFunctionAddress(function_name); #endif } /// Requests that the platform unload any compiler resources. void unload_compiler() { #ifdef CL_VERSION_1_2 clUnloadPlatformCompiler(m_platform); #else clUnloadCompiler(); #endif } /// Returns \c true if the platform is the same at \p other. bool operator==(const platform &other) const { return m_platform == other.m_platform; } /// Returns \c true if the platform is different from \p other. 
bool operator!=(const platform &other) const { return m_platform != other.m_platform; } private: cl_platform_id m_platform; }; /// \internal_ define get_info() specializations for platform BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(platform, ((std::string, CL_PLATFORM_PROFILE)) ((std::string, CL_PLATFORM_VERSION)) ((std::string, CL_PLATFORM_NAME)) ((std::string, CL_PLATFORM_VENDOR)) ((std::string, CL_PLATFORM_EXTENSIONS)) ) inline boost::compute::platform device::platform() const { return boost::compute::platform(get_info()); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_PLATFORM_HPP compute-0.5/include/boost/compute/program.hpp000066400000000000000000000473361263566244600214720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_PROGRAM_HPP #define BOOST_COMPUTE_PROGRAM_HPP #include #include #include #include #ifdef BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION #include #endif #include #include #include #include #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE #include #include #include #include #include #include #endif namespace boost { namespace compute { class kernel; /// \class program /// \brief A compute program. /// /// The program class represents an OpenCL program. /// /// Program objects are created with one of the static \c create_with_* /// functions. 
For example, to create a program from a source string: /// /// \snippet test/test_program.cpp create_with_source /// /// And to create a program from a source file: /// \code /// boost::compute::program bar_program = /// boost::compute::program::create_with_source_file("/path/to/bar.cl", context); /// \endcode /// /// Once a program object has been succesfully created, it can be compiled /// using the \c build() method: /// \code /// // build the program /// foo_program.build(); /// \endcode /// /// Once the program is built, \ref kernel objects can be created using the /// \c create_kernel() method by passing their name: /// \code /// // create a kernel from the compiled program /// boost::compute::kernel foo_kernel = foo_program.create_kernel("foo"); /// \endcode /// /// \see kernel class program { public: /// Creates a null program object. program() : m_program(0) { } /// Creates a program object for \p program. If \p retain is \c true, /// the reference count for \p program will be incremented. explicit program(cl_program program, bool retain = true) : m_program(program) { if(m_program && retain){ clRetainProgram(m_program); } } /// Creates a new program object as a copy of \p other. program(const program &other) : m_program(other.m_program) { if(m_program){ clRetainProgram(m_program); } } /// Copies the program object from \p other to \c *this. program& operator=(const program &other) { if(this != &other){ if(m_program){ clReleaseProgram(m_program); } m_program = other.m_program; if(m_program){ clRetainProgram(m_program); } } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new program object from \p other. program(program&& other) BOOST_NOEXCEPT : m_program(other.m_program) { other.m_program = 0; } /// Move-assigns the program from \p other to \c *this. 
program& operator=(program&& other) BOOST_NOEXCEPT { if(m_program){ clReleaseProgram(m_program); } m_program = other.m_program; other.m_program = 0; return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the program object. ~program() { if(m_program){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseProgram(m_program) ); } } /// Returns the underlying OpenCL program. cl_program& get() const { return const_cast(m_program); } /// Returns the source code for the program. std::string source() const { return get_info(CL_PROGRAM_SOURCE); } /// Returns the binary for the program. std::vector binary() const { size_t binary_size = get_info(CL_PROGRAM_BINARY_SIZES); std::vector binary(binary_size); unsigned char *binary_ptr = &binary[0]; cl_int error = clGetProgramInfo(m_program, CL_PROGRAM_BINARIES, sizeof(unsigned char **), &binary_ptr, 0); if(error != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return binary; } std::vector get_devices() const { std::vector device_ids = get_info >(CL_PROGRAM_DEVICES); std::vector devices; for(size_t i = 0; i < device_ids.size(); i++){ devices.push_back(device(device_ids[i])); } return devices; } /// Returns the context for the program. context get_context() const { return context(get_info(CL_PROGRAM_CONTEXT)); } /// Returns information about the program. /// /// \see_opencl_ref{clGetProgramInfo} template T get_info(cl_program_info info) const { return detail::get_object_info(clGetProgramInfo, m_program, info); } /// \overload template typename detail::get_object_info_type::type get_info() const; /// Returns build information about the program. 
/// /// For example, this function can be used to retreive the options used /// to build the program: /// \code /// std::string build_options = /// program.get_build_info(CL_PROGRAM_BUILD_OPTIONS); /// \endcode /// /// \see_opencl_ref{clGetProgramInfo} template T get_build_info(cl_program_build_info info, const device &device) const { return detail::get_object_info(clGetProgramBuildInfo, m_program, info, device.id()); } /// Builds the program with \p options. /// /// If the program fails to compile, this function will throw an /// opencl_error exception. /// \code /// try { /// // attempt to compile to program /// program.build(); /// } /// catch(boost::compute::opencl_error &e){ /// // program failed to compile, print out the build log /// std::cout << program.build_log() << std::endl; /// } /// \endcode /// /// \see_opencl_ref{clBuildProgram} void build(const std::string &options = std::string()) { const char *options_string = 0; if(!options.empty()){ options_string = options.c_str(); } cl_int ret = clBuildProgram(m_program, 0, 0, options_string, 0, 0); #ifdef BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION if(ret != CL_SUCCESS){ // print the error, source code and build log std::cerr << "Boost.Compute: " << "kernel compilation failed (" << ret << ")\n" << "--- source ---\n" << source() << "\n--- build log ---\n" << build_log() << std::endl; } #endif if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Compiles the program with \p options. /// /// \opencl_version_warning{1,2} /// /// \see_opencl_ref{clCompileProgram} void compile(const std::string &options = std::string()) { const char *options_string = 0; if(!options.empty()){ options_string = options.c_str(); } cl_int ret = clCompileProgram( m_program, 0, 0, options_string, 0, 0, 0, 0, 0 ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } /// Links the programs in \p programs with \p options in \p context. 
/// /// \opencl_version_warning{1,2} /// /// \see_opencl_ref{clLinkProgram} static program link(const std::vector &programs, const context &context, const std::string &options = std::string()) { const char *options_string = 0; if(!options.empty()){ options_string = options.c_str(); } cl_int ret; cl_program program_ = clLinkProgram( context.get(), 0, 0, options_string, static_cast(programs.size()), reinterpret_cast(&programs[0]), 0, 0, &ret ); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return program(program_, false); } #endif // CL_VERSION_1_2 /// Returns the build log. std::string build_log() const { return get_build_info(CL_PROGRAM_BUILD_LOG, get_devices().front()); } /// Creates and returns a new kernel object for \p name. /// /// For example, to create the \c "foo" kernel (after the program has been /// created and built): /// \code /// boost::compute::kernel foo_kernel = foo_program.create_kernel("foo"); /// \endcode kernel create_kernel(const std::string &name) const; /// Returns \c true if the program is the same at \p other. bool operator==(const program &other) const { return m_program == other.m_program; } /// Returns \c true if the program is different from \p other. bool operator!=(const program &other) const { return m_program != other.m_program; } /// \internal_ operator cl_program() const { return m_program; } /// Creates a new program with \p source in \p context. /// /// \see_opencl_ref{clCreateProgramWithSource} static program create_with_source(const std::string &source, const context &context) { const char *source_string = source.c_str(); cl_int error = 0; cl_program program_ = clCreateProgramWithSource(context, uint_(1), &source_string, 0, &error); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return program(program_, false); } /// Creates a new program with \p sources in \p context. 
/// /// \see_opencl_ref{clCreateProgramWithSource} static program create_with_source(const std::vector &sources, const context &context) { std::vector source_strings(sources.size()); for(size_t i = 0; i < sources.size(); i++){ source_strings[i] = sources[i].c_str(); } cl_int error = 0; cl_program program_ = clCreateProgramWithSource(context, uint_(sources.size()), &source_strings[0], 0, &error); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return program(program_, false); } /// Creates a new program with \p file in \p context. /// /// \see_opencl_ref{clCreateProgramWithSource} static program create_with_source_file(const std::string &file, const context &context) { // open file stream std::ifstream stream(file.c_str()); if(stream.fail()){ BOOST_THROW_EXCEPTION(std::ios_base::failure("failed to create stream.")); } // read source std::string source( (std::istreambuf_iterator(stream)), std::istreambuf_iterator() ); // create program return create_with_source(source, context); } /// Creates a new program with \p binary of \p binary_size in /// \p context. /// /// \see_opencl_ref{clCreateProgramWithBinary} static program create_with_binary(const unsigned char *binary, size_t binary_size, const context &context) { const cl_device_id device = context.get_device().id(); cl_int error = 0; cl_int binary_status = 0; cl_program program_ = clCreateProgramWithBinary(context, uint_(1), &device, &binary_size, &binary, &binary_status, &error); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } if(binary_status != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(binary_status)); } return program(program_, false); } /// Creates a new program with \p binary in \p context. /// /// \see_opencl_ref{clCreateProgramWithBinary} static program create_with_binary(const std::vector &binary, const context &context) { return create_with_binary(&binary[0], binary.size(), context); } /// Creates a new program with \p file in \p context. 
/// /// \see_opencl_ref{clCreateProgramWithBinary} static program create_with_binary_file(const std::string &file, const context &context) { // open file stream std::ifstream stream(file.c_str(), std::ios::in | std::ios::binary); // read binary std::vector binary( (std::istreambuf_iterator(stream)), std::istreambuf_iterator() ); // create program return create_with_binary(&binary[0], binary.size(), context); } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Creates a new program with the built-in kernels listed in /// \p kernel_names for \p devices in \p context. /// /// \opencl_version_warning{1,2} /// /// \see_opencl_ref{clCreateProgramWithBuiltInKernels} static program create_with_builtin_kernels(const context &context, const std::vector &devices, const std::string &kernel_names) { cl_int error = 0; cl_program program_ = clCreateProgramWithBuiltInKernels( context.get(), static_cast(devices.size()), reinterpret_cast(&devices[0]), kernel_names.c_str(), &error ); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } return program(program_, false); } #endif // CL_VERSION_1_2 /// Create a new program with \p source in \p context and builds it with \p options. /** * In case BOOST_COMPUTE_USE_OFFLINE_CACHE macro is defined, * the compiled binary is stored for reuse in the offline cache located in * $HOME/.boost_compute on UNIX-like systems and in %APPDATA%/boost_compute * on Windows. */ static program build_with_source( const std::string &source, const context &context, const std::string &options = std::string() ) { #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // Get hash string for the kernel. 
device d = context.get_device(); platform p = d.platform(); detail::sha1 hash; hash.process( p.name() ) .process( p.version() ) .process( d.name() ) .process( options ) .process( source ) ; // Try to get cached program binaries: try { boost::optional prog = load_program_binary(hash, context); if (prog) { prog->build(options); return *prog; } } catch (...) { // Something bad happened. Fallback to normal compilation. } // Cache is apparently not available. Just compile the sources. #endif const char *source_string = source.c_str(); cl_int error = 0; cl_program program_ = clCreateProgramWithSource(context, uint_(1), &source_string, 0, &error); if(!program_){ BOOST_THROW_EXCEPTION(opencl_error(error)); } program prog(program_, false); prog.build(options); #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // Save program binaries for future reuse. save_program_binary(hash, prog); #endif return prog; } private: #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // Saves program binaries for future reuse. static void save_program_binary(const std::string &hash, const program &prog) { std::string fname = detail::program_binary_path(hash, true) + "kernel"; std::ofstream bfile(fname.c_str(), std::ios::binary); if (!bfile) return; std::vector binary = prog.binary(); size_t binary_size = binary.size(); bfile.write((char*)&binary_size, sizeof(size_t)); bfile.write((char*)binary.data(), binary_size); } // Tries to read program binaries from file cache. 
static boost::optional load_program_binary( const std::string &hash, const context &ctx ) { std::string fname = detail::program_binary_path(hash) + "kernel"; std::ifstream bfile(fname.c_str(), std::ios::binary); if (!bfile) return boost::optional(); size_t binary_size; std::vector binary; bfile.read((char*)&binary_size, sizeof(size_t)); binary.resize(binary_size); bfile.read((char*)binary.data(), binary_size); return boost::optional( program::create_with_binary( binary.data(), binary_size, ctx ) ); } #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE private: cl_program m_program; }; /// \internal_ define get_info() specializations for program BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program, ((cl_uint, CL_PROGRAM_REFERENCE_COUNT)) ((cl_context, CL_PROGRAM_CONTEXT)) ((cl_uint, CL_PROGRAM_NUM_DEVICES)) ((std::vector, CL_PROGRAM_DEVICES)) ((std::string, CL_PROGRAM_SOURCE)) ((std::vector, CL_PROGRAM_BINARY_SIZES)) ((std::vector, CL_PROGRAM_BINARIES)) ) #ifdef CL_VERSION_1_2 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program, ((size_t, CL_PROGRAM_NUM_KERNELS)) ((std::string, CL_PROGRAM_KERNEL_NAMES)) ) #endif // CL_VERSION_1_2 } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_PROGRAM_HPP compute-0.5/include/boost/compute/random.hpp000066400000000000000000000021741263566244600212720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_HPP #define BOOST_COMPUTE_RANDOM_HPP /// \file /// /// Meta-header to include all Boost.Compute random headers. 
#include #include #include #include #include #include #include #include #include #endif // BOOST_COMPUTE_RANDOM_HPP compute-0.5/include/boost/compute/random/000077500000000000000000000000001263566244600205555ustar00rootroot00000000000000compute-0.5/include/boost/compute/random/bernoulli_distribution.hpp000066400000000000000000000053101263566244600260570ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP #define BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP #include #include #include #include #include namespace boost { namespace compute { /// /// \class bernoulli_distribution /// \brief Produces random boolean values according to the following /// discrete probability function with parameter p : /// P(true/p) = p and P(false/p) = (1 - p) /// /// The following example shows how to setup a bernoulli distribution to /// produce random boolean values with parameter p = 0.25 /// /// \snippet test/test_bernoulli_distribution.cpp generate /// template class bernoulli_distribution { public: /// Creates a new bernoulli distribution bernoulli_distribution(RealType p = 0.5f) : m_p(p) { } /// Destroys the bernoulli_distribution object ~bernoulli_distribution() { } /// Returns the value of the parameter p RealType p() const { return m_p; } /// Generates bernoulli distributed booleans and stores /// them in the range [\p first, \p last). 
template void generate(OutputIterator first, OutputIterator last, Generator &generator, command_queue &queue) { size_t count = detail::iterator_range_size(first, last); vector tmp(count, queue.get_context()); generator.generate(tmp.begin(), tmp.end(), queue); BOOST_COMPUTE_FUNCTION(bool, scale_random, (const uint_ x), { return (convert_RealType(x) / MAX_RANDOM) < PARAM; }); scale_random.define("PARAM", detail::make_literal(m_p)); scale_random.define("MAX_RANDOM", "UINT_MAX"); scale_random.define( "convert_RealType", std::string("convert_") + type_name() ); transform( tmp.begin(), tmp.end(), first, scale_random, queue ); } private: RealType m_p; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP compute-0.5/include/boost/compute/random/default_random_engine.hpp000066400000000000000000000014411263566244600255770ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP #define BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP #include namespace boost { namespace compute { typedef mt19937 default_random_engine; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP compute-0.5/include/boost/compute/random/discrete_distribution.hpp000066400000000000000000000066711263566244600257010ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP #define BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class discrete_distribution /// \brief Produces random integers on the interval [0, n), where /// probability of each integer is given by the weight of the ith /// integer divided by the sum of all weights. 
/// /// The following example shows how to setup a discrete distribution to /// produce 0 and 1 with equal probability /// /// \snippet test/test_discrete_distribution.cpp generate /// template class discrete_distribution { public: typedef IntType result_type; /// Creates a new discrete distribution with weights given by /// the range [\p first, \p last) template discrete_distribution(InputIterator first, InputIterator last) : m_n(std::distance(first, last)), m_probabilities(std::distance(first, last)) { double sum = 0; for(InputIterator iter = first; iter!=last; iter++) { sum += *iter; } for(size_t i=0; i probabilities() const { return m_probabilities; } /// Generates uniformily distributed integers and stores /// them to the range [\p first, \p last). template void generate(OutputIterator first, OutputIterator last, Generator &generator, command_queue &queue) { std::string source = "inline uint scale_random(uint x)\n"; source = source + "{\n" + "float rno = convert_float(x) / UINT_MAX;\n"; for(size_t i=0; i m_probabilities; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP compute-0.5/include/boost/compute/random/linear_congruential_engine.hpp000066400000000000000000000162431263566244600266450ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP #define BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// /// \class linear_congruential_engine /// \brief 'Quick and Dirty' linear congruential engine /// /// Quick and dirty linear congruential engine to generate low quality /// random numbers very quickly. For uses in which good quality of random /// numbers is required(Monte-Carlo Simulations), use other engines like /// Mersenne Twister instead. /// template class linear_congruential_engine { public: typedef T result_type; static const T default_seed = 1; static const T a = 1099087573; static const size_t threads = 1024; /// Creates a new linear_congruential_engine and seeds it with \p value. explicit linear_congruential_engine(command_queue &queue, result_type value = default_seed) : m_context(queue.get_context()), m_multiplicands(m_context, threads * sizeof(result_type)) { // setup program load_program(); // seed state seed(value, queue); // generate multiplicands generate_multiplicands(queue); } /// Creates a new linear_congruential_engine object as a copy of \p other. linear_congruential_engine(const linear_congruential_engine &other) : m_context(other.m_context), m_seed(other.m_seed), m_multiplicands(other.m_multiplicands) { } /// Copies \p other to \c *this. linear_congruential_engine& operator=(const linear_congruential_engine &other) { if(this != &other){ m_context = other.m_context; } return *this; } /// Destroys the linear_congruential_engine object. ~linear_congruential_engine() { } /// Seeds the random number generator with \p value. 
/// /// \param value seed value for the random-number generator /// \param queue command queue to perform the operation /// /// If no seed value is provided, \c default_seed is used. void seed(result_type value, command_queue &queue) { (void) queue; m_seed = value; } /// \overload void seed(command_queue &queue) { seed(default_seed, queue); } /// Generates random numbers and stores them to the range [\p first, \p last). template void generate(OutputIterator first, OutputIterator last, command_queue &queue) { size_t size = detail::iterator_range_size(first, last); kernel fill_kernel(m_program, "fill"); fill_kernel.set_arg(1, m_multiplicands); fill_kernel.set_arg(2, first.get_buffer()); size_t offset = 0; for(;;){ size_t count = 0; if(size > threads){ count = (std::min)(static_cast(threads), size - offset); } else { count = size; } fill_kernel.set_arg(0, static_cast(m_seed)); fill_kernel.set_arg(3, static_cast(offset)); queue.enqueue_1d_range_kernel(fill_kernel, 0, count, 0); offset += count; if(offset >= size){ break; } update_seed(queue); } } /// \internal_ void generate(discard_iterator first, discard_iterator last, command_queue &queue) { (void) queue; size_t size = detail::iterator_range_size(first, last); uint_ max_mult = detail::read_single_value(m_multiplicands, threads-1, queue); while(size >= threads) { m_seed *= max_mult; size -= threads; } m_seed *= detail::read_single_value(m_multiplicands, size-1, queue); } /// Generates random numbers, transforms them with \p op, and then stores /// them to the range [\p first, \p last). template void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue) { vector tmp(std::distance(first, last), queue.get_context()); generate(tmp.begin(), tmp.end(), queue); transform(tmp.begin(), tmp.end(), first, op, queue); } /// Generates \p z random numbers and discards them. 
void discard(size_t z, command_queue &queue) { generate(discard_iterator(0), discard_iterator(z), queue); } private: /// \internal_ /// Generates the multiplicands for each thread void generate_multiplicands(command_queue &queue) { kernel multiplicand_kernel = m_program.create_kernel("multiplicand"); multiplicand_kernel.set_arg(0, m_multiplicands); queue.enqueue_task(multiplicand_kernel); } /// \internal_ void update_seed(command_queue &queue) { m_seed *= detail::read_single_value(m_multiplicands, threads-1, queue); } /// \internal_ void load_program() { boost::shared_ptr cache = program_cache::get_global_cache(m_context); std::string cache_key = std::string("__boost_linear_congruential_engine_") + type_name(); const char source[] = "__kernel void multiplicand(__global uint *multiplicands)\n" "{\n" " uint a = 1099087573;\n" " multiplicands[0] = a;\n" " for(uint i = 1; i < 1024; i++){\n" " multiplicands[i] = a * multiplicands[i-1];\n" " }\n" "}\n" "__kernel void fill(const uint seed,\n" " __global uint *multiplicands,\n" " __global uint *result," " const uint offset)\n" "{\n" " const uint i = get_global_id(0);\n" " result[offset+i] = seed * multiplicands[i];\n" "}\n"; m_program = cache->get_or_build(cache_key, std::string(), source, m_context); } private: context m_context; program m_program; T m_seed; buffer m_multiplicands; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP compute-0.5/include/boost/compute/random/mersenne_twister_engine.hpp000066400000000000000000000175771263566244600262310ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP #define BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class mersenne_twister_engine /// \brief Mersenne twister pseudorandom number generator. template class mersenne_twister_engine { public: typedef T result_type; static const T default_seed = 5489U; static const T n = 624; static const T m = 397; /// Creates a new mersenne_twister_engine and seeds it with \p value. explicit mersenne_twister_engine(command_queue &queue, result_type value = default_seed) : m_context(queue.get_context()), m_state_buffer(m_context, n * sizeof(result_type)) { // setup program load_program(); // seed state seed(value, queue); } /// Creates a new mersenne_twister_engine object as a copy of \p other. mersenne_twister_engine(const mersenne_twister_engine &other) : m_context(other.m_context), m_state_buffer(other.m_state_buffer) { } /// Copies \p other to \c *this. mersenne_twister_engine& operator=(const mersenne_twister_engine &other) { if(this != &other){ m_context = other.m_context; } return *this; } /// Destroys the mersenne_twister_engine object. ~mersenne_twister_engine() { } /// Seeds the random number generator with \p value. /// /// \param value seed value for the random-number generator /// \param queue command queue to perform the operation /// /// If no seed value is provided, \c default_seed is used. void seed(result_type value, command_queue &queue) { kernel seed_kernel = m_program.create_kernel("seed"); seed_kernel.set_arg(0, value); seed_kernel.set_arg(1, m_state_buffer); queue.enqueue_task(seed_kernel); m_state_index = 0; } /// \overload void seed(command_queue &queue) { seed(default_seed, queue); } /// Generates random numbers and stores them to the range [\p first, \p last). 
template void generate(OutputIterator first, OutputIterator last, command_queue &queue) { const size_t size = detail::iterator_range_size(first, last); kernel fill_kernel(m_program, "fill"); fill_kernel.set_arg(0, m_state_buffer); fill_kernel.set_arg(2, first.get_buffer()); size_t offset = 0; size_t &p = m_state_index; for(;;){ size_t count = 0; if(size > n){ count = (std::min)(static_cast(n), size - offset); } else { count = size; } fill_kernel.set_arg(1, static_cast(p)); fill_kernel.set_arg(3, static_cast(offset)); queue.enqueue_1d_range_kernel(fill_kernel, 0, count, 0); p += count; offset += count; if(offset >= size){ break; } generate_state(queue); p = 0; } } /// \internal_ void generate(discard_iterator first, discard_iterator last, command_queue &queue) { (void) queue; m_state_index += std::distance(first, last); } /// Generates random numbers, transforms them with \p op, and then stores /// them to the range [\p first, \p last). template void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue) { vector tmp(std::distance(first, last), queue.get_context()); generate(tmp.begin(), tmp.end(), queue); transform(tmp.begin(), tmp.end(), first, op, queue); } /// Generates \p z random numbers and discards them. 
void discard(size_t z, command_queue &queue) { generate(discard_iterator(0), discard_iterator(z), queue); } /// \internal_ (deprecated) template void fill(OutputIterator first, OutputIterator last, command_queue &queue) { generate(first, last, queue); } private: /// \internal_ void generate_state(command_queue &queue) { kernel generate_state_kernel = m_program.create_kernel("generate_state"); generate_state_kernel.set_arg(0, m_state_buffer); queue.enqueue_task(generate_state_kernel); } /// \internal_ void load_program() { boost::shared_ptr cache = program_cache::get_global_cache(m_context); std::string cache_key = std::string("__boost_mersenne_twister_engine_") + type_name(); const char source[] = "static uint twiddle(uint u, uint v)\n" "{\n" " return (((u & 0x80000000U) | (v & 0x7FFFFFFFU)) >> 1) ^\n" " ((v & 1U) ? 0x9908B0DFU : 0x0U);\n" "}\n" "__kernel void generate_state(__global uint *state)\n" "{\n" " const uint n = 624;\n" " const uint m = 397;\n" " for(uint i = 0; i < (n - m); i++)\n" " state[i] = state[i+m] ^ twiddle(state[i], state[i+1]);\n" " for(uint i = n - m; i < (n - 1); i++)\n" " state[i] = state[i+m-n] ^ twiddle(state[i], state[i+1]);\n" " state[n-1] = state[m-1] ^ twiddle(state[n-1], state[0]);\n" "}\n" "__kernel void seed(const uint s, __global uint *state)\n" "{\n" " const uint n = 624;\n" " state[0] = s & 0xFFFFFFFFU;\n" " for(uint i = 1; i < n; i++){\n" " state[i] = 1812433253U * (state[i-1] ^ (state[i-1] >> 30)) + i;\n" " state[i] &= 0xFFFFFFFFU;\n" " }\n" " generate_state(state);\n" "}\n" "static uint random_number(__global uint *state, const uint p)\n" "{\n" " uint x = state[p];\n" " x ^= (x >> 11);\n" " x ^= (x << 7) & 0x9D2C5680U;\n" " x ^= (x << 15) & 0xEFC60000U;\n" " return x ^ (x >> 18);\n" "}\n" "__kernel void fill(__global uint *state,\n" " const uint state_index,\n" " __global uint *vector,\n" " const uint offset)\n" "{\n" " const uint i = get_global_id(0);\n" " vector[offset+i] = random_number(state, state_index + i);\n" "}\n"; 
m_program = cache->get_or_build(cache_key, std::string(), source, m_context); } private: context m_context; size_t m_state_index; program m_program; buffer m_state_buffer; }; typedef mersenne_twister_engine mt19937; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP compute-0.5/include/boost/compute/random/normal_distribution.hpp000066400000000000000000000076511263566244600253660ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP #define BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP #include #include #include #include #include namespace boost { namespace compute { /// \class normal_distribution /// \brief Produces random, normally-distributed floating-point numbers. /// /// The following example shows how to setup a normal distribution to /// produce random \c float values centered at \c 5: /// /// \snippet test/test_normal_distribution.cpp generate /// /// \see default_random_engine, uniform_real_distribution template class normal_distribution { public: typedef RealType result_type; /// Creates a new normal distribution producing numbers with the given /// \p mean and \p stddev. normal_distribution(RealType mean = 0.f, RealType stddev = 1.f) : m_mean(mean), m_stddev(stddev) { } /// Destroys the normal distribution object. ~normal_distribution() { } /// Returns the mean value of the distribution. result_type mean() const { return m_mean; } /// Returns the standard-deviation of the distribution. 
result_type stddev() const { return m_stddev; } /// Returns the minimum value of the distribution. result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return -std::numeric_limits::infinity(); } /// Returns the maximum value of the distribution. result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const { return std::numeric_limits::infinity(); } /// Generates normally-distributed floating-point numbers and stores /// them to the range [\p first, \p last). template void generate(OutputIterator first, OutputIterator last, Generator &generator, command_queue &queue) { typedef typename make_vector_type::type RealType2; size_t count = detail::iterator_range_size(first, last); vector tmp(count, queue.get_context()); generator.generate(tmp.begin(), tmp.end(), queue); BOOST_COMPUTE_FUNCTION(RealType2, box_muller, (const uint2_ x), { const RealType x1 = x.x / (RealType) (UINT_MAX - 1); const RealType x2 = x.y / (RealType) (UINT_MAX - 1); const RealType z1 = sqrt(-2.f * log2(x1)) * cos(2.f * M_PI_F * x2); const RealType z2 = sqrt(-2.f * log2(x1)) * sin(2.f * M_PI_F * x2); return (RealType2)(MEAN, MEAN) + (RealType2)(z1, z2) * (RealType2)(STDDEV, STDDEV); }); box_muller.define("MEAN", boost::lexical_cast(m_mean)); box_muller.define("STDDEV", boost::lexical_cast(m_stddev)); box_muller.define("RealType", type_name()); box_muller.define("RealType2", type_name()); transform( make_buffer_iterator(tmp.get_buffer(), 0), make_buffer_iterator(tmp.get_buffer(), count / 2), make_buffer_iterator(first.get_buffer(), 0), box_muller, queue ); } private: RealType m_mean; RealType m_stddev; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP compute-0.5/include/boost/compute/random/threefry_engine.hpp000066400000000000000000000325371263566244600244550ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Muhammad Junaid Muzammil // // Distributed under 
the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP #define BOOST_COMPUTE_RANDOM_THREEFRY_HPP #include #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class threefry_engine /// \brief Threefry pseudorandom number generator. template class threefry_engine { public: static const size_t threads = 1024; typedef T result_type; /// Creates a new threefry_engine and seeds it with \p value. explicit threefry_engine(command_queue &queue) : m_context(queue.get_context()) { // setup program load_program(); } /// Creates a new threefry_engine object as a copy of \p other. threefry_engine(const threefry_engine &other) : m_context(other.m_context), m_program(other.m_program) { } /// Copies \p other to \c *this. threefry_engine& operator=(const threefry_engine &other) { if(this != &other){ m_context = other.m_context; m_program = other.m_program; } return *this; } /// Destroys the threefry_engine object. ~threefry_engine() { } private: /// \internal_ void load_program() { boost::shared_ptr cache = program_cache::get_global_cache(m_context); std::string cache_key = std::string("threefry_engine_32x2"); // Copyright 2010-2012, D. E. Shaw Research. // All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions, and the following disclaimer. 
// * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions, and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of D. E. Shaw Research nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
const char source[] = "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n" "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n" "enum r123_enum_threefry32x2 {\n" " R_32x2_0_0=13,\n" " R_32x2_1_0=15,\n" " R_32x2_2_0=26,\n" " R_32x2_3_0= 6,\n" " R_32x2_4_0=17,\n" " R_32x2_5_0=29,\n" " R_32x2_6_0=16,\n" " R_32x2_7_0=24\n" "};\n" "static uint RotL_32(uint x, uint N)\n" "{\n" " return (x << (N & 31)) | (x >> ((32-N) & 31));\n" "}\n" "struct r123array2x32 {\n" " uint v[2];\n" "};\n" "typedef struct r123array2x32 threefry2x32_ctr_t;\n" "typedef struct r123array2x32 threefry2x32_key_t;\n" "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n" "{\n" " threefry2x32_ctr_t X;\n" " uint ks[3];\n" " uint i; \n" " ks[2] = SKEIN_KS_PARITY_32;\n" " for (i=0;i < 2; i++) {\n" " ks[i] = k.v[i];\n" " X.v[i] = in.v[i];\n" " ks[2] ^= k.v[i];\n" " }\n" " X.v[0] += ks[0]; X.v[1] += ks[1];\n" " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>3){\n" " X.v[0] += ks[1]; X.v[1] += ks[2];\n" " X.v[1] += 1;\n" " }\n" " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>7){\n" " X.v[0] += ks[2]; X.v[1] += ks[0];\n" " X.v[1] += 2;\n" " }\n" " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = 
RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>11){\n" " X.v[0] += ks[0]; X.v[1] += ks[1];\n" " X.v[1] += 3;\n" " }\n" " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>15){\n" " X.v[0] += ks[1]; X.v[1] += ks[2];\n" " X.v[1] += 4;\n" " }\n" " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>19){\n" " X.v[0] += ks[2]; X.v[1] += ks[0];\n" " X.v[1] += 5;\n" " }\n" " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>23){\n" " X.v[0] += ks[0]; X.v[1] += ks[1];\n" " X.v[1] += 6;\n" " }\n" " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); 
X.v[1] ^= X.v[0]; }\n" " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>27){\n" " X.v[0] += ks[1]; X.v[1] += ks[2];\n" " X.v[1] += 7;\n" " }\n" " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" " if(Nrounds>31){\n" " X.v[0] += ks[2]; X.v[1] += ks[0];\n" " X.v[1] += 8;\n" " }\n" " return X;\n" "}\n" "__kernel void generate_rng(__global uint *ctr, __global uint *key, const uint offset) {\n" " threefry2x32_ctr_t in;\n" " threefry2x32_key_t k;\n" " const uint i = get_global_id(0);\n" " in.v[0] = ctr[2 * (offset + i)];\n" " in.v[1] = ctr[2 * (offset + i) + 1];\n" " k.v[0] = key[2 * (offset + i)];\n" " k.v[1] = key[2 * (offset + i) + 1];\n" " in = threefry2x32_R(20, in, k);\n" " ctr[2 * (offset + i)] = in.v[0];\n" " ctr[2 * (offset + i) + 1] = in.v[1];\n" "}\n"; m_program = cache->get_or_build(cache_key, std::string(), source, m_context); } public: /// Generates Threefry random numbers using both the counter and key values, and then stores /// them to the range [\p first_ctr, \p last_ctr). 
template void generate(OutputIterator first_ctr, OutputIterator last_ctr, OutputIterator first_key, OutputIterator last_key, command_queue &queue) { const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr); const size_t size_key = detail::iterator_range_size(first_key, last_key); if(!size_ctr || !size_key || (size_ctr != size_key)) { return; } kernel rng_kernel = m_program.create_kernel("generate_rng"); rng_kernel.set_arg(0, first_ctr.get_buffer()); rng_kernel.set_arg(1, first_key.get_buffer()); size_t offset = 0; for(;;){ size_t count = 0; size_t size = size_ctr/2; if(size > threads){ count = (std::min)(static_cast(threads), size - offset); } else { count = size; } rng_kernel.set_arg(2, static_cast(offset)); queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0); offset += count; if(offset >= size){ break; } } } template void generate(OutputIterator first_ctr, OutputIterator last_ctr, command_queue &queue) { const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr); if(!size_ctr) { return; } boost::compute::vector vector_key(size_ctr, m_context); vector_key.assign(size_ctr, 0, queue); kernel rng_kernel = m_program.create_kernel("generate_rng"); rng_kernel.set_arg(0, first_ctr.get_buffer()); rng_kernel.set_arg(1, vector_key); size_t offset = 0; for(;;){ size_t count = 0; size_t size = size_ctr/2; if(size > threads){ count = (std::min)(static_cast(threads), size - offset); } else { count = size; } rng_kernel.set_arg(2, static_cast(offset)); queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0); offset += count; if(offset >= size){ break; } } } private: context m_context; program m_program; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP compute-0.5/include/boost/compute/random/uniform_int_distribution.hpp000066400000000000000000000065131263566244600264230ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 
Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP #define BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class uniform_int_distribution /// \brief Produces uniformily distributed random integers /// /// The following example shows how to setup a uniform int distribution to /// produce random integers 0 and 1. /// /// \snippet test/test_uniform_int_distribution.cpp generate /// template class uniform_int_distribution { public: typedef IntType result_type; /// Creates a new uniform distribution producing numbers in the range /// [\p a, \p b]. explicit uniform_int_distribution(IntType a = 0, IntType b = (std::numeric_limits::max)()) : m_a(a), m_b(b) { } /// Destroys the uniform_int_distribution object. ~uniform_int_distribution() { } /// Returns the minimum value of the distribution. result_type a() const { return m_a; } /// Returns the maximum value of the distribution. result_type b() const { return m_b; } /// Generates uniformily distributed integers and stores /// them to the range [\p first, \p last). 
template void generate(OutputIterator first, OutputIterator last, Generator &generator, command_queue &queue) { size_t size = std::distance(first, last); typedef typename Generator::result_type g_result_type; vector tmp(size, queue.get_context()); vector tmp2(size, queue.get_context()); uint_ bound = ((uint_(-1))/(m_b-m_a+1))*(m_b-m_a+1); buffer_iterator tmp2_iter; while(size>0) { generator.generate(tmp.begin(), tmp.begin() + size, queue); tmp2_iter = copy_if(tmp.begin(), tmp.begin() + size, tmp2.begin(), _1 <= bound, queue); size = std::distance(tmp2_iter, tmp2.end()); } BOOST_COMPUTE_FUNCTION(IntType, scale_random, (const g_result_type x), { return LO + (x % (HI-LO+1)); }); scale_random.define("LO", boost::lexical_cast(m_a)); scale_random.define("HI", boost::lexical_cast(m_b)); transform(tmp2.begin(), tmp2.end(), first, scale_random, queue); } private: IntType m_a; IntType m_b; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP compute-0.5/include/boost/compute/random/uniform_real_distribution.hpp000066400000000000000000000060661263566244600265570ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP #define BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP #include #include #include #include namespace boost { namespace compute { /// \class uniform_real_distribution /// \brief Produces uniformily distributed random floating-point numbers. 
/// /// The following example shows how to setup a uniform real distribution to /// produce random \c float values between \c 1 and \c 100. /// /// \snippet test/test_uniform_real_distribution.cpp generate /// /// \see default_random_engine, normal_distribution template class uniform_real_distribution { public: typedef RealType result_type; /// Creates a new uniform distribution producing numbers in the range /// [\p a, \p b). uniform_real_distribution(RealType a = 0.f, RealType b = 1.f) : m_a(a), m_b(b) { } /// Destroys the uniform_real_distribution object. ~uniform_real_distribution() { } /// Returns the minimum value of the distribution. result_type a() const { return m_a; } /// Returns the maximum value of the distribution. result_type b() const { return m_b; } /// Generates uniformily distributed floating-point numbers and stores /// them to the range [\p first, \p last). template void generate(OutputIterator first, OutputIterator last, Generator &generator, command_queue &queue) { BOOST_COMPUTE_FUNCTION(RealType, scale_random, (const uint_ x), { return LO + (convert_RealType(x) / MAX_RANDOM) * (HI - LO); }); scale_random.define("LO", detail::make_literal(m_a)); scale_random.define("HI", detail::make_literal(m_b)); scale_random.define("MAX_RANDOM", "UINT_MAX"); scale_random.define( "convert_RealType", std::string("convert_") + type_name() ); generator.generate( first, last, scale_random, queue ); } /// \internal_ (deprecated) template void fill(OutputIterator first, OutputIterator last, Generator &g, command_queue &queue) { generate(first, last, g, queue); } private: RealType m_a; RealType m_b; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP compute-0.5/include/boost/compute/source.hpp000066400000000000000000000010471263566244600213100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // 
Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// // deprecated, use instead #include compute-0.5/include/boost/compute/svm.hpp000066400000000000000000000033771263566244600206250ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_SVM_HPP #define BOOST_COMPUTE_SVM_HPP #include #include #include // svm functions require opencl 2.0 #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) namespace boost { namespace compute { /// Allocates a shared virtual memory (SVM) buffer. // /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clSVMAlloc} /// /// \see svm_free() template inline svm_ptr svm_alloc(const context &context, size_t size, cl_svm_mem_flags flags = CL_MEM_READ_WRITE, unsigned int alignment = 0) { svm_ptr ptr(clSVMAlloc(context.get(), flags, size * sizeof(T), alignment)); if(!ptr.get()){ BOOST_THROW_EXCEPTION(opencl_error(CL_MEM_OBJECT_ALLOCATION_FAILURE)); } return ptr; } /// Deallocates a shared virtual memory (SVM) buffer. 
/// /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clSVMFree} /// /// \see svm_alloc(), command_queue::enqueue_svm_free() template inline void svm_free(const context &context, svm_ptr ptr) { clSVMFree(context.get(), ptr.get()); } } // end compute namespace } // end boost namespace #endif // CL_VERSION_2_0 #endif // BOOST_COMPUTE_PIPE_HPP compute-0.5/include/boost/compute/system.hpp000066400000000000000000000213041263566244600213320ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_SYSTEM_HPP #define BOOST_COMPUTE_SYSTEM_HPP #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// \class system /// \brief Provides access to platforms and devices on the system. /// /// The system class contains a set of static functions which provide access to /// the OpenCL platforms and compute devices on the host system. /// /// The default_device() convenience method automatically selects and returns /// the "best" compute device for the system following a set of heuristics and /// environment variables. This simplifies setup of the OpenCL enviornment. /// /// \see platform, device, context class system { public: /// Returns the default compute device for the system. /// /// The default device is selected based on a set of heuristics and can be /// influenced using one of the following environment variables: /// /// \li \c BOOST_COMPUTE_DEFAULT_DEVICE - /// name of the compute device (e.g. 
"GTX TITAN") /// \li \c BOOST_COMPUTE_DEFAULT_DEVICE_TYPE /// type of the compute device (e.g. "GPU" or "CPU") /// \li \c BOOST_COMPUTE_DEFAULT_PLATFORM - /// name of the platform (e.g. "NVIDIA CUDA") /// \li \c BOOST_COMPUTE_DEFAULT_VENDOR - /// name of the device vendor (e.g. "NVIDIA") /// /// The default device is determined once on the first time this function /// is called. Calling this function multiple times will always result in /// the same device being returned. /// /// If no OpenCL device is found on the system, a no_device_found exception /// is thrown. /// /// For example, to print the name of the default compute device on the /// system: /// \code /// // get the default compute device /// boost::compute::device device = boost::compute::system::default_device(); /// /// // print the name of the device /// std::cout << "default device: " << device.name() << std::endl; /// \endcode static device default_device() { static device default_device = find_default_device(); return default_device; } /// Returns the device with \p name. /// /// \throws no_device_found if no device with \p name is found. static device find_device(const std::string &name) { const std::vector devices = system::devices(); for(size_t i = 0; i < devices.size(); i++){ const device& device = devices[i]; if(device.name() == name){ return device; } } BOOST_THROW_EXCEPTION(no_device_found()); } /// Returns a vector containing all of the compute devices on /// the system. 
/// /// For example, to print out the name of each OpenCL-capable device /// available on the system: /// \code /// for(const auto &device : boost::compute::system::devices()){ /// std::cout << device.name() << std::endl; /// } /// \endcode static std::vector devices() { std::vector devices; const std::vector platforms = system::platforms(); for(size_t i = 0; i < platforms.size(); i++){ const std::vector platform_devices = platforms[i].devices(); devices.insert( devices.end(), platform_devices.begin(), platform_devices.end() ); } return devices; } /// Returns the number of compute devices on the system. static size_t device_count() { size_t count = 0; const std::vector platforms = system::platforms(); for(size_t i = 0; i < platforms.size(); i++){ count += platforms[i].device_count(); } return count; } /// Returns the default context for the system. /// /// The default context is created for the default device on the system /// (as returned by default_device()). /// /// The default context is created once on the first time this function is /// called. Calling this function multiple times will always result in the /// same context object being returned. static context default_context() { static context default_context(default_device()); return default_context; } /// Returns the default command queue for the system. static command_queue& default_queue() { static command_queue queue(default_context(), default_device()); return queue; } /// Blocks until all outstanding computations on the default /// command queue are complete. /// /// This is equivalent to: /// \code /// system::default_queue().finish(); /// \endcode static void finish() { default_queue().finish(); } /// Returns a vector containing each of the OpenCL platforms on the system. 
/// /// For example, to print out the name of each OpenCL platform present on /// the system: /// \code /// for(const auto &platform : boost::compute::system::platforms()){ /// std::cout << platform.name() << std::endl; /// } /// \endcode static std::vector platforms() { cl_uint count = 0; clGetPlatformIDs(0, 0, &count); std::vector platform_ids(count); clGetPlatformIDs(count, &platform_ids[0], 0); std::vector platforms; for(size_t i = 0; i < platform_ids.size(); i++){ platforms.push_back(platform(platform_ids[i])); } return platforms; } /// Returns the number of compute platforms on the system. static size_t platform_count() { cl_uint count = 0; clGetPlatformIDs(0, 0, &count); return static_cast(count); } private: /// \internal_ static device find_default_device() { // get a list of all devices on the system const std::vector devices_ = devices(); if(devices_.empty()){ BOOST_THROW_EXCEPTION(no_device_found()); } // check for device from environment variable const char *name = detail::getenv("BOOST_COMPUTE_DEFAULT_DEVICE"); const char *type = detail::getenv("BOOST_COMPUTE_DEFAULT_DEVICE_TYPE"); const char *platform = detail::getenv("BOOST_COMPUTE_DEFAULT_PLATFORM"); const char *vendor = detail::getenv("BOOST_COMPUTE_DEFAULT_VENDOR"); if(name || type || platform || vendor){ for(size_t i = 0; i < devices_.size(); i++){ const device& device = devices_[i]; if (name && !matches(device.name(), name)) continue; if (type && matches(std::string("GPU"), type)) if (!(device.type() & device::gpu)) continue; if (type && matches(std::string("CPU"), type)) if (!(device.type() & device::cpu)) continue; if (platform && !matches(device.platform().name(), platform)) continue; if (vendor && !matches(device.vendor(), vendor)) continue; return device; } } // find the first gpu device for(size_t i = 0; i < devices_.size(); i++){ const device& device = devices_[i]; if(device.type() & device::gpu){ return device; } } // find the first cpu device for(size_t i = 0; i < devices_.size(); i++){ 
const device& device = devices_[i]; if(device.type() & device::cpu){ return device; } } // return the first device found return devices_[0]; } /// \internal_ static bool matches(const std::string &str, const std::string &pattern) { return str.find(pattern) != std::string::npos; } }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_SYSTEM_HPP compute-0.5/include/boost/compute/type_traits.hpp000066400000000000000000000021221263566244600223520ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_HPP #define BOOST_COMPUTE_TYPE_TRAITS_HPP #include #include #include #include #include #include #include #include #include #include #endif // BOOST_COMPUTE_TYPE_TRAITS_HPP compute-0.5/include/boost/compute/type_traits/000077500000000000000000000000001263566244600216445ustar00rootroot00000000000000compute-0.5/include/boost/compute/type_traits/common_type.hpp000066400000000000000000000042261263566244600247120ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP #define BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP #include #include namespace boost { /// \internal_ #define BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, size) \ template<> \ struct common_type \ { \ typedef BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) type; \ }; \ template<> \ struct common_type \ { \ typedef BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) type; \ }; /// \internal_ #define BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(scalar) \ BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 2) \ BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 4) \ BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 8) \ BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 16) \ BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(char) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(uchar) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(short) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(ushort) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(int) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(uint) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(long) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(ulong) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(float) BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(double) } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP compute-0.5/include/boost/compute/type_traits/detail/000077500000000000000000000000001263566244600231065ustar00rootroot00000000000000compute-0.5/include/boost/compute/type_traits/detail/capture_traits.hpp000066400000000000000000000016771263566244600266630ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // 
http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP #define BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP #include namespace boost { namespace compute { namespace detail { template struct capture_traits { static std::string type_name() { return ::boost::compute::type_name(); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP compute-0.5/include/boost/compute/type_traits/is_device_iterator.hpp000066400000000000000000000025071263566244600262240ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP #define BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP #include namespace boost { namespace compute { /// Meta-function returning \c true if \c Iterator is a device-iterator. /// /// By default, this function returns false. Device iterator types (such as /// buffer_iterator) should specialize this trait and return \c true. 
/// /// For example: /// \code /// is_device_iterator>::value == true /// is_device_iterator::iterator>::value == false /// \endcode template struct is_device_iterator : boost::false_type {}; /// \internal_ template struct is_device_iterator : is_device_iterator {}; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP compute-0.5/include/boost/compute/type_traits/is_fundamental.hpp000066400000000000000000000047761263566244600253640ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP #define BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP #include namespace boost { namespace compute { /// Meta-function returning \c true if \p T is a fundamental (i.e. /// built-in) type. 
/// /// For example, /// \code /// is_fundamental::value == true /// is_fundamental>::value == false /// \endcode template struct is_fundamental : public boost::false_type {}; /// \internal_ #define BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(type) \ template<> struct is_fundamental : boost::true_type {}; \ template<> struct is_fundamental : boost::true_type {}; \ template<> struct is_fundamental : boost::true_type {}; \ template<> struct is_fundamental : boost::true_type {}; \ template<> struct is_fundamental : boost::true_type {}; \ template<> struct is_fundamental : boost::true_type {}; \ template<> struct is_fundamental : boost::true_type {}; \ template<> struct is_fundamental : boost::true_type {}; \ template<> struct is_fundamental : boost::true_type {}; BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(char) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(uchar) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(short) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(ushort) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(int) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(uint) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(long) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(ulong) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(float) BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(double) #undef BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP compute-0.5/include/boost/compute/type_traits/is_vector_type.hpp000066400000000000000000000021211263566244600254070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP #define BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP #include #include namespace boost { namespace compute { /// Meta-function returning \c true if \p T is a vector type. /// /// For example, /// \code /// is_vector_type::value == false /// is_vector_type::value == true /// \endcode /// /// \see make_vector_type, vector_size template struct is_vector_type : boost::mpl::bool_::value != 1> { }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP compute-0.5/include/boost/compute/type_traits/make_vector_type.hpp000066400000000000000000000044501263566244600257200ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP #define BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP #include #include namespace boost { namespace compute { /// Meta-function which returns a vector type for \p Scalar with \p Size. 
/// /// For example, /// \code /// make_vector_type::type == int2_ /// make_vector_type::type == float4_ /// \endcode /// /// \see is_vector_type template struct make_vector_type { }; /// \internal_ template struct make_vector_type { typedef Scalar type; }; /// \internal_ #define BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, size) \ template<> \ struct make_vector_type \ { \ typedef BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _) type; \ }; /// \internal_ #define BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(scalar) \ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 2) \ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 4) \ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 8) \ BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 16) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(char) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(uchar) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(short) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(ushort) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(int) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(uint) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(long) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(ulong) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(float) BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(double) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP compute-0.5/include/boost/compute/type_traits/result_of.hpp000066400000000000000000000024401263566244600243570ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP #define BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP #include namespace boost { namespace compute { /// Returns the result of \c Function when called with \c Args. /// /// For example, /// \code /// // int + int = int /// result_of::type == int /// \endcode template struct result_of { // the default implementation uses the TR1-style result_of protocol. note // that we explicitly do *not* use the C++11 decltype operator as we want // the result type as it would be on an OpenCL device, not the actual C++ // type resulting from "invoking" the function on the host. typedef typename ::boost::tr1_result_of::type type; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP compute-0.5/include/boost/compute/type_traits/scalar_type.hpp000066400000000000000000000044761263566244600246760ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP #define BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP #include #include namespace boost { namespace compute { /// Meta-function returning the scalar type for a vector type. 
/// /// For example, /// \code /// scalar_type::type == float /// \endcode template struct scalar_type { /// \internal_ typedef void type; }; /// \internal_ #define BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTION(scalar) \ template<> \ struct scalar_type \ { \ typedef BOOST_PP_CAT(scalar, _) type; \ }; /// \internal_ #define BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, size) \ template<> \ struct scalar_type \ { \ typedef BOOST_PP_CAT(scalar, _) type; \ }; /// \internal_ #define BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(scalar) \ BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTION(scalar) \ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 2) \ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 4) \ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 8) \ BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 16) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(char) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(uchar) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(short) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(ushort) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(int) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(uint) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(long) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(ulong) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(float) BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(double) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP compute-0.5/include/boost/compute/type_traits/type_definition.hpp000066400000000000000000000020601263566244600255440ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_TYPE_DEFINITION_HPP #define BOOST_COMPUTE_TYPE_TRAITS_TYPE_DEFINITION_HPP #include namespace boost { namespace compute { namespace detail { template struct type_definition_trait; } // end detail namespace /// Returns the OpenCL type definition for \c T. /// /// \return a string containing the type definition for \c T /// /// \see type_name() template inline std::string type_definition() { return detail::type_definition_trait::value(); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_TYPE_DEFINITION_HPP compute-0.5/include/boost/compute/type_traits/type_name.hpp000066400000000000000000000071751263566244600243500ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP #define BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP #include #include #include namespace boost { namespace compute { namespace detail { template struct type_name_trait; /// \internal_ #define BOOST_COMPUTE_DEFINE_SCALAR_TYPE_NAME_FUNCTION(type) \ template<> \ struct type_name_trait \ { \ static const char* value() \ { \ return BOOST_PP_STRINGIZE(type); \ } \ }; /// \internal_ #define BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, n) \ template<> \ struct type_name_trait \ { \ static const char* value() \ { \ return BOOST_PP_STRINGIZE(BOOST_PP_CAT(scalar, n)); \ } \ }; /// \internal_ #define BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(scalar) \ BOOST_COMPUTE_DEFINE_SCALAR_TYPE_NAME_FUNCTION(scalar) \ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 2) \ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 4) \ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 8) \ BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 16) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(char) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(uchar) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(short) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(ushort) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(int) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(uint) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(long) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(ulong) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(float) BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(double) /// \internal_ #define BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(type) \ template<> \ struct type_name_trait \ { \ static const char* value() \ { \ return #type; \ } \ }; BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(bool) BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(char) BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(void) } // end detail namespace /// Returns the OpenCL type name for the type \c T as a string. 
/// /// \return a string containing the type name for \c T /// /// For example: /// \code /// type_name() == "float" /// type_name() == "float4" /// \endcode /// /// \see type_definition() template inline const char* type_name() { return detail::type_name_trait::value(); } } // end compute namespace } // end boost namespace /// Registers the OpenCL type for the C++ \p type to \p name. /// /// For example, the following will allow Eigen's \c Vector2f type /// to be used with Boost.Compute algorithms and containers as the /// built-in \c float2 type. /// \code /// BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2f, float2) /// \endcode /// /// This macro should be invoked in the global namespace. /// /// \see type_name() #define BOOST_COMPUTE_TYPE_NAME(type, name) \ namespace boost { namespace compute { \ template<> \ inline const char* type_name() \ { \ return #name; \ }}} #endif // BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP compute-0.5/include/boost/compute/type_traits/vector_size.hpp000066400000000000000000000042211263566244600247100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP #define BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP #include #include namespace boost { namespace compute { /// Meta-function returning the size (number of components) of a vector type /// \p T. For scalar types this function returns \c 1. 
/// /// For example, /// \code /// vector_size::value == 1 /// vector_size::value == 4 /// \endcode template struct vector_size { /// \internal_ BOOST_STATIC_CONSTANT(size_t, value = 1); }; /// \internal_ #define BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, size) \ template<> \ struct vector_size \ { \ BOOST_STATIC_CONSTANT(size_t, value = size); \ }; /// \internal_ #define BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(scalar) \ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 2) \ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 4) \ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 8) \ BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 16) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(char) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(uchar) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(short) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(ushort) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(int) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(uint) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(long) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(ulong) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(float) BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(double) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP compute-0.5/include/boost/compute/types.hpp000066400000000000000000000014741263566244600211600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_HPP #define BOOST_COMPUTE_TYPES_HPP /// \file /// /// Meta-header to include all Boost.Compute types headers. 
#include #include #include #include #include #endif // BOOST_COMPUTE_TYPES_HPP compute-0.5/include/boost/compute/types/000077500000000000000000000000001263566244600204415ustar00rootroot00000000000000compute-0.5/include/boost/compute/types/builtin.hpp000066400000000000000000000010551263566244600226210ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// // deprecated, use instead #include compute-0.5/include/boost/compute/types/complex.hpp000066400000000000000000000114521263566244600226240ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_COMPLEX_HPP #define BOOST_COMPUTE_TYPES_COMPLEX_HPP #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template meta_kernel& operator<<(meta_kernel &kernel, const std::complex &x) { typedef typename std::complex value_type; kernel << "(" << type_name() << ")" << "(" << x.real() << ", " << x.imag() << ")"; return kernel; } // get() result type specialization for std::complex<> template struct get_result_type > { typedef T type; }; // get() specialization for std::complex<> template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_get > &expr) { BOOST_STATIC_ASSERT(N < 2); return kernel << expr.m_arg << (N == 0 ? ".x" : ".y"); } } // end detail namespace // returns the real component of a complex template struct real { typedef T result_type; template detail::invoked_get<0, Arg, std::complex > operator()(const Arg &x) const { return detail::invoked_get<0, Arg, std::complex >(x); } }; // returns the imaginary component of a complex template struct imag { typedef T result_type; template detail::invoked_get<1, Arg, std::complex > operator()(const Arg &x) const { return detail::invoked_get<1, Arg, std::complex >(x); } }; namespace detail { template struct invoked_complex_multiplies { typedef typename std::complex result_type; invoked_complex_multiplies(const Arg1 &x, const Arg2 &y) : m_x(x), m_y(y) { } Arg1 m_x; Arg2 m_y; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_complex_multiplies &expr) { typedef typename std::complex value_type; kernel << "(" << type_name() << ")" << "(" << expr.m_x << ".x*" << expr.m_y << ".x-" << expr.m_x << ".y*" << expr.m_y << ".y," << expr.m_x << ".y*" << expr.m_y << ".x+" << expr.m_x << ".x*" << expr.m_y << ".y" << ")"; return kernel; } template struct invoked_complex_conj { typedef typename std::complex result_type; 
invoked_complex_conj(const Arg &arg) : m_arg(arg) { } Arg m_arg; }; template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_complex_conj &expr) { typedef typename std::complex value_type; kernel << "(" << type_name() << ")" << "(" << expr.m_arg << ".x" << ", -" << expr.m_arg << ".y" << ")"; return kernel; } } // end detail namespace // specialization for multiplies template class multiplies > : public function (std::complex, std::complex)> { public: multiplies() : function< std::complex (std::complex, std::complex) >("complex_multiplies") { } template detail::invoked_complex_multiplies operator()(const Arg1 &x, const Arg2 &y) const { return detail::invoked_complex_multiplies(x, y); } }; // returns the complex conjugate of a complex template struct conj { typedef typename std::complex result_type; template detail::invoked_complex_conj operator()(const Arg &x) const { return detail::invoked_complex_conj(x); } }; namespace detail { // type_name() specialization for std::complex template struct type_name_trait > { static const char* value() { typedef typename make_vector_type::type vector_type; return type_name(); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPES_COMPLEX_HPP compute-0.5/include/boost/compute/types/fundamental.hpp000066400000000000000000000115701263566244600234540ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP #define BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { // scalar data types typedef cl_char char_; typedef cl_uchar uchar_; typedef cl_short short_; typedef cl_ushort ushort_; typedef cl_int int_; typedef cl_uint uint_; typedef cl_long long_; typedef cl_ulong ulong_; typedef cl_float float_; typedef cl_double double_; // converts uchar to ::boost::compute::uchar_ #define BOOST_COMPUTE_MAKE_SCALAR_TYPE(scalar) \ BOOST_PP_CAT(::boost::compute::scalar, _) // converts float, 4 to ::boost::compute::float4_ #define BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) \ BOOST_PP_CAT(BOOST_PP_CAT(::boost::compute::scalar, size), _) // vector data types template class vector_type { public: typedef Scalar scalar_type; vector_type() { } explicit vector_type(const Scalar scalar) { for(int i = 0; i < N; i++) m_value[i] = scalar; } vector_type(const vector_type &other) { std::memcpy(m_value, other.m_value, sizeof(m_value)); } vector_type& operator=(const vector_type &other) { std::memcpy(m_value, other.m_value, sizeof(m_value)); return *this; } size_t size() const { return N; } Scalar& operator[](size_t i) { return m_value[i]; } Scalar operator[](size_t i) const { return m_value[i]; } bool operator==(const vector_type &other) const { return std::memcmp(m_value, other.m_value, sizeof(m_value)) == 0; } bool operator!=(const vector_type &other) const { return !(*this == other); } protected: scalar_type m_value[N]; }; #define BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION(z, i, _) \ BOOST_PP_COMMA_IF(i) scalar_type BOOST_PP_CAT(arg, i) #define BOOST_COMPUTE_VECTOR_TYPE_DECLARE_CTOR_ARGS(scalar, size) \ BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION, _) #define BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_ARG(z, i, _) \ m_value[i] = BOOST_PP_CAT(arg, i); #define 
BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_SINGLE_ARG(z, i, _) \ m_value[i] = arg; #define BOOST_COMPUTE_DECLARE_VECTOR_TYPE_CLASS(cl_scalar, size, class_name) \ class class_name : public vector_type \ { \ public: \ class_name() { } \ explicit class_name( scalar_type arg ) \ { \ BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_SINGLE_ARG, _) \ } \ class_name( \ BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION, _) \ ) \ { \ BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_ARG, _) \ } \ }; #define BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, size) \ BOOST_COMPUTE_DECLARE_VECTOR_TYPE_CLASS(BOOST_PP_CAT(cl_, scalar), \ size, \ BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _)) \ \ inline std::ostream& operator<<( \ std::ostream &s, \ const BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) &v) \ { \ s << BOOST_PP_STRINGIZE(BOOST_PP_CAT(scalar, size)) << "("; \ for(size_t i = 0; i < size; i++){\ s << v[i]; \ if(i != size - 1){\ s << ", "; \ } \ } \ s << ")"; \ return s; \ } #define BOOST_COMPUTE_DECLARE_VECTOR_TYPES(scalar) \ BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 2) \ BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 4) \ BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 8) \ BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 16) \ BOOST_COMPUTE_DECLARE_VECTOR_TYPES(char) BOOST_COMPUTE_DECLARE_VECTOR_TYPES(uchar) BOOST_COMPUTE_DECLARE_VECTOR_TYPES(short) BOOST_COMPUTE_DECLARE_VECTOR_TYPES(ushort) BOOST_COMPUTE_DECLARE_VECTOR_TYPES(int) BOOST_COMPUTE_DECLARE_VECTOR_TYPES(uint) BOOST_COMPUTE_DECLARE_VECTOR_TYPES(long) BOOST_COMPUTE_DECLARE_VECTOR_TYPES(ulong) BOOST_COMPUTE_DECLARE_VECTOR_TYPES(float) BOOST_COMPUTE_DECLARE_VECTOR_TYPES(double) } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP compute-0.5/include/boost/compute/types/pair.hpp000066400000000000000000000061511263566244600221100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz 
// // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_PAIR_HPP #define BOOST_COMPUTE_TYPES_PAIR_HPP #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { // meta_kernel operator for std::pair literals template inline meta_kernel& operator<<(meta_kernel &kernel, const std::pair &x) { kernel << "(" << type_name >() << ")" << "{" << kernel.make_lit(x.first) << ", " << kernel.make_lit(x.second) << "}"; return kernel; } // inject_type() specialization for std::pair template struct inject_type_impl > { void operator()(meta_kernel &kernel) { typedef std::pair pair_type; kernel.inject_type(); kernel.inject_type(); kernel.add_type_declaration(type_definition()); } }; // get() result type specialization for std::pair<> template struct get_result_type<0, std::pair > { typedef T1 type; }; template struct get_result_type<1, std::pair > { typedef T2 type; }; // get() specialization for std::pair<> template inline meta_kernel& operator<<(meta_kernel &kernel, const invoked_get > &expr) { kernel.inject_type >(); return kernel << expr.m_arg << (N == 0 ? 
".first" : ".second"); } } // end detail namespace namespace detail { // type_name() specialization for std::pair template struct type_name_trait > { static const char* value() { static std::string name = std::string("_pair_") + type_name() + "_" + type_name() + "_t"; return name.c_str(); } }; // type_definition() specialization for std::pair template struct type_definition_trait > { static std::string value() { typedef std::pair pair_type; std::stringstream declaration; declaration << "typedef struct {\n" << " " << type_name() << " first;\n" << " " << type_name() << " second;\n" << "} " << type_name() << ";\n"; return declaration.str(); } }; } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPES_PAIR_HPP compute-0.5/include/boost/compute/types/struct.hpp000066400000000000000000000131211263566244600224740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_STRUCT_HPP #define BOOST_COMPUTE_TYPES_STRUCT_HPP #include #include #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline std::string adapt_struct_insert_member(T Struct::*, const char *name) { std::stringstream s; s << " " << type_name() << " " << name << ";\n"; return s.str(); } template inline std::string adapt_struct_insert_member(T (Struct::*)[N], const char *name) { std::stringstream s; s << " " << type_name() << " " << name << "[" << N << "]" << ";\n"; return s.str(); } } // end detail namespace } // end compute namespace } // end boost namespace /// \internal_ #define BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_INSERT_MEMBER(r, type, member) \ << ::boost::compute::detail::adapt_struct_insert_member( \ &type::member, BOOST_PP_STRINGIZE(member) \ ) /// \internal_ #define BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_STREAM_MEMBER(r, data, i, elem) \ BOOST_PP_EXPR_IF(i, << ", ") << data.elem /// \internal_ #define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE(s, struct_, member_) \ sizeof(((struct_ *)0)->member_) /// \internal_ #define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_ADD(s, x, y) (x+y) /// \internal_ #define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_SUM(struct_, members_) \ BOOST_PP_SEQ_FOLD_LEFT( \ BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_ADD, \ 0, \ BOOST_PP_SEQ_TRANSFORM( \ BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE, struct_, members_ \ ) \ ) /// \internal_ /// /// Returns true if struct_ contains no internal padding bytes (i.e. it is /// packed). members_ is a sequence of the names of the struct members. #define BOOST_COMPUTE_DETAIL_STRUCT_IS_PACKED(struct_, members_) \ (sizeof(struct_) == BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_SUM(struct_, members_)) /// The BOOST_COMPUTE_ADAPT_STRUCT() macro makes a C++ struct/class available /// to OpenCL kernels. 
/// /// \param type The C++ type. /// \param name The OpenCL name. /// \param members A tuple of the struct's members. /// /// For example, to adapt a 2D particle struct with position (x, y) and /// velocity (dx, dy): /// \code /// // c++ struct definition /// struct Particle /// { /// float x, y; /// float dx, dy; /// }; /// /// // adapt struct for OpenCL /// BOOST_COMPUTE_ADAPT_STRUCT(Particle, Particle, (x, y, dx, dy)) /// \endcode /// /// After adapting the struct it can be used in Boost.Compute containers /// and with Boost.Compute algorithms: /// \code /// // create vector of particles /// boost::compute::vector particles = ... /// /// // function to compare particles by their x-coordinate /// BOOST_COMPUTE_FUNCTION(bool, sort_by_x, (Particle a, Particle b), /// { /// return a.x < b.x; /// }); /// /// // sort particles by their x-coordinate /// boost::compute::sort( /// particles.begin(), particles.end(), sort_by_x, queue /// ); /// \endcode /// /// Due to differences in struct padding between the host compiler and the /// device compiler, the \c BOOST_COMPUTE_ADAPT_STRUCT() macro requires that /// the adapted struct is packed (i.e. no padding bytes between members). /// /// \see type_name() #define BOOST_COMPUTE_ADAPT_STRUCT(type, name, members) \ BOOST_STATIC_ASSERT_MSG( \ BOOST_COMPUTE_DETAIL_STRUCT_IS_PACKED(type, BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members)), \ "BOOST_COMPUTE_ADAPT_STRUCT() does not support structs with internal padding." 
\ ); \ BOOST_COMPUTE_TYPE_NAME(type, name) \ namespace boost { namespace compute { \ template<> \ inline std::string type_definition() \ { \ std::stringstream declaration; \ declaration << "typedef struct __attribute__((packed)) {\n" \ BOOST_PP_SEQ_FOR_EACH( \ BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_INSERT_MEMBER, \ type, \ BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members) \ ) \ << "} " << type_name() << ";\n"; \ return declaration.str(); \ } \ namespace detail { \ template<> \ struct inject_type_impl \ { \ void operator()(meta_kernel &kernel) \ { \ kernel.add_type_declaration(type_definition()); \ } \ }; \ inline meta_kernel& operator<<(meta_kernel &k, type s) \ { \ return k << "(" << #name << "){" \ BOOST_PP_SEQ_FOR_EACH_I( \ BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_STREAM_MEMBER, \ s, \ BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members) \ ) \ << "}"; \ } \ }}} #endif // BOOST_COMPUTE_TYPES_STRUCT_HPP compute-0.5/include/boost/compute/types/tuple.hpp000066400000000000000000000211441263566244600223050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TYPES_TUPLE_HPP #define BOOST_COMPUTE_TYPES_TUPLE_HPP #include #include #include #include #include #include #include #include #include #include #ifndef BOOST_COMPUTE_NO_STD_TUPLE #include #endif namespace boost { namespace compute { namespace detail { // meta_kernel operators for boost::tuple literals #define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \ BOOST_PP_EXPR_IF(n, << ", ") \ << kernel.make_lit(boost::get(x)) #define BOOST_COMPUTE_PRINT_TUPLE(z, n, unused) \ template \ inline meta_kernel& \ operator<<(meta_kernel &kernel, \ const boost::tuple &x) \ { \ return kernel \ << "(" \ << type_name >() \ << ")" \ << "{" \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_ELEM, ~) \ << "}"; \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_TUPLE, ~) #undef BOOST_COMPUTE_PRINT_TUPLE #undef BOOST_COMPUTE_PRINT_ELEM // inject_type() specializations for boost::tuple #define BOOST_COMPUTE_INJECT_TYPE(z, n, unused) \ kernel.inject_type(); #define BOOST_COMPUTE_INJECT_DECL(z, n, unused) \ << " " << type_name() << " v" #n ";\n" #define BOOST_COMPUTE_INJECT_IMPL(z, n, unused) \ template \ struct inject_type_impl > \ { \ void operator()(meta_kernel &kernel) \ { \ typedef boost::tuple tuple_type; \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_INJECT_TYPE, ~) \ std::stringstream declaration; \ declaration << "typedef struct {\n" \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_INJECT_DECL, ~) \ << "} " << type_name() << ";\n"; \ kernel.add_type_declaration(declaration.str()); \ } \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_INJECT_IMPL, ~) #undef BOOST_COMPUTE_INJECT_IMPL #undef BOOST_COMPUTE_INJECT_DECL #undef BOOST_COMPUTE_INJECT_TYPE #ifdef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES // type_name() specializations for boost::tuple (without variadic templates) #define BOOST_COMPUTE_PRINT_TYPE(z, n, unused) \ + type_name() + "_" #define BOOST_COMPUTE_PRINT_TYPE_NAME(z, n, 
unused) \ template \ struct type_name_trait > \ { \ static const char* value() \ { \ static std::string name = \ std::string("boost_tuple_") \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_TYPE, ~) \ "t"; \ return name.c_str(); \ } \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_TYPE_NAME, ~) #undef BOOST_COMPUTE_PRINT_TYPE_NAME #undef BOOST_COMPUTE_PRINT_TYPE #else template struct write_tuple_type_names { void operator()(std::ostream &os) { os << type_name() << "_"; write_tuple_type_names()(os); } }; template struct write_tuple_type_names<1, T, Rest...> { void operator()(std::ostream &os) { os << type_name(); } }; // type_name<> specialization for boost::tuple<...> (with variadic templates) template struct type_name_trait> { static const char* value() { static std::string str = make_type_name(); return str.c_str(); } static std::string make_type_name() { typedef typename boost::tuple tuple_type; std::stringstream s; s << "boost_tuple_"; write_tuple_type_names< boost::tuples::length::value, T... >()(s); s << "_t"; return s.str(); } }; #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES #ifndef BOOST_COMPUTE_NO_STD_TUPLE // type_name<> specialization for std::tuple template struct type_name_trait> { static const char* value() { static std::string str = make_type_name(); return str.c_str(); } static std::string make_type_name() { typedef typename std::tuple tuple_type; std::stringstream s; s << "std_tuple_"; write_tuple_type_names< std::tuple_size::value, T... 
>()(s); s << "_t"; return s.str(); } }; #endif // BOOST_COMPUTE_NO_STD_TUPLE // get() result type specialization for boost::tuple<> #define BOOST_COMPUTE_GET_RESULT_TYPE(z, n, unused) \ template \ struct get_result_type > \ { \ typedef typename boost::tuple T; \ typedef typename boost::tuples::element::type type; \ }; BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_GET_RESULT_TYPE, ~) #undef BOOST_COMPUTE_GET_RESULT_TYPE // get() specialization for boost::tuple<> #define BOOST_COMPUTE_GET_N(z, n, unused) \ template \ inline meta_kernel& operator<<(meta_kernel &kernel, \ const invoked_get > &expr) \ { \ typedef typename boost::tuple T; \ BOOST_STATIC_ASSERT(N < size_t(boost::tuples::length::value)); \ kernel.inject_type(); \ return kernel << expr.m_arg << ".v" << uint_(N); \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_GET_N, ~) #undef BOOST_COMPUTE_GET_N } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_TYPES_TUPLE_HPP compute-0.5/include/boost/compute/user_event.hpp000066400000000000000000000045221263566244600221700ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_USER_EVENT_HPP #define BOOST_COMPUTE_USER_EVENT_HPP #include #include namespace boost { namespace compute { #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// \class user_event /// \brief An user-created event. /// /// \opencl_version_warning{1,1} /// /// \see event class user_event : public event { public: /// Creates a new user-event object. 
/// /// \see_opencl_ref{clCreateUserEvent} explicit user_event(const context &context) { cl_int error; m_event = clCreateUserEvent(context.get(), &error); if(!m_event){ BOOST_THROW_EXCEPTION(opencl_error(error)); } } /// Creates a new user-event from \p other. user_event(const user_event &other) : event(other) { } /// Copies the user-event from \p other to \c *this. user_event& operator=(const user_event &other) { event::operator=(other); return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new user event object from \p other. user_event(user_event&& other) BOOST_NOEXCEPT : event(std::move(other)) { } /// Move-assigns the user event from \p other to \c *this. user_event& operator=(user_event&& other) BOOST_NOEXCEPT { event::operator=(std::move(other)); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Sets the execution status for the user-event. /// /// \see_opencl_ref{clSetUserEventStatus} void set_status(cl_int execution_status) { cl_int ret = clSetUserEventStatus(m_event, execution_status); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } } }; #endif // CL_VERSION_1_1 } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_EVENT_HPP compute-0.5/include/boost/compute/utility.hpp000066400000000000000000000014671263566244600215210ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_HPP #define BOOST_COMPUTE_UTILITY_HPP #include #include #include #include #include #include #endif // BOOST_COMPUTE_UTILITY_HPP compute-0.5/include/boost/compute/utility/000077500000000000000000000000001263566244600210005ustar00rootroot00000000000000compute-0.5/include/boost/compute/utility/dim.hpp000066400000000000000000000042121263566244600222610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_DIM_HPP #define BOOST_COMPUTE_UTILITY_DIM_HPP #include #include namespace boost { namespace compute { #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES /// The variadic \c dim() function provides a concise syntax for creating /// \ref extents objects. /// /// For example, /// \code /// extents<2> region = dim(640, 480); // region == (640, 480) /// \endcode /// /// \see \ref extents "extents" template inline extents dim(Args... args) { return extents({ static_cast(args)... 
}); } #if BOOST_WORKAROUND(BOOST_MSVC, <= 1800) // for some inexplicable reason passing one parameter to 'dim' variadic template // generates compile error on msvc 2013 update 4 template inline extents<1> dim(T arg) { return extents<1>(static_cast(arg)); } #endif // BOOST_WORKAROUND(BOOST_MSVC, <= 1800) #else // dim() function definitions for non-c++11 compilers #define BOOST_COMPUTE_DETAIL_ASSIGN_DIM(z, n, var) \ var[n] = BOOST_PP_CAT(e, n); #define BOOST_COMPUTE_DETAIL_DEFINE_DIM(z, n, var) \ inline extents dim(BOOST_PP_ENUM_PARAMS(n, size_t e)) \ { \ extents exts; \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_ASSIGN_DIM, exts) \ return exts; \ } BOOST_PP_REPEAT(BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_DETAIL_DEFINE_DIM, ~) #undef BOOST_COMPUTE_DETAIL_ASSIGN_DIM #undef BOOST_COMPUTE_DETAIL_DEFINE_DIM #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES /// \internal_ template inline extents dim() { return extents(); } } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_UTILITY_DIM_HPP compute-0.5/include/boost/compute/utility/extents.hpp000066400000000000000000000074441263566244600232140ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_EXTENTS_HPP #define BOOST_COMPUTE_UTILITY_EXTENTS_HPP #include #include #include #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST #include #endif #include namespace boost { namespace compute { /// The extents class contains an array of n-dimensional extents. 
/// /// \see dim() template class extents { public: typedef size_t size_type; static const size_type static_size = N; typedef boost::array array_type; typedef typename array_type::iterator iterator; typedef typename array_type::const_iterator const_iterator; /// Creates an extents object with each component set to zero. /// /// For example: /// \code /// extents<3> exts(); // (0, 0, 0) /// \endcode extents() { m_extents.fill(0); } /// Creates an extents object with each component set to \p value. /// /// For example: /// \code /// extents<3> exts(1); // (1, 1, 1) /// \endcode explicit extents(size_t value) { m_extents.fill(value); } #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST /// Creates an extents object with \p values. extents(std::initializer_list values) { BOOST_ASSERT(values.size() == N); std::copy(values.begin(), values.end(), m_extents.begin()); } #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST /// Returns the size (i.e. dimensionality) of the extents array. size_type size() const { return N; } /// Returns the linear size of the extents. This is equivalent to the /// product of each extent in each dimension. size_type linear() const { return std::accumulate( m_extents.begin(), m_extents.end(), 1, std::multiplies() ); } /// Returns a pointer to the extents data array. /// /// This is useful for passing the extents data to OpenCL APIs which /// expect an array of \c size_t. size_t* data() { return m_extents.data(); } /// \overload const size_t* data() const { return m_extents.data(); } iterator begin() { return m_extents.begin(); } const_iterator begin() const { return m_extents.begin(); } const_iterator cbegin() const { return m_extents.cbegin(); } iterator end() { return m_extents.end(); } const_iterator end() const { return m_extents.end(); } const_iterator cend() const { return m_extents.cend(); } /// Returns a reference to the extent at \p index. 
size_t& operator[](size_t index) { return m_extents[index]; } /// \overload const size_t& operator[](size_t index) const { return m_extents[index]; } /// Returns \c true if the extents in \c *this are the same as \p other. bool operator==(const extents &other) const { return m_extents == other.m_extents; } /// Returns \c true if the extents in \c *this are not the same as \p other. bool operator!=(const extents &other) const { return m_extents != other.m_extents; } private: array_type m_extents; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_UTILITY_EXTENTS_HPP compute-0.5/include/boost/compute/utility/invoke.hpp000066400000000000000000000053421263566244600230100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://kylelutz.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_INVOKE_HPP #define BOOST_COMPUTE_UTILITY_INVOKE_HPP #include #include #include #include #include #include #include namespace boost { namespace compute { #define BOOST_COMPUTE_DETAIL_INVOKE_ARG(z, n, unused) \ BOOST_PP_COMMA_IF(n) k.var("arg" BOOST_PP_STRINGIZE(n)) #define BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG(z, n, unused) \ k.add_set_arg("arg" BOOST_PP_STRINGIZE(n), BOOST_PP_CAT(arg, n)); #define BOOST_COMPUTE_DETAIL_DEFINE_INVOKE(z, n, unused) \ template \ inline typename result_of::type \ invoke(const Function& function, command_queue& queue, BOOST_PP_ENUM_BINARY_PARAMS(n, const T, &arg)) \ { \ typedef typename result_of::type result_type; \ detail::meta_kernel k("invoke"); \ detail::scalar result(queue.get_context()); \ const size_t result_arg = k.add_arg(memory_object::global_memory, "result"); \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG, ~) \ k << "*result = " << function( \ BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_INVOKE_ARG, ~) \ ) << ";"; \ k.set_arg(result_arg, result.get_buffer()); \ k.exec(queue); \ return result.read(queue); \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_DETAIL_DEFINE_INVOKE, ~) #undef BOOST_COMPUTE_DETAIL_INVOKE_ARG #undef BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG #undef BOOST_COMPUTE_DETAIL_DEFINE_INVOKE #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED /// Invokes \p function with \p args on \p queue. /// /// For example, to invoke the builtin abs() function: /// \code /// int result = invoke(abs(), queue, -10); // returns 10 /// \endcode template inline typename result_of::type invoke(const Function& function, command_queue& queue, const Args&... 
args); #endif // BOOST_COMPUTE_DOXYGEN_INVOKED } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_UTILITY_INVOKE_HPP compute-0.5/include/boost/compute/utility/program_cache.hpp000066400000000000000000000125651263566244600243140ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP #define BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP #include #include #include #include #include #include #include #include #include namespace boost { namespace compute { /// The program_cache class stores \ref program objects in an LRU cache. /// /// This class can be used to help mitigate the overhead of OpenCL's run-time /// kernel compilation model. Commonly used programs can be stored persistently /// in the cache and only compiled once on their first use. /// /// Program objects are stored and retrieved based on a user-defined cache key /// along with the options used to build the program (if any). /// /// For example, to insert a program into the cache: /// \code /// cache.insert("foo", foo_program); /// \endcode /// /// And to retrieve the program later: /// \code /// boost::optional<program> p = cache.get("foo"); /// if(p){ /// // program found in cache /// } /// \endcode /// /// \see program class program_cache : boost::noncopyable { public: /// Creates a new program cache with space for \p capacity number of /// program objects. program_cache(size_t capacity) : m_cache(capacity) { } /// Destroys the program cache. ~program_cache() { } /// Returns the number of program objects currently stored in the cache.
size_t size() const { return m_cache.size(); } /// Returns the total capacity of the cache. size_t capacity() const { return m_cache.capacity(); } /// Clears the program cache. void clear() { m_cache.clear(); } /// Returns the program object with \p key. Returns a null optional if no /// program with \p key exists in the cache. boost::optional get(const std::string &key) { return m_cache.get(std::make_pair(key, std::string())); } /// Returns the program object with \p key and \p options. Returns a null /// optional if no program with \p key and \p options exists in the cache. boost::optional get(const std::string &key, const std::string &options) { return m_cache.get(std::make_pair(key, options)); } /// Inserts \p program into the cache with \p key. void insert(const std::string &key, const program &program) { insert(key, std::string(), program); } /// Inserts \p program into the cache with \p key and \p options. void insert(const std::string &key, const std::string &options, const program &program) { m_cache.insert(std::make_pair(key, options), program); } /// Loads the program with \p key from the cache if it exists. Otherwise /// builds a new program with \p source and \p options, stores it in the /// cache, and returns it. /// /// This is a convenience function to simplify the common pattern of /// attempting to load a program from the cache and, if not present, /// building the program from source and storing it in the cache.
/// /// Equivalent to: /// \code /// boost::optional<program> p = get(key, options); /// if(!p){ /// p = program::create_with_source(source, context); /// p->build(options); /// insert(key, options, *p); /// } /// return *p; /// \endcode program get_or_build(const std::string &key, const std::string &options, const std::string &source, const context &context) { boost::optional p = get(key, options); if(!p){ p = program::build_with_source(source, context, options); insert(key, options, *p); } return *p; } /// Returns the global program cache for \p context. /// /// This global cache is used internally by Boost.Compute to store compiled /// program objects used by its algorithms. All Boost.Compute programs are /// stored with a cache key beginning with \c "__boost". User programs /// should avoid using the same prefix in order to prevent collisions. static boost::shared_ptr get_global_cache(const context &context) { typedef detail::lru_cache > cache_map; BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(cache_map, caches, (8)); boost::optional > cache = caches.get(context.get()); if(!cache){ cache = boost::make_shared(64); caches.insert(context.get(), *cache); } return *cache; } private: detail::lru_cache, program> m_cache; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP compute-0.5/include/boost/compute/utility/source.hpp000066400000000000000000000024571263566244600230210ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_SOURCE_HPP #define BOOST_COMPUTE_UTILITY_SOURCE_HPP /// Stringizes OpenCL source code.
/// /// For example, to create a simple kernel which squares each input value: /// \code /// const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( /// __kernel void square(__global const float *input, __global float *output) /// { /// const uint i = get_global_id(0); /// const float x = input[i]; /// output[i] = x * x; /// } /// ); /// /// // create and build square program /// program square_program = program::build_with_source(source, context); /// /// // create square kernel /// kernel square_kernel(square_program, "square"); /// \endcode #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED #define BOOST_COMPUTE_STRINGIZE_SOURCE(source) #else #define BOOST_COMPUTE_STRINGIZE_SOURCE(...) #__VA_ARGS__ #endif #endif // BOOST_COMPUTE_UTILITY_SOURCE_HPP compute-0.5/include/boost/compute/utility/wait_list.hpp000066400000000000000000000075061263566244600235200ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_UTILITY_WAIT_LIST_HPP #define BOOST_COMPUTE_UTILITY_WAIT_LIST_HPP #include #include namespace boost { namespace compute { template class future; /// \class wait_list /// \brief Stores a list of events. /// /// The wait_list class stores a set of event objects and can be used to /// specify dependencies for OpenCL operations or to wait on the host until /// all of the events have completed. /// /// This class also provides convenience functions for interacting with /// OpenCL APIs which typically accept event dependencies as a \c cl_event* /// pointer and a \c cl_uint size.
For example: /// \code /// wait_list events = ...; /// /// clEnqueueNDRangeKernel(..., events.size(), events.get_event_ptr(), ...); /// \endcode /// /// \see event, \ref future "future" class wait_list { public: /// Creates an empty wait-list. wait_list() { } /// Creates a wait-list containing \p event. wait_list(const event &event) { insert(event); } /// Creates a new wait-list as a copy of \p other. wait_list(const wait_list &other) : m_events(other.m_events) { } /// Copies the events in the wait-list from \p other. wait_list& operator=(const wait_list &other) { if(this != &other){ m_events = other.m_events; } return *this; } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new wait list object from \p other. wait_list(wait_list&& other) : m_events(std::move(other.m_events)) { } /// Move-assigns the wait list from \p other to \c *this. wait_list& operator=(wait_list&& other) { m_events = std::move(other.m_events); return *this; } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Destroys the wait-list. ~wait_list() { } /// Returns \c true if the wait-list is empty. bool empty() const { return m_events.empty(); } /// Returns the number of events in the wait-list. uint_ size() const { return m_events.size(); } /// Removes all of the events from the wait-list. void clear() { m_events.clear(); } /// Returns a cl_event pointer to the first event in the wait-list. /// Returns \c 0 if the wait-list is empty. /// /// This can be used to pass the wait-list to OpenCL functions which /// expect a \c cl_event pointer to refer to a list of events. const cl_event* get_event_ptr() const { if(empty()){ return 0; } return reinterpret_cast(&m_events[0]); } /// Inserts \p event into the wait-list. void insert(const event &event) { m_events.push_back(event); } /// Inserts the event from \p future into the wait-list. template void insert(const future &future) { insert(future.get_event()); } /// Blocks until all of the events in the wait-list have completed.
/// /// Does nothing if the wait-list is empty. void wait() const { if(!empty()){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clWaitForEvents(size(), get_event_ptr()) ); } } private: std::vector m_events; }; } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_UTILITY_WAIT_LIST_HPP compute-0.5/include/boost/compute/version.hpp000066400000000000000000000012311263566244600214700ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_VERSION_HPP #define BOOST_COMPUTE_VERSION_HPP #define BOOST_COMPUTE_VERSION_MAJOR 0 #define BOOST_COMPUTE_VERSION_MINOR 5 #define BOOST_COMPUTE_VERSION_PATCH 0 #endif // BOOST_COMPUTE_VERSION_HPP compute-0.5/include/boost/compute/wait_list.hpp000066400000000000000000000010551263566244600220060ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// // deprecated, use instead #include compute-0.5/index.html000066400000000000000000000007571263566244600150560ustar00rootroot00000000000000 Automatic redirection failed, please go to doc/html/index.html
Boost.Compute

Copyright (C) 2013-2015 Kyle Lutz

Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

compute-0.5/meta/000077500000000000000000000000001263566244600137765ustar00rootroot00000000000000compute-0.5/meta/libraries.json000066400000000000000000000004141263566244600166440ustar00rootroot00000000000000{ "key": "compute", "name": "Compute", "authors": [ "Kyle Lutz" ], "description": "Parallel/GPU-computing library", "category": [ "Concurrent" ], "maintainers": [ "Kyle Lutz " ] } compute-0.5/perf/000077500000000000000000000000001263566244600140045ustar00rootroot00000000000000compute-0.5/perf/CMakeLists.txt000066400000000000000000000114701263566244600165470ustar00rootroot00000000000000# --------------------------------------------------------------------------- # Copyright (c) 2013 Kyle Lutz # # Distributed under the Boost Software License, Version 1.0 # See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt # # --------------------------------------------------------------------------- include_directories(../include) set(PERF_BOOST_COMPONENTS system timer chrono program_options) if (${BOOST_COMPUTE_USE_OFFLINE_CACHE}) set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} filesystem) endif() if(${BOOST_COMPUTE_THREAD_SAFE} AND NOT ${BOOST_COMPUTE_USE_CPP11}) set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} thread) elseif(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11}) set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} thread) endif() if(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11}) set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} date_time) endif() find_package(Boost 1.48 REQUIRED COMPONENTS ${PERF_BOOST_COMPONENTS}) include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) set(BENCHMARKS accumulate bernoulli_distribution binary_find cart_to_polar copy_if copy_to_device count discrete_distribution erase_remove exclusive_scan fill find find_end includes inner_product is_permutation is_sorted max_element merge next_permutation nth_element partial_sum partition partition_point prev_permutation reverse reverse_copy 
rotate rotate_copy host_sort random_number_engine reduce_by_key saxpy search search_n set_difference set_intersection set_symmetric_difference set_union sort sort_by_key sort_float stable_partition uniform_int_distribution unique unique_copy ) foreach(BENCHMARK ${BENCHMARKS}) set(PERF_TARGET perf_${BENCHMARK}) add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp) target_link_libraries(${PERF_TARGET} ${OPENCL_LIBRARIES} ${Boost_LIBRARIES}) endforeach() # stl benchmarks (for comparison) set(STL_BENCHMARKS stl_accumulate stl_count stl_find stl_find_end stl_includes stl_inner_product stl_max_element stl_merge stl_next_permutation stl_partial_sum stl_partition stl_prev_permutation stl_reverse stl_reverse_copy stl_rotate stl_rotate_copy stl_saxpy stl_search stl_search_n stl_set_difference stl_set_intersection stl_set_symmetric_difference stl_set_union stl_sort stl_stable_partition stl_unique stl_unique_copy ) # stl benchmarks which require c++11 if(${BOOST_COMPUTE_USE_CPP11}) list(APPEND STL_BENCHMARKS stl_is_permutation stl_partition_point ) endif() foreach(BENCHMARK ${STL_BENCHMARKS}) set(PERF_TARGET perf_${BENCHMARK}) add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp) target_link_libraries(${PERF_TARGET} ${Boost_LIBRARIES}) endforeach() # cuda/thrust benchmarks (for comparison) if(${BOOST_COMPUTE_HAVE_CUDA}) find_package(CUDA 5.0 REQUIRED) set(CUDA_BENCHMARKS thrust_accumulate thrust_count thrust_exclusive_scan thrust_find thrust_inner_product thrust_merge thrust_partial_sum thrust_partition thrust_reduce_by_key thrust_reverse thrust_reverse_copy thrust_rotate thrust_saxpy thrust_set_difference thrust_sort thrust_unique ) foreach(BENCHMARK ${CUDA_BENCHMARKS}) set(PERF_TARGET perf_${BENCHMARK}) cuda_add_executable(${PERF_TARGET} perf_${BENCHMARK}.cu) target_link_libraries(${PERF_TARGET} ${CUDA_LIBRARIES} ${Boost_LIBRARIES}) endforeach() endif() # intel tbb benchmarks (for comparison) if(${BOOST_COMPUTE_HAVE_TBB}) find_package(TBB REQUIRED) include_directories(SYSTEM 
${TBB_INCLUDE_DIRS}) set(TBB_BENCHMARKS tbb_accumulate tbb_merge tbb_sort ) foreach(BENCHMARK ${TBB_BENCHMARKS}) set(PERF_TARGET perf_${BENCHMARK}) add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp) target_link_libraries(${PERF_TARGET} ${TBB_LIBRARIES} ${Boost_LIBRARIES}) endforeach() endif() # bolt c++ template lib benchmarks (for comparison) if(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11}) find_package(Bolt REQUIRED) include_directories(SYSTEM ${BOLT_INCLUDE_DIRS}) set(BOLT_BENCHMARKS bolt_accumulate bolt_count bolt_exclusive_scan bolt_fill bolt_inner_product bolt_max_element bolt_merge bolt_partial_sum bolt_reduce_by_key bolt_saxpy bolt_sort ) foreach(BENCHMARK ${BOLT_BENCHMARKS}) set(PERF_TARGET perf_${BENCHMARK}) add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp) target_link_libraries(${PERF_TARGET} ${OPENCL_LIBRARIES} ${BOLT_LIBRARIES} ${Boost_LIBRARIES}) endforeach() elseif(${BOOST_COMPUTE_HAVE_BOLT} AND NOT ${BOOST_COMPUTE_USE_CPP11}) message(WARNING "BOOST_COMPUTE_USE_CPP11 must be ON for building Bolt C++ Template Library performance tests.") endif() compute-0.5/perf/perf.hpp000066400000000000000000000047001263566244600154520ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef PERF_HPP #define PERF_HPP // this header contains general purpose functions and variables used by // the boost.compute performance benchmarks. 
#include #include #include #include #include static size_t PERF_N = 1024; static size_t PERF_TRIALS = 1; // parses command line arguments and sets the corresponding perf variables inline void perf_parse_args(int argc, char *argv[]) { if(argc >= 2){ PERF_N = boost::lexical_cast(argv[1]); } // TODO: make this configurable from the command line PERF_TRIALS = 3; } // generates a vector of random numbers template std::vector generate_random_vector(const size_t size) { std::vector vector(size); std::generate(vector.begin(), vector.end(), rand); return vector; } // a simple timer wrapper which records multiple time entries class perf_timer { public: typedef boost::timer::nanosecond_type nanosecond_type; perf_timer() { timer.stop(); } void start() { timer.start(); } void stop() { timer.stop(); times.push_back(timer.elapsed().wall); } size_t trials() const { return times.size(); } void clear() { times.clear(); } nanosecond_type last_time() const { return times.back(); } nanosecond_type min_time() const { return *std::min_element(times.begin(), times.end()); } nanosecond_type max_time() const { return *std::max_element(times.begin(), times.end()); } boost::timer::cpu_timer timer; std::vector times; }; // returns the rate (in MB/s) for processing 'count' items of type 'T' // in 'time' nanoseconds template double perf_rate(const size_t count, perf_timer::nanosecond_type time) { const size_t byte_count = count * sizeof(T); return (double(byte_count) / 1024 / 1024) / (time / 1e9); } #endif // PERF_HPP compute-0.5/perf/perf.py000077500000000000000000000131451263566244600153210ustar00rootroot00000000000000#!/usr/bin/python # Copyright (c) 2014 Kyle Lutz # Distributed under the Boost Software License, Version 1.0 # See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt # # See http://boostorg.github.com/compute for more information. # driver script for boost.compute benchmarking. will run a # benchmark for a given function (e.g. accumulate, sort). 
import os import sys import subprocess try: import pylab except: print('pylab not found, no ploting...') pass def run_perf_process(name, size, backend = ""): if not backend: proc = "perf_%s" % name else: proc = "perf_%s_%s" % (backend, name) filename = "./perf/" + proc if not os.path.isfile(filename): print("Error: failed to find ", filename, " for running") return 0 try: output = subprocess.check_output([filename, str(int(size))]) except: return 0 t = 0 for line in output.decode('utf8').split("\n"): if line.startswith("time:"): t = float(line.split(":")[1].split()[0]) return t class Report: def __init__(self, name): self.name = name self.samples = {} def add_sample(self, name, size, time): if not name in self.samples: self.samples[name] = [] self.samples[name].append((size, time)) def display(self): for name in self.samples.keys(): print('=== %s with %s ===' % (self.name, name)) print('size,time (ms)') for sample in self.samples[name]: print('%d,%f' % sample) def plot_time(self, name): if not name in self.samples: return x = [] y = [] any_valid_samples = False for sample in self.samples[name]: if sample[1] == 0: continue x.append(sample[0]) y.append(sample[1]) any_valid_samples = True if not any_valid_samples: return pylab.loglog(x, y, marker='o', label=name) pylab.xlabel("Size") pylab.ylabel("Time (ms)") pylab.title(self.name) def plot_rate(self, name): if not name in self.samples: return x = [] y = [] any_valid_samples = False for sample in self.samples[name]: if sample[1] == 0: continue x.append(sample[0]) y.append(float(sample[0]) / (float(sample[1]) * 1e-3)) any_valid_samples = True if not any_valid_samples: return pylab.loglog(x, y, marker='o', label=name) pylab.xlabel("Size") pylab.ylabel("Rate (values/s)") pylab.title(self.name) def run_benchmark(name, sizes, vs=[]): report = Report(name) for size in sizes: time = run_perf_process(name, size) report.add_sample("compute", size, time) competitors = { "thrust" : [ "accumulate", "count", "exclusive_scan", 
"find", "inner_product", "merge", "partial_sum", "partition", "reduce_by_key", "reverse", "reverse_copy", "rotate", "saxpy", "sort", "unique" ], "bolt" : [ "accumulate", "count", "exclusive_scan", "fill", "inner_product", "max_element", "merge", "partial_sum", "reduce_by_key", "saxpy", "sort" ], "tbb": [ "accumulate", "merge", "sort" ], "stl": [ "accumulate", "count", "find", "find_end", "includes", "inner_product", "is_permutation", "max_element", "merge", "next_permutation", "nth_element", "partial_sum", "partition", "partition_point", "prev_permutation", "reverse", "reverse_copy", "rotate", "rotate_copy", "saxpy", "search", "search_n", "set_difference", "set_intersection", "set_symmetric_difference", "set_union", "sort", "stable_partition", "unique", "unique_copy" ] } for other in vs: if not other in competitors: continue if not name in competitors[other]: continue for size in sizes: time = run_perf_process(name, size, other) report.add_sample(other, size, time) return report if __name__ == '__main__': test = "sort" if len(sys.argv) >= 2: test = sys.argv[1] print('running %s perf test' % test) sizes = [ pow(2, x) for x in range(1, 26) ] sizes = sorted(sizes) competitors = ["bolt", "tbb", "thrust", "stl"] report = run_benchmark(test, sizes, competitors) plot = None if "--plot-time" in sys.argv: plot = "time" elif "--plot-rate" in sys.argv: plot = "rate" if plot == "time": report.plot_time("compute") for competitor in competitors: report.plot_time(competitor) elif plot == "rate": report.plot_rate("compute") for competitor in competitors: report.plot_rate(competitor) if plot: pylab.legend(loc='upper left') pylab.show() else: report.display() compute-0.5/perf/perf_accumulate.cpp000066400000000000000000000105761263566244600176600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file 
LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include "perf.hpp" namespace po = boost::program_options; namespace compute = boost::compute; int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } template double perf_accumulate(const compute::vector& data, const size_t trials, compute::command_queue& queue) { perf_timer t; for(size_t trial = 0; trial < trials; trial++){ t.start(); compute::accumulate(data.begin(), data.end(), T(0), queue); queue.finish(); t.stop(); } return t.min_time(); } template void tune_accumulate(const compute::vector& data, const size_t trials, compute::command_queue& queue) { boost::shared_ptr params = compute::detail::parameter_cache::get_global_cache(queue.get_device()); const std::string cache_key = std::string("__boost_reduce_on_gpu_") + compute::type_name(); const compute::uint_ tpbs[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 }; const compute::uint_ vpts[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; double min_time = (std::numeric_limits::max)(); compute::uint_ best_tpb = 0; compute::uint_ best_vpt = 0; for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){ params->set(cache_key, "tpb", tpbs[i]); for(size_t j = 0; j < sizeof(vpts) / sizeof(*vpts); j++){ params->set(cache_key, "vpt", vpts[j]); try { const double t = perf_accumulate(data, trials, queue); if(t < min_time){ best_tpb = tpbs[i]; best_vpt = vpts[j]; min_time = t; } } catch(compute::opencl_error&){ // invalid parameters for this device, skip } } } // store optimal parameters params->set(cache_key, "tpb", best_tpb); params->set(cache_key, "vpt", best_vpt); } int main(int argc, char *argv[]) { // setup command line arguments po::options_description options("options"); options.add_options() ("help", 
"show usage instructions") ("size", po::value()->default_value(8192), "input size") ("trials", po::value()->default_value(3), "number of trials to run") ("tune", "run tuning procedure") ; po::positional_options_description positional_options; positional_options.add("size", 1); // parse command line po::variables_map vm; po::store( po::command_line_parser(argc, argv) .options(options).positional(positional_options).run(), vm ); po::notify(vm); const size_t size = vm["size"].as(); const size_t trials = vm["trials"].as(); std::cout << "size: " << size << std::endl; // setup context and queue for the default device compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_data(size); std::generate(host_data.begin(), host_data.end(), rand_int); // create vector on the device and copy the data compute::vector device_data( host_data.begin(), host_data.end(), queue ); // run tuning proceure (if requested) if(vm.count("tune")){ tune_accumulate(device_data, trials, queue); } // run benchmark double t = perf_accumulate(device_data, trials, queue); std::cout << "time: " << t / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_bernoulli_distribution.cpp000066400000000000000000000025741263566244600223260ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" namespace compute = boost::compute; int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); compute::vector vector(PERF_N, context); compute::default_random_engine rng(queue); compute::bernoulli_distribution dist(0.5); perf_timer t; t.start(); dist.generate(vector.begin(), vector.end(), rng, queue); queue.finish(); t.stop(); std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_binary_find.cpp000066400000000000000000000043541263566244600200160ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); using boost::compute::_1; boost::compute::partition( device_vector.begin(), device_vector.end(), _1 < 20, queue ); // just to be sure everything is finished before measuring execution time // of binary_find algorithm queue.finish(); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::detail::binary_find( device_vector.begin(), device_vector.end(), _1 >= 20, queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_bolt_accumulate.cpp000066400000000000000000000030101263566244600206610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create host vector std::vector host_vec = generate_random_vector(PERF_N); // create device vectors bolt::cl::device_vector device_vec(PERF_N); // transfer data to the device bolt::cl::copy(host_vec.begin(), host_vec.end(), device_vec.begin()); int sum = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); sum = bolt::cl::reduce(device_vec.begin(), device_vec.end()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "sum: " << sum << std::endl; return 0; } compute-0.5/perf/perf_bolt_count.cpp000066400000000000000000000032051263566244600176740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create vector of random numbers on the host std::vector h_vec(PERF_N); std::generate(h_vec.begin(), h_vec.end(), rand_int); // create device vector bolt::cl::device_vector d_vec(PERF_N); // transfer data to the device bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin()); size_t count = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); count = bolt::cl::count(ctrl, d_vec.begin(), d_vec.end(), 4); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "count: " << count << std::endl; return 0; } compute-0.5/perf/perf_bolt_exclusive_scan.cpp000066400000000000000000000030751263566244600215640ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create vector of random numbers on the host std::vector h_vec = generate_random_vector(PERF_N); // create device vector bolt::cl::device_vector d_vec(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ // transfer data to the device bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin()); t.start(); bolt::cl::exclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host bolt::cl::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); return 0; } compute-0.5/perf/perf_bolt_fill.cpp000066400000000000000000000024041263566244600174720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create device vector (filled with zeros) bolt::cl::device_vector d_vec(PERF_N, 0); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); bolt::cl::fill(d_vec.begin(), d_vec.end(), int(trial)); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_bolt_inner_product.cpp000066400000000000000000000033761263566244600214300ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create host vectors std::vector host_x = generate_random_vector(PERF_N); std::vector host_y = generate_random_vector(PERF_N); // create device vectors bolt::cl::device_vector device_x(PERF_N); bolt::cl::device_vector device_y(PERF_N); // transfer data to the device bolt::cl::copy(host_x.begin(), host_x.end(), device_x.begin()); bolt::cl::copy(host_y.begin(), host_y.end(), device_y.begin()); int product = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); product = bolt::cl::inner_product( device_x.begin(), device_x.end(), device_y.begin(), 0 ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "product: " << product << std::endl; return 0; } compute-0.5/perf/perf_bolt_max_element.cpp000066400000000000000000000040441263566244600210440ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast(rand() % 10000000); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create host vector std::vector host_vec = generate_random_vector(PERF_N); // create device vectors bolt::cl::device_vector device_vec(PERF_N); // transfer data to the device bolt::cl::copy(host_vec.begin(), host_vec.end(), device_vec.begin()); bolt::cl::device_vector::iterator max_iter = device_vec.begin(); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); max_iter = bolt::cl::max_element(device_vec.begin(), device_vec.end()); t.stop(); } int device_max = *max_iter; std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "max: " << device_max << std::endl; // verify max is correct int host_max = *std::max_element(host_vec.begin(), host_vec.end()); if(device_max != host_max){ std::cout << "ERROR: " << "device_max (" << device_max << ") " << "!= " << "host_max (" << host_max << ")" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_bolt_merge.cpp000066400000000000000000000037551263566244600176550ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create vector of random numbers on the host std::vector host_vec1 = generate_random_vector(std::floor(PERF_N / 2.0)); std::vector host_vec2 = generate_random_vector(std::ceil(PERF_N / 2.0)); // sort them std::sort(host_vec1.begin(), host_vec1.end()); std::sort(host_vec2.begin(), host_vec2.end()); // create device vectors bolt::cl::device_vector device_vec1(PERF_N); bolt::cl::device_vector device_vec2(PERF_N); bolt::cl::device_vector device_vec3(PERF_N); // transfer data to the device bolt::cl::copy(host_vec1.begin(), host_vec1.end(), device_vec1.begin()); bolt::cl::copy(host_vec2.begin(), host_vec2.end(), device_vec2.begin()); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); bolt::cl::merge( device_vec1.begin(), device_vec1.end(), device_vec2.begin(), device_vec2.end(), device_vec3.begin() ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_bolt_partial_sum.cpp000066400000000000000000000030761263566244600210720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create vector of random numbers on the host std::vector h_vec = generate_random_vector(PERF_N); // create device vector bolt::cl::device_vector d_vec(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ // transfer data to the device bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin()); t.start(); bolt::cl::inclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host bolt::cl::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); return 0; } compute-0.5/perf/perf_bolt_reduce_by_key.cpp000066400000000000000000000061151263566244600213600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } struct unique_key { int current; int avgValuesNoPerKey; unique_key() { current = 0; avgValuesNoPerKey = 512; } int operator()() { double p = double(1.0) / static_cast(avgValuesNoPerKey); if((rand() / double(RAND_MAX)) <= p) return ++current; return current; } } UniqueKey; int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create vector of keys and random values std::vector host_keys(PERF_N); std::vector host_values(PERF_N); std::generate(host_keys.begin(), host_keys.end(), UniqueKey); std::generate(host_values.begin(), host_values.end(), rand_int); // create device vectors for data bolt::cl::device_vector device_keys(PERF_N); bolt::cl::device_vector device_values(PERF_N); // transfer data to the device bolt::cl::copy(host_keys.begin(), host_keys.end(), device_keys.begin()); bolt::cl::copy(host_values.begin(), host_values.end(), device_values.begin()); // create device vectors for the results bolt::cl::device_vector device_keys_results(PERF_N); bolt::cl::device_vector device_values_results(PERF_N); typedef bolt::cl::device_vector::iterator iterType; bolt::cl::pair result = { device_keys_results.begin(), device_values_results.begin() }; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); result = bolt::cl::reduce_by_key(device_keys.begin(), device_keys.end(), device_values.begin(), device_keys_results.begin(), device_values_results.begin()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; size_t result_size = bolt::cl::distance(device_keys_results.begin(), result.first); 
if(result_size != static_cast(host_keys[PERF_N-1] + 1)){ std::cout << "ERROR: " << "wrong number of keys" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_bolt_saxpy.cpp000066400000000000000000000043111263566244600177070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" BOLT_FUNCTOR(saxpy_functor, struct saxpy_functor { float _a; saxpy_functor(float a) : _a(a) {}; float operator() (const float &x, const float &y) const { return _a * x + y; }; }; ) int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; bolt::cl::control ctrl = bolt::cl::control::getDefault(); ::cl::Device device = ctrl.getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create host vectors std::vector host_x(PERF_N); std::vector host_y(PERF_N); std::generate(host_x.begin(), host_x.end(), rand); std::generate(host_y.begin(), host_y.end(), rand); // create device vectors bolt::cl::device_vector device_x(PERF_N); bolt::cl::device_vector device_y(PERF_N); // transfer data to the device bolt::cl::copy(host_x.begin(), host_x.end(), device_x.begin()); bolt::cl::copy(host_y.begin(), host_y.end(), device_y.begin()); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); bolt::cl::transform( device_x.begin(), device_x.end(), device_y.begin(), device_y.begin(), saxpy_functor(2.5f) ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host bolt::cl::copy(device_x.begin(), device_x.end(), 
host_x.begin()); bolt::cl::copy(device_y.begin(), device_y.end(), host_y.begin()); return 0; } compute-0.5/perf/perf_bolt_sort.cpp000066400000000000000000000027461263566244600175440ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; ::cl::Device device = bolt::cl::control::getDefault().getDevice(); std::cout << "device: " << device.getInfo() << std::endl; // create host vector std::vector h_vec = generate_random_vector(PERF_N); // create device vector bolt::cl::device_vector d_vec(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ // transfer data to the device bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin()); t.start(); bolt::cl::sort(d_vec.begin(), d_vec.end()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host bolt::cl::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); return 0; } compute-0.5/perf/perf_cart_to_polar.cpp000066400000000000000000000106021263566244600203530ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define _USE_MATH_DEFINES #include #include #include #include #include #include #include #include #include "perf.hpp" namespace compute = boost::compute; using compute::float2_; float rand_float() { return (float(rand()) / float(RAND_MAX)) * 1000.f; } void serial_cartesian_to_polar(const float *input, size_t n, float *output) { for(size_t i = 0; i < n; i++){ float x = input[i*2+0]; float y = input[i*2+1]; float magnitude = std::sqrt(x*x + y*y); float angle = std::atan2(y, x) * 180.f / M_PI; output[i*2+0] = magnitude; output[i*2+1] = angle; } } void serial_polar_to_cartesian(const float *input, size_t n, float *output) { for(size_t i = 0; i < n; i++){ float magnitude = input[i*2+0]; float angle = input[i*2+1]; float x = magnitude * cos(angle); float y = magnitude * sin(angle); output[i*2+0] = x; output[i*2+1] = y; } } // converts from cartesian coordinates (x, y) to polar coordinates (magnitude, angle) BOOST_COMPUTE_FUNCTION(float2_, cartesian_to_polar, (float2_ p), { float x = p.x; float y = p.y; float magnitude = sqrt(x*x + y*y); float angle = atan2(y, x) * 180.f / M_PI; return (float2)(magnitude, angle); }); // converts from polar coordinates (magnitude, angle) to cartesian coordinates (x, y) BOOST_COMPUTE_FUNCTION(float2_, polar_to_cartesian, (float2_ p), { float magnitude = p.x; float angle = p.y; float x = magnitude * cos(angle); float y = magnitude * sin(angle); return (float2)(x, y) }); int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N*2); std::generate(host_vector.begin(), host_vector.end(), 
rand_float); // create vector on the device and copy the data compute::vector device_vector(PERF_N, context); compute::copy_n( reinterpret_cast(&host_vector[0]), PERF_N, device_vector.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); compute::transform( device_vector.begin(), device_vector.end(), device_vector.begin(), cartesian_to_polar, queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // perform saxpy on host t.clear(); for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); serial_cartesian_to_polar(&host_vector[0], PERF_N, &host_vector[0]); t.stop(); } std::cout << "host time: " << t.min_time() / 1e6 << " ms" << std::endl; std::vector device_data(PERF_N*2); compute::copy( device_vector.begin(), device_vector.end(), reinterpret_cast(&device_data[0]), queue ); for(size_t i = 0; i < PERF_N; i++){ float host_value = host_vector[i]; float device_value = device_data[i]; if(std::abs(device_value - host_value) > 1e-3){ std::cout << "ERROR: " << "value at " << i << " " << "device_value (" << device_value << ") " << "!= " << "host_value (" << host_value << ")" << std::endl; return -1; } } return 0; } compute-0.5/perf/perf_copy_if.cpp000066400000000000000000000101231263566244600171510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" namespace compute = boost::compute; void test_copy_if_odd(compute::command_queue &queue) { // create input and output vectors on the device const compute::context &context = queue.get_context(); compute::vector input(PERF_N, context); compute::vector output(PERF_N, context); // generate random numbers between 1 and 10 compute::default_random_engine rng(queue); compute::uniform_int_distribution d(1, 10); d.generate(input.begin(), input.end(), rng, queue); BOOST_COMPUTE_FUNCTION(bool, is_odd, (int x), { return x & 1; }); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); compute::vector::iterator i = compute::copy_if( input.begin(), input.end(), output.begin(), is_odd, queue ); queue.finish(); t.stop(); float ratio = float(std::distance(output.begin(), i)) / PERF_N; if(PERF_N > 1000 && (ratio < 0.45f || ratio > 0.55f)){ std::cerr << "error: ratio is " << ratio << std::endl; std::cerr << "error: ratio should be around 45-55%" << std::endl; } } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; } void test_copy_if_in_sphere(compute::command_queue &queue) { using boost::compute::float4_; // create input and output vectors on the device const compute::context &context = queue.get_context(); compute::vector input_points(PERF_N, context); compute::vector output_points(PERF_N, context); // generate random numbers in a cube float radius = 5.0f; compute::default_random_engine rng(queue); compute::uniform_real_distribution d(-radius, +radius); d.generate( compute::make_buffer_iterator(input_points.get_buffer(), 0), compute::make_buffer_iterator(input_points.get_buffer(), PERF_N * 4), rng, queue ); // predicate which returns true if the point lies within the sphere BOOST_COMPUTE_CLOSURE(bool, is_in_sphere, (float4_ point), (radius), { // ignore fourth component point.w = 0; 
return length(point) < radius; }); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); compute::vector::iterator i = compute::copy_if( input_points.begin(), input_points.end(), output_points.begin(), is_in_sphere, queue ); queue.finish(); t.stop(); float ratio = float(std::distance(output_points.begin(), i)) / PERF_N; if(PERF_N > 1000 && (ratio < 0.5f || ratio > 0.6f)){ std::cerr << "error: ratio is " << ratio << std::endl; std::cerr << "error: ratio should be around 50-60%" << std::endl; } } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; test_copy_if_odd(queue); return 0; } compute-0.5/perf/perf_copy_to_device.cpp000066400000000000000000000034261263566244600205240ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include int main(int argc, char *argv[]) { size_t size = 1000; if(argc >= 2){ size = boost::lexical_cast(argv[1]); } boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue::properties properties = boost::compute::command_queue::enable_profiling; boost::compute::command_queue queue(context, device, properties); std::vector host_vector(size); std::generate(host_vector.begin(), host_vector.end(), rand); boost::compute::vector device_vector(host_vector.size(), context); boost::compute::future future = boost::compute::copy_async(host_vector.begin(), host_vector.end(), device_vector.begin(), queue); // wait for copy to finish future.wait(); // get elapsed time in nanoseconds size_t elapsed = future.get_event().duration().count(); std::cout << "time: " << elapsed / 1e6 << " ms" << std::endl; float rate = (float(size * sizeof(int)) / elapsed) * 1000.f; std::cout << "rate: " << rate << " MB/s" << std::endl; return 0; } compute-0.5/perf/perf_count.cpp000066400000000000000000000045661263566244600166670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); size_t count = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); count = boost::compute::count( device_vector.begin(), device_vector.end(), 4, queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "count: " << count << std::endl; // verify count is correct size_t host_count = std::count(host_vector.begin(), host_vector.end(), 4); if(count != host_count){ std::cout << "ERROR: " << "device_count (" << count << ") " << "!= " << "host_count (" << host_count << ")" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_discrete_distribution.cpp000066400000000000000000000026711263566244600221330ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more 
information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" namespace compute = boost::compute; int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); compute::vector vector(PERF_N, context); int weights[] = {1, 1}; compute::default_random_engine rng(queue); compute::discrete_distribution dist(weights, weights+2); perf_timer t; t.start(); dist.generate(vector.begin(), vector.end(), rng, queue); queue.finish(); t.stop(); std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_erase_remove.cpp000066400000000000000000000035271263566244600202070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 10.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::vector device_vector( host_vector.begin(), host_vector.end(), queue ); t.start(); device_vector.erase( boost::compute::remove( device_vector.begin(), device_vector.end(), 4, queue ), device_vector.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_exclusive_scan.cpp000066400000000000000000000053621263566244600205450ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Benoit // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::vector device_res(PERF_N,context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sum vector perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); t.start(); boost::compute::exclusive_scan( device_vector.begin(), device_vector.end(), device_res.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify sum is correct std::partial_sum( host_vector.begin(), host_vector.end(), host_vector.begin() ); int device_sum = device_res.back(); // when scan is exclusive values are shifted by one on the left // compared to a inclusive scan int host_sum = host_vector[host_vector.size()-2]; if(device_sum != host_sum){ std::cout << "ERROR: " << "device_sum (" << device_sum << ") " << "!= " << "host_sum (" << host_sum << ")" << std::endl; return -1; } return 0; } 
compute-0.5/perf/perf_fill.cpp000066400000000000000000000026151263566244600164560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector on the device (filled with zeros) boost::compute::vector vec(PERF_N, 0, queue); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::fill(vec.begin(), vec.end(), int(trial), queue); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_find.cpp000066400000000000000000000057741263566244600164610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" // Max integer that can be generated by rand_int() function. 
int rand_int_max = 25; int rand_int() { return static_cast((rand() / double(RAND_MAX)) * rand_int_max); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // trying to find element that isn't in vector (worst-case scenario) int wanted = rand_int_max + 1; // device iterator boost::compute::vector::iterator device_result_it; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); device_result_it = boost::compute::find(device_vector.begin(), device_vector.end(), wanted, queue); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify if found index is correct by comparing it with std::find() result size_t host_result_index = std::distance(host_vector.begin(), std::find(host_vector.begin(), host_vector.end(), wanted)); size_t device_result_index = device_result_it.get_index(); if(device_result_index != host_result_index){ std::cout << "ERROR: " << "device_result_index (" << device_result_index << ") " << "!= " << "host_result_index (" << host_result_index << ")" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_find_end.cpp000066400000000000000000000040361263566244600172750ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // 
Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); int pattern[] = {2, 6, 6, 7, 8, 4}; // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); boost::compute::vector pattern_vector(pattern, pattern + 6, queue); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::find_end( device_vector.begin(), device_vector.end(), pattern_vector.begin(), pattern_vector.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_host_sort.cpp000066400000000000000000000041341263566244600175520ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector random_vector(PERF_N); std::generate(random_vector.begin(), random_vector.end(), rand); // create input vector for gpu std::vector gpu_vector = random_vector; // sort vector on gpu boost::timer::cpu_timer t; boost::compute::sort( gpu_vector.begin(), gpu_vector.end(), queue ); queue.finish(); std::cout << "time: " << t.elapsed().wall / 1e6 << " ms" << std::endl; // create input vector for host std::vector host_vector = random_vector; // sort vector on host t.start(); std::sort(host_vector.begin(), host_vector.end()); std::cout << "host time: " << t.elapsed().wall / 1e6 << " ms" << std::endl; // ensure that both sorted vectors are equal if(!std::equal(gpu_vector.begin(), gpu_vector.end(), host_vector.begin())){ std::cerr << "ERROR: sorted vectors not the same" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_includes.cpp000066400000000000000000000042351263566244600173360ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vectors of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); std::sort(host_vector.begin(), host_vector.end()); // create vectors on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); boost::compute::vector device_vector2(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector2.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::includes( device_vector.begin(), device_vector.end(), device_vector2.begin(), device_vector2.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_inner_product.cpp000066400000000000000000000044711263566244600204050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; std::vector h1(PERF_N); std::vector h2(PERF_N); std::generate(h1.begin(), h1.end(), rand_int); std::generate(h2.begin(), h2.end(), rand_int); // create vector on the device and copy the data boost::compute::vector d1(PERF_N, context); boost::compute::vector d2(PERF_N, context); boost::compute::copy(h1.begin(), h1.end(), d1.begin(), queue); boost::compute::copy(h2.begin(), h2.end(), d2.begin(), queue); int product = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); product = boost::compute::inner_product( d1.begin(), d1.end(), d2.begin(), int(0), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify product is correct int host_product = std::inner_product( h1.begin(), h1.end(), h2.begin(), int(0) ); if(product != host_product){ std::cout << "ERROR: " << "device_product (" << product << ") " << "!= " << "host_product (" << host_product << ")" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_is_permutation.cpp000066400000000000000000000041461263566244600205730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more 
information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); boost::compute::vector device_vector2(PERF_N, context); boost::compute::copy( host_vector.rbegin(), host_vector.rend(), device_vector2.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::is_permutation( device_vector.begin(), device_vector.end(), device_vector2.begin(), device_vector2.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_is_sorted.cpp000066400000000000000000000042431263566244600175220ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sort and then reverse the random vector boost::compute::sort(device_vector.begin(), device_vector.end(), queue); boost::compute::reverse(device_vector.begin(), device_vector.end(), queue); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); bool sorted = boost::compute::is_sorted( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); if(sorted){ std::cerr << "ERROR: is_sorted() returned true" << std::endl; } } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_max_element.cpp000066400000000000000000000056771263566244600200410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Rastko Anicic // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast(rand() % 10000000); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); boost::compute::vector::iterator device_max_iter = device_vector.begin(); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); device_max_iter = boost::compute::max_element( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); } int device_max = device_max_iter.read(queue); std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "max: " << device_max << std::endl; // verify max is correct std::vector::iterator host_max_iter = std::max_element(host_vector.begin(), host_vector.end()); int host_max = *host_max_iter; if(device_max != host_max){ std::cout << "ERROR: " << "device_max (" << device_max << ") " << "!= " << "host_max (" << host_max << ")" << std::endl; return -1; } size_t host_max_idx = std::distance(host_vector.begin(), host_max_iter); size_t device_max_idx = std::distance(device_vector.begin(), device_max_iter); if(device_max_idx != host_max_idx){ std::cout << "ERROR: " << "device_max index (" << device_max_idx << ") " << "!= " << "host_max index (" << 
host_max_idx << ")" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_merge.cpp000066400000000000000000000044351263566244600166310ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; std::vector v1 = generate_random_vector(std::floor(PERF_N / 2.0)); std::vector v2 = generate_random_vector(std::ceil(PERF_N / 2.0)); std::vector v3(PERF_N); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); boost::compute::vector gpu_v1(v1.begin(), v1.end(), queue); boost::compute::vector gpu_v2(v2.begin(), v2.end(), queue); boost::compute::vector gpu_v3(PERF_N, context); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::merge(gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::vector check_v3(PERF_N); boost::compute::copy(gpu_v3.begin(), gpu_v3.end(), check_v3.begin(), queue); queue.finish(); std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()); bool ok = std::equal(check_v3.begin(), check_v3.end(), v3.begin()); if(!ok){ std::cerr << "ERROR: merged ranges different" << 
std::endl; return -1; } return 0; } compute-0.5/perf/perf_next_permutation.cpp000066400000000000000000000041641263566244600211360ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); std::sort(host_vector.begin(), host_vector.end(), std::greater()); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::next_permutation( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); boost::compute::prev_permutation( device_vector.begin(), device_vector.end(), queue ); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } 
compute-0.5/perf/perf_nth_element.cpp000066400000000000000000000036251263566244600200340ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::nth_element( device_vector.begin(), device_vector.begin()+(PERF_N/2), device_vector.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_partial_sum.cpp000066400000000000000000000052251263566244600200500ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software 
License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::vector device_res(PERF_N,context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sum vector perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); t.start(); boost::compute::partial_sum( device_vector.begin(), device_vector.end(), device_res.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify sum is correct std::partial_sum( host_vector.begin(), host_vector.end(), host_vector.begin() ); int device_sum = device_res.back(); int host_sum = host_vector.back(); if(device_sum != host_sum){ std::cout << "ERROR: " << "device_sum (" << device_sum << ") " << "!= " << "host_sum (" << host_sum << ")" << std::endl; return -1; } return 0; } 
compute-0.5/perf/perf_partition.cpp000066400000000000000000000040221263566244600175330ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); using boost::compute::_1; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); t.start(); boost::compute::partition( device_vector.begin(), device_vector.end(), _1 < 10, queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } 
compute-0.5/perf/perf_partition_point.cpp000066400000000000000000000041421263566244600207470ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); using boost::compute::_1; boost::compute::partition( device_vector.begin(), device_vector.end(), _1 < 20, queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::partition_point( device_vector.begin(), device_vector.end(), _1 < 20, queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } 
compute-0.5/perf/perf_prev_permutation.cpp000066400000000000000000000041371263566244600211340ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); std::sort(host_vector.begin(), host_vector.end()); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::prev_permutation( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); boost::compute::next_permutation( device_vector.begin(), device_vector.end(), queue ); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } 
compute-0.5/perf/perf_random_number_engine.cpp000066400000000000000000000064621263566244600217110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" namespace compute = boost::compute; namespace po = boost::program_options; template void perf_random_number_engine(const size_t size, const size_t trials, compute::command_queue& queue) { typedef typename Engine::result_type T; // create random number engine Engine engine(queue); // create vector on the device std::cout << "size = " << size << std::endl; compute::vector vector(size, queue.get_context()); // generate random numbers perf_timer t; for(size_t i = 0; i < trials; i++){ t.start(); engine.generate(vector.begin(), vector.end(), queue); queue.finish(); t.stop(); } // print result std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "rate: " << perf_rate(size, t.min_time()) << " MB/s" << std::endl; } int main(int argc, char *argv[]) { // setup and parse command line options po::options_description options("options"); options.add_options() ("help", "show usage instructions") ("size", po::value()->default_value(8192), "number of values") ("trials", po::value()->default_value(3), "number of trials") ("engine", po::value()->default_value("default_random_engine"), "random number engine") ; po::variables_map vm; po::store(po::parse_command_line(argc, argv, options), vm); po::notify(vm); if(vm.count("help")) { std::cout << options << std::endl; return 0; } // setup context and queue for the default device compute::device device = 
compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); // get command line options const size_t size = vm["size"].as(); const size_t trials = vm["trials"].as(); const std::string& engine = vm["engine"].as(); // run benchmark if(engine == "default_random_engine"){ perf_random_number_engine(size, trials, queue); } else if(engine == "mersenne_twister_engine"){ perf_random_number_engine(size, trials, queue); } else if(engine == "linear_congruential_engine"){ perf_random_number_engine >(size, trials, queue); } else if(engine == "threefry_engine"){ perf_random_number_engine >(size, trials, queue); } else { std::cerr << "error: unknown random number engine '" << engine << "'" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_reduce_by_key.cpp000066400000000000000000000070461263566244600203440ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } struct unique_key { int current; int avgValuesNoPerKey; unique_key() { current = 0; avgValuesNoPerKey = 512; } int operator()() { double p = double(1.0) / static_cast(avgValuesNoPerKey); if((rand() / double(RAND_MAX)) <= p) return ++current; return current; } } UniqueKey; int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of keys and random values std::vector host_keys(PERF_N); std::vector host_values(PERF_N); std::generate(host_keys.begin(), host_keys.end(), UniqueKey); std::generate(host_values.begin(), host_values.end(), rand_int); // create vectors for keys and values on the device and copy the data boost::compute::vector device_keys(PERF_N, context); boost::compute::vector device_values(PERF_N,context); boost::compute::copy( host_keys.begin(), host_keys.end(), device_keys.begin(), queue ); boost::compute::copy( host_values.begin(), host_values.end(), device_values.begin(), queue ); // vectors for the results boost::compute::vector device_keys_results(PERF_N, context); boost::compute::vector device_values_results(PERF_N,context); typedef boost::compute::vector::iterator iterType; std::pair result( device_keys_results.begin(), device_values_results.begin() ); // reduce by key perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); result = boost::compute::reduce_by_key(device_keys.begin(), device_keys.end(), device_values.begin(), 
device_keys_results.begin(), device_values_results.begin(), queue); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; size_t result_size = std::distance(device_keys_results.begin(), result.first); if(result_size != static_cast(host_keys[PERF_N-1] + 1)){ std::cout << "ERROR: " << "wrong number of keys" << result_size << "\n" << (host_keys[PERF_N-1] + 1) << std::endl; return -1; } return 0; } compute-0.5/perf/perf_reverse.cpp000066400000000000000000000035551263566244600172070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::reverse( device_vector.begin(), device_vector.end(), queue ); 
queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_reverse_copy.cpp000066400000000000000000000040511263566244600202310ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // create vector on the device for reversed data boost::compute::vector device_reversed_vector(PERF_N, context); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::reverse_copy( device_vector.begin(), device_vector.end(), device_reversed_vector.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } 
compute-0.5/perf/perf_rotate.cpp000066400000000000000000000036131263566244600170250ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::rotate( device_vector.begin(), device_vector.begin()+(PERF_N/2), device_vector.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_rotate_copy.cpp000066400000000000000000000037571263566244600200700ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // 
See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); boost::compute::vector device_vector2(PERF_N, context); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::rotate_copy( device_vector.begin(), device_vector.begin()+(PERF_N/2), device_vector.end(), device_vector2.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_saxpy.cpp000066400000000000000000000121731263566244600166740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include #include "perf.hpp" namespace po = boost::program_options; namespace compute = boost::compute; float rand_float() { return (float(rand()) / float(RAND_MAX)) * 1000.f; } template double perf_saxpy(const compute::vector& x, const compute::vector& y, const T alpha, const size_t trials, compute::command_queue& queue) { // create vector on the device to store the result compute::vector result(x.size(), queue.get_context()); perf_timer t; for(size_t trial = 0; trial < trials; trial++){ compute::fill(result.begin(), result.end(), T(0), queue); t.start(); using compute::lambda::_1; using compute::lambda::_2; compute::transform( x.begin(), x.end(), y.begin(), result.begin(), alpha * _1 + _2, queue ); queue.finish(); t.stop(); } return t.min_time(); } template void tune_saxpy(const compute::vector& x, const compute::vector& y, const T alpha, const size_t trials, compute::command_queue& queue) { boost::shared_ptr params = compute::detail::parameter_cache::get_global_cache(queue.get_device()); const std::string cache_key = std::string("__boost_copy_kernel_") + boost::lexical_cast(sizeof(T)); const compute::uint_ tpbs[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 }; const compute::uint_ vpts[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; double min_time = (std::numeric_limits::max)(); compute::uint_ best_tpb = 0; compute::uint_ best_vpt = 0; for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){ params->set(cache_key, "tpb", tpbs[i]); for(size_t j = 0; j < sizeof(vpts) / sizeof(*vpts); j++){ params->set(cache_key, "vpt", vpts[j]); try { const double t = perf_saxpy(x, y, alpha, trials, queue); if(t < min_time){ best_tpb = tpbs[i]; best_vpt = vpts[j]; min_time = t; } } catch(compute::opencl_error&){ // invalid parameters for this device, skip } } } // store optimal parameters params->set(cache_key, "tpb", 
best_tpb); params->set(cache_key, "vpt", best_vpt); } int main(int argc, char *argv[]) { // setup command line arguments po::options_description options("options"); options.add_options() ("help", "show usage instructions") ("size", po::value()->default_value(8192), "input size") ("trials", po::value()->default_value(3), "number of trials to run") ("tune", "run tuning procedure") ("alpha", po::value()->default_value(2.5), "saxpy alpha value") ; po::positional_options_description positional_options; positional_options.add("size", 1); // parse command line po::variables_map vm; po::store( po::command_line_parser(argc, argv) .options(options).positional(positional_options).run(), vm ); po::notify(vm); const size_t size = vm["size"].as(); const size_t trials = vm["trials"].as(); const float alpha = vm["alpha"].as(); std::cout << "size: " << size << std::endl; // setup context and queue for the default device compute::device device = boost::compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_x(size); std::vector host_y(size); std::generate(host_x.begin(), host_x.end(), rand_float); std::generate(host_y.begin(), host_y.end(), rand_float); // create vector on the device and copy the data compute::vector x(host_x.begin(), host_x.end(), queue); compute::vector y(host_y.begin(), host_y.end(), queue); // run tuning proceure (if requested) if(vm.count("tune")){ tune_saxpy(x, y, alpha, trials, queue); } // run benchmark double t = perf_saxpy(x, y, alpha, trials, queue); std::cout << "time: " << t / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_search.cpp000066400000000000000000000040321263566244600167700ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software 
License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); int pattern[] = {2, 6, 6, 7, 8, 4}; // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); boost::compute::vector pattern_vector(pattern, pattern + 6, queue); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::search( device_vector.begin(), device_vector.end(), pattern_vector.begin(), pattern_vector.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_search_n.cpp000066400000000000000000000035771263566244600173220ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more 
information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::search_n( device_vector.begin(), device_vector.end(), 5, 2, queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_set_difference.cpp000066400000000000000000000046751263566244600205050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vectors of random numbers on the host std::vector v1(std::floor(PERF_N / 2.0)); std::vector v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); // create vectors on the device and copy the data boost::compute::vector gpu_v1(std::floor(PERF_N / 2.0), context); boost::compute::vector gpu_v2(std::ceil(PERF_N / 2.0), context); boost::compute::copy( v1.begin(), v1.end(), gpu_v1.begin(), queue ); boost::compute::copy( v2.begin(), v2.end(), gpu_v2.begin(), queue ); boost::compute::vector gpu_v3(PERF_N, context); boost::compute::vector::iterator gpu_v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); gpu_v3_end = boost::compute::set_difference( gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; return 0; } compute-0.5/perf/perf_set_intersection.cpp000066400000000000000000000047011263566244600211070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software 
License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vectors of random numbers on the host std::vector v1(std::floor(PERF_N / 2.0)); std::vector v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); // create vectors on the device and copy the data boost::compute::vector gpu_v1(std::floor(PERF_N / 2.0), context); boost::compute::vector gpu_v2(std::ceil(PERF_N / 2.0), context); boost::compute::copy( v1.begin(), v1.end(), gpu_v1.begin(), queue ); boost::compute::copy( v2.begin(), v2.end(), gpu_v2.begin(), queue ); boost::compute::vector gpu_v3(PERF_N, context); boost::compute::vector::iterator gpu_v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); gpu_v3_end = boost::compute::set_intersection( gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; return 0; } 
compute-0.5/perf/perf_set_symmetric_difference.cpp000066400000000000000000000047211263566244600225710ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vectors of random numbers on the host std::vector v1(std::floor(PERF_N / 2.0)); std::vector v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); // create vectors on the device and copy the data boost::compute::vector gpu_v1(std::floor(PERF_N / 2.0), context); boost::compute::vector gpu_v2(std::ceil(PERF_N / 2.0), context); boost::compute::copy( v1.begin(), v1.end(), gpu_v1.begin(), queue ); boost::compute::copy( v2.begin(), v2.end(), gpu_v2.begin(), queue ); boost::compute::vector gpu_v3(PERF_N, context); boost::compute::vector::iterator gpu_v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); gpu_v3_end = boost::compute::set_symmetric_difference( gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), 
queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; return 0; } compute-0.5/perf/perf_set_union.cpp000066400000000000000000000046631263566244600175400ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vectors of random numbers on the host std::vector v1(std::floor(PERF_N / 2.0)); std::vector v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); // create vectors on the device and copy the data boost::compute::vector gpu_v1(std::floor(PERF_N / 2.0), context); boost::compute::vector gpu_v2(std::ceil(PERF_N / 2.0), context); boost::compute::copy( v1.begin(), v1.end(), gpu_v1.begin(), queue ); boost::compute::copy( v2.begin(), v2.end(), gpu_v2.begin(), queue ); boost::compute::vector gpu_v3(PERF_N, context); boost::compute::vector::iterator gpu_v3_end; perf_timer t; for(size_t trial 
= 0; trial < PERF_TRIALS; trial++){ t.start(); gpu_v3_end = boost::compute::set_union( gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; return 0; } compute-0.5/perf/perf_sort.cpp000066400000000000000000000077531263566244600165270ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include "perf.hpp" namespace po = boost::program_options; namespace compute = boost::compute; template double perf_sort(const std::vector& data, const size_t trials, compute::command_queue& queue) { compute::vector vec(data.size(), queue.get_context()); perf_timer t; for(size_t trial = 0; trial < trials; trial++){ compute::copy(data.begin(), data.end(), vec.begin(), queue); t.start(); compute::sort(vec.begin(), vec.end(), queue); queue.finish(); t.stop(); if(!compute::is_sorted(vec.begin(), vec.end(), queue)){ std::cerr << "ERROR: is_sorted() returned false" << std::endl; } } return t.min_time(); } template void tune_sort(const std::vector& data, const size_t trials, compute::command_queue& queue) { boost::shared_ptr params = compute::detail::parameter_cache::get_global_cache(queue.get_device()); const std::string cache_key = std::string("__boost_radix_sort_") + compute::type_name(); const compute::uint_ tpbs[] = { 32, 64, 128, 256, 512, 1024 }; double min_time = (std::numeric_limits::max)(); compute::uint_ best_tpb = 0; 
for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){ params->set(cache_key, "tpb", tpbs[i]); try { const double t = perf_sort(data, trials, queue); if(t < min_time){ best_tpb = tpbs[i]; min_time = t; } } catch(compute::opencl_error&){ // invalid work group size for this device, skip } } // store optimal parameters params->set(cache_key, "tpb", best_tpb); } int main(int argc, char *argv[]) { // setup command line arguments po::options_description options("options"); options.add_options() ("help", "show usage instructions") ("size", po::value()->default_value(8192), "input size") ("trials", po::value()->default_value(3), "number of trials to run") ("tune", "run tuning procedure") ; po::positional_options_description positional_options; positional_options.add("size", 1); // parse command line po::variables_map vm; po::store( po::command_line_parser(argc, argv) .options(options).positional(positional_options).run(), vm ); po::notify(vm); const size_t size = vm["size"].as(); const size_t trials = vm["trials"].as(); std::cout << "size: " << size << std::endl; // setup context and queue for the default device compute::device device = boost::compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector data(size); std::generate(data.begin(), data.end(), rand); // run tuning proceure (if requested) if(vm.count("tune")){ tune_sort(data, trials, queue); } // run sort benchmark double t = perf_sort(data, trials, queue); std::cout << "time: " << t / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_sort_by_key.cpp000066400000000000000000000053121263566244600200560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file 
LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { using boost::compute::int_; using boost::compute::long_; perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_keys(PERF_N); std::generate(host_keys.begin(), host_keys.end(), rand); std::vector host_values(PERF_N); std::copy(host_keys.begin(), host_keys.end(), host_values.begin()); // create vector on the device and copy the data boost::compute::vector device_keys(PERF_N, context); boost::compute::vector device_values(PERF_N, context); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::copy( host_keys.begin(), host_keys.end(), device_keys.begin(), queue ); boost::compute::copy( host_values.begin(), host_values.end(), device_values.begin(), queue ); t.start(); // sort vector boost::compute::sort_by_key( device_keys.begin(), device_keys.end(), device_values.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify keys are sorted if(!boost::compute::is_sorted(device_keys.begin(), device_keys.end(), queue)){ std::cout << "ERROR: is_sorted() returned false for the keys" << std::endl; return -1; } // verify values are sorted if(!boost::compute::is_sorted(device_values.begin(), device_values.end(), queue)){ std::cout << "ERROR: is_sorted() returned false for the values" << std::endl; 
return -1; } return 0; } compute-0.5/perf/perf_sort_float.cpp000066400000000000000000000042121263566244600176770ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" float rand_float() { return ((rand() / float(RAND_MAX)) - 0.5f) * 100000.0f; } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_float); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sort vector perf_timer t; t.start(); boost::compute::sort( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); std::cout << "time: " << t.last_time() / 1e6 << " ms" << std::endl; // verify vector is sorted if(!boost::compute::is_sorted(device_vector.begin(), device_vector.end(), queue)){ std::cout << "ERROR: is_sorted() returned false" << std::endl; return -1; } return 0; } 
compute-0.5/perf/perf_stable_partition.cpp000066400000000000000000000036451263566244600210770ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); using boost::compute::_1; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); boost::compute::stable_partition( device_vector.begin(), device_vector.end(), _1 < 10, queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_accumulate.cpp000066400000000000000000000023541263566244600205350ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost 
Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); int sum = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0)); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "sum: " << sum << std::endl; return 0; } compute-0.5/perf/perf_stl_count.cpp000066400000000000000000000024401263566244600175360ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // count values equal to four in the vector size_t count = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); count = std::count( host_vector.begin(), host_vector.end(), 4 ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "count: " << count << std::endl; return 0; } compute-0.5/perf/perf_stl_find.cpp000066400000000000000000000032171263566244600173310ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" // Max integer that can be generated by rand_int() function. 
int rand_int_max = 25; int rand_int() { return static_cast((rand() / double(RAND_MAX)) * rand_int_max); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // trying to find element that isn't in vector (worst-case scenario) int wanted = rand_int_max + 1; // result std::vector::iterator host_result_it; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); host_result_it = std::find(host_vector.begin(), host_vector.end(), wanted); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify if(host_result_it != host_vector.end()){ std::cout << "ERROR: " << "host_result_iterator != " << "host_vector.end()" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_stl_find_end.cpp000066400000000000000000000023621263566244600201570ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); int pattern[] = {2, 6, 6, 7, 8, 4}; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::find_end(host_vector.begin(), host_vector.end(), pattern, pattern + 6); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_includes.cpp000066400000000000000000000023121263566244600202120ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector v1(PERF_N); std::generate(v1.begin(), v1.end(), rand_int); std::vector v2(v1); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::includes( v1.begin(), v1.end(), v2.begin(), v2.end() ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_inner_product.cpp000066400000000000000000000024251263566244600212640ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector h1(PERF_N); std::vector h2(PERF_N); std::generate(h1.begin(), h1.end(), rand_int); std::generate(h2.begin(), h2.end(), rand_int); int product = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); product = std::inner_product( h1.begin(), h1.end(), h2.begin(), int(0) ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "product: " << product << std::endl; return 0; } compute-0.5/perf/perf_stl_is_permutation.cpp000066400000000000000000000025161263566244600214540ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); std::vector host_vector2(PERF_N); std::copy(host_vector.rbegin(), host_vector.rend(), host_vector2.begin()); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::is_permutation(host_vector.begin(), host_vector.end(), host_vector2.begin()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_max_element.cpp000066400000000000000000000023131263566244600207030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Rastko Anicic // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" int rand_int() { return static_cast(rand() % 10000000); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); int max = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); max = *(std::max_element(host_vector.begin(), host_vector.end())); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "max: " << max << std::endl; return 0; } compute-0.5/perf/perf_stl_merge.cpp000066400000000000000000000022551263566244600175110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector v1 = generate_random_vector(std::floor(PERF_N / 2.0)); std::vector v2 = generate_random_vector(std::ceil(PERF_N / 2.0)); std::vector v3(PERF_N); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_next_permutation.cpp000066400000000000000000000024661263566244600220230ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); std::sort(host_vector.begin(), host_vector.end(), std::greater()); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::next_permutation(host_vector.begin(), host_vector.end()); t.stop(); std::prev_permutation(host_vector.begin(), host_vector.end()); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_partial_sum.cpp000066400000000000000000000021761263566244600207340ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector v(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ std::generate(v.begin(), v.end(), rand_int); t.start(); std::partial_sum(v.begin(), v.end(), v.begin()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_partition.cpp000066400000000000000000000023431263566244600204210ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } bool less_than_10(int value) { return value < 10; } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::partition(host_vector.begin(), host_vector.end(), less_than_10); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_partition_point.cpp000066400000000000000000000025431263566244600216340ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } bool less_than_20(int value) { return value < 20; } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); std::partition(host_vector.begin(), host_vector.end(), less_than_20); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::partition_point(host_vector.begin(), host_vector.end(), less_than_20); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_prev_permutation.cpp000066400000000000000000000024411263566244600220120ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); std::sort(host_vector.begin(), host_vector.end()); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::prev_permutation(host_vector.begin(), host_vector.end()); t.stop(); std::next_permutation(host_vector.begin(), host_vector.end()); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_reverse.cpp000066400000000000000000000022321263566244600200600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::reverse(host_vector.begin(), host_vector.end()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_reverse_copy.cpp000066400000000000000000000024571263566244600211230ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector for reversed data std::vector host_reversed_vector(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::reverse_copy(host_vector.begin(), host_vector.end(), host_reversed_vector.begin()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_rotate.cpp000066400000000000000000000022711263566244600177060ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::rotate(host_vector.begin(), host_vector.begin()+(PERF_N/2), host_vector.end()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_rotate_copy.cpp000066400000000000000000000024001263566244600207320ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); std::vector host_vector2(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::rotate_copy(host_vector.begin(), host_vector.begin()+(PERF_N/2), host_vector.end(), host_vector2.begin()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_saxpy.cpp000066400000000000000000000025551263566244600175610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" float rand_float() { return (float(rand()) / float(RAND_MAX)) * 1000.f; } // y <- alpha * x + y void serial_saxpy(size_t n, float alpha, const float *x, float *y) { for(size_t i = 0; i < n; i++){ y[i] = alpha * x[i] + y[i]; } } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; float alpha = 2.5f; std::vector host_x(PERF_N); std::vector host_y(PERF_N); std::generate(host_x.begin(), host_x.end(), rand_float); std::generate(host_y.begin(), host_y.end(), rand_float); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); serial_saxpy(PERF_N, alpha, &host_x[0], &host_y[0]); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_search.cpp000066400000000000000000000023541263566244600176570ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); int pattern[] = {2, 6, 6, 7, 8, 4}; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::search(host_vector.begin(), host_vector.end(), pattern, pattern + 6); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_search_n.cpp000066400000000000000000000022411263566244600201670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::search_n(host_vector.begin(), host_vector.end(), 5, 2); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_set_difference.cpp000066400000000000000000000027411263566244600213570ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector v1(std::floor(PERF_N / 2.0)); std::vector v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); std::vector v3(PERF_N); std::vector::iterator v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); v3_end = std::set_difference( v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin() ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl; return 0; } compute-0.5/perf/perf_stl_set_intersection.cpp000066400000000000000000000027431263566244600217750ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector v1(std::floor(PERF_N / 2.0)); std::vector v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); std::vector v3(PERF_N); std::vector::iterator v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); v3_end = std::set_intersection( v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin() ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl; return 0; } compute-0.5/perf/perf_stl_set_symmetric_difference.cpp000066400000000000000000000027531263566244600234560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector v1(std::floor(PERF_N / 2.0)); std::vector v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); std::vector v3(PERF_N); std::vector::iterator v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); v3_end = std::set_symmetric_difference( v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin() ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl; return 0; } compute-0.5/perf/perf_stl_set_union.cpp000066400000000000000000000027341263566244600204170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector v1(std::floor(PERF_N / 2.0)); std::vector v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); std::vector v3(PERF_N); std::vector::iterator v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); v3_end = std::set_union( v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin() ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl; return 0; } compute-0.5/perf/perf_stl_sort.cpp000066400000000000000000000016361263566244600174030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector v; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ v = generate_random_vector(PERF_N); t.start(); std::sort(v.begin(), v.end()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_stable_partition.cpp000066400000000000000000000024121263566244600217500ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } bool less_than_10(int value) { return value < 10; } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); std::stable_partition(host_vector.begin(), host_vector.end(), less_than_10); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_unique.cpp000066400000000000000000000022351263566244600177160ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See 
accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ std::generate(host_vector.begin(), host_vector.end(), rand_int); t.start(); std::unique(host_vector.begin(), host_vector.end()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_stl_unique_copy.cpp000066400000000000000000000023711263566244600207510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::vector host_vector2(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ std::generate(host_vector.begin(), host_vector.end(), rand_int); t.start(); std::unique_copy( host_vector.begin(), host_vector.end(), host_vector2.begin() ); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_tbb_accumulate.cpp000066400000000000000000000040551263566244600205020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } template struct Sum { T value; Sum() : value(0) {} Sum( Sum& s, tbb::split ) {value = 0;} void operator()( const tbb::blocked_range& r ) { T temp = value; for( T* a=r.begin(); a!=r.end(); ++a ) { temp += *a; } value = temp; } void join( Sum& rhs ) {value += rhs.value;} }; template T ParallelSum( T array[], size_t n ) { Sum total; tbb::parallel_reduce( tbb::blocked_range( array, array+n ), total ); return total.value; } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); int sum = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); sum = ParallelSum(&host_vector[0], host_vector.size()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "sum: " << sum << std::endl; int host_sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0)); if(sum != host_sum){ std::cerr << "ERROR: sum (" << sum << ") != (" << host_sum << ")" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_tbb_merge.cpp000066400000000000000000000060621263566244600174560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include "perf.hpp" // example from: http://www.threadingbuildingblocks.org/docs/help/reference/algorithms/parallel_for_func.htm using namespace tbb; template struct ParallelMergeRange { static size_t grainsize; Iterator begin1, end1; // [begin1,end1) is 1st sequence to be merged Iterator begin2, end2; // [begin2,end2) is 2nd sequence to be merged Iterator out; // where to put merged sequence bool empty() const {return (end1-begin1)+(end2-begin2)==0;} bool is_divisible() const { return (std::min)( end1-begin1, end2-begin2 ) > grainsize; } ParallelMergeRange( ParallelMergeRange& r, split ) { if( r.end1-r.begin1 < r.end2-r.begin2 ) { std::swap(r.begin1,r.begin2); std::swap(r.end1,r.end2); } Iterator m1 = r.begin1 + (r.end1-r.begin1)/2; Iterator m2 = std::lower_bound( r.begin2, r.end2, *m1 ); begin1 = m1; begin2 = m2; end1 = r.end1; end2 = r.end2; out = r.out + (m1-r.begin1) + (m2-r.begin2); r.end1 = m1; r.end2 = m2; } ParallelMergeRange( Iterator begin1_, Iterator end1_, Iterator begin2_, Iterator end2_, Iterator out_ ) : begin1(begin1_), end1(end1_), begin2(begin2_), end2(end2_), out(out_) {} }; template size_t ParallelMergeRange::grainsize = 1000; template struct ParallelMergeBody { void operator()( ParallelMergeRange& r ) const { std::merge( r.begin1, r.end1, r.begin2, r.end2, r.out ); } }; template void ParallelMerge( Iterator begin1, Iterator end1, Iterator begin2, Iterator end2, Iterator out ) { parallel_for( ParallelMergeRange(begin1,end1,begin2,end2,out), ParallelMergeBody(), simple_partitioner() ); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector v1 = generate_random_vector(PERF_N / 2); std::vector v2 = generate_random_vector(PERF_N / 2); std::vector v3(PERF_N); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); perf_timer t; for(size_t trial = 0; trial < 
PERF_TRIALS; trial++){ t.start(); ParallelMerge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_tbb_sort.cpp000066400000000000000000000017401263566244600173440ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; std::vector v(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ v = generate_random_vector(PERF_N); t.start(); tbb::parallel_sort(v.begin(), v.end()); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_thrust_accumulate.cu000066400000000000000000000024131263566244600211050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector h_vec = generate_random_vector(PERF_N); // transfer data to the device thrust::device_vector d_vec = h_vec; int sum = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); sum = thrust::reduce(d_vec.begin(), d_vec.end()); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "sum: " << sum << std::endl; return 0; } compute-0.5/perf/perf_thrust_count.cu000066400000000000000000000026031263566244600201130ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host thrust::host_vector host_vector(PERF_N); thrust::generate(host_vector.begin(), host_vector.end(), rand_int); thrust::device_vector v = host_vector; size_t count = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); count = thrust::count(v.begin(), v.end(), 4); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "count: " << count << std::endl; return 0; } compute-0.5/perf/perf_thrust_exclusive_scan.cu000066400000000000000000000024631263566244600220020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Benoit // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector h_vec = generate_random_vector(PERF_N); // transfer data to the device thrust::device_vector d_vec = h_vec; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ d_vec = h_vec; t.start(); thrust::exclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin()); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); return 0; } compute-0.5/perf/perf_thrust_find.cu000066400000000000000000000035011263566244600177010ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" // Max integer that can be generated by rand_int() function. 
int rand_int_max = 25; int rand_int() { return static_cast((rand() / double(RAND_MAX)) * rand_int_max); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of random numbers on the host thrust::host_vector host_vector(PERF_N); thrust::generate(host_vector.begin(), host_vector.end(), rand_int); thrust::device_vector v = host_vector; // trying to find element that isn't in vector (worst-case scenario) int wanted = rand_int_max + 1; // result thrust::device_vector::iterator device_result_it; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); device_result_it = thrust::find(v.begin(), v.end(), wanted); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // verify if(device_result_it != v.end()){ std::cout << "ERROR: " << "device_result_iterator != " << "v.end()" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_thrust_inner_product.cu000066400000000000000000000027551263566244600216460ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector host_x(PERF_N); thrust::host_vector host_y(PERF_N); std::generate(host_x.begin(), host_x.end(), rand); std::generate(host_y.begin(), host_y.end(), rand); // transfer data to the device thrust::device_vector device_x = host_x; thrust::device_vector device_y = host_y; int product = 0; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); product = thrust::inner_product( device_x.begin(), device_x.end(), device_y.begin(), 0 ); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "product: " << product << std::endl; return 0; } compute-0.5/perf/perf_thrust_merge.cu000066400000000000000000000036331263566244600200660ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector v1(std::floor(PERF_N / 2.0)); thrust::host_vector v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand); std::generate(v2.begin(), v2.end(), rand); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); // transfer data to the device thrust::device_vector gpu_v1 = v1; thrust::device_vector gpu_v2 = v2; thrust::device_vector gpu_v3(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); thrust::merge( gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin() ); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; thrust::host_vector check_v3 = gpu_v3; thrust::host_vector v3(PERF_N); std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()); bool ok = std::equal(check_v3.begin(), check_v3.end(), v3.begin()); if(!ok){ std::cerr << "ERROR: merged ranges different" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_thrust_partial_sum.cu000066400000000000000000000025231263566244600213040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector h_vec = generate_random_vector(PERF_N); // transfer data to the device thrust::device_vector d_vec = h_vec; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ d_vec = h_vec; t.start(); thrust::inclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin()); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); return 0; } compute-0.5/perf/perf_thrust_partition.cu000066400000000000000000000027551263566244600210040ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } struct less_than_ten : public thrust::unary_function { __device__ bool operator()(int x) const { return x < 10; } }; int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector h_vec(PERF_N); std::generate(h_vec.begin(), h_vec.end(), rand_int); thrust::device_vector d_vec(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ d_vec = h_vec; t.start(); thrust::partition( d_vec.begin(), d_vec.end(), less_than_ten() ); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_thrust_reduce_by_key.cu000066400000000000000000000053201263566244600215730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } struct unique_key { int current; int avgValuesNoPerKey; unique_key() { current = 0; avgValuesNoPerKey = 512; } int operator()() { double p = double(1.0) / static_cast(avgValuesNoPerKey); if((rand() / double(RAND_MAX)) <= p) return ++current; return current; } } UniqueKey; int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // create vector of keys and random values thrust::host_vector host_keys(PERF_N); thrust::host_vector host_values(PERF_N); std::generate(host_keys.begin(), host_keys.end(), UniqueKey); std::generate(host_values.begin(), host_values.end(), rand_int); // transfer data to the device thrust::device_vector device_keys = host_keys; thrust::device_vector device_values = host_values; // create device vectors for the results thrust::device_vector device_keys_results(PERF_N); thrust::device_vector device_values_results(PERF_N); typedef typename thrust::device_vector::iterator iterType; thrust::pair result; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); result = thrust::reduce_by_key(device_keys.begin(), device_keys.end(), device_values.begin(), device_keys_results.begin(), device_values_results.begin()); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; size_t result_size = thrust::distance(device_keys_results.begin(), result.first); if(result_size != static_cast(host_keys[PERF_N-1] + 1)){ std::cout << "ERROR: " << "wrong number of keys" << std::endl; return -1; } return 0; } compute-0.5/perf/perf_thrust_reverse.cu000066400000000000000000000024701263566244600204400ustar00rootroot00000000000000//---------------------------------------------------------------------------// // 
Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector h_vec = generate_random_vector(PERF_N); // transfer data to the device thrust::device_vector d_vec; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ d_vec = h_vec; t.start(); thrust::reverse(d_vec.begin(), d_vec.end()); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); return 0; } compute-0.5/perf/perf_thrust_reverse_copy.cu000066400000000000000000000025221263566244600214700ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector h_vec = generate_random_vector(PERF_N); // transfer data to the device thrust::device_vector d_vec; d_vec = h_vec; // device vector for reversed data thrust::device_vector d_reversed_vec(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); thrust::reverse_copy(d_vec.begin(), d_vec.end(), d_reversed_vec.begin()); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_thrust_rotate.cu000066400000000000000000000030761263566244600202660ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector h_vec = generate_random_vector(PERF_N); // transfer data to the device thrust::device_vector d_vec; size_t rotate_distance = PERF_N / 2; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ d_vec = h_vec; t.start(); // there is no thrust::rotate() so we implement it manually with copy() thrust::device_vector tmp(d_vec.begin(), d_vec.begin() + rotate_distance); thrust::copy(d_vec.begin() + rotate_distance, d_vec.end(), d_vec.begin()); thrust::copy(tmp.begin(), tmp.end(), d_vec.begin() + rotate_distance); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); return 0; } compute-0.5/perf/perf_thrust_saxpy.cu000066400000000000000000000035541263566244600201350ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" struct saxpy_functor : public thrust::binary_function { const float a; saxpy_functor(float _a) : a(_a) {} __host__ __device__ float operator()(const float& x, const float& y) const { return a * x + y; } }; int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector host_x(PERF_N); thrust::host_vector host_y(PERF_N); std::generate(host_x.begin(), host_x.end(), rand); std::generate(host_y.begin(), host_y.end(), rand); // transfer data to the device thrust::device_vector device_x = host_x; thrust::device_vector device_y = host_y; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); thrust::transform(device_x.begin(), device_x.end(), device_y.begin(), device_y.begin(), saxpy_functor(2.5f)); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host thrust::copy(device_x.begin(), device_x.end(), host_x.begin()); thrust::copy(device_y.begin(), device_y.end(), host_y.begin()); return 0; } compute-0.5/perf/perf_thrust_set_difference.cu000066400000000000000000000035201263566244600217270ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector v1(std::floor(PERF_N / 2.0)); thrust::host_vector v2(std::ceil(PERF_N / 2.0)); std::generate(v1.begin(), v1.end(), rand_int); std::generate(v2.begin(), v2.end(), rand_int); std::sort(v1.begin(), v1.end()); std::sort(v2.begin(), v2.end()); // transfer data to the device thrust::device_vector gpu_v1 = v1; thrust::device_vector gpu_v2 = v2; thrust::device_vector gpu_v3(PERF_N); thrust::device_vector::iterator gpu_v3_end; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ t.start(); gpu_v3_end = thrust::set_difference( gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin() ); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; std::cout << "size: " << thrust::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; return 0; } compute-0.5/perf/perf_thrust_sort.cu000066400000000000000000000024621263566244600177550ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector h_vec = generate_random_vector(PERF_N); // transfer data to the device thrust::device_vector d_vec; perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ d_vec = h_vec; t.start(); thrust::sort(d_vec.begin(), d_vec.end()); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // transfer data back to host thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); return 0; } compute-0.5/perf/perf_thrust_unique.cu000066400000000000000000000024531263566244600202740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; thrust::host_vector h_vec(PERF_N); std::generate(h_vec.begin(), h_vec.end(), rand_int); thrust::device_vector d_vec(PERF_N); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ d_vec = h_vec; t.start(); thrust::unique(d_vec.begin(), d_vec.end()); cudaDeviceSynchronize(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_uniform_int_distribution.cpp000066400000000000000000000026241263566244600226600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" namespace compute = boost::compute; int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; compute::device device = compute::system::default_device(); compute::context context(device); compute::command_queue queue(context, device); compute::vector vector(PERF_N, context); compute::default_random_engine rng(queue); compute::uniform_int_distribution dist(0, 1); perf_timer t; t.start(); dist.generate(vector.begin(), vector.end(), rng, queue); queue.finish(); t.stop(); std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_unique.cpp000066400000000000000000000035651263566244600170430ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); t.start(); boost::compute::unique( device_vector.begin(), device_vector.end(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perf_unique_copy.cpp000066400000000000000000000037301263566244600200670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #include #include #include #include #include #include #include #include "perf.hpp" int rand_int() { return static_cast((rand() / double(RAND_MAX)) * 25.0); } int main(int argc, char *argv[]) { perf_parse_args(argc, argv); std::cout << "size: " << PERF_N << std::endl; // setup context and queue for the default device boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context(device); boost::compute::command_queue queue(context, device); std::cout << "device: " << device.name() << std::endl; // create vector of random numbers on the host std::vector host_vector(PERF_N); std::generate(host_vector.begin(), host_vector.end(), rand_int); // create vector on the device and copy the data boost::compute::vector device_vector(PERF_N, context); boost::compute::vector device_vector2(PERF_N, context); perf_timer t; for(size_t trial = 0; trial < PERF_TRIALS; trial++){ boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); t.start(); boost::compute::unique_copy( device_vector.begin(), device_vector.end(), device_vector2.begin(), queue ); queue.finish(); t.stop(); } std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; return 0; } compute-0.5/perf/perfdoc.py000077500000000000000000000030701263566244600160030ustar00rootroot00000000000000#!/usr/bin/python # Copyright (c) 2014 Kyle Lutz # Distributed under the Boost Software License, Version 1.0 # See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt # # See http://boostorg.github.com/compute for more information. 
import os import sys import pylab from perf import run_benchmark fignum = 0 def plot_to_file(report, filename): global fignum fignum += 1 pylab.figure(fignum) run_to_label = { "stl" : "C++ STL", "thrust" : "Thrust", "compute" : "Boost.Compute", "bolt" : "Bolt" } for run in sorted(report.samples.keys()): x = [] y = [] for sample in report.samples[run]: x.append(sample[0]) y.append(sample[1]) pylab.loglog(x, y, marker='o', label=run_to_label[run]) pylab.xlabel("Size") pylab.ylabel("Time (ms)") pylab.legend(loc='upper left') pylab.savefig(filename) if __name__ == '__main__': sizes = [pow(2, x) for x in range(10, 26)] algorithms = [ "accumulate", "count", "inner_product", "merge", "partial_sum", "partition", "reverse", "rotate", "saxpy", "sort", "unique", ] try: os.mkdir("perf_plots") except OSError: pass for algorithm in algorithms: print("running '%s'" % (algorithm)) report = run_benchmark(algorithm, sizes, ["stl", "thrust", "bolt"]) plot_to_file(report, "perf_plots/%s_time_plot.png" % algorithm) compute-0.5/test/000077500000000000000000000000001263566244600140275ustar00rootroot00000000000000compute-0.5/test/CMakeLists.txt000066400000000000000000000246211263566244600165740ustar00rootroot00000000000000# --------------------------------------------------------------------------- # Copyright (c) 2013 Kyle Lutz # # Distributed under the Boost Software License, Version 1.0 # See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt # # --------------------------------------------------------------------------- include_directories(../include) set(BOOST_COMPONENTS unit_test_framework) if(${BOOST_COMPUTE_USE_CPP11}) # allow tests to use C++11 features add_definitions(-DBOOST_COMPUTE_USE_CPP11) endif() if (${BOOST_COMPUTE_USE_OFFLINE_CACHE}) set(BOOST_COMPONENTS ${BOOST_COMPONENTS} system filesystem) add_definitions(-DBOOST_COMPUTE_USE_OFFLINE_CACHE) endif() if(${BOOST_COMPUTE_THREAD_SAFE} AND NOT ${BOOST_COMPUTE_USE_CPP11}) set(BOOST_COMPONENTS 
${BOOST_COMPONENTS} thread) endif() find_package(Boost 1.48 REQUIRED COMPONENTS ${BOOST_COMPONENTS}) add_definitions(-DBOOST_TEST_DYN_LINK) # enable automatic kernel compilation error messages for tests add_definitions(-DBOOST_COMPUTE_DEBUG_KERNEL_COMPILATION) # enable code coverage generation (only with GCC for now) if(${BOOST_COMPUTE_ENABLE_COVERAGE} AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") add_definitions(-fprofile-arcs -ftest-coverage) endif() function(add_compute_test TEST_NAME TEST_SOURCE) get_filename_component(TEST_TARGET ${TEST_SOURCE} NAME_WE) add_executable(${TEST_TARGET} ${TEST_SOURCE}) target_link_libraries(${TEST_TARGET} ${OPENCL_LIBRARIES} ${Boost_LIBRARIES} ) # link with coverage library if(${BOOST_COMPUTE_ENABLE_COVERAGE} AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") target_link_libraries(${TEST_TARGET} -fprofile-arcs -ftest-coverage) endif() add_test(${TEST_NAME} ${TEST_TARGET}) endfunction() add_compute_test("core.buffer" test_buffer.cpp) add_compute_test("core.closure" test_closure.cpp) add_compute_test("core.command_queue" test_command_queue.cpp) add_compute_test("core.context" test_context.cpp) add_compute_test("core.device" test_device.cpp) add_compute_test("core.event" test_event.cpp) add_compute_test("core.function" test_function.cpp) add_compute_test("core.kernel" test_kernel.cpp) add_compute_test("core.pipe" test_pipe.cpp) add_compute_test("core.platform" test_platform.cpp) add_compute_test("core.program" test_program.cpp) add_compute_test("core.system" test_system.cpp) add_compute_test("core.type_traits" test_type_traits.cpp) add_compute_test("core.user_event" test_user_event.cpp) add_compute_test("utility.extents" test_extents.cpp) add_compute_test("utility.invoke" test_invoke.cpp) add_compute_test("utility.program_cache" test_program_cache.cpp) add_compute_test("utility.wait_list" test_wait_list.cpp) add_compute_test("algorithm.accumulate" test_accumulate.cpp) add_compute_test("algorithm.adjacent_difference" 
test_adjacent_difference.cpp) add_compute_test("algorithm.adjacent_find" test_adjacent_find.cpp) add_compute_test("algorithm.any_all_none_of" test_any_all_none_of.cpp) add_compute_test("algorithm.binary_search" test_binary_search.cpp) add_compute_test("algorithm.copy" test_copy.cpp) add_compute_test("algorithm.copy_if" test_copy_if.cpp) add_compute_test("algorithm.count" test_count.cpp) add_compute_test("algorithm.equal" test_equal.cpp) add_compute_test("algorithm.equal_range" test_equal_range.cpp) add_compute_test("algorithm.extrema" test_extrema.cpp) add_compute_test("algorithm.fill" test_fill.cpp) add_compute_test("algorithm.find" test_find.cpp) add_compute_test("algorithm.find_end" test_find_end.cpp) add_compute_test("algorithm.for_each" test_for_each.cpp) add_compute_test("algorithm.gather" test_gather.cpp) add_compute_test("algorithm.generate" test_generate.cpp) add_compute_test("algorithm.includes" test_includes.cpp) add_compute_test("algorithm.inner_product" test_inner_product.cpp) add_compute_test("algorithm.inplace_merge" test_inplace_merge.cpp) add_compute_test("algorithm.inplace_reduce" test_inplace_reduce.cpp) add_compute_test("algorithm.insertion_sort" test_insertion_sort.cpp) add_compute_test("algorithm.iota" test_iota.cpp) add_compute_test("algorithm.is_permutation" test_is_permutation.cpp) add_compute_test("algorithm.is_sorted" test_is_sorted.cpp) add_compute_test("algorithm.merge" test_merge.cpp) add_compute_test("algorithm.mismatch" test_mismatch.cpp) add_compute_test("algorithm.next_permutation" test_next_permutation.cpp) add_compute_test("algorithm.nth_element" test_nth_element.cpp) add_compute_test("algorithm.partial_sum" test_partial_sum.cpp) add_compute_test("algorithm.partition" test_partition.cpp) add_compute_test("algorithm.partition_point" test_partition_point.cpp) add_compute_test("algorithm.prev_permutation" test_prev_permutation.cpp) add_compute_test("algorithm.radix_sort" test_radix_sort.cpp) add_compute_test("algorithm.random_fill" 
test_random_fill.cpp) add_compute_test("algorithm.random_shuffle" test_random_shuffle.cpp) add_compute_test("algorithm.reduce" test_reduce.cpp) add_compute_test("algorithm.reduce_by_key" test_reduce_by_key.cpp) add_compute_test("algorithm.remove" test_remove.cpp) add_compute_test("algorithm.replace" test_replace.cpp) add_compute_test("algorithm.reverse" test_reverse.cpp) add_compute_test("algorithm.rotate" test_rotate.cpp) add_compute_test("algorithm.rotate_copy" test_rotate_copy.cpp) add_compute_test("algorithm.scan" test_scan.cpp) add_compute_test("algorithm.scatter" test_scatter.cpp) add_compute_test("algorithm.scatter_if" test_scatter_if.cpp) add_compute_test("algorithm.search" test_search.cpp) add_compute_test("algorithm.search_n" test_search_n.cpp) add_compute_test("algorithm.set_difference" test_set_difference.cpp) add_compute_test("algorithm.set_intersection" test_set_intersection.cpp) add_compute_test("algorithm.set_symmetric_difference" test_set_symmetric_difference.cpp) add_compute_test("algorithm.set_union" test_set_union.cpp) add_compute_test("algorithm.sort" test_sort.cpp) add_compute_test("algorithm.sort_by_key" test_sort_by_key.cpp) add_compute_test("algorithm.stable_partition" test_stable_partition.cpp) add_compute_test("algorithm.stable_sort" test_stable_sort.cpp) add_compute_test("algorithm.transform" test_transform.cpp) add_compute_test("algorithm.transform_if" test_transform_if.cpp) add_compute_test("algorithm.transform_reduce" test_transform_reduce.cpp) add_compute_test("algorithm.unique" test_unique.cpp) add_compute_test("algorithm.unique_copy" test_unique_copy.cpp) add_compute_test("algorithm.lexicographical_compare" test_lexicographical_compare.cpp) add_compute_test("allocator.buffer_allocator" test_buffer_allocator.cpp) add_compute_test("allocator.pinned_allocator" test_pinned_allocator.cpp) add_compute_test("async.wait" test_async_wait.cpp) add_compute_test("async.wait_guard" test_async_wait_guard.cpp) add_compute_test("container.array" 
test_array.cpp) add_compute_test("container.dynamic_bitset" test_dynamic_bitset.cpp) add_compute_test("container.flat_map" test_flat_map.cpp) add_compute_test("container.flat_set" test_flat_set.cpp) add_compute_test("container.mapped_view" test_mapped_view.cpp) add_compute_test("container.stack" test_stack.cpp) add_compute_test("container.string" test_string.cpp) add_compute_test("container.valarray" test_valarray.cpp) add_compute_test("container.vector" test_vector.cpp) add_compute_test("exception.context_error" test_context_error.cpp) add_compute_test("exception.no_device_found" test_no_device_found.cpp) add_compute_test("exception.opencl_error" test_opencl_error.cpp) add_compute_test("exception.unsupported_extension" test_unsupported_extension.cpp) add_compute_test("functional.as" test_functional_as.cpp) add_compute_test("functional.bind" test_functional_bind.cpp) add_compute_test("functional.convert" test_functional_convert.cpp) add_compute_test("functional.get" test_functional_get.cpp) add_compute_test("functional.hash" test_functional_hash.cpp) add_compute_test("functional.identity" test_functional_identity.cpp) add_compute_test("functional.popcount" test_functional_popcount.cpp) add_compute_test("functional.unpack" test_functional_unpack.cpp) add_compute_test("image.image1d" test_image1d.cpp) add_compute_test("image.image2d" test_image2d.cpp) add_compute_test("image.image3d" test_image3d.cpp) add_compute_test("image.image_sampler" test_image_sampler.cpp) add_compute_test("iterator.buffer_iterator" test_buffer_iterator.cpp) add_compute_test("iterator.constant_iterator" test_constant_iterator.cpp) add_compute_test("iterator.counting_iterator" test_counting_iterator.cpp) add_compute_test("iterator.discard_iterator" test_discard_iterator.cpp) add_compute_test("iterator.function_input_iterator" test_function_input_iterator.cpp) add_compute_test("iterator.permutation_iterator" test_permutation_iterator.cpp) add_compute_test("iterator.strided_iterator" 
test_strided_iterator.cpp) add_compute_test("iterator.transform_iterator" test_transform_iterator.cpp) add_compute_test("iterator.zip_iterator" test_zip_iterator.cpp) add_compute_test("memory.local_buffer" test_local_buffer.cpp) add_compute_test("memory.svm_ptr" test_svm_ptr.cpp) add_compute_test("random.bernoulli_distribution" test_bernoulli_distribution.cpp) add_compute_test("random.discrete_distribution" test_discrete_distribution.cpp) add_compute_test("random.linear_congruential_engine" test_linear_congruential_engine.cpp) add_compute_test("random.mersenne_twister_engine" test_mersenne_twister_engine.cpp) add_compute_test("random.threefry_engine" test_threefry_engine.cpp) add_compute_test("random.normal_distribution" test_normal_distribution.cpp) add_compute_test("random.uniform_int_distribution" test_uniform_int_distribution.cpp) add_compute_test("random.uniform_real_distribution" test_uniform_real_distribution.cpp) add_compute_test("types.fundamental" test_types.cpp) add_compute_test("types.complex" test_complex.cpp) add_compute_test("types.pair" test_pair.cpp) add_compute_test("types.tuple" test_tuple.cpp) add_compute_test("types.struct" test_struct.cpp) add_compute_test("type_traits.result_of" test_result_of.cpp) add_compute_test("experimental.clamp_range" test_clamp_range.cpp) add_compute_test("experimental.malloc" test_malloc.cpp) add_compute_test("experimental.sort_by_transform" test_sort_by_transform.cpp) add_compute_test("experimental.tabulate" test_tabulate.cpp) # miscellaneous tests add_compute_test("misc.amd_cpp_kernel_language" test_amd_cpp_kernel_language.cpp) add_compute_test("misc.lambda" test_lambda.cpp) add_compute_test("misc.user_defined_types" test_user_defined_types.cpp) # extra tests (interop tests, linkage tests, etc.) add_subdirectory(extra) compute-0.5/test/Jamfile.v2000066400000000000000000000015021263566244600156450ustar00rootroot00000000000000# (C) Copyright 2015: Kyle Lutz # Distributed under the Boost Software License, Version 1.0. 
# (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) import testing ; lib boost_unit_test_framework ; project : source-location . : requirements BOOST_ALL_NO_LIB=1 -Wno-deprecated-declarations /boost/test//boost_unit_test_framework ; rule test_all { local all_rules = ; for local fileb in [ glob *.cpp ] { all_rules += [ run $(fileb) : : : shared:BOOST_TEST_DYN_LINK=1 linux:"-lOpenCL" darwin:"-framework OpenCL" ] ; } return $(all_rules) ; } test-suite compute : [ test_all r ] : ; compute-0.5/test/check_macros.hpp000066400000000000000000000034651263566244600171710ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TEST_CHECK_MACROS_HPP #define BOOST_COMPUTE_TEST_CHECK_MACROS_HPP #define LIST_ARRAY_VALUES(z, n, data) \ BOOST_PP_COMMA_IF(n) BOOST_PP_ARRAY_ELEM(n, data) // checks 'size' values of 'type' in the device range 'actual` // against the values given in the array 'expected' #define CHECK_RANGE_EQUAL(type, size, actual, expected) \ { \ type _actual[size]; \ boost::compute::copy( \ actual.begin(), actual.begin()+size, _actual, queue \ ); \ const type _expected[size] = { \ BOOST_PP_REPEAT(size, LIST_ARRAY_VALUES, (size, expected)) \ }; \ BOOST_CHECK_EQUAL_COLLECTIONS( \ _actual, _actual + size, _expected, _expected + size \ ); \ } #define CHECK_HOST_RANGE_EQUAL(type, size, actual, expected) \ { \ const type _expected[size] = { \ BOOST_PP_REPEAT(size, LIST_ARRAY_VALUES, (size, expected)) \ }; \ BOOST_CHECK_EQUAL_COLLECTIONS( \ actual, actual + size, _expected, _expected + size \ ); \ } #define CHECK_STRING_EQUAL(actual, 
expected) \ { \ std::string _actual(actual.size(), '\0'); \ boost::compute::copy( \ actual.begin(), actual.end(), _actual.begin(), queue \ ); \ BOOST_CHECK_EQUAL(_actual, expected); \ } #endif // BOOST_COMPUTE_TEST_CHECK_MACROS_HPP compute-0.5/test/context_setup.hpp000066400000000000000000000020221263566244600174400ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Denis Demidov // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TEST_CONTEXT_SETUP_HPP #define BOOST_COMPUTE_TEST_CONTEXT_SETUP_HPP #include #include #include "opencl_version_check.hpp" struct Context { boost::compute::device device; boost::compute::context context; boost::compute::command_queue queue; Context() : device ( boost::compute::system::default_device() ), context( boost::compute::system::default_context() ), queue ( boost::compute::system::default_queue() ) {} }; BOOST_FIXTURE_TEST_SUITE(compute_test, Context) #endif compute-0.5/test/extra/000077500000000000000000000000001263566244600151525ustar00rootroot00000000000000compute-0.5/test/extra/CMakeLists.txt000066400000000000000000000050751263566244600177210ustar00rootroot00000000000000# --------------------------------------------------------------------------- # Copyright (c) 2015 Kyle Lutz # # Distributed under the Boost Software License, Version 1.0 # See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt # # --------------------------------------------------------------------------- # include local test headers include_directories(..) 
# Check for linkage problems add_executable(test_multiple_objects test_multiple_objects1.cpp test_multiple_objects2.cpp ) target_link_libraries(test_multiple_objects ${OPENCL_LIBRARIES} ${Boost_LIBRARIES} ) # link with coverage library if(${BOOST_COMPUTE_ENABLE_COVERAGE} AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") target_link_libraries(test_multiple_objects -fprofile-arcs -ftest-coverage) endif() add_test("misc.multiple_objects" test_multiple_objects) # eigen interop tests if(${BOOST_COMPUTE_HAVE_EIGEN}) find_package(Eigen REQUIRED) include_directories(SYSTEM ${EIGEN_INCLUDE_DIRS}) add_compute_test("interop.eigen" test_interop_eigen.cpp) endif() # opencv interop tests if(${BOOST_COMPUTE_HAVE_OPENCV}) find_package(OpenCV REQUIRED) include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS}) add_compute_test("interop.opencv" test_interop_opencv.cpp) target_link_libraries(test_interop_opencv ${OpenCV_LIBS}) endif() # qt interop tests if(${BOOST_COMPUTE_HAVE_QT}) # look for Qt4 in the first place find_package(Qt4 QUIET) if(${QT4_FOUND}) find_package(Qt4 REQUIRED COMPONENTS QtCore QtGui QtOpenGL) include(${QT_USE_FILE}) else() find_package(Qt5Widgets QUIET) # look for Qt5 if(${Qt5Widgets_FOUND}) find_package(Qt5Core REQUIRED) find_package(Qt5Widgets REQUIRED) find_package(Qt5OpenGL REQUIRED) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Qt5OpenGL_EXECUTABLE_COMPILE_FLAGS}") set(QT_LIBRARIES ${Qt5OpenGL_LIBRARIES}) else() # no valid Qt framework found message(FATAL_ERROR "Error: Did not find Qt4 or Qt5") endif() endif() add_compute_test("interop.qt" test_interop_qt.cpp) target_link_libraries(test_interop_qt ${QT_LIBRARIES}) # the opengl interop test depends on qt to create the opengl context add_compute_test("interop.opengl" test_interop_opengl.cpp) target_link_libraries(test_interop_opengl ${QT_LIBRARIES}) endif() # vtk interop tests if(${BOOST_COMPUTE_HAVE_VTK}) find_package(VTK REQUIRED) include(${VTK_USE_FILE}) add_compute_test("interop.vtk" test_interop_vtk.cpp) 
target_link_libraries(test_interop_vtk ${VTK_LIBRARIES}) endif() compute-0.5/test/extra/test_interop_eigen.cpp000066400000000000000000000067711263566244600215570ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestInteropEigen #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bcl = boost::compute; BOOST_AUTO_TEST_CASE(eigen) { Eigen::MatrixXf mat(3, 3); mat << 1, 2, 3, 6, 5, 4, 7, 8, 9; // copy matrix to gpu buffer bcl::vector vec(9, context); bcl::eigen_copy_matrix_to_buffer(mat, vec.begin(), queue); CHECK_RANGE_EQUAL(float, 9, vec, (1, 6, 7, 2, 5, 8, 3, 4, 9)); // transpose matrix and then copy to gpu buffer mat = mat.transpose().eval(); bcl::eigen_copy_matrix_to_buffer(mat, vec.begin(), queue); CHECK_RANGE_EQUAL(float, 9, vec, (1, 2, 3, 6, 5, 4, 7, 8, 9)); // set matrix to zero and copy data back from gpu buffer mat.setZero(); bcl::eigen_copy_buffer_to_matrix(vec.begin(), mat, queue); BOOST_CHECK(mat.isZero() == false); BOOST_CHECK_EQUAL(mat.sum(), 45); } BOOST_AUTO_TEST_CASE(eigen_types) { BOOST_CHECK(std::strcmp(bcl::type_name(), "int2") == 0); BOOST_CHECK(std::strcmp(bcl::type_name(), "float2") == 0); BOOST_CHECK(std::strcmp(bcl::type_name(), "float4") == 0); BOOST_CHECK(std::strcmp(bcl::type_name(), "double4") == 0); } BOOST_AUTO_TEST_CASE(multiply_matrix4) { std::vector host_vectors; std::vector host_matrices; Eigen::Matrix4f matrix; matrix << 1, 2, 0, 3, 2, 1, 2, 0, 0, 3, 1, 2, 2, 0, 2, 1; host_vectors.push_back(Eigen::Vector4f(1, 2, 3, 4)); 
host_vectors.push_back(Eigen::Vector4f(4, 3, 2, 1)); host_vectors.push_back(Eigen::Vector4f(1, 2, 3, 4)); host_vectors.push_back(Eigen::Vector4f(4, 3, 2, 1)); // store the eigen 4x4 matrix as a float16 bcl::float16_ M = bcl::eigen_matrix4f_to_float16(matrix); // returns the result of M*x BOOST_COMPUTE_CLOSURE(Eigen::Vector4f, transform4x4, (const Eigen::Vector4f x), (M), { float4 r; r.x = dot(M.s048c, x); r.y = dot(M.s159d, x); r.z = dot(M.s26ae, x); r.w = dot(M.s37bf, x); return r; }); bcl::vector vectors(4, context); bcl::vector results(4, context); bcl::copy(host_vectors.begin(), host_vectors.end(), vectors.begin(), queue); bcl::transform( vectors.begin(), vectors.end(), results.begin(), transform4x4, queue ); std::vector host_results(4); bcl::copy(results.begin(), results.end(), host_results.begin(), queue); BOOST_CHECK((matrix * host_vectors[0]) == host_results[0]); BOOST_CHECK((matrix * host_vectors[1]) == host_results[1]); BOOST_CHECK((matrix * host_vectors[2]) == host_results[2]); BOOST_CHECK((matrix * host_vectors[3]) == host_results[3]); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/extra/test_interop_opencv.cpp000066400000000000000000000062451263566244600217560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#define BOOST_TEST_MODULE TestInteropOpenCV
#include
#include
#include
#include
#include
#include
#include "check_macros.hpp"
#include "context_setup.hpp"

namespace bcl = boost::compute;

// Copies a 1x4 float cv::Mat to a device vector, reverses it on the device
// and copies it back into the same cv::Mat.
BOOST_AUTO_TEST_CASE(opencv_mat_to_buffer)
{
    // create opencv mat
    cv::Mat mat(1, 4, CV_32F);
    mat.at(0, 0) = 0.0f;
    mat.at(0, 1) = 2.5f;
    mat.at(0, 2) = 4.1f;
    mat.at(0, 3) = 5.6f;

    // copy mat to gpu vector
    bcl::vector vector(4, context);
    bcl::opencv_copy_mat_to_buffer(mat, vector.begin(), queue);
    CHECK_RANGE_EQUAL(float, 4, vector, (0.0f, 2.5f, 4.1f, 5.6f));

    // reverse gpu vector and copy back to mat
    bcl::reverse(vector.begin(), vector.end(), queue);
    bcl::opencv_copy_buffer_to_mat(vector.begin(), mat, queue);
    BOOST_CHECK_EQUAL(mat.at(0), 5.6f);
    BOOST_CHECK_EQUAL(mat.at(1), 4.1f);
    BOOST_CHECK_EQUAL(mat.at(2), 2.5f);
    BOOST_CHECK_EQUAL(mat.at(3), 0.0f);
}

// Checks the image_format that opencv_get_mat_image_format() reports for
// several cv::Mat element types.
BOOST_AUTO_TEST_CASE(opencv_image_format)
{
    // 8-bit uchar BGRA
    BOOST_CHECK(
        bcl::opencv_get_mat_image_format(cv::Mat(32, 32, CV_8UC4)) ==
        bcl::image_format(CL_BGRA, CL_UNORM_INT8)
    );

    // 32-bit float
    BOOST_CHECK(
        bcl::opencv_get_mat_image_format(cv::Mat(32, 32, CV_32F)) ==
        bcl::image_format(CL_INTENSITY, CL_FLOAT)
    );

    // 32-bit float RGBA
    BOOST_CHECK(
        bcl::opencv_get_mat_image_format(cv::Mat(32, 32, CV_32FC4)) ==
        bcl::image_format(CL_RGBA, CL_FLOAT)
    );

    // 16-bit ushort BGRA
    BOOST_CHECK(
        bcl::opencv_get_mat_image_format(cv::Mat(32, 32, CV_16UC4)) ==
        bcl::image_format(CL_BGRA, CL_UNORM_INT16)
    );

    // 8-bit uchar
    BOOST_CHECK(
        bcl::opencv_get_mat_image_format(cv::Mat(32, 32, CV_8UC1)) ==
        bcl::image_format(CL_INTENSITY, CL_UNORM_INT8)
    );
}

// Uploads a 2x2 four-channel float cv::Mat as an image2d, reads it back and
// checks one pixel. The test returns early when the device does not report
// OpenCL 1.2.
BOOST_AUTO_TEST_CASE(opencv_float_mat_image2d)
{
    REQUIRES_OPENCL_VERSION(1,2);

    cv::Vec4f pixel;

    // create opencv mat
    cv::Mat mat(2, 2, CV_32FC4, cv::Scalar(100, 150, 200, 255));

    // transfer image to gpu
    bcl::image2d image =
        bcl::opencv_create_image2d_with_mat(
            mat, bcl::image2d::read_only, queue
        );

    // copy the data back to cpu
    bcl::opencv_copy_image_to_mat(image, mat, queue);

    // every pixel was initialised to (100, 150, 200, 255); sample (1,1)
    pixel = mat.at(1,1);
    BOOST_CHECK_EQUAL(pixel[0], 100.0f);
    BOOST_CHECK_EQUAL(pixel[1], 150.0f);
    BOOST_CHECK_EQUAL(pixel[2], 200.0f);
    BOOST_CHECK_EQUAL(pixel[3], 255.0f);
}

BOOST_AUTO_TEST_SUITE_END()
compute-0.5/test/extra/test_interop_opengl.cpp000066400000000000000000000011431263566244600217400ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#define BOOST_TEST_MODULE TestInteropOpenGL
#include
#include

// Placeholder: the body is empty, so this only verifies that the included
// headers compile.
BOOST_AUTO_TEST_CASE(opengl_buffer)
{
}
compute-0.5/test/extra/test_interop_qt.cpp000066400000000000000000000055311263566244600211050ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestInteropQt #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bcl = boost::compute; BOOST_AUTO_TEST_CASE(qimage_format) { BOOST_CHECK( bcl::qt_qimage_format_to_image_format(QImage::Format_RGB32) == bcl::image_format(CL_BGRA, CL_UNORM_INT8) ); } BOOST_AUTO_TEST_CASE(copy_qvector_to_device) { QList qvector; qvector.append(0); qvector.append(2); qvector.append(4); qvector.append(6); bcl::vector vector(4, context); bcl::copy(qvector.begin(), qvector.end(), vector.begin(), queue); CHECK_RANGE_EQUAL(int, 4, vector, (0, 2, 4, 6)); } BOOST_AUTO_TEST_CASE(copy_qlist_to_device) { QList list; list.append(1); list.append(3); list.append(5); list.append(7); bcl::vector vector(4, context); bcl::copy(list.begin(), list.end(), vector.begin(), queue); CHECK_RANGE_EQUAL(int, 4, vector, (1, 3, 5, 7)); } BOOST_AUTO_TEST_CASE(qvector_of_qpoint) { QVector qt_points; qt_points.append(QPoint(0, 1)); qt_points.append(QPoint(2, 3)); qt_points.append(QPoint(4, 5)); qt_points.append(QPoint(6, 7)); bcl::vector bcl_points(qt_points.size(), context); bcl::copy(qt_points.begin(), qt_points.end(), bcl_points.begin(), queue); } BOOST_AUTO_TEST_CASE(qvector_of_qpointf) { QVector qt_points; qt_points.append(QPointF(0.3f, 1.7f)); qt_points.append(QPointF(2.3f, 3.7f)); qt_points.append(QPointF(4.3f, 5.7f)); qt_points.append(QPointF(6.3f, 7.7f)); bcl::vector bcl_points(qt_points.size(), context); bcl::copy(qt_points.begin(), qt_points.end(), bcl_points.begin(), queue); } BOOST_AUTO_TEST_CASE(qvector_iterator) { using boost::compute::detail::is_contiguous_iterator; BOOST_STATIC_ASSERT(is_contiguous_iterator::iterator>::value == true); BOOST_STATIC_ASSERT(is_contiguous_iterator::const_iterator>::value == true); BOOST_STATIC_ASSERT(is_contiguous_iterator::iterator>::value == false); 
BOOST_STATIC_ASSERT(is_contiguous_iterator::const_iterator>::value == false); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/extra/test_interop_vtk.cpp000066400000000000000000000076521263566244600212730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestInteropVTK #include #include #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(bounds) { using compute::float4_; // create vtk points vtkNew points; points->InsertNextPoint(0.0, 0.0, 0.0); points->InsertNextPoint(1.0, 2.0, 1.0); points->InsertNextPoint(-1.0, -3.0, -1.0); points->InsertNextPoint(0.5, 2.5, 1.5); // copy points to vector on gpu compute::vector vector(points->GetNumberOfPoints(), context); compute::vtk_copy_points_to_buffer(points.GetPointer(), vector.begin(), queue); // compute bounds double bounds[6]; compute::vtk_compute_bounds(vector.begin(), vector.end(), bounds, queue); // check bounds BOOST_CHECK_CLOSE(bounds[0], -1.0, 1e-8); BOOST_CHECK_CLOSE(bounds[1], 1.0, 1e-8); BOOST_CHECK_CLOSE(bounds[2], -3.0, 1e-8); BOOST_CHECK_CLOSE(bounds[3], 2.5, 1e-8); BOOST_CHECK_CLOSE(bounds[4], -1.0, 1e-8); BOOST_CHECK_CLOSE(bounds[5], 1.5, 1e-8); } BOOST_AUTO_TEST_CASE(copy_uchar_array) { // create vtk uchar vector containing 3 RGBA colors vtkNew array; array->SetNumberOfComponents(4); unsigned char red[4] = { 255, 0, 0, 255 }; array->InsertNextTupleValue(red); unsigned char green[4] = { 0, 255, 0, 255 }; array->InsertNextTupleValue(green); 
unsigned char blue[4] = { 0, 0, 255, 255 }; array->InsertNextTupleValue(blue); // create vector on device and copy values from vtk array compute::vector vector(3, context); compute::vtk_copy_data_array_to_buffer( array.GetPointer(), compute::make_buffer_iterator(vector.get_buffer(), 0), queue ); // check values std::vector host_vector(3); compute::copy( vector.begin(), vector.end(), host_vector.begin(), queue ); BOOST_CHECK(host_vector[0] == compute::uchar4_(255, 0, 0, 255)); BOOST_CHECK(host_vector[1] == compute::uchar4_(0, 255, 0, 255)); BOOST_CHECK(host_vector[2] == compute::uchar4_(0, 0, 255, 255)); } BOOST_AUTO_TEST_CASE(sort_float_array) { // create vtk float array vtkNew array; array->InsertNextValue(2.5f); array->InsertNextValue(1.0f); array->InsertNextValue(6.5f); array->InsertNextValue(4.0f); // create vector on device and copy values from vtk array compute::vector vector(4, context); compute::vtk_copy_data_array_to_buffer(array.GetPointer(), vector.begin(), queue); // sort values on the gpu compute::sort(vector.begin(), vector.end(), queue); CHECK_RANGE_EQUAL(float, 4, vector, (1.0f, 2.5f, 4.0f, 6.5f)); // copy sorted values back to the vtk array compute::vtk_copy_buffer_to_data_array( vector.begin(), vector.end(), array.GetPointer(), queue ); BOOST_CHECK_EQUAL(array->GetValue(0), 1.0f); BOOST_CHECK_EQUAL(array->GetValue(1), 2.5f); BOOST_CHECK_EQUAL(array->GetValue(2), 4.0f); BOOST_CHECK_EQUAL(array->GetValue(3), 6.5f); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/extra/test_multiple_objects1.cpp000066400000000000000000000012721263566244600223440ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestMultipleObjects #include #include bool dummy_function(); BOOST_AUTO_TEST_CASE(multiple_objects) { // It is enough if the test compiles. BOOST_CHECK( dummy_function() ); } compute-0.5/test/extra/test_multiple_objects2.cpp000066400000000000000000000010011263566244600223330ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include bool dummy_function() { return true; } compute-0.5/test/opencl_version_check.hpp000066400000000000000000000011711263566244600207220ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Denis Demidov // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TEST_OPENCL_VERSION_CHECK_HPP #define BOOST_COMPUTE_TEST_OPENCL_VERSION_CHECK_HPP #define REQUIRES_OPENCL_VERSION(major, minor) \ if (!device.check_version(major, minor)) return #endif compute-0.5/test/quirks.hpp000066400000000000000000000040271263566244600160610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_TEST_QUIRKS_HPP #define BOOST_COMPUTE_TEST_QUIRKS_HPP #include #include #include // this file contains functions which check for 'quirks' or buggy // behavior in OpenCL implementations. this allows us to skip certain // tests when running on buggy platforms. // returns true if the device is a POCL device inline bool is_pocl_device(const boost::compute::device &device) { return device.platform().name() == "Portable Computing Language"; } // AMD platforms have a bug when using struct assignment. this affects // algorithms like fill() when used with pairs/tuples. // // see: https://community.amd.com/thread/166622 inline bool bug_in_struct_assignment(const boost::compute::device &device) { return boost::compute::detail::is_amd_device(device); } // clEnqueueSVMMemcpy() operation does not work on AMD devices. This affects // copy() algorithm. // // see: https://community.amd.com/thread/190585 inline bool bug_in_svmmemcpy(const boost::compute::device &device) { return boost::compute::detail::is_amd_device(device); } // returns true if the device supports image samplers. 
inline bool supports_image_samplers(const boost::compute::device &device) { // POCL does not yet support image samplers and gives the following // error when attempting to create one: // // pocl error: encountered unimplemented part of the OpenCL specs // in clCreateSampler.c:28 if(is_pocl_device(device)){ return false; } return true; } #endif // BOOST_COMPUTE_TEST_QUIRKS_HPP compute-0.5/test/test_accumulate.cpp000066400000000000000000000202201263566244600177110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestAccumulate #include #include #include #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(sum_int) { int data[] = { 2, 4, 6, 8 }; boost::compute::vector vector(data, data + 4, queue); BOOST_CHECK_EQUAL( boost::compute::accumulate(vector.begin(), vector.end(), 0, queue), 20 ); BOOST_CHECK_EQUAL( boost::compute::accumulate(vector.begin(), vector.end(), -10, queue), 10 ); BOOST_CHECK_EQUAL( boost::compute::accumulate(vector.begin(), vector.end(), 5, queue), 25 ); } BOOST_AUTO_TEST_CASE(product_int) { int data[] = { 2, 4, 6, 8 }; boost::compute::vector vector(data, data + 4, queue); BOOST_CHECK_EQUAL( boost::compute::accumulate( vector.begin(), vector.end(), 1, boost::compute::multiplies(), queue), 384 ); BOOST_CHECK_EQUAL( boost::compute::accumulate( vector.begin(), vector.end(), -1, boost::compute::multiplies(), queue), -384 ); BOOST_CHECK_EQUAL( boost::compute::accumulate( vector.begin(), vector.end(), 2, boost::compute::multiplies(), queue), 768 ); } BOOST_AUTO_TEST_CASE(quotient_int) { int data[] = { 
2, 8, 16 }; boost::compute::vector vector(data, data + 3, queue); BOOST_CHECK_EQUAL( boost::compute::accumulate( vector.begin(), vector.end(), 1024, boost::compute::divides(), queue ), 4 ); } BOOST_AUTO_TEST_CASE(sum_counting_iterator) { // sum 0 -> 9 BOOST_CHECK_EQUAL( boost::compute::accumulate( boost::compute::make_counting_iterator(0), boost::compute::make_counting_iterator(10), 0, boost::compute::plus(), queue ), 45 ); // sum 0 -> 9 + 7 BOOST_CHECK_EQUAL( boost::compute::accumulate( boost::compute::make_counting_iterator(0), boost::compute::make_counting_iterator(10), 7, boost::compute::plus(), queue ), 52 ); // sum 15 -> 24 BOOST_CHECK_EQUAL( boost::compute::accumulate( boost::compute::make_counting_iterator(15), boost::compute::make_counting_iterator(25), 0, boost::compute::plus(), queue ), 195 ); // sum -5 -> 10 BOOST_CHECK_EQUAL( boost::compute::accumulate( boost::compute::make_counting_iterator(-5), boost::compute::make_counting_iterator(10), 0, boost::compute::plus(), queue ), 30 ); // sum -5 -> 10 - 2 BOOST_CHECK_EQUAL( boost::compute::accumulate( boost::compute::make_counting_iterator(-5), boost::compute::make_counting_iterator(10), -2, boost::compute::plus(), queue ), 28 ); } BOOST_AUTO_TEST_CASE(sum_iota) { // size 0 boost::compute::vector vector(0, context); BOOST_CHECK_EQUAL( boost::compute::accumulate(vector.begin(), vector.end(), 0, queue), 0 ); BOOST_CHECK_EQUAL( boost::compute::accumulate(vector.begin(), vector.end(), 4, queue), 4 ); // size 50 vector.resize(50); boost::compute::iota(vector.begin(), vector.end(), 0, queue); BOOST_CHECK_EQUAL( boost::compute::accumulate(vector.begin(), vector.end(), 0, queue), 1225 ); BOOST_CHECK_EQUAL( boost::compute::accumulate(vector.begin(), vector.end(), 11, queue), 1236 ); // size 1000 vector.resize(1000); boost::compute::iota(vector.begin(), vector.end(), 0, queue); BOOST_CHECK_EQUAL( boost::compute::accumulate(vector.begin(), vector.end(), 0, queue), 499500 ); BOOST_CHECK_EQUAL( 
boost::compute::accumulate(vector.begin(), vector.end(), -45, queue), 499455 ); // size 1025 vector.resize(1025); boost::compute::iota(vector.begin(), vector.end(), 0, queue); BOOST_CHECK_EQUAL( boost::compute::accumulate(vector.begin(), vector.end(), 0, queue), 524800 ); BOOST_CHECK_EQUAL( boost::compute::accumulate(vector.begin(), vector.end(), 2, queue), 524802 ); } BOOST_AUTO_TEST_CASE(min_and_max) { using boost::compute::int2_; int data[] = { 5, 3, 1, 6, 4, 2 }; boost::compute::vector vector(data, data + 6, queue); BOOST_COMPUTE_FUNCTION(int2_, min_and_max, (int2_ accumulator, const int value), { return (int2)((min)(accumulator.x, value), (max)(accumulator.y, value)); }); int2_ result = boost::compute::accumulate( vector.begin(), vector.end(), int2_(100, -100), min_and_max, queue ); BOOST_CHECK_EQUAL(result[0], 1); BOOST_CHECK_EQUAL(result[1], 6); } BOOST_AUTO_TEST_CASE(min_max) { float data[] = { 1.2f, 5.5f, 0.1f, 9.6f, 4.2f, 6.7f, 9.0f, 3.4f }; boost::compute::vector vec(data, data + 8, queue); using ::boost::compute::min; using ::boost::compute::max; float min_value = boost::compute::accumulate( vec.begin(), vec.end(), (std::numeric_limits::max)(), min(), queue ); BOOST_CHECK_EQUAL(min_value, 0.1f); float max_value = boost::compute::accumulate( vec.begin(), vec.end(), (std::numeric_limits::min)(), max(), queue ); BOOST_CHECK_EQUAL(max_value, 9.6f); // find min with init less than any value in the array min_value = boost::compute::accumulate( vec.begin(), vec.end(), -1.f, min(), queue ); BOOST_CHECK_EQUAL(min_value, -1.f); // find max with init greater than any value in the array max_value = boost::compute::accumulate( vec.begin(), vec.end(), 10.f, max(), queue ); BOOST_CHECK_EQUAL(max_value, 10.f); } template void ensure_std_accumulate_equality(const std::vector &data, boost::compute::command_queue &queue) { boost::compute::mapped_view view(&data[0], data.size(), queue.get_context()); BOOST_CHECK_EQUAL( std::accumulate(data.begin(), data.end(), 0), 
boost::compute::accumulate(view.begin(), view.end(), 0, queue) ); } BOOST_AUTO_TEST_CASE(std_accumulate_equality) { // test accumulate() with int int data1[] = { 1, 2, 3, 4 }; std::vector vec1(data1, data1 + 4); ensure_std_accumulate_equality(vec1, queue); vec1.resize(10000); std::fill(vec1.begin(), vec1.end(), 2); ensure_std_accumulate_equality(vec1, queue); // test accumulate() with float float data2[] = { 1.2f, 2.3f, 4.5f, 6.7f, 8.9f }; std::vector vec2(data2, data2 + 5); ensure_std_accumulate_equality(vec2, queue); vec2.resize(10000); std::fill(vec2.begin(), vec2.end(), 1.01f); ensure_std_accumulate_equality(vec2, queue); // test accumulate() with double if(device.supports_extension("cl_khr_fp64")){ double data3[] = { 1.2, 2.3, 4.5, 6.7, 8.9 }; std::vector vec3(data3, data3 + 5); ensure_std_accumulate_equality(vec3, queue); vec3.resize(10000); std::fill(vec3.begin(), vec3.end(), 2.02); ensure_std_accumulate_equality(vec3, queue); } } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_adjacent_difference.cpp000066400000000000000000000044411263566244600215200ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestAdjacentDifference #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(adjacent_difference_int) { compute::vector a(5, context); compute::iota(a.begin(), a.end(), 0, queue); CHECK_RANGE_EQUAL(int, 5, a, (0, 1, 2, 3, 4)); compute::vector b(5, context); compute::vector::iterator iter = compute::adjacent_difference(a.begin(), a.end(), b.begin(), queue); BOOST_CHECK(iter == b.end()); CHECK_RANGE_EQUAL(int, 5, b, (0, 1, 1, 1, 1)); int data[] = { 1, 9, 36, 48, 81 }; compute::copy(data, data + 5, a.begin(), queue); CHECK_RANGE_EQUAL(int, 5, a, (1, 9, 36, 48, 81)); iter = compute::adjacent_difference(a.begin(), a.end(), b.begin(), queue); BOOST_CHECK(iter == b.end()); CHECK_RANGE_EQUAL(int, 5, b, (1, 8, 27, 12, 33)); } BOOST_AUTO_TEST_CASE(all_same) { compute::vector input(1000, context); compute::fill(input.begin(), input.end(), 42, queue); compute::vector output(input.size(), context); compute::adjacent_difference( input.begin(), input.end(), output.begin(), queue ); int first; compute::copy_n(output.begin(), 1, &first, queue); BOOST_CHECK_EQUAL(first, 42); using compute::lambda::_1; BOOST_CHECK( compute::all_of(output.begin() + 1, output.end(), _1 == 0, queue) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_adjacent_find.cpp000066400000000000000000000040461263566244600203470ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestAdjacentFind #include #include #include #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(adjacent_find_int) { int data[] = { 1, 3, 5, 5, 6, 7, 7, 8 }; compute::vector vec(data, data + 8, queue); compute::vector::iterator iter = compute::adjacent_find(vec.begin(), vec.end(), queue); BOOST_CHECK(iter == vec.begin() + 2); } BOOST_AUTO_TEST_CASE(adjacent_find_int2) { using compute::int2_; compute::vector vec(context); vec.push_back(int2_(1, 2), queue); vec.push_back(int2_(3, 4), queue); vec.push_back(int2_(5, 6), queue); vec.push_back(int2_(7, 8), queue); vec.push_back(int2_(7, 8), queue); compute::vector::iterator iter = compute::adjacent_find(vec.begin(), vec.end(), queue); BOOST_CHECK(iter == vec.begin() + 3); } BOOST_AUTO_TEST_CASE(adjacent_find_iota) { compute::vector vec(2048, context); compute::iota(vec.begin(), vec.end(), 1, queue); BOOST_VERIFY( compute::adjacent_find(vec.begin(), vec.end(), queue) == vec.end() ); } BOOST_AUTO_TEST_CASE(adjacent_find_fill) { compute::vector vec(2048, context); compute::fill(vec.begin(), vec.end(), 7, queue); BOOST_VERIFY( compute::adjacent_find(vec.begin(), vec.end(), queue) == vec.begin() ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_amd_cpp_kernel_language.cpp000066400000000000000000000040741263566244600224050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestAmdCppKernel #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(amd_template_function) { if(!compute::detail::is_amd_device(device)){ std::cerr << "skipping amd_template_function test: c++ static kernel " "language is only supported on AMD devices." << std::endl; return; } const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( template inline T square(const T x) { return x * x; } template __kernel void square_kernel(__global T *data) { const uint i = get_global_id(0); data[i] = square(data[i]); } template __attribute__((mangled_name(square_kernel_int))) __kernel void square_kernel(__global int *data); ); int data[] = { 1, 2, 3, 4 }; compute::vector vec(data, data + 4, queue); compute::program square_program = compute::program::build_with_source(source, context, "-x clc++"); compute::kernel square_kernel(square_program, "square_kernel_int"); square_kernel.set_arg(0, vec); queue.enqueue_1d_range_kernel(square_kernel, 0, vec.size(), 4); CHECK_RANGE_EQUAL(int, 4, vec, (1, 4, 9, 16)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_any_all_none_of.cpp000066400000000000000000000062411263566244600207170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#define BOOST_TEST_MODULE TestAnyAllNoneOf
#include
#include
#include
#include
#include
#include
#include
#include
#include "context_setup.hpp"

namespace bc = boost::compute;
namespace compute = boost::compute;

// Exercises any_of(), none_of() and all_of() on { 1, ..., 6 } with lambda
// predicates. These calls do not pass a command queue argument.
BOOST_AUTO_TEST_CASE(any_all_none_of)
{
    int data[] = { 1, 2, 3, 4, 5, 6 };
    bc::vector v(data, data + 6, queue);

    using ::boost::compute::_1;

    BOOST_CHECK(bc::any_of(v.begin(), v.end(), _1 == 6) == true);
    BOOST_CHECK(bc::any_of(v.begin(), v.end(), _1 == 9) == false);
    BOOST_CHECK(bc::none_of(v.begin(), v.end(), _1 == 6) == false);
    BOOST_CHECK(bc::none_of(v.begin(), v.end(), _1 == 9) == true);
    BOOST_CHECK(bc::all_of(v.begin(), v.end(), _1 == 6) == false);
    BOOST_CHECK(bc::all_of(v.begin(), v.end(), _1 < 9) == true);
    BOOST_CHECK(bc::all_of(v.begin(), v.end(), _1 < 6) == false);
    BOOST_CHECK(bc::all_of(v.begin(), v.end(), _1 >= 1) == true);
}

// Exercises the isinf / isnan / isfinite lambda functions on data that
// contains NaN and infinity values.
BOOST_AUTO_TEST_CASE(any_nan_inf)
{
    using ::boost::compute::_1;
    using ::boost::compute::lambda::isinf;
    using ::boost::compute::lambda::isnan;
    using ::boost::compute::lambda::isfinite;

    float nan = std::sqrt(-1.f);
    float inf = std::numeric_limits::infinity();

    // layout: indices 0-1 finite, 2-3 NaN, 4 finite, 5 inf, 6 finite, 7 inf
    float data[] = { 1.2f, 2.3f, nan, nan, 3.4f, inf, 4.5f, inf };
    compute::vector vector(data, data + 8, queue);

    BOOST_CHECK(compute::any_of(vector.begin(), vector.end(),
                                isinf(_1) || isnan(_1), queue) == true);
    BOOST_CHECK(compute::any_of(vector.begin(), vector.end(),
                                isfinite(_1), queue) == true);
    BOOST_CHECK(compute::all_of(vector.begin(), vector.end(),
                                isfinite(_1), queue) == false);
    BOOST_CHECK(compute::all_of(vector.begin(), vector.begin() + 2,
                                isfinite(_1), queue) == true);
    BOOST_CHECK(compute::all_of(vector.begin() + 2, vector.begin() + 4,
                                isnan(_1), queue) == true);
    BOOST_CHECK(compute::none_of(vector.begin(), vector.end(),
                                 isinf(_1), queue) == false);
    BOOST_CHECK(compute::none_of(vector.begin(), vector.begin() + 4,
                                 isinf(_1), queue) == true);
}

// The statement between the two "//! [any_of]" markers is delimited as a
// named snippet; keep the markers on their own lines around it.
BOOST_AUTO_TEST_CASE(any_of_doctest)
{
    using boost::compute::lambda::_1;

    int data[] = { 1, 2, 3, 4 };
    boost::compute::vector v(data, data + 4, queue);

    bool result =
//! [any_of]
boost::compute::any_of(v.begin(), v.end(), _1 < 0, queue);
//! [any_of]

    BOOST_CHECK(result == false);
}

BOOST_AUTO_TEST_SUITE_END()
compute-0.5/test/test_array.cpp000066400000000000000000000052361263566244600167160ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#define BOOST_TEST_MODULE TestArray
#include
#include
#include
#include
#include
#include "check_macros.hpp"
#include "context_setup.hpp"

// Compile-time concept checks for the array container and its iterators.
// The SequenceConcept check is commented out in the original.
BOOST_AUTO_TEST_CASE(concept_check)
{
    BOOST_CONCEPT_ASSERT((boost::Container >));
//    BOOST_CONCEPT_ASSERT((boost::SequenceConcept >));
    BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator::iterator>));
    BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator::const_iterator>));
}

// size() must report 0 for the empty array and 10 for the 10-element array.
BOOST_AUTO_TEST_CASE(size)
{
    boost::compute::array empty_array(context);
    BOOST_CHECK_EQUAL(empty_array.size(), size_t(0));

    boost::compute::array array10(context);
    BOOST_CHECK_EQUAL(array10.size(), size_t(10));
}

// at() must return the stored elements and throw std::out_of_range for an
// index past the last element written here.
BOOST_AUTO_TEST_CASE(at)
{
    // constructed without an explicit context argument
    boost::compute::array array;
    array[0] = 3;
    array[1] = -2;
    array[2] = 5;
    BOOST_CHECK_EQUAL(array.at(0), 3);
    BOOST_CHECK_EQUAL(array.at(1), -2);
    BOOST_CHECK_EQUAL(array.at(2), 5);
    BOOST_CHECK_THROW(array.at(3), std::out_of_range);
}

// Copies a device vector into an array.
BOOST_AUTO_TEST_CASE(copy_from_vector)
{
    int data[] = { 3, 6, 9, 12 };
    boost::compute::vector vector(data, data + 4, queue);

    boost::compute::array array(context);
    boost::compute::copy(vector.begin(), vector.end(), array.begin(), queue);
    CHECK_RANGE_EQUAL(int, 4, array, (3, 6, 9, 12));
}

// fill() must overwrite every element, also when called a second time.
BOOST_AUTO_TEST_CASE(fill)
{
    boost::compute::array array(context);
    array.fill(0);
    CHECK_RANGE_EQUAL(int, 4, array, (0, 0, 0, 0));

    array.fill(17);
    CHECK_RANGE_EQUAL(int, 4, array, (17, 17, 17, 17));
}

// swap() must exchange the contents of two arrays.
BOOST_AUTO_TEST_CASE(swap)
{
    int data[] = { 1, 2, 6, 9 };
    boost::compute::array a(context);
    boost::compute::copy(data, data + 4, a.begin(), queue);
    CHECK_RANGE_EQUAL(int, 4, a, (1, 2, 6, 9));

    boost::compute::array b(context);
    b.fill(3);
    CHECK_RANGE_EQUAL(int, 4, b, (3, 3, 3, 3));

    a.swap(b);
    CHECK_RANGE_EQUAL(int, 4, a, (3, 3, 3, 3));
    CHECK_RANGE_EQUAL(int, 4, b, (1, 2, 6, 9));
}

BOOST_AUTO_TEST_SUITE_END()
compute-0.5/test/test_async_wait.cpp000066400000000000000000000044161263566244600177400ustar00rootroot00000000000000
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestAsyncWait #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(empty) { } #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES BOOST_AUTO_TEST_CASE(wait_for_copy) { // wait list compute::wait_list events; // create host data array int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; // create vector on the device compute::vector vector(8, context); // fill vector with 9's compute::future fill_future = compute::fill_async(vector.begin(), vector.end(), 9, queue); // wait for fill() to complete compute::wait_for_all(fill_future); // check data on the device CHECK_RANGE_EQUAL(int, 8, vector, (9, 9, 9, 9, 9, 9, 9, 9)); // copy each pair of values independently and asynchronously compute::event copy1 = queue.enqueue_write_buffer_async( vector.get_buffer(), 0 * sizeof(int), 2 * sizeof(int), data + 0 ); compute::event copy2 = queue.enqueue_write_buffer_async( vector.get_buffer(), 2 * sizeof(int), 2 * sizeof(int), data + 2 ); compute::event copy3 = queue.enqueue_write_buffer_async( vector.get_buffer(), 4 * sizeof(int), 2 * sizeof(int), data + 4 ); compute::event copy4 = queue.enqueue_write_buffer_async( vector.get_buffer(), 6 * sizeof(int), 2 * sizeof(int), data + 6 ); // wait for all copies to complete compute::wait_for_all(copy1, copy2, copy3, copy4); // check data on the device CHECK_RANGE_EQUAL(int, 8, vector, (1, 2, 3, 4, 5, 6, 7, 8)); } #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_async_wait_guard.cpp000066400000000000000000000023611263566244600211170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // 
http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestAsyncWaitGuard #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(wait_for_fill) { compute::vector vector(8192, context); compute::event fill_event = compute::fill_async(vector.begin(), vector.end(), 9, queue).get_event(); BOOST_CHECK(fill_event.status() != CL_COMPLETE); { compute::wait_guard fill_guard(fill_event); } BOOST_CHECK(fill_event.status() == CL_COMPLETE); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_bernoulli_distribution.cpp000066400000000000000000000024661263566244600223740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestBernoulliDistribution #include #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(bernoulli_distribution_doctest) { boost::compute::vector vec(10, context); //! [generate] // initialize the default random engine boost::compute::default_random_engine engine(queue); // setup the bernoulli distribution to produce booleans // with parameter p = 0.25 boost::compute::bernoulli_distribution distribution(0.25f); // generate the random values and store them to 'vec' distribution.generate(vec.begin(), vec.end(), engine, queue); //! 
[generate] } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_binary_search.cpp000066400000000000000000000123161263566244600204060ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestBinarySearch #include #include #include #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(binary_search_int) { // test data = { 1, ..., 2, ..., 4, 4, 5, 7, ..., 9, ..., 10 } boost::compute::vector vector(size_t(4096), int(1), queue); boost::compute::vector::iterator first = vector.begin() + 128; boost::compute::vector::iterator last = first + (1024 - 128); boost::compute::fill(first, last, int(2), queue); last.write(4, queue); last++; last.write(4, queue); last++; last.write(5, queue); last++; first = last; last = first + 127; boost::compute::fill(first, last, 7, queue); first = last; last = vector.end() - 1; boost::compute::fill(first, last, 9, queue); last.write(10, queue); queue.finish(); BOOST_CHECK(boost::compute::binary_search(vector.begin(), vector.end(), int(0), queue) == false); BOOST_CHECK(boost::compute::binary_search(vector.begin(), vector.end(), int(1), queue) == true); BOOST_CHECK(boost::compute::binary_search(vector.begin(), vector.end(), int(2), queue) == true); BOOST_CHECK(boost::compute::binary_search(vector.begin(), vector.end(), int(3), queue) == false); BOOST_CHECK(boost::compute::binary_search(vector.begin(), vector.end(), int(4), queue) == true); BOOST_CHECK(boost::compute::binary_search(vector.begin(), vector.end(), int(5), queue) == true); BOOST_CHECK(boost::compute::binary_search(vector.begin(), 
vector.end(), int(6), queue) == false); BOOST_CHECK(boost::compute::binary_search(vector.begin(), vector.end(), int(7), queue) == true); BOOST_CHECK(boost::compute::binary_search(vector.begin(), vector.end(), int(8), queue) == false); } BOOST_AUTO_TEST_CASE(range_bounds_int) { // test data = { 1, ..., 2, ..., 4, 4, 5, 7, ..., 9, ..., 10 } boost::compute::vector vector(size_t(4096), int(1), queue); boost::compute::vector::iterator first = vector.begin() + 128; boost::compute::vector::iterator last = first + (1024 - 128); boost::compute::fill(first, last, int(2), queue); last.write(4, queue); last++; // 1024 last.write(4, queue); last++; // 1025 last.write(5, queue); last++; // 1026 first = last; last = first + 127; boost::compute::fill(first, last, 7, queue); first = last; last = vector.end() - 1; boost::compute::fill(first, last, 9, queue); last.write(10, queue); queue.finish(); BOOST_CHECK(boost::compute::lower_bound(vector.begin(), vector.end(), int(0), queue) == vector.begin()); BOOST_CHECK(boost::compute::upper_bound(vector.begin(), vector.end(), int(0), queue) == vector.begin()); BOOST_CHECK(boost::compute::lower_bound(vector.begin(), vector.end(), int(1), queue) == vector.begin()); BOOST_CHECK(boost::compute::upper_bound(vector.begin(), vector.end(), int(1), queue) == vector.begin() + 128); BOOST_CHECK(boost::compute::lower_bound(vector.begin(), vector.end(), int(2), queue) == vector.begin() + 128); BOOST_CHECK(boost::compute::upper_bound(vector.begin(), vector.end(), int(2), queue) == vector.begin() + 1024); BOOST_CHECK(boost::compute::lower_bound(vector.begin(), vector.end(), int(4), queue) == vector.begin() + 1024); BOOST_CHECK(boost::compute::upper_bound(vector.begin(), vector.end(), int(4), queue) == vector.begin() + 1026); BOOST_CHECK(boost::compute::lower_bound(vector.begin(), vector.end(), int(5), queue) == vector.begin() + 1026); BOOST_CHECK(boost::compute::upper_bound(vector.begin(), vector.end(), int(5), queue) == vector.begin() + 1027); 
BOOST_CHECK(boost::compute::lower_bound(vector.begin(), vector.end(), int(6), queue) == vector.begin() + 1027); BOOST_CHECK(boost::compute::upper_bound(vector.begin(), vector.end(), int(6), queue) == vector.begin() + 1027); BOOST_CHECK(boost::compute::lower_bound(vector.begin(), vector.end(), int(7), queue) == vector.begin() + 1027); BOOST_CHECK(boost::compute::upper_bound(vector.begin(), vector.end(), int(7), queue) == vector.begin() + (1027 + 127)); BOOST_CHECK(boost::compute::lower_bound(vector.begin(), vector.end(), int(9), queue) == vector.begin() + (1027 + 127)); BOOST_CHECK(boost::compute::upper_bound(vector.begin(), vector.end(), int(9), queue) == vector.end() - 1); BOOST_CHECK(boost::compute::lower_bound(vector.begin(), vector.end(), int(10), queue) == vector.end() - 1); BOOST_CHECK(boost::compute::upper_bound(vector.begin(), vector.end(), int(10), queue) == vector.end()); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_buffer.cpp000066400000000000000000000121721263566244600170460ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestBuffer #include #include #include #include #ifdef BOOST_COMPUTE_USE_CPP11 #include #include #endif // BOOST_COMPUTE_USE_CPP11 #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(size) { bc::buffer buffer(context, 100); BOOST_CHECK_EQUAL(buffer.size(), size_t(100)); BOOST_VERIFY(buffer.max_size() > buffer.size()); } BOOST_AUTO_TEST_CASE(cl_context) { bc::buffer buffer(context, 100); BOOST_VERIFY(buffer.get_context() == context); } BOOST_AUTO_TEST_CASE(equality_operator) { bc::buffer a(context, 10); bc::buffer b(context, 10); BOOST_VERIFY(a == a); BOOST_VERIFY(b == b); BOOST_VERIFY(!(a == b)); BOOST_VERIFY(a != b); a = b; BOOST_VERIFY(a == b); BOOST_VERIFY(!(a != b)); } BOOST_AUTO_TEST_CASE(construct_from_cl_mem) { // create cl_mem cl_mem mem = clCreateBuffer(context, CL_MEM_READ_WRITE, 16, 0, 0); BOOST_VERIFY(mem); // create boost::compute::buffer boost::compute::buffer buffer(mem); // check buffer BOOST_CHECK(buffer.get() == mem); BOOST_CHECK(buffer.get_context() == context); BOOST_CHECK_EQUAL(buffer.size(), size_t(16)); // cleanup cl_mem clReleaseMemObject(mem); } BOOST_AUTO_TEST_CASE(reference_count) { using boost::compute::uint_; boost::compute::buffer buf(context, 16); BOOST_CHECK_GE(buf.reference_count(), uint_(1)); } BOOST_AUTO_TEST_CASE(get_size) { boost::compute::buffer buf(context, 16); BOOST_CHECK_EQUAL(buf.size(), size_t(16)); BOOST_CHECK_EQUAL(buf.get_info(), size_t(16)); BOOST_CHECK_EQUAL(buf.get_info(CL_MEM_SIZE), size_t(16)); } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES BOOST_AUTO_TEST_CASE(move_constructor) { boost::compute::buffer buffer1(context, 16); BOOST_CHECK(buffer1.get() != 0); BOOST_CHECK_EQUAL(buffer1.size(), size_t(16)); boost::compute::buffer buffer2(std::move(buffer1)); BOOST_CHECK(buffer1.get() == 0); BOOST_CHECK(buffer2.get() != 0); BOOST_CHECK_EQUAL(buffer2.size(), size_t(16)); } #endif // 
BOOST_COMPUTE_NO_RVALUE_REFERENCES BOOST_AUTO_TEST_CASE(clone_buffer) { boost::compute::buffer buffer1(context, 16); boost::compute::buffer buffer2 = buffer1.clone(queue); BOOST_CHECK(buffer1.get() != buffer2.get()); BOOST_CHECK_EQUAL(buffer1.size(), buffer2.size()); BOOST_CHECK(buffer1.get_memory_flags() == buffer2.get_memory_flags()); } #ifdef CL_VERSION_1_1 static void BOOST_COMPUTE_CL_CALLBACK destructor_callback_function(cl_mem memobj, void *user_data) { (void) memobj; bool *flag = static_cast(user_data); *flag = true; } BOOST_AUTO_TEST_CASE(destructor_callback) { REQUIRES_OPENCL_VERSION(1,2); bool invoked = false; { boost::compute::buffer buf(context, 128); buf.set_destructor_callback(destructor_callback_function, &invoked); } BOOST_CHECK(invoked == true); } #ifdef BOOST_COMPUTE_USE_CPP11 std::mutex callback_mutex; std::condition_variable callback_condition_variable; static void BOOST_COMPUTE_CL_CALLBACK destructor_templated_callback_function(bool *flag) { std::lock_guard lock(callback_mutex); *flag = true; callback_condition_variable.notify_one(); } BOOST_AUTO_TEST_CASE(destructor_templated_callback) { bool invoked = false; { boost::compute::buffer buf(context, 128); buf.set_destructor_callback(boost::bind(destructor_templated_callback_function, &invoked)); } std::unique_lock lock(callback_mutex); callback_condition_variable.wait_for( lock, std::chrono::seconds(1), [&](){ return invoked; } ); BOOST_CHECK(invoked == true); } #endif // BOOST_COMPUTE_USE_CPP11 BOOST_AUTO_TEST_CASE(create_subbuffer) { REQUIRES_OPENCL_VERSION(1, 1); size_t base_addr_align = device.get_info() / 8; size_t multiplier = 16; size_t buffer_size = base_addr_align * multiplier; size_t subbuffer_size = 64; boost::compute::buffer buffer(context, buffer_size); for(size_t i = 0; i < multiplier; ++i) { boost::compute::buffer subbuffer = buffer.create_subbuffer( boost::compute::buffer::read_write, base_addr_align * i, subbuffer_size); BOOST_CHECK(buffer.get() != subbuffer.get()); 
BOOST_CHECK_EQUAL(subbuffer.size(), subbuffer_size); } } #endif // CL_VERSION_1_1 BOOST_AUTO_TEST_CASE(create_buffer_doctest) { //! [constructor] boost::compute::buffer buf(context, 32 * sizeof(float)); //! [constructor] BOOST_CHECK_EQUAL(buf.size(), 32 * sizeof(float)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_buffer_allocator.cpp000066400000000000000000000016121263566244600211030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestBufferAllocator #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(allocate) { compute::buffer_allocator allocator(context); typedef compute::buffer_allocator::pointer pointer; pointer x = allocator.allocate(10); allocator.deallocate(x, 10); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_buffer_iterator.cpp000066400000000000000000000037311263566244600207600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestBufferIterator #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(value_type) { BOOST_STATIC_ASSERT(( boost::is_same< compute::buffer_iterator::value_type, int >::value )); BOOST_STATIC_ASSERT(( boost::is_same< compute::buffer_iterator::value_type, float >::value )); } BOOST_AUTO_TEST_CASE(reverse_external_buffer_doctest) { int data[] = { 1, 2, 3 }; compute::buffer buffer(context, 3 * sizeof(int)); queue.enqueue_write_buffer(buffer, 0, 3 * sizeof(int), data); cl_mem external_mem_obj = buffer.get(); //! [reverse_external_buffer] // create a buffer object wrapping the cl_mem object boost::compute::buffer buf(external_mem_obj); // reverse the values in the buffer boost::compute::reverse( boost::compute::make_buffer_iterator(buf, 0), boost::compute::make_buffer_iterator(buf, 3), queue ); //! [reverse_external_buffer] queue.enqueue_read_buffer(buf, 0, 3 * sizeof(int), data); BOOST_CHECK_EQUAL(data[0], 3); BOOST_CHECK_EQUAL(data[1], 2); BOOST_CHECK_EQUAL(data[2], 1); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_clamp_range.cpp000066400000000000000000000023621263566244600200450ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestClampRange #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(clamp_float_range) { float data[] = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f }; compute::vector input(data, data + 8, queue); compute::vector result(8, context); compute::experimental::clamp_range( input.begin(), input.end(), result.begin(), 3.f, // low 6.f, // high queue ); CHECK_RANGE_EQUAL( float, 8, result, (3.f, 3.f, 3.f, 4.f, 5.f, 6.f, 6.f, 6.f) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_closure.cpp000066400000000000000000000142071263566244600172520ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestClosure #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(add_two) { int two = 2; BOOST_COMPUTE_CLOSURE(int, add_two, (int x), (two), { return x + two; }); int data[] = { 1, 2, 3, 4 }; compute::vector vector(data, data + 4, queue); compute::transform( vector.begin(), vector.end(), vector.begin(), add_two, queue ); CHECK_RANGE_EQUAL(int, 4, vector, (3, 4, 5, 6)); } BOOST_AUTO_TEST_CASE(add_two_and_pi) { int two = 2; float pi = 3.14f; BOOST_COMPUTE_CLOSURE(float, add_two_and_pi, (float x), (two, pi), { return x + two + pi; }); float data[] = { 1.9f, 2.2f, 3.4f, 4.7f }; compute::vector vector(data, data + 4, queue); compute::transform( vector.begin(), vector.end(), vector.begin(), add_two_and_pi, queue ); std::vector results(4); compute::copy(vector.begin(), vector.end(), results.begin(), queue); BOOST_CHECK_CLOSE(results[0], 7.04f, 1e-6); BOOST_CHECK_CLOSE(results[1], 7.34f, 1e-6); BOOST_CHECK_CLOSE(results[2], 8.54f, 1e-6); BOOST_CHECK_CLOSE(results[3], 9.84f, 1e-6); } BOOST_AUTO_TEST_CASE(add_y) { // setup input and output vectors int data[] = { 1, 2, 3, 4 }; compute::vector input(data, data + 4, queue); compute::vector output(4, context); // make closure which adds 'y' to each value int y = 2; BOOST_COMPUTE_CLOSURE(int, add_y, (int x), (y), { return x + y; }); compute::transform( input.begin(), input.end(), output.begin(), add_y, queue ); CHECK_RANGE_EQUAL(int, 4, output, (3, 4, 5, 6)); // change y and run again y = 4; compute::transform( input.begin(), input.end(), output.begin(), add_y, queue ); CHECK_RANGE_EQUAL(int, 4, output, (5, 6, 7, 8)); } BOOST_AUTO_TEST_CASE(scale_add_vec) { const int N = 10; float s = 4.5; compute::vector a(N, context); compute::vector b(N, context); a.assign(N, 1.0f, queue); 
b.assign(N, 2.0f, queue); BOOST_COMPUTE_CLOSURE(float, scaleAddVec, (float b, float a), (s), { return b * s + a; }); compute::transform(b.begin(), b.end(), a.begin(), b.begin(), scaleAddVec, queue); } BOOST_AUTO_TEST_CASE(capture_vector) { int data[] = { 6, 7, 8, 9 }; compute::vector vec(data, data + 4, queue); BOOST_COMPUTE_CLOSURE(int, get_vec, (int i), (vec), { return vec[i]; }); // run using a counting iterator to copy from vec to output compute::vector output(4, context); compute::transform( compute::make_counting_iterator(0), compute::make_counting_iterator(4), output.begin(), get_vec, queue ); CHECK_RANGE_EQUAL(int, 4, output, (6, 7, 8, 9)); // fill vec with 4's and run again compute::fill(vec.begin(), vec.end(), 4, queue); compute::transform( compute::make_counting_iterator(0), compute::make_counting_iterator(4), output.begin(), get_vec, queue ); CHECK_RANGE_EQUAL(int, 4, output, (4, 4, 4, 4)); } BOOST_AUTO_TEST_CASE(capture_array) { int data[] = { 1, 2, 3, 4 }; compute::array array(context); compute::copy(data, data + 4, array.begin(), queue); BOOST_COMPUTE_CLOSURE(int, negative_array_value, (int i), (array), { return -array[i]; }); compute::vector output(4, context); compute::transform( compute::make_counting_iterator(0), compute::make_counting_iterator(4), output.begin(), negative_array_value, queue ); CHECK_RANGE_EQUAL(int, 4, output, (-1, -2, -3, -4)); } BOOST_AUTO_TEST_CASE(triangle_area) { using compute::uint4_; using compute::float4_; compute::vector triangle_indices(context); compute::vector triangle_vertices(context); triangle_vertices.push_back(float4_(0, 0, 0, 1), queue); triangle_vertices.push_back(float4_(1, 1, 0, 1), queue); triangle_vertices.push_back(float4_(1, 0, 0, 1), queue); triangle_vertices.push_back(float4_(2, 0, 0, 1), queue); triangle_indices.push_back(uint4_(0, 1, 2, 0), queue); triangle_indices.push_back(uint4_(2, 1, 3, 0), queue); queue.finish(); BOOST_COMPUTE_CLOSURE(float, triangle_area, (const uint4_ i), (triangle_vertices), 
{ // load triangle vertices const float4 a = triangle_vertices[i.x]; const float4 b = triangle_vertices[i.y]; const float4 c = triangle_vertices[i.z]; // return area of triangle return length(cross(b-a, c-a)) / 2; }); // compute area of each triangle compute::vector triangle_areas(triangle_indices.size(), context); compute::transform( triangle_indices.begin(), triangle_indices.end(), triangle_areas.begin(), triangle_area, queue ); // compute total area of all triangles float total_area = 0; compute::transform_reduce( triangle_indices.begin(), triangle_indices.end(), &total_area, triangle_area, compute::plus(), queue ); BOOST_CHECK_CLOSE(total_area, 1.f, 1e-6); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_command_queue.cpp000066400000000000000000000211661263566244600204220ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestCommandQueue #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(get_context) { BOOST_VERIFY(queue.get_context() == context); BOOST_VERIFY(queue.get_info() == context.get()); } BOOST_AUTO_TEST_CASE(get_device) { BOOST_VERIFY(queue.get_info() == device.get()); } BOOST_AUTO_TEST_CASE(equality_operator) { compute::command_queue queue1(context, device); BOOST_CHECK(queue1 == queue1); compute::command_queue queue2 = queue1; BOOST_CHECK(queue1 == queue2); compute::command_queue queue3(context, device); BOOST_CHECK(queue1 != queue3); } BOOST_AUTO_TEST_CASE(event_profiling) { bc::command_queue queue(context, device, bc::command_queue::enable_profiling); int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; bc::buffer buffer(context, sizeof(data)); bc::event event = queue.enqueue_write_buffer_async(buffer, 0, sizeof(data), static_cast(data)); queue.finish(); event.get_profiling_info(bc::event::profiling_command_queued); event.get_profiling_info(bc::event::profiling_command_submit); event.get_profiling_info(bc::event::profiling_command_start); event.get_profiling_info(bc::event::profiling_command_end); } BOOST_AUTO_TEST_CASE(kernel_profiling) { // create queue with profiling enabled boost::compute::command_queue queue( context, device, boost::compute::command_queue::enable_profiling ); // input data int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; boost::compute::buffer buffer(context, sizeof(data)); // copy input data to device queue.enqueue_write_buffer(buffer, 0, sizeof(data), data); // setup kernel const char source[] = "__kernel void iscal(__global int *buffer, int alpha)\n" "{\n" " buffer[get_global_id(0)] *= alpha;\n" "}\n"; boost::compute::program program = 
boost::compute::program::create_with_source(source, context); program.build(); boost::compute::kernel kernel(program, "iscal"); kernel.set_arg(0, buffer); kernel.set_arg(1, 2); // execute kernel size_t global_work_offset = 0; size_t global_work_size = 8; boost::compute::event event = queue.enqueue_nd_range_kernel(kernel, size_t(1), &global_work_offset, &global_work_size, 0); // wait until kernel is finished event.wait(); // check profiling information event.get_profiling_info(bc::event::profiling_command_queued); event.get_profiling_info(bc::event::profiling_command_submit); event.get_profiling_info(bc::event::profiling_command_start); event.get_profiling_info(bc::event::profiling_command_end); // read results back to host queue.enqueue_read_buffer(buffer, 0, sizeof(data), data); // check results BOOST_CHECK_EQUAL(data[0], 2); BOOST_CHECK_EQUAL(data[1], 4); BOOST_CHECK_EQUAL(data[2], 6); BOOST_CHECK_EQUAL(data[3], 8); BOOST_CHECK_EQUAL(data[4], 10); BOOST_CHECK_EQUAL(data[5], 12); BOOST_CHECK_EQUAL(data[6], 14); BOOST_CHECK_EQUAL(data[7], 16); } BOOST_AUTO_TEST_CASE(construct_from_cl_command_queue) { // create cl_command_queue cl_command_queue cl_queue; #ifdef CL_VERSION_2_0 if (device.check_version(2, 0)){ // runtime check cl_queue = clCreateCommandQueueWithProperties(context, device.id(), 0, 0); } else #endif // CL_VERSION_2_0 { cl_queue = clCreateCommandQueue(context, device.id(), 0, 0); } BOOST_VERIFY(cl_queue); // create boost::compute::command_queue boost::compute::command_queue queue(cl_queue); // check queue BOOST_CHECK(queue.get_context() == context); BOOST_CHECK(cl_command_queue(queue) == cl_queue); // cleanup cl_command_queue clReleaseCommandQueue(cl_queue); } #ifdef CL_VERSION_1_1 BOOST_AUTO_TEST_CASE(write_buffer_rect) { REQUIRES_OPENCL_VERSION(1, 1); // skip this test on AMD GPUs due to a buggy implementation // of the clEnqueueWriteBufferRect() function if(device.vendor() == "Advanced Micro Devices, Inc." 
&& device.type() & boost::compute::device::gpu){ std::cerr << "skipping write_buffer_rect test on AMD GPU" << std::endl; return; } int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; boost::compute::buffer buffer(context, 8 * sizeof(int)); // copy every other value to the buffer size_t buffer_origin[] = { 0, 0, 0 }; size_t host_origin[] = { 0, 0, 0 }; size_t region[] = { sizeof(int), sizeof(int), 1 }; queue.enqueue_write_buffer_rect( buffer, buffer_origin, host_origin, region, sizeof(int), 0, 2 * sizeof(int), 0, data ); // check output values int output[4]; queue.enqueue_read_buffer(buffer, 0, 4 * sizeof(int), output); BOOST_CHECK_EQUAL(output[0], 1); BOOST_CHECK_EQUAL(output[1], 3); BOOST_CHECK_EQUAL(output[2], 5); BOOST_CHECK_EQUAL(output[3], 7); } #endif // CL_VERSION_1_1 static bool nullary_kernel_executed = false; static void nullary_kernel() { nullary_kernel_executed = true; } BOOST_AUTO_TEST_CASE(native_kernel) { cl_device_exec_capabilities exec_capabilities = device.get_info(); if(!(exec_capabilities & CL_EXEC_NATIVE_KERNEL)){ std::cerr << "skipping native_kernel test: " << "device does not support CL_EXEC_NATIVE_KERNEL" << std::endl; return; } compute::vector vector(1000, context); compute::fill(vector.begin(), vector.end(), 42, queue); BOOST_CHECK_EQUAL(nullary_kernel_executed, false); queue.enqueue_native_kernel(&nullary_kernel); queue.finish(); BOOST_CHECK_EQUAL(nullary_kernel_executed, true); } BOOST_AUTO_TEST_CASE(copy_with_wait_list) { int data1[] = { 1, 3, 5, 7 }; int data2[] = { 2, 4, 6, 8 }; compute::buffer buf1(context, 4 * sizeof(int)); compute::buffer buf2(context, 4 * sizeof(int)); compute::event write_event1 = queue.enqueue_write_buffer_async(buf1, 0, buf1.size(), data1); compute::event write_event2 = queue.enqueue_write_buffer_async(buf2, 0, buf2.size(), data2); compute::event read_event1 = queue.enqueue_read_buffer_async(buf1, 0, buf1.size(), data2, write_event1); compute::event read_event2 = queue.enqueue_read_buffer_async(buf2, 0, buf2.size(), 
data1, write_event2); read_event1.wait(); read_event2.wait(); CHECK_HOST_RANGE_EQUAL(int, 4, data1, (2, 4, 6, 8)); CHECK_HOST_RANGE_EQUAL(int, 4, data2, (1, 3, 5, 7)); } BOOST_AUTO_TEST_CASE(enqueue_kernel_with_extents) { using boost::compute::dim; using boost::compute::uint_; const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void foo(__global int *output1, __global int *output2) { output1[get_global_id(0)] = get_local_id(0); output2[get_global_id(1)] = get_local_id(1); } ); compute::kernel kernel = compute::kernel::create_with_source(source, "foo", context); compute::vector output1(4, context); compute::vector output2(4, context); kernel.set_arg(0, output1); kernel.set_arg(1, output2); queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 2)); CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1)); CHECK_RANGE_EQUAL(int, 4, output2, (0, 1, 0, 1)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_complex.cpp000066400000000000000000000147731263566244600172550ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestComplex #include #include #include #include #include #include #include #include "context_setup.hpp" // copies a vector of complex's on the host to the device BOOST_AUTO_TEST_CASE(copy_complex_vector) { std::vector > host_vector; host_vector.push_back(std::complex(1.0f, 2.0f)); host_vector.push_back(std::complex(-2.0f, 1.0f)); host_vector.push_back(std::complex(1.0f, -2.0f)); host_vector.push_back(std::complex(-2.0f, -1.0f)); boost::compute::vector > device_vector(context); boost::compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); queue.finish(); BOOST_CHECK_EQUAL(std::complex(device_vector[0]), std::complex(1.0f, 2.0f)); BOOST_CHECK_EQUAL(std::complex(device_vector[1]), std::complex(-2.0f, 1.0f)); BOOST_CHECK_EQUAL(std::complex(device_vector[2]), std::complex(1.0f, -2.0f)); BOOST_CHECK_EQUAL(std::complex(device_vector[3]), std::complex(-2.0f, -1.0f)); } // fills a vector of complex's on the device with a constant value BOOST_AUTO_TEST_CASE(fill_complex_vector) { boost::compute::vector > vector(6, context); boost::compute::fill( vector.begin(), vector.end(), std::complex(2.0f, 5.0f), queue ); queue.finish(); BOOST_CHECK_EQUAL(std::complex(vector[0]), std::complex(2.0f, 5.0f)); BOOST_CHECK_EQUAL(std::complex(vector[1]), std::complex(2.0f, 5.0f)); BOOST_CHECK_EQUAL(std::complex(vector[2]), std::complex(2.0f, 5.0f)); BOOST_CHECK_EQUAL(std::complex(vector[3]), std::complex(2.0f, 5.0f)); BOOST_CHECK_EQUAL(std::complex(vector[4]), std::complex(2.0f, 5.0f)); BOOST_CHECK_EQUAL(std::complex(vector[5]), std::complex(2.0f, 5.0f)); } // extracts the real and imag components of a vector of complex's using // transform with the real() and imag() functions BOOST_AUTO_TEST_CASE(extract_real_and_imag) { boost::compute::vector > vector(context); vector.push_back(std::complex(1.0f, 3.0f), queue); vector.push_back(std::complex(3.0f, 1.0f), 
queue); vector.push_back(std::complex(5.0f, -1.0f), queue); vector.push_back(std::complex(7.0f, -3.0f), queue); vector.push_back(std::complex(9.0f, -5.0f), queue); BOOST_CHECK_EQUAL(vector.size(), size_t(5)); boost::compute::vector reals(5, context); boost::compute::transform( vector.begin(), vector.end(), reals.begin(), boost::compute::real(), queue ); queue.finish(); BOOST_CHECK_EQUAL(float(reals[0]), float(1.0f)); BOOST_CHECK_EQUAL(float(reals[1]), float(3.0f)); BOOST_CHECK_EQUAL(float(reals[2]), float(5.0f)); BOOST_CHECK_EQUAL(float(reals[3]), float(7.0f)); BOOST_CHECK_EQUAL(float(reals[4]), float(9.0f)); boost::compute::vector imags(5, context); boost::compute::transform( vector.begin(), vector.end(), imags.begin(), boost::compute::imag(), queue ); queue.finish(); BOOST_CHECK_EQUAL(float(imags[0]), float(3.0f)); BOOST_CHECK_EQUAL(float(imags[1]), float(1.0f)); BOOST_CHECK_EQUAL(float(imags[2]), float(-1.0f)); BOOST_CHECK_EQUAL(float(imags[3]), float(-3.0f)); BOOST_CHECK_EQUAL(float(imags[4]), float(-5.0f)); } // compute the complex conjugate of a vector of complex's BOOST_AUTO_TEST_CASE(complex_conj) { boost::compute::vector > input(context); input.push_back(std::complex(1.0f, 3.0f), queue); input.push_back(std::complex(3.0f, 1.0f), queue); input.push_back(std::complex(5.0f, -1.0f), queue); input.push_back(std::complex(7.0f, -3.0f), queue); input.push_back(std::complex(9.0f, -5.0f), queue); BOOST_CHECK_EQUAL(input.size(), size_t(5)); boost::compute::vector > output(5, context); boost::compute::transform( input.begin(), input.end(), output.begin(), boost::compute::conj(), queue ); queue.finish(); BOOST_CHECK_EQUAL(std::complex(output[0]), std::complex(1.0f, -3.0f)); BOOST_CHECK_EQUAL(std::complex(output[1]), std::complex(3.0f, -1.0f)); BOOST_CHECK_EQUAL(std::complex(output[2]), std::complex(5.0f, 1.0f)); BOOST_CHECK_EQUAL(std::complex(output[3]), std::complex(7.0f, 3.0f)); BOOST_CHECK_EQUAL(std::complex(output[4]), std::complex(9.0f, 5.0f)); } // check 
type_name() for std::complex BOOST_AUTO_TEST_CASE(complex_type_name) { BOOST_CHECK( std::strcmp( boost::compute::type_name >(), "float2" ) == 0 ); } BOOST_AUTO_TEST_CASE(transform_multiply) { boost::compute::vector > x(context); x.push_back(std::complex(1.0f, 2.0f), queue); x.push_back(std::complex(-2.0f, 5.0f), queue); boost::compute::vector > y(context); y.push_back(std::complex(3.0f, 4.0f), queue); y.push_back(std::complex(2.0f, -1.0f), queue); boost::compute::vector > z(2, context); // z = x * y boost::compute::transform( x.begin(), x.end(), y.begin(), z.begin(), boost::compute::multiplies >(), queue ); queue.finish(); BOOST_CHECK_EQUAL(std::complex(z[0]), std::complex(-5.0f, 10.0f)); BOOST_CHECK_EQUAL(std::complex(z[1]), std::complex(1.0f, 12.0f)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_constant_iterator.cpp000066400000000000000000000046241263566244600213420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestConstantIterator #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(value_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::constant_iterator::value_type, int >::value )); BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::constant_iterator::value_type, float >::value )); } BOOST_AUTO_TEST_CASE(distance) { BOOST_CHECK_EQUAL( std::distance( boost::compute::make_constant_iterator(128, 0), boost::compute::make_constant_iterator(128, 10) ), std::ptrdiff_t(10) ); BOOST_CHECK_EQUAL( std::distance( boost::compute::make_constant_iterator(256, 5), boost::compute::make_constant_iterator(256, 10) ), std::ptrdiff_t(5) ); } BOOST_AUTO_TEST_CASE(copy) { boost::compute::vector vector(10, context); boost::compute::copy( boost::compute::make_constant_iterator(42, 0), boost::compute::make_constant_iterator(42, 10), vector.begin(), queue ); CHECK_RANGE_EQUAL( int, 10, vector, (42, 42, 42, 42, 42, 42, 42, 42, 42, 42) ); } BOOST_AUTO_TEST_CASE(fill_with_copy_doctest) { //! [fill_with_copy] using boost::compute::make_constant_iterator; boost::compute::vector result(5, context); boost::compute::copy( make_constant_iterator(42, 0), make_constant_iterator(42, result.size()), result.begin(), queue ); // result == { 42, 42, 42, 42, 42 } //! 
[fill_with_copy] CHECK_RANGE_EQUAL(int, 5, result, (42, 42, 42, 42, 42)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_context.cpp000066400000000000000000000040221263566244600172540ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestContext #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(construct_from_cl_context) { cl_device_id id = device.id(); // create cl_context cl_context ctx = clCreateContext(0, 1, &id, 0, 0, 0); BOOST_VERIFY(ctx); // create boost::compute::context boost::compute::context context(ctx); // check context BOOST_CHECK(cl_context(context) == ctx); // cleanup cl_context clReleaseContext(ctx); } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES BOOST_AUTO_TEST_CASE(move_constructor) { boost::compute::device device = boost::compute::system::default_device(); boost::compute::context context1(device); BOOST_VERIFY(cl_context(context1) != cl_context()); boost::compute::context context2(std::move(context1)); BOOST_VERIFY(cl_context(context2) != cl_context()); BOOST_VERIFY(cl_context(context1) == cl_context()); } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES BOOST_AUTO_TEST_CASE(multiple_devices) { const std::vector &platforms = compute::system::platforms(); for(size_t i = 0; i < platforms.size(); i++){ const compute::platform &platform = platforms[i]; // create a context for containing all devices in the platform compute::context ctx(platform.devices()); // check device count BOOST_CHECK_EQUAL(ctx.get_devices().size(), platform.device_count()); } } 
BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_context_error.cpp000066400000000000000000000020021263566244600204610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Fabian Köhler // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestContextError #include #include #include BOOST_AUTO_TEST_CASE(what) { boost::compute::context context = boost::compute::system::default_context(); boost::compute::context_error error(&context, "Test", 0, 0); BOOST_CHECK_EQUAL(std::string(error.what()), std::string("Test")); BOOST_CHECK(*error.get_context_ptr() == context); BOOST_CHECK(error.get_private_info_ptr() == 0); BOOST_CHECK(error.get_private_info_size() == 0); } compute-0.5/test/test_copy.cpp000066400000000000000000000232631263566244600165520ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestCopy #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "quirks.hpp" #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(copy_on_device) { float data[] = { 6.1f, 10.2f, 19.3f, 25.4f }; bc::vector a(4); bc::copy(data, data + 4, a.begin(), queue); CHECK_RANGE_EQUAL(float, 4, a, (6.1f, 10.2f, 19.3f, 25.4f)); bc::vector b(4); bc::fill(b.begin(), b.end(), 0, queue); CHECK_RANGE_EQUAL(float, 4, b, (0.0f, 0.0f, 0.0f, 0.0f)); bc::copy(a.begin(), a.end(), b.begin(), queue); CHECK_RANGE_EQUAL(float, 4, b, (6.1f, 10.2f, 19.3f, 25.4f)); } BOOST_AUTO_TEST_CASE(copy_on_host) { int data[] = { 2, 4, 6, 8 }; std::vector vector(4); compute::copy(data, data + 4, vector.begin(), queue); CHECK_RANGE_EQUAL(int, 4, vector, (2, 4, 6, 8)); } BOOST_AUTO_TEST_CASE(copy) { int data[] = { 1, 2, 5, 6 }; bc::vector vector(4, context); bc::copy(data, data + 4, vector.begin(), queue); CHECK_RANGE_EQUAL(int, 4, vector, (1, 2, 5, 6)); std::vector host_vector(4); bc::copy(vector.begin(), vector.end(), host_vector.begin(), queue); BOOST_CHECK_EQUAL(host_vector[0], 1); BOOST_CHECK_EQUAL(host_vector[1], 2); BOOST_CHECK_EQUAL(host_vector[2], 5); BOOST_CHECK_EQUAL(host_vector[3], 6); } // Test copying from a std::list to a bc::vector. This differs from // the test copying from std::vector because std::list has non-contigous // storage for its data values. 
BOOST_AUTO_TEST_CASE(copy_from_host_list) { int data[] = { -4, 12, 9, 0 }; std::list host_list(data, data + 4); bc::vector vector(4, context); bc::copy(host_list.begin(), host_list.end(), vector.begin(), queue); CHECK_RANGE_EQUAL(int, 4, vector, (-4, 12, 9, 0)); } BOOST_AUTO_TEST_CASE(copy_n_int) { int data[] = { 1, 2, 3, 4, 5 }; bc::vector a(data, data + 5, queue); bc::vector b(5, context); bc::fill(b.begin(), b.end(), 0, queue); bc::copy_n(a.begin(), 3, b.begin(), queue); CHECK_RANGE_EQUAL(int, 5, b, (1, 2, 3, 0, 0)); bc::copy_n(b.begin(), 4, a.begin(), queue); CHECK_RANGE_EQUAL(int, 5, a, (1, 2, 3, 0, 5)); } BOOST_AUTO_TEST_CASE(copy_swizzle_iterator) { using bc::int2_; using bc::int4_; int data[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7 }; bc::vector input(reinterpret_cast(data), reinterpret_cast(data) + 4, queue); BOOST_CHECK_EQUAL(input.size(), size_t(4)); CHECK_RANGE_EQUAL(int4_, 4, input, (int4_(1, 2, 3, 4), int4_(5, 6, 7, 8), int4_(9, 1, 2, 3), int4_(4, 5, 6, 7)) ); bc::vector output4(4, context); bc::copy( bc::detail::make_swizzle_iterator<4>(input.begin(), "wzyx"), bc::detail::make_swizzle_iterator<4>(input.end(), "wzyx"), output4.begin(), queue ); CHECK_RANGE_EQUAL(int4_, 4, output4, (int4_(4, 3, 2, 1), int4_(8, 7, 6, 5), int4_(3, 2, 1, 9), int4_(7, 6, 5, 4)) ); bc::vector output2(4, context); bc::copy( bc::detail::make_swizzle_iterator<2>(input.begin(), "xz"), bc::detail::make_swizzle_iterator<2>(input.end(), "xz"), output2.begin(), queue ); CHECK_RANGE_EQUAL(int2_, 4, output2, (int2_(1, 3), int2_(5, 7), int2_(9, 2), int2_(4, 6)) ); bc::vector output1(4, context); bc::copy( bc::detail::make_swizzle_iterator<1>(input.begin(), "y"), bc::detail::make_swizzle_iterator<1>(input.end(), "y"), output1.begin(), queue ); CHECK_RANGE_EQUAL(int, 4, output1, (2, 6, 1, 5)); } BOOST_AUTO_TEST_CASE(copy_int_async) { // setup host data int host_data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; typedef int* host_iterator; // setup device data bc::vector device_data(8, 
context); typedef bc::vector::iterator device_iterator; // copy data to device bc::future host_to_device_future = bc::copy_async(host_data, host_data + 8, device_data.begin(), queue); // wait for copy to complete host_to_device_future.wait(); // check results CHECK_RANGE_EQUAL(int, 8, device_data, (1, 2, 3, 4, 5, 6, 7, 8)); BOOST_VERIFY(host_to_device_future.get() == device_data.end()); // fill host data with zeros std::fill(host_data, host_data + 8, int(0)); // copy data back to host bc::future device_to_host_future = bc::copy_async(device_data.begin(), device_data.end(), host_data, queue); // wait for copy to complete device_to_host_future.wait(); // check results BOOST_CHECK_EQUAL(host_data[0], int(1)); BOOST_CHECK_EQUAL(host_data[1], int(2)); BOOST_CHECK_EQUAL(host_data[2], int(3)); BOOST_CHECK_EQUAL(host_data[3], int(4)); BOOST_CHECK_EQUAL(host_data[4], int(5)); BOOST_CHECK_EQUAL(host_data[5], int(6)); BOOST_CHECK_EQUAL(host_data[6], int(7)); BOOST_CHECK_EQUAL(host_data[7], int(8)); BOOST_VERIFY(device_to_host_future.get() == host_data + 8); } BOOST_AUTO_TEST_CASE(copy_to_back_inserter) { compute::vector device_vector(5, context); compute::iota(device_vector.begin(), device_vector.end(), 10, queue); std::vector host_vector; compute::copy( device_vector.begin(), device_vector.end(), std::back_inserter(host_vector), queue ); BOOST_CHECK_EQUAL(host_vector.size(), size_t(5)); BOOST_CHECK_EQUAL(host_vector[0], 10); BOOST_CHECK_EQUAL(host_vector[1], 11); BOOST_CHECK_EQUAL(host_vector[2], 12); BOOST_CHECK_EQUAL(host_vector[3], 13); BOOST_CHECK_EQUAL(host_vector[4], 14); } BOOST_AUTO_TEST_CASE(copy_to_stringstream) { std::stringstream stream; int data[] = { 2, 3, 4, 5, 6, 7, 8, 9 }; compute::vector vector(data, data + 8, queue); compute::copy( vector.begin(), vector.end(), std::ostream_iterator(stream, " "), queue ); BOOST_CHECK_EQUAL(stream.str(), std::string("2 3 4 5 6 7 8 9 ")); } BOOST_AUTO_TEST_CASE(check_copy_type) { // copy from host to device and ensure 
clEnqueueWriteBuffer() is used int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; compute::vector a(8, context); compute::future future = compute::copy_async(data, data + 8, a.begin(), queue); BOOST_CHECK( future.get_event().get_command_type() == CL_COMMAND_WRITE_BUFFER ); future.wait(); CHECK_RANGE_EQUAL(int, 8, a, (1, 2, 3, 4, 5, 6, 7, 8)); // copy on the device and ensure clEnqueueCopyBuffer() is used compute::vector b(8, context); future = compute::copy_async(a.begin(), a.end(), b.begin(), queue); BOOST_CHECK( future.get_event().get_command_type() == CL_COMMAND_COPY_BUFFER ); future.wait(); CHECK_RANGE_EQUAL(int, 8, b, (1, 2, 3, 4, 5, 6, 7, 8)); // copy between vectors of different types on the device and ensure // that the copy kernel is used compute::vector c(8, context); future = compute::copy_async(a.begin(), a.end(), c.begin(), queue); BOOST_CHECK( future.get_event().get_command_type() == CL_COMMAND_NDRANGE_KERNEL ); future.wait(); CHECK_RANGE_EQUAL(short, 8, c, (1, 2, 3, 4, 5, 6, 7, 8)); // copy from device to host and ensure clEnqueueReadBuffer() is used future = compute::copy_async(b.begin(), b.end(), data, queue); BOOST_CHECK( future.get_event().get_command_type() == CL_COMMAND_READ_BUFFER ); future.wait(); CHECK_HOST_RANGE_EQUAL(int, 8, data, (1, 2, 3, 4, 5, 6, 7, 8)); } #ifdef CL_VERSION_2_0 BOOST_AUTO_TEST_CASE(copy_svm_ptr) { REQUIRES_OPENCL_VERSION(2, 0); if(bug_in_svmmemcpy(device)){ std::cerr << "skipping copy_svm_ptr test case" << std::endl; return; } int data[] = { 1, 3, 2, 4 }; compute::svm_ptr ptr = compute::svm_alloc(context, 4); compute::copy(data, data + 4, ptr, queue); int output[] = { 0, 0, 0, 0 }; compute::copy(ptr, ptr + 4, output, queue); CHECK_HOST_RANGE_EQUAL(int, 4, output, (1, 3, 2, 4)); compute::svm_free(context, ptr); } #endif // CL_VERSION_2_0 BOOST_AUTO_TEST_CASE(copy_to_vector_bool) { using compute::uchar_; compute::vector vec(context); vec.push_back(true, queue); vec.push_back(false, queue); std::vector host_vec(vec.size()); 
compute::copy(vec.begin(), vec.end(), host_vec.begin(), queue); BOOST_CHECK(host_vec[0] == true); BOOST_CHECK(host_vec[1] == false); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_copy_if.cpp000066400000000000000000000100631263566244600172220ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestCopyIf #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(copy_if_int) { int data[] = { 1, 6, 3, 5, 8, 2, 4 }; bc::vector input(data, data + 7, queue); bc::vector output(input.size(), context); bc::fill(output.begin(), output.end(), -1, queue); using ::boost::compute::_1; bc::vector::iterator iter = bc::copy_if(input.begin(), input.end(), output.begin(), _1 < 5, queue); BOOST_VERIFY(iter == output.begin() + 4); CHECK_RANGE_EQUAL(int, 7, output, (1, 3, 2, 4, -1, -1, -1)); bc::fill(output.begin(), output.end(), 42, queue); iter = bc::copy_if(input.begin(), input.end(), output.begin(), _1 * 2 >= 10, queue); BOOST_VERIFY(iter == output.begin() + 3); CHECK_RANGE_EQUAL(int, 7, output, (6, 5, 8, 42, 42, 42, 42)); } BOOST_AUTO_TEST_CASE(copy_if_odd) { int data[] = { 1, 2, 3, 4, 5, 1, 2, 3, 4, 5 }; bc::vector input(data, data + 10, queue); using ::boost::compute::_1; bc::vector odds(input.size(), context); bc::vector::iterator odds_end = bc::copy_if(input.begin(), input.end(), odds.begin(), _1 % 2 == 1, queue); BOOST_CHECK(odds_end == odds.begin() + 6); CHECK_RANGE_EQUAL(int, 6, odds, (1, 3, 5, 1, 3, 5)); bc::vector evens(input.size(), 
context); bc::vector::iterator evens_end = bc::copy_if(input.begin(), input.end(), evens.begin(), _1 % 2 == 0, queue); BOOST_CHECK(evens_end == evens.begin() + 4); CHECK_RANGE_EQUAL(int, 4, evens, (2, 4, 2, 4)); } BOOST_AUTO_TEST_CASE(clip_points_below_plane) { float data[] = { 1.0f, 2.0f, 3.0f, 0.0f, -1.0f, 2.0f, 3.0f, 0.0f, -2.0f, -3.0f, 4.0f, 0.0f, 4.0f, -3.0f, 2.0f, 0.0f }; bc::vector points(reinterpret_cast(data), reinterpret_cast(data) + 4, queue); // create output vector filled with (0, 0, 0, 0) bc::vector output(points.size(), context); bc::fill(output.begin(), output.end(), bc::float4_(0.0f, 0.0f, 0.0f, 0.0f), queue); // define the plane (at origin, +X normal) bc::float4_ plane_origin(0.0f, 0.0f, 0.0f, 0.0f); bc::float4_ plane_normal(1.0f, 0.0f, 0.0f, 0.0f); using ::boost::compute::_1; using ::boost::compute::lambda::dot; bc::vector::const_iterator iter = bc::copy_if(points.begin(), points.end(), output.begin(), dot(_1 - plane_origin, plane_normal) > 0.0f, queue); BOOST_CHECK(iter == output.begin() + 2); } BOOST_AUTO_TEST_CASE(copy_index_if_int) { int data[] = { 1, 6, 3, 5, 8, 2, 4 }; compute::vector input(data, data + 7, queue); compute::vector output(input.size(), context); compute::fill(output.begin(), output.end(), -1, queue); using ::boost::compute::_1; using ::boost::compute::detail::copy_index_if; compute::vector::iterator iter = copy_index_if(input.begin(), input.end(), output.begin(), _1 < 5, queue); BOOST_VERIFY(iter == output.begin() + 4); CHECK_RANGE_EQUAL(int, 7, output, (0, 2, 5, 6, -1, -1, -1)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_count.cpp000066400000000000000000000144111263566244600167230ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute 
for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestCount #include #include #include #include #include #include #include #include #include #include #include #include #include "context_setup.hpp" namespace bc = boost::compute; namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(count_int) { int data[] = { 1, 2, 1, 2, 3 }; bc::vector vector(data, data + 5, queue); BOOST_CHECK_EQUAL(bc::count(vector.begin(), vector.end(), 1, queue), size_t(2)); BOOST_CHECK_EQUAL(bc::count(vector.begin(), vector.end(), 2, queue), size_t(2)); BOOST_CHECK_EQUAL(bc::count(vector.begin(), vector.end(), 3, queue), size_t(1)); BOOST_CHECK_EQUAL(bc::count(vector.begin() + 1, vector.end(), 1, queue), size_t(1)); BOOST_CHECK_EQUAL(bc::count(vector.begin() + 1, vector.end() - 1, 3, queue), size_t(0)); BOOST_CHECK_EQUAL(bc::count(vector.begin() + 1, vector.end() - 1, 2, queue), size_t(2)); } BOOST_AUTO_TEST_CASE(count_constant_int_range) { BOOST_CHECK_EQUAL( bc::count(bc::make_constant_iterator(18, 0), bc::make_constant_iterator(18, 5), 18, queue), size_t(5) ); BOOST_CHECK_EQUAL( bc::count(bc::make_constant_iterator(19, 0), bc::make_constant_iterator(19, 5), 18, queue), size_t(0) ); } BOOST_AUTO_TEST_CASE(count_if_greater_than_two) { float data[] = { 1.0f, 2.5f, -1.0f, 3.0f, 5.0f, -8.0f }; bc::vector vector(data, data + 5, queue); BOOST_CHECK_EQUAL( bc::count_if(vector.begin(), vector.end(), bc::_1 > 2.0f, queue), size_t(3) ); } BOOST_AUTO_TEST_CASE(count_int4) { int data[] = { 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 1, 1, 2, 3, 4, 4, 5, 6, 7, 0, 3, 2, 2 }; bc::vector vector(reinterpret_cast(data), reinterpret_cast(data) + 6, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(6)); BOOST_CHECK_EQUAL( bc::count(vector.begin(), vector.end(), bc::int4_(1, 2, 3, 4), queue), size_t(2) ); BOOST_CHECK_EQUAL( bc::count(vector.begin(), vector.end(), bc::int4_(4, 5, 6, 7), queue), size_t(2) ); BOOST_CHECK_EQUAL( 
bc::count(vector.begin(), vector.end(), bc::int4_(7, 8, 9, 1), queue), size_t(1) ); BOOST_CHECK_EQUAL( bc::count(vector.begin(), vector.end(), bc::int4_(0, 3, 2, 2), queue), size_t(1) ); BOOST_CHECK_EQUAL( bc::count(vector.begin(), vector.end(), bc::int4_(3, 4, 4, 5), queue), size_t(0) ); BOOST_CHECK_EQUAL( bc::count(vector.begin(), vector.end(), bc::int4_(1, 2, 3, 0), queue), size_t(0) ); BOOST_CHECK_EQUAL( bc::count(vector.begin(), vector.end(), bc::int4_(1, 9, 8, 7), queue), size_t(0) ); } BOOST_AUTO_TEST_CASE(count_newlines) { std::string string = "abcdefg\nhijklmn\nopqrs\ntuv\nwxyz\n"; compute::vector data(string.size(), context); compute::copy(string.begin(), string.end(), data.begin(), queue); BOOST_CHECK_EQUAL( compute::count(data.begin(), data.end(), '\n', queue), size_t(5) ); } BOOST_AUTO_TEST_CASE(count_uchar) { using boost::compute::uchar_; unsigned char data[] = { 0x00, 0x10, 0x2F, 0x10, 0x01, 0x00, 0x01, 0x00 }; compute::vector vector(8, context); compute::copy(data, data + 8, vector.begin(), queue); BOOST_CHECK_EQUAL( compute::count(vector.begin(), vector.end(), 0x00, queue), size_t(3) ); BOOST_CHECK_EQUAL( compute::count(vector.begin(), vector.end(), 0x10, queue), size_t(2) ); BOOST_CHECK_EQUAL( compute::count(vector.begin(), vector.end(), 0x2F, queue), size_t(1) ); BOOST_CHECK_EQUAL( compute::count(vector.begin(), vector.end(), 0x01, queue), size_t(2) ); BOOST_CHECK_EQUAL( compute::count(vector.begin(), vector.end(), 0xFF, queue), size_t(0) ); } BOOST_AUTO_TEST_CASE(count_vector_component) { int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; using boost::compute::int2_; compute::vector vector(4, context); compute::copy( reinterpret_cast(data), reinterpret_cast(data) + 4, vector.begin(), queue ); using boost::compute::lambda::_1; using boost::compute::lambda::get; BOOST_CHECK_EQUAL( compute::count_if(vector.begin(), vector.end(), get<0>(_1) < 4, queue), size_t(2) ); BOOST_CHECK_EQUAL( compute::count_if(vector.begin(), vector.end(), get<1>(_1) > 3, queue), 
size_t(3) ); } BOOST_AUTO_TEST_CASE(count_if_odd) { compute::vector vec(2048, context); compute::iota(vec.begin(), vec.end(), 0, queue); BOOST_COMPUTE_FUNCTION(bool, is_odd, (int x), { return x & 1; }); BOOST_CHECK_EQUAL( compute::count_if(vec.begin(), vec.end(), is_odd, queue), vec.size() / 2 ); } BOOST_AUTO_TEST_CASE(count_if_with_reduce) { compute::vector vec(2048, context); compute::iota(vec.begin(), vec.end(), 0, queue); using boost::compute::lambda::_1; BOOST_CHECK_EQUAL( compute::detail::count_if_with_reduce( vec.begin(), vec.end(), _1 > 1024, queue ), size_t(1023) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_counting_iterator.cpp000066400000000000000000000050161263566244600213330ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestCountingIterator #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(value_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::counting_iterator::value_type, int >::value )); BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::counting_iterator::value_type, float >::value )); } BOOST_AUTO_TEST_CASE(distance) { BOOST_CHECK_EQUAL( std::distance( boost::compute::make_counting_iterator(0), boost::compute::make_counting_iterator(10) ), std::ptrdiff_t(10) ); BOOST_CHECK_EQUAL( std::distance( boost::compute::make_counting_iterator(5), boost::compute::make_counting_iterator(10) ), std::ptrdiff_t(5) ); BOOST_CHECK_EQUAL( std::distance( boost::compute::make_counting_iterator(-5), boost::compute::make_counting_iterator(5) ), std::ptrdiff_t(10) ); } BOOST_AUTO_TEST_CASE(copy) { boost::compute::vector vector(10, context); boost::compute::copy( boost::compute::make_counting_iterator(1), boost::compute::make_counting_iterator(11), vector.begin(), queue ); CHECK_RANGE_EQUAL( int, 10, vector, (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) ); } BOOST_AUTO_TEST_CASE(iota_with_copy_doctest) { //! [iota_with_copy] using boost::compute::make_counting_iterator; boost::compute::vector result(5, context); boost::compute::copy( make_counting_iterator(1), make_counting_iterator(6), result.begin(), queue ); // result == { 1, 2, 3, 4, 5 } //! 
[iota_with_copy] CHECK_RANGE_EQUAL(int, 5, result, (1, 2, 3, 4, 5)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_device.cpp000066400000000000000000000171301263566244600170330ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestDevice #include #include #include #include #include #include "opencl_version_check.hpp" BOOST_AUTO_TEST_CASE(null_device) { boost::compute::device null; BOOST_CHECK(null.id() == cl_device_id()); BOOST_CHECK(null.get() == cl_device_id()); } BOOST_AUTO_TEST_CASE(default_device_doctest) { //! [default_gpu] boost::compute::device gpu = boost::compute::system::default_device(); //! 
[default_gpu] BOOST_CHECK(gpu.id()); } BOOST_AUTO_TEST_CASE(device_platform) { boost::compute::platform p = boost::compute::system::platforms().at(0); BOOST_CHECK(p == p.devices().at(0).platform()); } BOOST_AUTO_TEST_CASE(get_device_name) { boost::compute::device gpu = boost::compute::system::default_device(); if(gpu.id()){ BOOST_CHECK(!gpu.name().empty()); } } BOOST_AUTO_TEST_CASE(equality_operator) { boost::compute::device device1 = boost::compute::system::default_device(); BOOST_CHECK(device1 == device1); boost::compute::device device2 = device1; BOOST_CHECK(device1 == device2); } BOOST_AUTO_TEST_CASE(get_max_work_item_sizes) { boost::compute::device device = boost::compute::system::default_device(); std::vector max_work_item_sizes = device.get_info >(CL_DEVICE_MAX_WORK_ITEM_SIZES); BOOST_CHECK_GE(max_work_item_sizes.size(), size_t(3)); BOOST_CHECK_GE(max_work_item_sizes[0], size_t(1)); BOOST_CHECK_GE(max_work_item_sizes[1], size_t(1)); BOOST_CHECK_GE(max_work_item_sizes[2], size_t(1)); } #ifdef CL_VERSION_1_2 // returns true if the device supports the partitioning type bool supports_partition_type(const boost::compute::device &device, cl_device_partition_property type) { const std::vector properties = device.get_info >( CL_DEVICE_PARTITION_PROPERTIES ); return std::find(properties.begin(), properties.end(), type) != properties.end(); } BOOST_AUTO_TEST_CASE(partition_device_equally) { // get default device and ensure it has at least two compute units boost::compute::device device = boost::compute::system::default_device(); REQUIRES_OPENCL_VERSION(1,2); if(device.compute_units() < 2){ std::cout << "skipping test: " << "device does not have enough compute units" << std::endl; return; } // check that the device supports partitioning equally if(!supports_partition_type(device, CL_DEVICE_PARTITION_EQUALLY)){ std::cout << "skipping test: " << "device does not support CL_DEVICE_PARTITION_EQUALLY" << std::endl; return; } // ensure device is not a sub-device 
BOOST_CHECK(device.is_subdevice() == false); // partition default device into sub-devices with two compute units each std::vector sub_devices = device.partition_equally(2); BOOST_CHECK_EQUAL(sub_devices.size(), size_t(device.compute_units() / 2)); // verify each of the sub-devices for(size_t i = 0; i < sub_devices.size(); i++){ const boost::compute::device &sub_device = sub_devices[i]; // ensure parent device id is correct cl_device_id parent_id = sub_device.get_info(CL_DEVICE_PARENT_DEVICE); BOOST_CHECK(parent_id == device.id()); // ensure device is a sub-device BOOST_CHECK(sub_device.is_subdevice() == true); // check number of compute units BOOST_CHECK_EQUAL(sub_device.compute_units(), size_t(2)); } } // used to sort devices by number of compute units bool compare_compute_units(const boost::compute::device &a, const boost::compute::device &b) { return a.compute_units() < b.compute_units(); } BOOST_AUTO_TEST_CASE(partition_by_counts) { // get default device and ensure it has at least four compute units boost::compute::device device = boost::compute::system::default_device(); REQUIRES_OPENCL_VERSION(1,2); if(device.compute_units() < 4){ std::cout << "skipping test: " << "device does not have enough compute units" << std::endl; return; } // check that the device supports partitioning by counts if(!supports_partition_type(device, CL_DEVICE_PARTITION_BY_COUNTS)){ std::cout << "skipping test: " << "device does not support CL_DEVICE_PARTITION_BY_COUNTS" << std::endl; return; } // ensure device is not a sub-device BOOST_CHECK(device.is_subdevice() == false); // create vector of sub-device compute unit counts std::vector counts; counts.push_back(2); counts.push_back(1); counts.push_back(1); // partition default device into sub-devices according to counts std::vector sub_devices = device.partition_by_counts(counts); BOOST_CHECK_EQUAL(sub_devices.size(), size_t(3)); // sort sub-devices by number of compute units (see issue #185) std::sort(sub_devices.begin(), 
sub_devices.end(), compare_compute_units); // verify each of the sub-devices BOOST_CHECK_EQUAL(sub_devices[0].compute_units(), size_t(1)); BOOST_CHECK_EQUAL(sub_devices[1].compute_units(), size_t(1)); BOOST_CHECK_EQUAL(sub_devices[2].compute_units(), size_t(2)); } BOOST_AUTO_TEST_CASE(partition_by_affinity_domain) { // get default device and ensure it has at least two compute units boost::compute::device device = boost::compute::system::default_device(); REQUIRES_OPENCL_VERSION(1,2); if(device.compute_units() < 2){ std::cout << "skipping test: " << "device does not have enough compute units" << std::endl; return; } // check that the device supports splitting by affinity domains if(!supports_partition_type(device, CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE)){ std::cout << "skipping test: " << "device does not support partitioning by affinity domain" << std::endl; return; } // ensure device is not a sub-device BOOST_CHECK(device.is_subdevice() == false); std::vector sub_devices = device.partition_by_affinity_domain( CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE); BOOST_CHECK(sub_devices.size() > 0); BOOST_CHECK(sub_devices[0].is_subdevice() == true); } #endif // CL_VERSION_1_2 BOOST_AUTO_TEST_CASE(nvidia_compute_capability) { boost::compute::device device = boost::compute::system::default_device(); int major, minor; boost::compute::detail::get_nvidia_compute_capability(device, major, minor); boost::compute::detail::check_nvidia_compute_capability(device, 3, 0); } BOOST_AUTO_TEST_CASE(get_info_specializations) { boost::compute::device device = boost::compute::system::default_device(); std::cout << device.get_info() << std::endl; } compute-0.5/test/test_discard_iterator.cpp000066400000000000000000000047571263566244600211310ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file 
LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestDiscardIterator #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(value_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::discard_iterator::value_type, void >::value )); } BOOST_AUTO_TEST_CASE(distance) { BOOST_CHECK_EQUAL( std::distance( boost::compute::make_discard_iterator(0), boost::compute::make_discard_iterator(10) ), std::ptrdiff_t(10) ); BOOST_CHECK_EQUAL( std::distance( boost::compute::make_discard_iterator(5), boost::compute::make_discard_iterator(10) ), std::ptrdiff_t(5) ); } BOOST_AUTO_TEST_CASE(discard_copy) { boost::compute::vector vector(10, context); boost::compute::fill(vector.begin(), vector.end(), 42, queue); boost::compute::copy( vector.begin(), vector.end(), boost::compute::make_discard_iterator(), queue ); } BOOST_AUTO_TEST_CASE(discard_copy_if) { int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; boost::compute::vector vector(data, data + 8, queue); using boost::compute::lambda::_1; boost::compute::discard_iterator end = boost::compute::copy_if( vector.begin(), vector.end(), boost::compute::make_discard_iterator(), _1 > 4, queue ); BOOST_CHECK(std::distance(boost::compute::discard_iterator(), end) == 4); } BOOST_AUTO_TEST_CASE(discard_fill) { boost::compute::fill( boost::compute::make_discard_iterator(0), boost::compute::make_discard_iterator(100), 42, queue ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_discrete_distribution.cpp000066400000000000000000000032371263566244600222000ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 
// See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestDiscreteDistribution #include #include #include #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(discrete_distribution_doctest) { using boost::compute::uint_; using boost::compute::lambda::_1; boost::compute::vector vec(100, context); //! [generate] // initialize the default random engine boost::compute::default_random_engine engine(queue); // initialize weights int weights[] = {2, 2}; // setup the discrete distribution to produce integers 0 and 1 // with equal weights boost::compute::discrete_distribution distribution(weights, weights+2); // generate the random values and store them to 'vec' distribution.generate(vec.begin(), vec.end(), engine, queue); // ! [generate] BOOST_CHECK_EQUAL( boost::compute::count_if( vec.begin(), vec.end(), _1 > 1, queue ), size_t(0) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_dynamic_bitset.cpp000066400000000000000000000053501263566244600205730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestDynamicBitset #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(set_and_test) { compute::dynamic_bitset<> bits(1024, queue); bits.set(1, queue); BOOST_CHECK(bits.test(1, queue) == true); BOOST_CHECK(bits.test(2, queue) == false); bits.set(1, false, queue); BOOST_CHECK(bits.test(1, queue) == false); BOOST_CHECK(bits.test(2, queue) == false); } BOOST_AUTO_TEST_CASE(count) { compute::dynamic_bitset<> bits(1024, queue); BOOST_CHECK_EQUAL(bits.count(queue), size_t(0)); bits.set(1, queue); bits.set(8, queue); bits.set(129, queue); BOOST_CHECK_EQUAL(bits.count(queue), size_t(3)); bits.set(8, false, queue); BOOST_CHECK_EQUAL(bits.count(queue), size_t(2)); bits.reset(queue); BOOST_CHECK_EQUAL(bits.count(queue), size_t(0)); } BOOST_AUTO_TEST_CASE(resize) { compute::dynamic_bitset<> bits(0, queue); BOOST_CHECK_EQUAL(bits.size(), size_t(0)); BOOST_CHECK_EQUAL(bits.empty(), true); BOOST_CHECK_EQUAL(bits.count(queue), size_t(0)); bits.resize(100, queue); BOOST_CHECK_EQUAL(bits.size(), size_t(100)); BOOST_CHECK_EQUAL(bits.empty(), false); BOOST_CHECK_EQUAL(bits.count(queue), size_t(0)); bits.set(42, true, queue); BOOST_CHECK_EQUAL(bits.count(queue), size_t(1)); bits.resize(0, queue); BOOST_CHECK_EQUAL(bits.size(), size_t(0)); BOOST_CHECK_EQUAL(bits.empty(), true); BOOST_CHECK_EQUAL(bits.count(queue), size_t(0)); } BOOST_AUTO_TEST_CASE(none_and_any) { compute::dynamic_bitset<> bits(1024, queue); BOOST_CHECK(bits.any(queue) == false); BOOST_CHECK(bits.none(queue) == true); bits.set(1023, queue); BOOST_CHECK(bits.any(queue) == true); BOOST_CHECK(bits.none(queue) == false); bits.set(1023, false, queue); BOOST_CHECK(bits.any(queue) == false); BOOST_CHECK(bits.none(queue) == true); bits.set(1, queue); BOOST_CHECK(bits.any(queue) == true); BOOST_CHECK(bits.none(queue) == false); 
bits.reset(queue); BOOST_CHECK(bits.any(queue) == false); BOOST_CHECK(bits.none(queue) == true); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_equal.cpp000066400000000000000000000046141263566244600167060ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestEqual #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(equal_int) { int data1[] = { 1, 2, 3, 4, 5, 6 }; int data2[] = { 1, 2, 3, 7, 5, 6 }; boost::compute::vector vector1(data1, data1 + 6, queue); boost::compute::vector vector2(data2, data2 + 6, queue); BOOST_CHECK(boost::compute::equal(vector1.begin(), vector1.end(), vector2.begin(), queue) == false); BOOST_CHECK(boost::compute::equal(vector1.begin(), vector1.begin() + 2, vector2.begin(), queue) == true); BOOST_CHECK(boost::compute::equal(vector1.begin() + 4, vector1.end(), vector2.begin() + 4, queue) == true); } BOOST_AUTO_TEST_CASE(equal_string) { boost::compute::string a = "abcdefghijk"; boost::compute::string b = "abcdefghijk"; boost::compute::string c = "abcdezghijk"; BOOST_CHECK(boost::compute::equal(a.begin(), a.end(), b.begin(), queue) == true); BOOST_CHECK(boost::compute::equal(a.begin(), a.end(), c.begin(), queue) == false); } BOOST_AUTO_TEST_CASE(equal_different_range_sizes) { boost::compute::vector a(10, context); boost::compute::vector b(20, context); boost::compute::fill(a.begin(), a.end(), 3, queue); boost::compute::fill(b.begin(), b.end(), 3, queue); BOOST_CHECK(boost::compute::equal(a.begin(), a.end(), b.begin(), b.end(), queue) == false); 
BOOST_CHECK(boost::compute::equal(a.begin(), a.end(), a.begin(), a.end(), queue) == true); BOOST_CHECK(boost::compute::equal(b.begin(), b.end(), a.begin(), a.end(), queue) == false); BOOST_CHECK(boost::compute::equal(b.begin(), b.end(), b.begin(), b.end(), queue) == true); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_equal_range.cpp000066400000000000000000000061011263566244600200530ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestEqualRange #include #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(equal_range_int) { int data[] = { 1, 2, 2, 2, 3, 3, 4, 5 }; boost::compute::vector vector(data, data + 8, queue); typedef boost::compute::vector::iterator iterator; std::pair range0 = boost::compute::equal_range(vector.begin(), vector.end(), int(0), queue); BOOST_CHECK(range0.first == vector.begin()); BOOST_CHECK(range0.second == vector.begin()); BOOST_CHECK_EQUAL(std::distance(range0.first, range0.second), ptrdiff_t(0)); std::pair range1 = boost::compute::equal_range(vector.begin(), vector.end(), int(1), queue); BOOST_CHECK(range1.first == vector.begin()); BOOST_CHECK(range1.second == vector.begin() + 1); BOOST_CHECK_EQUAL(std::distance(range1.first, range1.second), ptrdiff_t(1)); std::pair range2 = boost::compute::equal_range(vector.begin(), vector.end(), int(2), queue); BOOST_CHECK(range2.first == vector.begin() + 1); BOOST_CHECK(range2.second == vector.begin() + 4); BOOST_CHECK_EQUAL(std::distance(range2.first, range2.second), ptrdiff_t(3)); std::pair range3 = 
boost::compute::equal_range(vector.begin(), vector.end(), int(3), queue); BOOST_CHECK(range3.first == vector.begin() + 4); BOOST_CHECK(range3.second == vector.begin() + 6); BOOST_CHECK_EQUAL(std::distance(range3.first, range3.second), ptrdiff_t(2)); std::pair range4 = boost::compute::equal_range(vector.begin(), vector.end(), int(4), queue); BOOST_CHECK(range4.first == vector.begin() + 6); BOOST_CHECK(range4.second == vector.begin() + 7); BOOST_CHECK_EQUAL(std::distance(range4.first, range4.second), ptrdiff_t(1)); std::pair range5 = boost::compute::equal_range(vector.begin(), vector.end(), int(5), queue); BOOST_CHECK(range5.first == vector.begin() + 7); BOOST_CHECK(range5.second == vector.end()); BOOST_CHECK_EQUAL(std::distance(range5.first, range5.second), ptrdiff_t(1)); std::pair range6 = boost::compute::equal_range(vector.begin(), vector.end(), int(6), queue); BOOST_CHECK(range6.first == vector.end()); BOOST_CHECK(range6.second == vector.end()); BOOST_CHECK_EQUAL(std::distance(range6.first, range6.second), ptrdiff_t(0)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_event.cpp000066400000000000000000000067621263566244600167260ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestEvent #include #include #ifdef BOOST_COMPUTE_USE_CPP11 #include #include #endif // BOOST_COMPUTE_USE_CPP11 #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(null_event) { boost::compute::event null; BOOST_CHECK(null.get() == cl_event()); } #if defined(CL_VERSION_1_1) && defined(BOOST_COMPUTE_USE_CPP11) std::mutex callback_mutex; std::condition_variable callback_condition_variable; static bool callback_invoked = false; static void BOOST_COMPUTE_CL_CALLBACK callback(cl_event event, cl_int status, void *user_data) { std::lock_guard lock(callback_mutex); callback_invoked = true; callback_condition_variable.notify_one(); } BOOST_AUTO_TEST_CASE(event_callback) { REQUIRES_OPENCL_VERSION(1,2); // ensure callback has not yet been executed BOOST_CHECK_EQUAL(callback_invoked, false); // enqueue marker and set callback to be invoked boost::compute::event marker = queue.enqueue_marker(); marker.set_callback(callback); marker.wait(); // wait up to one second for the callback to be executed std::unique_lock lock(callback_mutex); callback_condition_variable.wait_for( lock, std::chrono::seconds(1), [&](){ return callback_invoked; } ); // ensure callback has been executed BOOST_CHECK_EQUAL(callback_invoked, true); } BOOST_AUTO_TEST_CASE(lambda_callback) { REQUIRES_OPENCL_VERSION(1,2); bool lambda_invoked = false; boost::compute::event marker = queue.enqueue_marker(); marker.set_callback([&](){ std::lock_guard lock(callback_mutex); lambda_invoked = true; callback_condition_variable.notify_one(); }); marker.wait(); // wait up to one second for the callback to be executed std::unique_lock lock(callback_mutex); callback_condition_variable.wait_for( lock, std::chrono::seconds(1), [&](){ return lambda_invoked; } ); BOOST_CHECK_EQUAL(lambda_invoked, true); } void BOOST_COMPUTE_CL_CALLBACK event_promise_fulfiller_callback(cl_event event, cl_int status, void *user_data) { 
auto *promise = static_cast *>(user_data); promise->set_value(); delete promise; } BOOST_AUTO_TEST_CASE(event_to_std_future) { REQUIRES_OPENCL_VERSION(1,2); // enqueue an asynchronous copy to the device std::vector vector(1000, 3.14f); boost::compute::buffer buffer(context, 1000 * sizeof(float)); auto event = queue.enqueue_write_buffer_async( buffer, 0, 1000 * sizeof(float), vector.data() ); // create a promise and future to be set by the callback auto *promise = new std::promise; std::future future = promise->get_future(); event.set_callback(event_promise_fulfiller_callback, CL_COMPLETE, promise); // ensure commands are submitted to the device before waiting queue.flush(); // wait for future to become ready future.wait(); } #endif // CL_VERSION_1_1 BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_extents.cpp000066400000000000000000000052471263566244600172740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestExtents #include #include #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(initialize) { compute::extents<1> one(1); BOOST_CHECK_EQUAL(one[0], size_t(1)); compute::extents<3> xyz = compute::dim(1, 2, 3); BOOST_CHECK_EQUAL(xyz[0], size_t(1)); BOOST_CHECK_EQUAL(xyz[1], size_t(2)); BOOST_CHECK_EQUAL(xyz[2], size_t(3)); } BOOST_AUTO_TEST_CASE(size) { BOOST_CHECK_EQUAL(compute::extents<1>().size(), size_t(1)); BOOST_CHECK_EQUAL(compute::extents<2>().size(), size_t(2)); BOOST_CHECK_EQUAL(compute::extents<3>().size(), size_t(3)); } BOOST_AUTO_TEST_CASE(subscript_operator) { compute::extents<3> xyz; BOOST_CHECK_EQUAL(xyz[0], size_t(0)); BOOST_CHECK_EQUAL(xyz[1], size_t(0)); BOOST_CHECK_EQUAL(xyz[2], size_t(0)); xyz[0] = 10; xyz[1] = 20; xyz[2] = 30; BOOST_CHECK_EQUAL(xyz[0], size_t(10)); BOOST_CHECK_EQUAL(xyz[1], size_t(20)); BOOST_CHECK_EQUAL(xyz[2], size_t(30)); } BOOST_AUTO_TEST_CASE(data) { compute::extents<3> xyz = compute::dim(5, 6, 7); BOOST_CHECK_EQUAL(xyz.data()[0], size_t(5)); BOOST_CHECK_EQUAL(xyz.data()[1], size_t(6)); BOOST_CHECK_EQUAL(xyz.data()[2], size_t(7)); } BOOST_AUTO_TEST_CASE(linear) { compute::extents<2> uv = compute::dim(16, 16); BOOST_CHECK_EQUAL(uv.linear(), size_t(256)); } BOOST_AUTO_TEST_CASE(equality_operator) { BOOST_CHECK(compute::dim(10, 20) == compute::dim(10, 20)); BOOST_CHECK(compute::dim(20, 10) != compute::dim(10, 20)); } BOOST_AUTO_TEST_CASE(empty_dim) { BOOST_CHECK(compute::dim<0>() == compute::dim()); BOOST_CHECK(compute::dim<1>() == compute::dim(0)); BOOST_CHECK(compute::dim<2>() == compute::dim(0, 0)); BOOST_CHECK(compute::dim<3>() == compute::dim(0, 0, 0)); } BOOST_AUTO_TEST_CASE(copy_to_vector) { compute::extents<3> exts = compute::dim(4, 5, 6); std::vector vec(3); std::copy(exts.begin(), exts.end(), vec.begin()); BOOST_CHECK_EQUAL(vec[0], 4); 
BOOST_CHECK_EQUAL(vec[1], 5); BOOST_CHECK_EQUAL(vec[2], 6); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_extrema.cpp000066400000000000000000000170051263566244600172420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestExtrema #include #include #include #include #include #include #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(int_min_max) { boost::compute::vector vector(size_t(4096), int(0), queue); boost::compute::iota(vector.begin(), (vector.begin() + 512), 1, queue); boost::compute::fill((vector.end() - 512), vector.end(), 513, queue); boost::compute::vector::iterator min_iter = boost::compute::min_element(vector.begin(), vector.end(), queue); BOOST_CHECK(min_iter == vector.begin() + 512); BOOST_CHECK_EQUAL((vector.begin() + 512).read(queue), 0); BOOST_CHECK_EQUAL(min_iter.read(queue), 0); boost::compute::vector::iterator max_iter = boost::compute::max_element(vector.begin(), vector.end(), queue); BOOST_CHECK(max_iter == vector.end() - 512); BOOST_CHECK_EQUAL((vector.end() - 512).read(queue), 513); BOOST_CHECK_EQUAL(max_iter.read(queue), 513); } BOOST_AUTO_TEST_CASE(int2_min_max_custom_comparision_function) { using boost::compute::int2_; boost::compute::vector vector(context); vector.push_back(int2_(1, 10), queue); vector.push_back(int2_(2, -100), queue); vector.push_back(int2_(3, 30), queue); vector.push_back(int2_(4, 20), queue); vector.push_back(int2_(5, 5), queue); vector.push_back(int2_(6, -80), queue); vector.push_back(int2_(7, 21), queue); vector.push_back(int2_(8, -5), queue); 
BOOST_COMPUTE_FUNCTION(bool, compare_second, (const int2_ a, const int2_ b), { return a.y < b.y; }); boost::compute::vector::iterator min_iter = boost::compute::min_element( vector.begin(), vector.end(), compare_second, queue ); BOOST_CHECK(min_iter == vector.begin() + 1); BOOST_CHECK_EQUAL(*min_iter, int2_(2, -100)); boost::compute::vector::iterator max_iter = boost::compute::max_element( vector.begin(), vector.end(), compare_second, queue ); BOOST_CHECK(max_iter == vector.begin() + 2); BOOST_CHECK_EQUAL(*max_iter, int2_(3, 30)); } BOOST_AUTO_TEST_CASE(iota_min_max) { boost::compute::vector vector(5000, context); // fill with 0 -> 4999 boost::compute::iota(vector.begin(), vector.end(), 0, queue); boost::compute::vector::iterator min_iter = boost::compute::min_element(vector.begin(), vector.end(), queue); BOOST_CHECK(min_iter == vector.begin()); BOOST_CHECK_EQUAL(*min_iter, 0); boost::compute::vector::iterator max_iter = boost::compute::max_element(vector.begin(), vector.end(), queue); BOOST_CHECK(max_iter == vector.end() - 1); BOOST_CHECK_EQUAL(*max_iter, 4999); min_iter = boost::compute::min_element( vector.begin() + 1000, vector.end() - 1000, queue ); BOOST_CHECK(min_iter == vector.begin() + 1000); BOOST_CHECK_EQUAL(*min_iter, 1000); max_iter = boost::compute::max_element( vector.begin() + 1000, vector.end() - 1000, queue ); BOOST_CHECK(max_iter == vector.begin() + 3999); BOOST_CHECK_EQUAL(*max_iter, 3999); // fill with -2500 -> 2499 boost::compute::iota(vector.begin(), vector.end(), -2500, queue); min_iter = boost::compute::min_element(vector.begin(), vector.end(), queue); BOOST_CHECK(min_iter == vector.begin()); BOOST_CHECK_EQUAL(*min_iter, -2500); max_iter = boost::compute::max_element(vector.begin(), vector.end(), queue); BOOST_CHECK(max_iter == vector.end() - 1); BOOST_CHECK_EQUAL(*max_iter, 2499); } // uses max_element() and length() to find the longest 2d vector BOOST_AUTO_TEST_CASE(max_vector_length) { float data[] = { -1.5f, 3.2f, 10.0f, 0.0f, -4.2f, 
2.0f, 0.0f, 0.5f, 1.9f, 1.9f }; boost::compute::vector vector( reinterpret_cast(data), reinterpret_cast(data) + 5, queue ); // find length of the longest vector typedef boost::compute::transform_iterator< boost::compute::vector::iterator, boost::compute::length > length_transform_iter; length_transform_iter max_iter = boost::compute::max_element( boost::compute::make_transform_iterator( vector.begin(), boost::compute::length() ), boost::compute::make_transform_iterator( vector.end(), boost::compute::length() ), queue ); BOOST_CHECK( max_iter == boost::compute::make_transform_iterator( vector.begin() + 1, boost::compute::length() ) ); BOOST_CHECK(max_iter.base() == vector.begin() + 1); BOOST_CHECK_EQUAL(*max_iter, float(10.0)); // find length of the shortest vector length_transform_iter min_iter = boost::compute::min_element( boost::compute::make_transform_iterator( vector.begin(), boost::compute::length() ), boost::compute::make_transform_iterator( vector.end(), boost::compute::length() ), queue ); BOOST_CHECK( min_iter == boost::compute::make_transform_iterator( vector.begin() + 3, boost::compute::length() ) ); BOOST_CHECK(min_iter.base() == vector.begin() + 3); BOOST_CHECK_EQUAL(*min_iter, float(0.5)); } // uses max_element() and popcount() to find the value with the most 1 bits BOOST_AUTO_TEST_CASE(max_bits_set) { using boost::compute::uint_; uint_ data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; boost::compute::vector vector(data, data + 10, queue); boost::compute::vector::iterator iter = boost::compute::max_element( boost::compute::make_transform_iterator( vector.begin(), boost::compute::popcount() ), boost::compute::make_transform_iterator( vector.end(), boost::compute::popcount() ), queue ).base(); BOOST_CHECK(iter == vector.begin() + 7); BOOST_CHECK_EQUAL(uint_(*iter), uint_(7)); } BOOST_AUTO_TEST_SUITE_END() 
compute-0.5/test/test_fill.cpp000066400000000000000000000222211263566244600165170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFill #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; typedef boost::mpl::list scalar_types; template inline void test_fill(T v1, T v2, T v3, bc::command_queue queue) { if(boost::is_same::type, bc::double_>::value && !queue.get_device().supports_extension("cl_khr_fp64")) { std::cerr << "Skipping test_fill<" << bc::type_name() << ">() " "on device which doesn't support cl_khr_fp64" << std::endl; return; } bc::vector vector(4, queue.get_context()); bc::fill(vector.begin(), vector.end(), v1, queue); queue.finish(); CHECK_RANGE_EQUAL(T, 4, vector, (v1, v1, v1, v1)); vector.resize(1000, queue); bc::fill(vector.begin(), vector.end(), v2, queue); queue.finish(); BOOST_CHECK_EQUAL(vector.front(), v2); BOOST_CHECK_EQUAL(vector.back(), v2); bc::fill(vector.begin() + 500, vector.end(), v3, queue); queue.finish(); BOOST_CHECK_EQUAL(vector.front(), v2); BOOST_CHECK_EQUAL(vector[499], v2); BOOST_CHECK_EQUAL(vector[500], v3); BOOST_CHECK_EQUAL(vector.back(), v3); } BOOST_AUTO_TEST_CASE_TEMPLATE( fill_scalar, S, scalar_types ) { S v1 = S(1.5f); S v2 = S(2.5f); S v3 = S(42.0f); test_fill(v1, v2, v3, queue); } BOOST_AUTO_TEST_CASE_TEMPLATE( fill_vec2, S, scalar_types ) { typedef typename bc::make_vector_type::type T; S s1 = S(1.5f); S s2 = S(2.5f); S s3 = S(42.0f); S s4 = S(84.0f); T v1 = T(s1, s2); T v2 = T(s3, 
s4); T v3 = T(s2, s1); test_fill(v1, v2, v3, queue); } BOOST_AUTO_TEST_CASE_TEMPLATE( fill_vec4, S, scalar_types ) { typedef typename bc::make_vector_type::type T; S s1 = S(1.5f); S s2 = S(2.5f); S s3 = S(42.0f); S s4 = S(84.0f); T v1 = T(s1, s2, s3, s4); T v2 = T(s3, s4, s1, s2); T v3 = T(s4, s3, s2, s1); test_fill(v1, v2, v3, queue); } BOOST_AUTO_TEST_CASE_TEMPLATE( fill_vec8, S, scalar_types ) { typedef typename bc::make_vector_type::type T; S s1 = S(1.5f); S s2 = S(2.5f); S s3 = S(42.0f); S s4 = S(84.0f); S s5 = S(122.5f); S s6 = S(131.5f); S s7 = S(142.0f); S s8 = S(254.0f); T v1 = T(s1, s2, s3, s4, s5, s6, s7, s8); T v2 = T(s3, s4, s1, s2, s7, s8, s5, s6); T v3 = T(s4, s3, s2, s1, s8, s7, s6, s5); test_fill(v1, v2, v3, queue); } BOOST_AUTO_TEST_CASE_TEMPLATE( fill_vec16, S, scalar_types ) { typedef typename bc::make_vector_type::type T; S s1 = S(1.5f); S s2 = S(2.5f); S s3 = S(42.0f); S s4 = S(84.0f); S s5 = S(122.5f); S s6 = S(131.5f); S s7 = S(142.0f); S s8 = S(254.0f); T v1 = T(s1, s2, s3, s4, s5, s6, s7, s8, s1, s2, s3, s4, s5, s6, s7, s8); T v2 = T(s3, s4, s1, s2, s7, s8, s5, s6, s4, s3, s2, s1, s8, s7, s6, s5); T v3 = T(s4, s3, s2, s1, s8, s7, s6, s5, s8, s7, s6, s5, s4, s3, s2, s1); test_fill(v1, v2, v3, queue); } template inline void test_fill_n(T v1, T v2, T v3, bc::command_queue queue) { if(boost::is_same::type, bc::double_>::value && !queue.get_device().supports_extension("cl_khr_fp64")) { std::cerr << "Skipping test_fill_n<" << bc::type_name() << ">() " "on device which doesn't support cl_khr_fp64" << std::endl; return; } bc::vector vector(4, queue.get_context()); bc::fill_n(vector.begin(), 4, v1, queue); queue.finish(); CHECK_RANGE_EQUAL(T, 4, vector, (v1, v1, v1, v1)); bc::fill_n(vector.begin(), 3, v2, queue); queue.finish(); CHECK_RANGE_EQUAL(T, 4, vector, (v2, v2, v2, v1)); bc::fill_n(vector.begin() + 1, 2, v3, queue); queue.finish(); CHECK_RANGE_EQUAL(T, 4, vector, (v2, v3, v3, v1)); bc::fill_n(vector.begin(), 4, v2, queue); queue.finish(); 
CHECK_RANGE_EQUAL(T, 4, vector, (v2, v2, v2, v2)); // fill last element bc::fill_n(vector.end() - 1, 1, v3, queue); queue.finish(); CHECK_RANGE_EQUAL(T, 4, vector, (v2, v2, v2, v3)); // fill first element bc::fill_n(vector.begin(), 1, v1, queue); queue.finish(); CHECK_RANGE_EQUAL(T, 4, vector, (v1, v2, v2, v3)); } BOOST_AUTO_TEST_CASE_TEMPLATE( fill_n_scalar, S, scalar_types ) { S v1 = S(1.5f); S v2 = S(2.5f); S v3 = S(42.0f); test_fill_n(v1, v2, v3, queue); } BOOST_AUTO_TEST_CASE_TEMPLATE( fill_n_vec2, S, scalar_types ) { typedef typename bc::make_vector_type::type T; S s1 = S(1.5f); S s2 = S(2.5f); S s3 = S(42.0f); S s4 = S(84.0f); T v1 = T(s1, s2); T v2 = T(s3, s4); T v3 = T(s2, s1); test_fill_n(v1, v2, v3, queue); } BOOST_AUTO_TEST_CASE_TEMPLATE( fill_n_vec4, S, scalar_types ) { typedef typename bc::make_vector_type::type T; S s1 = S(1.5f); S s2 = S(2.5f); S s3 = S(42.0f); S s4 = S(84.0f); T v1 = T(s1, s2, s3, s4); T v2 = T(s3, s4, s1, s2); T v3 = T(s4, s3, s2, s1); test_fill_n(v1, v2, v3, queue); } BOOST_AUTO_TEST_CASE_TEMPLATE( fill_n_vec8, S, scalar_types ) { typedef typename bc::make_vector_type::type T; S s1 = S(1.5f); S s2 = S(2.5f); S s3 = S(42.0f); S s4 = S(84.0f); S s5 = S(122.5f); S s6 = S(131.5f); S s7 = S(142.0f); S s8 = S(254.0f); T v1 = T(s1, s2, s3, s4, s5, s6, s7, s8); T v2 = T(s3, s4, s1, s2, s7, s8, s5, s6); T v3 = T(s4, s3, s2, s1, s8, s7, s6, s5); test_fill_n(v1, v2, v3, queue); } BOOST_AUTO_TEST_CASE_TEMPLATE( fill_n_vec16, S, scalar_types ) { typedef typename bc::make_vector_type::type T; S s1 = S(1.5f); S s2 = S(2.5f); S s3 = S(42.0f); S s4 = S(84.0f); S s5 = S(122.5f); S s6 = S(131.5f); S s7 = S(142.0f); S s8 = S(254.0f); T v1 = T(s1, s2, s3, s4, s5, s6, s7, s8, s1, s2, s3, s4, s5, s6, s7, s8); T v2 = T(s3, s4, s1, s2, s7, s8, s5, s6, s4, s3, s2, s1, s8, s7, s6, s5); T v3 = T(s4, s3, s2, s1, s8, s7, s6, s5, s8, s7, s6, s5, s4, s3, s2, s1); test_fill_n(v1, v2, v3, queue); } BOOST_AUTO_TEST_CASE(check_fill_type) { bc::vector vector(5, 
context); bc::future future = bc::fill_async(vector.begin(), vector.end(), 42, queue); future.wait(); #ifdef CL_VERSION_1_2 BOOST_CHECK_EQUAL( future.get_event().get_command_type(), device.check_version(1,2) ? CL_COMMAND_FILL_BUFFER : CL_COMMAND_NDRANGE_KERNEL ); #else BOOST_CHECK( future.get_event().get_command_type() == CL_COMMAND_NDRANGE_KERNEL ); #endif } BOOST_AUTO_TEST_CASE(fill_clone_buffer) { int data[] = { 1, 2, 3, 4 }; bc::vector vec(data, data + 4, queue); CHECK_RANGE_EQUAL(int, 4, vec, (1, 2, 3, 4)); bc::buffer cloned_buffer = vec.get_buffer().clone(queue); BOOST_CHECK( bc::equal( vec.begin(), vec.end(), bc::make_buffer_iterator(cloned_buffer, 0), queue ) ); bc::fill(vec.begin(), vec.end(), 5, queue); BOOST_CHECK( !bc::equal( vec.begin(), vec.end(), bc::make_buffer_iterator(cloned_buffer, 0), queue ) ); bc::fill( bc::make_buffer_iterator(cloned_buffer, 0), bc::make_buffer_iterator(cloned_buffer, 4), 5, queue ); BOOST_CHECK( bc::equal( vec.begin(), vec.end(), bc::make_buffer_iterator(cloned_buffer, 0), queue ) ); } #ifdef CL_VERSION_2_0 BOOST_AUTO_TEST_CASE(fill_svm_buffer) { REQUIRES_OPENCL_VERSION(2, 0); size_t size = 4; bc::svm_ptr ptr = bc::svm_alloc(context, size * sizeof(cl_int)); bc::fill_n(ptr, size * sizeof(cl_int), 42, queue); queue.enqueue_svm_map(ptr.get(), size * sizeof(cl_int), CL_MAP_READ); for(size_t i = 0; i < size; i++) { BOOST_CHECK_EQUAL(static_cast(ptr.get())[i], 42); } queue.enqueue_svm_unmap(ptr.get()); bc::svm_free(context, ptr); } #endif // CL_VERSION_2_0 BOOST_AUTO_TEST_CASE(empty_fill) { bc::vector vec(0, context); bc::fill(vec.begin(), vec.end(), 42, queue); bc::fill_async(vec.begin(), vec.end(), 42, queue); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_find.cpp000066400000000000000000000066011263566244600165150ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See 
accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFind #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(find_int) { int data[] = { 9, 15, 1, 4, 9, 9, 4, 15, 12, 1 }; bc::vector vector(data, data + 10, queue); bc::vector::iterator iter = bc::find(vector.begin(), vector.end(), 4, queue); BOOST_CHECK(iter == vector.begin() + 3); BOOST_CHECK_EQUAL(*iter, 4); iter = bc::find(vector.begin(), vector.end(), 12, queue); BOOST_CHECK(iter == vector.begin() + 8); BOOST_CHECK_EQUAL(*iter, 12); iter = bc::find(vector.begin(), vector.end(), 1, queue); BOOST_CHECK(iter == vector.begin() + 2); BOOST_CHECK_EQUAL(*iter, 1); iter = bc::find(vector.begin(), vector.end(), 9, queue); BOOST_CHECK(iter == vector.begin()); BOOST_CHECK_EQUAL(*iter, 9); iter = bc::find(vector.begin(), vector.end(), 100, queue); BOOST_CHECK(iter == vector.end()); } BOOST_AUTO_TEST_CASE(find_int2) { using bc::int2_; int data[] = { 1, 2, 4, 5, 7, 8 }; bc::vector vector( reinterpret_cast(data), reinterpret_cast(data) + 3, queue ); CHECK_RANGE_EQUAL(int2_, 3, vector, (int2_(1, 2), int2_(4, 5), int2_(7, 8))); bc::vector::iterator iter = bc::find(vector.begin(), vector.end(), int2_(4, 5), queue); BOOST_CHECK(iter == vector.begin() + 1); BOOST_CHECK_EQUAL(*iter, int2_(4, 5)); iter = bc::find(vector.begin(), vector.end(), int2_(10, 11), queue); BOOST_CHECK(iter == vector.end()); } BOOST_AUTO_TEST_CASE(find_if_not_int) { int data[] = { 2, 4, 6, 8, 1, 3, 5, 7, 9 }; bc::vector vector(data, data + 9, queue); bc::vector::iterator iter = bc::find_if_not(vector.begin(), vector.end(), bc::_1 == 2, queue); BOOST_CHECK(iter == vector.begin() + 
1); BOOST_CHECK_EQUAL(*iter, 4); } BOOST_AUTO_TEST_CASE(find_point_by_distance) { using boost::compute::float2_; using boost::compute::lambda::_1; using boost::compute::lambda::distance; float2_ points[] = { float2_(0, 0), float2_(2, 2), float2_(4, 4), float2_(8, 8) }; compute::vector vec(points, points + 4, queue); compute::vector::iterator iter = compute::find_if(vec.begin(), vec.end(), distance(_1, float2_(5, 5)) < 1.5f, queue); BOOST_CHECK(iter == vec.begin() + 2); float2_ value; compute::copy_n(iter, 1, &value, queue); BOOST_CHECK_EQUAL(value, float2_(4, 4)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_find_end.cpp000066400000000000000000000036271263566244600173500ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFindEnd #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; /* find_end_int: the pattern {2, 6} occurs at offsets 2 and 5 of the ten-element text, so find_end must return offset 5 (the last match). After vectorp[1] is set to 9 the pattern no longer occurs and the result must be begin() + 10, i.e. the end of the text. BOOST_CHECK is used instead of BOOST_VERIFY so that a mismatch is reported through Boost.Test and is still checked when NDEBUG is defined. */ BOOST_AUTO_TEST_CASE(find_end_int) { int data[] = {1, 4, 2, 6, 3, 2, 6, 3, 4, 6}; bc::vector vectort(data, data + 10, queue); int datap[] = {2, 6}; bc::vector vectorp(datap, datap + 2, queue); bc::vector::iterator iter = bc::find_end(vectort.begin(), vectort.end(), vectorp.begin(), vectorp.end(), queue); BOOST_CHECK(iter == vectort.begin() + 5); vectorp[1] = 9; iter = bc::find_end(vectort.begin(), vectort.end(), vectorp.begin(), vectorp.end(), queue); BOOST_CHECK(iter == vectort.begin() + 10); } /* find_end_string: the last occurrence of "aba" in the 21-character text starts at offset 15. */ BOOST_AUTO_TEST_CASE(find_end_string) { char text[] = "sdabababacabskjabacab"; bc::vector vectort(text, text + 21, queue); char pattern[] = "aba"; bc::vector vectorp(pattern, pattern + 3, queue); bc::vector::iterator iter = bc::find_end(vectort.begin(), vectort.end(), vectorp.begin(), vectorp.end(), queue); BOOST_CHECK(iter == vectort.begin() + 15); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_flat_map.cpp000066400000000000000000000121021263566244600173510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFlatMap #include #include #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(concept_check) { BOOST_CONCEPT_ASSERT((boost::Container >)); // BOOST_CONCEPT_ASSERT((boost::SimpleAssociativeContainer >)); // BOOST_CONCEPT_ASSERT((boost::UniqueAssociativeContainer >)); BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator::iterator>)); BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator::const_iterator>)); } BOOST_AUTO_TEST_CASE(insert) { boost::compute::flat_map map(context); map.insert(std::make_pair(1, 1.1f), queue); map.insert(std::make_pair(-1, -1.1f), queue); map.insert(std::make_pair(3, 3.3f), queue); map.insert(std::make_pair(2, 2.2f), queue); BOOST_CHECK_EQUAL(map.size(), size_t(4)); BOOST_CHECK(map.find(-1) == map.begin() + 0); BOOST_CHECK(map.find(1) == map.begin() + 1); BOOST_CHECK(map.find(2) == map.begin() + 2); BOOST_CHECK(map.find(3) == map.begin() + 3); map.insert(std::make_pair(2, -2.2f), queue); BOOST_CHECK_EQUAL(map.size(), size_t(4)); } BOOST_AUTO_TEST_CASE(at) { boost::compute::flat_map map(context); map.insert(std::make_pair(1, 1.1f), queue); map.insert(std::make_pair(4, 4.4f), queue); map.insert(std::make_pair(3, 3.3f), queue); map.insert(std::make_pair(2, 2.2f), queue); BOOST_CHECK_EQUAL(float(map.at(1)), float(1.1f)); BOOST_CHECK_EQUAL(float(map.at(2)), float(2.2f)); BOOST_CHECK_EQUAL(float(map.at(3)), float(3.3f)); BOOST_CHECK_EQUAL(float(map.at(4)), float(4.4f)); } BOOST_AUTO_TEST_CASE(index_operator) { boost::compute::flat_map map; map[1] = 1.1f; map[2] = 2.2f; map[3] = 3.3f; map[4] = 4.4f; BOOST_CHECK_EQUAL(float(map[1]), float(1.1f)); BOOST_CHECK_EQUAL(float(map[2]), float(2.2f)); BOOST_CHECK_EQUAL(float(map[3]), float(3.3f)); BOOST_CHECK_EQUAL(float(map[4]), float(4.4f)); } BOOST_AUTO_TEST_CASE(custom_kernel) { typedef boost::compute::flat_map MapType; // map from int->float on device MapType 
map(context); map.insert(std::make_pair(1, 1.2f), queue); map.insert(std::make_pair(3, 3.4f), queue); map.insert(std::make_pair(5, 5.6f), queue); map.insert(std::make_pair(7, 7.8f), queue); // simple linear search for key in map const char lookup_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void lookup(__global const MapType *map, const int map_size, const KeyType key, __global ValueType *result) { for(int i = 0; i < map_size; i++){ if(map[i].first == key){ *result = map[i].second; break; } } } ); // create program source std::stringstream source; // add type definition for map type source << boost::compute::type_definition(); // add lookup function source source << lookup_source; // create lookup program boost::compute::program lookup_program = boost::compute::program::create_with_source(source.str(), context); // program build options std::stringstream options; options << "-DMapType=" << boost::compute::type_name() << " -DKeyType=" << boost::compute::type_name() << " -DValueType=" << boost::compute::type_name(); // build lookup program with options lookup_program.build(options.str()); // create buffer for result value boost::compute::vector result(1, context); // create lookup kernel boost::compute::kernel lookup_kernel = lookup_program.create_kernel("lookup"); // set kernel arguments lookup_kernel.set_arg(0, map.begin().get_buffer()); // map buffer lookup_kernel.set_arg(1, map.size()); // map size lookup_kernel.set_arg(2, 5); // key lookup_kernel.set_arg(3, result.get_buffer()); // result buffer // run kernel with a single work-item queue.enqueue_task(lookup_kernel); // check result from buffer BOOST_CHECK_EQUAL(result.begin().read(queue), 5.6f); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_flat_set.cpp000066400000000000000000000077711263566244600174070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, 
Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFlatSet #include #include #include #include #include #include #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(concept_check) { BOOST_CONCEPT_ASSERT((boost::Container >)); // BOOST_CONCEPT_ASSERT((boost::SimpleAssociativeContainer >)); // BOOST_CONCEPT_ASSERT((boost::UniqueAssociativeContainer >)); BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator::iterator>)); BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator::const_iterator>)); } BOOST_AUTO_TEST_CASE(insert) { bc::flat_set set(context); typedef bc::flat_set::iterator iterator; std::pair location = set.insert(12, queue); BOOST_CHECK(location.first == set.begin()); BOOST_CHECK(location.second == true); BOOST_CHECK_EQUAL(*location.first, 12); BOOST_CHECK_EQUAL(set.size(), size_t(1)); location = set.insert(12, queue); BOOST_CHECK(location.first == set.begin()); BOOST_CHECK(location.second == false); BOOST_CHECK_EQUAL(set.size(), size_t(1)); location = set.insert(4, queue); BOOST_CHECK(location.first == set.begin()); BOOST_CHECK(location.second == true); BOOST_CHECK_EQUAL(set.size(), size_t(2)); location = set.insert(12, queue); BOOST_CHECK(location.first == set.begin() + 1); BOOST_CHECK(location.second == false); BOOST_CHECK_EQUAL(set.size(), size_t(2)); location = set.insert(9, queue); BOOST_CHECK(location.first == set.begin() + 1); BOOST_CHECK(location.second == true); BOOST_CHECK_EQUAL(set.size(), size_t(3)); } BOOST_AUTO_TEST_CASE(erase) { bc::flat_set set(context); typedef bc::flat_set::iterator iterator; set.insert(1, queue); set.insert(2, queue); set.insert(3, queue); set.insert(4, queue); set.insert(5, queue); BOOST_CHECK_EQUAL(set.size(), size_t(5)); iterator i = set.erase(set.begin(), queue); 
queue.finish(); BOOST_CHECK(i == set.begin() + 1); BOOST_CHECK_EQUAL(set.size(), size_t(4)); BOOST_CHECK_EQUAL(*set.begin(), 2); size_t count = set.erase(3, queue); queue.finish(); BOOST_CHECK_EQUAL(count, size_t(1)); BOOST_CHECK_EQUAL(set.size(), size_t(3)); BOOST_CHECK_EQUAL(*set.begin(), 2); count = set.erase(9, queue); queue.finish(); BOOST_CHECK_EQUAL(count, size_t(0)); BOOST_CHECK_EQUAL(set.size(), size_t(3)); BOOST_CHECK_EQUAL(*set.begin(), 2); i = set.erase(set.begin() + 1, queue); queue.finish(); BOOST_CHECK(i == set.begin() + 2); BOOST_CHECK_EQUAL(set.size(), size_t(2)); BOOST_CHECK_EQUAL(*set.begin(), 2); BOOST_CHECK_EQUAL(*(set.end() - 1), 5); set.erase(set.begin(), set.end(), queue); queue.finish(); BOOST_CHECK_EQUAL(set.size(), size_t(0)); } BOOST_AUTO_TEST_CASE(clear) { bc::flat_set set; BOOST_CHECK(set.empty() == true); BOOST_CHECK_EQUAL(set.size(), size_t(0)); set.clear(); BOOST_CHECK(set.empty() == true); BOOST_CHECK_EQUAL(set.size(), size_t(0)); set.insert(3.14f); BOOST_CHECK(set.empty() == false); BOOST_CHECK_EQUAL(set.size(), size_t(1)); set.insert(4.184f); BOOST_CHECK(set.empty() == false); BOOST_CHECK_EQUAL(set.size(), size_t(2)); set.clear(); BOOST_CHECK(set.empty() == true); BOOST_CHECK_EQUAL(set.size(), size_t(0)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_for_each.cpp000066400000000000000000000024631263566244600173450ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestForEach #include #include #include #include #include #include #include "context_setup.hpp" namespace bc = boost::compute; /* for_each_nop: fills a four-element vector with iota and applies an empty device function to every element with for_each. Nothing is asserted, so the test only checks that the calls run. iota is given the fixture queue explicitly, like the for_each call, instead of relying on an implicit default queue. */ BOOST_AUTO_TEST_CASE(for_each_nop) { bc::vector vector(4, context); bc::iota(vector.begin(), vector.end(), 0, queue); BOOST_COMPUTE_FUNCTION(void, nop, (int ignored), {}); bc::for_each(vector.begin(), vector.end(), nop, queue); } /* for_each_n_nop: same as for_each_nop, but the range is given as a start iterator plus vector.size() through for_each_n. */ BOOST_AUTO_TEST_CASE(for_each_n_nop) { bc::vector vector(4, context); bc::iota(vector.begin(), vector.end(), 0, queue); BOOST_COMPUTE_FUNCTION(void, nop, (int ignored), {}); bc::for_each_n(vector.begin(), vector.size(), nop, queue); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_function.cpp000066400000000000000000000144431263566244600174250ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFunction #include #include #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(add_three) { BOOST_COMPUTE_FUNCTION(int, add_three, (int x), { return x + 3; }); int data[] = { 1, 2, 3, 4 }; compute::vector vector(data, data + 4, queue); compute::transform( vector.begin(), vector.end(), vector.begin(), add_three, queue ); CHECK_RANGE_EQUAL(int, 4, vector, (4, 5, 6, 7)); } BOOST_AUTO_TEST_CASE(sum_odd_values) { BOOST_COMPUTE_FUNCTION(int, add_odd_value, (int sum, int value), { if(value & 1){ return sum + value; } else { return sum + 0; } }); int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; compute::vector vector(data, data + 8, queue); int result = compute::accumulate( vector.begin(), vector.end(), 0, add_odd_value, queue ); BOOST_CHECK_EQUAL(result, 16); } BOOST_AUTO_TEST_CASE(sort_pairs) { if(device.vendor() == "NVIDIA" && device.platform().name() == "Apple"){ // FIXME: this test currently segfaults on NVIDIA GPUs on Apple std::cerr << "skipping sort_pairs test on NVIDIA GPU on Apple platform" << std::endl; return; } std::vector > data; data.push_back(std::make_pair(1, 2.3f)); data.push_back(std::make_pair(0, 4.2f)); data.push_back(std::make_pair(2, 1.0f)); compute::vector > vector(data.begin(), data.end(), queue); // sort by first component BOOST_COMPUTE_FUNCTION(bool, compare_first, (std::pair a, std::pair b), { return a.first < b.first; }); compute::sort(vector.begin(), vector.end(), compare_first, queue); compute::copy(vector.begin(), vector.end(), data.begin(), queue); BOOST_CHECK(data[0] == std::make_pair(0, 4.2f)); BOOST_CHECK(data[1] == std::make_pair(1, 2.3f)); BOOST_CHECK(data[2] == std::make_pair(2, 1.0f)); // sort by second component BOOST_COMPUTE_FUNCTION(bool, compare_second, (std::pair a, std::pair b), 
{ return a.second < b.second; }); compute::sort(vector.begin(), vector.end(), compare_second, queue); compute::copy(vector.begin(), vector.end(), data.begin(), queue); BOOST_CHECK(data[0] == std::make_pair(2, 1.0f)); BOOST_CHECK(data[1] == std::make_pair(1, 2.3f)); BOOST_CHECK(data[2] == std::make_pair(0, 4.2f)); } BOOST_AUTO_TEST_CASE(transform_zip_iterator) { float float_data[] = { 1.f, 2.f, 3.f, 4.f }; compute::vector input_floats(float_data, float_data + 4, queue); int int_data[] = { 2, 4, 6, 8 }; compute::vector input_ints(int_data, int_data + 4, queue); compute::vector results(4, context); BOOST_COMPUTE_FUNCTION(float, tuple_pown, (boost::tuple x), { return pown(boost_tuple_get(x, 0), boost_tuple_get(x, 1)); }); compute::transform( compute::make_zip_iterator( boost::make_tuple(input_floats.begin(), input_ints.begin()) ), compute::make_zip_iterator( boost::make_tuple(input_floats.end(), input_ints.end()) ), results.begin(), tuple_pown, queue ); float results_data[4]; compute::copy(results.begin(), results.end(), results_data, queue); BOOST_CHECK_CLOSE(results_data[0], 1.f, 1e-4); BOOST_CHECK_CLOSE(results_data[1], 16.f, 1e-4); BOOST_CHECK_CLOSE(results_data[2], 729.f, 1e-4); BOOST_CHECK_CLOSE(results_data[3], 65536.f, 1e-4); } static BOOST_COMPUTE_FUNCTION(int, static_function, (int x), { return x + 5; }); BOOST_AUTO_TEST_CASE(test_static_function) { int data[] = { 1, 2, 3, 4}; compute::vector vec(data, data + 4, queue); compute::transform( vec.begin(), vec.end(), vec.begin(), static_function, queue ); CHECK_RANGE_EQUAL(int, 4, vec, (6, 7, 8, 9)); } template inline compute::function make_negate_function() { BOOST_COMPUTE_FUNCTION(T, negate, (const T x), { return -x; }); return negate; } BOOST_AUTO_TEST_CASE(test_templated_function) { int int_data[] = { 1, 2, 3, 4 }; compute::vector int_vec(int_data, int_data + 4, queue); compute::function negate_int = make_negate_function(); compute::transform( int_vec.begin(), int_vec.end(), int_vec.begin(), negate_int, queue 
); CHECK_RANGE_EQUAL(int, 4, int_vec, (-1, -2, -3, -4)); float float_data[] = { 1.1f, 2.2f, 3.3f, 4.4f }; compute::vector float_vec(float_data, float_data + 4, queue); compute::function negate_float = make_negate_function(); compute::transform( float_vec.begin(), float_vec.end(), float_vec.begin(), negate_float, queue ); CHECK_RANGE_EQUAL(float, 4, float_vec, (-1.1f, -2.2f, -3.3f, -4.4f)); } BOOST_AUTO_TEST_CASE(define) { BOOST_COMPUTE_FUNCTION(int, return_number, (), { return NUMBER; }); return_number.define("NUMBER", "4"); compute::vector vec(1, context); compute::generate(vec.begin(), vec.end(), return_number, queue); CHECK_RANGE_EQUAL(int, 1, vec, (4)); return_number.define("NUMBER", "2"); compute::generate(vec.begin(), vec.end(), return_number, queue); CHECK_RANGE_EQUAL(int, 1, vec, (2)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_function_input_iterator.cpp000066400000000000000000000025401263566244600225500ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFunctionInputIterator #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(generate_42_doctest) { boost::compute::vector result(4, context); //! [generate_42] BOOST_COMPUTE_FUNCTION(int, ret42, (), { return 42; }); boost::compute::copy( boost::compute::make_function_input_iterator(ret42, 0), boost::compute::make_function_input_iterator(ret42, result.size()), result.begin(), queue ); // result == { 42, 42, 42, 42 } //! 
[generate_42] CHECK_RANGE_EQUAL(int, 4, result, (42, 42, 42, 42)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_functional_as.cpp000066400000000000000000000031261263566244600204210ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFunctionalAs #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; /* roundtrip_int_float: round-trips eight ints through compute::as(). The first transform writes input into output, input is then zeroed, and the second transform writes output back into input. The final check therefore reads input, the destination of the second transform; checking output would leave the zero-fill and the second transform unverified. */ BOOST_AUTO_TEST_CASE(roundtrip_int_float) { int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; compute::vector input(8, context); compute::copy_n(data, 8, input.begin(), queue); // convert int -> float compute::vector output(8, context); compute::transform( input.begin(), input.end(), output.begin(), compute::as(), queue ); // zero out input compute::fill(input.begin(), input.end(), 0, queue); // convert float -> int compute::transform( output.begin(), output.end(), input.begin(), compute::as(), queue ); // check values CHECK_RANGE_EQUAL( int, 8, input, (1, 2, 3, 4, 5, 6, 7, 8) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_functional_bind.cpp000066400000000000000000000141551263566244600207360ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFunctionalBind #include #include #include #include #include #include #include #include #include #include #include #include // simple test struct struct data_struct { int int_value; float float_value; }; BOOST_COMPUTE_ADAPT_STRUCT(data_struct, data_struct, (int_value, float_value)) #include "quirks.hpp" #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; using compute::placeholders::_1; using compute::placeholders::_2; BOOST_AUTO_TEST_CASE(transform_plus_two) { int data[] = { 1, 2, 3, 4 }; compute::vector vector(4, context); compute::copy_n(data, 4, vector.begin(), queue); compute::transform( vector.begin(), vector.end(), vector.begin(), compute::bind(compute::plus(), _1, 2), queue ); CHECK_RANGE_EQUAL(int, 4, vector, (3, 4, 5, 6)); } BOOST_AUTO_TEST_CASE(transform_pow_two) { float data[] = { 2, 3, 4, 5 }; compute::vector vector(4, context); compute::copy_n(data, 4, vector.begin(), queue); compute::transform( vector.begin(), vector.end(), vector.begin(), compute::bind(compute::pow(), 2.0f, _1), queue ); compute::copy(vector.begin(), vector.end(), data, queue); BOOST_CHECK_CLOSE(data[0], 4.0f, 1e-4); BOOST_CHECK_CLOSE(data[1], 8.0f, 1e-4); BOOST_CHECK_CLOSE(data[2], 16.0f, 1e-4); BOOST_CHECK_CLOSE(data[3], 32.0f, 1e-4); } BOOST_AUTO_TEST_CASE(find_if_equal) { int data[] = { 1, 2, 3, 4 }; compute::vector vector(4, context); compute::copy_n(data, 4, vector.begin(), queue); BOOST_CHECK( compute::find_if( vector.begin(), vector.end(), compute::bind(compute::equal_to(), _1, 3), queue ) == vector.begin() + 2 ); } BOOST_AUTO_TEST_CASE(compare_less_than) { int data[] = { 1, 2, 3, 4 }; compute::vector vector(data, data + 4, queue); int count = boost::compute::count_if( vector.begin(), vector.end(), compute::bind(compute::less(), _1, 3), queue ); BOOST_CHECK_EQUAL(count, 2); count = boost::compute::count_if( vector.begin(), 
vector.end(), compute::bind(compute::less(), 3, _1), queue ); BOOST_CHECK_EQUAL(count, 1); } BOOST_AUTO_TEST_CASE(subtract_ranges) { int data1[] = { 1, 2, 3, 4 }; int data2[] = { 4, 3, 2, 1 }; compute::vector vector1(data1, data1 + 4, queue); compute::vector vector2(data2, data2 + 4, queue); compute::vector result(4, context); compute::transform( vector1.begin(), vector1.end(), vector2.begin(), result.begin(), compute::bind(compute::minus(), _1, _2), queue ); CHECK_RANGE_EQUAL(int, 4, result, (-3, -1, 1, 3)); compute::transform( vector1.begin(), vector1.end(), vector2.begin(), result.begin(), compute::bind(compute::minus(), _2, _1), queue ); CHECK_RANGE_EQUAL(int, 4, result, (3, 1, -1, -3)); compute::transform( vector1.begin(), vector1.end(), vector2.begin(), result.begin(), compute::bind(compute::minus(), 5, _1), queue ); CHECK_RANGE_EQUAL(int, 4, result, (4, 3, 2, 1)); compute::transform( vector1.begin(), vector1.end(), vector2.begin(), result.begin(), compute::bind(compute::minus(), 5, _2), queue ); CHECK_RANGE_EQUAL(int, 4, result, (1, 2, 3, 4)); } BOOST_AUTO_TEST_CASE(clamp_values) { int data[] = { 1, 2, 3, 4 }; compute::vector vector(data, data + 4, queue); compute::transform( vector.begin(), vector.end(), vector.begin(), compute::bind(compute::clamp(), _1, 2, 3), queue ); CHECK_RANGE_EQUAL(int, 4, vector, (2, 2, 3, 3)); } BOOST_AUTO_TEST_CASE(bind_custom_function) { int data[] = { 1, 2, 3, 4 }; compute::vector vector(data, data + 4, queue); BOOST_COMPUTE_FUNCTION(int, x_if_odd_else_y, (int x, int y), { if(x & 1) return x; else return y; }); compute::transform( vector.begin(), vector.end(), vector.begin(), compute::bind(x_if_odd_else_y, _1, 9), queue ); CHECK_RANGE_EQUAL(int, 4, vector, (1, 9, 3, 9)); compute::copy( data, data + 4, vector.begin(), queue ); compute::transform( vector.begin(), vector.end(), vector.begin(), compute::bind(x_if_odd_else_y, 2, _1), queue ); CHECK_RANGE_EQUAL(int, 4, vector, (1, 2, 3, 4)); } BOOST_AUTO_TEST_CASE(bind_struct) { 
if(bug_in_struct_assignment(device)){ std::cerr << "skipping bind_struct test" << std::endl; return; } BOOST_COMPUTE_FUNCTION(int, add_struct_value, (int x, data_struct s), { return s.int_value + x; }); data_struct data; data.int_value = 3; data.float_value = 4.56f; int input[] = { 1, 2, 3, 4 }; compute::vector vec(input, input + 4, queue); compute::transform( vec.begin(), vec.end(), vec.begin(), compute::bind(add_struct_value, _1, data), queue ); CHECK_RANGE_EQUAL(int, 4, vec, (4, 5, 6, 7)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_functional_convert.cpp000066400000000000000000000025111263566244600214730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFunctionalConvert #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(convert_int_float) { int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; compute::vector input(8, context); compute::copy_n(data, 8, input.begin(), queue); compute::vector output(8, context); compute::transform( input.begin(), input.end(), output.begin(), compute::convert(), queue ); CHECK_RANGE_EQUAL( float, 8, output, (1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_functional_get.cpp000066400000000000000000000043461263566244600206020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file 
LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFunctionalGet #include #include #include #include #include #include #include #include namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(get_vector_result_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::result_of< compute::get<0>(compute::float4_) >::type, float >::value )); BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::result_of< compute::get<1>(compute::int2_) >::type, int >::value )); } BOOST_AUTO_TEST_CASE(get_pair_result_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::result_of< compute::get<0>(std::pair) >::type, int >::value )); BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::result_of< compute::get<1>(std::pair) >::type, compute::char4_ >::value )); } BOOST_AUTO_TEST_CASE(get_tuple_result_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::result_of< compute::get<0>(boost::tuple) >::type, int >::value )); BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::result_of< compute::get<2>(boost::tuple) >::type, float >::value )); } compute-0.5/test/test_functional_hash.cpp000066400000000000000000000023401263566244600207360ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFunctionalHash #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; /* hash_int: uploads four ints and runs compute::hash() over them with transform, writing into the four-element hash_values vector. No result is asserted, so this only exercises the call. */ BOOST_AUTO_TEST_CASE(hash_int) { using compute::ulong_; int data[] = { 1, 2, 3, 4 }; compute::vector input_values(data, data + 4, queue); compute::vector hash_values(4, context); compute::transform( input_values.begin(), input_values.end(), hash_values.begin(), compute::hash(), queue ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_functional_identity.cpp000066400000000000000000000023201263566244600216420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFunctionalIdentity #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; /* copy_with_identity_transform: transforms eight ints from input into output with compute::identity() and checks that output holds the same eight values in the same order. */ BOOST_AUTO_TEST_CASE(copy_with_identity_transform) { int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; compute::vector input(data, data + 8, queue); compute::vector output(8, context); compute::transform( input.begin(), input.end(), output.begin(), compute::identity(), queue ); CHECK_RANGE_EQUAL( int, 8, output, (1, 2, 3, 4, 5, 6, 7, 8) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_functional_popcount.cpp000066400000000000000000000025441263566244600216700ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFunctionalPopcount #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; /* Unsigned element types the popcount test template is instantiated for. */ typedef boost::mpl::list< compute::uchar_, compute::ushort_, compute::uint_, compute::ulong_ > popcount_test_types; /* popcount: for each type T in popcount_test_types, pushes 0x00, 0x01, 0x03 and 0xff into a device vector, applies compute::popcount() with transform, and expects the set-bit counts 0, 1, 2 and 8. */ BOOST_AUTO_TEST_CASE_TEMPLATE(popcount, T, popcount_test_types) { compute::vector vec(context); vec.push_back(0x00, queue); vec.push_back(0x01, queue); vec.push_back(0x03, queue); vec.push_back(0xff, queue); compute::vector popcounts(vec.size(), context); compute::transform( vec.begin(), vec.end(), popcounts.begin(), compute::popcount(), queue ); CHECK_RANGE_EQUAL(int, 4, popcounts, (0, 1, 2, 8)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_functional_unpack.cpp000066400000000000000000000057461263566244600213110ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestFunctionalUnpack #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(plus_int) { int data1[] = { 1, 3, 5, 7 }; int data2[] = { 2, 4, 6, 8 }; compute::vector input1(4, context); compute::vector input2(4, context); compute::copy_n(data1, 4, input1.begin(), queue); compute::copy_n(data2, 4, input2.begin(), queue); compute::vector output(4, context); compute::transform( compute::make_zip_iterator( boost::make_tuple(input1.begin(), input2.begin()) ), compute::make_zip_iterator( boost::make_tuple(input1.end(), input2.end()) ), output.begin(), compute::detail::unpack(compute::plus()), queue ); CHECK_RANGE_EQUAL(int, 4, output, (3, 7, 11, 15)); } BOOST_AUTO_TEST_CASE(fma_float) { float data1[] = { 1, 3, 5, 7 }; float data2[] = { 2, 4, 6, 8 }; float data3[] = { 0, 9, 1, 2 }; compute::vector input1(4, context); compute::vector input2(4, context); compute::vector input3(4, context); compute::copy_n(data1, 4, input1.begin(), queue); compute::copy_n(data2, 4, input2.begin(), queue); compute::copy_n(data3, 4, input3.begin(), queue); compute::vector output(4, context); compute::transform( compute::make_zip_iterator( boost::make_tuple(input1.begin(), input2.begin(), input3.begin()) ), compute::make_zip_iterator( boost::make_tuple(input1.end(), input2.end(), input3.begin()) ), output.begin(), compute::detail::unpack(compute::fma()), queue ); } BOOST_AUTO_TEST_CASE(subtract_int2) { using compute::int2_; int data[] = { 4, 2, 5, 1, 6, 3, 7, 0 }; compute::vector input(4, context); compute::copy_n(reinterpret_cast(data), 4, input.begin(), queue); compute::vector output(4, context); compute::transform( input.begin(), input.end(), output.begin(), compute::detail::unpack(compute::minus()), queue ); CHECK_RANGE_EQUAL(int, 4, output, (2, 4, 3, 7)); } 
BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_gather.cpp000066400000000000000000000040411263566244600170430ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestGather #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(gather_int) { int input_data[] = { 1, 2, 3, 4, 5 }; compute::vector input(input_data, input_data + 5, queue); int indices_data[] = { 0, 4, 1, 3, 2 }; compute::vector indices(indices_data, indices_data + 5, queue); compute::vector output(5, context); compute::gather( indices.begin(), indices.end(), input.begin(), output.begin(), queue ); CHECK_RANGE_EQUAL(int, 5, output, (1, 5, 2, 4, 3)); } BOOST_AUTO_TEST_CASE(copy_index_then_gather) { // input data int data[] = { 1, 4, 3, 2, 5, 9, 8, 7 }; compute::vector input(data, data + 8, queue); // function returning true if the input is odd BOOST_COMPUTE_FUNCTION(bool, is_odd, (int x), { return x % 2 != 0; }); // copy indices of all odd values compute::vector odds(5, context); compute::detail::copy_index_if( input.begin(), input.end(), odds.begin(), is_odd, queue ); CHECK_RANGE_EQUAL(int, 5, odds, (0, 2, 4, 5, 7)); // gather all odd values compute::vector odd_values(5, context); compute::gather( odds.begin(), odds.end(), input.begin(), odd_values.begin(), queue ); CHECK_RANGE_EQUAL(int, 5, odd_values, (1, 3, 5, 9, 7)); } BOOST_AUTO_TEST_SUITE_END() 
compute-0.5/test/test_generate.cpp000066400000000000000000000042571263566244600173740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestGenerate #include #include #include #include #include #include #include #include "quirks.hpp" #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(generate4) { bc::vector vector(4, context); bc::fill(vector.begin(), vector.end(), 2, queue); CHECK_RANGE_EQUAL(int, 4, vector, (2, 2, 2, 2)); BOOST_COMPUTE_FUNCTION(int, ret4, (void), { return 4; }); bc::generate(vector.begin(), vector.end(), ret4, queue); CHECK_RANGE_EQUAL(int, 4, vector, (4, 4, 4, 4)); } BOOST_AUTO_TEST_CASE(generate_pair) { if(bug_in_struct_assignment(device)){ std::cerr << "skipping generate_pair test" << std::endl; return; } // in order to use std::pair with BOOST_COMPUTE_FUNCTION() macro we // need a typedef. otherwise the commas in the type declaration screw it up. 
typedef std::pair pair_type; bc::vector vector(3, context); BOOST_COMPUTE_FUNCTION(pair_type, generate_pair, (void), { return boost_make_pair(int, 2, float, 3.14f); }); bc::generate(vector.begin(), vector.end(), generate_pair, queue); // check results std::vector host_vector(3); bc::copy(vector.begin(), vector.end(), host_vector.begin(), queue); BOOST_CHECK(host_vector[0] == std::make_pair(2, 3.14f)); BOOST_CHECK(host_vector[1] == std::make_pair(2, 3.14f)); BOOST_CHECK(host_vector[2] == std::make_pair(2, 3.14f)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_image1d.cpp000066400000000000000000000040411263566244600171000ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestImage1D #include #include #include #include #include "quirks.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(image1d_get_supported_formats) { const std::vector formats = compute::image1d::get_supported_formats(context); } #ifdef CL_VERSION_1_2 BOOST_AUTO_TEST_CASE(fill_image1d) { REQUIRES_OPENCL_VERSION(1, 2); // device OpenCL version check // single-channel unsigned char compute::image_format format(CL_R, CL_UNSIGNED_INT8); if(!compute::image1d::is_supported_format(format, context)){ std::cerr << "skipping fill_image1d test, image format not supported" << std::endl; return; } compute::image1d img(context, 64, format); BOOST_CHECK_EQUAL(img.width(), 64); BOOST_CHECK(img.size() == compute::dim(64)); BOOST_CHECK(img.format() == format); // fill image with '128' compute::uint4_ fill_color(128, 0, 0, 0); queue.enqueue_fill_image(img, &fill_color, img.origin(), img.size()); // read value of first pixel compute::uchar_ first_pixel = 0; queue.enqueue_read_image(img, compute::dim(0), compute::dim(1), &first_pixel); BOOST_CHECK_EQUAL(first_pixel, 128); } #endif // CL_VERSION_1_2 // check type_name() for image1d BOOST_AUTO_TEST_CASE(image1d_type_name) { BOOST_CHECK( std::strcmp( boost::compute::type_name(), "image1d_t" ) == 0 ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_image2d.cpp000066400000000000000000000155071263566244600171120ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestImage2D #include #include #include #include #include #include "quirks.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(image2d_get_supported_formats) { const std::vector formats = compute::image2d::get_supported_formats(context); } BOOST_AUTO_TEST_CASE(create_image_doctest) { try { //! [create_image] // create 8-bit RGBA image format boost::compute::image_format rgba8(CL_RGBA, CL_UNSIGNED_INT8); // create 640x480 image object boost::compute::image2d img(context, 640, 480, rgba8); //! [create_image] // verify image has been created and format is correct BOOST_CHECK(img.get() != cl_mem()); BOOST_CHECK(img.format() == rgba8); BOOST_CHECK_EQUAL(img.width(), 640); BOOST_CHECK_EQUAL(img.height(), 480); } catch(compute::opencl_error &e){ if(e.error_code() == CL_IMAGE_FORMAT_NOT_SUPPORTED){ // image format not supported by device return; } // some other error, rethrow throw; } } BOOST_AUTO_TEST_CASE(get_info) { compute::image_format format(CL_RGBA, CL_UNSIGNED_INT8); if(!compute::image2d::is_supported_format(format, context)){ std::cerr << "skipping get_info test, image format not supported" << std::endl; return; } compute::image2d image( context, 48, 64, format, compute::image2d::read_only ); BOOST_CHECK_EQUAL(image.get_info(CL_IMAGE_WIDTH), size_t(48)); BOOST_CHECK_EQUAL(image.get_info(CL_IMAGE_HEIGHT), size_t(64)); BOOST_CHECK_EQUAL(image.get_info(CL_IMAGE_DEPTH), size_t(0)); BOOST_CHECK_EQUAL(image.get_info(CL_IMAGE_ROW_PITCH), size_t(48*4)); BOOST_CHECK_EQUAL(image.get_info(CL_IMAGE_SLICE_PITCH), size_t(0)); BOOST_CHECK_EQUAL(image.get_info(CL_IMAGE_ELEMENT_SIZE), size_t(4)); BOOST_CHECK(image.format() == format); BOOST_CHECK_EQUAL(image.width(), size_t(48)); BOOST_CHECK_EQUAL(image.height(), size_t(64)); } BOOST_AUTO_TEST_CASE(clone_image) { compute::image_format format(CL_RGBA, CL_UNORM_INT8); 
if(!compute::image2d::is_supported_format(format, context)){ std::cerr << "skipping clone_image test, image format not supported" << std::endl; return; } // image data unsigned int data[] = { 0x0000ffff, 0xff00ffff, 0x00ff00ff, 0xffffffff }; // create image on the device compute::image2d image(context, 2, 2, format); // ensure we have a valid image object BOOST_REQUIRE(image.get() != cl_mem()); // copy image data to the device queue.enqueue_write_image(image, image.origin(), image.size(), data); // clone image compute::image2d copy = image.clone(queue); // ensure image format is the same BOOST_CHECK(copy.format() == image.format()); // read cloned image data back to the host unsigned int cloned_data[4]; queue.enqueue_read_image(copy, image.origin(), image.size(), cloned_data); // ensure original data and cloned data are the same BOOST_CHECK_EQUAL(cloned_data[0], data[0]); BOOST_CHECK_EQUAL(cloned_data[1], data[1]); BOOST_CHECK_EQUAL(cloned_data[2], data[2]); BOOST_CHECK_EQUAL(cloned_data[3], data[3]); } #ifdef CL_VERSION_1_2 BOOST_AUTO_TEST_CASE(fill_image) { REQUIRES_OPENCL_VERSION(1, 2); // device OpenCL version check compute::image_format format(CL_RGBA, CL_UNSIGNED_INT8); if(!compute::image2d::is_supported_format(format, context)){ std::cerr << "skipping fill_image test, image format not supported" << std::endl; return; } compute::image2d img(context, 640, 480, format); // fill image with black compute::uint4_ black(0, 0, 0, 255); queue.enqueue_fill_image(img, &black, img.origin(), img.size()); // read value of first pixel compute::uchar4_ first_pixel; queue.enqueue_read_image(img, compute::dim(0), compute::dim(1), &first_pixel); BOOST_CHECK_EQUAL(first_pixel, compute::uchar4_(0, 0, 0, 255)); // fill image with white compute::uint4_ white(255, 255, 255, 255); queue.enqueue_fill_image(img, &white, img.origin(), img.size()); // read value of first pixel queue.enqueue_read_image(img, compute::dim(0), compute::dim(1), &first_pixel); BOOST_CHECK_EQUAL(first_pixel, 
compute::uchar4_(255, 255, 255, 255)); } #endif // check type_name() for image2d BOOST_AUTO_TEST_CASE(image2d_type_name) { BOOST_CHECK( std::strcmp( boost::compute::type_name(), "image2d_t" ) == 0 ); } BOOST_AUTO_TEST_CASE(map_image) { compute::image_format format(CL_RGBA, CL_UNSIGNED_INT8); if(!compute::image2d::is_supported_format(format, context)){ std::cerr << "skipping clone_image test, image format not supported" << std::endl; return; } // create image on the device compute::image2d image(context, 2, 2, format); // ensure we have a valid image object BOOST_REQUIRE(image.get() != cl_mem()); size_t row_pitch = 0; size_t slice_pitch = 0; // write map image compute::uint_* ptr = static_cast( queue.enqueue_map_image(image, CL_MAP_WRITE, image.origin(), image.size(), row_pitch, slice_pitch) ); BOOST_CHECK_EQUAL(row_pitch, size_t(2*4)); // image data compute::uint_ data[] = { 0x0000ffff, 0xff00ffff, 0x00ff00ff, 0xffffffff }; // copy data to image for(size_t i = 0; i < 4; i++){ *(ptr+i) = data[i]; } // unmap queue.enqueue_unmap_image(image, static_cast(ptr)); // read map image compute::event map_event; ptr = static_cast( queue.enqueue_map_image_async(image, CL_MAP_READ, image.origin(), image.size(), row_pitch, slice_pitch, map_event) ); map_event.wait(); BOOST_CHECK(map_event.get_status() == CL_COMPLETE); BOOST_CHECK_EQUAL(row_pitch, size_t(2*4)); // checking for(size_t i = 0; i < 4; i++){ BOOST_CHECK_EQUAL(*(ptr+i), data[i]); } // unmap queue.enqueue_unmap_image(image, static_cast(ptr)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_image3d.cpp000066400000000000000000000020711263566244600171030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestImage3D #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(image3d_get_supported_formats) { const std::vector formats = compute::image3d::get_supported_formats(context); } // check type_name() for image3d BOOST_AUTO_TEST_CASE(image3d_type_name) { BOOST_CHECK( std::strcmp( boost::compute::type_name(), "image3d_t" ) == 0 ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_image_sampler.cpp000066400000000000000000000041171263566244600204020ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestImageSampler #include #include #include #include #include "quirks.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(get_context) { if(!supports_image_samplers(device)){ std::cerr << "skipping get_context test" << std::endl; return; } compute::image_sampler sampler(context, true, CL_ADDRESS_NONE, CL_FILTER_NEAREST); BOOST_CHECK(sampler.get_context() == context); } BOOST_AUTO_TEST_CASE(get_info) { if(!supports_image_samplers(device)){ std::cerr << "skipping get_info test" << std::endl; return; } compute::image_sampler sampler(context, true, CL_ADDRESS_NONE, CL_FILTER_NEAREST); BOOST_CHECK_EQUAL(sampler.get_info(CL_SAMPLER_NORMALIZED_COORDS), true); BOOST_CHECK_EQUAL( sampler.get_info(CL_SAMPLER_ADDRESSING_MODE), cl_addressing_mode(CL_ADDRESS_NONE) ); BOOST_CHECK_EQUAL( sampler.get_info(CL_SAMPLER_FILTER_MODE), cl_filter_mode(CL_FILTER_NEAREST) ); sampler = 
compute::image_sampler(context, false, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR); BOOST_CHECK_EQUAL(sampler.get_info(CL_SAMPLER_NORMALIZED_COORDS), false); BOOST_CHECK_EQUAL( sampler.get_info(CL_SAMPLER_ADDRESSING_MODE), cl_addressing_mode(CL_ADDRESS_CLAMP) ); BOOST_CHECK_EQUAL( sampler.get_info(CL_SAMPLER_FILTER_MODE), cl_filter_mode(CL_FILTER_LINEAR) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_includes.cpp000066400000000000000000000037651263566244600174130ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestIncludes #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(includes_int) { int dataset1[] = {1, 1, 2, 2, 2, 2, 3, 3, 4, 5, 6, 10}; bc::vector set1(dataset1, dataset1 + 12, queue); int dataset2[] = {2, 4, 5, 6}; bc::vector set2(dataset2, dataset2 + 4, queue); bool includes = bc::includes(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 4, queue); BOOST_VERIFY(includes == true); set2[3] = 7; includes = bc::includes(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 4, queue); BOOST_VERIFY(includes == false); } BOOST_AUTO_TEST_CASE(includes_string) { char string1[] = "abcccdddeeff"; bc::vector set1(string1, string1 + 12, queue); char string2[] = "bccdf"; bc::vector set2(string2, string2 + 5, queue); bool includes = bc::includes(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 5, queue); BOOST_VERIFY(includes == true); set2[0] = 'g'; includes = bc::includes(set1.begin(), set1.begin() + 12, set2.begin(), 
set2.begin() + 4, queue); BOOST_VERIFY(includes == false); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_inner_product.cpp000066400000000000000000000032061263566244600204460ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestInnerProduct #include #include #include #include #include #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(inner_product_int) { int data1[] = { 1, 2, 3, 4 }; bc::vector input1(data1, data1 + 4, queue); int data2[] = { 10, 20, 30, 40 }; bc::vector input2(data2, data2 + 4, queue); int product = bc::inner_product(input1.begin(), input1.end(), input2.begin(), 0, queue); BOOST_CHECK_EQUAL(product, 300); } BOOST_AUTO_TEST_CASE(inner_product_counting_iterator) { BOOST_CHECK_EQUAL( boost::compute::inner_product( boost::compute::make_counting_iterator(0), boost::compute::make_counting_iterator(100), boost::compute::make_counting_iterator(0), 0, queue ), 328350 ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_inplace_merge.cpp000066400000000000000000000025361263566244600203720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestInplaceMerge #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(simple_merge_int) { int data[] = { 1, 3, 5, 7, 2, 4, 6, 8 }; compute::vector vector(data, data + 8, queue); // merge each half in-place compute::inplace_merge( vector.begin(), vector.begin() + 4, vector.end(), queue ); CHECK_RANGE_EQUAL(int, 8, vector, (1, 2, 3, 4, 5, 6, 7, 8)); // run again on already sorted list compute::inplace_merge( vector.begin(), vector.begin() + 4, vector.end(), queue ); CHECK_RANGE_EQUAL(int, 8, vector, (1, 2, 3, 4, 5, 6, 7, 8)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_inplace_reduce.cpp000066400000000000000000000110671263566244600205410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestInplaceReduce #include #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(sum_int) { int data[] = { 1, 5, 3, 4, 9, 3, 5, 3 }; boost::compute::vector vector(data, data + 8, queue); boost::compute::detail::inplace_reduce(vector.begin(), vector.end(), boost::compute::plus(), queue); queue.finish(); BOOST_CHECK_EQUAL(int(vector[0]), int(33)); vector.assign(data, data + 8); vector.push_back(3); boost::compute::detail::inplace_reduce(vector.begin(), vector.end(), boost::compute::plus(), queue); queue.finish(); BOOST_CHECK_EQUAL(int(vector[0]), int(36)); } BOOST_AUTO_TEST_CASE(multiply_int) { int data[] = { 1, 5, 3, 4, 9, 3, 5, 3 }; boost::compute::vector vector(data, data + 8, queue); boost::compute::detail::inplace_reduce(vector.begin(), vector.end(), boost::compute::multiplies(), queue); queue.finish(); BOOST_CHECK_EQUAL(int(vector[0]), int(24300)); vector.assign(data, data + 8); vector.push_back(3); boost::compute::detail::inplace_reduce(vector.begin(), vector.end(), boost::compute::multiplies(), queue); queue.finish(); BOOST_CHECK_EQUAL(int(vector[0]), int(72900)); } BOOST_AUTO_TEST_CASE(reduce_iota) { // 1 value boost::compute::vector vector(1, context); boost::compute::iota(vector.begin(), vector.end(), int(0), queue); boost::compute::detail::inplace_reduce(vector.begin(), vector.end(), boost::compute::plus(), queue); queue.finish(); BOOST_CHECK_EQUAL(int(vector[0]), int(0)); // 1000 values vector.resize(1000); boost::compute::iota(vector.begin(), vector.end(), int(0), queue); boost::compute::detail::inplace_reduce(vector.begin(), vector.end(), boost::compute::plus(), queue); queue.finish(); BOOST_CHECK_EQUAL(int(vector[0]), int(499500)); // 2499 values vector.resize(2499); boost::compute::iota(vector.begin(), vector.end(), int(0), queue); boost::compute::detail::inplace_reduce(vector.begin(), vector.end(), 
boost::compute::plus(), queue); queue.finish(); BOOST_CHECK_EQUAL(int(vector[0]), int(3121251)); // 4096 values vector.resize(4096); boost::compute::iota(vector.begin(), vector.end(), int(0), queue); boost::compute::detail::inplace_reduce(vector.begin(), vector.end(), boost::compute::plus(), queue); queue.finish(); BOOST_CHECK_EQUAL(int(vector[0]), int(8386560)); // 5000 values vector.resize(5000); boost::compute::iota(vector.begin(), vector.end(), int(0), queue); boost::compute::detail::inplace_reduce(vector.begin(), vector.end(), boost::compute::plus(), queue); queue.finish(); BOOST_CHECK_EQUAL(int(vector[0]), int(12497500)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_insertion_sort.cpp000066400000000000000000000162071263566244600206610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestInsertionSort #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(sort_char_vector) { using boost::compute::char_; char_ data[] = { 'c', 'a', '0', '7', 'B', 'F', '\0', '$' }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::serial_insertion_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(char_, 8, vector, ('\0', '$', '0', '7', 'B', 'F', 'a', 'c')); } BOOST_AUTO_TEST_CASE(sort_uchar_vector) { using boost::compute::uchar_; uchar_ data[] = { 0x12, 0x00, 0xFF, 0xB4, 0x80, 0x32, 0x64, 0xA2 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::serial_insertion_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(uchar_, 8, vector, (0x00, 0x12, 0x32, 0x64, 0x80, 0xA2, 0xB4, 0xFF)); } BOOST_AUTO_TEST_CASE(sort_short_vector) { using boost::compute::short_; short_ data[] = { -4, 152, -94, 963, 31002, -456, 0, -2113 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::serial_insertion_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(short_, 8, vector, (-2113, -456, -94, -4, 0, 152, 963, 31002)); } 
BOOST_AUTO_TEST_CASE(sort_ushort_vector) { using boost::compute::ushort_; ushort_ data[] = { 4, 152, 94, 963, 63202, 34560, 0, 2113 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::serial_insertion_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(ushort_, 8, vector, (0, 4, 94, 152, 963, 2113, 34560, 63202)); } BOOST_AUTO_TEST_CASE(sort_int_vector) { int data[] = { -4, 152, -5000, 963, 75321, -456, 0, 1112 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::serial_insertion_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(int, 8, vector, (-5000, -456, -4, 0, 152, 963, 1112, 75321)); } BOOST_AUTO_TEST_CASE(sort_uint_vector) { using boost::compute::uint_; uint_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::serial_insertion_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(uint_, 8, vector, (0, 500, 562, 1988, 9852, 102030, 123456, 4000000)); } BOOST_AUTO_TEST_CASE(sort_long_vector) { using boost::compute::long_; long_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), 
queue) == false); boost::compute::detail::serial_insertion_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(long_, 8, vector, (0, 500, 562, 1988, 9852, 102030, 123456, 4000000)); } BOOST_AUTO_TEST_CASE(sort_ulong_vector) { using boost::compute::ulong_; ulong_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::serial_insertion_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(ulong_, 8, vector, (0, 500, 562, 1988, 9852, 102030, 123456, 4000000)); } BOOST_AUTO_TEST_CASE(sort_float_vector) { float data[] = { -6023.0f, 152.5f, -63.0f, 1234567.0f, 11.2f, -5000.1f, 0.0f, 14.0f, -8.25f, -0.0f }; boost::compute::vector vector(data, data + 10, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(10)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::serial_insertion_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL( float, 10, vector, (-6023.0f, -5000.1f, -63.0f, -8.25f, -0.0f, 0.0f, 11.2f, 14.0f, 152.5f, 1234567.0f) ); } BOOST_AUTO_TEST_CASE(sort_double_vector) { if(!device.supports_extension("cl_khr_fp64")){ std::cout << "skipping test: device does not support double" << std::endl; return; } double data[] = { -6023.0, 152.5, -63.0, 1234567.0, 11.2, -5000.1, 0.0, 14.0, -8.25, -0.0 }; boost::compute::vector vector(data, data + 10, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(10)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); 
boost::compute::detail::serial_insertion_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL( double, 10, vector, (-6023.0, -5000.1, -63.0, -8.25, -0.0, 0.0, 11.2, 14.0, 152.5, 1234567.0) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_invoke.cpp000066400000000000000000000032431263566244600170670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://kylelutz.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestInvoke #include #include #include #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(invoke_builtin) { BOOST_CHECK_EQUAL(compute::invoke(compute::abs(), queue, -3), 3); BOOST_CHECK_CLOSE(compute::invoke(compute::pow(), queue, 2.f, 8.f), 256.f, 1e-4); } BOOST_AUTO_TEST_CASE(invoke_function) { BOOST_COMPUTE_FUNCTION(int, plus_two, (int x), { return x + 2; }); BOOST_CHECK_EQUAL(compute::invoke(plus_two, queue, 2), 4); BOOST_COMPUTE_FUNCTION(float, get_max, (float x, float y), { if(x > y) return x; else return y; }); BOOST_CHECK_EQUAL(compute::invoke(get_max, queue, 10.f, 20.f), 20.f); } BOOST_AUTO_TEST_CASE(invoke_lambda) { using boost::compute::lambda::_1; using boost::compute::lambda::_2; BOOST_CHECK_EQUAL(compute::invoke(_1 / 2, queue, 4), 2); BOOST_CHECK_EQUAL(compute::invoke(_1 * _2 + 1, queue, 3, 3), 10); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_iota.cpp000066400000000000000000000040331263566244600165260ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 
2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestIota #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(iota_int) { bc::vector vector(4, context); bc::iota(vector.begin(), vector.end(), 0, queue); CHECK_RANGE_EQUAL(int, 4, vector, (0, 1, 2, 3)); bc::iota(vector.begin(), vector.end(), 10, queue); CHECK_RANGE_EQUAL(int, 4, vector, (10, 11, 12, 13)); bc::iota(vector.begin() + 2, vector.end(), -5, queue); CHECK_RANGE_EQUAL(int, 4, vector, (10, 11, -5, -4)); bc::iota(vector.begin(), vector.end() - 2, 4, queue); CHECK_RANGE_EQUAL(int, 4, vector, (4, 5, -5, -4)); } BOOST_AUTO_TEST_CASE(iota_doctest) { boost::compute::vector vec(3, context); //! [iota] boost::compute::iota(vec.begin(), vec.end(), 0, queue); //! 
// Checks is_permutation() on two integer ranges holding the same multiset of
// values, then breaks the permutation by overwriting one element.
BOOST_AUTO_TEST_CASE(is_permutation_int)
{
    int dataset1[] = {1, 3, 1, 2, 5};
    bc::vector vector1(dataset1, dataset1 + 5, queue);

    int dataset2[] = {3, 1, 5, 1, 2};
    bc::vector vector2(dataset2, dataset2 + 5, queue);

    bool result =
        bc::is_permutation(vector1.begin(), vector1.begin() + 5,
                           vector2.begin(), vector2.begin() + 5, queue);

    // BOOST_CHECK (not BOOST_VERIFY) so that a failure is reported through
    // the test framework instead of aborting or being compiled out
    BOOST_CHECK(result == true);

    // vector2 now holds three 1s, so it is no longer a permutation of vector1
    vector2[0] = 1;

    result = bc::is_permutation(vector1.begin(), vector1.begin() + 5,
                                vector2.begin(), vector2.begin() + 5,
                                queue);

    BOOST_CHECK(result == false);
}

// Same scenario as is_permutation_int, using character data.
BOOST_AUTO_TEST_CASE(is_permutation_string)
{
    char dataset1[] = "abade";
    bc::vector vector1(dataset1, dataset1 + 5, queue);

    char dataset2[] = "aadeb";
    bc::vector vector2(dataset2, dataset2 + 5, queue);

    bool result =
        bc::is_permutation(vector1.begin(), vector1.begin() + 5,
                           vector2.begin(), vector2.begin() + 5, queue);

    // reported via the test framework; see is_permutation_int
    BOOST_CHECK(result == true);

    // replacing the leading 'a' with 'b' changes the character counts
    vector2[0] = 'b';

    result = bc::is_permutation(vector1.begin(), vector1.begin() + 5,
                                vector2.begin(), vector2.begin() + 5,
                                queue);

    BOOST_CHECK(result == false);
}
queue)); // reverse the range compute::reverse(vec.begin(), vec.end(), queue); BOOST_CHECK(compute::is_sorted(vec.begin(), vec.end(), queue) == false); BOOST_CHECK(compute::is_sorted(vec.begin(), vec.end(), compute::greater(), queue)); // reverse it back compute::reverse(vec.begin(), vec.end(), queue); BOOST_CHECK(compute::is_sorted(vec.begin(), vec.end(), queue)); BOOST_CHECK(compute::is_sorted(vec.begin(), vec.end(), compute::greater(), queue) == false); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_kernel.cpp000066400000000000000000000111341263566244600170520ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestKernel #include #include #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(name) { compute::kernel foo = compute::kernel::create_with_source( "__kernel void foo(int x) { }", "foo", context ); BOOST_CHECK_EQUAL(foo.name(), "foo"); compute::kernel bar = compute::kernel::create_with_source( "__kernel void bar(float x) { }", "bar", context ); BOOST_CHECK_EQUAL(bar.name(), "bar"); } BOOST_AUTO_TEST_CASE(arity) { compute::kernel foo = compute::kernel::create_with_source( "__kernel void foo(int x) { }", "foo", context ); BOOST_CHECK_EQUAL(foo.arity(), size_t(1)); compute::kernel bar = compute::kernel::create_with_source( "__kernel void bar(float x, float y) { }", "bar", context ); BOOST_CHECK_EQUAL(bar.arity(), size_t(2)); compute::kernel baz = compute::kernel::create_with_source( "__kernel void baz(char x, char y, char z) { }", "baz", context ); BOOST_CHECK_EQUAL(baz.arity(), 
// Binds buffer arguments to a kernel, once through a compute::buffer object
// and once through the value returned by buffer::get().
BOOST_AUTO_TEST_CASE(set_buffer_arg)
{
    const size_t buffer_bytes = 16;
    compute::buffer first(context, buffer_bytes);
    compute::buffer second(context, buffer_bytes);

    const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
        __kernel void foo(__global int *x, __global int *y)
        {
            x[get_global_id(0)] = -y[get_global_id(0)];
        }
    );

    compute::kernel negate_kernel =
        compute::kernel::create_with_source(source, "foo", context);

    // argument 0 takes the wrapper object, argument 1 the result of get()
    negate_kernel.set_arg(0, first);
    negate_kernel.set_arg(1, second.get());
}
// Applies the lambda expression (_1 * _1) + 1 in place to the values 1..5.
BOOST_AUTO_TEST_CASE(squared_plus_one)
{
    bc::vector values(context);

    // append 1, 2, 3, 4, 5 one element at a time
    for(int value = 1; value <= 5; ++value){
        values.push_back(value, queue);
    }

    // square every element and add one, writing back into the same range
    bc::transform(
        values.begin(),
        values.end(),
        values.begin(),
        (bc::_1 * bc::_1) + 1,
        queue
    );

    CHECK_RANGE_EQUAL(int, 5, values, (2, 5, 10, 17, 26));
}
// Compile-time helpers used by the result_of test case below. Each overload
// contains only a BOOST_STATIC_ASSERT comparing the type computed by
// boost::compute::lambda::result_of for the expression against `Result`;
// nothing is evaluated at run time and the function arguments are unnamed.
// NOTE(review): the template parameter lists are not visible in this view;
// presumably `Result` is given explicitly at the call site and the remaining
// parameters are deduced from the arguments - confirm against the original.

// Overload for an expression that takes no arguments.
template void check_lambda_result(const Expr &)
{
    BOOST_STATIC_ASSERT((
        boost::is_same<
            typename ::boost::compute::lambda::result_of::type,
            Result
        >::value
    ));
}

// Overload for an expression applied to one argument.
template void check_lambda_result(const Expr &, const Arg1 &)
{
    BOOST_STATIC_ASSERT((
        boost::is_same<
            typename ::boost::compute::lambda::result_of<
                Expr,
                typename boost::tuple
            >::type,
            Result
        >::value
    ));
}

// Overload for an expression applied to two arguments.
template void check_lambda_result(const Expr &, const Arg1 &, const Arg2 &)
{
    BOOST_STATIC_ASSERT((
        boost::is_same<
            typename ::boost::compute::lambda::result_of<
                Expr,
                typename boost::tuple
            >::type,
            Result
        >::value
    ));
}

// Overload for an expression applied to three arguments.
template void check_lambda_result(const Expr &, const Arg1 &, const Arg2 &, const Arg3 &)
{
    BOOST_STATIC_ASSERT((
        boost::is_same<
            typename ::boost::compute::lambda::result_of<
                Expr,
                typename boost::tuple
            >::type,
            Result
        >::value
    ));
}
// Stores the lambda expression _1 * 2 + 3 in a compute::function object and
// uses that object as the operator of an in-place transform().
BOOST_AUTO_TEST_CASE(make_function_from_lamdba)
{
    namespace lambda = boost::compute::lambda;

    int input[] = { 2, 4, 6, 8, 10 };
    compute::vector values(input, input + 5, queue);

    // convert the lambda expression into a function object
    compute::function times_two_plus_three = lambda::_1 * 2 + 3;

    compute::transform(
        values.begin(),
        values.end(),
        values.begin(),
        times_two_plus_three,
        queue
    );

    CHECK_RANGE_EQUAL(int, 5, values, (7, 11, 15, 19, 23));
}
// Extracts the individual components of two-component vector values with
// lambda::get<N>() and checks each component range separately.
BOOST_AUTO_TEST_CASE(lambda_get_vector)
{
    using boost::compute::_1;
    using boost::compute::int2_;
    using boost::compute::lambda::get;

    // eight ints on the host, copied to the device as four two-component values
    int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    compute::vector vector(4, context);

    compute::copy(
        reinterpret_cast(data),
        reinterpret_cast(data) + 4,
        vector.begin(),
        queue
    );

    // extract first component of each vector
    compute::vector first_component(4, context);
    compute::transform(
        vector.begin(),
        vector.end(),
        first_component.begin(),
        get<0>(_1),
        queue
    );
    CHECK_RANGE_EQUAL(int, 4, first_component, (1, 3, 5, 7));

    // extract second component of each vector; the result goes into its own
    // output vector so that both outputs are exercised and verified
    compute::vector second_component(4, context);
    compute::transform(
        vector.begin(),
        vector.end(),
        second_component.begin(),
        get<1>(_1),
        queue
    );
    CHECK_RANGE_EQUAL(int, 4, second_component, (2, 4, 6, 8));
}
'd', 7.8f), queue); // extract first compoenent of each tuple compute::vector first_component(4, context); compute::transform( vector.begin(), vector.end(), first_component.begin(), get<0>(_1), queue ); CHECK_RANGE_EQUAL(int, 4, first_component, (1, 3, 5, 7)); // extract second compoenent of each tuple compute::vector second_component(4, context); compute::transform( vector.begin(), vector.end(), second_component.begin(), get<1>(_1), queue ); CHECK_RANGE_EQUAL(char, 4, second_component, ('a', 'b', 'c', 'd')); // extract third compoenent of each tuple compute::vector third_component(4, context); compute::transform( vector.begin(), vector.end(), third_component.begin(), get<2>(_1), queue ); CHECK_RANGE_EQUAL(float, 4, third_component, (1.2f, 3.4f, 5.6f, 7.8f)); } BOOST_AUTO_TEST_CASE(lambda_get_zip_iterator) { using boost::compute::_1; using boost::compute::lambda::get; float data[] = { 1.2f, 2.3f, 3.4f, 4.5f, 5.6f, 6.7f, 7.8f, 9.0f }; compute::vector input(8, context); compute::copy(data, data + 8, input.begin(), queue); compute::vector output(8, context); compute::for_each( compute::make_zip_iterator( boost::make_tuple(input.begin(), output.begin()) ), compute::make_zip_iterator( boost::make_tuple(input.end(), output.end()) ), get<1>(_1) = get<0>(_1), queue ); CHECK_RANGE_EQUAL(float, 8, output, (1.2f, 2.3f, 3.4f, 4.5f, 5.6f, 6.7f, 7.8f, 9.0f) ); } BOOST_AUTO_TEST_CASE(lambda_make_pair) { using boost::compute::_1; using boost::compute::_2; using boost::compute::lambda::make_pair; int int_data[] = { 1, 3, 5, 7 }; float float_data[] = { 1.2f, 2.3f, 3.4f, 4.5f }; compute::vector int_vector(int_data, int_data + 4, queue); compute::vector float_vector(float_data, float_data + 4, queue); compute::vector > output_vector(4, context); compute::transform( int_vector.begin(), int_vector.end(), float_vector.begin(), output_vector.begin(), make_pair(_1 - 1, 0 - _2), queue ); std::vector > host_vector(4); compute::copy_n(output_vector.begin(), 4, host_vector.begin(), queue); 
// Builds new tuples on the device with lambda::make_tuple(): first with the
// two input elements swapped, then with each input element duplicated.
// NOTE(review): the tuple element types are not visible in this view;
// presumably (int, float) for the input, judging by the make_tuple() calls
// below - confirm against the original.
BOOST_AUTO_TEST_CASE(lambda_make_tuple)
{
    using boost::compute::_1;
    using boost::compute::lambda::get;
    using boost::compute::lambda::make_tuple;

    // host-side input: four two-element tuples
    std::vector > data;
    data.push_back(boost::make_tuple(2, 1.2f));
    data.push_back(boost::make_tuple(4, 2.4f));
    data.push_back(boost::make_tuple(6, 4.6f));
    data.push_back(boost::make_tuple(8, 6.8f));

    // upload the input tuples to the device
    compute::vector > input_vector(4, context);
    compute::copy(data.begin(), data.end(), input_vector.begin(), queue);

    // reverse the elements in the tuple
    compute::vector > output_vector(4, context);
    compute::transform(
        input_vector.begin(),
        input_vector.end(),
        output_vector.begin(),
        make_tuple(get<1>(_1), get<0>(_1)),
        queue
    );

    // read the swapped tuples back to the host and compare element-wise
    std::vector > host_vector(4);
    compute::copy_n(output_vector.begin(), 4, host_vector.begin(), queue);
    BOOST_CHECK_EQUAL(host_vector[0], boost::make_tuple(1.2f, 2));
    BOOST_CHECK_EQUAL(host_vector[1], boost::make_tuple(2.4f, 4));
    BOOST_CHECK_EQUAL(host_vector[2], boost::make_tuple(4.6f, 6));
    BOOST_CHECK_EQUAL(host_vector[3], boost::make_tuple(6.8f, 8));

    // duplicate each element in the tuple
    compute::vector > doubled_vector(4, context);
    compute::transform(
        input_vector.begin(),
        input_vector.end(),
        doubled_vector.begin(),
        make_tuple(get<0>(_1), get<0>(_1), get<1>(_1), get<1>(_1)),
        queue
    );

    // read the four-element tuples back to the host and compare element-wise
    std::vector > doubled_host_vector(4);
    compute::copy_n(doubled_vector.begin(), 4, doubled_host_vector.begin(), queue);
    BOOST_CHECK_EQUAL(doubled_host_vector[0], boost::make_tuple(2, 2, 1.2f, 1.2f));
    BOOST_CHECK_EQUAL(doubled_host_vector[1], boost::make_tuple(4, 4, 2.4f, 2.4f));
    BOOST_CHECK_EQUAL(doubled_host_vector[2], boost::make_tuple(6, 6, 4.6f, 4.6f));
    BOOST_CHECK_EQUAL(doubled_host_vector[3], boost::make_tuple(8, 8, 6.8f, 6.8f));
}
compute::placeholders::_1; namespace lambda = compute::lambda; int data[] = { 1, 2, 3, 4 }; compute::vector vector(data, data + 4, queue); compute::transform( vector.begin(), vector.end(), vector.begin(), compute::bind(lambda::_1 * lambda::_2, _1, 2), queue ); CHECK_RANGE_EQUAL(int, 4, vector, (2, 4, 6, 8)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_lexicographical_compare.cpp000066400000000000000000000065431263566244600224460ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Mageswaran.D // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestLexicographicalCompare #include #include #include #include #include "context_setup.hpp" #include "check_macros.hpp" BOOST_AUTO_TEST_CASE(lexicographical_compare_string) { boost::compute::string a = "abcdefghijk"; boost::compute::string b = "abcdefghijk"; boost::compute::string c = "abcdefghija"; boost::compute::string d = "zabcdefghij"; BOOST_CHECK(boost::compute::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()) == false); BOOST_CHECK(boost::compute::lexicographical_compare(c.begin(), c.end(), a.begin(), a.end()) == true); BOOST_CHECK(boost::compute::lexicographical_compare(c.begin(), c.end(), d.begin(), d.end()) == true); } BOOST_AUTO_TEST_CASE(lexicographical_compare_number) { int data1[] = { 1, 2, 3, 4, 5, 6 }; int data2[] = { 9, 2, 3, 4, 5, 6 }; int data3[] = { 1, 2, 3, 4, 5 }; int data4[] = { 9, 2, 3, 4, 5, 100 }; boost::compute::vector vector1(data1, data1 + 6, queue); boost::compute::vector vector2(data2, data2 + 6, queue); boost::compute::vector vector3(data3, data3 + 5, queue); boost::compute::vector vector4(data4, data4 + 6, queue); 
// Discards the first five values of the engine and checks that the next five
// generated values match the expected sequence.
BOOST_AUTO_TEST_CASE(discard_uint)
{
    namespace compute = boost::compute;
    using boost::compute::uint_;

    compute::linear_congruential_engine engine(queue);

    // skip ahead before generating anything
    engine.discard(5, queue);

    compute::vector generated(5, context);
    engine.generate(generated.begin(), generated.end(), queue);

    CHECK_RANGE_EQUAL(
        uint_, 5, generated,
        (uint_(153107049),
         uint_(3581708125),
         uint_(1733142113),
         uint_(3008982197),
         uint_(3237988505))
    );
}
// Sets a compute::local_buffer as a kernel argument and checks that the
// kernel's reported CL_KERNEL_LOCAL_MEM_SIZE is at least as large as the
// requested buffer, before and after growing it. The test returns early
// (with a message on stderr) when the device check below does not yield
// CL_LOCAL, or when the device reports a local memory size of zero.
BOOST_AUTO_TEST_CASE(local_buffer_arg)
{
    // NOTE(review): the queried device property is not visible in this view;
    // presumably the device's local memory type - confirm against the original
    if(device.get_info() != CL_LOCAL){
        std::cerr << "skipping local buffer arg test: "
                  << "device does not support local memory" << std::endl;
        return;
    }

    const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
        __kernel void foo(__local float *local_buffer,
                          __global float *global_buffer)
        {
            const uint gid = get_global_id(0);
            const uint lid = get_local_id(0);
            local_buffer[lid] = gid;
            global_buffer[gid] = local_buffer[lid];
        }
    );

    // create and build program
    compute::program program =
        compute::program::build_with_source(source, context);

    // create kernel
    compute::kernel kernel = program.create_kernel("foo");

    // setup kernel arguments
    // NOTE(review): the local_buffer element type is not visible here;
    // presumably float, matching the sizeof(float) comparisons below - confirm
    compute::buffer global_buf(context, 128 * sizeof(float));
    kernel.set_arg(0, compute::local_buffer(32));
    kernel.set_arg(1, global_buf);

    // some implementations don't correctly report dynamically-set local buffer sizes
    if(kernel.get_work_group_info(device, CL_KERNEL_LOCAL_MEM_SIZE) == 0){
        std::cerr << "skipping checks for local memory size, device reports "
                  << "zero after setting dynamically-sized local buffer size"
                  << std::endl;
        return;
    }

    // check actual memory size
    BOOST_CHECK_GE(
        kernel.get_work_group_info(device, CL_KERNEL_LOCAL_MEM_SIZE),
        32 * sizeof(float)
    );

    // increase local buffer size and check new actual local memory size
    kernel.set_arg(0, compute::local_buffer(64));

    BOOST_CHECK_GE(
        kernel.get_work_group_info(device, CL_KERNEL_LOCAL_MEM_SIZE),
        64 * sizeof(float)
    );
}
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestMappedView #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(fill) { int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; for(int i = 0; i < 8; i++){ BOOST_CHECK_EQUAL(data[i], i+1); } compute::mapped_view view(data, 8, context); compute::fill(view.begin(), view.end(), 4, queue); view.map(CL_MAP_READ, queue); for(int i = 0; i < 8; i++){ BOOST_CHECK_EQUAL(data[i], 4); } view.unmap(queue); } BOOST_AUTO_TEST_CASE(sort) { int data[] = { 5, 2, 3, 1, 8, 7, 4, 9 }; compute::mapped_view view(data, 8, context); compute::sort(view.begin(), view.end(), queue); view.map(CL_MAP_READ, queue); BOOST_CHECK_EQUAL(data[0], 1); BOOST_CHECK_EQUAL(data[7], 9); view.unmap(queue); } BOOST_AUTO_TEST_CASE(mapped_view_reduce_doctest) { //! [reduce] // create data array on the host int data[] = { 5, 2, 3, 1, 8, 7, 4, 9 }; boost::compute::mapped_view view(data, 8, context); // use reduce() to calculate the sum on the device int sum = 0; boost::compute::reduce(view.begin(), view.end(), &sum, queue); //! [reduce] BOOST_CHECK_EQUAL(sum, 39); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_merge.cpp000066400000000000000000000122141263566244600166710ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestMerge #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(simple_merge_int) { int data1[] = { 1, 3, 5, 7 }; int data2[] = { 2, 4, 6, 8 }; boost::compute::vector v1(4, context); boost::compute::vector v2(4, context); boost::compute::vector v3(8, context); boost::compute::copy_n(data1, 4, v1.begin(), queue); boost::compute::copy_n(data2, 4, v2.begin(), queue); boost::compute::fill(v3.begin(), v3.end(), 0, queue); // merge v1 with v2 into v3 boost::compute::merge( v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin(), queue ); CHECK_RANGE_EQUAL(int, 8, v3, (1, 2, 3, 4, 5, 6, 7, 8)); // merge v2 with v1 into v3 boost::compute::merge( v2.begin(), v2.end(), v1.begin(), v1.end(), v3.begin(), queue ); CHECK_RANGE_EQUAL(int, 8, v3, (1, 2, 3, 4, 5, 6, 7, 8)); // merge v1 with v1 into v3 boost::compute::merge( v1.begin(), v1.end(), v1.begin(), v1.end(), v3.begin(), queue ); CHECK_RANGE_EQUAL(int, 8, v3, (1, 1, 3, 3, 5, 5, 7, 7)); // merge v2 with v2 into v3 boost::compute::merge( v2.begin(), v2.end(), v2.begin(), v2.end(), v3.begin(), queue ); CHECK_RANGE_EQUAL(int, 8, v3, (2, 2, 4, 4, 6, 6, 8, 8)); // merge v1 with empty range into v3 boost::compute::merge( v1.begin(), v1.end(), v1.begin(), v1.begin(), v3.begin(), queue ); CHECK_RANGE_EQUAL(int, 4, v3, (1, 3, 5, 7)); // merge v2 with empty range into v3 boost::compute::merge( v1.begin(), v1.begin(), v2.begin(), v2.end(), v3.begin(), queue ); CHECK_RANGE_EQUAL(int, 4, v3, (2, 4, 6, 8)); } BOOST_AUTO_TEST_CASE(merge_pairs) { std::vector > data1; std::vector > data2; data1.push_back(std::make_pair(0, 0.1f)); data1.push_back(std::make_pair(2, 2.1f)); data1.push_back(std::make_pair(4, 4.1f)); data1.push_back(std::make_pair(6, 6.1f)); data2.push_back(std::make_pair(1, 1.1f)); data2.push_back(std::make_pair(3, 3.1f)); 
data2.push_back(std::make_pair(5, 5.1f)); data2.push_back(std::make_pair(7, 7.1f)); std::vector > data3(data1.size() + data2.size()); std::fill(data3.begin(), data3.end(), std::make_pair(-1, -1.f)); boost::compute::vector > v1(data1.size(), context); boost::compute::vector > v2(data2.size(), context); boost::compute::vector > v3(data3.size(), context); boost::compute::copy(data1.begin(), data1.end(), v1.begin(), queue); boost::compute::copy(data2.begin(), data2.end(), v2.begin(), queue); using ::boost::compute::lambda::_1; using ::boost::compute::lambda::_2; using ::boost::compute::lambda::get; boost::compute::merge( v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin(), get<0>(_1) < get<0>(_2), queue ); boost::compute::copy(v3.begin(), v3.end(), data3.begin(), queue); BOOST_CHECK(v3[0] == std::make_pair(0, 0.1f)); BOOST_CHECK(v3[1] == std::make_pair(1, 1.1f)); BOOST_CHECK(v3[2] == std::make_pair(2, 2.1f)); BOOST_CHECK(v3[3] == std::make_pair(3, 3.1f)); BOOST_CHECK(v3[4] == std::make_pair(4, 4.1f)); BOOST_CHECK(v3[5] == std::make_pair(5, 5.1f)); BOOST_CHECK(v3[6] == std::make_pair(6, 6.1f)); BOOST_CHECK(v3[7] == std::make_pair(7, 7.1f)); } BOOST_AUTO_TEST_CASE(merge_floats) { float data1[] = { 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f }; float data2[] = { 1.0f, 2.0f, 3.9f, 4.9f, 6.8f, 6.9f, 7.0f, 7.1f }; boost::compute::vector v1(8, context); boost::compute::vector v2(8, context); boost::compute::vector v3(v1.size() + v2.size(), context); boost::compute::copy_n(data1, 8, v1.begin(), queue); boost::compute::copy_n(data2, 8, v2.begin(), queue); boost::compute::fill(v3.begin(), v3.end(), 0.f, queue); boost::compute::merge( v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin(), queue ); CHECK_RANGE_EQUAL(float, 16, v3, (1.0f, 1.1f, 2.0f, 2.2f, 3.3f, 3.9f, 4.4f, 4.9f, 5.5f, 6.6f, 6.8f, 6.9f, 7.0f, 7.1f, 7.7f, 8.8f) ); } BOOST_AUTO_TEST_SUITE_END() 
compute-0.5/test/test_mersenne_twister_engine.cpp000066400000000000000000000033041263566244600225140ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestMersenneTwisterEngine #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(generate_uint) { using boost::compute::uint_; boost::compute::mt19937 rng(queue); boost::compute::vector vector(10, context); rng.generate(vector.begin(), vector.end(), queue); CHECK_RANGE_EQUAL( uint_, 10, vector, (uint_(3499211612), uint_(581869302), uint_(3890346734), uint_(3586334585), uint_(545404204), uint_(4161255391), uint_(3922919429), uint_(949333985), uint_(2715962298), uint_(1323567403)) ); } BOOST_AUTO_TEST_CASE(discard_uint) { using boost::compute::uint_; boost::compute::mt19937 rng(queue); boost::compute::vector vector(5, context); rng.discard(5, queue); rng.generate(vector.begin(), vector.end(), queue); CHECK_RANGE_EQUAL( uint_, 5, vector, (uint_(4161255391), uint_(3922919429), uint_(949333985), uint_(2715962298), uint_(1323567403)) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_mismatch.cpp000066400000000000000000000041631263566244600174030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestMismatch #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(mismatch_int) { int data1[] = { 1, 2, 3, 4, 5, 6 }; int data2[] = { 1, 2, 3, 7, 5, 6 }; boost::compute::vector vector1(data1, data1 + 6, queue); boost::compute::vector vector2(data2, data2 + 6, queue); typedef boost::compute::vector::iterator iter; std::pair location = boost::compute::mismatch(vector1.begin(), vector1.end(), vector2.begin(), queue); BOOST_CHECK(location.first == vector1.begin() + 3); BOOST_CHECK_EQUAL(int(*location.first), int(4)); BOOST_CHECK(location.second == vector2.begin() + 3); BOOST_CHECK_EQUAL(int(*location.second), int(7)); } BOOST_AUTO_TEST_CASE(mismatch_different_range_sizes) { boost::compute::vector a(10, context); boost::compute::vector b(20, context); boost::compute::fill(a.begin(), a.end(), 3, queue); boost::compute::fill(b.begin(), b.end(), 3, queue); typedef boost::compute::vector::iterator iter; std::pair location; location = boost::compute::mismatch( a.begin(), a.end(), b.begin(), b.end(), queue ); BOOST_CHECK(location.first == a.end()); BOOST_CHECK(location.second == b.begin() + 10); location = boost::compute::mismatch( b.begin(), b.end(), a.begin(), a.end(), queue ); BOOST_CHECK(location.first == b.begin() + 10); BOOST_CHECK(location.second == a.end()); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_next_permutation.cpp000066400000000000000000000043501263566244600212010ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestNextPermutation #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(next_permutation_int) { int dataset[] = {1, 3, 4, 2, 5}; bc::vector vector(dataset, dataset + 5, queue); bool result = bc::next_permutation(vector.begin(), vector.begin() + 5, queue); CHECK_RANGE_EQUAL(int, 5, vector, (1, 3, 4, 5, 2)); BOOST_VERIFY(result == true); vector[0] = 10; vector[1] = 9; vector[2] = 6; result = bc::next_permutation(vector.begin(), vector.begin() + 5, queue); CHECK_RANGE_EQUAL(int, 5, vector, (2, 5, 6, 9, 10)); BOOST_VERIFY(result == false); } BOOST_AUTO_TEST_CASE(next_permutation_string) { char dataset[] = "aaab"; bc::vector vector(dataset, dataset + 4, queue); bool result = bc::next_permutation(vector.begin(), vector.begin() + 4, queue); CHECK_RANGE_EQUAL(char, 4, vector, ('a', 'a', 'b', 'a')); BOOST_VERIFY(result == true); result = bc::next_permutation(vector.begin(), vector.begin() + 4, queue); CHECK_RANGE_EQUAL(char, 4, vector, ('a', 'b', 'a', 'a')); BOOST_VERIFY(result == true); result = bc::next_permutation(vector.begin(), vector.begin() + 4, queue); CHECK_RANGE_EQUAL(char, 4, vector, ('b', 'a', 'a', 'a')); BOOST_VERIFY(result == true); result = bc::next_permutation(vector.begin(), vector.begin() + 4, queue); CHECK_RANGE_EQUAL(char, 4, vector, ('a', 'a', 'a', 'b')); BOOST_VERIFY(result == false); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_no_device_found.cpp000066400000000000000000000016341263566244600207240ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2015 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See 
http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestNoDeviceFound #include #include void throw_no_device_found() { throw boost::compute::no_device_found(); } BOOST_AUTO_TEST_CASE(what) { try { throw_no_device_found(); BOOST_REQUIRE(false); // should not get here } catch(boost::compute::no_device_found& e){ BOOST_CHECK_EQUAL(std::string(e.what()), "No OpenCL device found"); } } compute-0.5/test/test_normal_distribution.cpp000066400000000000000000000026331263566244600216650ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestNormalDistribution #include #include #include #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(normal_distribution_doctest) { using boost::compute::lambda::_1; boost::compute::vector vec(10, context); //! [generate] // initialize the default random engine boost::compute::default_random_engine engine(queue); // setup the normal distribution to produce floats centered at 5 boost::compute::normal_distribution distribution(5.0f, 1.0f); // generate the random values and store them to 'vec' distribution.generate(vec.begin(), vec.end(), engine, queue); //! 
[generate] } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_nth_element.cpp000066400000000000000000000073501263566244600201010ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestNthElement #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(nth_element_int) { int data[] = { 9, 15, 1, 4, 9, 9, 4, 15, 12, 1 }; boost::compute::vector vector(10, context); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::nth_element( vector.begin(), vector.begin() + 5, vector.end(), queue ); BOOST_CHECK_EQUAL(vector[5], 9); BOOST_VERIFY(boost::compute::is_partitioned( vector.begin(), vector.end(), boost::compute::_1 <= 9, queue )); BOOST_VERIFY(boost::compute::partition_point( vector.begin(), vector.end(), boost::compute::_1 <= 9, queue ) > vector.begin() + 5); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::nth_element( vector.begin(), vector.end(), vector.end(), queue ); CHECK_RANGE_EQUAL(int, 10, vector, (9, 15, 1, 4, 9, 9, 4, 15, 12, 1)); } BOOST_AUTO_TEST_CASE(nth_element_median) { int data[] = { 5, 6, 4, 3, 2, 6, 7, 9, 3 }; boost::compute::vector v(9, context); boost::compute::copy_n(data, 9, v.begin(), queue); boost::compute::nth_element(v.begin(), v.begin() + 4, v.end(), queue); BOOST_CHECK_EQUAL(v[4], 5); BOOST_VERIFY(boost::compute::is_partitioned( v.begin(), v.end(), boost::compute::_1 <= 5, queue )); BOOST_VERIFY(boost::compute::partition_point( v.begin(), v.end(), boost::compute::_1 <= 5, queue ) > v.begin() + 4); } 
BOOST_AUTO_TEST_CASE(nth_element_second_largest) { int data[] = { 5, 6, 4, 3, 2, 6, 7, 9, 3 }; boost::compute::vector v(9, context); boost::compute::copy_n(data, 9, v.begin(), queue); boost::compute::nth_element(v.begin(), v.begin() + 1, v.end(), queue); BOOST_CHECK_EQUAL(v[1], 3); BOOST_VERIFY(boost::compute::is_partitioned( v.begin(), v.end(), boost::compute::_1 <= 3, queue )); BOOST_VERIFY(boost::compute::partition_point( v.begin(), v.end(), boost::compute::_1 <= 3, queue ) > v.begin() + 1); } BOOST_AUTO_TEST_CASE(nth_element_comparator) { int data[] = { 9, 15, 1, 4, 9, 9, 4, 15, 12, 1 }; boost::compute::vector vector(10, context); boost::compute::less less_than; boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::nth_element( vector.begin(), vector.begin() + 5, vector.end(), less_than, queue ); BOOST_CHECK_EQUAL(vector[5], 9); BOOST_VERIFY(boost::compute::is_partitioned( vector.begin(), vector.end(), boost::compute::_1 <= 9, queue )); BOOST_VERIFY(boost::compute::partition_point( vector.begin(), vector.end(), boost::compute::_1 <= 9, queue ) > vector.begin() + 5); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::nth_element( vector.begin(), vector.end(), vector.end(), less_than, queue ); CHECK_RANGE_EQUAL(int, 10, vector, (9, 15, 1, 4, 9, 9, 4, 15, 12, 1)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_opencl_error.cpp000066400000000000000000000022621263566244600202650ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestOpenCLError #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(error_to_string) { using boost::compute::opencl_error; BOOST_CHECK_EQUAL(opencl_error::to_string(CL_SUCCESS), "Success"); BOOST_CHECK_EQUAL(opencl_error::to_string(CL_INVALID_VALUE), "Invalid Value"); BOOST_CHECK_EQUAL(opencl_error::to_string(-123456), "Unknown OpenCL Error (-123456)"); } BOOST_AUTO_TEST_CASE(error_code) { boost::compute::opencl_error e(CL_INVALID_DEVICE); BOOST_CHECK_EQUAL(e.error_code(), CL_INVALID_DEVICE); BOOST_CHECK_EQUAL(e.error_string(), "Invalid Device"); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_pair.cpp000066400000000000000000000145301263566244600165300ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestPair #include #include #include #include #include #include #include #include #include #include #include "quirks.hpp" #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(vector_pair_int_float) { boost::compute::vector > vector(context); vector.push_back(std::make_pair(1, 1.1f), queue); vector.push_back(std::make_pair(2, 2.2f), queue); vector.push_back(std::make_pair(3, 3.3f), queue); BOOST_CHECK_EQUAL(vector.size(), size_t(3)); BOOST_CHECK(vector[0] == std::make_pair(1, 1.1f)); BOOST_CHECK(vector[1] == std::make_pair(2, 2.2f)); BOOST_CHECK(vector[2] == std::make_pair(3, 3.3f)); } BOOST_AUTO_TEST_CASE(copy_pair_vector) { boost::compute::vector > input(context); input.push_back(std::make_pair(1, 2.0f), queue); input.push_back(std::make_pair(3, 4.0f), queue); input.push_back(std::make_pair(5, 6.0f), queue); input.push_back(std::make_pair(7, 8.0f), queue); BOOST_CHECK_EQUAL(input.size(), size_t(4)); boost::compute::vector > output(4, context); boost::compute::copy(input.begin(), input.end(), output.begin(), queue); queue.finish(); BOOST_CHECK(output[0] == std::make_pair(1, 2.0f)); BOOST_CHECK(output[1] == std::make_pair(3, 4.0f)); BOOST_CHECK(output[2] == std::make_pair(5, 6.0f)); BOOST_CHECK(output[3] == std::make_pair(7, 8.0f)); } BOOST_AUTO_TEST_CASE(fill_pair_vector) { if(bug_in_struct_assignment(device)){ std::cerr << "skipping fill_pair_vector test" << std::endl; return; } boost::compute::vector > vector(5, context); boost::compute::fill(vector.begin(), vector.end(), std::make_pair(4, 2.0f), queue); queue.finish(); BOOST_CHECK(vector[0] == std::make_pair(4, 2.0f)); BOOST_CHECK(vector[1] == std::make_pair(4, 2.0f)); BOOST_CHECK(vector[2] == std::make_pair(4, 2.0f)); BOOST_CHECK(vector[3] == std::make_pair(4, 2.0f)); BOOST_CHECK(vector[4] == std::make_pair(4, 2.0f)); } BOOST_AUTO_TEST_CASE(fill_char_pair_vector) { 
if(bug_in_struct_assignment(device)){ std::cerr << "skipping fill_char_pair_vector test" << std::endl; return; } std::pair value('c', static_cast(127)); boost::compute::vector > vector(5, context); boost::compute::fill(vector.begin(), vector.end(), value, queue); queue.finish(); BOOST_CHECK(vector[0] == value); BOOST_CHECK(vector[1] == value); BOOST_CHECK(vector[2] == value); BOOST_CHECK(vector[3] == value); BOOST_CHECK(vector[4] == value); } BOOST_AUTO_TEST_CASE(transform_pair_get) { boost::compute::vector > input(context); input.push_back(std::make_pair(1, 2.0f), queue); input.push_back(std::make_pair(3, 4.0f), queue); input.push_back(std::make_pair(5, 6.0f), queue); input.push_back(std::make_pair(7, 8.0f), queue); boost::compute::vector first_output(4, context); boost::compute::transform( input.begin(), input.end(), first_output.begin(), ::boost::compute::get<0>(), queue ); CHECK_RANGE_EQUAL(int, 4, first_output, (1, 3, 5, 7)); boost::compute::vector second_output(4, context); boost::compute::transform( input.begin(), input.end(), second_output.begin(), ::boost::compute::get<1>(), queue ); CHECK_RANGE_EQUAL(float, 4, second_output, (2.0f, 4.0f, 6.0f, 8.0f)); } BOOST_AUTO_TEST_CASE(transform_pair_field) { boost::compute::vector > input(context); input.push_back(std::make_pair(1, 2.0f), queue); input.push_back(std::make_pair(3, 4.0f), queue); input.push_back(std::make_pair(5, 6.0f), queue); input.push_back(std::make_pair(7, 8.0f), queue); boost::compute::vector first_output(4, context); boost::compute::transform( input.begin(), input.end(), first_output.begin(), boost::compute::field("first"), queue ); CHECK_RANGE_EQUAL(int, 4, first_output, (1, 3, 5, 7)); boost::compute::vector second_output(4, context); boost::compute::transform( input.begin(), input.end(), second_output.begin(), boost::compute::field("second"), queue ); CHECK_RANGE_EQUAL(float, 4, second_output, (2.0f, 4.0f, 6.0f, 8.0f)); } BOOST_AUTO_TEST_CASE(find_vector_pair) { boost::compute::vector > 
vector(context); vector.push_back(std::make_pair(1, 1.1f), queue); vector.push_back(std::make_pair(2, 2.2f), queue); vector.push_back(std::make_pair(3, 3.3f), queue); BOOST_CHECK_EQUAL(vector.size(), size_t(3)); BOOST_CHECK( boost::compute::find( boost::compute::make_transform_iterator( vector.begin(), boost::compute::get<0>() ), boost::compute::make_transform_iterator( vector.end(), boost::compute::get<0>() ), int(2), queue ).base() == vector.begin() + 1 ); BOOST_CHECK( boost::compute::find( boost::compute::make_transform_iterator( vector.begin(), boost::compute::get<1>() ), boost::compute::make_transform_iterator( vector.end(), boost::compute::get<1>() ), float(3.3f), queue ).base() == vector.begin() + 2 ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_partial_sum.cpp000066400000000000000000000024341263566244600201150ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestPartialSum #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(partial_sum_int) { int data[] = { 1, 2, 5, 3, 9, 1, 4, 2 }; bc::vector a(8, context); bc::copy(data, data + 8, a.begin(), queue); bc::vector b(a.size(), context); bc::vector::iterator iter = bc::partial_sum(a.begin(), a.end(), b.begin(), queue); BOOST_CHECK(iter == b.end()); CHECK_RANGE_EQUAL(int, 8, b, (1, 3, 8, 11, 20, 21, 25, 27)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_partition.cpp000066400000000000000000000056601263566244600176120ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestPartition #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(partition_float_vector) { bc::vector vector(context); vector.push_back(1.0f, queue); vector.push_back(2.0f, queue); vector.push_back(-1.0f, queue); vector.push_back(-2.0f, queue); vector.push_back(3.0f, queue); vector.push_back(4.0f, queue); vector.push_back(-3.0f, queue); vector.push_back(-4.0f, queue); // verify is_partitioned() BOOST_VERIFY(bc::is_partitioned(vector.begin(), vector.end(), bc::signbit_(), queue) == false); // partition by signbit bc::vector::iterator iter = bc::partition(vector.begin(), vector.end(), bc::signbit_(), queue); queue.finish(); BOOST_VERIFY(iter == vector.begin() + 4); BOOST_CHECK_LT(vector[0], 0.0f); BOOST_CHECK_LT(vector[1], 0.0f); BOOST_CHECK_LT(vector[2], 0.0f); BOOST_CHECK_LT(vector[3], 0.0f); BOOST_CHECK_GT(vector[4], 0.0f); BOOST_CHECK_GT(vector[5], 0.0f); BOOST_CHECK_GT(vector[6], 0.0f); BOOST_CHECK_GT(vector[7], 0.0f); // verify is_partitioned() BOOST_VERIFY(bc::is_partitioned(vector.begin(), vector.end(), bc::signbit_(), queue) == true); } BOOST_AUTO_TEST_CASE(partition_small_vector) { bc::vector vector(context); bc::partition(vector.begin(), vector.end(), bc::signbit_(), queue); vector.push_back(1.0f, queue); bc::partition(vector.begin(), vector.end(), bc::signbit_(), queue); CHECK_RANGE_EQUAL(float, 1, vector, (1.0f)); vector.push_back(-1.0f, queue); bc::partition(vector.begin(), vector.end(), bc::signbit_(), queue); CHECK_RANGE_EQUAL(float, 2, vector, (-1.0f, 1.0f)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_partition_point.cpp000066400000000000000000000023631263566244600210200ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // 
// Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestPartitionPoint #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(partition_point_int) { int dataset[] = {1, 1, 5, 2, 4, -2, 0, -1, 0, -1}; bc::vector vector(dataset, dataset + 10, queue); bc::vector::iterator iter = bc::partition_point(vector.begin(), vector.begin() + 10, bc::_1 > 0, queue); BOOST_VERIFY(iter == vector.begin()+5); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_permutation_iterator.cpp000066400000000000000000000063001263566244600220510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestPermutationIterator #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(value_type) { using boost::compute::float4_; BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::permutation_iterator< boost::compute::buffer_iterator, boost::compute::buffer_iterator >::value_type, float >::value )); BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::permutation_iterator< boost::compute::buffer_iterator, boost::compute::buffer_iterator >::value_type, float4_ >::value )); } BOOST_AUTO_TEST_CASE(base_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::permutation_iterator< boost::compute::buffer_iterator, boost::compute::buffer_iterator >::base_type, boost::compute::buffer_iterator >::value )); } BOOST_AUTO_TEST_CASE(copy) { int input_data[] = { 3, 4, 2, 1, 5 }; boost::compute::vector input(input_data, input_data + 5, queue); int map_data[] = { 3, 2, 0, 1, 4 }; boost::compute::vector map(map_data, map_data + 5, queue); boost::compute::vector output(5, context); boost::compute::copy( boost::compute::make_permutation_iterator(input.begin(), map.begin()), boost::compute::make_permutation_iterator(input.end(), map.end()), output.begin(), queue ); CHECK_RANGE_EQUAL(int, 5, output, (1, 2, 3, 4, 5)); } BOOST_AUTO_TEST_CASE(reverse_range_doctest) { int values_data[] = { 10, 20, 30, 40 }; int indices_data[] = { 3, 2, 1, 0 }; boost::compute::vector values(values_data, values_data + 4, queue); boost::compute::vector indices(indices_data, indices_data + 4, queue); boost::compute::vector result(4, context); //! 
[reverse_range] // values = { 10, 20, 30, 40 } // indices = { 3, 2, 1, 0 } boost::compute::copy( boost::compute::make_permutation_iterator(values.begin(), indices.begin()), boost::compute::make_permutation_iterator(values.end(), indices.end()), result.begin(), queue ); // result == { 40, 30, 20, 10 } //! [reverse_range] CHECK_RANGE_EQUAL(int, 4, result, (40, 30, 20, 10)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_pinned_allocator.cpp000066400000000000000000000015721263566244600211140ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestPinnedAllocator #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(vector_with_pinned_allocator) { compute::vector > vector(context); vector.push_back(12, queue); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_pipe.cpp000066400000000000000000000020011263566244600165200ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestPipe #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(empty) { } #ifdef CL_VERSION_2_0 BOOST_AUTO_TEST_CASE(create_pipe) { REQUIRES_OPENCL_VERSION(2, 0); compute::pipe pipe(context, 16 * sizeof(float), 128); BOOST_CHECK_EQUAL(pipe.get_info(), 64); BOOST_CHECK_EQUAL(pipe.get_info(), 128); } #endif // CL_VERSION_2_0 BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_platform.cpp000066400000000000000000000027521263566244600174240ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestPlatform #include #include #include #include BOOST_AUTO_TEST_CASE(platform_id) { boost::compute::platform platform = boost::compute::system::platforms().front(); boost::compute::platform platform_copy(platform.id()); BOOST_CHECK(platform == platform_copy); BOOST_CHECK(platform.id() == platform_copy.id()); } BOOST_AUTO_TEST_CASE(platform_supports_extension) { boost::compute::platform platform = boost::compute::system::platforms().front(); std::string extensions = platform.get_info(); if(extensions.empty()){ std::cerr << "platform doesn't support any extensions" << std::endl; return; } size_t space = extensions.find(' '); std::string first_extension = extensions.substr(0, space); BOOST_CHECK(platform.supports_extension(first_extension) == true); BOOST_CHECK(platform.supports_extension("invalid_extension_name") == false); } 
compute-0.5/test/test_prev_permutation.cpp000066400000000000000000000043271263566244600212030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestPrevPermutation #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(prev_permutation_int) { int dataset[] = {1, 3, 4, 2, 5}; bc::vector vector(dataset, dataset + 5, queue); bool result = bc::prev_permutation(vector.begin(), vector.begin() + 5, queue); CHECK_RANGE_EQUAL(int, 5, vector, (1, 3, 2, 5, 4)); BOOST_VERIFY(result == true); vector[1] = 1; vector[4] = 6; result = bc::prev_permutation(vector.begin(), vector.begin() + 5, queue); CHECK_RANGE_EQUAL(int, 5, vector, (6, 5, 2, 1, 1)); BOOST_VERIFY(result == false); } BOOST_AUTO_TEST_CASE(prev_permutation_string) { char dataset[] = "baaa"; bc::vector vector(dataset, dataset + 4, queue); bool result = bc::prev_permutation(vector.begin(), vector.begin() + 4, queue); CHECK_RANGE_EQUAL(char, 4, vector, ('a', 'b', 'a', 'a')); BOOST_VERIFY(result == true); result = bc::prev_permutation(vector.begin(), vector.begin() + 4, queue); CHECK_RANGE_EQUAL(char, 4, vector, ('a', 'a', 'b', 'a')); BOOST_VERIFY(result == true); result = bc::prev_permutation(vector.begin(), vector.begin() + 4, queue); CHECK_RANGE_EQUAL(char, 4, vector, ('a', 'a', 'a', 'b')); BOOST_VERIFY(result == true); result = bc::prev_permutation(vector.begin(), vector.begin() + 4, queue); CHECK_RANGE_EQUAL(char, 4, vector, ('b', 'a', 'a', 'a')); BOOST_VERIFY(result == false); } 
BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_program.cpp000066400000000000000000000150441263566244600172450ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestProgram #include // disable the automatic kernel compilation debug messages. this allows the // test for program to check that compilation error exceptions are properly // thrown when invalid kernel code is passed to program::build(). #undef BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION #include #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; const char source[] = "__kernel void foo(__global float *x, const uint n) { }\n" "__kernel void bar(__global int *x, __global int *y) { }\n"; BOOST_AUTO_TEST_CASE(get_program_info) { // create program boost::compute::program program = boost::compute::program::create_with_source(source, context); // build program program.build(); // check program info #ifndef BOOST_COMPUTE_USE_OFFLINE_CACHE BOOST_CHECK(program.source().empty() == false); #endif BOOST_CHECK(program.get_context() == context); } BOOST_AUTO_TEST_CASE(program_source) { // create program from source boost::compute::program program = boost::compute::program::create_with_source(source, context); BOOST_CHECK_EQUAL(std::string(source), program.source()); } BOOST_AUTO_TEST_CASE(program_multiple_sources) { std::vector sources; sources.push_back("__kernel void foo(__global int* x) { }\n"); sources.push_back("__kernel void bar(__global float* y) { }\n"); // create program from sources boost::compute::program program = 
boost::compute::program::create_with_source(sources, context); program.build(); boost::compute::kernel foo = program.create_kernel("foo"); boost::compute::kernel bar = program.create_kernel("bar"); } BOOST_AUTO_TEST_CASE(program_source_no_file) { // create program from a non-existant source file // and verifies it throws. BOOST_CHECK_THROW(boost::compute::program program = boost::compute::program::create_with_source_file (std::string(), context), std::ios_base::failure); } BOOST_AUTO_TEST_CASE(create_kernel) { boost::compute::program program = boost::compute::program::create_with_source(source, context); program.build(); boost::compute::kernel foo = program.create_kernel("foo"); boost::compute::kernel bar = program.create_kernel("bar"); // try to create a kernel that doesn't exist BOOST_CHECK_THROW(program.create_kernel("baz"), boost::compute::opencl_error); } BOOST_AUTO_TEST_CASE(create_with_binary) { // create program from source boost::compute::program source_program = boost::compute::program::create_with_source(source, context); source_program.build(); // create kernels in source program boost::compute::kernel source_foo_kernel = source_program.create_kernel("foo"); boost::compute::kernel source_bar_kernel = source_program.create_kernel("bar"); // check source kernels BOOST_CHECK_EQUAL(source_foo_kernel.name(), std::string("foo")); BOOST_CHECK_EQUAL(source_bar_kernel.name(), std::string("bar")); // get binary std::vector binary = source_program.binary(); // create program from binary boost::compute::program binary_program = boost::compute::program::create_with_binary(binary, context); binary_program.build(); // create kernels in binary program boost::compute::kernel binary_foo_kernel = binary_program.create_kernel("foo"); boost::compute::kernel binary_bar_kernel = binary_program.create_kernel("bar"); // check binary kernels BOOST_CHECK_EQUAL(binary_foo_kernel.name(), std::string("foo")); BOOST_CHECK_EQUAL(binary_bar_kernel.name(), std::string("bar")); } 
BOOST_AUTO_TEST_CASE(create_with_source_doctest) { //! [create_with_source] std::string source = "__kernel void foo(__global int *data) { }"; boost::compute::program foo_program = boost::compute::program::create_with_source(source, context); //! [create_with_source] foo_program.build(); } #ifdef CL_VERSION_1_2 BOOST_AUTO_TEST_CASE(compile_and_link) { REQUIRES_OPENCL_VERSION(1,2); // create the library program const char library_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( // for some reason the apple opencl compilers complains if a prototype // for the square() function is not available, so we add it here T square(T); // generic square function definition T square(T x) { return x * x; } ); compute::program library_program = compute::program::create_with_source(library_source, context); library_program.compile("-DT=int"); // create the kernel program const char kernel_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( // forward declare square function extern int square(int); // square kernel definition __kernel void square_kernel(__global int *x) { x[0] = square(x[0]); } ); compute::program square_program = compute::program::create_with_source(kernel_source, context); square_program.compile(); // link the programs std::vector programs; programs.push_back(library_program); programs.push_back(square_program); compute::program linked_program = compute::program::link(programs, context); // create the square kernel compute::kernel square_kernel = linked_program.create_kernel("square_kernel"); BOOST_CHECK_EQUAL(square_kernel.name(), "square_kernel"); } #endif // CL_VERSION_1_2 BOOST_AUTO_TEST_CASE(build_log) { const char invalid_source[] = "__kernel void foo(__global int *input) { !@#$%^&*() }"; compute::program invalid_program = compute::program::create_with_source(invalid_source, context); try { invalid_program.build(); // should not get here BOOST_CHECK(false); } catch(compute::opencl_error &e){ std::string log = invalid_program.build_log(); BOOST_CHECK(!log.empty()); } } 
BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_program_cache.cpp000066400000000000000000000063241263566244600203710ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestProgramCache #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(setup) { // get default context compute::context ctx = context; // get program cache boost::shared_ptr cache = compute::program_cache::get_global_cache(ctx); // try to load a null string BOOST_CHECK(cache->get(std::string()) == boost::none); // try to load a non-existant program BOOST_CHECK(cache->get("nonexistant") == boost::none); // create and store a program const char p1_source[] = "__kernel void add(__global int *a, int x)\n" "{\n" " a[get_global_id(0)] += x;\n" "}\n"; compute::program p1 = compute::program::create_with_source(p1_source, ctx); p1.build(); cache->insert("p1", p1); // try to load the program BOOST_CHECK(cache->get("p1") == p1); // create a copy of the context compute::context ctx_copy = ctx; // check that they both have the same cl_context BOOST_CHECK(ctx_copy.get() == ctx.get()); // check that the cache is the same boost::shared_ptr cache_copy = compute::program_cache::get_global_cache(ctx_copy); BOOST_CHECK(cache_copy == cache); // try to load the program again BOOST_CHECK(cache_copy->get("p1") == p1); } BOOST_AUTO_TEST_CASE(evict) { // create cache with capacity of four and insert four programs compute::program_cache cache(4); cache.insert("a", compute::program()); cache.insert("b", compute::program()); cache.insert("c", 
compute::program()); cache.insert("d", compute::program()); // check that all four programs still reside in the cache BOOST_CHECK(cache.get("a") != boost::none); BOOST_CHECK(cache.get("b") != boost::none); BOOST_CHECK(cache.get("c") != boost::none); BOOST_CHECK(cache.get("d") != boost::none); // insert fifth program which should evict the oldest ("a") cache.insert("e", compute::program()); // check that "a" has been evicted and that "e" is now present BOOST_CHECK(cache.get("a") == boost::none); BOOST_CHECK(cache.get("b") != boost::none); BOOST_CHECK(cache.get("c") != boost::none); BOOST_CHECK(cache.get("d") != boost::none); BOOST_CHECK(cache.get("e") != boost::none); // clear cache and ensure no program objects are found cache.clear(); BOOST_CHECK(cache.get("a") == boost::none); BOOST_CHECK(cache.get("b") == boost::none); BOOST_CHECK(cache.get("c") == boost::none); BOOST_CHECK(cache.get("d") == boost::none); BOOST_CHECK(cache.get("e") == boost::none); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_radix_sort.cpp000066400000000000000000000174061263566244600177600ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestRadixSort #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(sort_char_vector) { using boost::compute::char_; char_ data[] = { 'c', 'a', '0', '7', 'B', 'F', '\0', '$' }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::radix_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(char_, 8, vector, ('\0', '$', '0', '7', 'B', 'F', 'a', 'c')); } BOOST_AUTO_TEST_CASE(sort_uchar_vector) { using boost::compute::uchar_; uchar_ data[] = { 0x12, 0x00, 0xFF, 0xB4, 0x80, 0x32, 0x64, 0xA2 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::radix_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(uchar_, 8, vector, (0x00, 0x12, 0x32, 0x64, 0x80, 0xA2, 0xB4, 0xFF)); } BOOST_AUTO_TEST_CASE(sort_short_vector) { using boost::compute::short_; short_ data[] = { -4, 152, -94, 963, 31002, -456, 0, -2113 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::radix_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(short_, 8, vector, (-2113, -456, -94, -4, 0, 152, 963, 31002)); } BOOST_AUTO_TEST_CASE(sort_ushort_vector) { using 
boost::compute::ushort_; ushort_ data[] = { 4, 152, 94, 963, 63202, 34560, 0, 2113 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::radix_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(ushort_, 8, vector, (0, 4, 94, 152, 963, 2113, 34560, 63202)); } BOOST_AUTO_TEST_CASE(sort_int_vector) { int data[] = { -4, 152, -5000, 963, 75321, -456, 0, 1112 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::radix_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(int, 8, vector, (-5000, -456, -4, 0, 152, 963, 1112, 75321)); } BOOST_AUTO_TEST_CASE(sort_uint_vector) { using boost::compute::uint_; uint_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::radix_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(uint_, 8, vector, (0, 500, 562, 1988, 9852, 102030, 123456, 4000000)); } BOOST_AUTO_TEST_CASE(sort_long_vector) { using boost::compute::long_; long_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::radix_sort(vector.begin(), vector.end(), 
queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(long_, 8, vector, (0, 500, 562, 1988, 9852, 102030, 123456, 4000000)); } BOOST_AUTO_TEST_CASE(sort_ulong_vector) { using boost::compute::ulong_; ulong_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::radix_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(ulong_, 8, vector, (0, 500, 562, 1988, 9852, 102030, 123456, 4000000)); } BOOST_AUTO_TEST_CASE(sort_float_vector) { float data[] = { -6023.0f, 152.5f, -63.0f, 1234567.0f, 11.2f, -5000.1f, 0.0f, 14.0f, -8.25f, -0.0f }; boost::compute::vector vector(data, data + 10, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(10)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::radix_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL( float, 10, vector, (-6023.0f, -5000.1f, -63.0f, -8.25f, -0.0f, 0.0f, 11.2f, 14.0f, 152.5f, 1234567.0f) ); // copy data, sort, and check again (to check program caching) boost::compute::copy(data, data + 10, vector.begin(), queue); boost::compute::detail::radix_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL( float, 10, vector, (-6023.0f, -5000.1f, -63.0f, -8.25f, -0.0f, 0.0f, 11.2f, 14.0f, 152.5f, 1234567.0f) ); } BOOST_AUTO_TEST_CASE(sort_double_vector) { if(!device.supports_extension("cl_khr_fp64")){ std::cout << "skipping test: device does not support double" << std::endl; return; } double data[] = { -6023.0, 152.5, 
-63.0, 1234567.0, 11.2, -5000.1, 0.0, 14.0, -8.25, -0.0 }; boost::compute::vector vector(data, data + 10, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(10)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::detail::radix_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL( double, 10, vector, (-6023.0, -5000.1, -63.0, -8.25, -0.0, 0.0, 11.2, 14.0, 152.5, 1234567.0) ); } BOOST_AUTO_TEST_CASE(sort_partial_vector) { int data[] = { 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; boost::compute::vector vec(data, data + 10, queue); boost::compute::detail::radix_sort(vec.begin() + 2, vec.end() - 2, queue); CHECK_RANGE_EQUAL(int, 10, vec, (9, 8, 2, 3, 4, 5, 6, 7, 1, 0)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_random_fill.cpp000066400000000000000000000036651263566244600200720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestRandomFill #include #include #include #include #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(random_fill_float) { using compute::lambda::_1; compute::vector vector(10, context); // fill with values between 0 and 1 compute::detail::random_fill( vector.begin(), vector.end(), 0.0f, 1.0f, queue ); BOOST_CHECK_EQUAL( compute::count_if( vector.begin(), vector.end(), _1 < 0.0f || _1 > 1.0f, queue ), size_t(0) ); // fill with values between 5 and 10 compute::detail::random_fill( vector.begin(), vector.end(), 5.0f, 10.0f, queue ); BOOST_CHECK_EQUAL( compute::count_if( vector.begin(), vector.end(), _1 < 5.0f || _1 > 10.0f, queue ), size_t(0) ); // fill with values between -25 and 25 compute::detail::random_fill( vector.begin(), vector.end(), -25.0f, 25.0f, queue ); BOOST_CHECK_EQUAL( compute::count_if( vector.begin(), vector.end(), _1 < -25.0f || _1 > 25.0f, queue ), size_t(0) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_random_shuffle.cpp000066400000000000000000000030621263566244600205670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestRandomShuffle #include #include #include #include #include #include #include #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(shuffle_int_vector) { bc::vector vector(context); vector.push_back(1, queue); vector.push_back(9, queue); vector.push_back(19, queue); vector.push_back(29, queue); std::set original_values; for(size_t i = 0; i < vector.size(); i++){ original_values.insert(vector[i]); } BOOST_CHECK_EQUAL(original_values.size(), size_t(4)); bc::random_shuffle(vector.begin(), vector.end(), queue); std::set shuffled_values; bc::copy( vector.begin(), vector.end(), std::inserter(shuffled_values, shuffled_values.begin()), queue ); BOOST_CHECK_EQUAL(shuffled_values.size(), size_t(4)); BOOST_VERIFY(original_values == shuffled_values); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_reduce.cpp000066400000000000000000000163441263566244600170510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestReduce #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(reduce_int) { int data[] = { 1, 5, 9, 13, 17 }; compute::vector vector(data, data + 5, queue); int sum; compute::reduce(vector.begin(), vector.end(), &sum, compute::plus(), queue); BOOST_CHECK_EQUAL(sum, 45); int product; compute::reduce(vector.begin(), vector.end(), &product, compute::multiplies(), queue); BOOST_CHECK_EQUAL(product, 9945); } BOOST_AUTO_TEST_CASE(reduce_empty_vector) { compute::vector vector(context); short sum = 0; compute::reduce(vector.begin(), vector.end(), &sum, queue); BOOST_CHECK_EQUAL(sum, short(0)); } BOOST_AUTO_TEST_CASE(reduce_doctest) { int data[] = { 1, 2, 3, 4 }; boost::compute::vector vec(data, data + 4, queue); //! [sum_int] int sum = 0; boost::compute::reduce(vec.begin(), vec.end(), &sum, queue); //! 
[sum_int] BOOST_CHECK_EQUAL(sum, 10); } BOOST_AUTO_TEST_CASE(reduce_twos) { using compute::uint_; compute::vector vector(8, context); compute::fill(vector.begin(), vector.end(), uint_(2), queue); uint_ sum; compute::reduce(vector.begin(), vector.end(), &sum, compute::plus(), queue); BOOST_CHECK_EQUAL(sum, uint_(16)); uint_ product; compute::reduce(vector.begin(), vector.end(), &product, compute::multiplies(), queue); BOOST_CHECK_EQUAL(product, uint_(256)); } BOOST_AUTO_TEST_CASE(reduce_on_device) { int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; compute::vector input(data, data + 8, queue); compute::vector result(2, context); compute::reduce(input.begin(), input.begin() + 4, result.begin(), queue); compute::reduce(input.begin() + 4, input.end(), result.end() - 1, queue); CHECK_RANGE_EQUAL(int, 2, result, (10, 26)); } BOOST_AUTO_TEST_CASE(reduce_int_min_max) { int data[] = { 11, 5, 92, 13, 42 }; compute::vector vector(data, data + 5, queue); int min_value; compute::reduce( vector.begin(), vector.end(), &min_value, compute::min(), queue ); BOOST_CHECK_EQUAL(min_value, 5); int max_value; compute::reduce( vector.begin(), vector.end(), &max_value, compute::max(), queue ); BOOST_CHECK_EQUAL(max_value, 92); } BOOST_AUTO_TEST_CASE(reduce_int2) { std::vector data; for(int i = 0; i < 6; i++){ compute::int2_ value; value[0] = i + 1; value[1] = 2 * i + 1; data.push_back(value); } compute::vector vector(data.begin(), data.end(), queue); compute::int2_ sum; compute::reduce( vector.begin(), vector.end(), &sum, queue ); BOOST_CHECK_EQUAL(sum, compute::int2_(21, 36)); } BOOST_AUTO_TEST_CASE(reduce_pinned_vector) { int data[] = { 2, 5, 8, 11, 15 }; std::vector vector(data, data + 5); compute::buffer buffer(context, vector.size() * sizeof(int), compute::buffer::read_only | compute::buffer::use_host_ptr, &vector[0]); int sum; compute::reduce( compute::make_buffer_iterator(buffer, 0), compute::make_buffer_iterator(buffer, 5), &sum, compute::plus() ); BOOST_CHECK_EQUAL(sum, 41); } 
BOOST_AUTO_TEST_CASE(reduce_constant_iterator) { int result; compute::reduce( compute::make_constant_iterator(1, 0), compute::make_constant_iterator(1, 5), &result, queue ); BOOST_CHECK_EQUAL(result, 5); compute::reduce( compute::make_constant_iterator(3, 0), compute::make_constant_iterator(3, 5), &result, queue ); BOOST_CHECK_EQUAL(result, 15); compute::reduce( compute::make_constant_iterator(2, 0), compute::make_constant_iterator(2, 5), &result, compute::multiplies(), queue ); BOOST_CHECK_EQUAL(result, 32); } BOOST_AUTO_TEST_CASE(reduce_counting_iterator) { int result; compute::reduce( compute::make_counting_iterator(1), compute::make_counting_iterator(10), &result, queue ); BOOST_CHECK_EQUAL(result, 45); compute::reduce( compute::make_counting_iterator(1), compute::make_counting_iterator(5), &result, compute::multiplies(), queue ); BOOST_CHECK_EQUAL(result, 24); } BOOST_AUTO_TEST_CASE(reduce_transform_iterator) { using ::boost::compute::_1; int data[] = { 1, 3, 5, 7, 9 }; compute::vector vector(data, data + 5, queue); int sum; compute::reduce( compute::make_transform_iterator(vector.begin(), _1 + 1), compute::make_transform_iterator(vector.end(), _1 + 1), &sum, queue ); BOOST_CHECK_EQUAL(sum, 30); compute::reduce( compute::make_transform_iterator(vector.begin(), _1 > 4), compute::make_transform_iterator(vector.end(), _1 > 4), &sum, compute::plus(), queue ); BOOST_CHECK_EQUAL(sum, 3); compute::reduce( compute::make_transform_iterator(vector.begin(), _1 * _1), compute::make_transform_iterator(vector.end(), _1 * _1), &sum, queue ); BOOST_CHECK_EQUAL(sum, 165); } BOOST_AUTO_TEST_CASE(reduce_complex) { std::vector > data; data.push_back(std::complex(1, 2)); data.push_back(std::complex(2, 4)); data.push_back(std::complex(3, 6)); data.push_back(std::complex(4, 8)); compute::vector > vector(data.size(), context); compute::copy(data.begin(), data.end(), vector.begin(), queue); std::complex result; compute::reduce( vector.begin(), vector.end(), &result, queue ); 
BOOST_CHECK(result == std::complex(10, 20)); compute::reduce( vector.begin(), vector.end(), &result, compute::plus >(), queue ); BOOST_CHECK(result == std::complex(10, 20)); compute::reduce( vector.begin(), vector.end(), &result, compute::multiplies >(), queue ); BOOST_CHECK(result == std::complex(-168, -576)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_reduce_by_key.cpp000066400000000000000000000167001263566244600204070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestReduceByKey #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(reduce_by_key_int) { //! [reduce_by_key_int] // setup keys and values int keys[] = { 0, 2, -3, -3, -3, -3, -3, 4 }; int data[] = { 1, 1, 1, 1, 1, 2, 5, 1 }; boost::compute::vector keys_input(keys, keys + 8, queue); boost::compute::vector values_input(data, data + 8, queue); boost::compute::vector keys_output(8, context); boost::compute::vector values_output(8, context); // reduce by key boost::compute::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), values_output.begin(), queue); // keys_output = { 0, 2, -3, 4 } // values_output = { 1, 1, 10, 1 } //! 
[reduce_by_key_int] CHECK_RANGE_EQUAL(int, 4, keys_output, (0, 2, -3, 4)); CHECK_RANGE_EQUAL(int, 4, values_output, (1, 1, 10, 1)); } BOOST_AUTO_TEST_CASE(reduce_by_key_int_long_vector) { size_t size = 1024; bc::vector keys_input(size, int(0), queue); bc::vector values_input(size, int(1), queue); bc::vector keys_output(size, context); bc::vector values_output(size, context); bc::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), values_output.begin(), queue); CHECK_RANGE_EQUAL(int, 1, keys_output, (0)); CHECK_RANGE_EQUAL(int, 1, values_output, (static_cast(size))); keys_input[137] = 1; keys_input[677] = 1; keys_input[1001] = 1; bc::inclusive_scan(keys_input.begin(), keys_input.end(), keys_input.begin(), queue); bc::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), values_output.begin(), queue); CHECK_RANGE_EQUAL(int, 4, keys_output, (0, 1, 2, 3)); CHECK_RANGE_EQUAL(int, 4, values_output, (137, 540, 324, 23)); } BOOST_AUTO_TEST_CASE(reduce_by_key_empty_vector) { bc::vector keys_input(context); bc::vector values_input(context); bc::vector keys_output(context); bc::vector values_output(context); bc::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), values_output.begin(), queue); BOOST_CHECK(keys_output.empty()); BOOST_CHECK(values_output.empty()); } BOOST_AUTO_TEST_CASE(reduce_by_key_int_one_key_value) { int keys[] = { 22 }; int data[] = { -9 }; bc::vector keys_input(keys, keys + 1, queue); bc::vector values_input(data, data + 1, queue); bc::vector keys_output(1, context); bc::vector values_output(1, context); bc::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), values_output.begin(), queue); CHECK_RANGE_EQUAL(int, 1, keys_output, (22)); CHECK_RANGE_EQUAL(int, 1, values_output, (-9)); } BOOST_AUTO_TEST_CASE(reduce_by_key_int_min_max) { int keys[] = { 0, 2, 2, 3, 3, 3, 3, 3, 4 }; int data[] = 
{ 1, 2, 1, -3, 1, 4, 2, 5, 77 }; bc::vector keys_input(keys, keys + 9, queue); bc::vector values_input(data, data + 9, queue); bc::vector keys_output(9, context); bc::vector values_output(9, context); bc::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), values_output.begin(), bc::min(), bc::equal_to(), queue); CHECK_RANGE_EQUAL(int, 4, keys_output, (0, 2, 3, 4)); CHECK_RANGE_EQUAL(int, 4, values_output, (1, 1, -3, 77)); bc::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), values_output.begin(), bc::max(), bc::equal_to(), queue); CHECK_RANGE_EQUAL(int, 4, keys_output, (0, 2, 3, 4)); CHECK_RANGE_EQUAL(int, 4, values_output, (1, 2, 5, 77)); } BOOST_AUTO_TEST_CASE(reduce_by_key_float_max) { int keys[] = { 0, 2, 2, 3, 3, 3, 3, 3, 4 }; float data[] = { 1.0, 2.0, -1.5, -3.0, 1.0, -0.24, 2, 5, 77.1 }; bc::vector keys_input(keys, keys + 9, queue); bc::vector values_input(data, data + 9, queue); bc::vector keys_output(9, context); bc::vector values_output(9, context); bc::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), values_output.begin(), bc::max(), queue); CHECK_RANGE_EQUAL(int, 4, keys_output, (0, 2, 3, 4)); BOOST_CHECK_CLOSE(float(values_output[0]), 1.0f, 1e-4f); BOOST_CHECK_CLOSE(float(values_output[1]), 2.0f, 1e-4f); BOOST_CHECK_CLOSE(float(values_output[2]), 5.0f, 1e-4f); BOOST_CHECK_CLOSE(float(values_output[3]), 77.1f, 1e-4f); } BOOST_AUTO_TEST_CASE(reduce_by_key_int2) { using bc::int2_; int keys[] = { 0, 2, 3, 3, 3, 3, 4, 4 }; int2_ data[] = { int2_(0, 1), int2_(-3, 2), int2_(0, 1), int2_(0, 1), int2_(-3, 0), int2_(0, 0), int2_(-3, 2), int2_(-7, -2) }; bc::vector keys_input(keys, keys + 8, queue); bc::vector values_input(data, data + 8, queue); bc::vector keys_output(8, context); bc::vector values_output(8, context); bc::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), 
values_output.begin(), queue); CHECK_RANGE_EQUAL(int, 4, keys_output, (0, 2, 3, 4)); CHECK_RANGE_EQUAL(int2_, 4, values_output, (int2_(0, 1), int2_(-3, 2), int2_(-3, 2), int2_(-10, 0))); } BOOST_AUTO_TEST_CASE(reduce_by_key_int2_long_vector) { using bc::int2_; size_t size = 1024; bc::vector keys_input(size, int(0), queue); bc::vector values_input(size, int2_(1, -1), queue); bc::vector keys_output(size, context); bc::vector values_output(size, context); bc::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), values_output.begin(), queue); CHECK_RANGE_EQUAL(int, 1, keys_output, (0)); CHECK_RANGE_EQUAL(int2_, 1, values_output, (int2_(size, -size))); keys_input[137] = 1; keys_input[677] = 1; keys_input[1001] = 1; bc::inclusive_scan(keys_input.begin(), keys_input.end(), keys_input.begin(), queue); bc::reduce_by_key(keys_input.begin(), keys_input.end(), values_input.begin(), keys_output.begin(), values_output.begin(), queue); CHECK_RANGE_EQUAL(int, 4, keys_output, (0, 1, 2, 3)); CHECK_RANGE_EQUAL(int2_, 4, values_output, (int2_(137, -137), int2_(540, -540), int2_(324, -324), int2_(23, -23))); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_remove.cpp000066400000000000000000000035231263566244600170720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestRemove #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(remove_int) { int data[] = { 1, 2, 1, 3, 2, 4, 3, 4, 5 }; bc::vector vector(data, data + 9, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(9)); // remove 2's bc::vector::const_iterator iter = bc::remove(vector.begin(), vector.end(), 2, queue); BOOST_VERIFY(iter == vector.begin() + 7); CHECK_RANGE_EQUAL(int, 7, vector, (1, 1, 3, 4, 3, 4, 5)); // remove 4's iter = bc::remove(vector.begin(), vector.begin() + 7, 4, queue); BOOST_VERIFY(iter == vector.begin() + 5); CHECK_RANGE_EQUAL(int, 5, vector, (1, 1, 3, 3, 5)); // remove 1's iter = bc::remove(vector.begin(), vector.begin() + 5, 1, queue); BOOST_VERIFY(iter == vector.begin() + 3); CHECK_RANGE_EQUAL(int, 3, vector, (3, 3, 5)); // remove 5's iter = bc::remove(vector.begin(), vector.begin() + 3, 5, queue); BOOST_VERIFY(iter == vector.begin() + 2); CHECK_RANGE_EQUAL(int, 2, vector, (3, 3)); // remove 3's iter = bc::remove(vector.begin(), vector.begin() + 2, 3, queue); BOOST_VERIFY(iter == vector.begin()); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_replace.cpp000066400000000000000000000033151263566244600172070ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestReplace #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(replace_int) { bc::vector vector(5, context); bc::iota(vector.begin(), vector.end(), 0, queue); CHECK_RANGE_EQUAL(int, 5, vector, (0, 1, 2, 3, 4)); bc::replace(vector.begin(), vector.end(), 2, 6, queue); CHECK_RANGE_EQUAL(int, 5, vector, (0, 1, 6, 3, 4)); } BOOST_AUTO_TEST_CASE(replace_copy_int) { bc::vector a(5, context); bc::iota(a.begin(), a.end(), 0, queue); CHECK_RANGE_EQUAL(int, 5, a, (0, 1, 2, 3, 4)); bc::vector b(5, context); bc::vector::iterator iter = bc::replace_copy(a.begin(), a.end(), b.begin(), 3, 9, queue); BOOST_CHECK(iter == b.end()); CHECK_RANGE_EQUAL(int, 5, b, (0, 1, 2, 9, 4)); // ensure 'a' was not modified CHECK_RANGE_EQUAL(int, 5, a, (0, 1, 2, 3, 4)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_result_of.cpp000066400000000000000000000023411263566244600175740ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestResultOf #include #include #include #include BOOST_AUTO_TEST_CASE(result_of_function) { using boost::compute::function; using boost::compute::result_of; BOOST_STATIC_ASSERT(( boost::is_same()>::type, int>::value )); } BOOST_AUTO_TEST_CASE(result_of_operators) { using boost::compute::plus; using boost::compute::minus; using boost::compute::result_of; BOOST_STATIC_ASSERT(( boost::is_same(int, int)>::type, int>::value )); BOOST_STATIC_ASSERT(( boost::is_same(int, int)>::type, int>::value )); } compute-0.5/test/test_reverse.cpp000066400000000000000000000061701263566244600172510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestReverse #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(reverse_int) { bc::vector vector(5, context); bc::iota(vector.begin(), vector.end(), 0, queue); CHECK_RANGE_EQUAL(int, 5, vector, (0, 1, 2, 3, 4)); bc::reverse(vector.begin(), vector.end(), queue); CHECK_RANGE_EQUAL(int, 5, vector, (4, 3, 2, 1, 0)); bc::reverse(vector.begin() + 1, vector.end(), queue); CHECK_RANGE_EQUAL(int, 5, vector, (4, 0, 1, 2, 3)); bc::reverse(vector.begin() + 1, vector.end() - 1, queue); CHECK_RANGE_EQUAL(int, 5, vector, (4, 2, 1, 0, 3)); bc::reverse(vector.begin(), vector.end() - 2, queue); CHECK_RANGE_EQUAL(int, 5, vector, (1, 2, 4, 0, 3)); vector.resize(6, queue); bc::iota(vector.begin(), vector.end(), 10, queue); CHECK_RANGE_EQUAL(int, 6, vector, (10, 11, 12, 13, 14, 15)); bc::reverse(vector.begin(), vector.end(), queue); CHECK_RANGE_EQUAL(int, 6, vector, (15, 14, 13, 12, 11, 10)); bc::reverse(vector.begin() + 3, vector.end(), queue); CHECK_RANGE_EQUAL(int, 6, vector, (15, 14, 13, 10, 11, 12)); bc::reverse(vector.begin() + 1, vector.end() - 2, queue); CHECK_RANGE_EQUAL(int, 6, vector, (15, 10, 13, 14, 11, 12)); } BOOST_AUTO_TEST_CASE(reverse_copy_int) { bc::vector a(5, context); bc::iota(a.begin(), a.end(), 0, queue); CHECK_RANGE_EQUAL(int, 5, a, (0, 1, 2, 3, 4)); bc::vector b(5, context); bc::vector::iterator iter = bc::reverse_copy(a.begin(), a.end(), b.begin(), queue); BOOST_CHECK(iter == b.end()); CHECK_RANGE_EQUAL(int, 5, b, (4, 3, 2, 1, 0)); iter = bc::reverse_copy(b.begin() + 1, b.end(), a.begin() + 1, queue); BOOST_CHECK(iter == a.end()); CHECK_RANGE_EQUAL(int, 5, a, (0, 0, 1, 2, 3)); iter = bc::reverse_copy(a.begin(), a.end() - 1, b.begin(), queue); BOOST_CHECK(iter == (b.end() - 1)); CHECK_RANGE_EQUAL(int, 5, b, (2, 1, 0, 0, 
0)); } BOOST_AUTO_TEST_CASE(reverse_copy_counting_iterator) { bc::vector vector(5, context); bc::reverse_copy( bc::make_counting_iterator(1), bc::make_counting_iterator(6), vector.begin(), queue ); CHECK_RANGE_EQUAL(int, 5, vector, (5, 4, 3, 2, 1)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_rotate.cpp000066400000000000000000000044321263566244600170730ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestRotate #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(rotate_trivial) { int data[] = {1, 4, 2, 6, 3, 2, 5, 3, 4, 6}; boost::compute::vector vector(10, context); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::rotate(vector.begin(), vector.begin(), vector.end()); CHECK_RANGE_EQUAL(int, 10, vector, (1, 4, 2, 6, 3, 2, 5, 3, 4, 6)); boost::compute::rotate(vector.begin(), vector.end(), vector.end()); CHECK_RANGE_EQUAL(int, 10, vector, (1, 4, 2, 6, 3, 2, 5, 3, 4, 6)); } BOOST_AUTO_TEST_CASE(rotate_1) { int data[] = {1, 4, 2, 6, 3, 2, 5, 3, 4, 6}; boost::compute::vector vector(10, context); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::rotate(vector.begin(), vector.begin()+1, vector.end()); CHECK_RANGE_EQUAL(int, 10, vector, (4, 2, 6, 3, 2, 5, 3, 4, 6, 1)); } BOOST_AUTO_TEST_CASE(rotate_4) { int data[] = {1, 4, 2, 6, 3, 2, 5, 3, 4, 6}; boost::compute::vector vector(10, context); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::rotate(vector.begin(), vector.begin()+4, vector.end()); CHECK_RANGE_EQUAL(int, 
10, vector, (3, 2, 5, 3, 4, 6, 1, 4, 2, 6)); } BOOST_AUTO_TEST_CASE(rotate_9) { int data[] = {1, 4, 2, 6, 3, 2, 5, 3, 4, 6}; boost::compute::vector vector(10, context); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::rotate(vector.begin(), vector.begin()+9, vector.end()); CHECK_RANGE_EQUAL(int, 10, vector, (6, 1, 4, 2, 6, 3, 2, 5, 3, 4)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_rotate_copy.cpp000066400000000000000000000052271263566244600201300ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestRotateCopy #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(rotate_copy_trivial) { int data[] = {1, 4, 2, 6, 3, 2, 5, 3, 4, 6}; boost::compute::vector vector(10, context); boost::compute::vector result(10, context); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::rotate_copy(vector.begin(), vector.begin(), vector.end(), result.begin(), queue); CHECK_RANGE_EQUAL(int, 10, result, (1, 4, 2, 6, 3, 2, 5, 3, 4, 6)); boost::compute::rotate_copy(vector.begin(), vector.end(), vector.end(), result.begin(), queue); CHECK_RANGE_EQUAL(int, 10, result, (1, 4, 2, 6, 3, 2, 5, 3, 4, 6)); } BOOST_AUTO_TEST_CASE(rotate_copy_1) { int data[] = {1, 4, 2, 6, 3, 2, 5, 3, 4, 6}; boost::compute::vector vector(10, context); boost::compute::vector result(10, context); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::rotate_copy(vector.begin(), vector.begin()+1, vector.end(), result.begin(), queue); CHECK_RANGE_EQUAL(int, 10, result, 
(4, 2, 6, 3, 2, 5, 3, 4, 6, 1)); } BOOST_AUTO_TEST_CASE(rotate_copy_4) { int data[] = {1, 4, 2, 6, 3, 2, 5, 3, 4, 6}; boost::compute::vector vector(10, context); boost::compute::vector result(10, context); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::rotate_copy(vector.begin(), vector.begin()+4, vector.end(), result.begin(), queue); CHECK_RANGE_EQUAL(int, 10, result, (3, 2, 5, 3, 4, 6, 1, 4, 2, 6)); } BOOST_AUTO_TEST_CASE(rotate_copy_9) { int data[] = {1, 4, 2, 6, 3, 2, 5, 3, 4, 6}; boost::compute::vector vector(10, context); boost::compute::vector result(10, context); boost::compute::copy_n(data, 10, vector.begin(), queue); boost::compute::rotate_copy(vector.begin(), vector.begin()+9, vector.end(), result.begin(), queue); CHECK_RANGE_EQUAL(int, 10, result, (6, 1, 4, 2, 6, 3, 2, 5, 3, 4)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_scan.cpp000066400000000000000000000261461263566244600165270ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestScan #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(inclusive_scan_int) { int data[] = { 1, 2, 1, 2, 3 }; bc::vector vector(data, data + 5, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(5)); bc::vector result(5, context); BOOST_CHECK_EQUAL(result.size(), size_t(5)); // inclusive scan bc::inclusive_scan(vector.begin(), vector.end(), result.begin(), queue); CHECK_RANGE_EQUAL(int, 5, result, (1, 3, 4, 6, 9)); // in-place inclusive scan CHECK_RANGE_EQUAL(int, 5, vector, (1, 2, 1, 2, 3)); bc::inclusive_scan(vector.begin(), vector.end(), vector.begin(), queue); CHECK_RANGE_EQUAL(int, 5, vector, (1, 3, 4, 6, 9)); } BOOST_AUTO_TEST_CASE(exclusive_scan_int) { int data[] = { 1, 2, 1, 2, 3 }; bc::vector vector(data, data + 5, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(5)); bc::vector result(5, context); BOOST_CHECK_EQUAL(vector.size(), size_t(5)); // exclusive scan bc::exclusive_scan(vector.begin(), vector.end(), result.begin(), queue); CHECK_RANGE_EQUAL(int, 5, result, (0, 1, 3, 4, 6)); // in-place exclusive scan CHECK_RANGE_EQUAL(int, 5, vector, (1, 2, 1, 2, 3)); bc::exclusive_scan(vector.begin(), vector.end(), vector.begin(), queue); CHECK_RANGE_EQUAL(int, 5, vector, (0, 1, 3, 4, 6)); } BOOST_AUTO_TEST_CASE(inclusive_scan_int2) { using boost::compute::int2_; int data[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 }; boost::compute::vector input(reinterpret_cast(data), reinterpret_cast(data) + 5, queue); BOOST_CHECK_EQUAL(input.size(), size_t(5)); boost::compute::vector output(5, context); boost::compute::inclusive_scan(input.begin(), input.end(), output.begin(), queue); CHECK_RANGE_EQUAL( int2_, 5, output, (int2_(1, 2), int2_(4, 6), int2_(9, 12), int2_(16, 20), int2_(25, 20)) ); } 
BOOST_AUTO_TEST_CASE(inclusive_scan_counting_iterator) { bc::vector result(10, context); bc::inclusive_scan(bc::make_counting_iterator(1), bc::make_counting_iterator(11), result.begin(), queue); CHECK_RANGE_EQUAL(int, 10, result, (1, 3, 6, 10, 15, 21, 28, 36, 45, 55)); } BOOST_AUTO_TEST_CASE(exclusive_scan_counting_iterator) { bc::vector result(10, context); bc::exclusive_scan(bc::make_counting_iterator(1), bc::make_counting_iterator(11), result.begin(), queue); CHECK_RANGE_EQUAL(int, 10, result, (0, 1, 3, 6, 10, 15, 21, 28, 36, 45)); } BOOST_AUTO_TEST_CASE(inclusive_scan_transform_iterator) { float data[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; bc::vector input(data, data + 5, queue); bc::vector output(5, context); // normal inclusive scan of the input bc::inclusive_scan(input.begin(), input.end(), output.begin(), queue); bc::system::finish(); BOOST_CHECK_CLOSE(float(output[0]), 1.0f, 1e-4f); BOOST_CHECK_CLOSE(float(output[1]), 3.0f, 1e-4f); BOOST_CHECK_CLOSE(float(output[2]), 6.0f, 1e-4f); BOOST_CHECK_CLOSE(float(output[3]), 10.0f, 1e-4f); BOOST_CHECK_CLOSE(float(output[4]), 15.0f, 1e-4f); // inclusive scan of squares of the input using ::boost::compute::_1; bc::inclusive_scan(bc::make_transform_iterator(input.begin(), pown(_1, 2)), bc::make_transform_iterator(input.end(), pown(_1, 2)), output.begin(), queue); bc::system::finish(); BOOST_CHECK_CLOSE(float(output[0]), 1.0f, 1e-4f); BOOST_CHECK_CLOSE(float(output[1]), 5.0f, 1e-4f); BOOST_CHECK_CLOSE(float(output[2]), 14.0f, 1e-4f); BOOST_CHECK_CLOSE(float(output[3]), 30.0f, 1e-4f); BOOST_CHECK_CLOSE(float(output[4]), 55.0f, 1e-4f); } BOOST_AUTO_TEST_CASE(inclusive_scan_doctest) { //! [inclusive_scan_int] // setup input int data[] = { 1, 2, 3, 4 }; boost::compute::vector input(data, data + 4, queue); // setup output boost::compute::vector output(4, context); // scan values boost::compute::inclusive_scan( input.begin(), input.end(), output.begin(), queue ); // output = [ 1, 3, 6, 10 ] //! 
[inclusive_scan_int] CHECK_RANGE_EQUAL(int, 4, output, (1, 3, 6, 10)); } BOOST_AUTO_TEST_CASE(exclusive_scan_doctest) { //! [exclusive_scan_int] // setup input int data[] = { 1, 2, 3, 4 }; boost::compute::vector input(data, data + 4, queue); // setup output boost::compute::vector output(4, context); // scan values boost::compute::exclusive_scan( input.begin(), input.end(), output.begin(), queue ); // output = [ 0, 1, 3, 6 ] //! [exclusive_scan_int] CHECK_RANGE_EQUAL(int, 4, output, (0, 1, 3, 6)); } BOOST_AUTO_TEST_CASE(inclusive_scan_int_multiplies) { //! [inclusive_scan_int_multiplies] // setup input int data[] = { 1, 2, 1, 2, 3 }; boost::compute::vector input(data, data + 5, queue); // setup output boost::compute::vector output(5, context); // inclusive scan with multiplication boost::compute::inclusive_scan( input.begin(), input.end(), output.begin(), boost::compute::multiplies(), queue ); // output = [1, 2, 2, 4, 12] //! [inclusive_scan_int_multiplies] BOOST_CHECK_EQUAL(input.size(), size_t(5)); BOOST_CHECK_EQUAL(output.size(), size_t(5)); CHECK_RANGE_EQUAL(int, 5, output, (1, 2, 2, 4, 12)); // in-place inclusive scan CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 1, 2, 3)); boost::compute::inclusive_scan(input.begin(), input.end(), input.begin(), boost::compute::multiplies(), queue); CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 2, 4, 12)); } BOOST_AUTO_TEST_CASE(exclusive_scan_int_multiplies) { //! [exclusive_scan_int_multiplies] // setup input int data[] = { 1, 2, 1, 2, 3 }; boost::compute::vector input(data, data + 5, queue); // setup output boost::compute::vector output(5, context); // exclusive_scan with multiplication // initial value equals 10 boost::compute::exclusive_scan( input.begin(), input.end(), output.begin(), int(10), boost::compute::multiplies(), queue ); // output = [10, 10, 20, 20, 40] //! 
[exclusive_scan_int_multiplies] BOOST_CHECK_EQUAL(input.size(), size_t(5)); BOOST_CHECK_EQUAL(output.size(), size_t(5)); CHECK_RANGE_EQUAL(int, 5, output, (10, 10, 20, 20, 40)); // in-place exclusive scan CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 1, 2, 3)); bc::exclusive_scan(input.begin(), input.end(), input.begin(), int(10), bc::multiplies(), queue); CHECK_RANGE_EQUAL(int, 5, input, (10, 10, 20, 20, 40)); } BOOST_AUTO_TEST_CASE(inclusive_scan_int_multiplies_long_vector) { size_t size = 1000; bc::vector device_vector(size, int(2), queue); BOOST_CHECK_EQUAL(device_vector.size(), size); bc::inclusive_scan(device_vector.begin(), device_vector.end(), device_vector.begin(), bc::multiplies(), queue); std::vector host_vector(size, 2); BOOST_CHECK_EQUAL(host_vector.size(), size); bc::copy(device_vector.begin(), device_vector.end(), host_vector.begin(), queue); std::vector test(size, 2); BOOST_CHECK_EQUAL(test.size(), size); std::partial_sum(test.begin(), test.end(), test.begin(), std::multiplies()); BOOST_CHECK_EQUAL_COLLECTIONS(host_vector.begin(), host_vector.end(), test.begin(), test.end()); } BOOST_AUTO_TEST_CASE(exclusive_scan_int_multiplies_long_vector) { size_t size = 1000; bc::vector device_vector(size, int(2), queue); BOOST_CHECK_EQUAL(device_vector.size(), size); bc::exclusive_scan(device_vector.begin(), device_vector.end(), device_vector.begin(), int(10), bc::multiplies(), queue); std::vector host_vector(size, 2); BOOST_CHECK_EQUAL(host_vector.size(), size); bc::copy(device_vector.begin(), device_vector.end(), host_vector.begin(), queue); std::vector test(size, 2); BOOST_CHECK_EQUAL(test.size(), size); test[0] = 10; std::partial_sum(test.begin(), test.end(), test.begin(), std::multiplies()); BOOST_CHECK_EQUAL_COLLECTIONS(host_vector.begin(), host_vector.end(), test.begin(), test.end()); } BOOST_AUTO_TEST_CASE(inclusive_scan_int_custom_function) { BOOST_COMPUTE_FUNCTION(int, multi, (int x, int y), { return x * y * 2; }); int data[] = { 1, 2, 1, 2, 3 }; bc::vector 
vector(data, data + 5, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(5)); bc::vector result(5, context); BOOST_CHECK_EQUAL(result.size(), size_t(5)); // inclusive scan bc::inclusive_scan(vector.begin(), vector.end(), result.begin(), multi, queue); CHECK_RANGE_EQUAL(int, 5, result, (1, 4, 8, 32, 192)); // in-place inclusive scan CHECK_RANGE_EQUAL(int, 5, vector, (1, 2, 1, 2, 3)); bc::inclusive_scan(vector.begin(), vector.end(), vector.begin(), multi, queue); CHECK_RANGE_EQUAL(int, 5, vector, (1, 4, 8, 32, 192)); } BOOST_AUTO_TEST_CASE(exclusive_scan_int_custom_function) { BOOST_COMPUTE_FUNCTION(int, multi, (int x, int y), { return x * y * 2; }); int data[] = { 1, 2, 1, 2, 3 }; bc::vector vector(data, data + 5, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(5)); bc::vector result(5, context); BOOST_CHECK_EQUAL(result.size(), size_t(5)); // exclusive_scan bc::exclusive_scan(vector.begin(), vector.end(), result.begin(), int(1), multi, queue); CHECK_RANGE_EQUAL(int, 5, result, (1, 2, 8, 16, 64)); // in-place exclusive scan CHECK_RANGE_EQUAL(int, 5, vector, (1, 2, 1, 2, 3)); bc::exclusive_scan(vector.begin(), vector.end(), vector.begin(), int(1), multi, queue); CHECK_RANGE_EQUAL(int, 5, vector, (1, 2, 8, 16, 64)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_scatter.cpp000066400000000000000000000035651263566244600172500ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestScatter #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(scatter_int) { int input_data[] = { 1, 2, 3, 4, 5 }; bc::vector input(input_data, input_data + 5, queue); int map_data[] = { 0, 4, 1, 3, 2 }; bc::vector map(map_data, map_data + 5, queue); bc::vector output(5, context); bc::scatter(input.begin(), input.end(), map.begin(), output.begin()); CHECK_RANGE_EQUAL(int, 5, output, (1, 3, 5, 4, 2)); } BOOST_AUTO_TEST_CASE(scatter_constant_indices) { int input_data[] = { 1, 2, 3, 4, 5 }; bc::vector input(input_data, input_data + 5, queue); int map_data[] = { 0, 4, 1, 3, 2 }; bc::buffer map_buffer(context, 5 * sizeof(int), bc::buffer::read_only | bc::buffer::use_host_ptr, map_data); bc::vector output(5, context); bc::scatter(input.begin(), input.end(), bc::make_constant_buffer_iterator(map_buffer, 0), output.begin(), queue); CHECK_RANGE_EQUAL(int, 5, output, (1, 3, 5, 4, 2)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_scatter_if.cpp000066400000000000000000000105151263566244600177170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Pola // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestScatterIf #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(scatter_if_int) { int input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; bc::vector input(input_data, input_data + 10, queue); int map_data[] = {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; bc::vector map(map_data, map_data + 10, queue); int stencil_data[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; bc::vector stencil(stencil_data, stencil_data + 10, queue); bc::vector output(input.size(), -1, queue); bc::scatter_if(input.begin(), input.end(), map.begin(), stencil.begin(), output.begin()); CHECK_RANGE_EQUAL(int, 10, output, (9, -1, 7, -1, 5, -1, 3, -1, 1, -1) ); } BOOST_AUTO_TEST_CASE(scatter_if_constant_indices) { int input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; bc::vector input(input_data, input_data + 10, queue); int map_data[] = {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; bc::buffer map_buffer(context, 10 * sizeof(int), bc::buffer::read_only | bc::buffer::use_host_ptr, map_data); int stencil_data[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; bc::buffer stencil_buffer(context, 10 * sizeof(int), bc::buffer::read_only | bc::buffer::use_host_ptr, stencil_data); bc::vector output(input.size(), -1, queue); bc::scatter_if(input.begin(), input.end(), bc::make_constant_buffer_iterator(map_buffer, 0), bc::make_constant_buffer_iterator(stencil_buffer, 0), output.begin(), queue); CHECK_RANGE_EQUAL(int, 10, output, (9, -1, 7, -1, 5, -1, 3, -1, 1, -1) ); } BOOST_AUTO_TEST_CASE(scatter_if_function) { int input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; bc::vector input(input_data, input_data + 10, queue); int map_data[] = {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; bc::vector map(map_data, map_data + 10, queue); int stencil_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; bc::vector stencil(stencil_data, stencil_data + 10, queue); bc::vector 
output(input.size(), -1, queue); BOOST_COMPUTE_FUNCTION(int, gt_than_5, (int x), { if (x > 5) return true; else return false; }); bc::scatter_if(input.begin(), input.end(), map.begin(), stencil.begin(), output.begin(), gt_than_5, queue); CHECK_RANGE_EQUAL(int, 10, output, (9, 8, 7, 6, -1, -1, -1, -1, -1, -1) ); } BOOST_AUTO_TEST_CASE(scatter_if_counting_iterator) { int input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; bc::vector input(input_data, input_data + 10, queue); int map_data[] = {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; bc::vector map(map_data, map_data + 10, queue); bc::vector output(input.size(), -1, queue); BOOST_COMPUTE_FUNCTION(int, gt_than_5, (int x), { if (x > 5) return true; else return false; }); bc::scatter_if(input.begin(), input.end(), map.begin(), bc::make_counting_iterator(0), output.begin(), gt_than_5, queue); CHECK_RANGE_EQUAL(int, 10, output, (9, 8, 7, 6, -1, -1, -1, -1, -1, -1) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_search.cpp000066400000000000000000000041341263566244600170410ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSearch #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(search_int) { int data[] = {1, 4, 2, 6, 3, 2, 6, 3, 4, 6, 6}; bc::vector vectort(data, data + 11, queue); int datap[] = {2, 6}; bc::vector vectorp(datap, datap + 2, queue); bc::vector::iterator iter = bc::search(vectort.begin(), vectort.end(), vectorp.begin(), vectorp.end(), queue); BOOST_CHECK(iter == vectort.begin() + 2); vectorp[1] = 9; iter = bc::search(vectort.begin(), vectort.end(), vectorp.begin(), vectorp.end(), queue); BOOST_CHECK(iter == vectort.begin() + 11); vectorp[0] = 6; vectorp[1] = 6; iter = bc::search(vectort.begin(), vectort.end(), vectorp.begin(), vectorp.end(), queue); BOOST_CHECK(iter == vectort.begin() + 9); } BOOST_AUTO_TEST_CASE(search_string) { char text[] = "sdabababacabskjabacab"; bc::vector vectort(text, text + 21, queue); char pattern[] = "aba"; bc::vector vectorp(pattern, pattern + 3, queue); bc::vector::iterator iter = bc::search(vectort.begin(), vectort.end(), vectorp.begin(), vectorp.end(), queue); BOOST_CHECK(iter == vectort.begin() + 2); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_search_n.cpp000066400000000000000000000032111263566244600173510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSearchN #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(search_int) { int data[] = {1, 2, 2, 2, 3, 2, 2, 2, 4, 6, 6}; bc::vector vectort(data, data + 11, queue); bc::vector::iterator iter = bc::search_n(vectort.begin(), vectort.end(), 3, 2, queue); BOOST_CHECK(iter == vectort.begin() + 1); iter = bc::search_n(vectort.begin(), vectort.end(), 5, 2, queue); BOOST_CHECK(iter == vectort.begin() + 11); iter = bc::search_n(vectort.begin(), vectort.end(), 2, 6, queue); BOOST_CHECK(iter == vectort.begin() + 9); } BOOST_AUTO_TEST_CASE(search_string) { char text[] = "asaaababaaca"; bc::vector vectort(text, text + 12, queue); bc::vector::iterator iter = bc::search_n(vectort.begin(), vectort.end(), 2, 'a', queue); BOOST_CHECK(iter == vectort.begin() + 2); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_set_difference.cpp000066400000000000000000000041471263566244600205450ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSetDifference #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(set_difference_int) { int dataset1[] = {1, 1, 2, 2, 2, 2, 3, 3, 4, 5, 6, 10}; bc::vector set1(dataset1, dataset1 + 12, queue); int dataset2[] = {0, 2, 2, 4, 5, 6, 8, 8, 9, 9, 9, 13}; bc::vector set2(dataset2, dataset2 + 12, queue); bc::vectorresult(7, queue.get_context()); bc::vector::iterator iter = bc::set_difference(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 12, result.begin(), queue); CHECK_RANGE_EQUAL(int, 7, result, (1, 1, 2, 2, 3, 3, 10)); BOOST_VERIFY(iter == result.begin()+7); } BOOST_AUTO_TEST_CASE(set_difference_string) { char string1[] = "abcccdddeeff"; bc::vector set1(string1, string1 + 12, queue); char string2[] = "bccdfgh"; bc::vector set2(string2, string2 + 7, queue); bc::vectorresult(7, queue.get_context()); bc::vector::iterator iter = bc::set_difference(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 7, result.begin(), queue); CHECK_RANGE_EQUAL(char, 7, result, ('a', 'c', 'd', 'd', 'e', 'e', 'f')); BOOST_VERIFY(iter == result.begin()+7); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_set_intersection.cpp000066400000000000000000000041431263566244600211550ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSetIntersection #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(set_intersection_int) { int dataset1[] = {1, 1, 2, 2, 2, 2, 3, 3, 4, 5, 6, 10}; bc::vector set1(dataset1, dataset1 + 12, queue); int dataset2[] = {0, 2, 2, 4, 5, 6, 8, 8, 9, 9, 9, 13}; bc::vector set2(dataset2, dataset2 + 12, queue); bc::vectorresult(10, queue.get_context()); bc::vector::iterator iter = bc::set_intersection(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 12, result.begin(), queue); CHECK_RANGE_EQUAL(int, 5, result, (2, 2, 4, 5, 6)); BOOST_VERIFY(iter == result.begin()+5); } BOOST_AUTO_TEST_CASE(set_intersection_string) { char string1[] = "abcccdddeeff"; bc::vector set1(string1, string1 + 12, queue); char string2[] = "bccdfgh"; bc::vector set2(string2, string2 + 7, queue); bc::vectorresult(5, queue.get_context()); bc::vector::iterator iter = bc::set_intersection(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 7, result.begin(), queue); CHECK_RANGE_EQUAL(char, 5, result, ('b', 'c', 'c', 'd', 'f')); BOOST_VERIFY(iter == result.begin()+5); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_set_symmetric_difference.cpp000066400000000000000000000043551263566244600226420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSetSymmetricDifference #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(set_symmetric_difference_int) { int dataset1[] = {1, 1, 2, 2, 2, 2, 3, 3, 4, 5, 6, 10}; bc::vector set1(dataset1, dataset1 + 12, queue); int dataset2[] = {0, 2, 2, 4, 5, 6, 8, 8, 9, 9, 9, 13}; bc::vector set2(dataset2, dataset2 + 12, queue); bc::vectorresult(14, queue.get_context()); bc::vector::iterator iter = bc::set_symmetric_difference(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 12, result.begin(), queue); CHECK_RANGE_EQUAL(int, 14, result, (0, 1, 1, 2, 2, 3, 3, 8, 8, 9, 9, 9, 10, 13)); BOOST_VERIFY(iter == result.begin()+14); } BOOST_AUTO_TEST_CASE(set_symmetric_difference_string) { char string1[] = "abcccdddeeff"; bc::vector set1(string1, string1 + 12, queue); char string2[] = "bccdfgh"; bc::vector set2(string2, string2 + 7, queue); bc::vectorresult(9, queue.get_context()); bc::vector::iterator iter = bc::set_symmetric_difference(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 7, result.begin(), queue); CHECK_RANGE_EQUAL(char, 9, result, ('a', 'c', 'd', 'd', 'e', 'e', 'f', 'g', 'h')); BOOST_VERIFY(iter == result.begin()+9); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_set_union.cpp000066400000000000000000000043471263566244600176050ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSetUnion #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(set_union_int) { int dataset1[] = {1, 1, 2, 2, 2, 2, 3, 3, 4, 5, 6, 10}; bc::vector set1(dataset1, dataset1 + 12, queue); int dataset2[] = {0, 2, 2, 4, 5, 6, 8, 8, 9, 9, 9, 13}; bc::vector set2(dataset2, dataset2 + 12, queue); bc::vectorresult(19, queue.get_context()); bc::vector::iterator iter = bc::set_union(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 12, result.begin(), queue); CHECK_RANGE_EQUAL(int, 19, result, (0, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5, 6, 8, 8, 9, 9, 9, 10, 13)); BOOST_VERIFY(iter == result.begin()+19); } BOOST_AUTO_TEST_CASE(set_union_string) { char string1[] = "abcccdddeeff"; bc::vector set1(string1, string1 + 12, queue); char string2[] = "bccdfgh"; bc::vector set2(string2, string2 + 7, queue); bc::vectorresult(14, queue.get_context()); bc::vector::iterator iter = bc::set_union(set1.begin(), set1.begin() + 12, set2.begin(), set2.begin() + 7, result.begin(), queue); CHECK_RANGE_EQUAL(char, 14, result, ('a', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'e', 'e', 'f', 'f', 'g', 'h')); BOOST_VERIFY(iter == result.begin()+14); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_sort.cpp000066400000000000000000000315641263566244600165720ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSort #include #include #include #include #include #include struct Particle { Particle(): x(0.f), y(0.f) { } Particle(float _x, float _y): x(_x), y(_y) { } float x; float y; }; // adapt struct for OpenCL BOOST_COMPUTE_ADAPT_STRUCT(Particle, Particle, (x, y)) #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; // test trivial sorting of zero and one element vectors BOOST_AUTO_TEST_CASE(sort_int_0_and_1) { boost::compute::vector vec(context); BOOST_CHECK_EQUAL(vec.size(), size_t(0)); BOOST_CHECK(boost::compute::is_sorted(vec.begin(), vec.end(), queue) == true); boost::compute::sort(vec.begin(), vec.end(), queue); vec.push_back(11, queue); BOOST_CHECK_EQUAL(vec.size(), size_t(1)); BOOST_CHECK(boost::compute::is_sorted(vec.begin(), vec.end(), queue) == true); boost::compute::sort(vec.begin(), vec.end(), queue); } // test sorting of two element int vectors BOOST_AUTO_TEST_CASE(sort_int_2) { int data[] = { 4, 2 }; boost::compute::vector vec(data, data + 2, queue); // check that vec is unsorted BOOST_CHECK(boost::compute::is_sorted(vec.begin(), vec.end(), queue) == false); // sort vec boost::compute::sort(vec.begin(), vec.end(), queue); // check that vec is sorted BOOST_CHECK(boost::compute::is_sorted(vec.begin(), vec.end(), queue) == true); // sort already sorted vec and ensure it is still sorted boost::compute::sort(vec.begin(), vec.end()); BOOST_CHECK(boost::compute::is_sorted(vec.begin(), vec.end(), queue) == true); } BOOST_AUTO_TEST_CASE(sort_float_3) { float data[] = { 2.3f, 0.1f, 1.2f }; boost::compute::vector vec(data, data + 3, queue); boost::compute::sort(vec.begin(), vec.end(), queue); CHECK_RANGE_EQUAL(float, 3, vec, (0.1f, 1.2f, 2.3f)); } BOOST_AUTO_TEST_CASE(sort_char_vector) { using boost::compute::char_; char_ data[] = { 'c', 'a', '0', '7', 'B', 'F', '\0', '$' }; boost::compute::vector vector(data, data + 8, 
queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::sort(vector.begin(), vector.end()); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(char_, 8, vector, ('\0', '$', '0', '7', 'B', 'F', 'a', 'c')); } BOOST_AUTO_TEST_CASE(sort_uchar_vector) { using boost::compute::uchar_; uchar_ data[] = { 0x12, 0x00, 0xFF, 0xB4, 0x80, 0x32, 0x64, 0xA2 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::sort(vector.begin(), vector.end()); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(uchar_, 8, vector, (0x00, 0x12, 0x32, 0x64, 0x80, 0xA2, 0xB4, 0xFF)); } BOOST_AUTO_TEST_CASE(sort_short_vector) { using boost::compute::short_; short_ data[] = { -4, 152, -94, 963, 31002, -456, 0, -2113 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::sort(vector.begin(), vector.end()); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(short_, 8, vector, (-2113, -456, -94, -4, 0, 152, 963, 31002)); } BOOST_AUTO_TEST_CASE(sort_ushort_vector) { using boost::compute::ushort_; ushort_ data[] = { 4, 152, 94, 963, 63202, 34560, 0, 2113 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::sort(vector.begin(), vector.end()); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(ushort_, 8, vector, (0, 4, 94, 152, 963, 2113, 34560, 63202)); } 
BOOST_AUTO_TEST_CASE(sort_int_vector) { int data[] = { -4, 152, -5000, 963, 75321, -456, 0, 1112 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::sort(vector.begin(), vector.end()); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(int, 8, vector, (-5000, -456, -4, 0, 152, 963, 1112, 75321)); } BOOST_AUTO_TEST_CASE(sort_uint_vector) { using boost::compute::uint_; uint_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::sort(vector.begin(), vector.end()); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(uint_, 8, vector, (0, 500, 562, 1988, 9852, 102030, 123456, 4000000)); } BOOST_AUTO_TEST_CASE(sort_long_vector) { using boost::compute::long_; long_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::sort(vector.begin(), vector.end()); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(long_, 8, vector, (0, 500, 562, 1988, 9852, 102030, 123456, 4000000)); } BOOST_AUTO_TEST_CASE(sort_ulong_vector) { using boost::compute::ulong_; ulong_ data[] = { 500, 1988, 123456, 562, 0, 4000000, 9852, 102030 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::sort(vector.begin(), vector.end()); 
BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end()) == true); CHECK_RANGE_EQUAL(ulong_, 8, vector, (0, 500, 562, 1988, 9852, 102030, 123456, 4000000)); } BOOST_AUTO_TEST_CASE(sort_float_vector) { float data[] = { -6023.0f, 152.5f, -63.0f, 1234567.0f, 11.2f, -5000.1f, 0.0f, 14.0f, -8.25f, -0.0f }; boost::compute::vector vector(data, data + 10, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(10)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::sort(vector.begin(), vector.end()); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL( float, 10, vector, (-6023.0f, -5000.1f, -63.0f, -8.25f, -0.0f, 0.0f, 11.2f, 14.0f, 152.5f, 1234567.0f) ); } BOOST_AUTO_TEST_CASE(sort_double_vector) { if(!device.supports_extension("cl_khr_fp64")){ std::cout << "skipping test: device does not support double" << std::endl; return; } double data[] = { -6023.0, 152.5, -63.0, 1234567.0, 11.2, -5000.1, 0.0, 14.0, -8.25, -0.0 }; boost::compute::vector vector(data, data + 10, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(10)); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == false); boost::compute::sort(vector.begin(), vector.end()); BOOST_CHECK(boost::compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL( double, 10, vector, (-6023.0, -5000.1, -63.0, -8.25, -0.0, 0.0, 11.2, 14.0, 152.5, 1234567.0) ); } BOOST_AUTO_TEST_CASE(reverse_sort_int_vector) { int data[] = { -4, 152, -5000, 963, 75321, -456, 0, 1112 }; boost::compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); boost::compute::sort(vector.begin(), vector.end(), boost::compute::greater(), queue); CHECK_RANGE_EQUAL(int, 8, vector, (75321, 1112, 963, 152, 0, -4, -456, -5000)); } BOOST_AUTO_TEST_CASE(sort_vectors_by_length) { using boost::compute::float2_; using boost::compute::lambda::_1; using boost::compute::lambda::_2; 
float data[] = { 1.0f, 0.2f, 1.3f, 1.0f, 6.7f, 0.0f, 5.2f, 3.4f, 1.4f, 1.4f }; // create vector on device containing vectors boost::compute::vector vector( reinterpret_cast(data), reinterpret_cast(data) + 5, queue ); // sort vectors by length boost::compute::sort( vector.begin(), vector.end(), length(_1) < length(_2), queue ); // copy sorted values back to host boost::compute::copy( vector.begin(), vector.end(), reinterpret_cast(data), queue ); // check values BOOST_CHECK_EQUAL(data[0], 1.0f); BOOST_CHECK_EQUAL(data[1], 0.2f); BOOST_CHECK_EQUAL(data[2], 1.3f); BOOST_CHECK_EQUAL(data[3], 1.0f); BOOST_CHECK_EQUAL(data[4], 1.4f); BOOST_CHECK_EQUAL(data[5], 1.4f); BOOST_CHECK_EQUAL(data[6], 5.2f); BOOST_CHECK_EQUAL(data[7], 3.4f); BOOST_CHECK_EQUAL(data[8], 6.7f); BOOST_CHECK_EQUAL(data[9], 0.0f); } BOOST_AUTO_TEST_CASE(sort_host_vector) { int data[] = { 5, 2, 3, 6, 7, 4, 0, 1 }; std::vector vector(data, data + 8); boost::compute::sort(vector.begin(), vector.end(), queue); CHECK_RANGE_EQUAL(int, 8, vector, (0, 1, 2, 3, 4, 5, 6, 7)); } BOOST_AUTO_TEST_CASE(sort_custom_struct) { // function to compare particles by their x-coordinate BOOST_COMPUTE_FUNCTION(bool, sort_by_x, (Particle a, Particle b), { return a.x < b.x; }); std::vector particles; particles.push_back(Particle(0.1f, 0.f)); particles.push_back(Particle(-0.4f, 0.f)); particles.push_back(Particle(10.0f, 0.f)); particles.push_back(Particle(0.001f, 0.f)); boost::compute::vector vector(4, context); boost::compute::copy(particles.begin(), particles.end(), vector.begin(), queue); BOOST_CHECK_EQUAL(vector.size(), size_t(4)); BOOST_CHECK( boost::compute::is_sorted(vector.begin(), vector.end(), sort_by_x, queue) == false ); boost::compute::sort(vector.begin(), vector.end(), sort_by_x, queue); BOOST_CHECK( boost::compute::is_sorted(vector.begin(), vector.end(), sort_by_x, queue) == true ); boost::compute::copy(vector.begin(), vector.end(), particles.begin(), queue); BOOST_CHECK_CLOSE(particles[0].x, -0.4f, 0.1); 
BOOST_CHECK_CLOSE(particles[1].x, 0.001f, 0.1); BOOST_CHECK_CLOSE(particles[2].x, 0.1f, 0.1); BOOST_CHECK_CLOSE(particles[3].x, 10.0f, 0.1); } BOOST_AUTO_TEST_CASE(sort_int2) { using bc::int2_; BOOST_COMPUTE_FUNCTION(bool, sort_int2, (int2_ a, int2_ b), { return a.x < b.x; }); const size_t size = 100; std::vector host(size, int2_(0, 0)); host[0] = int2_(100.f, 0.f); host[size/4] = int2_(20.f, 0.f); host[(size*3)/4] = int2_(9.f, 0.f); host[size-3] = int2_(-10.0f, 0.f); boost::compute::vector vector(size, context); boost::compute::copy(host.begin(), host.end(), vector.begin(), queue); BOOST_CHECK_EQUAL(vector.size(), size); BOOST_CHECK( boost::compute::is_sorted(vector.begin(), vector.end(), sort_int2, queue) == false ); boost::compute::sort(vector.begin(), vector.end(), sort_int2, queue); BOOST_CHECK( boost::compute::is_sorted(vector.begin(), vector.end(), sort_int2, queue) == true ); boost::compute::copy(vector.begin(), vector.end(), host.begin(), queue); BOOST_CHECK_CLOSE(host[0][0], -10.f, 0.1); BOOST_CHECK_CLOSE(host[(size - 3)][0], 9.f, 0.1); BOOST_CHECK_CLOSE(host[(size - 2)][0], 20.f, 0.1); BOOST_CHECK_CLOSE(host[(size - 1)][0], 100.f, 0.1); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_sort_by_key.cpp000066400000000000000000000074001263566244600201240ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSortByKey #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; // test trivial sorting of zero element vectors BOOST_AUTO_TEST_CASE(sort_int_0) { compute::vector keys(context); compute::vector values(context); BOOST_CHECK_EQUAL(keys.size(), size_t(0)); BOOST_CHECK_EQUAL(values.size(), size_t(0)); BOOST_CHECK(compute::is_sorted(keys.begin(), keys.end()) == true); BOOST_CHECK(compute::is_sorted(values.begin(), values.end()) == true); compute::sort_by_key(keys.begin(), keys.end(), values.begin(), queue); } // test trivial sorting of one element vectors BOOST_AUTO_TEST_CASE(sort_int_1) { int keys_data[] = { 11 }; int values_data[] = { 100 }; compute::vector keys(keys_data, keys_data + 1, queue); compute::vector values(values_data, values_data + 1, queue); BOOST_CHECK(compute::is_sorted(keys.begin(), keys.end(), queue) == true); BOOST_CHECK(compute::is_sorted(values.begin(), values.end(), queue) == true); compute::sort_by_key(keys.begin(), keys.end(), values.begin(), queue); } // test trivial sorting of two element vectors BOOST_AUTO_TEST_CASE(sort_int_2) { int keys_data[] = { 4, 2 }; int values_data[] = { 42, 24 }; compute::vector keys(keys_data, keys_data + 2, queue); compute::vector values(values_data, values_data + 2, queue); BOOST_CHECK(compute::is_sorted(keys.begin(), keys.end(), queue) == false); BOOST_CHECK(compute::is_sorted(values.begin(), values.end(), queue) == false); compute::sort_by_key(keys.begin(), keys.end(), values.begin(), queue); BOOST_CHECK(compute::is_sorted(keys.begin(), keys.end(), queue) == true); BOOST_CHECK(compute::is_sorted(values.begin(), values.end(), queue) == true); } BOOST_AUTO_TEST_CASE(sort_char_by_int) { int keys_data[] = { 6, 2, 1, 3, 4, 7, 5, 0 }; char values_data[] = { 'g', 'c', 'b', 'd', 'e', 'h', 'f', 'a' }; compute::vector 
keys(keys_data, keys_data + 8, queue); compute::vector values(values_data, values_data + 8, queue); compute::sort_by_key(keys.begin(), keys.end(), values.begin(), queue); CHECK_RANGE_EQUAL(int, 8, keys, (0, 1, 2, 3, 4, 5, 6, 7)); CHECK_RANGE_EQUAL(char, 8, values, ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h')); } BOOST_AUTO_TEST_CASE(sort_int_and_float) { int n = 1024; std::vector host_keys(n); std::vector host_values(n); for(int i = 0; i < n; i++){ host_keys[i] = n - i; host_values[i] = (n - i) / 2.f; } compute::vector keys(host_keys.begin(), host_keys.end(), queue); compute::vector values(host_values.begin(), host_values.end(), queue); BOOST_CHECK(compute::is_sorted(keys.begin(), keys.end(), queue) == false); BOOST_CHECK(compute::is_sorted(values.begin(), values.end(), queue) == false); compute::sort_by_key(keys.begin(), keys.end(), values.begin(), queue); BOOST_CHECK(compute::is_sorted(keys.begin(), keys.end(), queue) == true); BOOST_CHECK(compute::is_sorted(values.begin(), values.end(), queue) == true); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_sort_by_transform.cpp000066400000000000000000000062761263566244600213610ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSortByTransform #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(sort_int_by_abs) { int data[] = { 1, -2, 4, -3, 0, 5, -8, -9 }; compute::vector vector(data, data + 8, queue); compute::experimental::sort_by_transform( vector.begin(), vector.end(), compute::abs(), compute::less(), queue ); CHECK_RANGE_EQUAL(int, 8, vector, (0, 1, -2, -3, 4, 5, -8, -9)); } BOOST_AUTO_TEST_CASE(sort_vectors_by_length) { using compute::float4_; float data[] = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 3.0f, 2.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f }; compute::vector vector(4, context); compute::copy_n( reinterpret_cast(data), 4, vector.begin(), queue ); compute::experimental::sort_by_transform( vector.begin(), vector.end(), compute::length(), compute::less(), queue ); std::vector host_vector(4); compute::copy( vector.begin(), vector.end(), host_vector.begin(), queue ); BOOST_CHECK_EQUAL(host_vector[0], float4_(0.0f, 0.0f, 0.5f, 0.0f)); BOOST_CHECK_EQUAL(host_vector[1], float4_(1.0f, 0.0f, 0.0f, 0.0f)); BOOST_CHECK_EQUAL(host_vector[2], float4_(0.0f, 1.0f, 1.0f, 0.0f)); BOOST_CHECK_EQUAL(host_vector[3], float4_(3.0f, 2.0f, 1.0f, 0.0f)); } BOOST_AUTO_TEST_CASE(sort_vectors_by_component) { using compute::float4_; float data[] = { 1.0f, 2.0f, 3.0f, 0.0f, 9.0f, 8.0f, 7.0f, 0.0f, 4.0f, 5.0f, 6.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; compute::vector vector(4, context); compute::copy_n( reinterpret_cast(data), 4, vector.begin(), queue ); // sort by y-component compute::experimental::sort_by_transform( vector.begin(), vector.end(), compute::get<1>(), compute::less(), queue ); std::vector host_vector(4); compute::copy( vector.begin(), vector.end(), host_vector.begin(), queue ); BOOST_CHECK_EQUAL(host_vector[0], float4_(0.0f, 0.0f, 0.0f, 0.0f)); 
BOOST_CHECK_EQUAL(host_vector[1], float4_(1.0f, 2.0f, 3.0f, 0.0f)); BOOST_CHECK_EQUAL(host_vector[2], float4_(4.0f, 5.0f, 6.0f, 0.0f)); BOOST_CHECK_EQUAL(host_vector[3], float4_(9.0f, 8.0f, 7.0f, 0.0f)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_stable_partition.cpp000066400000000000000000000024271263566244600211420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestStablePartition #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(partition_int) { int dataset[] = {1, 1, -2, 0, 5, -1, 2, 4, 0, -1}; bc::vector vector(dataset, dataset + 10, queue); bc::vector::iterator iter = bc::stable_partition(vector.begin(), vector.begin() + 10, bc::_1 > 0, queue); CHECK_RANGE_EQUAL(int, 10, vector, (1, 1, 5, 2, 4, -2, 0, -1, 0, -1)); BOOST_VERIFY(iter == vector.begin()+5); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_stable_sort.cpp000066400000000000000000000064111263566244600201150ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestStableSort #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(sort_int_vector) { int data[] = { -4, 152, -5000, 963, 75321, -456, 0, 1112 }; compute::vector vector(data, data + 8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(8)); BOOST_CHECK(compute::is_sorted(vector.begin(), vector.end(), queue) == false); compute::stable_sort(vector.begin(), vector.end(), queue); BOOST_CHECK(compute::is_sorted(vector.begin(), vector.end(), queue) == true); CHECK_RANGE_EQUAL(int, 8, vector, (-5000, -456, -4, 0, 152, 963, 1112, 75321)); // sort reversed compute::stable_sort(vector.begin(), vector.end(), compute::greater(), queue); CHECK_RANGE_EQUAL(int, 8, vector, (75321, 1112, 963, 152, 0, -4, -456, -5000)); } BOOST_AUTO_TEST_CASE(sort_int2) { using compute::int2_; // device vector of int2's compute::vector vec(context); vec.push_back(int2_(2, 1), queue); vec.push_back(int2_(2, 2), queue); vec.push_back(int2_(1, 2), queue); vec.push_back(int2_(1, 1), queue); // function comparing the first component of each int2 BOOST_COMPUTE_FUNCTION(bool, compare_first, (int2_ a, int2_ b), { return a.x < b.x; }); // ensure vector is not sorted BOOST_CHECK(compute::is_sorted(vec.begin(), vec.end(), compare_first, queue) == false); // sort elements based on their first component compute::stable_sort(vec.begin(), vec.end(), compare_first, queue); // ensure vector is now sorted BOOST_CHECK(compute::is_sorted(vec.begin(), vec.end(), compare_first, queue) == true); // check sorted vector order std::vector result(vec.size()); compute::copy(vec.begin(), vec.end(), result.begin(), queue); BOOST_CHECK_EQUAL(result[0], int2_(1, 2)); BOOST_CHECK_EQUAL(result[1], int2_(1, 1)); BOOST_CHECK_EQUAL(result[2], int2_(2, 1)); BOOST_CHECK_EQUAL(result[3], int2_(2, 2)); // function comparing 
the second component of each int2 BOOST_COMPUTE_FUNCTION(bool, compare_second, (int2_ a, int2_ b), { return a.y < b.y; }); // sort elements based on their second component compute::stable_sort(vec.begin(), vec.end(), compare_second, queue); // check sorted vector order compute::copy(vec.begin(), vec.end(), result.begin(), queue); BOOST_CHECK_EQUAL(result[0], int2_(1, 1)); BOOST_CHECK_EQUAL(result[1], int2_(2, 1)); BOOST_CHECK_EQUAL(result[2], int2_(1, 2)); BOOST_CHECK_EQUAL(result[3], int2_(2, 2)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_stack.cpp000066400000000000000000000025011263566244600166750ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestStack #include #include #include "context_setup.hpp" namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(size) { bc::stack stack; BOOST_CHECK_EQUAL(stack.size(), size_t(0)); stack.push(1); stack.push(2); stack.push(3); BOOST_CHECK_EQUAL(stack.size(), size_t(3)); } BOOST_AUTO_TEST_CASE(push_and_pop) { bc::stack stack; stack.push(1); stack.push(2); stack.push(3); BOOST_CHECK_EQUAL(stack.top(), 3); BOOST_CHECK_EQUAL(stack.size(), size_t(3)); stack.pop(); BOOST_CHECK_EQUAL(stack.top(), 2); BOOST_CHECK_EQUAL(stack.size(), size_t(2)); stack.pop(); BOOST_CHECK_EQUAL(stack.top(), 1); BOOST_CHECK_EQUAL(stack.size(), size_t(1)); stack.pop(); BOOST_CHECK_EQUAL(stack.size(), size_t(0)); } BOOST_AUTO_TEST_SUITE_END() 
compute-0.5/test/test_strided_iterator.cpp000066400000000000000000000116661263566244600211530ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2015 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestStridedIterator #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(value_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::strided_iterator< boost::compute::buffer_iterator >::value_type, int >::value )); BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::strided_iterator< boost::compute::buffer_iterator >::value_type, float >::value )); } BOOST_AUTO_TEST_CASE(base_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::strided_iterator< boost::compute::buffer_iterator >::base_type, boost::compute::buffer_iterator >::value )); } BOOST_AUTO_TEST_CASE(distance) { int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; boost::compute::vector vec(data, data + 8, queue); BOOST_CHECK_EQUAL( std::distance( boost::compute::make_strided_iterator(vec.begin(), 1), boost::compute::make_strided_iterator(vec.end(), 1) ), std::ptrdiff_t(8) ); BOOST_CHECK_EQUAL( std::distance( boost::compute::make_strided_iterator(vec.begin(), 2), boost::compute::make_strided_iterator(vec.end(), 2) ), std::ptrdiff_t(4) ); BOOST_CHECK_EQUAL( std::distance( boost::compute::make_strided_iterator(vec.begin(), 3), boost::compute::make_strided_iterator(vec.begin()+6, 3) ), std::ptrdiff_t(2) ); } BOOST_AUTO_TEST_CASE(copy) { int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; boost::compute::vector vec(data, data + 8, queue); boost::compute::vector result(4, 
context); // copy every other element to result boost::compute::copy( boost::compute::make_strided_iterator(vec.begin(), 2), boost::compute::make_strided_iterator(vec.end(), 2), result.begin(), queue ); CHECK_RANGE_EQUAL(int, 4, result, (1, 3, 5, 7)); // copy every 3rd element to result boost::compute::copy( boost::compute::make_strided_iterator(vec.begin(), 3), boost::compute::make_strided_iterator(vec.begin()+9, 3), result.begin(), queue ); CHECK_RANGE_EQUAL(int, 3, result, (1, 4, 7)); } BOOST_AUTO_TEST_CASE(make_strided_iterator_end) { int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; boost::compute::vector vec(data, data + 8, queue); // stride equals 3 boost::compute::strided_iterator::iterator> end = boost::compute::make_strided_iterator_end(vec.begin(), vec.end(), 3); // end should be vec.begin() + 9 which is one step after last element // accessible through strided_iterator, i.e. vec.begin()+6 BOOST_CHECK(boost::compute::make_strided_iterator(vec.begin()+9, 3) == end); // stride equals 2 end = boost::compute::make_strided_iterator_end(vec.begin(), vec.end(), 2); // end should be vec.end(), because vector size is divisible by 2 BOOST_CHECK(boost::compute::make_strided_iterator(vec.end(), 2) == end); // stride equals 1000 end = boost::compute::make_strided_iterator_end(vec.begin(), vec.end(), 1000); // end should be vec.begin() + 1000, because stride > vector size BOOST_CHECK(boost::compute::make_strided_iterator(vec.begin()+1000, 1000) == end); // test boost::compute::make_strided_iterator_end with copy(..) 
boost::compute::vector result(4, context); // copy every other element to result boost::compute::copy( boost::compute::make_strided_iterator(vec.begin()+1, 2), boost::compute::make_strided_iterator_end(vec.begin()+1, vec.end(), 2), result.begin(), queue ); CHECK_RANGE_EQUAL(int, 4, result, (2, 4, 6, 8)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_string.cpp000066400000000000000000000043741263566244600171100ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestString #include #include #include #include #include "context_setup.hpp" #include "check_macros.hpp" using boost::test_tools::output_test_stream; BOOST_AUTO_TEST_CASE(empty) { boost::compute::string str; BOOST_VERIFY(str.empty()); } BOOST_AUTO_TEST_CASE(swap) { boost::compute::string str1 = "compute"; boost::compute::string str2 = "boost"; BOOST_VERIFY(!str2.empty()); BOOST_VERIFY(!str2.empty()); str1.swap(str2); CHECK_STRING_EQUAL(str1, "boost"); CHECK_STRING_EQUAL(str2, "compute"); str1.clear(); str1.swap(str2); CHECK_STRING_EQUAL(str1, "compute"); CHECK_STRING_EQUAL(str2, ""); str2.swap(str1); CHECK_STRING_EQUAL(str1, ""); CHECK_STRING_EQUAL(str2, "compute"); str1.swap(str1); CHECK_STRING_EQUAL(str1, ""); } BOOST_AUTO_TEST_CASE(size) { boost::compute::string str = "string"; BOOST_VERIFY(!str.empty()); BOOST_CHECK_EQUAL(str.size(), size_t(6)); BOOST_CHECK_EQUAL(str.length(), size_t(6)); } BOOST_AUTO_TEST_CASE(find_doctest) { //! [string_find] boost::compute::string str = "boost::compute::string"; int pos = str.find("::"); //! 
[string_find] boost::compute::string pattern = "string"; BOOST_VERIFY(!str.empty()); BOOST_CHECK_EQUAL(str.find('o'), 1); BOOST_CHECK_NE(str.find('o'), 2); BOOST_CHECK_EQUAL(str.find(pattern), 16); BOOST_CHECK_EQUAL(pos, 5); BOOST_CHECK_EQUAL(str.find("@#$"), size_t(-1)); } BOOST_AUTO_TEST_CASE(outStream) { output_test_stream output; boost::compute::string str = "string"; output< // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #include #define BOOST_TEST_MODULE TestStruct #include #include #include #include #include #include #include #include #include #include namespace compute = boost::compute; // example code defining an atom class namespace chemistry { struct Atom { Atom(float _x, float _y, float _z, int _number) : x(_x), y(_y), z(_z), number(_number) { } float x; float y; float z; int number; }; } // end chemistry namespace // adapt the chemistry::Atom class BOOST_COMPUTE_ADAPT_STRUCT(chemistry::Atom, Atom, (x, y, z, number)) struct StructWithArray { int value; int array[3]; }; BOOST_COMPUTE_ADAPT_STRUCT(StructWithArray, StructWithArray, (value, array)) #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(atom_type_name) { BOOST_CHECK(std::strcmp(compute::type_name(), "Atom") == 0); } BOOST_AUTO_TEST_CASE(atom_struct) { std::vector atoms; atoms.push_back(chemistry::Atom(1.f, 0.f, 0.f, 1)); atoms.push_back(chemistry::Atom(0.f, 1.f, 0.f, 1)); atoms.push_back(chemistry::Atom(0.f, 0.f, 0.f, 8)); compute::vector vec(atoms.size(), context); compute::copy(atoms.begin(), atoms.end(), vec.begin(), queue); // find the oxygen atom BOOST_COMPUTE_FUNCTION(bool, is_oxygen, (chemistry::Atom atom), { return atom.number == 8; }); compute::vector::iterator iter = compute::find_if(vec.begin(), vec.end(), 
is_oxygen, queue); BOOST_CHECK(iter == vec.begin() + 2); // copy the atomic numbers to another vector compute::vector atomic_numbers(vec.size(), context); compute::transform( vec.begin(), vec.end(), atomic_numbers.begin(), compute::field("number"), queue ); CHECK_RANGE_EQUAL(int, 3, atomic_numbers, (1, 1, 8)); } BOOST_AUTO_TEST_CASE(custom_kernel) { std::vector data; data.push_back(chemistry::Atom(1.f, 0.f, 0.f, 1)); data.push_back(chemistry::Atom(0.f, 1.f, 0.f, 1)); data.push_back(chemistry::Atom(0.f, 0.f, 0.f, 8)); compute::vector atoms(data.size(), context); compute::copy(data.begin(), data.end(), atoms.begin(), queue); std::string source = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void custom_kernel(__global const Atom *atoms, __global float *distances) { const uint i = get_global_id(0); const __global Atom *atom = &atoms[i]; const float4 center = { 0, 0, 0, 0 }; const float4 position = { atom->x, atom->y, atom->z, 0 }; distances[i] = distance(position, center); } ); // add type definition for Atom to the start of the program source source = compute::type_definition() + "\n" + source; compute::program program = compute::program::build_with_source(source, context); compute::vector distances(atoms.size(), context); compute::kernel custom_kernel = program.create_kernel("custom_kernel"); custom_kernel.set_arg(0, atoms); custom_kernel.set_arg(1, distances); queue.enqueue_1d_range_kernel(custom_kernel, 0, atoms.size(), 1); } // Creates a StructWithArray containing 'x', 'y', 'z'. 
StructWithArray make_struct_with_array(int x, int y, int z) { StructWithArray s; s.value = 0; s.array[0] = x; s.array[1] = y; s.array[2] = z; return s; } BOOST_AUTO_TEST_CASE(struct_with_array) { compute::vector structs(context); structs.push_back(make_struct_with_array(1, 2, 3), queue); structs.push_back(make_struct_with_array(4, 5, 6), queue); structs.push_back(make_struct_with_array(7, 8, 9), queue); BOOST_COMPUTE_FUNCTION(int, sum_array, (StructWithArray x), { return x.array[0] + x.array[1] + x.array[2]; }); compute::vector results(structs.size(), context); compute::transform( structs.begin(), structs.end(), results.begin(), sum_array, queue ); CHECK_RANGE_EQUAL(int, 3, results, (6, 15, 24)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_svm_ptr.cpp000066400000000000000000000063741263566244600172760ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSvmPtr #include #include #include #include #include #include "quirks.hpp" #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(empty) { } #ifdef CL_VERSION_2_0 BOOST_AUTO_TEST_CASE(alloc) { REQUIRES_OPENCL_VERSION(2, 0); compute::svm_ptr ptr = compute::svm_alloc(context, 8); compute::svm_free(context, ptr); } BOOST_AUTO_TEST_CASE(svmmemcpy) { REQUIRES_OPENCL_VERSION(2, 0); if(bug_in_svmmemcpy(device)){ std::cerr << "skipping svmmemcpy test case" << std::endl; return; } cl_int input[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; cl_int output[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; compute::svm_ptr ptr = compute::svm_alloc(context, 8); compute::svm_ptr ptr2 = compute::svm_alloc(context, 8); // copying from and to host mem queue.enqueue_svm_memcpy(ptr.get(), input, 8 * sizeof(cl_int)); queue.enqueue_svm_memcpy(output, ptr.get(), 8 * sizeof(cl_int)); queue.finish(); CHECK_HOST_RANGE_EQUAL(cl_int, 8, output, (1, 2, 3, 4, 5, 6, 7, 8)); // copying between svm mem queue.enqueue_svm_memcpy(ptr2.get(), ptr.get(), 8 * sizeof(cl_int)); queue.enqueue_svm_memcpy(output, ptr2.get(), 8 * sizeof(cl_int)); queue.finish(); CHECK_HOST_RANGE_EQUAL(cl_int, 8, output, (1, 2, 3, 4, 5, 6, 7, 8)); compute::svm_free(context, ptr); compute::svm_free(context, ptr2); } BOOST_AUTO_TEST_CASE(sum_svm_kernel) { REQUIRES_OPENCL_VERSION(2, 0); const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( __kernel void sum_svm_mem(__global const int *ptr, __global int *result) { int sum = 0; for(uint i = 0; i < 8; i++){ sum += ptr[i]; } *result = sum; } ); compute::program program = compute::program::build_with_source(source, context, "-cl-std=CL2.0"); compute::kernel sum_svm_mem_kernel = program.create_kernel("sum_svm_mem"); cl_int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; compute::svm_ptr ptr = compute::svm_alloc(context, 8); queue.enqueue_svm_map(ptr.get(), 8 * 
sizeof(cl_int), CL_MAP_WRITE); for(size_t i = 0; i < 8; i ++) { static_cast(ptr.get())[i] = data[i]; } queue.enqueue_svm_unmap(ptr.get()); compute::vector result(1, context); sum_svm_mem_kernel.set_arg(0, ptr); sum_svm_mem_kernel.set_arg(1, result); queue.enqueue_task(sum_svm_mem_kernel); queue.finish(); BOOST_CHECK_EQUAL(result[0], (36)); compute::svm_free(context, ptr); } #endif // CL_VERSION_2_0 BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_system.cpp000066400000000000000000000022701263566244600171170ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestSystem #include #include #include BOOST_AUTO_TEST_CASE(platform_count) { BOOST_CHECK(boost::compute::system::platform_count() >= 1); } BOOST_AUTO_TEST_CASE(device_count) { BOOST_CHECK(boost::compute::system::device_count() >= 1); } BOOST_AUTO_TEST_CASE(default_device) { boost::compute::device device = boost::compute::system::default_device(); BOOST_CHECK(device.id() != cl_device_id()); } BOOST_AUTO_TEST_CASE(find_device) { boost::compute::device device = boost::compute::system::default_device(); const std::string &name = device.name(); BOOST_CHECK(boost::compute::system::find_device(name).name() == device.name()); } compute-0.5/test/test_tabulate.cpp000066400000000000000000000022561263566244600174000ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // 
See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestTabulate #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(tabulate_negative_int) { BOOST_COMPUTE_FUNCTION(int, negate, (int x), { return -x; }); compute::vector vector(10, context); compute::experimental::tabulate(vector.begin(), vector.end(), negate, queue); CHECK_RANGE_EQUAL(int, 10, vector, (0, -1, -2, -3, -4, -5, -6, -7, -8, -9)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_threefry_engine.cpp000066400000000000000000000033451263566244600207540ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Muhammad Junaid Muzammil // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://kylelutz.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestThreefry #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(generate_uint) { using boost::compute::uint_; boost::compute::threefry_engine<> rng(queue); boost::compute::vector vector_ctr(20, context); uint32_t ctr[20]; for(int i = 0; i < 10; i++) { ctr[i*2] = i; ctr[i*2+1] = 0; } boost::compute::copy(ctr, ctr+20, vector_ctr.begin(), queue); rng.generate(vector_ctr.begin(), vector_ctr.end(), queue); CHECK_RANGE_EQUAL( uint_, 20, vector_ctr, (uint_(0x6b200159), uint_(0x99ba4efe), uint_(0x508efb2c), uint_(0xc0de3f32), uint_(0x64a626ec), uint_(0xfc15e573), uint_(0xb8abc4d1), uint_(0x537eb86), uint_(0xac6dc2bb), uint_(0xa7adb3c3), uint_(0x5641e094), uint_(0xe4ab4fd), uint_(0xa53c1ce9), uint_(0xabcf1dba), uint_(0x2677a25a), uint_(0x76cf5efc), uint_(0x2d08247f), uint_(0x815480f1), uint_(0x2d1fa53a), uint_(0xdfe8514c)) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_transform.cpp000066400000000000000000000227471263566244600176210ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestTransform #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(transform_int_abs) { int data[] = { 1, -2, -3, -4, 5 }; bc::vector vector(data, data + 5, queue); CHECK_RANGE_EQUAL(int, 5, vector, (1, -2, -3, -4, 5)); bc::transform(vector.begin(), vector.end(), vector.begin(), bc::abs(), queue); CHECK_RANGE_EQUAL(int, 5, vector, (1, 2, 3, 4, 5)); } BOOST_AUTO_TEST_CASE(transform_float_sqrt) { float data[] = { 1.0f, 4.0f, 9.0f, 16.0f }; bc::vector vector(data, data + 4, queue); CHECK_RANGE_EQUAL(float, 4, vector, (1.0f, 4.0f, 9.0f, 16.0f)); bc::transform(vector.begin(), vector.end(), vector.begin(), bc::sqrt(), queue); queue.finish(); BOOST_CHECK_CLOSE(float(vector[0]), 1.0f, 1e-4f); BOOST_CHECK_CLOSE(float(vector[1]), 2.0f, 1e-4f); BOOST_CHECK_CLOSE(float(vector[2]), 3.0f, 1e-4f); BOOST_CHECK_CLOSE(float(vector[3]), 4.0f, 1e-4f); } BOOST_AUTO_TEST_CASE(transform_float_clamp) { float data[] = { 10.f, 20.f, 30.f, 40.f, 50.f }; bc::vector vector(data, data + 5, queue); CHECK_RANGE_EQUAL(float, 5, vector, (10.0f, 20.0f, 30.0f, 40.0f, 50.0f)); bc::transform(vector.begin(), vector.end(), vector.begin(), clamp(bc::_1, 15.f, 45.f), queue); CHECK_RANGE_EQUAL(float, 5, vector, (15.0f, 20.0f, 30.0f, 40.0f, 45.0f)); } BOOST_AUTO_TEST_CASE(transform_add_int) { int data1[] = { 1, 2, 3, 4 }; bc::vector input1(data1, data1 + 4, queue); int data2[] = { 10, 20, 30, 40 }; bc::vector input2(data2, data2 + 4, queue); bc::vector output(4, context); bc::transform(input1.begin(), input1.end(), input2.begin(), output.begin(), bc::plus(), queue); CHECK_RANGE_EQUAL(int, 4, output, (11, 22, 33, 44)); bc::transform(input1.begin(), input1.end(), input2.begin(), output.begin(), bc::multiplies(), queue); 
CHECK_RANGE_EQUAL(int, 4, output, (10, 40, 90, 160)); } BOOST_AUTO_TEST_CASE(transform_pow4) { float data[] = { 1.0f, 2.0f, 3.0f, 4.0f }; bc::vector vector(data, data + 4, queue); CHECK_RANGE_EQUAL(float, 4, vector, (1.0f, 2.0f, 3.0f, 4.0f)); bc::vector result(4, context); bc::transform(vector.begin(), vector.end(), result.begin(), pown(bc::_1, 4), queue); queue.finish(); BOOST_CHECK_CLOSE(float(result[0]), 1.0f, 1e-4f); BOOST_CHECK_CLOSE(float(result[1]), 16.0f, 1e-4f); BOOST_CHECK_CLOSE(float(result[2]), 81.0f, 1e-4f); BOOST_CHECK_CLOSE(float(result[3]), 256.0f, 1e-4f); } BOOST_AUTO_TEST_CASE(transform_custom_function) { float data[] = { 9.0f, 7.0f, 5.0f, 3.0f }; bc::vector vector(data, data + 4, queue); BOOST_COMPUTE_FUNCTION(float, pow3add4, (float x), { return pow(x, 3.0f) + 4.0f; }); bc::vector result(4, context); bc::transform(vector.begin(), vector.end(), result.begin(), pow3add4, queue); queue.finish(); BOOST_CHECK_CLOSE(float(result[0]), 733.0f, 1e-4f); BOOST_CHECK_CLOSE(float(result[1]), 347.0f, 1e-4f); BOOST_CHECK_CLOSE(float(result[2]), 129.0f, 1e-4f); BOOST_CHECK_CLOSE(float(result[3]), 31.0f, 1e-4f); } BOOST_AUTO_TEST_CASE(extract_vector_component) { using bc::int2_; int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; bc::vector vector( reinterpret_cast(data), reinterpret_cast(data) + 4, queue ); CHECK_RANGE_EQUAL( int2_, 4, vector, (int2_(1, 2), int2_(3, 4), int2_(5, 6), int2_(7, 8)) ); bc::vector x_components(4, context); bc::transform(vector.begin(), vector.end(), x_components.begin(), bc::get<0>(), queue); CHECK_RANGE_EQUAL(int, 4, x_components, (1, 3, 5, 7)); bc::vector y_components(4, context); bc::transform(vector.begin(), vector.end(), y_components.begin(), bc::get<1>(), queue); CHECK_RANGE_EQUAL(int, 4, y_components, (2, 4, 6, 8)); } BOOST_AUTO_TEST_CASE(transform_pinned_vector) { int data[] = { 2, -3, 4, -5, 6, -7 }; std::vector vector(data, data + 6); bc::buffer buffer(context, vector.size() * sizeof(int), bc::buffer::read_write | 
bc::buffer::use_host_ptr, &vector[0]); bc::transform(bc::make_buffer_iterator(buffer, 0), bc::make_buffer_iterator(buffer, 6), bc::make_buffer_iterator(buffer, 0), bc::abs(), queue); void *ptr = queue.enqueue_map_buffer(buffer, bc::command_queue::map_read, 0, buffer.size()); BOOST_VERIFY(ptr == &vector[0]); BOOST_CHECK_EQUAL(vector[0], 2); BOOST_CHECK_EQUAL(vector[1], 3); BOOST_CHECK_EQUAL(vector[2], 4); BOOST_CHECK_EQUAL(vector[3], 5); BOOST_CHECK_EQUAL(vector[4], 6); BOOST_CHECK_EQUAL(vector[5], 7); queue.enqueue_unmap_buffer(buffer, ptr); } BOOST_AUTO_TEST_CASE(transform_popcount) { using boost::compute::uint_; uint_ data[] = { 0, 1, 2, 3, 4, 45, 127, 5000, 789, 15963 }; bc::vector input(data, data + 10, queue); bc::vector output(input.size(), context); bc::transform( input.begin(), input.end(), output.begin(), bc::popcount(), queue ); CHECK_RANGE_EQUAL(uint_, 10, output, (0, 1, 1, 2, 1, 4, 7, 5, 5, 10)); } // generates the first 25 fibonacci numbers in parallel using the // rounding-based fibonacci formula BOOST_AUTO_TEST_CASE(generate_fibonacci_sequence) { using boost::compute::uint_; boost::compute::vector sequence(25, context); BOOST_COMPUTE_FUNCTION(uint_, nth_fibonacci, (const uint_ n), { const float golden_ratio = (1.f + sqrt(5.f)) / 2.f; return floor(pown(golden_ratio, n) / sqrt(5.f) + 0.5f); }); boost::compute::transform( boost::compute::make_counting_iterator(uint_(0)), boost::compute::make_counting_iterator(uint_(sequence.size())), sequence.begin(), nth_fibonacci, queue ); CHECK_RANGE_EQUAL( uint_, 25, sequence, (0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368) ); } BOOST_AUTO_TEST_CASE(field) { using compute::uint2_; using compute::uint4_; using compute::field; unsigned int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; compute::vector input( reinterpret_cast(data), reinterpret_cast(data) + 2, queue ); compute::vector output(input.size(), context); compute::transform( input.begin(), 
input.end(), output.begin(), compute::field("xz"), queue ); queue.finish(); BOOST_CHECK_EQUAL(uint2_(output[0]), uint2_(1, 3)); BOOST_CHECK_EQUAL(uint2_(output[1]), uint2_(5, 7)); } BOOST_AUTO_TEST_CASE(transform_abs_doctest) { //! [transform_abs] int data[] = { -1, -2, -3, -4 }; boost::compute::vector vec(data, data + 4, queue); using boost::compute::abs; // calculate the absolute value for each element in-place boost::compute::transform( vec.begin(), vec.end(), vec.begin(), abs(), queue ); // vec == { 1, 2, 3, 4 } //! [transform_abs] CHECK_RANGE_EQUAL(int, 4, vec, (1, 2, 3, 4)); } BOOST_AUTO_TEST_CASE(abs_if_odd) { // return absolute value only for odd values BOOST_COMPUTE_FUNCTION(int, abs_if_odd, (int x), { if(x & 1){ return abs(x); } else { return x; } }); int data[] = { -2, -3, -4, -5, -6, -7, -8, -9 }; compute::vector vector(data, data + 8, queue); compute::transform( vector.begin(), vector.end(), vector.begin(), abs_if_odd, queue ); CHECK_RANGE_EQUAL(int, 8, vector, (-2, +3, -4, +5, -6, +7, -8, +9)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_transform_if.cpp000066400000000000000000000024621263566244600202670ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestTransformIf #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(transform_if_odd) { using boost::compute::abs; using boost::compute::lambda::_1; int data[] = { -2, -3, -4, -5, -6, -7, -8, -9 }; compute::vector input(data, data + 8, queue); compute::vector output(input.size(), context); compute::vector::iterator end = compute::transform_if( input.begin(), input.end(), output.begin(), abs(), _1 % 2 != 0, queue ); BOOST_CHECK_EQUAL(std::distance(output.begin(), end), 4); CHECK_RANGE_EQUAL(int, 4, output, (+3, +5, +7, +9)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_transform_iterator.cpp000066400000000000000000000057431263566244600215270ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestTransformIterator #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(value_type) { using boost::compute::float4_; BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::transform_iterator< boost::compute::buffer_iterator, boost::compute::sqrt >::value_type, float >::value )); BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::transform_iterator< boost::compute::buffer_iterator, boost::compute::length >::value_type, float >::value )); } BOOST_AUTO_TEST_CASE(base_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::transform_iterator< boost::compute::buffer_iterator, boost::compute::abs >::base_type, boost::compute::buffer_iterator >::value )); } BOOST_AUTO_TEST_CASE(copy) { int data[] = { 1, -2, 3, -4, 5 }; boost::compute::vector a(data, data + 5, queue); boost::compute::vector b(5, context); boost::compute::copy( boost::compute::make_transform_iterator( a.begin(), boost::compute::abs() ), boost::compute::make_transform_iterator( a.end(), boost::compute::abs() ), b.begin(), queue ); CHECK_RANGE_EQUAL(int, 5, b, (1, 2, 3, 4, 5)); } BOOST_AUTO_TEST_CASE(copy_abs_doctest) { int data[] = { -1, -2, -3, -4 }; boost::compute::vector input(data, data + 4, queue); boost::compute::vector output(4, context); //! [copy_abs] // use abs() from boost.compute using boost::compute::abs; // copy the absolute value for each element in input to output boost::compute::copy( boost::compute::make_transform_iterator(input.begin(), abs()), boost::compute::make_transform_iterator(input.end(), abs()), output.begin(), queue ); //! 
[copy_abs] CHECK_RANGE_EQUAL(int, 4, output, (1, 2, 3, 4)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_transform_reduce.cpp000066400000000000000000000051441263566244600211400ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestTransformReduce #include #include #include #include #include #include #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(sum_abs_int_doctest) { using boost::compute::abs; using boost::compute::plus; int data[] = { 1, -2, -3, -4, 5 }; compute::vector vec(data, data + 5, queue); //! [sum_abs_int] int sum = 0; boost::compute::transform_reduce( vec.begin(), vec.end(), &sum, abs(), plus(), queue ); //! 
[sum_abs_int] BOOST_CHECK_EQUAL(sum, 15); } BOOST_AUTO_TEST_CASE(multiply_vector_length) { float data[] = { 2.0f, 0.0f, 0.0f, 0.0f, 0.0f, 3.0f, 0.0f, 0.0f, 0.0f, 0.0f, 4.0f, 0.0f }; compute::vector vector( reinterpret_cast(data), reinterpret_cast(data) + 3, queue ); float product; compute::transform_reduce( vector.begin(), vector.end(), &product, compute::length(), compute::multiplies(), queue ); BOOST_CHECK_CLOSE(product, 24.0f, 1e-4f); } BOOST_AUTO_TEST_CASE(mean_and_std_dev) { using compute::lambda::_1; using compute::lambda::pow; float data[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; compute::vector vector(data, data + 10, queue); float sum; compute::reduce( vector.begin(), vector.end(), &sum, compute::plus(), queue ); float mean = sum / vector.size(); BOOST_CHECK_CLOSE(mean, 5.5f, 1e-4); compute::transform_reduce( vector.begin(), vector.end(), &sum, pow(_1 - mean, 2), compute::plus(), queue ); float variance = sum / vector.size(); BOOST_CHECK_CLOSE(variance, 8.25f, 1e-4); float std_dev = std::sqrt(variance); BOOST_CHECK_CLOSE(std_dev, 2.8722813232690143, 1e-4); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_tuple.cpp000066400000000000000000000111101263566244600167150ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestTuple #include #include #include #include #include #include #include #include #include #include #include #include "quirks.hpp" #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(vector_tuple_int_float) { boost::compute::vector > vector(context); vector.push_back(boost::make_tuple(1, 2.1f), queue); vector.push_back(boost::make_tuple(2, 3.2f), queue); vector.push_back(boost::make_tuple(3, 4.3f), queue); } BOOST_AUTO_TEST_CASE(copy_vector_tuple) { // create vector of tuples on device boost::compute::vector > input(context); input.push_back(boost::make_tuple('a', 1, 2.3f), queue); input.push_back(boost::make_tuple('c', 3, 4.5f), queue); input.push_back(boost::make_tuple('f', 6, 7.8f), queue); // copy on device boost::compute::vector > output(context); boost::compute::copy( input.begin(), input.end(), output.begin(), queue ); // copy to host std::vector > host_output(3); boost::compute::copy( input.begin(), input.end(), host_output.begin(), queue ); // check tuple data BOOST_CHECK_EQUAL(host_output[0], boost::make_tuple('a', 1, 2.3f)); BOOST_CHECK_EQUAL(host_output[1], boost::make_tuple('c', 3, 4.5f)); BOOST_CHECK_EQUAL(host_output[2], boost::make_tuple('f', 6, 7.8f)); } BOOST_AUTO_TEST_CASE(extract_tuple_elements) { compute::vector > vector(context); vector.push_back(boost::make_tuple('a', 1, 2.3f), queue); vector.push_back(boost::make_tuple('c', 3, 4.5f), queue); vector.push_back(boost::make_tuple('f', 6, 7.8f), queue); compute::vector chars(3, context); compute::transform( vector.begin(), vector.end(), chars.begin(), compute::get<0>(), queue ); CHECK_RANGE_EQUAL(char, 3, chars, ('a', 'c', 'f')); compute::vector ints(3, context); compute::transform( vector.begin(), vector.end(), ints.begin(), compute::get<1>(), queue ); CHECK_RANGE_EQUAL(int, 3, ints, (1, 3, 6)); compute::vector floats(3, context); 
compute::transform( vector.begin(), vector.end(), floats.begin(), compute::get<2>(), queue ); CHECK_RANGE_EQUAL(float, 3, floats, (2.3f, 4.5f, 7.8f)); } BOOST_AUTO_TEST_CASE(fill_tuple_vector) { if(bug_in_struct_assignment(device)){ std::cerr << "skipping fill_tuple_vector test" << std::endl; return; } compute::vector > vector(5, context); compute::fill(vector.begin(), vector.end(), boost::make_tuple('z', 4, 3.14f), queue); std::vector > host_output(5); compute::copy(vector.begin(), vector.end(), host_output.begin(), queue); BOOST_CHECK_EQUAL(host_output[0], boost::make_tuple('z', 4, 3.14f)); BOOST_CHECK_EQUAL(host_output[1], boost::make_tuple('z', 4, 3.14f)); BOOST_CHECK_EQUAL(host_output[2], boost::make_tuple('z', 4, 3.14f)); BOOST_CHECK_EQUAL(host_output[3], boost::make_tuple('z', 4, 3.14f)); BOOST_CHECK_EQUAL(host_output[4], boost::make_tuple('z', 4, 3.14f)); } #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES BOOST_AUTO_TEST_CASE(variadic_tuple) { BOOST_CHECK_EQUAL( (compute::type_name >()), "boost_tuple_char_short_int_float_t" ); } #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES #ifndef BOOST_COMPUTE_NO_STD_TUPLE BOOST_AUTO_TEST_CASE(std_tuple) { BOOST_CHECK_EQUAL( (compute::type_name>()), "std_tuple_char_short_int_float_t" ); } #endif // BOOST_COMPUTE_NO_STD_TUPLE BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_type_traits.cpp000066400000000000000000000141501263566244600201420ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestTypeTraits #include #include #include #include #include #include #include #include #include #include #include #include namespace bc = boost::compute; BOOST_AUTO_TEST_CASE(scalar_type) { BOOST_STATIC_ASSERT((boost::is_same::type, int>::value)); BOOST_STATIC_ASSERT((boost::is_same::type, int>::value)); BOOST_STATIC_ASSERT((boost::is_same::type, float>::value)); BOOST_STATIC_ASSERT((boost::is_same::type, float>::value)); } BOOST_AUTO_TEST_CASE(vector_size) { BOOST_STATIC_ASSERT(bc::vector_size::value == 1); BOOST_STATIC_ASSERT(bc::vector_size::value == 2); BOOST_STATIC_ASSERT(bc::vector_size::value == 1); BOOST_STATIC_ASSERT(bc::vector_size::value == 4); } BOOST_AUTO_TEST_CASE(is_vector_type) { BOOST_STATIC_ASSERT(bc::is_vector_type::value == false); BOOST_STATIC_ASSERT(bc::is_vector_type::value == true); BOOST_STATIC_ASSERT(bc::is_vector_type::value == false); BOOST_STATIC_ASSERT(bc::is_vector_type::value == true); } BOOST_AUTO_TEST_CASE(make_vector_type) { BOOST_STATIC_ASSERT((boost::is_same::type, bc::uint2_>::value)); BOOST_STATIC_ASSERT((boost::is_same::type, bc::int4_>::value)); BOOST_STATIC_ASSERT((boost::is_same::type, bc::float8_>::value)); BOOST_STATIC_ASSERT((boost::is_same::type, bc::char16_>::value)); } BOOST_AUTO_TEST_CASE(is_fundamental_type) { BOOST_STATIC_ASSERT((bc::is_fundamental::value == true)); BOOST_STATIC_ASSERT((bc::is_fundamental::value == true)); BOOST_STATIC_ASSERT((bc::is_fundamental::value == true)); BOOST_STATIC_ASSERT((bc::is_fundamental::value == true)); BOOST_STATIC_ASSERT((bc::is_fundamental::value == true)); BOOST_STATIC_ASSERT((bc::is_fundamental::value == true)); BOOST_STATIC_ASSERT((bc::is_fundamental >::value == false)); BOOST_STATIC_ASSERT((bc::is_fundamental >::value == false)); } BOOST_AUTO_TEST_CASE(type_name) { // scalar types BOOST_CHECK(std::strcmp(bc::type_name(), "char") == 0); 
BOOST_CHECK(std::strcmp(bc::type_name(), "uchar") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "short") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "ushort") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "int") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "uint") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "long") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "ulong") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "float") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "double") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "bool") == 0); // vector types BOOST_CHECK(std::strcmp(bc::type_name(), "char16") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "uint4") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "ulong8") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "float2") == 0); BOOST_CHECK(std::strcmp(bc::type_name(), "double4") == 0); } BOOST_AUTO_TEST_CASE(is_contiguous_iterator) { using boost::compute::detail::is_contiguous_iterator; BOOST_STATIC_ASSERT(is_contiguous_iterator::value == true); BOOST_STATIC_ASSERT(is_contiguous_iterator::iterator>::value == true); BOOST_STATIC_ASSERT(is_contiguous_iterator::const_iterator>::value == true); BOOST_STATIC_ASSERT(is_contiguous_iterator::iterator>::value == false); BOOST_STATIC_ASSERT(is_contiguous_iterator::iterator>::value == false); BOOST_STATIC_ASSERT(is_contiguous_iterator > >::value == false); BOOST_STATIC_ASSERT(is_contiguous_iterator > >::value == false); } BOOST_AUTO_TEST_CASE(is_buffer_iterator) { using boost::compute::detail::is_buffer_iterator; BOOST_STATIC_ASSERT(is_buffer_iterator >::value == true); BOOST_STATIC_ASSERT(is_buffer_iterator >::value == false); } BOOST_AUTO_TEST_CASE(is_device_iterator) { using boost::compute::is_device_iterator; BOOST_STATIC_ASSERT(is_device_iterator >::value == true); BOOST_STATIC_ASSERT(is_device_iterator >::value == true); BOOST_STATIC_ASSERT(is_device_iterator >::value == true); BOOST_STATIC_ASSERT(is_device_iterator >::value == true); 
BOOST_STATIC_ASSERT(is_device_iterator::value == false); BOOST_STATIC_ASSERT(is_device_iterator::value == false); BOOST_STATIC_ASSERT(is_device_iterator::iterator>::value == false); BOOST_STATIC_ASSERT(is_device_iterator::iterator>::value == false); } compute-0.5/test/test_types.cpp000066400000000000000000000026761263566244600167510ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestTypes #include #include #include #include BOOST_AUTO_TEST_CASE(vector_ctor) { boost::compute::int4_ i4(1, 2, 3, 4); BOOST_CHECK(i4 == boost::compute::int4_(1, 2, 3, 4)); BOOST_CHECK_EQUAL(i4, boost::compute::int4_(1, 2, 3, 4)); BOOST_CHECK_EQUAL(i4[0], 1); BOOST_CHECK_EQUAL(i4[1], 2); BOOST_CHECK_EQUAL(i4[2], 3); BOOST_CHECK_EQUAL(i4[3], 4); i4 = boost::compute::int4_(1); BOOST_CHECK(i4 == boost::compute::int4_(1, 1, 1, 1)); BOOST_CHECK(i4 == (boost::compute::vector_type(1))); BOOST_CHECK_EQUAL(i4, boost::compute::int4_(1, 1, 1, 1)); BOOST_CHECK_EQUAL(i4[0], 1); BOOST_CHECK_EQUAL(i4[1], 1); BOOST_CHECK_EQUAL(i4[2], 1); BOOST_CHECK_EQUAL(i4[3], 1); } BOOST_AUTO_TEST_CASE(vector_string) { std::stringstream stream; stream << boost::compute::int2_(1, 2); BOOST_CHECK_EQUAL(stream.str(), std::string("int2(1, 2)")); } compute-0.5/test/test_uniform_int_distribution.cpp000066400000000000000000000043551263566244600227310ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // 
http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestUniformIntDistribution #include #include #include #include #include #include #include #include #include "context_setup.hpp" namespace compute=boost::compute; BOOST_AUTO_TEST_CASE(uniform_int_distribution_doctest) { using boost::compute::uint_; using boost::compute::lambda::_1; boost::compute::vector vec(128, context); //! [generate] // initialize the default random engine boost::compute::default_random_engine engine(queue); // setup the uniform distribution to produce integers 0 and 1 boost::compute::uniform_int_distribution distribution(0, 1); // generate the random values and store them to 'vec' distribution.generate(vec.begin(), vec.end(), engine, queue); //! [generate] BOOST_CHECK_EQUAL( boost::compute::count_if( vec.begin(), vec.end(), _1 > 1, queue ), size_t(0) ); } BOOST_AUTO_TEST_CASE(issue159) { using boost::compute::lambda::_1; boost::compute::vector input(10, context); // generate random numbers between 1 and 10 compute::default_random_engine rng(queue); compute::uniform_int_distribution d(1, 10); d.generate(input.begin(), input.end(), rng, queue); BOOST_CHECK_EQUAL( boost::compute::count_if( input.begin(), input.end(), _1 > 10, queue ), size_t(0) ); BOOST_CHECK_EQUAL( boost::compute::count_if( input.begin(), input.end(), _1 < 1, queue ), size_t(0) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_uniform_real_distribution.cpp000066400000000000000000000031341263566244600230540ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestUniformRealDistribution #include #include #include #include #include #include #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(uniform_real_distribution_doctest) { using boost::compute::lambda::_1; boost::compute::vector vec(128, context); //! [generate] // initialize the default random engine boost::compute::default_random_engine engine(queue); // setup the uniform distribution to produce floats between 1 and 100 boost::compute::uniform_real_distribution distribution(1.0f, 100.0f); // generate the random values and store them to 'vec' distribution.generate(vec.begin(), vec.end(), engine, queue); //! [generate] BOOST_CHECK_EQUAL( boost::compute::count_if( vec.begin(), vec.end(), _1 < 1.0f || _1 > 100.0f, queue ), size_t(0) ); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_unique.cpp000066400000000000000000000054041263566244600171030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestUnique #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(unique_int) { int data[] = {1, 6, 6, 4, 2, 2, 4}; compute::vector input(data, data + 7, queue); compute::vector::iterator iter = compute::unique(input.begin(), input.end(), queue); BOOST_VERIFY(iter == input.begin() + 5); CHECK_RANGE_EQUAL(int, 7, input, (1, 6, 4, 2, 4, 2, 4)); } BOOST_AUTO_TEST_CASE(all_same_float) { compute::vector vec(1024, context); compute::fill(vec.begin(), vec.end(), 3.14f, queue); compute::vector::iterator iter = compute::unique(vec.begin(), vec.end(), queue); BOOST_VERIFY(iter == vec.begin() + 1); float first; compute::copy_n(vec.begin(), 1, &first, queue); BOOST_CHECK_EQUAL(first, 3.14f); } BOOST_AUTO_TEST_CASE(unique_even_uints) { using compute::uint_; // create vector filled with [0, 1, 2, ...] compute::vector vec(1024, context); compute::iota(vec.begin(), vec.end(), 0, queue); // all should be unique compute::vector::iterator iter = compute::unique( vec.begin(), vec.end(), queue ); BOOST_VERIFY(iter == vec.end()); // if odd, return the prior even number, else return the number BOOST_COMPUTE_FUNCTION(uint_, odd_to_even, (uint_ x), { if(x & 1){ return x - 1; } else { return x; } }); // set all odd numbers the previous even number compute::transform( vec.begin(), vec.end(), vec.begin(), odd_to_even, queue ); // now the vector should contain [0, 0, 2, 2, 4, 4, ...] 
iter = compute::unique(vec.begin(), vec.end(), queue); BOOST_VERIFY(iter == vec.begin() + (vec.size() / 2)); // ensure all of the values are even BOOST_COMPUTE_FUNCTION(bool, is_odd, (uint_ x), { return x & 1; }); BOOST_VERIFY(compute::none_of(vec.begin(), vec.end(), is_odd, queue)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_unique_copy.cpp000066400000000000000000000022241263566244600201320ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Roshan // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestUniqueCopy #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(unique_copy_int) { int data[] = {1, 6, 6, 4, 2, 2, 4}; bc::vector input(data, data + 7, queue); bc::vector result(5, context); bc::vector::iterator iter = bc::unique_copy(input.begin(), input.end(), result.begin(), queue); BOOST_VERIFY(iter == result.begin() + 5); CHECK_RANGE_EQUAL(int, 5, result, (1, 6, 4, 2, 4)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_unsupported_extension.cpp000066400000000000000000000015271263566244600222630ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2014 Fabian Köhler // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestUnsupportedExtension #include #include BOOST_AUTO_TEST_CASE(unsupported_extension_error_what) { boost::compute::unsupported_extension_error error("CL_DUMMY_EXTENSION"); BOOST_CHECK_EQUAL(std::string(error.what()), std::string("OpenCL extension CL_DUMMY_EXTENSION not supported")); } compute-0.5/test/test_user_defined_types.cpp000066400000000000000000000063141263566244600214560ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestUserDefinedTypes #include #include #include #include #include #include #include namespace compute = boost::compute; // user-defined data type containing two int's and a float struct UDD { int a; int b; float c; }; // make UDD available to OpenCL BOOST_COMPUTE_ADAPT_STRUCT(UDD, UDD, (a, b, c)) // comparison operator for UDD bool operator==(const UDD &lhs, const UDD &rhs) { return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c; } // output stream operator for UDD std::ostream& operator<<(std::ostream &stream, const UDD &x) { return stream << "(" << x.a << ", " << x.b << ", " << x.c << ")"; } // function to generate a random UDD on the host UDD rand_UDD() { UDD udd; udd.a = rand() % 100; udd.b = rand() % 100; udd.c = (float)(rand() % 100) / 1.3f; return udd; } // function to compare two UDD's on the host by their first component bool compare_UDD_host(const UDD &lhs, const UDD &rhs) { return lhs.a < rhs.a; } // function to compate two UDD's on the device by their first component BOOST_COMPUTE_FUNCTION(bool, 
compare_UDD_device, (UDD lhs, UDD rhs), { return lhs.a < rhs.a; }); #include "check_macros.hpp" #include "context_setup.hpp" // see: issue #11 (https://github.com/boostorg/compute/issues/11) BOOST_AUTO_TEST_CASE(issue_11) { if(device.vendor() == "NVIDIA" && device.platform().name() == "Apple"){ // FIXME: this test currently segfaults on NVIDIA GPUs on Apple std::cerr << "skipping issue test on NVIDIA GPU on Apple platform" << std::endl; return; } // create vector of random values on the host std::vector host_vector(10); std::generate(host_vector.begin(), host_vector.end(), rand_UDD); // transfer the values to the device compute::vector device_vector(host_vector.size(), context); compute::copy( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // sort values on the device compute::sort( device_vector.begin(), device_vector.end(), compare_UDD_device, queue ); // sort values on the host std::sort( host_vector.begin(), host_vector.end(), compare_UDD_host ); // copy sorted device values back to the host std::vector tmp(10); compute::copy( device_vector.begin(), device_vector.end(), tmp.begin(), queue ); // verify sorted values for(size_t i = 0; i < host_vector.size(); i++){ BOOST_CHECK_EQUAL(tmp[i], host_vector[i]); } } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_user_event.cpp000066400000000000000000000017661263566244600177630ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestUserEvent #include #include #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(empty){} #ifdef CL_VERSION_1_1 BOOST_AUTO_TEST_CASE(user_event) { REQUIRES_OPENCL_VERSION(1, 1); boost::compute::user_event event(context); BOOST_CHECK(event.get() != cl_event()); BOOST_CHECK(event.status() != CL_COMPLETE); event.set_status(CL_COMPLETE); event.wait(); BOOST_CHECK(event.status() == CL_COMPLETE); } #endif // CL_VERSION_1_1 BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_valarray.cpp000066400000000000000000000327261263566244600174250ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestValarray #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" BOOST_AUTO_TEST_CASE(size) { boost::compute::valarray array; BOOST_CHECK_EQUAL(array.size(), size_t(0)); array.resize(10); BOOST_CHECK_EQUAL(array.size(), size_t(10)); } BOOST_AUTO_TEST_CASE(at) { int data[] = { 1, 2, 3, 4, 5 }; boost::compute::valarray array(data, 5); BOOST_CHECK_EQUAL(array.size(), size_t(5)); boost::compute::system::finish(); BOOST_CHECK_EQUAL(int(array[0]), int(1)); BOOST_CHECK_EQUAL(int(array[1]), int(2)); BOOST_CHECK_EQUAL(int(array[2]), int(3)); BOOST_CHECK_EQUAL(int(array[3]), int(4)); BOOST_CHECK_EQUAL(int(array[4]), int(5)); } BOOST_AUTO_TEST_CASE(min_and_max) { int data[] = { 5, 2, 3, 7, 1, 9, 6, 5 }; boost::compute::valarray array(data, 8); BOOST_CHECK_EQUAL(array.size(), size_t(8)); BOOST_CHECK_EQUAL((array.min)(), int(1)); BOOST_CHECK_EQUAL((array.max)(), int(9)); } BOOST_AUTO_TEST_CASE(sum) { int data[] = { 1, 2, 3, 4 }; boost::compute::valarray array(data, 4); boost::compute::system::finish(); BOOST_CHECK_EQUAL(array.size(), size_t(4)); BOOST_CHECK_EQUAL(array.sum(), int(10)); } BOOST_AUTO_TEST_CASE(apply) { int data[] = { -1, 2, -3, 4 }; boost::compute::valarray array(data, 4); boost::compute::abs abs; boost::compute::valarray result = array.apply(abs); boost::compute::system::finish(); BOOST_CHECK_EQUAL(int(result[0]), int(1)); BOOST_CHECK_EQUAL(int(result[1]), int(2)); BOOST_CHECK_EQUAL(int(result[2]), int(3)); BOOST_CHECK_EQUAL(int(result[3]), int(4)); } /// \internal_ /// Tests for compound assignment operators that works for floating /// point types. 
/// Expands to one test case that applies `op=` to float valarrays built
/// from { 1, 2, 3, 4 } in three forms: valarray op= scalar (1),
/// valarray op= another valarray, and valarray op= itself. Each element is
/// compared with the host-side result via BOOST_CHECK_CLOSE (1e-4 percent).
/// The valarray declarations carry no template argument list in this copy
/// of the file (extraction damage).
///
/// NOTE(review): the generated test names end in `_ca_operator_no_fp`
/// although this variant works on float data, while the integer-only
/// variant generates `_ca_operator`; the suffixes look swapped. They are
/// left unchanged because renaming would alter the registered test names.
#define BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name) \
BOOST_AUTO_TEST_CASE(op_name##_ca_operator_no_fp) \
{ \
    float data[] = { 1, 2, 3, 4 }; \
    boost::compute::valarray array1(data, 4); \
    boost::compute::valarray array2(data, 4); \
    boost::compute::system::finish(); \
    \
    array1 op##= 1; \
    boost::compute::system::finish(); \
    BOOST_CHECK_CLOSE(float(array1[0]), float(1.0f op 1.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(array1[1]), float(2.0f op 1.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(array1[2]), float(3.0f op 1.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(array1[3]), float(4.0f op 1.0f), 1e-4f); \
    \
    array1 = boost::compute::valarray(data, 4); \
    boost::compute::system::finish(); \
    \
    array1 op##= array2; \
    boost::compute::system::finish(); \
    BOOST_CHECK_CLOSE(float(array1[0]), float(1.0f op 1.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(array1[1]), float(2.0f op 2.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(array1[2]), float(3.0f op 3.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(array1[3]), float(4.0f op 4.0f), 1e-4f); \
    \
    array2 op##= array2; \
    boost::compute::system::finish(); \
    BOOST_CHECK_CLOSE(float(array2[0]), float(1.0f op 1.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(array2[1]), float(2.0f op 2.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(array2[2]), float(3.0f op 3.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(array2[3]), float(4.0f op 4.0f), 1e-4f); \
    \
}

// instantiate the float tests for +=, -=, *= and /=
BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT(+, plus)
BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT(-, minus)
BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT(*, multiplies)
BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT(/, divides)

#undef BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT

/// \internal_
/// Tests for compound assignment operators that do NOT work for floating
/// point types.
/// Note: modulo operator works only for integer types.
/// Expands to one test case named `<op_name>_ca_operator` that applies
/// `op=` to int valarrays built from { 1, 2, 3, 4 } in three forms:
/// valarray op= scalar (1), valarray op= another valarray, and
/// valarray op= itself. Each element is compared with the host-side result
/// via BOOST_CHECK_EQUAL.
/// The valarray declarations carry no template argument list in this copy
/// of the file (extraction damage).
#define BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(op, op_name) \
BOOST_AUTO_TEST_CASE(op_name##_ca_operator) \
{ \
    int data[] = { 1, 2, 3, 4 }; \
    boost::compute::valarray array1(data, 4); \
    boost::compute::valarray array2(data, 4); \
    boost::compute::system::finish(); \
    \
    array1 op##= 1; \
    boost::compute::system::finish(); \
    BOOST_CHECK_EQUAL(int(array1[0]), int(1 op 1)); \
    BOOST_CHECK_EQUAL(int(array1[1]), int(2 op 1)); \
    BOOST_CHECK_EQUAL(int(array1[2]), int(3 op 1)); \
    BOOST_CHECK_EQUAL(int(array1[3]), int(4 op 1)); \
    \
    array1 = boost::compute::valarray(data, 4); \
    boost::compute::system::finish(); \
    \
    array1 op##= array2; \
    boost::compute::system::finish(); \
    BOOST_CHECK_EQUAL(int(array1[0]), int(1 op 1)); \
    BOOST_CHECK_EQUAL(int(array1[1]), int(2 op 2)); \
    BOOST_CHECK_EQUAL(int(array1[2]), int(3 op 3)); \
    BOOST_CHECK_EQUAL(int(array1[3]), int(4 op 4)); \
    \
    array2 op##= array2; \
    boost::compute::system::finish(); \
    BOOST_CHECK_EQUAL(int(array2[0]), int(1 op 1)); \
    BOOST_CHECK_EQUAL(int(array2[1]), int(2 op 2)); \
    BOOST_CHECK_EQUAL(int(array2[2]), int(3 op 3)); \
    BOOST_CHECK_EQUAL(int(array2[3]), int(4 op 4)); \
    \
}

// instantiate the integer tests for %=, ^=, &=, |=, <<= and >>=
BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(%, modulus)
BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(^, bit_xor)
BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(&, bit_and)
BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(|, bit_or)
BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(<<, shift_left)
BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(>>, shift_right)

#undef BOOST_COMPUTE_TEST_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP

// Unary plus leaves every element unchanged.
BOOST_AUTO_TEST_CASE(unary_plus_operator)
{
    int data[] = { 1, 2, 3, 4 };
    boost::compute::valarray array(data, 4);
    boost::compute::system::finish();

    boost::compute::valarray result = +array;
    boost::compute::system::finish();
    BOOST_CHECK_EQUAL(int(result[0]), +(int(1)));
    BOOST_CHECK_EQUAL(int(result[1]), +(int(2)));
    BOOST_CHECK_EQUAL(int(result[2]), +(int(3)));
    BOOST_CHECK_EQUAL(int(result[3]), +(int(4)));
}

// Unary minus negates every element: { -1, 2, 0, 4 } -> { 1, -2, 0, -4 }.
BOOST_AUTO_TEST_CASE(unary_minus_operator)
{
    int data[] = { -1, 2, 0, 4 };
    boost::compute::valarray array(data, 4);
    boost::compute::system::finish();

    boost::compute::valarray result = -array;
    boost::compute::system::finish();
    BOOST_CHECK_EQUAL(int(result[0]), int(1));
    BOOST_CHECK_EQUAL(int(result[1]), int(-2));
    BOOST_CHECK_EQUAL(int(result[2]), int(0));
    BOOST_CHECK_EQUAL(int(result[3]), int(-4));
}

// operator~ yields the bitwise complement of every element.
BOOST_AUTO_TEST_CASE(unary_bitwise_not_operator)
{
    int data[] = { 1, 2, 3, 4 };
    boost::compute::valarray array(data, 4);
    boost::compute::system::finish();

    boost::compute::valarray result = ~array;
    boost::compute::system::finish();
    BOOST_CHECK_EQUAL(int(result[0]), ~(int(1)));
    BOOST_CHECK_EQUAL(int(result[1]), ~(int(2)));
    BOOST_CHECK_EQUAL(int(result[2]), ~(int(3)));
    BOOST_CHECK_EQUAL(int(result[3]), ~(int(4)));
}

// operator! yields, converted to bool, the logical negation of every
// element (only the zero element becomes true).
BOOST_AUTO_TEST_CASE(unary_logical_not_operator)
{
    int data[] = { 1, -2, 0, 4 };
    boost::compute::valarray array(data, 4);
    boost::compute::system::finish();

    boost::compute::valarray result = !array;
    boost::compute::system::finish();
    BOOST_CHECK_EQUAL(bool(result[0]), !(int(1)));
    BOOST_CHECK_EQUAL(bool(result[1]), !(int(-2)));
    BOOST_CHECK_EQUAL(bool(result[2]), !(int(0)));
    BOOST_CHECK_EQUAL(bool(result[3]), !(int(4)));
}

/// \internal_
/// Tests for binary operators that work for floating
/// point types.
/// Expands to one test case named `<op_name>_binary_operator` that
/// evaluates `op` on float valarrays in three forms: scalar op valarray,
/// valarray op scalar, and valarray op valarray. Each element is compared
/// with the host-side result via BOOST_CHECK_CLOSE (1e-4 percent).
/// The only zero in the data is an element of the left-hand valarray
/// (data2), so the `/` instantiation never divides by zero.
/// The valarray declarations carry no template argument list in this copy
/// of the file (extraction damage).
#define BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR(op, op_name) \
BOOST_AUTO_TEST_CASE(op_name##_binary_operator) \
{ \
    float data1[] = { 1, 2, 3, 4 }; \
    float data2[] = { 4, 2, 3, 0 }; \
    boost::compute::valarray array1(data1, 4); \
    boost::compute::valarray array2(data2, 4); \
    boost::compute::system::finish(); \
    \
    boost::compute::valarray result = 2.0f op array1; \
    boost::compute::system::finish(); \
    BOOST_CHECK_CLOSE(float(result[0]), float(2.0f op 1.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(result[1]), float(2.0f op 2.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(result[2]), float(2.0f op 3.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(result[3]), float(2.0f op 4.0f), 1e-4f); \
    \
    result = array1 op 2.0f; \
    boost::compute::system::finish(); \
    BOOST_CHECK_CLOSE(float(result[0]), float(1.0f op 2.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(result[1]), float(2.0f op 2.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(result[2]), float(3.0f op 2.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(result[3]), float(4.0f op 2.0f), 1e-4f); \
    \
    result = array2 op array1; \
    boost::compute::system::finish(); \
    BOOST_CHECK_CLOSE(float(result[0]), float(4.0f op 1.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(result[1]), float(2.0f op 2.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(result[2]), float(3.0f op 3.0f), 1e-4f); \
    BOOST_CHECK_CLOSE(float(result[3]), float(0.0f op 4.0f), 1e-4f); \
}

// instantiate the float tests for +, -, * and /
BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR(+, plus)
BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR(-, minus)
BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR(*, multiplies)
BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR(/, divides)

#undef BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR

/// \internal_
/// Tests for binary operators that do NOT work for floating
/// point types.
/// Note: modulo operator works only for integer types.
/// Expands to one test case named `<op_name>_binary_operator` that
/// evaluates `op` on int valarrays in three forms: scalar op valarray,
/// valarray op scalar, and valarray op valarray. Each element is compared
/// with the host-side result via BOOST_CHECK_EQUAL.
/// The valarray declarations carry no template argument list in this copy
/// of the file (extraction damage).
#define BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR_NO_FP(op, op_name) \
BOOST_AUTO_TEST_CASE(op_name##_binary_operator) \
{ \
    int data1[] = { 1, 2, 3, 4 }; \
    int data2[] = { 4, 5, 2, 1 }; \
    boost::compute::valarray array1(data1, 4); \
    boost::compute::valarray array2(data2, 4); \
    boost::compute::system::finish(); \
    \
    boost::compute::valarray result = 5 op array1; \
    boost::compute::system::finish(); \
    BOOST_CHECK_EQUAL(int(result[0]), int(5 op 1)); \
    BOOST_CHECK_EQUAL(int(result[1]), int(5 op 2)); \
    BOOST_CHECK_EQUAL(int(result[2]), int(5 op 3)); \
    BOOST_CHECK_EQUAL(int(result[3]), int(5 op 4)); \
    \
    result = array1 op 5; \
    boost::compute::system::finish(); \
    BOOST_CHECK_EQUAL(int(result[0]), int(1 op 5)); \
    BOOST_CHECK_EQUAL(int(result[1]), int(2 op 5)); \
    BOOST_CHECK_EQUAL(int(result[2]), int(3 op 5)); \
    BOOST_CHECK_EQUAL(int(result[3]), int(4 op 5)); \
    \
    result = array1 op array2; \
    boost::compute::system::finish(); \
    BOOST_CHECK_EQUAL(int(result[0]), int(1 op 4)); \
    BOOST_CHECK_EQUAL(int(result[1]), int(2 op 5)); \
    BOOST_CHECK_EQUAL(int(result[2]), int(3 op 2)); \
    BOOST_CHECK_EQUAL(int(result[3]), int(4 op 1)); \
}

// instantiate the integer tests for ^, &, |, << and >>
// (no `%` instantiation appears in this list)
BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR_NO_FP(^, bit_xor)
BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR_NO_FP(&, bit_and)
BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR_NO_FP(|, bit_or)
BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR_NO_FP(<<, shift_left)
BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR_NO_FP(>>, shift_right)

#undef BOOST_COMPUTE_TEST_VALARRAY_BINARY_OPERATOR_NO_FP

/// \internal_
/// Macro for generating tests for valarray comparison operators.
#define BOOST_COMPUTE_TEST_VALARRAY_COMPARISON_OPERATOR(op, op_name) \ BOOST_AUTO_TEST_CASE(op_name##_comparision_operator) \ { \ int data1[] = { 1, 2, 0, 4 }; \ int data2[] = { 4, 0, 2, 1 }; \ boost::compute::valarray array1(data1, 4); \ boost::compute::valarray array2(data2, 4); \ boost::compute::system::finish(); \ \ boost::compute::valarray result = 2 op array1; \ boost::compute::system::finish(); \ BOOST_CHECK_EQUAL(bool(result[0]), bool(2 op 1)); \ BOOST_CHECK_EQUAL(bool(result[1]), bool(2 op 2)); \ BOOST_CHECK_EQUAL(bool(result[2]), bool(2 op 0)); \ BOOST_CHECK_EQUAL(bool(result[3]), bool(2 op 4)); \ \ result = array1 op 2; \ boost::compute::system::finish(); \ BOOST_CHECK_EQUAL(bool(result[0]), bool(1 op 2)); \ BOOST_CHECK_EQUAL(bool(result[1]), bool(2 op 2)); \ BOOST_CHECK_EQUAL(bool(result[2]), bool(0 op 2)); \ BOOST_CHECK_EQUAL(bool(result[3]), bool(4 op 2)); \ \ result = array1 op array2; \ boost::compute::system::finish(); \ BOOST_CHECK_EQUAL(bool(result[0]), bool(1 op 4)); \ BOOST_CHECK_EQUAL(bool(result[1]), bool(2 op 0)); \ BOOST_CHECK_EQUAL(bool(result[2]), bool(0 op 2)); \ BOOST_CHECK_EQUAL(bool(result[3]), bool(4 op 1)); \ } BOOST_COMPUTE_TEST_VALARRAY_COMPARISON_OPERATOR(==, equal_to) BOOST_COMPUTE_TEST_VALARRAY_COMPARISON_OPERATOR(!=, not_equal_to) BOOST_COMPUTE_TEST_VALARRAY_COMPARISON_OPERATOR(>, greater) BOOST_COMPUTE_TEST_VALARRAY_COMPARISON_OPERATOR(<, less) BOOST_COMPUTE_TEST_VALARRAY_COMPARISON_OPERATOR(>=, greater_equal) BOOST_COMPUTE_TEST_VALARRAY_COMPARISON_OPERATOR(<=, less_equal) /// \internal_ /// Macro for generating tests for valarray binary logical operators. 
#define BOOST_COMPUTE_TEST_VALARRAY_LOGICAL_OPERATOR(op, op_name) \ BOOST_COMPUTE_TEST_VALARRAY_COMPARISON_OPERATOR(op, op_name) BOOST_COMPUTE_TEST_VALARRAY_LOGICAL_OPERATOR(&&, logical_and) BOOST_COMPUTE_TEST_VALARRAY_LOGICAL_OPERATOR(||, logical_or) #undef BOOST_COMPUTE_TEST_VALARRAY_LOGICAL_OPERATOR #undef BOOST_COMPUTE_TEST_VALARRAY_COMPARISON_OPERATOR BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_vector.cpp000066400000000000000000000336141263566244600171030ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestVector #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace bc = boost::compute; namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(concept_check) { BOOST_CONCEPT_ASSERT((boost::Container >)); //BOOST_CONCEPT_ASSERT((boost::SequenceConcept >)); BOOST_CONCEPT_ASSERT((boost::ReversibleContainer >)); BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator::iterator>)); BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator::const_iterator>)); } BOOST_AUTO_TEST_CASE(size) { bc::vector empty_vector(context); BOOST_CHECK_EQUAL(empty_vector.size(), size_t(0)); BOOST_CHECK_EQUAL(empty_vector.empty(), true); bc::vector int_vector(10, context); BOOST_CHECK_EQUAL(int_vector.size(), size_t(10)); BOOST_CHECK_EQUAL(int_vector.empty(), false); } BOOST_AUTO_TEST_CASE(resize) { bc::vector int_vector(10, context); BOOST_CHECK_EQUAL(int_vector.size(), size_t(10)); int_vector.resize(20, queue); BOOST_CHECK_EQUAL(int_vector.size(), size_t(20)); 
int_vector.resize(5, queue); BOOST_CHECK_EQUAL(int_vector.size(), size_t(5)); } BOOST_AUTO_TEST_CASE(array_operator) { bc::vector vector(10); bc::fill(vector.begin(), vector.end(), 0); CHECK_RANGE_EQUAL(int, 10, vector, (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); bc::fill(vector.begin(), vector.end(), 42); CHECK_RANGE_EQUAL(int, 10, vector, (42, 42, 42, 42, 42, 42, 42, 42, 42, 42)); vector[0] = 9; CHECK_RANGE_EQUAL(int, 10, vector, (9, 42, 42, 42, 42, 42, 42, 42, 42, 42)); } BOOST_AUTO_TEST_CASE(front_and_back) { int int_data[] = { 1, 2, 3, 4, 5 }; bc::vector int_vector(5, context); bc::copy(int_data, int_data + 5, int_vector.begin(), queue); queue.finish(); BOOST_CHECK_EQUAL(int_vector.front(), 1); BOOST_CHECK_EQUAL(int_vector.back(), 5); bc::fill(int_vector.begin(), int_vector.end(), 10, queue); queue.finish(); BOOST_CHECK_EQUAL(int_vector.front(), 10); BOOST_CHECK_EQUAL(int_vector.back(), 10); float float_data[] = { 1.1f, 2.2f, 3.3f, 4.4f, 5.5f }; bc::vector float_vector(5, context); bc::copy(float_data, float_data + 5, float_vector.begin(), queue); queue.finish(); BOOST_CHECK_EQUAL(float_vector.front(), 1.1f); BOOST_CHECK_EQUAL(float_vector.back(), 5.5f); } BOOST_AUTO_TEST_CASE(host_iterator_constructor) { std::vector host_vector; host_vector.push_back(10); host_vector.push_back(20); host_vector.push_back(30); host_vector.push_back(40); bc::vector device_vector(host_vector.begin(), host_vector.end(), queue); CHECK_RANGE_EQUAL(int, 4, device_vector, (10, 20, 30, 40)); } BOOST_AUTO_TEST_CASE(device_iterator_constructor) { int data[] = { 1, 5, 10, 15 }; bc::vector a(data, data + 4, queue); CHECK_RANGE_EQUAL(int, 4, a, (1, 5, 10, 15)); bc::vector b(a.begin(), a.end(), queue); CHECK_RANGE_EQUAL(int, 4, b, (1, 5, 10, 15)); } BOOST_AUTO_TEST_CASE(push_back) { bc::vector vector(context); BOOST_VERIFY(vector.empty()); vector.push_back(12, queue); BOOST_VERIFY(!vector.empty()); BOOST_CHECK_EQUAL(vector.size(), size_t(1)); CHECK_RANGE_EQUAL(int, 1, vector, (12)); 
vector.push_back(24, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(2)); CHECK_RANGE_EQUAL(int, 2, vector, (12, 24)); vector.push_back(36, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(3)); CHECK_RANGE_EQUAL(int, 3, vector, (12, 24, 36)); for(int i = 0; i < 100; i++){ vector.push_back(i, queue); } BOOST_CHECK_EQUAL(vector.size(), size_t(103)); BOOST_CHECK_EQUAL(vector[0], 12); BOOST_CHECK_EQUAL(vector[1], 24); BOOST_CHECK_EQUAL(vector[2], 36); BOOST_CHECK_EQUAL(vector[102], 99); } BOOST_AUTO_TEST_CASE(at) { bc::vector vector(context); vector.push_back(1, queue); vector.push_back(2, queue); vector.push_back(3, queue); BOOST_CHECK_EQUAL(vector.at(0), 1); BOOST_CHECK_EQUAL(vector.at(1), 2); BOOST_CHECK_EQUAL(vector.at(2), 3); BOOST_CHECK_THROW(vector.at(3), std::out_of_range); } BOOST_AUTO_TEST_CASE(erase) { int data[] = { 1, 2, 5, 7, 9 }; bc::vector vector(data, data + 5, queue); queue.finish(); BOOST_CHECK_EQUAL(vector.size(), 5); vector.erase(vector.begin() + 1, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, vector, (1, 5, 7, 9)); vector.erase(vector.begin() + 2, vector.end(), queue); BOOST_CHECK_EQUAL(vector.size(), size_t(2)); CHECK_RANGE_EQUAL(int, 2, vector, (1, 5)); } BOOST_AUTO_TEST_CASE(max_size) { bc::vector vector(100, context); BOOST_CHECK_EQUAL(vector.size(), size_t(100)); BOOST_VERIFY(vector.max_size() > vector.size()); } #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES BOOST_AUTO_TEST_CASE(move_ctor) { int data[] = { 11, 12, 13, 14 }; bc::vector a(data, data + 4, queue); BOOST_CHECK_EQUAL(a.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, a, (11, 12, 13, 14)); bc::vector b(std::move(a)); BOOST_CHECK(a.size() == 0); BOOST_CHECK(a.get_buffer().get() == 0); BOOST_CHECK_EQUAL(b.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, b, (11, 12, 13, 14)); } BOOST_AUTO_TEST_CASE(move_ctor_custom_alloc) { int data[] = { 11, 12, 13, 14 }; bc::vector > a(data, data + 4, queue); BOOST_CHECK_EQUAL(a.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, a, 
(11, 12, 13, 14)); bc::vector > b(std::move(a)); BOOST_CHECK(a.size() == 0); BOOST_CHECK(a.get_buffer().get() == 0); BOOST_CHECK_EQUAL(b.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, b, (11, 12, 13, 14)); } #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES #ifdef BOOST_COMPUTE_USE_CPP11 BOOST_AUTO_TEST_CASE(initializer_list_ctor) { bc::vector vector = { 2, 4, 6, 8 }; BOOST_CHECK_EQUAL(vector.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, vector, (2, 4, 6, 8)); } #endif // BOOST_COMPUTE_USE_CPP11 BOOST_AUTO_TEST_CASE(vector_double) { if(!device.supports_extension("cl_khr_fp64")){ return; } bc::vector vector(context); vector.push_back(1.21, queue); vector.push_back(3.14, queue); vector.push_back(7.89, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(3)); CHECK_RANGE_EQUAL(double, 3, vector, (1.21, 3.14, 7.89)); bc::vector other(vector.begin(), vector.end(), queue); CHECK_RANGE_EQUAL(double, 3, other, (1.21, 3.14, 7.89)); bc::fill(other.begin(), other.end(), 8.95, queue); CHECK_RANGE_EQUAL(double, 3, other, (8.95, 8.95, 8.95)); } BOOST_AUTO_TEST_CASE(vector_iterator) { bc::vector vector(context); vector.push_back(2, queue); vector.push_back(4, queue); vector.push_back(6, queue); vector.push_back(8, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(4)); BOOST_CHECK_EQUAL(vector[0], 2); BOOST_CHECK_EQUAL(*vector.begin(), 2); BOOST_CHECK_EQUAL(vector.begin()[0], 2); BOOST_CHECK_EQUAL(vector[1], 4); BOOST_CHECK_EQUAL(*(vector.begin()+1), 4); BOOST_CHECK_EQUAL(vector.begin()[1], 4); BOOST_CHECK_EQUAL(vector[2], 6); BOOST_CHECK_EQUAL(*(vector.begin()+2), 6); BOOST_CHECK_EQUAL(vector.begin()[2], 6); BOOST_CHECK_EQUAL(vector[3], 8); BOOST_CHECK_EQUAL(*(vector.begin()+3), 8); BOOST_CHECK_EQUAL(vector.begin()[3], 8); } BOOST_AUTO_TEST_CASE(vector_erase_remove) { int data[] = { 2, 6, 3, 4, 2, 4, 5, 6, 1 }; bc::vector vector(data, data + 9, queue); BOOST_CHECK_EQUAL(vector.size(), size_t(9)); // remove 4's vector.erase(bc::remove(vector.begin(), vector.end(), 4, queue), vector.end()); 
BOOST_CHECK_EQUAL(vector.size(), size_t(7)); BOOST_VERIFY(bc::find(vector.begin(), vector.end(), 4, queue) == vector.end()); // remove 2's vector.erase(bc::remove(vector.begin(), vector.end(), 2, queue), vector.end()); BOOST_CHECK_EQUAL(vector.size(), size_t(5)); BOOST_VERIFY(bc::find(vector.begin(), vector.end(), 2, queue) == vector.end()); // remove 6's vector.erase(bc::remove(vector.begin(), vector.end(), 6, queue), vector.end()); BOOST_CHECK_EQUAL(vector.size(), size_t(3)); BOOST_VERIFY(bc::find(vector.begin(), vector.end(), 6, queue) == vector.end()); // check the rest of the values CHECK_RANGE_EQUAL(int, 3, vector, (3, 5, 1)); } // see issue #132 (https://github.com/boostorg/compute/issues/132) BOOST_AUTO_TEST_CASE(swap_between_contexts) { compute::context ctx1(device); compute::context ctx2(device); compute::vector vec1(32, ctx1); compute::vector vec2(32, ctx2); BOOST_CHECK(vec1.get_allocator().get_context() == ctx1); BOOST_CHECK(vec2.get_allocator().get_context() == ctx2); vec1.swap(vec2); BOOST_CHECK(vec1.get_allocator().get_context() == ctx2); BOOST_CHECK(vec2.get_allocator().get_context() == ctx1); vec1.resize(64); vec2.resize(64); } BOOST_AUTO_TEST_CASE(assign_from_std_vector) { std::vector host_vector; host_vector.push_back(1); host_vector.push_back(9); host_vector.push_back(7); host_vector.push_back(9); compute::vector device_vector(context); device_vector.assign(host_vector.begin(), host_vector.end(), queue); BOOST_CHECK_EQUAL(device_vector.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, device_vector, (1, 9, 7, 9)); } BOOST_AUTO_TEST_CASE(assign_constant_value) { compute::vector device_vector(10, context); device_vector.assign(3, 6.28f, queue); BOOST_CHECK_EQUAL(device_vector.size(), size_t(3)); CHECK_RANGE_EQUAL(float, 3, device_vector, (6.28f, 6.28f, 6.28f)); } BOOST_AUTO_TEST_CASE(resize_throw_exception) { // create vector with eight items int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; compute::vector vec(data, data + 8, queue); // try to resize to 2x 
larger than the global memory size BOOST_CHECK_THROW( vec.resize((device.global_memory_size() / sizeof(int)) * 2), boost::compute::opencl_error ); // ensure vector data is still the same BOOST_CHECK_EQUAL(vec.size(), 8); CHECK_RANGE_EQUAL(int, 8, vec, (1, 2, 3, 4, 5, 6, 7, 8)); } BOOST_AUTO_TEST_CASE(copy_ctor_custom_alloc) { int data[] = { 11, 12, 13, 14 }; bc::vector > a(data, data + 4, queue); BOOST_CHECK_EQUAL(a.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, a, (11, 12, 13, 14)); bc::vector > b(a, queue); BOOST_CHECK_EQUAL(b.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, b, (11, 12, 13, 14)); } BOOST_AUTO_TEST_CASE(copy_ctor_different_alloc) { int data[] = { 11, 12, 13, 14 }; bc::vector a(data, data + 4, queue); BOOST_CHECK_EQUAL(a.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, a, (11, 12, 13, 14)); bc::vector > b(a, queue); BOOST_CHECK_EQUAL(b.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, b, (11, 12, 13, 14)); std::vector host_vector; host_vector.push_back(1); host_vector.push_back(9); host_vector.push_back(7); host_vector.push_back(9); bc::vector > c(host_vector, queue); BOOST_CHECK_EQUAL(c.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, c, (1, 9, 7, 9)); } BOOST_AUTO_TEST_CASE(assignment_operator) { int adata[] = { 11, 12, 13, 14 }; bc::vector a(adata, adata + 4, queue); BOOST_CHECK_EQUAL(a.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, a, (11, 12, 13, 14)); bc::vector b = a; BOOST_CHECK_EQUAL(b.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, b, (11, 12, 13, 14)); bc::vector > c = b; BOOST_CHECK_EQUAL(c.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, c, (11, 12, 13, 14)); int ddata[] = { 21, 22, 23 }; bc::vector > d(ddata, ddata + 3, queue); BOOST_CHECK_EQUAL(d.size(), size_t(3)); CHECK_RANGE_EQUAL(int, 3, d, (21, 22, 23)); a = d; BOOST_CHECK_EQUAL(a.size(), size_t(3)); CHECK_RANGE_EQUAL(int, 3, a, (21, 22, 23)); std::vector host_vector; host_vector.push_back(1); host_vector.push_back(9); host_vector.push_back(7); host_vector.push_back(9); d = host_vector; 
BOOST_CHECK_EQUAL(d.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, d, (1, 9, 7, 9)); } BOOST_AUTO_TEST_CASE(swap_ctor_custom_alloc) { int adata[] = { 11, 12, 13, 14 }; bc::vector > a(adata, adata + 4, queue); BOOST_CHECK_EQUAL(a.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, a, (11, 12, 13, 14)); int bdata[] = { 21, 22, 23 }; bc::vector > b(bdata, bdata + 3, queue); BOOST_CHECK_EQUAL(b.size(), size_t(3)); CHECK_RANGE_EQUAL(int, 3, b, (21, 22, 23)); a.swap(b); BOOST_CHECK_EQUAL(a.size(), size_t(3)); CHECK_RANGE_EQUAL(int, 3, a, (21, 22, 23)); BOOST_CHECK_EQUAL(b.size(), size_t(4)); CHECK_RANGE_EQUAL(int, 4, b, (11, 12, 13, 14)); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_wait_list.cpp000066400000000000000000000037351263566244600176010ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013-2014 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestWaitList #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(create_wait_list) { compute::wait_list events; BOOST_CHECK_EQUAL(events.size(), 0); BOOST_CHECK_EQUAL(events.empty(), true); BOOST_CHECK(events.get_event_ptr() == 0); } BOOST_AUTO_TEST_CASE(insert_future) { // create vector on the host std::vector host_vector(4); std::fill(host_vector.begin(), host_vector.end(), 7); // create vector on the device compute::vector device_vector(4, context); // create wait list compute::wait_list events; // copy values to device compute::future future = compute::copy_async( host_vector.begin(), host_vector.end(), device_vector.begin(), queue ); // add future event to the wait list events.insert(future); BOOST_CHECK_EQUAL(events.size(), 1); BOOST_CHECK(events.get_event_ptr() != 0); // wait for copy to complete events.wait(); // check values CHECK_RANGE_EQUAL(int, 4, device_vector, (7, 7, 7, 7)); // clear the event list events.clear(); BOOST_CHECK_EQUAL(events.size(), 0); } BOOST_AUTO_TEST_SUITE_END() compute-0.5/test/test_zip_iterator.cpp000066400000000000000000000151271263566244600203130ustar00rootroot00000000000000//---------------------------------------------------------------------------// // Copyright (c) 2013 Kyle Lutz // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------// #define BOOST_TEST_MODULE TestZipIterator #include #include #include #include #include #include #include #include #include #include #include #include #include "check_macros.hpp" #include "context_setup.hpp" namespace compute = boost::compute; BOOST_AUTO_TEST_CASE(value_type) { BOOST_STATIC_ASSERT(( boost::is_same< boost::compute::zip_iterator< boost::tuple< boost::compute::buffer_iterator, boost::compute::buffer_iterator > >::value_type, boost::tuple >::value )); } BOOST_AUTO_TEST_CASE(distance) { boost::compute::vector char_vector(5, context); boost::compute::vector int_vector(5, context); BOOST_CHECK_EQUAL( std::distance( boost::compute::make_zip_iterator( boost::make_tuple( char_vector.begin(), int_vector.begin() ) ), boost::compute::make_zip_iterator( boost::make_tuple( char_vector.end(), int_vector.end() ) ) ), ptrdiff_t(5) ); BOOST_CHECK_EQUAL( std::distance( boost::compute::make_zip_iterator( boost::make_tuple( char_vector.begin(), int_vector.begin() ) ) + 1, boost::compute::make_zip_iterator( boost::make_tuple( char_vector.end(), int_vector.end() ) ) - 1 ), ptrdiff_t(3) ); BOOST_CHECK_EQUAL( std::distance( boost::compute::make_zip_iterator( boost::make_tuple( char_vector.begin() + 2, int_vector.begin() + 2 ) ), boost::compute::make_zip_iterator( boost::make_tuple( char_vector.end() - 1, int_vector.end() - 1 ) ) ), ptrdiff_t(2) ); } BOOST_AUTO_TEST_CASE(copy) { // create three separate vectors of three different types char char_data[] = { 'x', 'y', 'z' }; boost::compute::vector char_vector(char_data, char_data + 3, queue); int int_data[] = { 4, 7, 9 }; boost::compute::vector int_vector(int_data, int_data + 3, queue); float float_data[] = { 3.2f, 4.5f, 7.6f }; boost::compute::vector float_vector(float_data, float_data + 3, queue); // zip all three vectors into a single tuple vector boost::compute::vector > tuple_vector(3, context); boost::compute::copy( 
boost::compute::make_zip_iterator( boost::make_tuple( char_vector.begin(), int_vector.begin(), float_vector.begin() ) ), boost::compute::make_zip_iterator( boost::make_tuple( char_vector.end(), int_vector.end(), float_vector.end() ) ), tuple_vector.begin(), queue ); // copy tuple vector to host std::vector > host_vector(3); boost::compute::copy( tuple_vector.begin(), tuple_vector.end(), host_vector.begin(), queue ); // check tuple values BOOST_CHECK_EQUAL(host_vector[0], boost::make_tuple('x', 4, 3.2f)); BOOST_CHECK_EQUAL(host_vector[1], boost::make_tuple('y', 7, 4.5f)); BOOST_CHECK_EQUAL(host_vector[2], boost::make_tuple('z', 9, 7.6f)); } BOOST_AUTO_TEST_CASE(zip_iterator_get) { int data1[] = { 0, 2, 4, 6, 8 }; int data2[] = { 1, 3, 5, 7, 9 }; compute::vector input1(data1, data1 + 5, queue); compute::vector input2(data2, data2 + 5, queue); compute::vector output(5, context); // extract first component from (input1) compute::transform( compute::make_zip_iterator( boost::make_tuple(input1.begin()) ), compute::make_zip_iterator( boost::make_tuple(input1.end()) ), output.begin(), compute::get<0>(), queue ); CHECK_RANGE_EQUAL(int, 5, output, (0, 2, 4, 6, 8)); // extract first component from (input2, input1) compute::transform( compute::make_zip_iterator( boost::make_tuple(input2.begin(), input1.begin()) ), compute::make_zip_iterator( boost::make_tuple(input2.end(), input1.end()) ), output.begin(), compute::get<0>(), queue ); CHECK_RANGE_EQUAL(int, 5, output, (1, 3, 5, 7, 9)); // extract second component from (input1, input2, input1) compute::transform( compute::make_zip_iterator( boost::make_tuple(input1.begin(), input2.begin(), input1.begin()) ), compute::make_zip_iterator( boost::make_tuple(input1.end(), input2.end(), input1.end()) ), output.begin(), compute::get<1>(), queue ); CHECK_RANGE_EQUAL(int, 5, output, (1, 3, 5, 7, 9)); } BOOST_AUTO_TEST_CASE(zip_constant_iterator) { compute::vector result(4, context); compute::transform( compute::make_zip_iterator( 
boost::make_tuple( compute::make_constant_iterator(7) ) ), compute::make_zip_iterator( boost::make_tuple( compute::make_constant_iterator(7, result.size()) ) ), result.begin(), compute::get<0>(), queue ); CHECK_RANGE_EQUAL(int, 4, result, (7, 7, 7, 7)); } BOOST_AUTO_TEST_SUITE_END()