pax_global_header00006660000000000000000000000064141313162530014511gustar00rootroot0000000000000052 comment=3f420ef735672e439097d020db605778dbc4a6a1 pocl-1.8/000077500000000000000000000000001413131625300123165ustar00rootroot00000000000000pocl-1.8/.drone.yml000066400000000000000000000112671413131625300142350ustar00rootroot00000000000000kind: pipeline name: amd64_ub2004_distro platform: os: linux arch: amd64 steps: - name: build_and_test image: amd64/ubuntu:20.04 environment: POCL_CACHE_DIR: /tmp/cache POCL_MAX_PTHREAD_COUNT: 2 DEBIAN_FRONTEND: noninteractive commands: - apt update - apt upgrade -y - apt install -y build-essential ocl-icd-libopencl1 cmake git pkg-config libclang-12-dev libclang-cpp12-dev clang-12 llvm-12-dev make ninja-build ocl-icd-libopencl1 ocl-icd-dev ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils - mkdir build - cd build - cmake -DKERNELLIB_HOST_CPU_VARIANTS=distro -DPOCL_ICD_ABSOLUTE_PATH=OFF -DENABLE_POCL_BUILDING=OFF -DCMAKE_INSTALL_PREFIX=/usr -G Ninja .. - ninja - ninja install - clinfo - rm CTestCustom.cmake - ctest -j32 --output-on-failure -L internal --- kind: pipeline name: arm64_ub1804_llvm6 platform: os: linux arch: arm64 steps: - name: build_and_test image: arm64v8/ubuntu:18.04 environment: POCL_CACHE_DIR: /tmp/cache POCL_MAX_PTHREAD_COUNT: 2 DEBIAN_FRONTEND: noninteractive commands: - apt update - apt upgrade -y - apt install -y build-essential ocl-icd-libopencl1 cmake git pkg-config libclang-6.0-dev clang-6.0 llvm-6.0 make ninja-build ocl-icd-libopencl1 ocl-icd-dev ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils - mkdir build - cd build - cmake -DDEVELOPER_MODE=ON -DLLC_HOST_CPU=thunderx -DCMAKE_INSTALL_PREFIX=/usr -G Ninja .. 
- ninja - ninja install - clinfo - ctest -j32 --output-on-failure -L internal --- kind: pipeline name: arm64_ub1804_llvm9 platform: os: linux arch: arm64 steps: - name: build_and_test image: arm64v8/ubuntu:18.04 environment: POCL_BUILDING: 1 POCL_CACHE_DIR: /tmp/cache POCL_MAX_PTHREAD_COUNT: 2 DEBIAN_FRONTEND: noninteractive commands: - apt update - apt upgrade -y - apt install -y build-essential ocl-icd-libopencl1 cmake git pkg-config libclang-9-dev clang-9 llvm-9 make ninja-build ocl-icd-libopencl1 ocl-icd-dev ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils - mkdir build - cd build - cmake -DDEVELOPER_MODE=ON -DLLC_HOST_CPU=thunderx -DCMAKE_INSTALL_PREFIX=/usr -G Ninja .. - ninja - ninja install - clinfo - ctest -j32 --output-on-failure -L internal --- kind: pipeline name: arm64_ub2004_llvm10 platform: os: linux arch: arm64 steps: - name: build_and_test image: arm64v8/ubuntu:20.04 environment: POCL_BUILDING: 1 POCL_CACHE_DIR: /tmp/cache POCL_MAX_PTHREAD_COUNT: 2 DEBIAN_FRONTEND: noninteractive commands: - apt update - apt upgrade -y - apt install -y build-essential ocl-icd-libopencl1 cmake git pkg-config libclang-10-dev libclang-cpp10-dev clang-10 llvm-10-dev make ninja-build ocl-icd-libopencl1 ocl-icd-dev ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils - mkdir build - cd build - cmake -DDEVELOPER_MODE=ON -DLLC_HOST_CPU=thunderx -DCMAKE_INSTALL_PREFIX=/usr -G Ninja .. 
- ninja - ninja install - clinfo - ctest -j32 --output-on-failure -L internal --- kind: pipeline name: arm64_ub2004_llvm12 platform: os: linux arch: arm64 steps: - name: build_and_test image: arm64v8/ubuntu:20.04 environment: POCL_BUILDING: 1 POCL_CACHE_DIR: /tmp/cache POCL_MAX_PTHREAD_COUNT: 2 DEBIAN_FRONTEND: noninteractive commands: - apt update - apt upgrade -y - apt install -y build-essential ocl-icd-libopencl1 cmake git pkg-config libclang-12-dev libclang-cpp12-dev clang-12 llvm-12-dev make ninja-build ocl-icd-libopencl1 ocl-icd-dev ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils - mkdir build - cd build - cmake -DDEVELOPER_MODE=ON -DLLC_HOST_CPU=thunderx -DCMAKE_INSTALL_PREFIX=/usr -G Ninja .. - ninja - ninja install - clinfo - ctest -j32 --output-on-failure -L internal --- kind: pipeline name: arm32_ub1804_llvm9 platform: os: linux arch: arm steps: - name: build_and_test image: arm32v7/ubuntu:18.04 environment: POCL_BUILDING: 1 POCL_CACHE_DIR: /tmp/cache POCL_MAX_PTHREAD_COUNT: 2 DEBIAN_FRONTEND: noninteractive commands: - apt update - apt upgrade -y - apt install -y build-essential ocl-icd-libopencl1 cmake git pkg-config libclang-9-dev clang-9 llvm-9 make ninja-build ocl-icd-libopencl1 ocl-icd-dev ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils - mkdir build - cd build - cmake -DENABLE_FP64=OFF -DDEVELOPER_MODE=ON -DLLC_HOST_CPU=cortex-a15 -DLLC_TRIPLE=armv7l-unknown-linux-gnueabihf -DEXTRA_KERNEL_FLAGS="-mfloat-abi=hard -mfpu=neon" -DCMAKE_INSTALL_PREFIX=/usr -G Ninja .. 
- ninja - ninja install - clinfo - ctest -j32 --output-on-failure -L internal -E test_issue_757 pocl-1.8/.gitattributes000066400000000000000000000033701413131625300152140ustar00rootroot00000000000000doc/benchmark_results/ export-ignore doc/buildbot/ export-ignore doc/luxmark.txt export-ignore doc/handling_loops.txt export-ignore doc/LAUNDRY export-ignore doc/notes*.txt export-ignore doc/spir-todo.txt export-ignore doc/ttasim_kernel_capturer.txt export-ignore doc/www/ export-ignore examples/piglit/sorted_ref* export-ignore # this one is ~20M examples/Rodinia/pathfinder.stdout export-ignore lib/kernel/amdgcn export-ignore lib/kernel/convert_type.py export-ignore lib/kernel/libclc-pocl/gen_vectorize.rb export-ignore lib/kernel/sleef/gen* export-ignore scripts/pocl-build.in export-ignore scripts/pocl-kernel.in export-ignore scripts/pocl-workgroup.in export-ignore tests/kernel/test_convert_type.py export-ignore tests/kernel/test_convert_type.sh export-ignore tests/testsuite* export-ignore tests/amdsdk.at export-ignore tests/atlocal.in export-ignore tools/gdb-breakpoints export-ignore tools/scripts/benchmark_barchart.py export-ignore tools/scripts/benchmark.py export-ignore tools/scripts/devel-configure export-ignore # should we include these ? 
android/ export-ignore windows/ export-ignore pocl-1.8/.gitignore000066400000000000000000000021101413131625300143000ustar00rootroot00000000000000build*/* *~ **/.deps **/.libs *.bc *.la *.lo *.o Makefile Makefile.in aclocal.m4 autom4te.cache config.h config2.h config.h.in config.log config.status config/ar-lib config/compile config/config.guess config/config.sub config/depcomp config/install-sh config/ltmain.sh config/missing configure doc/sphinx/build/ examples/EinsteinToolkit/EinsteinToolkit examples/example1/example1 examples/example1-spir32/example1-spir32 examples/example1-spir64/example1-spir examples/example2/example2 examples/example2a/example2a examples/scalarwave/scalarwave examples/standalone/standalone.h examples/trig/trig include/arm/types.h include/cellspu/types.h include/powerpc/types.h include/powerpc64/types.h include/x86_64/types.h install-paths.h libtool lib/CL/kernellib_hash.* m4/libtool.m4 m4/ltoptions.m4 m4/ltsugar.m4 m4/ltversion.m4 m4/lt~obsolete.m4 ocl-vendors/pocl-tests.icd pocl.icd pocl.pc bin/poclcc stamp-h1 # these are created by Qt Creator pocl.config pocl.creator pocl.creator.user pocl.files pocl.includes CMakeLists.txt.includes CMakeLists.txt.user /examples/CLBlast/CLBlast/ pocl-1.8/.mailmap000066400000000000000000000046261413131625300137470ustar00rootroot00000000000000Carlos Sánchez de La Lama Carlos Sánchez de La Lama Carlos Sánchez de La Lama Carlos Sanchez de La Lama Clement Leger Clément Daniel Sanders Daniel Sanders META COSY Erik Schnetter Erik Schnetter Erik Schnetter <> Heikki Kultala Heikki Kultala Heikki Kultala heikki-llvm-svn-testing Heikki Kultala hkultala@cs.tut.fi <> Hugo van der Wijst Kalle Raiskila Kalle Raiskila Kalle Raiskila Kalle Kalle Raiskila Kalle Raiskila <> Kalle Raiskila kraiskil@debian <> Kalle Raiskila kraiskil Kalle Raiskila kraiskil Krishnaraj Raghavendra Bhat Krishnaraj Bhat Krishnaraj Raghavendra Bhat Krishnaraj R Bhat Matias Koskela Michal Babej Michal Babej Pekka Jääskeläinen Pekka Jääskeläinen 
Pekka Jääskeläinen Pekka Jääskeläinen Pekka Jääskeläinen Pekka Jääskeläinen Pekka Jaaskelainen Ville Korhonen Ville Korhonen vkorhonen Vincent Danjean Vladimir Guzma pocl-1.8/.travis.yml000066400000000000000000000041541413131625300144330ustar00rootroot00000000000000sudo: false language: c++ os: - linux matrix: exclude: - os: linux include: - os: linux docker: true compiler: gcc env: LLVM_VERSION=6.0 HWLOC_VERSION=1.11 DOCKERFILE=Ubuntu/16_04.64bit - os: osx compiler: clang env: LLVM_VERSION=6.0 HWLOC_VERSION=2.0 CONDA=True before_install: - if [ "$TRAVIS_OS_NAME" = "osx" ] ; then export MINICONDA_FILE="Miniconda3-latest-MacOSX-x86_64.sh"; else export MINICONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"; fi - if [ "$CONDA" = "True" ] ; then echo "Installing a fresh version of Miniconda."; MINICONDA_URL="https://repo.continuum.io/miniconda"; curl -L -O "${MINICONDA_URL}/${MINICONDA_FILE}"; bash $MINICONDA_FILE -b; source $HOME/miniconda3/bin/activate root; conda config --add channels conda-forge; conda install --yes --quiet llvmdev=${LLVM_VERSION}.* clangdev=${LLVM_VERSION}.* libhwloc=${HWLOC_VERSION}.*; export LD_LIBRARY_PATH=$HOME/miniconda3/lib:$LD_LIBRARY_PATH; fi - if [ "$CONDA" = "True" ] ; then export MY_CMAKE_PREFIX_PATH="-DCMAKE_PREFIX_PATH=$HOME/miniconda3" ; fi - if [ "$TRAVIS_OS_NAME" = "osx" ] ; then export MY_CMAKE_ICD_OFF="-DENABLE_ICD=OFF" ; fi - if [ "$TRAVIS_OS_NAME" = "osx" ] && [ "$CXX" = "clang++" ] ; then MY_CMAKE_LIBCXX="-DCMAKE_CXX_FLAGS=-stdlib=libc++ -DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath,$HOME/miniconda3/lib" ; fi - if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then export GIT_COMMIT="$TRAVIS_COMMIT"; else export GH_PR=$TRAVIS_PULL_REQUEST; fi script: - if [ "$CONDA" = "True" ] ; then mkdir build && cd build; cmake .. -DCMAKE_INSTALL_PREFIX=/tmp $MY_CMAKE_PREFIX_PATH $MY_CMAKE_LIBCXX $MY_CMAKE_ICD_OFF; make -j2 && make check && make install; fi - if [ ! "$DOCKERFILE" = "" ] ; then docker build -f tools/docker/$DOCKERFILE . 
--build-arg GH_PR=$GH_PR --build-arg GH_SLUG=$TRAVIS_REPO_SLUG --build-arg GH_COMMIT=$GIT_COMMIT --build-arg LLVM_VERSION=$LLVM_VERSION -t travis_ci_pocl_test; docker run `docker images -q travis_ci_pocl_test`; fi notifications: email: false pocl-1.8/CHANGES000066400000000000000000001001511413131625300133070ustar00rootroot000000000000001.8 unreleased ============== Notable User Facing Changes --------------------------- - support for LLVM 13 - CMake: Inter-Procedural Optimization is enabled on code of runtime library (libpocl.so is compiled with -flto on systems that support it). - LTTng tracing improved - more command types are traced, and also some synchronous API calls (like clCreateBuffer) are traced. - poclcc, tests and examples can be disabled with CMake options - Valgrind support improved by making Valgrind aware of pocl's reference counting of cl_* objects - kernels which are called by kernels are now force-inlined - Support for NetBSD. - Support for Unix systems without libdl. - PoCL can now (optionally) respond to SIGUSR2 by printing some live debug information. - improved SPIR support for CUDA devices Notable Bug Fixes ----------------- - Fixed a potential crash on Unix systems without sysfs mounted. - Fixed compilation errors when building on macOS. - Fixed POCL_FAST_INIT macro; POCL_INIT_LOCK must be invoked with only one argument. - Fix bin/poclcc to not depend on OpenCL 2.0 symbols - Fixed miscompilation in kernel loops with multiple conditionals with barriers in them. Other ----- - Add cmake options PARALLEL_COMPILE_JOBS, PARALLEL_LINK_JOBS to use ninja's separate compile and link job pools. - Improve memory architecture, buffer migration and allocation. Buffers are now allocated on a device when first used (previously each buffer was allocated on every device in context). - the single global LLVMContext was replaced with multiple LLVMContexts, one per OpenCL cl_context. OpenCL code can now be compiled in parallel when using separate cl_contexts. 
This feature is disabled by default since it significantly slowed down PyOpenCL. This should be resolved by separating LLVM compilation in their own threads in the future. - a new OpenCL extension was added to PoCL: cl_pocl_content_size. The extension allows the user to give optimization hint to PoCL, which will be used internally by PoCL to optimize buffer transfers between multiple devices. 1.7 May 2021 ============ Notable User Facing Changes --------------------------- - Support for LLVM 12. - support for cross-compiling PoCL - Added support for the cl_nv_device_attribute_query extension on CUDA devices. - improved support for SPIR-V binaries when using CPU device: - improved local variables support - OpenCL 2.0 atomics are now supported - work_group_barrier, to_local/to_global are implemented - Implemented OpenCL 3.0 features - clGetDeviceInfo queries - CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES (Minimal implementation) - CL_DEVICE_ATOMIC_FENCE_CAPABILITIES (Minimal implementation) 1.6 December 2020 ================= Notable User Facing Changes --------------------------- - Support for LLVM 11. - CUDA kernels using constant __local blocks are now ABI incompatible with previous release. Users need to delete their pocl cache. - SINGLE_LLVM_LIB CMake option removed. Instead reintroduce STATIC_LLVM and PoCL now relies on llvm-config to provide correct shared/static libraries for linkage. - improved debugging of OpenCL code with CPU driver. See doc/sphinx/source/debug.rst Optimizations ------------- - Improved the PTX code generation for __local blocks. Previously constant __local blocks and __local arguments were using one dynamic shared CUDA memory block with offsets computed at runtime. Now if there is no __local arguments, separate static shared CUDA memory is used. If there are __local arguments, the constant __local blocks are indexed with compile time constants. 
This improves the performance due to better SASS code generation because it avoids what appears to be a pointer aliasing issue. Running SHOC benchmark GEMM with size class 4 on a NVIDIA Titan X gives the following performance improvements. sgemm_n: 23.2% sgemm_t: 18.5% sgemm_n_pcie: 23.3% sgemm_t_pcie: 19.5% dgemm_n: 51.4% dgemm_t: 2.8% dgemm_n_pcie: 51.6% dgemm_t_pcie: 6.9% - Improved handling of command queue barriers: Previously an internal event was added from all previous commands to it, even with in-order queues, causing slowdown with applications that have a lot of commands. Now additional events are omitted in in-order queues. Measured with the PolyBench OpenCL Gramschmidt kernel, execution time went down from 44 seconds to around 0.5 seconds. Notable Bug Fixes ----------------- - Fix LLVM loop vectorizing remarks printing (POCL_VECTORIZER_REMARKS=1). - Fix an issue in which the loop vectorizer produced code with invalid memory reads (issue #757). - Fix compilation error when CMake option SINGLE_LLVM_LIB is set to OFF. - Fix wrongly output dlerror (Undefined symbol) after dlopen, caused by a previous libdl call in an ICD loader (issue #877). - [CPU] safety margin of pocl's CPU driver local memory allocation has been reduced to a much more reasonable value - [CPU] buffer size for OpenCL printf is now configurable with PRINTF_BUFFER_SIZE CMake variable - [CPU] local memory size reported is now the size of last level of non-shared data cache (usually L1 or L2 depending on CPU), if hwloc can determine it. Security -------- - Added a build parameter HARDENING_ENABLE that applies hardening flags present in some modern compilers to produce a more secure libpocl.so with the trade-off in performance. 1.5 April 2020 ============== Notable User Facing Changes --------------------------- - Support for LLVM 10. 
- POCL_TRACE_EVENT, POCL_TRACE_EVENT_OPT and POCL_TRACE_EVENT_FILTER environment variables were renamed to POCL_TRACING, POCL_TRACING_OPT and POCL_TRACING_FILTER, respectively. - Refactored the implementation of convert_T() OpenCL functions to better meet autovectorization criteria under LLVM, thus utilizing device's SIMD ISA capabilities where available; e.g. on an ARM64 Cortex-A72 convert_int8(short8) is 5.5x faster now when measured in a tight loop. - A lot of fixes. Usability --------- - A simple per-kernel execution time statistics atexit() for quick and easy low-impact per-device profiling purposes (relies on event time stamps purely). It can be enabled by setting POCL_TRACING env to 'cq'. 1.4 September 2019 ================== Highlights ---------- - pocl-accel: An example driver and support infrastructure for OpenCL 1.2 CL_DEVICE_TYPE_CUSTOM hardware accelerators which implement a memory mapped control interface. - Improved SPIR and SPIR-V support. clCreateProgramWithIL() implemented, Kernel library (for CPU target) support for SPIR-mangling improved Kernel Compiler --------------- - Specialize work-group functions for global offset (0,0,0). - A pocl installation with clang, hwloc statically linked in is now relocatable. - Clang/LLVM versions older than 6.0 are no longer supported. - Create specialized work-group functions for small (defined by a device driver specific limit) grid dimensions. - Add Range Metadata to various ID queries etc. to improve vectorizing index computation to smaller lane widths and other optimizations. - Passes only the launched kernel to work-group generation and code gen, thus speeding up the compilation process. Misc. ----- - hsa-native: Downgraded the advertised version to 1.2 which is closer to the truth (fixes OCLTest of Glow). - hsa-native: Add support for byval (struct) argument passing. - hsa-native: Allow offsets in block copy. 
Notable Internal Changes ------------------------ - Allow devices to utilize the ROCm-Device-Libs ocml builtins for their builtin libraries if seen fit. https://github.com/RadeonOpenCompute/ROCm-Device-Libs/tree/master/ocml was mirrored in lib/kernel and made it easy to cherry pick implementations to targets' kernel library. - libltdl is replaced with libdl on UNIX platforms. Notable Bug Fixes ----------------- - Fix a race condition in device initialization, which caused issues in applications that cause reinitialization of pocl device drivers (appeared in Glow's OCLTest). Device Driver Specific ---------------------- - hsa-native: Downgraded the advertised version to 1.2 which is closer to the truth (fixes OCLTest of Glow). - hsa-native: Add support for byval (struct) argument passing. - hsa-native: Allow offsets in block copy. 1.3 April 2019 ============== Highlights ---------- - Support for Clang/LLVM 8.0. - Support ICD on OSX. Misc. ----- - Ability to have size_t (basically derived from the largest supported object) smaller than CL_ADDRESS_BITS. This is an unofficial optional extension as the OpenCL standard mandates it to be the same. - POCL_EXTRA_BUILD_FLAGS can be used to force add extra build flags such as '-g' to all clBuildProgram() calls. - Allow building pocl without CPU backend drivers. When set to off, CPU will not appear in the list of OpenCL devices reported by pocl. Controllable via ENABLE_HOST_CPU_DEVICES=off cmake option. - Build logs are now produced also for illegal options passed to the kernel build e.g. via the options parameter of clBuildProgram(). - hsa-native: Device side printf-support and alternative < 1.2 non-standard C99 printf exposing support. - pocl's binary format has been slightly updated (changes are listed in the top of pocl_binary.c file) to version 7, but pocl can still read also the previous version 6 format. 
- Allow local-size-specializing also SPMD-targeted kernels to enable compile time optimization of code depending on the local dimensions. - Support older GLIBC versions. - HSA: Initial experimental support for native-ISA compilation on top of HSA runtime. Tested and works currently only on phsa-runtime. Can be enabled with ENABLE_HSAIL=off cmake option. - Add option to disable installing of OpenCL headers. Notable Bug Fixes ----------------- - Fixed kernel debug symbol generation. - HSA: fix kernel caching. - Fix issue #661: clCreateImage doesn't fail with unsupported image type. - Fix issue #668: handle non-kernel functions with barriers properly. - Fix issue #671: Unable to build pocl with CUDA support with LLVM 7 and host GCC 8.2. - Fix image format/size handling with multiple devices in context. - Fix padding issue with context arrays that manifested as unaligned access errors after autovectorization. Notable Internal Changes ------------------------ - Add group ids as hidden kernel arguments instead of digging them up from the context struct. - Ability to generate the final binary via separate assembly text + assembler call. Useful for supporting LLVM targets without direct binary emission support. - Use Clang's Driver API for launching the final linkage step. This way we utilize the toolchain registry with correct linkage steps required for the target at hand. - Add 'device_aux_functions' to the driver layer attributes. This can be used to retain device-specific functions required by the target across the pruning of unused globals. - The "default kernels" hack which was used to store kernel metadata, has been removed. Kernel metadata are now stored only once, in cl_program struct; every new cl_kernel structs holds only a pointer. - Major 'pthread' CPU driver cleanup. - Major Workgroup.cc cleanup. 1.2 September 2018 ================== - LLVM 7.0 is now supported. - Version 2.0 of hwloc library is supported. - device-side printf; more consistent printf output. 
1.1 March 2018 ============== Highlights ---------- - LLVM 6.0 is now supported. - Reintroduced experimental SPIR LLVM bitcode support to pocl. Requires LLVM 5 or newer. New experimental feature: SPIR-V support; requires a working llvm-spirv converter. Currently only loading of SPIR-V binaries by pocl is supported, not output. See docs/features.rst for more details. - Refactored pocl cache now does away with LLVM file locks and relies entirely on system calls for proper synchronization. Additionally, cache file writes are now fdatasync()ed. - Improved kernel compilation time (with cold cache). Improvement depends on sources - it's bigger for large programs with many kernels. Luxmark now compiles in seconds instead of dozens of seconds; internal pocl tests run in 30-50% less time. - LLVM Scalarizer pass is now only called for SPMD devices. Performance change varies across tests, but positive seems to outweigh negative. - Implemented uninitialization callback for device drivers. This is triggered when the last cl_context is released. Currently only the CPU driver implements the callback. - Removed libpoclu from installed files; this library contains helpers for pocl's internal tests, and from installed files was only used by poclcc, which has been updated to not rely on it. - POCL_MAX_WORK_GROUP_SIZE is now respected by all devices. This variable limits the reported maximum WG sizes & dimensions; tuning max WG size may improve performance due to cache locality improvement. - CL_PLATFORM_VERSION now contains much more information about how pocl was built. - For users still building with Vecmathlib, performance should be back to levels of pocl 0.14 (there was a huge drop caused by a change in -O0 optimization level of LLVM 5.0). - Improved support for ARM and ARM64 architectures. All internal tests now pass (on Cortex-A53 and Cortex-A15), although it's still far from full conformance. 
1.0 December 2017 ================= Highlights ---------- - Improved automatic local work-group sizing on kernel enqueue, taking into account standard constraints, SIMD width for vectorization as well as the number of compute units available on the device. - Support for NVIDIA GPUs via a new CUDA backend (currently experimental). - Removed support for BBVectorizer. - LLVM 5.0 is now supported. - A few build options have been added for distribution builds, see README.packaging. - Somewhat improved scalability in the CPU driver. CPUs with many cores and programs using a lot of WIs with small kernels can run somewhat faster. - The OpenCL 1.2 conformance tests now pass with selected CPUs. There are some caveats though - see the documentation. - When conformance is enabled, some kernel library functions might be slower than in previous releases. - Pocl now reports OpenCL 1.2 instead of 2.0, except HSA enabled builds. - Updated format of pocl binaries, which is NOT backwards compatible. You'll need to clean any kernel caches. - Fixed several memory leaks. - Unresolved symbols (missing/misspelled functions etc) in a kernel will result in error in clBuildProgram() instead of pocl silently ignoring them and then aborting at dlopen(). - New env variable POCL_MEMORY_LIMIT=N limits the Global memory size reported by pocl to N gigabytes. - New env variable POCL_AFFINITY (defaults to 0): if enabled, sets the affinity of each CPU driver pthread to a single core. - Improved AVX512 support (with LLVM 5.0). Note that even with LLVM 5.0 there are still a few bugs (see pocl issue #555); AVX512 + LLVM 4.0 are a lot more broken, and probably not worth trying. - POCL_DEBUG env var has been revamped. You can now limit debuginfo to these categories (or their combination): all,error,warning,general memory,llvm,events,cache,locking,refcounts,timing,hsa,tce,cuda The old setting POCL_DEBUG=1 now equals error+warning+general. 
0.14 April 2017 =============== Highlights ---------- - Support for LLVM/Clang versions 3.9 and 4.0. Version 3.9 was the first release to include all frontend features for OpenCL 2.0. - Ability to build pocl in a mode where online compilation is not supported to run in hosts without LLVM and binaries compiled offline e.g. using poclcc. - pocl's binary format now can contain all the necessary bits to execute the programs on a host without online compiler support. - Initial support for out-of-order execution of command queues. - It's now possible to cross-compile pocl when building an offline compiler build. - New driver api extension to support out-of-order and asynchronous devices/drivers. - Pthread and HSA drivers are now fully asynchronous. - CMake now the only supported build system, autotools removed. - LTTng tracing support OpenCL Runtime/Platform API support ----------------------------------- - implemented clEnqueueBarrierWithWaitList - implemented clEnqueueMigrateMemObjects Other ----- - Support for reqd_work_group_size attribute in the binary format and poclcc: Generates a static sized work-group function to help optimizations such as autovectorization. - HSA: added support for phsa (https://github.com/HSAFoundation/phsa) - A lot of bug and memory leak fixes. Some notable ones: - Issue #1, passing aggregates as kernel value parameters, can be now fixed with an LLVM patch. - Now it's possible to build pocl without using the fake address space ids, which were a source of many annoying issues. 
0.13 April 2016 =============== Highlights ----------- - Support for LLVM/Clang 3.8 - initial (partial) OpenCL 2.0 support (only Shared Virtual Memory and Atomics are supported ATM) - CMake build system almost on parity with autotools (TCE, all external testsuites) - CMake build is now able to build multiple kernel libraries for different CPUs and let pocl select a suitable one at runtime Bugfixes --------- - clEnqueueCopyImage() now works properly - improved file locking (much less disk access to kernel cache) - Address spaces of structs are handled properly Other ------ - removed custom buffer alloc from pthread device - removed IBM Cell support - removed support for older LLVM versions (before 3.7) - significantly higher performance with a lot of small kernel enqueues (due to improved file locking) - vecmathlib now supports AVX2 - a few more HSA kernel library implementations: l/tgamma, erf(c), hypot - implemented OpenCL 2.0 API calls: clEnqueueSVM*, clSVMalloc/free, clEnqueueFillBuffer, clSetKernelExecInfo, clSetKernelArgSVMPointer, clCreateCommandQueueWithProperties - no device side queues yet - OpenCL 2.0 atomics (C11 atomics subset) for x86-64 and HSA - new testsuites: AMD SDK 3.0, Intel SVM - New CMake-only testsuites: ASL, clBLAS, clFFT, arrayfire - more debugging info (timing, mem stats) - ansi colors with POCL_DEBUG=1 if the output is a terminal 0.12 October 2015 =============== Highlights ---------- - Support for HSA-compliant devices (kernel agents). The GPU of AMD Kaveri now works through pocl with a bunch of test cases in the AMD SDK 2.9 example suite. - New and improved kernel cache system that enables caching kernels with #includes. - Support for LLVM/Clang 3.7. - Little endian MIPS32 now passes almost all pocl testsuite tests. OpenCL Runtime/Platform API support ----------------------------------- - Transferred buffer read/write/copy offset calculation to device driver side. - these driver api functions have changed; got offset as a new argument. 
- Maximum allocation is not limited to 1/4th of total memory size. - Maximum image dimensions grow to fit maximum allocation. - clGetDeviceInfo() reports better information about CPU vendor and cache. - experimental clCreateSubDevices() for pthread CPU device. OpenCL C Builtin Function Implementations ----------------------------------------- - Implemented get_image_dim(). Bugfixes -------- - Avoid infinite loops when users recycle an event waiting list. - Correctly report the base address alignment. - Lots of others. Misc ---- - Tests now using new cl2.hpp, removing dependency on OpenGL headers 0.11 March 2015 =============== Highlights ---------- - Support for LLVM/Clang 3.6 - Kernel compiler cache. - Android support. Kernel compiler --------------- - Do not add implicit barriers to kernels without WG barriers to avoid WI context data overheads. - Setting the POCL_VECTORIZER_REMARKS env to 1 prints out LLVM vectorizer remarks during kernel compilation. - Implicit work-group vectorizer improvements. - POCL_VECTORIZER_REMARKS: When set to 1, prints out remarks produced by the loop vectorizer of LLVM during kernel compilation. OpenCL Runtime/Platform API support ----------------------------------- - Minimal initial implementation for clCreateSubDevices() Bugfixes -------- - Fix falsely detecting operations with side-effects (especially atomic operations) as uniform. This caused deadlock/race situations due to illegal implicit barrier injection. - Fix several reference counting issues. - Memory leak fixes. - ARM/openSUSE build fixes. - Plenty of CMake fixes. New test/example cases ---------------------- - Several Halide examples using its OpenCL backend added. - CloverLeaf Misc. ----- - The old BBVectorizer forked WIVectorizer removed due to bit rot and the general hackiness of it. - Experimental Windows/Visual Studio support (in progress). - Initial support for MIPS architecture (with known issues). - Runtime debug printouts that can be enabled via POCL_DEBUG=1. 
- Streamlined the buffer allocation and fixed several issues with it. 0.10 September 2014 =================== This lists only the most interesting changes. Please refer to the version control log for a full listing. Highlights ---------- - Support for LLVM/Clang 3.5 - Support for building using CMake (experimental with known issues). Bugfixes -------- - TCE: kernel building was broken when running pocl from install location - thread-safety (as required since OpenCL 1.1) improved Kernel compiler --------------- - Final code generation now done via LLVM API calls instead of calling the llc binary. - Sensible linking of functions from the monolithic kernel built-in library. Major compilation speedup for smaller kernels. OpenCL C Builtin Function Implementations ----------------------------------------- - Improved support for halfN functions. - ilogb and ldexp available with vecmathlib OpenCL Runtime/Platform API support ----------------------------------- - Implement clCreateKernelsInProgram() - OpenCL-C shuffle() and shuffle2() implementation added - Device probing modified to allow for device driver to detect device during runtime. POCL_DEVICES still supported. - Checks in clSetKernelArgs() for argument validity - Checks in clEnqueueNDRange() for arguments to be all set - Implement clGetKernelArgInfo() - clEnqueueCopyImage() Misc ---- - ViennaCL testsuite updated to 1.5.1 0.9 January 2014 ================ This lists only the most interesting changes. Please refer to the version control log for a full listing. Highlights ---------- - Major improvements to the kernel compiler's vectorization performance. Twofold speedups in some benchmarks - Support for most of the piglit CL tests OpenCL Runtime/Platform API support ----------------------------------- - clCreateImage2D() and clCreateImage3D() implementation moved to clCreateImage() - Image creation now uses clCreateBuffer() - clBuildProgram: Propagate the supported -cl* compiler options to Clang's OpenCL frontend. 
- clFinish: works with commands with event wait lists. - Preliminary support for OpenCL 2.0 blocks - Added support for clEnqueueNativeKernel() Builtin Function Implementations (OpenCL 1.2 Section 6.12) ---------------------------------------------------------- - Refactored read/write_image()-functions to support refactored device image object. (Only functions used by SimpleImage test) - Introduced new macro based implementation for read/write_image()-functions - Added sampler implementation for CLK_ADDRESS_CLAMP and CLK_ADDRESS_CLAMP_TO_EDGE (Only integer coords supported) - Most of the printf() format strings now work. Missing features: - long on 32-bit architectures Performance Improvements ------------------------ - Kernel compiler now tries to avoid replicating uniform variables, this leads to less context data to be saved per work-item and cleaner kernel bitcode for later optimizations - Use a precompiled header for OpenCL C builtin declarations to speed up the kernel compilation - Kernel compiler vectorization optimizations: - Inject implicit barriers both to loop starts and ends to horizontally vectorize the inner loop. - Reduce "peeling" by minimizing the conditional barrier region by injecting implicit barrier close to the branch points for conditional barrier cases. - Breaking of vector datatypes for more efficient loop vectorization. - Support LLVM 3.4 parallel loop metadata. Misc ---- - Explicitly specify the target architecture/CPU for the kernel compiler. - Kernel compiler frontend defaults to implementation using LLVM API directly instead of the scripts. - __OPENCL_VERSION__ defined to 120 - poclu: helpers for converting between the C float and OpenCL cl_half types - clEnqueueNativeKernel implemented - Static and cmake-builds of LLVM can now be used. Bugfixes -------- - Correct isequal, isnan, and similar routines 0.8 August 2013 ================ This lists only the most interesting changes.
Please refer to the version control log for a full listing. Overall ------- - Added support for LLVM/Clang 3.3. - Dropped support for LLVM/Clang v3.1. - Removed the dependency on llvm-ld (which was copied to pocl-llvm-ld to pocl tree). Now uses llvm-link instead. - Project renamed to Portable Computing Language (pocl). - Luxmark v2.0 now works. - x86_64 can now use efficient math built-in function implementations from the vecmathlib project to avoid libm calls and to exploit the SIMD instructions more efficiently in case of vector datatypes in the kernel. - Parallelize kernel inner loops "horizontally", if possible. This converts possibly sequential inner kernel loops to parallel loops by effectively performing "loop interchange" of the work-item loop and the kernel's inner loop. - Added VexCL tests to the test suite. All but one of them work with pocl. Major bugfixes -------------- - Fixed passing NULL as a buffer argument to clSetKernelArg (this time with a regression test added). - Constant BitCast expressions broken to variables to avoid crashing when copying a kernel with casts on automatic local pointers. - Fixes for i386/i686. Tested on Pentium4/Ubuntu 10.04 LTS. - Lots of API error checking added (found by the Piglit testing suite). - Fixed bug in select producing incorrect results when the third conditional argument is an unsigned scalar or vector. - Replaced deprecated SSE 4.1 assembly mnemonics in x86-64 min/max kernel functions that have since been removed in more recent versions of gas and llvm-as. - SPIR/LLVM IR 'byval' attributes are now handled correctly on kernel function arguments, allowing for structs and oversized vectors to be passed in with value semantics. - Fixed to work with the latest Khronos OpenCL headers for 1.2. Some issues fixed with the new cl.hpp. - The ICD dispatch table was too small which might have caused "interesting" behavior when calling the later functions in the table and not using ocl-icd as the dispatcher.
- Several kernel compiler bugs fixed. - A multithreaded host application could free the same object multiple times due to a race issue. Platform Layer implementations (OpenCL 1.2 Chapter 4) ----------------------------------------------------- - Return correctly formatted CL_DEVICE_VERSION and CL_DEVICE_OPENCL_C_VERSION. - clGetDeviceInfo: Use the 'cpufreq' sys interface of Linux for querying the CPU clock frequency, if available. The OpenCL Runtime (OpenCL 1.2 Chapter 5) ----------------------------------------- - clGetEventInfo: Querying the command type, command queue, and the reference count of the event. Builtin Function Implementations (OpenCL 1.2 Section 6.12) ---------------------------------------------------------- - convert_type* builtins now generated with a Python script by Victor Oliveira. - length() fingerprint was assuming two arguments instead of one. - The kernel bitcode library is now optimized when built in pocl. Speeds up kernel optimization for cases which use the kernel functions a lot. - Fix mul_hi() implementation ICD --- - Fixed pocl tests to work when executed through the Khronos supplied icd loader (needs a patch applied to the loader to be able to override the .icd search path). Misc. ----- - Fix to the helper script search logic: Search from the BUILDDIR only if env POCL_BUILDING is defined. Otherwise search from PKGDATADIR first, then from the PATH. - Fixed memory leaks in clCreateContext* and clCreateKernel - Ensured that stored arguments are adequately aligned in clSetKernelArg and clEnqueueNDRangeKernel. 0.7 January 2013 ================= This lists only the most interesting changes. Please refer to the version control log for a full listing. Overall ------- - Support for LLVM 3.2. - Multi-WI work group functions can be now generated using loops which are only partially unrolled. Reduces code size explosion with large WGs in comparison to the full replication method. - PowerPC 64 support (tested on Cell/Debian Sid/PS3).
- PowerPC 32 support (tested on Cell/Debian Sid/PS3). - ARM v7 support (on Linux) - Beginning of Cell SPU support (very experimental!). - Most of the AMD APP SDK OpenCL examples now work and have been added to the pocl test suite. - Most of the Parboil benchmark cases added to the test suite. Kernel Compiler Passes ---------------------- - Several miscompilations and compiler crashes fixed. - Multiple bugs fixed from the work group vectorizer. - Updated metadata format pocl uses to pass information to vectorization and TCE backend to simplify debugging. - Kernel pointer arguments are not always marked 'noalias' (restricted). Doing this previously was a specs misunderstanding. - ConstantGEPs to static variables generated from automated locals caused problems. Now converting them to normal GEPs using a pass from the SAFECode project. OpenCL Platform Layer implementations (OpenCL 1.2 Chapter 4) ------------------------------------------------------- - clGetDeviceInfo now uses the hwloc lib for device property queries. Many new queries implemented. - clGetKernelInfo (initial implementation) - clGetMemObjectInfo (initial implementation) - clGetCommandQueueInfo (initial implementation) - clReleaseDevice - clRetainDevice - Proper freeing of devices in clReleaseContext The OpenCL Runtime Implementations (OpenCL 1.2 Chapter 5) --------------------------------------------------------- - clBuildProgram: support for passing options to the compiler. - clEnqueueMarker OpenCL C Builtin Function Implementations (OpenCL 1.2 Section 6.12) ------------------------------------------------------------------- - Atomic Functions (6.12.11) - get_global_offset() was not linked correctly Framework --------- - Made it possible to override the .cl -> .bc build command called by clBuildProgram per device.
Device Drivers -------------- - pthread/basic: * extract CPU clock frequency from /proc/cpuinfo, if available * return cl_khr_fp64 if doubles supported by the CPU - ttasim: support for explicitly calling custom/special operations through the vendor extensions API Misc. ----- - Fixes for MacOSX builds. - Fixed passing NULL as a buffer argument to clSetKernelArguments - Fixed a major bug when launching the same kernel multiple times: the arguments were not copied to the command object. - Fixed several issues with ICD, it is now considered stable to be used by default. 0.6 August 2012 ================= Kernel library -------------- - Added initial optimized kernel library for X86_64/SSE. - Preliminary support for ARM architectures on Linux (briefly tested on MeeGo/Nokia N9). Pthread device driver --------------------- - Multithreading at the work group granularity using pthreads. - Tries to figure out the optimal maximum number of threads for the system based on the available hardware threads. Currently works only in Linux using the /proc/cpuinfo interface. - Region-based customized memory allocator for speeding up buffer allocations. Kernel compiler --------------- - Most of the tricky work group barrier cases (barriers inside for-loops etc) now supported. - Support for local variables, also automatic locals. - Reuse previous compilation results, if available. - Automatic vectorization of work groups (multiple work items in parallel). Miscellaneous ------------- - Installable Client Driver (icd) support. - Event profiling support (incomplete, works only for kernel and buffer read/write/map/unmap events). Known issues ------------ - Non-pointer struct kernel arguments fail due to varying ABIs * https://bugs.launchpad.net/pocl/+bug/987905 - Produces always "fully unrolled" chains of work items for work groups causing code size explosion for large WGs.
pocl-1.8/CMakeLists.txt000066400000000000000000001760261413131625300150720ustar00rootroot00000000000000#============================================================================= # CMake build system files # # Copyright (c) 2014-2018 pocl developers # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # #============================================================================= cmake_minimum_required(VERSION 3.3 FATAL_ERROR) project(pocl) set(CMAKE_PROJECT_DESCRIPTION "pocl is a portable OpenCl runtime.") set(LATEST_KNOWN_CXX_STD_VERSION "20") set(SUPPORTED_CXX_STD_VERSION "11") option(ENABLE_LATEST_CXX_STD "Upgrade C++ standard version to ${LATEST_KNOWN_CXX_STD_VERSION}. Required to get rid of unused variables warnings in compilers not supporting [[gnu::*]] attributes. Can bring other benefits, including performance and efficiency ones. Before a pull request build with this disabled." 
OFF)

# Pick the C++ standard: the latest known one when explicitly requested,
# otherwise the baseline version.
if(ENABLE_LATEST_CXX_STD)
  set(CMAKE_CXX_STANDARD "${LATEST_KNOWN_CXX_STD_VERSION}")
else()
  set(CMAKE_CXX_STANDARD "${SUPPORTED_CXX_STD_VERSION}")
endif()

# Fix behavior of CMAKE_CXX_STANDARD when targeting macOS.
if(POLICY CMP0025)
  cmake_policy(SET CMP0025 NEW)
endif()

include(CheckCCompilerFlag)
include(CPackComponent)

# Copies every currently defined variable whose name starts with
# "CPACK_DEBIAN_" into the parent scope, printing each name as it goes.
# Being a macro, it runs in the caller's scope, so PARENT_SCOPE refers to
# the scope above the caller.
macro(pass_through_cpack_vars)
  get_cmake_property(cpackVarsToPassthrough VARIABLES)
  foreach(varName ${cpackVarsToPassthrough})
    if(varName MATCHES "^CPACK_DEBIAN_")
      message(STATUS "${varName}")
      set("${varName}" "${${varName}}" PARENT_SCOPE)
    endif()
  endforeach()
endmacro()

# don't allow implicit function declarations
if(UNIX)
  if((CMAKE_C_COMPILER_ID STREQUAL "GNU") OR (CMAKE_C_COMPILER_ID STREQUAL "Clang"))
    # Both diagnostics are C-only, so they are wrapped in a COMPILE_LANGUAGE
    # condition; a conditional generator expression needs that inner
    # condition ($<$<cond>:value>) to be well-formed.
    add_compile_options("$<$<COMPILE_LANGUAGE:C>:-Werror=implicit-function-declaration>")
    check_c_compiler_flag("-Wincompatible-pointer-types" HAVE_WARN_INCOMPATIBLE_POINTER_TYPES)
    if (HAVE_WARN_INCOMPATIBLE_POINTER_TYPES)
      add_compile_options("$<$<COMPILE_LANGUAGE:C>:-Wincompatible-pointer-types>")
    endif()
    add_compile_options("-Wno-ignored-attributes")
  else()
    message(WARNING "Don't know how to forbid this compiler from allowing implicit function declarations.")
  endif()
endif()

# Version components; VERSION_SUFFIX may be extended with git information
# further below.
set(MAJOR_VERSION 1)
set(MINOR_VERSION 8)
set(VERSION_SUFFIX_FIXED_TEXT "")
set(VERSION_SUFFIX "${VERSION_SUFFIX_FIXED_TEXT}")
set(VERSION_STRING ${MAJOR_VERSION}.${MINOR_VERSION}${VERSION_SUFFIX})
set(POCL_VERSION_BASE ${VERSION_STRING})

# required b/c SHARED libs defaults to ON while OBJECT defaults to OFF
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# CMake doesn't add "-pie" by default for executables (CMake issue #14983)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie")

enable_testing()

#####################################################

# Default build type: Debug when building from a git checkout, otherwise
# RelWithDebInfo.
if(EXISTS "${CMAKE_SOURCE_DIR}/.git")
  set(DEFAULT_BUILD_TYPE "Debug")
else()
  set(DEFAULT_BUILD_TYPE "RelWithDebInfo")
endif()

if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.")
  set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING "Choose the type of build." FORCE)
  # Set the possible values of build type for cmake-gui
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

##################################################################################

# Sets VAR to 1 when the remaining arguments evaluate to true as an if()
# condition, and to 0 otherwise.
macro(set_expr VAR)
  if(${ARGN})
    set(${VAR} 1)
  else()
    set(${VAR} 0)
  endif()
endmacro()

find_program(BASH "bash")
find_program(MAKE_PROGRAM NAMES "make")
find_program(GIT_CMD "git")
set_expr(HAVE_GIT GIT_CMD)

# GIT_COMMIT_VALID records whether "git rev-parse HEAD" succeeded and printed
# something. EXITCODE alone cannot be used for that later on because the
# follow-up git calls overwrite it.
set(GIT_COMMIT_VALID 0)
if(HAVE_GIT)
  execute_process(COMMAND "${GIT_CMD}" "rev-parse" "HEAD"
                  OUTPUT_VARIABLE GIT_COMMIT
                  RESULT_VARIABLE EXITCODE
                  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  if((EXITCODE EQUAL 0) AND (NOT "${GIT_COMMIT}" STREQUAL ""))
    set(GIT_COMMIT_VALID 1)
  endif()
endif()

# Only prerelease versions (suffix containing "pre") get git details appended
# to the version string.
if(HAVE_GIT AND (VERSION_SUFFIX MATCHES "pre") AND (EXITCODE EQUAL 0))
  message(STATUS "Pocl source Git commit: ${GIT_COMMIT}")
  execute_process(COMMAND "${GIT_CMD}" "branch" "--contains" "${GIT_COMMIT}"
                  OUTPUT_VARIABLE GIT_BRANCH
                  RESULT_VARIABLE EXITCODE
                  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  message(STATUS "Pocl source Git branch: ${GIT_BRANCH}")
  execute_process(COMMAND "${GIT_CMD}" describe "--always" "--long" "--all" "${GIT_COMMIT}"
                  OUTPUT_VARIABLE GIT_DESCRIBE
                  RESULT_VARIABLE EXITCODE
                  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  string(REPLACE "heads/" "" GIT_DESCRIBE "${GIT_DESCRIBE}")
  message(STATUS "Pocl source Git describe: ${GIT_DESCRIBE}")
  set(VERSION_SUFFIX "${VERSION_SUFFIX_FIXED_TEXT} ${GIT_DESCRIBE}")
  set(VERSION_STRING ${MAJOR_VERSION}.${MINOR_VERSION}${VERSION_SUFFIX})
  set(POCL_VERSION_FULL "${VERSION_STRING}")
else()
  message(STATUS "No git and/or not a prerelease -> not adding git commit to version.")
  set(POCL_VERSION_FULL "${POCL_VERSION_BASE}")
endif()

set(CPACK_PACKAGE_NAME pocl)
set(CPACK_PACKAGE_VENDOR pocl)
set(CPACK_PACKAGE_VERSION_MAJOR "${MAJOR_VERSION}")
set(CPACK_PACKAGE_VERSION_MINOR "${MINOR_VERSION}")
set(CPACK_PACKAGE_VERSION "${MAJOR_VERSION}.${MINOR_VERSION}~${VERSION_SUFFIX_FIXED_TEXT}")
# Append the commit hash only when one was actually obtained; having a git
# executable is not enough (e.g. when building from an unpacked tarball), and
# an empty hash would leave a dangling "." at the end of the package version.
if(GIT_COMMIT_VALID)
  set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.${GIT_COMMIT}")
endif()

##################################################################################

# OCS_AVAILABLE is the deprecated spelling of ENABLE_LLVM; when given, its
# value is forced into ENABLE_LLVM.
if(DEFINED OCS_AVAILABLE)
  message(WARNING "The OCS_AVAILABLE option is deprecated, since it actually meant 'LLVM available', but LLVM is not the only way to get compiler support in a device. Please use ENABLE_LLVM in future if you want to enable/disable building pocl against LLVM.")
  set(ENABLE_LLVM ${OCS_AVAILABLE} CACHE BOOL "build against LLVM" FORCE)
else()
  option(ENABLE_LLVM "Build pocl with LLVM. Default is ON." ON)
endif()

option(STATIC_LLVM "If ON, link to static LLVM libraries. OFF (default) = link to shared LLVM libraries." OFF)

option(BUILD_SHARED_LIBS "ON=Build shared libs, OFF=static libs" ON)

option(POCL_DEBUG_MESSAGES "Enable debug messages from pocl (useful for OpenCL developers), must be enabled at runtime, with env var POCL_DEBUG" ON)

option(ENABLE_LOADABLE_DRIVERS "Enable drivers to be dlopen()-ed at pocl runtime, instead of being linked into libpocl" ON)

option(ENABLE_HSA "Enable the HSA base profile runtime device driver" OFF)

option(ENABLE_CUDA "Enable the CUDA device driver for NVIDIA devices" OFF)

option(KERNEL_CACHE_DEFAULT "Default value for the kernel compile cache. If disabled, pocl will still use the kernel cache, but will delete cachefiles on exit. You can still enable keeping the files it at runtime with an env var." ON)

option(POCL_ICD_ABSOLUTE_PATH "Use absolute path in pocl.icd" ON)

option(ENABLE_POCL_BUILDING "When OFF, env var POCL_BUILDING has no effect.
Defaults to ON" ON)

option(VISIBILITY_HIDDEN "Build with -fvisibility=hidden -fvisibility-inlines-hidden" OFF)
if(VISIBILITY_HIDDEN)
  add_compile_options(-fvisibility=hidden)
  # -fvisibility-inlines-hidden is a C++-only flag, so it is restricted to
  # C++ sources; a conditional generator expression needs that inner
  # condition ($<$<cond>:value>) to be well-formed.
  add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fvisibility-inlines-hidden>)
endif()

# Ninja Job Pool support
# Each non-empty PARALLEL_*_JOBS value defines a Ninja job pool of that size
# and makes it the default pool for compile / link steps respectively.
set(PARALLEL_COMPILE_JOBS "" CACHE STRING
  "Define the maximum number of concurrent compilation jobs (Ninja only).")
if(PARALLEL_COMPILE_JOBS)
  if(CMAKE_GENERATOR STREQUAL "Ninja")
    set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${PARALLEL_COMPILE_JOBS})
    set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
  endif()
endif()

set(PARALLEL_LINK_JOBS "" CACHE STRING
  "Define the maximum number of concurrent link jobs (Ninja only).")
if(CMAKE_GENERATOR STREQUAL "Ninja")
  if(PARALLEL_LINK_JOBS)
    set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${PARALLEL_LINK_JOBS})
    set(CMAKE_JOB_POOL_LINK link_job_pool)
  endif()
endif()

# Requesting job pools with any other generator has no effect; say so.
if(NOT CMAKE_GENERATOR STREQUAL "Ninja" AND (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS))
  message(WARNING "Job pooling is only available with Ninja generators.")
endif()

#### these are mostly useful for pocl developers
option(ENABLE_EXTRA_VALIDITY_CHECKS "Enable extra checks on cl_* object validity" OFF)

option(DEVELOPER_MODE "This will SIGNIFICANTLY slow down pocl (but speed up its compilation). Only turn on if you know what you're doing." OFF)

option(USE_POCL_MEMMANAGER "Enables custom memory manager. Except for special circumstances, this should be disabled." OFF)

option(EXAMPLES_USE_GIT_MASTER "If enabled, some of the external testsuites in examples/ will try to use sources from Git master, instead of releases. This may result in failure to build or run the examples" OFF)

option(ENABLE_HOST_CPU_DEVICES "Add host CPUs as OpenCL devices (basic and pthread)." ON)

option(ENABLE_HOST_CPU_DEVICE_CL20 "Enable reporting OpenCL 2.0 for the CPU device" OFF)

option(ENABLE_ACCEL_DEVICE "Enable the generic hardware accelerator device driver." OFF)

option(ENABLE_POCLCC "Build poclcc.
Defaults to ON" ON)

option(ENABLE_TESTS "Build tests. Defaults to ON" ON)

option(ENABLE_EXAMPLES "Build examples. Defaults to ON" ON)

##########################################################

# Derive the address width of the host device from the pointer size CMake
# detected; any size other than 8 or 4 bytes aborts configuration.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(HOST_DEVICE_ADDRESS_BITS 64)
elseif(CMAKE_SIZEOF_VOID_P EQUAL 4)
  set(HOST_DEVICE_ADDRESS_BITS 32)
else()
  message(FATAL_ERROR "Cannot figure out HOST_DEVICE_ADDRESS_BITS")
endif()

# printf buffer size, in KB
if(NOT DEFINED PRINTF_BUFFER_SIZE)
  set(PRINTF_BUFFER_SIZE 16384 CACHE STRING "printf buffer size, in KB")
endif()

##################################################################################

# Classify CMAKE_SYSTEM_PROCESSOR into architecture flags. The branches are
# tried in order, so "ppc64le" has to be tested before the broader "ppc".
# ARM and x86 are further split into 32/64-bit variants using
# HOST_DEVICE_ADDRESS_BITS. Unrecognized processors set none of the flags.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
  set(POWERPC 1)
  set(POWERPC64LE 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc")
  set(POWERPC 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "mips")
  set(MIPS 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(arm|aarch64)")
  set(ARM 1)
  if(HOST_DEVICE_ADDRESS_BITS MATCHES "32")
    set(ARM32 1)
  else()
    set(ARM64 1)
  endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(i.86|AMD64|x86_64|amd64)")
  set(X86 1)
  if(HOST_DEVICE_ADDRESS_BITS MATCHES "32")
    set(I386 1)
  else()
    set(X86_64 1)
  endif()
endif()

# Number of host CPU cores as reported by the ProcessorCount module, clamped
# to a minimum of 1. With CMake major version 2 or lower the module is not
# consulted and 1 is assumed.
if(CMAKE_MAJOR_VERSION GREATER 2)
  include(ProcessorCount)
  ProcessorCount(CORECOUNT)
  if(CORECOUNT LESS 1)
    set(CORECOUNT 1)
  endif()
else()
  set(CORECOUNT 1)
endif()
message(STATUS "Host CPU cores: ${CORECOUNT}")

######################################################################################

# rename_if_different(SRC DST)
#
# Renames SRC to DST unless DST already exists with the same MD5 checksum.
# In that case nothing is renamed: DST is left untouched and SRC stays where
# it is.
# Note that OLD_MD5 holds the checksum of SRC and NEW_MD5 that of DST.
function(rename_if_different SRC DST)
  if(EXISTS "${DST}")
    file(MD5 "${SRC}" OLD_MD5)
    file(MD5 "${DST}" NEW_MD5)
    if(NOT OLD_MD5 STREQUAL NEW_MD5)
      file(RENAME "${SRC}" "${DST}")
    endif()
  else()
    file(RENAME "${SRC}" "${DST}")
  endif()
endfunction()

######################################################################################

# Recent versions of CMake can make use of Ninja's console pool to avoid
# buffering the output of particular commands.
# COMMAND_USES_TERMINAL expands to the USES_TERMINAL keyword on CMake >= 3.2
# and to nothing on older versions.
if(CMAKE_VERSION VERSION_LESS 3.2.0)
  set(COMMAND_USES_TERMINAL)
else()
  set(COMMAND_USES_TERMINAL USES_TERMINAL)
endif()

# Installation directory layout: GNUInstallDirs on UNIX, fixed defaults on
# Windows, anything else is rejected.
if(UNIX)
  include(GNUInstallDirs)
else()
  if (WIN32)
    # The variable names are written literally here. Writing them as
    # ${CMAKE_INSTALL_LIBDIR} etc. would dereference variables that are still
    # undefined at this point, so the install directories would never be set
    # and every POCL_INSTALL_* path below would be built from empty
    # components. Values supplied by the user are kept.
    if(NOT DEFINED CMAKE_INSTALL_LIBDIR)
      set(CMAKE_INSTALL_LIBDIR "lib")
    endif()
    if(NOT DEFINED CMAKE_INSTALL_DATADIR)
      set(CMAKE_INSTALL_DATADIR "share")
    endif()
    if(NOT DEFINED CMAKE_INSTALL_INCLUDEDIR)
      set(CMAKE_INSTALL_INCLUDEDIR "include")
    endif()
    if(NOT DEFINED CMAKE_INSTALL_BINDIR)
      set(CMAKE_INSTALL_BINDIR "bin")
    endif()
    message(STATUS "Setting installation destination on Windows to: ${CMAKE_INSTALL_PREFIX}")
  else()
    message(FATAL_ERROR "System not UNIX nor WIN32 - not implemented yet")
  endif()
endif()

# for libpocl.so
set(POCL_INSTALL_PUBLIC_LIBDIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" CACHE PATH "POCL public libdir")

# for llvmopencl.so
set(POCL_INSTALL_PRIVATE_LIBDIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/pocl" CACHE PATH "POCL private libdir")

# for pocl.icd
# A native UNIX build with prefix /usr installs the ICD file into the
# system-wide /etc/OpenCL/vendors; every other configuration keeps it under
# the install prefix.
if(UNIX AND (NOT CMAKE_CROSSCOMPILING) AND (CMAKE_INSTALL_PREFIX STREQUAL "/usr"))
  set(POCL_INSTALL_ICD_VENDORDIR "/etc/OpenCL/vendors" CACHE PATH "POCL ICD file destination")
else()
  set(POCL_INSTALL_ICD_VENDORDIR "${CMAKE_INSTALL_PREFIX}/etc/OpenCL/vendors" CACHE PATH "POCL ICD file destination")
endif()

# for kernel-<target>.bc
set(POCL_INSTALL_PRIVATE_DATADIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATADIR}/pocl" CACHE PATH "POCL private datadir")

# for poclu.h
set(POCL_INSTALL_PUBLIC_HEADER_DIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}" CACHE PATH "POCL public header dir")

# for _kernel.h et al
set(POCL_INSTALL_PRIVATE_HEADER_DIR "${POCL_INSTALL_PRIVATE_DATADIR}/include" CACHE PATH "POCL private header dir")

# for pocl-standalone et al
set(POCL_INSTALL_PUBLIC_BINDIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}" CACHE PATH "POCL public bindir")

# for PoclConfig.cmake & stuff
set(POCL_INSTALL_CMAKE_CONFIG_DIR "${POCL_INSTALL_PRIVATE_LIBDIR}" CACHE PATH "Installation directory for CMake files")

# TODO maybe use output of pkg-config --variable=pc_path pkg-config ?
set(POCL_INSTALL_PKGCONFIG_DIR "${POCL_INSTALL_PUBLIC_LIBDIR}/pkgconfig" CACHE PATH "Destination for pocl.pc")

# The OpenCL headers go into an "OpenCL" directory on Apple platforms and
# into "CL" everywhere else.
if(APPLE)
  set(CMAKE_MACOSX_RPATH ON)
  set(POCL_INSTALL_OPENCL_HEADER_DIR "${POCL_INSTALL_PUBLIC_HEADER_DIR}/OpenCL" CACHE PATH "POCL header dir for OpenCL headers")
else()
  set(POCL_INSTALL_OPENCL_HEADER_DIR "${POCL_INSTALL_PUBLIC_HEADER_DIR}/CL" CACHE PATH "POCL header dir for OpenCL headers")
endif()

######################################################################################

######################################################################################

set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")

option(HARDENING_ENABLE "Enable hardening against various attacks. May worsen performance" OFF)
if(HARDENING_ENABLE)
  include(Hardening)
else()
  # No-op stand-in so harden(<target>) can be called unconditionally.
  function(harden target)
  endfunction()
endif()

find_package(PkgConfig MODULE)

# hwloc is optional; ENABLE_HWLOC is forced to reflect whether it was found.
find_package(Hwloc)
if(NOT Hwloc_FOUND)
  message(STATUS "hwloc package not found")
  set(ENABLE_HWLOC OFF CACHE BOOL "Hwloc" FORCE)
else()
  if("${Hwloc_VERSION}" VERSION_LESS "1.0")
    message(FATAL_ERROR "Hwloc version must be >= 1.0 !")
  endif()
  message(STATUS "Hwloc_VERSION ${Hwloc_VERSION}")
  message(STATUS "Hwloc_LIBRARIES ${Hwloc_LIBRARIES}")
  message(STATUS "Hwloc_INCLUDE_DIRS ${Hwloc_INCLUDE_DIRS}")
  set(ENABLE_HWLOC ON CACHE BOOL "Hwloc" FORCE)
endif()

include(sanitizers)

######################################################################################

# L1 data cache line size of the host CPU. A value given by the user (or
# cached from an earlier run) wins; otherwise getconf is asked, and 64 bytes
# is assumed when no usable answer is available.
if(NOT HOST_CPU_CACHELINE_SIZE)
  set(CL_SIZE 0)
  if(UNIX OR CMAKE_HOST_SYSTEM_NAME MATCHES "Linux|Darwin")
    find_program(GETCONF "getconf")
    if(GETCONF)
      # Run the executable that find_program() located, and strip the
      # trailing newline up front so the checks below see the bare value.
      execute_process(COMMAND "${GETCONF}" "LEVEL1_DCACHE_LINESIZE"
                      RESULT_VARIABLE RES
                      OUTPUT_VARIABLE CL_SIZE
                      OUTPUT_STRIP_TRAILING_WHITESPACE)
      if(RES)
        message(WARNING "getconf exited with nonzero status!")
        set(CL_SIZE 0)
      elseif(NOT (CL_SIZE MATCHES "^[0-9]+$"))
        # getconf may in rare conditions return "undefined" (or nothing at
        # all); anything that is not a plain number counts as "not detected"
        # so it never ends up in the cache as the cache line size.
        set(CL_SIZE 0)
      endif()
      # getconf sometimes just returns zero
      if(NOT (CL_SIZE EQUAL 0))
        message(STATUS "L1D Cacheline size detected: ${CL_SIZE}")
        set(HOST_CPU_CACHELINE_SIZE "${CL_SIZE}" CACHE STRING "L1D Cacheline size")
      endif()
    endif()
  endif()
  if(CL_SIZE EQUAL 0)
    message(WARNING "Unable to detect cacheline size - assuming 64byte cacheline, override with -DHOST_CPU_CACHELINE_SIZE=<number> (Note: this is merely used for optimization, at worst pocl will be slightly slower)")
    set(HOST_CPU_CACHELINE_SIZE "64" CACHE STRING "L1D Cacheline size")
  endif()
endif()

######################################################################################
#
# Find executables to few tools required during build
#

find_program(PATCH_EXEC
  NAMES patch
  HINTS ENV PATH
)

find_program(XARGS_EXEC
  NAMES xargs
  HINTS ENV PATH
)

if(NOT PATCH_EXEC)
  message(FATAL_ERROR "Could not find patch command.")
endif()

if(NOT XARGS_EXEC)
  message(FATAL_ERROR "Could not find xargs command.")
endif()

######################################################################################

if (ENABLE_LLVM)

  include(LLVM RESULT_VARIABLE RES)
  if(NOT RES)
    message(FATAL_ERROR "Could not load LLVM.cmake")
  endif()

  if(ENABLE_HOST_CPU_DEVICES)
    # Unless the user supplied one, derive the host device build hash from
    # the LLC triple; the host CPU name is appended except for "distro"
    # builds.
    if(NOT DEFINED HOST_DEVICE_BUILD_HASH)
      if(KERNELLIB_HOST_CPU_VARIANTS STREQUAL "distro")
        set(HOST_DEVICE_BUILD_HASH "${LLC_TRIPLE}")
      else()
        set(HOST_DEVICE_BUILD_HASH "${LLC_TRIPLE}-${LLC_HOST_CPU}")
      endif()
    endif()

    if(INTEL_SDE_AVX512)
      set(HOST_CPU_FORCED 1 CACHE INTERNAL "CPU is forced by user" FORCE)
      set(LLC_HOST_CPU "skylake-avx512" CACHE STRING "The Host CPU to use with llc" FORCE)
    endif()
  endif()

else()

  if(ENABLE_HOST_CPU_DEVICES AND (NOT DEFINED HOST_DEVICE_BUILD_HASH))
    message(FATAL_ERROR "For compiler-less builds of CPU backend, you must define HOST_DEVICE_BUILD_HASH")
  endif()

endif()

######################################################################################

if(ENABLE_HSA)
  include(HSA RESULT_VARIABLE RES)
  if(NOT RES)
    message(FATAL_ERROR "Could not load HSA.cmake")
  endif()
endif()

######################################################################################
# Linker command: look up "ld" (with the platform's executable suffix) unless
# building with MSVC, where the value of CLANGXX is used instead.
if (NOT MSVC)
  find_program(LINK_COMMAND
    NAMES ld${CMAKE_EXECUTABLE_SUFFIX}
    HINTS ENV PATH
  )
else()
  set(LINK_COMMAND "${CLANGXX}")
endif()

######################################################################################

# if variable FEATURE_X isn't defined, sets it to DEFAULT_FEATURE_X;
# also, if DEFAULT_FEATURE_X is 0, prevents FEATURE_X being 1
# since it takes DEFAULT_FEATURE_X=0 to mean "FEATURE_X is unavailable"
#
# Arguments:
#   VARNAME                          - name of the feature variable; the
#                                      default is read from DEFAULT_<VARNAME>
#   DESCRIPTION                      - text used in the final status line
#   DOCS_FEATURE_IS_UNAVAILABLE      - warning printed when the feature was
#                                      requested but its default is false;
#                                      the feature is then forced to 0
#   DOCS_REQUESTED_DISABLING_FEATURE - status message printed when the
#                                      feature is off although its default
#                                      is true
#
# Always finishes with a "<DESCRIPTION> <tag>: 0|1" status line, where the
# tag is "(cached)", "(override)" or empty. As this is a macro, _CACHED is
# set in the caller's scope.
macro(setup_cached_var VARNAME DESCRIPTION DOCS_FEATURE_IS_UNAVAILABLE DOCS_REQUESTED_DISABLING_FEATURE)

  if(DEFINED ${VARNAME})
    set(_CACHED "(cached)")
  else()
    set(_CACHED "")
    set(${VARNAME} ${DEFAULT_${VARNAME}})
  endif()

  if(${VARNAME} AND (NOT ${DEFAULT_${VARNAME}}))
    message(WARNING "${DOCS_FEATURE_IS_UNAVAILABLE}")
    set(${VARNAME} 0)
    set(_CACHED "(override)")
  endif()
  if((NOT ${VARNAME}) AND ${DEFAULT_${VARNAME}} )
    message(STATUS "${DOCS_REQUESTED_DISABLING_FEATURE}")
  endif()
  if(${VARNAME})
    message(STATUS "${DESCRIPTION} ${_CACHED}: 1")
  else()
    message(STATUS "${DESCRIPTION} ${_CACHED}: 0")
  endif()
endmacro()

######################################################################################

# Probe for the libc functions pocl uses. CMAKE_REQUIRED_DEFINITIONS and
# CMAKE_REQUIRED_LIBRARIES are switched before each group of checks and all
# CMAKE_REQUIRED_* settings are cleared again at the end. On non-UNIX systems
# every HAVE_* result is set to 0 without probing.
if(UNIX)
  include(CheckCSourceCompiles)
  include(CheckSymbolExists)

  # don't allow implicit function declarations
  set(CMAKE_REQUIRED_FLAGS "-std=c99")
  if (CMAKE_SYSTEM_NAME MATCHES "Linux")
    set(CMAKE_REQUIRED_LIBRARIES "rt")
  endif ()

  CHECK_SYMBOL_EXISTS("fork" "sys/types.h;unistd.h" HAVE_FORK)
  CHECK_SYMBOL_EXISTS("fsync" "unistd.h" HAVE_FSYNC)
  CHECK_SYMBOL_EXISTS("sleep" "unistd.h" HAVE_SLEEP)
  CHECK_SYMBOL_EXISTS("getrlimit" "sys/time.h;sys/resource.h" HAVE_GETRLIMIT)
  CHECK_SYMBOL_EXISTS("utime" "sys/types.h;utime.h" HAVE_UTIME)
  CHECK_SYMBOL_EXISTS("ANNOTATE_HAPPENS_BEFORE" "valgrind/helgrind.h" HAVE_VALGRIND)

  set(CMAKE_REQUIRED_DEFINITIONS "-D_POSIX_C_SOURCE=200809L")
  CHECK_SYMBOL_EXISTS("futimens" "fcntl.h;sys/stat.h" HAVE_FUTIMENS)

  set(CMAKE_REQUIRED_DEFINITIONS "-D_POSIX_C_SOURCE=200112L")
  CHECK_SYMBOL_EXISTS("posix_memalign" "stdlib.h" HAVE_POSIX_MEMALIGN)

  set(CMAKE_REQUIRED_DEFINITIONS "-D_POSIX_C_SOURCE=199309L")
  CHECK_SYMBOL_EXISTS("clock_gettime" "time.h" HAVE_CLOCK_GETTIME)
  CHECK_SYMBOL_EXISTS("fdatasync" "unistd.h" HAVE_FDATASYNC)

  set(CMAKE_REQUIRED_DEFINITIONS "-D_BSD_SOURCE" "-D_DEFAULT_SOURCE")
  CHECK_SYMBOL_EXISTS("mkdtemp" "stdlib.h;unistd.h" HAVE_MKDTEMP)
  CHECK_SYMBOL_EXISTS("mkstemps" "stdlib.h;unistd.h" HAVE_MKSTEMPS)
  CHECK_SYMBOL_EXISTS("vfork" "sys/types.h;unistd.h" HAVE_VFORK)

  set(CMAKE_REQUIRED_DEFINITIONS "-D_GNU_SOURCE")
  CHECK_SYMBOL_EXISTS("mkostemps" "stdlib.h" HAVE_MKOSTEMPS)

  # The dladdr check is made against libdl.
  set(CMAKE_REQUIRED_LIBRARIES "dl")
  CHECK_SYMBOL_EXISTS("dladdr" "dlfcn.h" HAVE_DLADDR)

  unset(CMAKE_REQUIRED_DEFINITIONS)
  unset(CMAKE_REQUIRED_FLAGS)
  unset(CMAKE_REQUIRED_LIBRARIES)
else()
  set(HAVE_CLOCK_GETTIME 0)
  set(HAVE_FDATASYNC 0)
  set(HAVE_FSYNC 0)
  set(HAVE_SLEEP 0)
  set(HAVE_MKOSTEMPS 0)
  set(HAVE_MKSTEMPS 0)
  set(HAVE_MKDTEMP 0)
  set(HAVE_FUTIMENS 0)
  set(HAVE_FORK 0)
  set(HAVE_GETRLIMIT 0)
  set(HAVE_VFORK 0)
  set(HAVE_UTIME 0)
  set(HAVE_DLADDR 0)
  set(HAVE_VALGRIND 0)
endif()

######################################################################################

# A relocatable libpocl is only offered on UNIX builds with LLVM enabled and
# dladdr available; otherwise the option is forced OFF.
if(UNIX AND ENABLE_LLVM AND HAVE_DLADDR)
  option(ENABLE_RELOCATION "make libpocl relocatable" ON)
else()
  message(STATUS "Relocation not available")
  set(ENABLE_RELOCATION OFF CACHE INTERNAL "libpocl relocatable" FORCE)
endif()

if(ENABLE_RELOCATION)
  # Path of the private data dir relative to the public lib dir.
  file(RELATIVE_PATH POCL_INSTALL_PRIVATE_DATADIR_REL ${POCL_INSTALL_PUBLIC_LIBDIR} ${POCL_INSTALL_PRIVATE_DATADIR})
  message(STATUS "Private Datadir Relative path: ${POCL_INSTALL_PRIVATE_DATADIR_REL}")
  install(FILES ${CLANG_OPENCL_HEADERS}
          DESTINATION "${POCL_INSTALL_PRIVATE_DATADIR}/include" COMPONENT "dev")
endif()

# Path of the private lib dir relative to the public lib dir; computed
# regardless of ENABLE_RELOCATION.
file(RELATIVE_PATH POCL_INSTALL_PRIVATE_LIBDIR_REL ${POCL_INSTALL_PUBLIC_LIBDIR} ${POCL_INSTALL_PRIVATE_LIBDIR})

######################################################################################

# IPO support for runtime library

if(POLICY CMP0069)
  cmake_policy(SET CMP0069 NEW)
endif()
# DEFAULT_ENABLE_IPO records whether the compiler supports IPO. It is probed
# once (the result is cached) and stays OFF on CMake older than 3.9, where
# the CheckIPOSupported module is not used.
if(NOT DEFINED DEFAULT_ENABLE_IPO)
  set(DEFAULT_ENABLE_IPO OFF CACHE BOOL "IPO" FORCE)
  if(NOT CMAKE_VERSION VERSION_LESS "3.9")
    include(CheckIPOSupported)
    check_ipo_supported(RESULT IPO OUTPUT IPO_OUTPUT)
    set(DEFAULT_ENABLE_IPO ${IPO} CACHE BOOL "IPO" FORCE)
    message(STATUS "Compiler supports IPO: ${DEFAULT_ENABLE_IPO}")
    #message(STATUS "IPO check message: ${IPO_OUTPUT}")
  endif()
endif()

setup_cached_var(ENABLE_IPO "Enable Link-Time Optimization (IPO) while building pocl runtime"
  "Requested build with IPO, but IPO is not available"
  "IPO available, but requested build without it")

######################################################################################

option(ENABLE_SLEEF "Use SLEEF for kernel library" ON)

option(ENABLE_CONFORMANCE "Enable conformance to OpenCL standard. Disabling this may enable slightly faster kernel library functions (at a price of range/precision). Note that enabling this does not guarantee conformance (depends on hardware)" ON)

if(ENABLE_CONFORMANCE AND (NOT ENABLE_SLEEF))
  message(FATAL_ERROR "conformance needs enabled SLEEF")
endif()

######################################################################################

# fully device-side printf on devices which support it (only CPU backend ATM), disabled by default.
# this requires 128bit integer support because of the code in "errol" float-to-string conversion routine
# the output is not 100% compatible with glibc's printf (%f with large argument prints zeroes after
# last significant digit - 16-18th digit or so, unlike glibc which prints digits up to decimal point).

if(CLANG_HAS_128B_MATH)
  option(ENABLE_POCL_FLOAT_CONVERSION "Enable use of pocl's own float-to-decimal conversion code in OpenCL printf(). Defaults to OFF (uses snprintf from C library). Requires compiler-rt." OFF)
else()
  set(ENABLE_POCL_FLOAT_CONVERSION OFF CACHE INTERNAL "pocl's own float-to-decimal conversion code")
endif()

unset(FLOATCONV_FLAG)
if(ENABLE_POCL_FLOAT_CONVERSION)
  # force link with Clang; otherwise not needed on x86 but in this case we need rtlib
  set(FLOATCONV_FLAG "-DENABLE_POCL_FLOAT_CONVERSION")
endif()

######################################################################################

# for kernel code, disable PIC & stack protector
#
# it seems PIC and stack-protector defaults somehow depend on
# clang build type or environment. PIC causes problems with
# constant addrspace variables, and stack protector likely slows
# down the kernels, so it needs to be determined whether it's worth
# the trouble.
#
# NOTE(review): the flag sets below pass -fPIC, i.e. they select PIC
# explicitly instead of disabling it; only the stack protector is switched
# off. Confirm which of the two the comment above should say.
set(DEFAULT_KERNEL_CL_FLAGS "-xcl -fno-stack-protector -fPIC ${FLOATCONV_FLAG}")
set(DEFAULT_KERNEL_C_FLAGS "-xc -std=c11 -D__CBUILD__ -fno-math-errno -fno-stack-protector -fPIC ${FLOATCONV_FLAG}")
set(DEFAULT_KERNEL_CXX_FLAGS "-xc++ -std=c++11 -fno-stack-protector -fPIC ${FLOATCONV_FLAG}")

set(EXTRA_KERNEL_FLAGS "" CACHE STRING "Extra arguments to all kernel compilation commands (defaults to empty)")
set(EXTRA_KERNEL_CL_FLAGS "" CACHE STRING "Extra arguments to kernel CL compiler (defaults to empty)")
set(EXTRA_KERNEL_CXX_FLAGS "" CACHE STRING "Extra arguments to kernel CXX compiler (defaults to empty)")
set(EXTRA_KERNEL_C_FLAGS "" CACHE STRING "Extra arguments to kernel C compiler (defaults to empty)")

# KERNEL_<lang>_FLAGS = default flags + flags for all languages + flags for
# this language. A separating space is inserted only where two pieces would
# otherwise touch: plain concatenation fuses a user-supplied flag with the
# preceding one whenever the preceding piece does not end in whitespace
# (e.g. when FLOATCONV_FLAG is set, or when both EXTRA_KERNEL_FLAGS and a
# per-language variable are given). When every EXTRA_* variable is empty the
# result is exactly the default flag string.
foreach(_kernel_lang CXX CL C)
  set(_kernel_flags "${DEFAULT_KERNEL_${_kernel_lang}_FLAGS}")
  foreach(_extra_var EXTRA_KERNEL_FLAGS EXTRA_KERNEL_${_kernel_lang}_FLAGS)
    if(NOT "${${_extra_var}}" STREQUAL "")
      if((NOT "${_kernel_flags}" MATCHES "[ \t]$") AND (NOT "${${_extra_var}}" MATCHES "^[ \t]"))
        set(_kernel_flags "${_kernel_flags} ")
      endif()
      set(_kernel_flags "${_kernel_flags}${${_extra_var}}")
    endif()
  endforeach()
  set(KERNEL_${_kernel_lang}_FLAGS "${_kernel_flags}")
endforeach()
unset(_kernel_flags)

######################################################################################

if(UNIX)
  if(APPLE)
    # MacOS ld outputs useless warnings like
    # ld: warning: -macosx_version_min not specified, assuming 10.7
    # suppress them with -w.
set(DEFAULT_HOST_LD_FLAGS "-dynamiclib -w -lm") elseif(ANDROID) set(DEFAULT_HOST_LD_FLAGS "-L/system/lib/ -shared -ldl -lc /system/lib/crtbegin_so.o /system/lib/crtend_so.o") else() set(DEFAULT_HOST_LD_FLAGS "-shared") endif() set(LIBMATH "-lm") elseif(WIN32) set(LIBMATH) endif() if(CLANG_NEEDS_RTLIB) set(DEFAULT_HOST_LD_FLAGS "${DEFAULT_HOST_LD_FLAGS} --rtlib=compiler-rt") endif() ###################################################################################### if(UNIX) if(APPLE) # TODO MACOSX_BUNDLE target prop set(ICD_LD_FLAGS "-single_module") else() set(ICD_LD_FLAGS "-Wl,-Bsymbolic") endif() endif() ###################################################################################### set(SPIRV OFF) if(ENABLE_LLVM AND X86 AND (NOT KERNELLIB_HOST_CPU_VARIANTS STREQUAL "distro")) option(ENABLE_SPIR "Enable SPIR support (default ON when available)" ON) else() set(ENABLE_SPIR OFF CACHE INTERNAL "SPIR enabled" FORCE) endif() if(ENABLE_SPIR) message(WARNING "SPIR support is available but highly experimental; use at your own risk.") if(LLVM_SPIRV AND (EXISTS "${LLVM_SPIRV}")) message(WARNING "SPIR-V support enabled but highly experimental; you must use a llvm-spirv " "converter that produces bitcode FOR YOUR LLVM VERSION. " "E.g. 
if you're compiling pocl against LLVM 5 then using Khronos' " "llvm-spirv based on LLVM 3.6 branch WILL NOT WORK.") set(SPIRV ON) endif() endif() set(ENABLE_SPIRV ${SPIRV} CACHE INTERNAL "SPIR-V enabled" FORCE) ###################################################################################### add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DCL_USE_DEPRECATED_OPENCL_2_1_APIS ) add_definitions(-DCL_TARGET_OPENCL_VERSION=300) cpack_add_component("dev") include_directories("include") ###################################################################################### set(HAVE_DLFCN_H OFF CACHE BOOL "dlopen" FORCE) if(WIN32 AND (NOT MINGW)) message(STATUS "Using LoadLibrary/FreeLibrary in Windows, libltdl not needed.") elseif(UNIX) if (CMAKE_CROSSCOMPILING AND (NOT ENABLE_HOST_CPU_DEVICES) AND (NOT ENABLE_HSA)) message(STATUS "Cross-compiling without CPU/HSA devices -> skipping LIBDL search") else() find_library(DL_LIB "dl") find_file(DL_H "dlfcn.h") if(DL_LIB AND DL_H) message(STATUS "libdl found") else() message(STATUS "libdl not found, assuming dlopen() is in libc") set(DL_LIB "") endif() if(DL_H) get_filename_component(DL_H_INCLUDE_DIR "${DL_H}" DIRECTORY) string(FIND "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}" "${DL_H_INCLUDE_DIR}" LTPOSITION) # include the directory of dlfcn.h, if its not in the default system include dirs # also when cross-compiling this includes /usr/include, which screws things up if((LTPOSITION LESS "0") AND (NOT CMAKE_CROSSCOMPILING)) include_directories("${DL_H_INCLUDE_DIR}") endif() set(HAVE_DLFCN_H ON CACHE BOOL "dlfcn.h" FORCE) else() message(FATAL_ERROR "Could not find dlfcn.h!") endif() endif() else() message(STATUS "Unknown OS, don't know how to load a dynamic library") endif() ###################################################################################### set(CMAKE_THREAD_PREFER_PTHREAD TRUE) 
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
find_package(Threads REQUIRED)
# PTHREAD_LIBRARY is the imported target where available, raw libs otherwise
if(CMAKE_VERSION VERSION_GREATER "3.0.99")
  set(PTHREAD_LIBRARY Threads::Threads)
else()
  set(PTHREAD_LIBRARY ${CMAKE_THREAD_LIBS_INIT})
endif()

######################################################################################

# LTTNG
# HAVE_LTTNG_UST is 1 only when pkg-config finds lttng-ust >= 2.7
if(UNIX)
  if(PKG_CONFIG_EXECUTABLE)
    pkg_check_modules(LTTNG_UST lttng-ust>=2.7)
  endif()
  if(LTTNG_UST_FOUND)
    set(HAVE_LTTNG_UST 1)
  else()
    set(HAVE_LTTNG_UST 0)
  endif()
endif()

######################################################################################

# ICD loader detection. Order of preference: ocl-icd (pkg-config, then a plain
# header/library search), then any libOpenCL that exports clEnqueueFillImage.
# The outcome is cached in DEFAULT_ENABLE_ICD.
if(NOT DEFINED DEFAULT_ENABLE_ICD)
  if (MSVC)
    message(STATUS "Building ICD not yet supported on Windows.")
    set(DEFAULT_ENABLE_ICD 0 CACHE INTERNAL "Going to use ICD loader")
  else()
    # pkg-config doesn't work with cross-compiling
    if(PKG_CONFIG_EXECUTABLE)
      pkg_check_modules(OCL_ICD ocl-icd>=1.3)
    endif()

    if (NOT OCL_ICD_FOUND)
      find_path(OCL_ICD_INCLUDE_DIR NAMES ocl_icd.h )
      find_library(OCL_ICD_LIBRARIES NAMES OpenCL )
      if(OCL_ICD_INCLUDE_DIR AND OCL_ICD_LIBRARIES)
        set(OCL_ICD_FOUND 1)
      endif()
    endif()

    if(OCL_ICD_FOUND)
      set(HAVE_OCL_ICD 1 CACHE INTERNAL "ICL library is ocl-icd")
      set(OPENCL_FOUND 1 CACHE INTERNAL "opencl ICD/library found")
      # duh, why doesn't ocl-icd set this in its .pc file ??
      separate_arguments(OCL_LDFLAGS UNIX_COMMAND "${OCL_ICD_LDFLAGS}")
      list(APPEND OCL_LDFLAGS "OpenCL")
      set(OPENCL_LIBRARIES "${OCL_LDFLAGS}" CACHE INTERNAL "opencl ICD/library link flags")
      set(DEFAULT_ENABLE_ICD 1 CACHE INTERNAL "ICD loader availability")
    else()
      set(HAVE_OCL_ICD 0 CACHE INTERNAL "OCL library is ocl-icd")
      unset (OPENCL_FOUND CACHE)
      # fallback to other ICD loaders
      message(STATUS "ocl-icd not found -> trying fallback ICD implementations")
      if(PKG_CONFIG_EXECUTABLE)
        pkg_check_modules(OPENCL OpenCL>=1.2)
      endif()
      if(NOT OPENCL_FOUND)
        find_library(OPENCL_LIBRARIES OpenCL)
        # version check the found library
        if(OPENCL_LIBRARIES)
          set(CMAKE_REQUIRED_LIBRARIES "${OPENCL_LIBRARIES}")
          include(CheckFunctionExists)
          unset (OPENCL_FOUND CACHE)
          CHECK_FUNCTION_EXISTS("clEnqueueFillImage" OPENCL_FOUND)
          # do not leak the probe's link libraries into the later
          # compile checks (TEST_BIG_ENDIAN, CHECK_ALIGNOF)
          unset(CMAKE_REQUIRED_LIBRARIES)
        endif()
      endif()

      if(OPENCL_FOUND)
        # no ocl-icd, but libopencl
        message(STATUS "libOpenCL (unknown ICD loader) found")
        set(DEFAULT_ENABLE_ICD 1 CACHE INTERNAL "ICD loader availability")
      else()
        message(STATUS "No ICD loader of any kind found (or its OpenCL version is <1.2)")
        # no ocl-icd, no libopencl
        set(DEFAULT_ENABLE_ICD 0 CACHE INTERNAL "no ICL loader found availability")
      endif()
    endif()
  endif()
endif()

setup_cached_var(ENABLE_ICD "Using an ICD loader"
  "Requested build with icd, but ICD loader not found! some examples will not work.."
  "ICD loader found, but requested build without it")

# With ICD the library is named "pocl"; without it, "OpenCL".
if(ENABLE_ICD)
  # only meaningful to link tests with ocl-icd
  set(TESTS_USE_ICD ${HAVE_OCL_ICD})
  set(POCL_LIBRARY_NAME "pocl")
else()
  set(TESTS_USE_ICD 0)
  set(POCL_LIBRARY_NAME "OpenCL")
endif()
message(STATUS "Run tests with ICD: ${TESTS_USE_ICD}")

######################################################################################

# INSTALL_OPENCL_HEADERS is tri-state: true, explicitly false, or undefined
# (auto: install our headers only when no opencl.h is found on the system).
if(INSTALL_OPENCL_HEADERS)
  message(STATUS "Install POCL's OpenCL headers: ${INSTALL_OPENCL_HEADERS}")
elseif(DEFINED INSTALL_OPENCL_HEADERS AND NOT INSTALL_OPENCL_HEADERS)
  message(STATUS "Not installing OpenCL headers.")
else() # Undefined = auto -> check
  find_file(OPENCL_H opencl.h PATH_SUFFIXES CL OpenCL)
  if(OPENCL_H)
    message(STATUS "OpenCL.h found, NOT installing our headers")
    set(IOH 0)
  else()
    message(STATUS "OpenCL.h not found, installing our headers")
    set(IOH 1)
  endif()
  set(INSTALL_OPENCL_HEADERS ${IOH} CACHE BOOL "Install POCL's OpenCL headers. (Ones from Khronos should be installed instead)")
endif()

######################################################################################

option(PEDANTIC "Compile host library with stricter compiler flags." OFF)
if(PEDANTIC)
  add_compile_options("-Wno-unused-result" "-Werror") # maybe "-Wimplicit"
endif()

######################################################################################

set_expr(POCL_KERNEL_CACHE_DEFAULT KERNEL_CACHE_DEFAULT)

# Record the configure time in a generated header
string(TIMESTAMP POCL_BUILD_TIMESTAMP "%d%m%Y%H%M%S")
file(WRITE "${CMAKE_BINARY_DIR}/pocl_build_timestamp.h" "#define POCL_BUILD_TIMESTAMP \"${POCL_BUILD_TIMESTAMP}\"")

####################################################################

# Host (basic/pthread) driver setup

set(DEFAULT_HOST_CLANG_FLAGS "${CLANG_TARGET_OPTION}${LLC_TRIPLE}")
set(DEFAULT_HOST_LLC_FLAGS "-relocation-model=pic -mtriple=${LLC_TRIPLE}")

if(ARM)
  option(ENABLE_FP64 "Enable FP64 on ARM32 - if you have at least VFP support for doubles, you can leave it ON" ON )
else()
  set(ENABLE_FP64 ON CACHE INTERNAL "FP64, always on except ARM")
endif()

# 32-bit ARM: pick the float ABI from the target triple
if(ARM32 OR (LLC_TRIPLE MATCHES "^arm"))
  if(LLC_TRIPLE MATCHES "gnueabihf")
    # hardfloat
    set(DEFAULT_HOST_LLC_FLAGS "${DEFAULT_HOST_LLC_FLAGS} -float-abi=hard")
    set(DEFAULT_HOST_CLANG_FLAGS "${DEFAULT_HOST_CLANG_FLAGS} -mfloat-abi=hard")
    set(DEFAULT_HOST_AS_FLAGS "${DEFAULT_HOST_AS_FLAGS} -mfloat-abi=hard")
  else()
    # softfloat
    set(HOST_FLOAT_SOFT_ABI 1)
    set(DEFAULT_HOST_LLC_FLAGS "${DEFAULT_HOST_LLC_FLAGS} -float-abi=soft")
    set(DEFAULT_HOST_CLANG_FLAGS "${DEFAULT_HOST_CLANG_FLAGS} -mfloat-abi=soft")
    set(DEFAULT_HOST_AS_FLAGS "${DEFAULT_HOST_AS_FLAGS} -mfloat-abi=soft")
  endif()
endif()

if(CL_DISABLE_HALF)
  set(DEFAULT_HOST_CLANG_FLAGS "${DEFAULT_HOST_CLANG_FLAGS} -D_CL_DISABLE_HALF")
endif()

# OpenCL version reported by the host CPU device
if(ENABLE_HOST_CPU_DEVICE_CL20)
  set(HOST_DEVICE_CL_VERSION "200")
  set(HOST_DEVICE_CL_STD "2.0")
  set(HOST_DEVICE_CL_VERSION_MAJOR 2)
  set(HOST_DEVICE_CL_VERSION_MINOR 0)
else()
  set(HOST_DEVICE_CL_VERSION "120")
  set(HOST_DEVICE_CL_STD "1.2")
  set(HOST_DEVICE_CL_VERSION_MAJOR 1)
  set(HOST_DEVICE_CL_VERSION_MINOR 2)
endif()

# define it here, b/c we'll need these both at runtime and buildtime
if(X86 OR ARM)
  set(HOST_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_3d_image_writes")
else() # set some conservative defaults
  set(HOST_DEVICE_EXTENSIONS "cl_khr_global_int32_base_atomics cl_khr_local_int32_base_atomics cl_khr_3d_image_writes")
endif()

if(ENABLE_SPIR)
  set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_spir")
  set(DEFAULT_DEVICE_EXTENSIONS "${DEFAULT_DEVICE_EXTENSIONS} cl_khr_spir")
endif()

if(NOT CL_DISABLE_HALF)
  set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp16")
  set(DEFAULT_DEVICE_EXTENSIONS "${DEFAULT_DEVICE_EXTENSIONS} cl_khr_fp16")
endif()

# must not be defined in HOST_DEVICE_EXTENSIONS list, because
# this extension doesn't exist in official extension list
# there is "cles_khr_int64" which indicates int64 support for embedded profiles
set(HOST_DEVICE_EXTENSION_DEFINES "-Dcl_khr_int64")

if(X86)
  set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics")
endif()

if(ENABLE_FP64)
  # the X86 branch above already lists cl_khr_fp64; append it only when it
  # is missing so the extension string never contains it twice
  if(NOT "${HOST_DEVICE_EXTENSIONS}" MATCHES "cl_khr_fp64")
    set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp64")
  endif()
  set(_CL_DISABLE_DOUBLE 0)
else()
  set(_CL_DISABLE_DOUBLE 1)
endif()

# Turn the extension list into "-D<ext>" defines plus one
# "-Xclang -cl-ext=-all,+<ext>,..." option.
set(TEMP_EXT "${HOST_DEVICE_EXTENSIONS}")
separate_arguments(TEMP_EXT)
set(TEMP_CLEXT "-Xclang -cl-ext=-all,")
foreach(EXT ${TEMP_EXT})
  set(HOST_DEVICE_EXTENSION_DEFINES "${HOST_DEVICE_EXTENSION_DEFINES} -D${EXT}")
  set(TEMP_CLEXT "${TEMP_CLEXT}+${EXT},")
endforeach()
set(HOST_DEVICE_EXTENSION_DEFINES "${HOST_DEVICE_EXTENSION_DEFINES} ${TEMP_CLEXT}")

if(NOT DEFINED KERNELLIB_HOST_CPU_VARIANTS)
  set(KERNELLIB_HOST_CPU_VARIANTS "native")
# else TODO test cpu list for unknown values
endif()

# "distro" expands to a per-architecture list of CPU variants
set(KERNELLIB_HOST_DISTRO_VARIANTS 0)
if(KERNELLIB_HOST_CPU_VARIANTS STREQUAL "distro")
  if(HOST_CPU_FORCED)
    message(FATAL_ERROR "Cannot build with CPU autodetection distro variants build, and enforce LLC_HOST_CPU at the same time. Please pick one")
  endif()
  if(X86_64 OR I386)
    set(KERNELLIB_HOST_CPU_VARIANTS sse2 ssse3 sse41 avx avx_f16c avx_fma4 avx2 avx512)
  elseif(POWERPC64LE)
    set(KERNELLIB_HOST_CPU_VARIANTS pwr8 pwr9)
  elseif(APPLE AND ARM64)
    set(KERNELLIB_HOST_CPU_VARIANTS cyclone)
  else()
    message(FATAL_ERROR "Don't know what CPU variants to use for kernel library on this platform.")
  endif()
  set(KERNELLIB_HOST_DISTRO_VARIANTS 1)
endif()

####################################################################

set(EXTRA_HOST_AS_FLAGS "" CACHE STRING "Extra parameters to as for code generation in the host. (default: empty)")
set(EXTRA_HOST_LD_FLAGS "" CACHE STRING "Extra parameter to compiler to generate loadable module. (default: empty)")
set(EXTRA_HOST_CLANG_FLAGS "" CACHE STRING "Extra parameters to clang for host compilation. (default: empty)")
set(EXTRA_HOST_LLC_FLAGS "" CACHE STRING "Extra parameters to llc for code generation in the host. (default: empty)")

####################################################################

set(HOST_AS_FLAGS "${DEFAULT_HOST_AS_FLAGS} ${EXTRA_HOST_AS_FLAGS}")
set(HOST_LD_FLAGS "${DEFAULT_HOST_LD_FLAGS} ${EXTRA_HOST_LD_FLAGS}" )
# HOST_LD_FLAGS_ARRAY: the same flags with each whitespace run replaced by `", "`
string(STRIP "${HOST_LD_FLAGS}" HOST_LD_FLAGS_STRIPPED)
string(REGEX REPLACE "[\r\n\t ]+" "\", \"" HOST_LD_FLAGS_ARRAY "${HOST_LD_FLAGS_STRIPPED}")
# string(REPLACE "###, ###" " oo \", \" oo " HOST_LD_FLAGS_ARRAY "${HOST_LD_FLAGS_ARRAY_1}")
set(HOST_CLANG_FLAGS "${DEFAULT_HOST_CLANG_FLAGS} ${EXTRA_HOST_CLANG_FLAGS}")
set(HOST_LLC_FLAGS "${DEFAULT_HOST_LLC_FLAGS} ${EXTRA_HOST_LLC_FLAGS}")

if(ENABLE_HOST_CPU_DEVICES)
  set(OCL_TARGETS "host")
  set(OCL_DRIVERS "basic pthreads")
  # TODO OCL_KERNEL_TARGET -> CPU_TARGET_TRIPLE
  # TODO OCL_KERNEL_TARGET_CPU -> OCL_KERNEL_TARGET_CPU
  set(OCL_KERNEL_TARGET "${LLC_TRIPLE}") #The kernel target triplet.
  set(OCL_KERNEL_TARGET_CPU "${LLC_HOST_CPU}") #The kernel target CPU variant.
  set(BUILD_BASIC 1)
  set(BUILD_PTHREAD 1)
endif()

# The accel device could be built by default, but it's implemented in C++,
# thus requires a C++ compiler, so let's not.
if(ENABLE_ACCEL_DEVICE)
  set(BUILD_ACCEL 1)
  set(OCL_DRIVERS "${OCL_DRIVERS} accel")
endif()

if(DEFINED EXTRA_OCL_TARGETS)
  set(OCL_TARGETS "${OCL_TARGETS} ${EXTRA_OCL_TARGETS}")
endif()

####################################################################

# Determine which device drivers to build.

# TCE detection: needs tce-config, tcecc and ttasim; the result is cached in
# DEFAULT_ENABLE_TCE / DEFAULT_ENABLE_TCEMC.
if(NOT DEFINED DEFAULT_ENABLE_TCE)
  set(HAVE_TCE 0)
  set(HAVE_TCEMC 0)
  if (NOT WITH_TCE)
    set(WITH_TCE ENV PATH)
  endif()

  # THESE are only used in makefile.am & scripts/pocl*
  set(TCE_TARGET_CLANG_FLAGS "" CACHE STRING "Extra parameters to Clang for TCE compilation.")
  set(TCE_TARGET_LLC_FLAGS "" CACHE STRING "Extra parameters to LLVM's llc for TCE compilation.")

  find_program(TCE_CONFIG NAMES "tce-config" HINTS ${WITH_TCE})
  find_program(TCECC NAMES "tcecc" HINTS ${WITH_TCE})
  find_program(TTASIM NAMES "ttasim" HINTS ${WITH_TCE})

  if(TCE_CONFIG AND TCECC AND TTASIM)
    message(STATUS "Found tcecc + tce-config + ttasim, testing setup")

    get_filename_component(TCE_BASEDIR "${TCE_CONFIG}" DIRECTORY)
    find_library(TCE_LIBS "tce" HINTS "${TCE_BASEDIR}/../lib" ENV PATH)
    if(NOT TCE_LIBS)
      execute_process(COMMAND "${TCE_CONFIG}" --libs OUTPUT_VARIABLE TCE_LIBS RESULT_VARIABLE RESV1 OUTPUT_STRIP_TRAILING_WHITESPACE)
    endif()
    execute_process(COMMAND "${TCE_CONFIG}" --includes OUTPUT_VARIABLE TCE_INCLUDES RESULT_VARIABLE RESV2 OUTPUT_STRIP_TRAILING_WHITESPACE)
    execute_process(COMMAND "${TCE_CONFIG}" --version OUTPUT_VARIABLE TCE_VERSION RESULT_VARIABLE RESV3 OUTPUT_STRIP_TRAILING_WHITESPACE)
    execute_process(COMMAND "${TCE_CONFIG}" --cxxflags OUTPUT_VARIABLE TCE_CXXFLAGS RESULT_VARIABLE RESV4 OUTPUT_STRIP_TRAILING_WHITESPACE)
    execute_process(COMMAND "${TCE_CONFIG}" --prefix OUTPUT_VARIABLE TCE_PREFIX RESULT_VARIABLE RESV5 OUTPUT_STRIP_TRAILING_WHITESPACE)
    execute_process(COMMAND "${TCE_CONFIG}" --llvm-config OUTPUT_VARIABLE TCE_LLVM_CONFIG RESULT_VARIABLE RESV6 OUTPUT_STRIP_TRAILING_WHITESPACE)
    execute_process(COMMAND "${TTASIM}" --help OUTPUT_VARIABLE TTASIM_HELP RESULT_VARIABLE RESV9)

    # a differing llvm-config only warns; it does not disable TCE
    if(NOT (LLVM_CONFIG STREQUAL TCE_LLVM_CONFIG))
      message(WARNING "tce-config returned llvm-config is ${TCE_LLVM_CONFIG} but LLVM_CONFIG given to pocl is ${LLVM_CONFIG}")
    endif()

    if (RESV1 OR RESV2 OR RESV3 OR RESV4 OR RESV5)
      message(WARNING "tce-config: Nonzero exit status, disabling TCE")
    elseif (RESV9)
      message(WARNING "ttasim: Nonzero exit status, disabling TCE")
    else()
      string(STRIP "${TCE_LIBS}" TCE_LIBS)
      separate_arguments(TCE_LIBS)
      string(STRIP "${TCE_INCLUDES}" TCE_INCLUDES)
      separate_arguments(TCE_INCLUDES)
      string(STRIP "${TCE_CXXFLAGS}" TCE_CXXFLAGS)
      separate_arguments(TCE_CXXFLAGS)
      string(STRIP "${TCE_VERSION}" TCE_VERSION)
      string(STRIP "${TCE_PREFIX}" TCE_PREFIX)

      set(TCE_LIBS "${TCE_LIBS}" CACHE INTERNAL "tce-config --libs")
      set(TCE_INCLUDES "${TCE_INCLUDES}" CACHE INTERNAL "tce-config --includes")
      set(TCE_VERSION "${TCE_VERSION}" CACHE INTERNAL "tce-config --version")
      set(TCE_CXXFLAGS "${TCE_CXXFLAGS}" CACHE INTERNAL "tce-config --cxxflags")
      set(TCE_PREFIX "${TCE_PREFIX}" CACHE INTERNAL "tce-config --prefix")

      set(HAVE_TCE 1)
      if(TCE_VERSION MATCHES "trunk")
        set(HAVE_TCEMC 1)
      endif()
    endif()
  else()
    message(STATUS "Failed to find tcecc or tce-config, disabling TCE")
  endif()

  set(DEFAULT_ENABLE_TCE ${HAVE_TCE} CACHE INTERNAL "TCE available")
  set(DEFAULT_ENABLE_TCEMC ${HAVE_TCEMC} CACHE INTERNAL "TCEMC available")
endif()

setup_cached_var(ENABLE_TCE "TCE support"
  "Requested enabling TCE, but no usable TCE installation found !"
  "TCE is available, but requested disabling it")

if(ENABLE_TCE)
  set(OCL_DRIVERS "${OCL_DRIVERS} tce")
  set(OCL_TARGETS "${OCL_TARGETS} tce")
  if(DEFAULT_ENABLE_TCEMC)
    set(ENABLE_TCEMC 1)
    set(OCL_DRIVERS "${OCL_DRIVERS} tcemc") # TCEMC is a "superset" of TCE (lp:tce) features.
  endif()

  set(TCE_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp16 cl_khr_spir")
  set(TEMP_EXT "${TCE_DEVICE_EXTENSIONS}")
  set(TCE_DEVICE_EXTENSION_DEFINES "")
  separate_arguments(TEMP_EXT)
  foreach(EXT ${TEMP_EXT})
    set(TCE_DEVICE_EXTENSION_DEFINES "${TCE_DEVICE_EXTENSION_DEFINES} -D${EXT}")
  endforeach()

  set(TCE_DEVICE_CL_VERSION "120")
  set(TCE_DEVICE_CL_STD "1.2")

  if("${LLVM_CXXFLAGS}" MATCHES "-fno-rtti")
    message(WARNING "TCE is enabled but your LLVM was not built with RTTI. You should rebuild LLVM with 'make REQUIRES_RTTI=1'. See the INSTALL file for more information.")
  endif()
else()
  set(ENABLE_TCEMC 0)
endif()

##########################################################

if(ENABLE_HSA)
  set(OCL_DRIVERS "${OCL_DRIVERS} hsa")
  if (HSAIL_ENABLED)
    set(OCL_TARGETS "${OCL_TARGETS} hsail64")
  endif()
  # this is for config.h
  set(HSA_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics")
  set(HSA_DEVICE_CL_VERSION "120")
  set(HSA_DEVICE_CL_STD "1.2")
  find_path(HAVE_HSA_EXT_AMD_H "hsa_ext_amd.h" HINTS "${HSA_INCLUDEDIR}" ENV PATH)
endif()

##########################################################

if(ENABLE_CUDA)
  if(NOT "${LLVM_ALL_TARGETS}" MATCHES "NVPTX")
    message(FATAL_ERROR "CUDA build requested but LLVM does not support NVPTX target!")
  endif()
  set(OCL_DRIVERS "${OCL_DRIVERS} cuda")
  set(OCL_TARGETS "${OCL_TARGETS} cuda")
  # this is for config.h
  # TODO unify with autotools
  set(BUILD_CUDA 1)
  set(CUDA_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics cl_nv_device_attribute_query")
  if(ENABLE_SPIR)
    set(CUDA_DEVICE_EXTENSIONS "${CUDA_DEVICE_EXTENSIONS} cl_khr_spir")
  endif()
  set(CUDA_DEVICE_CL_VERSION "120")
  set(CUDA_DEVICE_CL_STD "1.2")
endif()

##########################################################

message(STATUS "Building the following device drivers: ${OCL_DRIVERS}")

set(BUILDDIR "${CMAKE_BINARY_DIR}")
set(SRCDIR "${CMAKE_SOURCE_DIR}")

##########################################################

# Checks for library features.

if(NOT CMAKE_CROSSCOMPILING)
  # AC_C_BIGENDIAN
  include(TestBigEndian)
  TEST_BIG_ENDIAN(WORDS_BIGENDIAN)
else()
  # Set default as little-endian
  set(WORDS_BIGENDIAN 0)
endif()

##########################################################

# MAX_EXTENDED_ALIGNMENT is the measured alignment of double16, but never
# less than 128.
if (ENABLE_LLVM AND NOT CMAKE_CROSSCOMPILING)
  CHECK_ALIGNOF("double16" "typedef double double16 __attribute__((__ext_vector_type__(16)));" ALIGNOF_DOUBLE16)
else()
  set(ALIGNOF_DOUBLE16 128)
endif()

if(ALIGNOF_DOUBLE16 LESS 128)
  set(ALIGNOF_DOUBLE16 128)
endif()

set(MAX_EXTENDED_ALIGNMENT "${ALIGNOF_DOUBLE16}")

##########################################################

# Asserts are considered enabled unless the flags of the current build type
# contain "DNDEBUG".
string(TOUPPER "${CMAKE_BUILD_TYPE}" BTYPE)
if("${CMAKE_C_FLAGS_${BTYPE}}" MATCHES "DNDEBUG")
  set(POCL_ASSERTS_BUILD 0)
else()
  set(POCL_ASSERTS_BUILD 1)
endif()

##########################################################

# cmake docs:
# SOVERSION: What version number is this target.

# For shared libraries VERSION and SOVERSION can be used to specify the
# build version and API version respectively. When building or installing
# appropriate symlinks are created if the platform supports symlinks and
# the linker supports so-names. If only one of both is specified the
# missing is assumed to have the same version number.
#
# For executables VERSION can be used to specify the build version.
# SOVERSION is ignored if NO_SONAME property is set. For shared libraries
# and executables on Windows the VERSION attribute is parsed to extract
# a "major.minor" version number.
# These numbers are used as the
# image version of the binary.

# cmake usage:
# SET_TARGET_PROPERTIES(pocl PROPERTIES SOVERSION 1.6.3 VERSION 4) ...

# The libtool library version string to use (passed to -version-info).
# See: http://www.nondot.org/sabre/Mirrored/libtool-2.1a/libtool_6.html
# libpocl.so should get only API additions as we are implementing a standard.
#
# The library version encodings into the library file name are platform
# dependent. Therefore we need to be a bit verbose here for the pocl.icd file
# creation to succeed (see Makefile.am).
# Chiefly, GNU differs from BSD, and others are untested. See e.g.
# http://en.opensuse.org/openSUSE%3aShared_library_packaging_policy#Versioning_schemes
#
# 0:0:0 == 0.6
# 1:0:0 == 0.7 (not backwards compatible with 0:0:0 due to the ICD)
# 2:0:1 == 0.8 (currently backwards compatible with 0.7, thus age = 1).
# 3:0:2 == 0.9 (currently backwards compatible with 0.7, thus age = 2).
# 4:0:3 == 0.10 (currently backwards compatible with 0.7, thus age = 3).
# 5:0:4 == 0.11 (currently backwards compatible with 0.7, thus age = 4).
# 6:0:5 == 0.12 (currently backwards compatible with 0.7, thus age = 5).
# 7:0:6 == 0.13 (currently backwards compatible with 0.7, thus age = 6).
# 8:0:7 == 0.14 (currently backwards compatible with 0.7, thus age = 7).
# pocl 1.0 bumped the API version:
#  2:0:0 == 1.0 (the libpocl.so will be named libpocl.so.2.0.X )
#  3:0:1 == 1.1 (the libpocl.so will be named libpocl.so.2.1.X )
#  4:0:2 == 1.2 (the libpocl.so will be named libpocl.so.2.2.X )
#  5:0:3 == 1.3 (the libpocl.so will be named libpocl.so.2.3.X )
#  6:0:4 == 1.4 (the libpocl.so will be named libpocl.so.2.4.X )
#  7:0:5 == 1.5 (the libpocl.so will be named libpocl.so.2.5.X )
#  8:0:6 == 1.6 (the libpocl.so will be named libpocl.so.2.6.X )
#  9:0:7 == 1.7 (the libpocl.so will be named libpocl.so.2.7.X )
# 10:0:8 == 1.8 (the libpocl.so will be named libpocl.so.2.8.X )

# libtool-style "current:revision:age" triple for this release
set(LIB_AGE_VERSION "8")
set(LIB_REVISION_VERSION "0")
set(LIB_CURRENT_VERSION "10")

# first = current - age
math(EXPR LIB_FIRST_VERSION "${LIB_CURRENT_VERSION} - ${LIB_AGE_VERSION}")

# Mapping libtool -> cmake (values for set_target_properties):
# libtool is given "c:r:a" and produces ".so.(c-a).a.r"; cmake instead
# distinguishes a "build version" and an "API version".
set(LIB_API_VERSION "${LIB_FIRST_VERSION}")
set(LIB_BUILD_VERSION "${LIB_API_VERSION}.${LIB_AGE_VERSION}.${LIB_REVISION_VERSION}")

# The kernel compiler opt plugin shared library, however, changes more
# drastically. Let's try to follow the similar 'current' numbering as
# the pocl host API library and perhaps tune the 'revision' and 'age' later.
# Kernel compiler plugin library version: "<LIB_CURRENT_VERSION + 7>.0.0"
math(EXPR KER_LIB_CURRENT_VERSION "${LIB_CURRENT_VERSION} + 7")
set(KERNEL_COMPILER_LIB_VERSION "${KER_LIB_CURRENT_VERSION}.0.0")

##########################################################

#TODO
# these vars are copies b/c tons of sources use BUILD_ICD etc
set(BUILD_ICD ${ENABLE_ICD})
set(BUILD_HSA ${ENABLE_HSA})
set(TCE_AVAILABLE ${ENABLE_TCE})
set(TCEMC_AVAILABLE ${ENABLE_TCEMC})
set(_CL_DISABLE_HALF ${CL_DISABLE_HALF})
set(PACKAGE_VERSION "${POCL_VERSION}")

# Generate config.h / config2.h into temporary ".new" files and hand them to
# rename_if_different() together with the final header path.
configure_file("config.h.in.cmake" "config.h.new" ESCAPE_QUOTES)
rename_if_different("${CMAKE_BINARY_DIR}/config.h.new" "${CMAKE_BINARY_DIR}/config.h")

configure_file("config2.h.in.cmake" "config2.h.new")
rename_if_different("${CMAKE_BINARY_DIR}/config2.h.new" "${CMAKE_BINARY_DIR}/config2.h")

include_directories("${CMAKE_BINARY_DIR}")

# This is used to generate the compiler feature detection header.
# Currently it's not enabled because it requires CMake > 3.x and
# also the autogenerated header needs some editing by hand
# (it errors on all compilers except gcc > 4 and clang > 3)
#
#
#include(WriteCompilerDetectionHeader)
#write_compiler_detection_header(
#  FILE "${CMAKE_BINARY_DIR}/compiler_features.h"
#  PREFIX POCL
#  COMPILERS GNU Clang
#  FEATURES
#    c_function_prototypes
#    c_restrict
#    c_static_assert
#    c_variadic_macros
#)

##########################################################

if(ENABLE_ICD)
  # The .icd file holds the library file name, optionally prefixed with the
  # public libdir. The file name is only known at generate time, hence the
  # generator expressions (a bare "$" would be written out literally).
  # NOTE(review): assumes the library target is named ${POCL_LIBRARY_NAME};
  # the target is created in a subdirectory - confirm.
  if(POCL_ICD_ABSOLUTE_PATH)
    set(CONTENT "${POCL_INSTALL_PUBLIC_LIBDIR}/$<TARGET_FILE_NAME:${POCL_LIBRARY_NAME}>")
  else()
    set(CONTENT "$<TARGET_FILE_NAME:${POCL_LIBRARY_NAME}>")
  endif()
  file(GENERATE OUTPUT "${CMAKE_BINARY_DIR}/pocl.icd" CONTENT "${CONTENT}" CONDITION 1)
  install(FILES "${CMAKE_BINARY_DIR}/pocl.icd"
          DESTINATION "${POCL_INSTALL_ICD_VENDORDIR}" COMPONENT "icd")

  # write icd file for pocl testing
  # (full path of the library in the build tree)
  file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/ocl-vendors")
  file(GENERATE OUTPUT "${CMAKE_BINARY_DIR}/ocl-vendors/pocl-tests.icd" CONTENT "$<TARGET_FILE:${POCL_LIBRARY_NAME}>" CONDITION 1)

  cpack_add_component("icd")
  set("CPACK_DEBIAN_ICD_PACKAGE_NAME" "pocl-opencl-icd")
  list(APPEND CPACK_DEBIAN_ICD_PACKAGE_DEPENDS "libpocl2 (>= ${CPACK_PACKAGE_VERSION}~)")
  set(CPACK_DEBIAN_ICD_PACKAGE_PROVIDES "opencl-icd,opencl-icd-1.1-1,opencl-icd-1.2-1")
  set(CPACK_DEBIAN_ICD_PACKAGE_RECOMMENDS "poclcc")
endif()

# Sanitizer builds: write an LSan suppression file and make CTest point
# LSAN_OPTIONS at it.
if(ENABLE_ASAN OR ENABLE_LSAN)
  file(GENERATE OUTPUT "${CMAKE_BINARY_DIR}/lsan.supp" CONTENT "leak:${LLVM_SRC_ROOT}/lib/Support/Unix/Signals.inc")
  set(SAN_EXTRA "set(ENV{LSAN_OPTIONS} \"suppressions=${CMAKE_BINARY_DIR}/lsan.supp\")")
endif()

# CTestCustom.cmake sets the environment for the test runs. Each command must
# be on its own line in the generated file, so the content is a multi-line
# string.
file(GENERATE OUTPUT "${CMAKE_BINARY_DIR}/CTestCustom.cmake" CONTENT "
  ${SAN_EXTRA}
  set(ENV{POCL_ENABLE_UNINIT} \"1\")
  set(ENV{POCL_BUILDING} \"1\")
  set(ENV{OCL_ICD_VENDORS} \"${CMAKE_BINARY_DIR}/ocl-vendors\")
")

##########################################################

if(UNIX)
  configure_file("${CMAKE_SOURCE_DIR}/pocl.pc.in.cmake" "${CMAKE_BINARY_DIR}/pocl.pc" @ONLY)
  install(FILES "${CMAKE_BINARY_DIR}/pocl.pc"
          DESTINATION "${POCL_INSTALL_PKGCONFIG_DIR}" COMPONENT "dev")
endif()

# For now always use a mirror copy of ocml, but allow overriding
# this path later to point to an out-of-tree copy.
set(OCML_SOURCE_DIR "${CMAKE_SOURCE_DIR}/lib/kernel/ocml")

#############################################################

add_subdirectory("include")

add_subdirectory("lib")

# these are set in lib/cmakelists.txt
foreach(_pocl_summary_var OPENCL_LIBS OPENCL_CFLAGS)
  message(STATUS "${_pocl_summary_var}: ${${_pocl_summary_var}}")
endforeach()

# for tests / examples
set(POCLU_LINK_OPTIONS poclu ${OPENCL_LIBS} ${LIBMATH})
message(STATUS "POCLU LINK OPTS: ${POCLU_LINK_OPTIONS}")

# poclcc bin
if(ENABLE_POCLCC)
  add_subdirectory("bin")
endif()

include(add_test_pocl)

if(ENABLE_TESTS)
  add_subdirectory("tests")

  # make check & make check_tier1
  add_custom_target(check
    COMMAND ${CMAKE_CTEST_COMMAND} "--output-on-failure" -j ${CORECOUNT}
    ${COMMAND_USES_TERMINAL})
  add_custom_target(check_tier1
    COMMAND ${CMAKE_CTEST_COMMAND} "--output-on-failure" -L "'internal|amdsdk_30|piglit|PyOpenCL|conformance_suite_micro'" -j ${CORECOUNT}
    ${COMMAND_USES_TERMINAL})
endif()

if(ENABLE_EXAMPLES)
  add_subdirectory("examples")
endif()

# CPack: package metadata, then Debian component packaging
set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_SOURCE_DIR}/CPack.pocl.description.txt")
set(CPACK_PACKAGE_ICON "${CMAKE_SOURCE_DIR}/doc/www/img/pocl-80x60.png")
set(CPACK_PACKAGE_HOMEPAGE_URL "https://github.com/pocl/pocl")
set(CPACK_PACKAGE_CONTACT "https://github.com/pocl/pocl")
set(CPACK_PACKAGE_CHECKSUM "SHA512")
set(CPACK_RESOURCE_FILE_README "${CMAKE_SOURCE_DIR}/README")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING")

set(CPACK_GENERATOR "DEB")
set("CPACK_DEBIAN_DEV_PACKAGE_NAME" "libpocl-dev")
list(APPEND CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "libpocl2 (>= ${CPACK_PACKAGE_VERSION}~)")
set(CPACK_DEBIAN_DEV_PACKAGE_BREAKS "libpocl1-common (<< 0.13-9)")
set(CPACK_DEBIAN_DEV_PACKAGE_REPLACES "libpocl1-common (<< 0.13-9)")

set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_DEB_COMPONENT_INSTALL ON)

include(CPack)

##########################################################

# Configuration summary. Runs of "NAME: value" lines are printed from a list
# of variable names; lines with extra wording stay explicit.

message(STATUS " ")
message(STATUS "*********************** SUMMARY ***************************")
message(STATUS " ")
message(STATUS "******* Directories:")
message(STATUS " ")
foreach(_pocl_summary_var
    CMAKE_INSTALL_PREFIX
    POCL_INSTALL_CMAKE_CONFIG_DIR
    POCL_INSTALL_ICD_VENDORDIR
    POCL_INSTALL_OPENCL_HEADER_DIR
    POCL_INSTALL_PKGCONFIG_DIR
    POCL_INSTALL_PRIVATE_DATADIR
    POCL_INSTALL_PRIVATE_HEADER_DIR
    POCL_INSTALL_PRIVATE_LIBDIR
    POCL_INSTALL_PUBLIC_BINDIR
    POCL_INSTALL_PUBLIC_HEADER_DIR
    POCL_INSTALL_PUBLIC_LIBDIR)
  message(STATUS "${_pocl_summary_var}: ${${_pocl_summary_var}}")
endforeach()
message(STATUS " ")

if (ENABLE_LLVM)
  message(STATUS " ")
  message(STATUS "******* LLVM Programs:")
  message(STATUS " ")
  foreach(_pocl_summary_var
      LLVM_CONFIG LLVM_OPT LLVM_LLC LLVM_AS LLVM_LINK LLVM_LLI)
    message(STATUS "${_pocl_summary_var}: ${${_pocl_summary_var}}")
  endforeach()
  message(STATUS "WITH_LLVM_CONFIG (User preferred llvm-config): ${WITH_LLVM_CONFIG}")
endif()

message(STATUS " ")
message(STATUS "******* Various Flags:")
message(STATUS " ")
foreach(_pocl_summary_var
    CL_DISABLE_HALF
    HAVE_CLOCK_GETTIME
    HAVE_GLEW
    HAVE_LTTNG_UST
    HOST_AS_FLAGS
    HOST_CLANG_FLAGS
    HOST_LD_FLAGS
    HOST_LLC_FLAGS)
  message(STATUS "${_pocl_summary_var}: ${${_pocl_summary_var}}")
endforeach()

if (ENABLE_HSA)
  message(STATUS "")
  foreach(_pocl_summary_var HSA_INCLUDES HSALIB HSAIL_ASM)
    message(STATUS "${_pocl_summary_var}: ${${_pocl_summary_var}}")
  endforeach()
endif()

message(STATUS "")
foreach(_pocl_summary_var
    LIB_API_VERSION
    LIB_BUILD_VERSION
    ICD_LD_FLAGS
    EXTRA_KERNEL_FLAGS
    EXTRA_KERNEL_CXX_FLAGS
    EXTRA_KERNEL_CL_FLAGS
    EXTRA_KERNEL_C_FLAGS)
  message(STATUS "${_pocl_summary_var}: ${${_pocl_summary_var}}")
endforeach()
foreach(_pocl_summary_var KERNEL_CXX_FLAGS KERNEL_CL_FLAGS KERNEL_C_FLAGS)
  message(STATUS "final ${_pocl_summary_var}: ${${_pocl_summary_var}}")
endforeach()

if (ENABLE_LLVM)
  message(STATUS "")
  foreach(_pocl_summary_var
      CLANG_HAS_64B_MATH
      CLANG_HAS_128B_MATH
      CLANG_NEEDS_RTLIB
      CLANG_MARCH_FLAG
      CLANG_TARGET_OPTION
      LLVM_VERSION
      LLVM_LIB_IS_SHARED
      LLVM_HAS_RTTI
      LLVM_LIB_MODE
      LLVM_ASSERTS_BUILD
      LLVM_BUILD_MODE
      LLVM_CFLAGS
      LLVM_CXXFLAGS
      LLVM_CPPFLAGS
      LLVM_LDFLAGS
      LLVM_LIBDIR
      LLVM_INCLUDEDIR
      LLVM_SRC_ROOT
      LLVM_OBJ_ROOT
      LLVM_INCLUDE_DIRS
      LLVM_ALL_TARGETS
      LLVM_HOST_TARGET
      LLC_TRIPLE
      LLC_HOST_CPU)
    message(STATUS "${_pocl_summary_var}: ${${_pocl_summary_var}}")
  endforeach()
  message(STATUS "")
endif()
MESSAGE(STATUS "MAX_EXTENDED_ALIGNMENT: ${MAX_EXTENDED_ALIGNMENT}") MESSAGE(STATUS "OCL_KERNEL_TARGET: ${OCL_KERNEL_TARGET}") MESSAGE(STATUS "OCL_KERNEL_TARGET_CPU: ${OCL_KERNEL_TARGET_CPU}") MESSAGE(STATUS "HOST_DEVICE_ADDRESS_BITS: ${HOST_DEVICE_ADDRESS_BITS}") if (ENABLE_TCE) MESSAGE(STATUS "") MESSAGE(STATUS "TCE_TARGET_CLANG_FLAGS: ${TCE_TARGET_CLANG_FLAGS}") MESSAGE(STATUS "TCE_TARGET_LLC_FLAGS: ${TCE_TARGET_LLC_FLAGS}") MESSAGE(STATUS "TCE_CXXFLAGS: ${TCE_CXXFLAGS}") MESSAGE(STATUS "TCE_INCLUDES: ${TCE_INCLUDES}") MESSAGE(STATUS "TCE_LIBS: ${TCE_LIBS}") MESSAGE(STATUS "TCE_VERSION: ${TCE_VERSION}") MESSAGE(STATUS "TCE_PREFIX: ${TCE_PREFIX}") endif() MESSAGE(STATUS "") if (ENABLE_LLVM) MESSAGE(STATUS "----------- -------------------------------- --------") MESSAGE(STATUS "llvm libs libpocl will be linked to (POCL_LLVM_LIBS):") MESSAGE(STATUS "${POCL_LLVM_LIBS}") MESSAGE(STATUS "----------- -------------------------------- --------") MESSAGE(STATUS "clang libs libpocl will be linked to (CLANG_LIBFILES):") MESSAGE(STATUS "${CLANG_LIBFILES}") MESSAGE(STATUS "----------- -------------------------------- --------") MESSAGE(STATUS "system libs libpocl will be linked to (LLVM_SYSLIBS):") MESSAGE(STATUS "${LLVM_SYSLIBS}") MESSAGE(STATUS "----------- -------------------------------- --------") endif() MESSAGE(STATUS "******* Enabled features:") MESSAGE(STATUS " ") MESSAGE(STATUS "DEVELOPER_MODE: ${DEVELOPER_MODE}") MESSAGE(STATUS "ENABLE_CONFORMANCE: ${ENABLE_CONFORMANCE}") if(ARM) MESSAGE(STATUS "ENABLE_FP64: ${ENABLE_FP64}") endif() MESSAGE(STATUS "ENABLE_IPO: ${ENABLE_IPO}") MESSAGE(STATUS "ENABLE_ICD: ${ENABLE_ICD}") MESSAGE(STATUS "ENABLE_TCE: ${ENABLE_TCE}") MESSAGE(STATUS "ENABLE_TCEMC: ${ENABLE_TCEMC}") MESSAGE(STATUS "ENABLE_HSA: ${ENABLE_HSA}") MESSAGE(STATUS "ENABLE_CUDA: ${ENABLE_CUDA}") MESSAGE(STATUS "ENABLE_ASAN (address sanitizer): ${ENABLE_ASAN}") MESSAGE(STATUS "ENABLE_LSAN (leak sanitizer): ${ENABLE_LSAN}") MESSAGE(STATUS "ENABLE_TSAN (thread 
sanitizer): ${ENABLE_TSAN}") MESSAGE(STATUS "ENABLE_UBSAN (UB sanitizer): ${ENABLE_UBSAN}") MESSAGE(STATUS "ENABLE_POCL_FLOAT_CONVERSION: ${ENABLE_POCL_FLOAT_CONVERSION}") MESSAGE(STATUS "ENABLE_RELOCATION: ${ENABLE_RELOCATION}") MESSAGE(STATUS "ENABLE_SLEEF: ${ENABLE_SLEEF}") MESSAGE(STATUS "ENABLE_SPIR: ${ENABLE_SPIR}") MESSAGE(STATUS "ENABLE_SPIRV: ${ENABLE_SPIRV}") MESSAGE(STATUS "ENABLE_POCL_BUILDING: ${ENABLE_POCL_BUILDING}") MESSAGE(STATUS "INSTALL_OPENCL_HEADERS (Install our headers): ${INSTALL_OPENCL_HEADERS}") MESSAGE(STATUS "OCL_DRIVERS (Drivers built): ${OCL_DRIVERS}") MESSAGE(STATUS "OCL_TARGETS (Targets built): ${OCL_TARGETS}") MESSAGE(STATUS "ENABLE_LLVM: ${ENABLE_LLVM}") if(PARALLEL_COMPILE_JOBS AND CMAKE_GENERATOR STREQUAL "Ninja") MESSAGE(STATUS "PARALLEL_COMPILE_JOBS: ${PARALLEL_COMPILE_JOBS}") endif() if(PARALLEL_LINK_JOBS AND CMAKE_GENERATOR STREQUAL "Ninja") MESSAGE(STATUS "PARALLEL_LINK_JOBS: ${PARALLEL_LINK_JOBS}") endif() MESSAGE(STATUS "POCL_ICD_ABSOLUTE_PATH: ${POCL_ICD_ABSOLUTE_PATH}") MESSAGE(STATUS "POCL_ASSERTS_BUILD: ${POCL_ASSERTS_BUILD}") MESSAGE(STATUS "TESTS_USE_ICD: ${TESTS_USE_ICD}") MESSAGE(STATUS "Available testsuites: ${ALL_TESTSUITES}") MESSAGE(STATUS "Enabled testsuites: ${ACTUALLY_ENABLED_TESTSUITES}") MESSAGE(STATUS "Disabled testsuites: ${DISABLED_TESTSUITES}") MESSAGE(STATUS "Testsuites are built from git master: ${EXAMPLES_USE_GIT_MASTER}") MESSAGE(STATUS "Kernel caching: ${KERNEL_CACHE_DEFAULT}") MESSAGE(STATUS "Kernel library CPU variants: ${KERNELLIB_HOST_CPU_VARIANTS}") MESSAGE(STATUS "Kernel library distro build: ${KERNELLIB_HOST_DISTRO_VARIANTS}") MESSAGE(STATUS "Use pocl custom memory allocator: ${USE_POCL_MEMMANAGER}") MESSAGE(STATUS "L1d cacheline size: ${HOST_CPU_CACHELINE_SIZE}") pocl-1.8/COPYING000066400000000000000000000020431413131625300133500ustar00rootroot00000000000000Copyright (c) 2011 pocl developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 
associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pocl-1.8/CPack.pocl.description.txt000066400000000000000000000005551413131625300173230ustar00rootroot00000000000000Portable Computing Language is an open source implementation of the OpenCL standard which can be easily adapted for new targets. One of the goals of the project is improving performance portability of OpenCL programs, avoiding the need for target-dependent manual optimizations. A "native" target is included, which allows running OpenCL kernels on the host (CPU). pocl-1.8/CREDITS000066400000000000000000000046671413131625300133530ustar00rootroot00000000000000This file tries to list all the people contributed code, documentation, test cases, etc. to the pocl project in the chronological order of their first contribution. Please tell if we missed your name (sorry in advance). 
Carlos Sánchez de La Lama Pekka Jääskeläinen Erik Schnetter Heikki Kultala Vladimír Guzma Kalle Raiskila Vincent Danjean Timo Viitanen Cyril Roelandt Victor Oliveira Jesse Towner Brandon Surmanski Bryan Bell Andreas Klöckner Oliver Hartmann Ville Korhonen Giuseppe Bilotta Peter Colberg Mikael Lepistö Clément Léger Logan Chien Richard Sandiford (Scalarizer.cpp) Michal Babej Kristian Mört Felix Bytow Matias Koskela Martin Stumpf James Price Lars-Dominik Braun Daniel Sanders Lee Ki-ju Krishnaraj Bhat Martin Hauke Volkan Keleş Lassi Koskinen Hugo van der Wijst Mateusz Szpakowski Lars Buitinck (larsmans) Chen Chou-chuan Shao-chung Wang Pavan Yalamanchili Romaric Jodin Masataro Asai Richard Crowder Matthias Noack Sam McKelvie Tom Gall Arda Coskunses Minh Quan HO Matt Wala Jonas Hahnfeld Ronan Keryell Rodrigo Tobar Martin Krastev Tom Stellard Nick Curtis Konstantin Bakanov Andreas Beckmann Isuru Fernando Jeff Hammond Julius Ikkala Steve Holland Wilfried Holzke Maxim Eremenko Andrew Gozillon Jan Solanti Stefan Brüns Tobias Baumann Alberto Cerato Simon Branford Alexandru Fikl Matthias Diener Mauri Mustonen Lars Herschke Roman Rusyaev Väinö Liukko Nia Alarie fn ⌃ ⌥ (FnControlOption) Tom Rix Joachim Meyer Alexandre Ghiti pocl-1.8/INSTALL000077700000000000000000000000001413131625300211202doc/sphinx/source/install.rstustar00rootroot00000000000000pocl-1.8/LICENSE000066400000000000000000000020431413131625300133220ustar00rootroot00000000000000Copyright (c) 2011 pocl developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all 
copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pocl-1.8/README000066400000000000000000000006141413131625300131770ustar00rootroot00000000000000Portable Computing Language (pocl) ---------------------------------- pocl is being developed towards an efficient implementation of OpenCL standard which can be easily adapted for new targets. Please refer to the file INSTALL in this directory for building and installing pocl. More documentation available at http://portablecl.org/docs/html/ The main web page is at http://portablecl.org pocl-1.8/README.ARM000066400000000000000000000012541413131625300136160ustar00rootroot00000000000000pocl builds (as of Aug 2017) on ODROID XU3 and ODROID C2 but some tests fail. How to build: * get a clang / llvm. DO NOT use the ones downloaded from llvm.org, they only work on the distro where they were compiled. Ubuntu LTS these days ships multiple llvm versions even quite recent ones; get the clang+llvm from your distro's packages. * read the pocl install/build instructions in docs * LLVM will likely not recognize your cpu, and running cmake will give you a warning. run cmake with -DLLC_HOST_CPU=. "yourcpu" must be something LLVM recognizes, usually it's simply "cortex-aXX" like cortex-a15 etc. You can get the full list by running `llc -mcpu=help`. 
pocl-1.8/README.Windows000066400000000000000000000026121413131625300146300ustar00rootroot00000000000000 # WARNING: this document is seriously outdated # # considering we haven't had anyone maintaining # the Windows build since about 2016, getting the # current PoCL code to build on Windows will likely # require much more effort than this howto describes. # Compiling pocl on Windows ## Dependencies: - Visual Studio 2013 - Git and Git bash http://git-scm.com/downloads - CMake 2.8 or newer http://www.cmake.org/download/ - Python 2.7 for LLVM - Pthreads-win32 binary distribution https://www.sourceware.org/pthreads-win32/ - Hwloc for Windows x64 binary distribution http://www.open-mpi.org/software/hwloc/v1.10/ - LLVM + Clang latest release sources ## Support: - Only 64bit compiling for now - No ICD compiling - No VML (no stdcxxlib finding done for windows) - Static compilation ## Building There is shell script in `pocl/windows/setup_and_build_win64.sh` Shell script may be ran in `Git Bash` and it downloads and installs pocl and all the library dependencies and builds them to `/c/pocl-playground`. To download and build everything without first fetching pocl repository one can do simply: curl https://github.com/pocl/pocl/raw/master/windows/setup_and_build_win64.sh | sh Script requires following software installed on Windows 7 or later (64bit only): - Visual Studio 2013 (e.g. community edition) - Cmake 2.8 or later (must be added to PATH) - Git + Git Bash - Python 2.7 for compiling LLVM pocl-1.8/README.packaging000066400000000000000000000027301413131625300151230ustar00rootroot00000000000000This file contains notes for making distribution packages of pocl. ICD --- Pocl should probably be built with ICD enabled (``-DENABLE_ICD=ON`` CMake option) for desktop distributions. Pocl does not have an ICD loader, so a dependancy on one would be beneficial. 
CMake options for a distribution build -------------------------------------- - ``-DKERNELLIB_HOST_CPU_VARIANTS=distro`` Note: this note only works for x86(-64) platform currently, on other platforms, it has zero effect. Enables runtime detection of CPU and builds separate kernel libraries for most common x86 CPUs. - ``-DPOCL_ICD_ABSOLUTE_PATH=OFF`` The pocl.icd file (which the ICD loader uses to load the pocl lib) by default has a full path to the installed libpocl.so file. Set this option to OFF and pocl will only put the dynamic library name into pocl.icd. - ``-DENABLE_POCL_BUILDING=OFF`` When OFF, POCL_BUILDING option (which causes pocl to look for required files in build / source directories) will be ignored and pocl will always look in installed paths only. Mesa (OpenGL) interoperability ------------------------------ On some current (Jan 2014) Linux distibutions, mesa is built with LLVMpipe. If pocl is built against a shared LLVM library, the mesa calls to its LLVM will be re-routed to the LLVM linked in pocl, causing a segfault. Consider linking LLVM statically to pocl. At least 'nouveau' and 'swrast_dri' are known to suffer from this. See https://github.com/pocl/pocl/issues/46 pocl-1.8/TODO000066400000000000000000000026021413131625300130060ustar00rootroot00000000000000Known ambiguous OpenCL 1.2 features ----------------------------------- The OpenCL 1.2 and later standards are very ambiguous when it comes to sub-devices. On the one hand, they claim that sub-devices can be used wherever their parent devices can be used, on the other hand various parts of the standard hint that they should be treated independently. In particular, it's not clear whether sub-devices can be used within a context that only holds their parent device, or not. This might even depend on whether the context was created "from type" or not. 
The implementation of subdevices in pocl currently converts subdevices to their parents in most places, with the exception being clEnqueueNDRangeKernel. This means, for example, that sub-devices can be used in a context that does not contain them (but contains their parent device). Note this is equivalent to the AMD behavior (which is tested in the DeviceFission AMD APP SDK example), but differs from e.g. Intel's behavior. Clarification from the standard body is needed on which behavior is correct. Known missing OpenCL 1.2 features --------------------------------- Missing APIs used by the tested OpenCL example suites are entered here. OpenCL 1.2 Extensions * 9.7 Sharing Memory Objects with OpenGL / OpenGL ES Buffer, Texture and Renderbuffer Objects * 9.7.6 Sharing memory objects that map to GL objects between GL and CL contexts pocl-1.8/ToolchainExample.cmake000066400000000000000000000047721413131625300165660ustar00rootroot00000000000000# This is an example Toolchain file to cross-compile for ARM/MIPS/other # boards from x86_64. Copy & modify. Skip 4-8 if using LLVM less build # # x86_64 = "build" # ARM/MIPS/other board = "host" or "board" # # Steps: # (note: hwloc is now optional) # 1) on build system, install g++ and gcc cross-compilers # (apt install gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf) # 2) On the board, install ocl-icd and libhwloc (optional) + their development headers # 3) copy the entire root filesystem of the board somewhere to the build system, # then set CMAKE_FIND_ROOT_PATH below to this path # 4) Build clang and llvm for the build system and install them. ($BUILD_PREFIX) # 5) Build clang and llvm for the host and install them. ($HOST_PREFIX) # 6) copy llvm-config from build to host. 
(cp $BUILD_PREFIX/bin/llvm-config $HOST_PREFIX/bin/llvm-config) # 7) Install pkg-config for build # 8) Install hwloc, ocl-icd for host and set `PKG_CONFIG_PATH` env variable to the paths # eg: export PKG_CONFIG_PATH=/path/to/hwloc/prefix/lib/pkgconfig:/path/to/opencl/prefix/lib/pkgconfig # 9) run cmake like this: # cmake -DHOST_DEVICE_BUILD_HASH= (see below) # -DENABLE_LLVM=<0 if LLVM-less, 1 if with LLVM> # -DCMAKE_TOOLCHAIN_FILE= # -DCMAKE_PREFIX_PATH=$HOST_PREFIX # -DLLC_TRIPLE= # -DLLVM_BINDIR=$BUILD_PREFIX/bin # # # ... where SOME_HASH is a string that can be set to anything; # when loading OpenCL program binaries, PoCL uses it to check # that the PoCL which built the binary is compatible with the # PoCL that's loading the binary. SET(CMAKE_SYSTEM_NAME Linux) # specify the cross compiler SET(CMAKE_C_COMPILER /usr/bin/arm-linux-gnueabihf-gcc) SET(CMAKE_CXX_COMPILER /usr/bin/arm-linux-gnueabihf-g++) # should work, but does not yet. Instead set FIND_ROOT below # set(CMAKE_SYSROOT /home/a/zynq/ZYNQ_ROOT) # where is the target environment SET(CMAKE_FIND_ROOT_PATH /path/to/target_ROOT) # where to find libraries in target environment SET(CMAKE_LIBRARY_PATH /path/to/target_ROOT/usr/lib/arm-linux-gnueabihf) # search for programs in the build host directories SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) # for libraries and headers in the target directories SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) pocl-1.8/bin/000077500000000000000000000000001413131625300130665ustar00rootroot00000000000000pocl-1.8/bin/CMakeLists.txt000066400000000000000000000033351413131625300156320ustar00rootroot00000000000000#============================================================================= # CMake build system files # # Copyright (c) 2016 pocl developers # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without 
restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # #============================================================================= add_executable(poclcc poclcc.c "${CMAKE_SOURCE_DIR}/lib/poclu/misc.c") harden(poclcc) target_link_libraries(poclcc ${OPENCL_LIBS}) target_compile_definitions(poclcc PRIVATE DISABLE_OPENCL_20) install(TARGETS "poclcc" RUNTIME DESTINATION "${POCL_INSTALL_PUBLIC_BINDIR}" COMPONENT "poclcc") set("CPACK_DEBIAN_POCLCC_PACKAGE_NAME" "poclcc") list(APPEND CPACK_DEBIAN_POCLCC_PACKAGE_DEPENDS "opencl-icd") pass_through_cpack_vars() pocl-1.8/bin/poclcc.c000066400000000000000000000223741413131625300145050ustar00rootroot00000000000000/* Pocl tool: poclcc Copyright (c) 2016 pocl developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and 
this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include "poclu.h" #define DEVICE_INFO_MAX_LENGTH 2048 #define NUM_OF_DEVICE_ID 32 #define NUM_OPTIONS 6 #define ERRNO_EXIT(filename) do { \ printf("IO error on file %s: %s\n", filename, strerror(errno)); \ exit(2); \ } while(0) char *kernel_source = NULL; char *output_file = NULL; cl_uint opencl_device = CL_DEVICE_TYPE_DEFAULT; unsigned opencl_device_id = 0; int list_devices = 0; int list_devices_only = 0; char *build_options = NULL; /**********************************************************/ typedef int(*poclcc_process)(int, char **, int); typedef struct _poclcc_option { poclcc_process fct; char *id; char *helper; int num_args_read; } poclcc_option; /**********************************************************/ poclcc_option *options_help; static int print_help() { printf("USAGE: poclcc [OPTION]... 
[FILE]\n"); printf("\n"); printf("OPTIONS:\n"); int i; for (i=0; i= argc) return poclcc_error("Incomplete argument for input file!\n"); char *filename = argv[arg]; char *ext = ".pocl"; kernel_source = poclu_read_file(filename); if (!kernel_source) ERRNO_EXIT(filename); if (output_file == NULL) { output_file = malloc (strlen (filename) + strlen (ext) + 2); strcpy(output_file, filename); strcat(output_file, ext); } return 0; } /********************************************************** * OPTIONS PROCESS FUNCTIONS*/ static int process_help(int arg, char **argv, int argc) { print_help(); return 0; } static int process_output(int arg, char **argv, int argc) { if (arg >= argc) return poclcc_error("Incomplete argument for output file!\n"); output_file = argv[arg]; return 0; } static int process_opencl_device(int arg, char **argv, int argc) { if (arg >= argc) return poclcc_error("Incomplete argument for device_type!\n"); char *opencl_string = argv[arg]; if (!strcmp(opencl_string, "CL_DEVICE_TYPE_CPU")) opencl_device = CL_DEVICE_TYPE_CPU; else if (!strcmp(opencl_string, "CL_DEVICE_TYPE_GPU")) opencl_device = CL_DEVICE_TYPE_GPU; else if (!strcmp(opencl_string, "CL_DEVICE_TYPE_ACCELERATOR")) opencl_device = CL_DEVICE_TYPE_ACCELERATOR; else if (!strcmp(opencl_string, "CL_DEVICE_TYPE_DEFAULT")) opencl_device = CL_DEVICE_TYPE_DEFAULT; else if (!strcmp(opencl_string, "CL_DEVICE_TYPE_ALL")) opencl_device = CL_DEVICE_TYPE_ALL; else { printf("Invalid argument for device_type!\n"); return print_help(); } return 0; } static int process_build_options(int arg, char **argv, int argc) { if (arg >= argc) return poclcc_error("Incomplete argument for build_options!\n"); build_options = argv[arg]; return 0; } static int process_device_id(int arg, char **argv, int argc) { if (arg >= argc) return poclcc_error("Incomplete argument for build_options!\n"); opencl_device_id = atoi(argv[arg]); return 0; } static int process_list_devices(int arg, char **argv, int argc) { list_devices = 1; 
opencl_device = CL_DEVICE_TYPE_ALL; return 0; } /**********************************************************/ static poclcc_option options[NUM_OPTIONS] = { {process_help, "-h", "\t-h\n" "\t\tDisplay the help\n", 1}, {process_build_options, "-b", "\t-b \n" "\t\tBuild the program with options\n", 2}, {process_opencl_device, "-d", "\t-d \n" "\t\tSelect as the device_type for clGetDeviceIDs.\n" "\t\tDefault: CL_DEVICE_TYPE_DEFAULT\n", 2}, {process_list_devices, "-l", "\t-l\n" "\t\tList the opencl device found (that match the \n", 1}, {process_device_id, "-i", "\t-i \n" "\t\tSelect the opencl device to generate the pocl binary file\n" "\t\tDefault: 0\n", 2}, {process_output, "-o", "\t-o \n" "\t\tWrite output to \n", 2} }; /**********************************************************/ static int search_process(char *arg) { int i; for (i=0; inum_args_read; *arg = prev_arg + num_args_read; return current_option->fct(prev_arg + 1, argv, argc); } } /**********************************************************/ int main(int argc, char **argv) { //MANAGEMENT OF ARGUMENTS options_help = options; int arg_num=1; if (argc < 2) return poclcc_error("Invalid argument!\n"); while (arg_num < argc-1) if (process_arg(&arg_num, argv, argc)) return -1; if (arg_num >= argc && list_devices) list_devices_only = 1; else if (arg_num >= argc) poclcc_error("Invalid arguments!\n"); else { int current_process = search_process(argv[arg_num]); if (current_process == -1 && process_kernel_file(arg_num, argv, argc)) return -1; else if (current_process != -1) { process_arg(&arg_num, argv, argc); list_devices_only = 1; } } //OPENCL STUFF cl_platform_id cpPlatform; cl_device_id device_ids[NUM_OF_DEVICE_ID]; cl_context context; cl_program program; cl_int err; cl_uint num_devices, i; CHECK_CL_ERROR(clGetPlatformIDs(1, &cpPlatform, NULL)); CHECK_CL_ERROR(clGetDeviceIDs(cpPlatform, opencl_device, NUM_OF_DEVICE_ID, device_ids, &num_devices)); if (opencl_device_id >= num_devices) return poclcc_error("Invalid opencl 
device_id!\n"); if (list_devices) { context = clCreateContext(0, num_devices, device_ids, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); printf("LIST OF DEVICES:\n"); for (i=0; ipoclbin_hash_string CHECK_CL_ERROR(clGetDeviceInfo(device_ids[i], CL_DEVICE_VERSION, DEVICE_INFO_MAX_LENGTH, str, NULL)); printf(" Version: %s\n", str); } clReleaseContext(context); } if (list_devices_only) return 0; context = clCreateContext(0, 1, &device_ids[opencl_device_id], NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); program = clCreateProgramWithSource(context, 1, (const char **)&kernel_source, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); CHECK_CL_ERROR(clBuildProgram(program, 0, NULL, build_options, NULL, NULL)); size_t binary_sizes; char *binary; CHECK_CL_ERROR(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_sizes, NULL)); binary = malloc(sizeof(char)*binary_sizes); if (!binary) { printf("malloc(binary) failed\n"); exit(1); } CHECK_CL_ERROR(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &binary, NULL)); CHECK_CL_ERROR(clReleaseProgram(program)); CHECK_CL_ERROR(clReleaseContext(context)); if (poclu_write_file(output_file, binary, binary_sizes)) ERRNO_EXIT(output_file); free(binary); return 0; } pocl-1.8/cmake/000077500000000000000000000000001413131625300133765ustar00rootroot00000000000000pocl-1.8/cmake/FindHwloc.cmake000066400000000000000000000117671413131625300162710ustar00rootroot00000000000000#.rst: # FindHwloc # ---------- # # Try to find Portable Hardware Locality (hwloc) libraries. # https://www.open-mpi.org/software/hwloc # # You may declare HWLOC_ROOT environment variable to tell where # your hwloc library is installed. 
# # Once done this will define:: # # Hwloc_FOUND - True if hwloc was found # Hwloc_INCLUDE_DIRS - include directories for hwloc # Hwloc_LIBRARIES - link against these libraries to use hwloc # Hwloc_VERSION - version # Hwloc_CFLAGS - include directories as compiler flags # Hwloc_LDLFAGS - link paths and libs as compiler flags # #============================================================================= # Copyright 2014 Mikael Lepistö # # Distributed under the OSI-approved BSD License (the "License"); # # This software is distributed WITHOUT ANY WARRANTY; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # See the License for more information. #============================================================================= if(WIN32) find_path(Hwloc_INCLUDE_DIR NAMES hwloc.h PATHS ENV "PROGRAMFILES(X86)" ENV HWLOC_ROOT PATH_SUFFIXES include ) find_library(Hwloc_LIBRARY NAMES hwloc PATHS ENV "PROGRAMFILES(X86)" ENV HWLOC_ROOT PATH_SUFFIXES lib ) # # Check if the found library can be used to linking # SET (_TEST_SOURCE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/linktest.c") FILE (WRITE "${_TEST_SOURCE}" " #include int main() { hwloc_topology_t topology; int nbcores; hwloc_topology_init(&topology); hwloc_topology_load(topology); nbcores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE); hwloc_topology_destroy(topology); return 0; } " ) TRY_COMPILE(_LINK_SUCCESS ${CMAKE_BINARY_DIR} "${_TEST_SOURCE}" CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${Hwloc_INCLUDE_DIR}" CMAKE_FLAGS "-DLINK_LIBRARIES:STRING=${Hwloc_LIBRARY}" ) IF(NOT _LINK_SUCCESS) if(CMAKE_SIZEOF_VOID_P EQUAL 8) message(STATUS "You are building 64bit target.") ELSE() message(STATUS "You are building 32bit code. If you like to build x64 use e.g. -G 'Visual Studio 12 Win64' generator." ) ENDIF() message(FATAL_ERROR "Library found, but linking test program failed.") ENDIF() # # Resolve version if some compiled binary found... 
# find_program(HWLOC_INFO_EXECUTABLE NAMES hwloc-info PATHS ENV HWLOC_ROOT PATH_SUFFIXES bin ) if(HWLOC_INFO_EXECUTABLE) execute_process( COMMAND ${HWLOC_INFO_EXECUTABLE} "--version" OUTPUT_VARIABLE HWLOC_VERSION_LINE OUTPUT_STRIP_TRAILING_WHITESPACE ) string(REGEX MATCH "([0-9]+.[0-9]+)$" Hwloc_VERSION "${HWLOC_VERSION_LINE}") unset(HWLOC_VERSION_LINE) endif() # # All good # set(Hwloc_LIBRARIES ${Hwloc_LIBRARY}) set(Hwloc_INCLUDE_DIRS ${Hwloc_INCLUDE_DIR}) include(FindPackageHandleStandardArgs) find_package_handle_standard_args( Hwloc FOUND_VAR Hwloc_FOUND REQUIRED_VARS Hwloc_LIBRARIES Hwloc_INCLUDE_DIRS VERSION_VAR Hwloc_VERSION) mark_as_advanced( Hwloc_INCLUDE_DIR Hwloc_LIBRARY) foreach(arg ${Hwloc_INCLUDE_DIRS}) set(Hwloc_CFLAGS "${Hwloc_CFLAGS} /I${arg}") endforeach() set(Hwloc_LDFLAGS "${Hwloc_LIBRARY}") else() find_package(PkgConfig) if(HWLOC_ROOT) set(ENV{PKG_CONFIG_PATH} "${HWLOC_ROOT}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") else() foreach(PREFIX ${CMAKE_PREFIX_PATH}) set(PKG_CONFIG_PATH "${PKG_CONFIG_PATH}:${PREFIX}/lib/pkgconfig") endforeach() set(ENV{PKG_CONFIG_PATH} "${PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") endif() if(hwloc_FIND_REQUIRED) set(_hwloc_OPTS "REQUIRED") elseif(hwloc_FIND_QUIETLY) set(_hwloc_OPTS "QUIET") else() set(_hwloc_output 1) endif() if(hwloc_FIND_VERSION) if(hwloc_FIND_VERSION_EXACT) pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc=${hwloc_FIND_VERSION}) else() pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc>=${hwloc_FIND_VERSION}) endif() else() pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc) endif() if(Hwloc_FOUND) string(REPLACE "." 
";" Hwloc_VERSION_PARSED "${Hwloc_VERSION}") set(Hwloc_VERSION "${Hwloc_VERSION}" CACHE STRING "version of Hwloc as a list") list(GET Hwloc_VERSION_PARSED 0 Hwloc_VERSION_MAJOR) set(Hwloc_VERSION_MAJOR "${Hwloc_VERSION_MAJOR}" CACHE STRING "Major version of Hwloc") list(GET Hwloc_VERSION_PARSED 1 Hwloc_VERSION_MINOR) set(Hwloc_VERSION_MINOR "${Hwloc_VERSION_MINOR}" CACHE STRING "Minor version of Hwloc") include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Hwloc DEFAULT_MSG Hwloc_LIBRARIES) if(NOT ${Hwloc_VERSION} VERSION_LESS 1.7.0) set(Hwloc_GL_FOUND 1) endif() if(_hwloc_output) message(STATUS "Found hwloc ${Hwloc_VERSION} in ${Hwloc_INCLUDE_DIRS}:${Hwloc_LIBRARIES}") endif() endif() endif() pocl-1.8/cmake/FindPthreadsWin32.cmake000066400000000000000000000026211413131625300175770ustar00rootroot00000000000000#.rst: # FindPthreadsWin32 library # ------------------------- # # Try to find pthreads libraries. # https://sourceware.org/pthreads-win32/ # # You may declare PTHREADS_ROOT environment variable to tell where # your library is installed. 
#
# Once done this will define::
#
#   Pthreads_FOUND            - True if pthreads was found
#   Pthreads_INCLUDE_DIRS     - include directories for pthreads
#   Pthreads_LIBRARIES        - link against this library to use pthreads
#
# The module will also define two cache variables::
#
#   Pthreads_INCLUDE_DIR      - the pthreads include directory
#   Pthreads_LIBRARY          - the path to the pthreads library
#

# Header lookup: searched under %PROGRAMFILES(X86)% and %PTHREADS_ROOT%,
# each with an "include" suffix.
find_path(Pthreads_INCLUDE_DIR
  NAMES pthread.h
  PATHS ENV "PROGRAMFILES(X86)" ENV PTHREADS_ROOT
  PATH_SUFFIXES include
)

# The library sub-directory is selected by the pointer size of the
# toolchain: lib/x64 for 64-bit builds, lib/x86 otherwise.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(LIB_PATH lib/x64)
else()
  set(LIB_PATH lib/x86)
endif()

# "pthreadVC2.lib" used to be listed twice; the duplicate was redundant
# and has been dropped (the search result is unchanged).
find_library(Pthreads_LIBRARY
  NAMES pthread.lib pthreadVC2.lib
  PATHS ENV PTHREADS_ROOT
  PATH_SUFFIXES ${LIB_PATH}
)

#
# All good
#

set(Pthreads_LIBRARIES ${Pthreads_LIBRARY})
set(Pthreads_INCLUDE_DIRS ${Pthreads_INCLUDE_DIR})

# NOTE(review): Pthreads_VERSION_STRING is not set anywhere in this module,
# so no version is reported - confirm whether that is intended.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  Pthreads
  FOUND_VAR Pthreads_FOUND
  REQUIRED_VARS Pthreads_LIBRARY Pthreads_INCLUDE_DIR
  VERSION_VAR Pthreads_VERSION_STRING)

mark_as_advanced(
  Pthreads_INCLUDE_DIR
  Pthreads_LIBRARY)
pocl-1.8/cmake/HSA.cmake000066400000000000000000000101741413131625300150160ustar00rootroot00000000000000
#=============================================================================
#   CMake build system files
#
#   Copyright (c) 2014-2018 pocl developers
#
#   Permission is hereby granted, free of charge, to any person obtaining a copy
#   of this software and associated documentation files (the "Software"), to deal
#   in the Software without restriction, including without limitation the rights
#   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#   copies of the Software, and to permit persons to whom the Software is
#   furnished to do so, subject to the following conditions:
#
#   The above copyright notice and this permission notice shall be included in
#   all copies or substantial portions of the Software.
#
#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#   THE SOFTWARE.
#
#=============================================================================

# HSA driver configuration.
#
# Inputs (all optional):
#   ENABLE_HSAIL                  - an explicit false value selects HSAIL_ENABLED=0
#   WITH_HSA_RUNTIME_DIR          - HSA runtime prefix (default: /opt/hsa)
#   WITH_HSA_RUNTIME_INCLUDE_DIR  - overrides <prefix>/include
#   WITH_HSA_RUNTIME_LIB_DIR      - overrides <prefix>/lib
#   WITH_HSAILASM_PATH            - HSAILasm executable, or a directory holding it
#
# Outputs: HSAIL_ENABLED, AMD_HSA, HSA_INCLUDES, HSALIB and, when
# HSAIL_ENABLED is true, HSAIL_ASM.

if(DEFINED ENABLE_HSAIL AND NOT ENABLE_HSAIL)
  set(HSAIL_ENABLED 0)
else()
  message(STATUS "Trying HSA support in LLVM")
  # test that Clang supports the amdgcn--amdhsa target
  # (RESULT is the compiler exit status: non-zero means failure)
  custom_try_compile_clangxx("" "return 0;" RESULT "-target" "amdgcn--amdhsa" "-emit-llvm" "-S")
  if(RESULT)
    message(FATAL_ERROR "LLVM support for amdgcn--amdhsa target is required")
  endif()
  set(HSAIL_ENABLED 1)
endif()

if(NOT DEFINED AMD_HSA)
  set(AMD_HSA 1)
endif()

# find the headers & the library
if(DEFINED WITH_HSA_RUNTIME_DIR AND WITH_HSA_RUNTIME_DIR)
  set(HSA_RUNTIME_DIR "${WITH_HSA_RUNTIME_DIR}")
else()
  message(STATUS "WITH_HSA_RUNTIME_DIR not given, trying default path")
  set(HSA_RUNTIME_DIR "/opt/hsa")
endif()

# Defaults derived from the runtime prefix. They are only used when the
# prefix is an existing absolute path; otherwise the directories stay empty
# and the find_* calls below fall back to the default system search paths.
# The warning is issued once, and only if at least one of the two
# directories actually has to come from the (unusable) prefix.
if((IS_ABSOLUTE "${HSA_RUNTIME_DIR}") AND (EXISTS "${HSA_RUNTIME_DIR}"))
  set(HSA_INCLUDEDIR "${HSA_RUNTIME_DIR}/include")
  set(HSA_LIBDIR "${HSA_RUNTIME_DIR}/lib")
else()
  set(HSA_INCLUDEDIR "")
  set(HSA_LIBDIR "")
  if(NOT (WITH_HSA_RUNTIME_LIB_DIR AND WITH_HSA_RUNTIME_INCLUDE_DIR))
    message(WARNING "${HSA_RUNTIME_DIR} is not a directory (using default system paths for search)")
  endif()
endif()

# Explicit per-directory overrides win over the prefix-derived defaults.
if(DEFINED WITH_HSA_RUNTIME_LIB_DIR AND WITH_HSA_RUNTIME_LIB_DIR)
  set(HSA_LIBDIR "${WITH_HSA_RUNTIME_LIB_DIR}")
endif()
if(DEFINED WITH_HSA_RUNTIME_INCLUDE_DIR AND WITH_HSA_RUNTIME_INCLUDE_DIR)
  set(HSA_INCLUDEDIR "${WITH_HSA_RUNTIME_INCLUDE_DIR}")
endif()

# First look only in the configured directory, then anywhere on the
# default search path (the second call is a no-op if the first succeeded).
find_path(HSA_INCLUDES "hsa.h" PATHS "${HSA_INCLUDEDIR}" NO_DEFAULT_PATH)
find_path(HSA_INCLUDES "hsa.h")
if(NOT HSA_INCLUDES)
  # The option is called WITH_HSA_RUNTIME_DIR; the old message advertised
  # a non-existent -DHSA_RUNTIME_DIR.
  message(FATAL_ERROR "hsa.h header not found (use -DWITH_HSA_RUNTIME_DIR=... to specify path to HSA runtime)")
endif()

find_library(HSALIB NAMES "hsa-runtime64" "hsa-runtime" "phsa-runtime64" PATHS "${HSA_LIBDIR}" NO_DEFAULT_PATH)
find_library(HSALIB NAMES "hsa-runtime64" "hsa-runtime" "phsa-runtime64")
if(NOT HSALIB)
  message(FATAL_ERROR "libhsa-runtime not found (use -DWITH_HSA_RUNTIME_DIR=... to specify path to HSA runtime) ${HSA_LIBDIR}")
endif()

if(HSAIL_ENABLED)
  if(DEFINED WITH_HSAILASM_PATH)
    set(HSAILASM_SEARCH_PATH "${WITH_HSAILASM_PATH}")
  else()
    set(HSAILASM_SEARCH_PATH "${HSA_RUNTIME_DIR}")
  endif()

  # The search path may name the executable itself or a directory to search.
  if((EXISTS "${HSAILASM_SEARCH_PATH}") AND (NOT IS_DIRECTORY "${HSAILASM_SEARCH_PATH}"))
    set(HSAIL_ASM "${HSAILASM_SEARCH_PATH}")
  else()
    find_program(HSAIL_ASM "HSAILasm${CMAKE_EXECUTABLE_SUFFIX}" PATHS "${HSAILASM_SEARCH_PATH}" "${HSAILASM_SEARCH_PATH}/bin")
  endif()

  if(NOT HSAIL_ASM)
    message(FATAL_ERROR "HSAILasm executable not found (use -DWITH_HSAILASM_PATH=... to specify)")
  endif()
endif()

if(HSAIL_ENABLED)
  message(STATUS "OK, building HSA with HSAIL")
else()
  message(STATUS "OK, building HSA with native code generation")
endif()
pocl-1.8/cmake/Hardening.cmake000066400000000000000000000216231413131625300163030ustar00rootroot00000000000000
#This is free and unencumbered software released into the public domain.
#Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means.
#In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain.
# We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law.
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#For more information, please refer to <https://unlicense.org/>

include(CheckCXXCompilerFlag)

option(HARDENING_SSE2 "Enable hardening flags requiring at least SSE2 support for target" OFF)

set(CLANG_WORKAROUND_SCRIPT "${CMAKE_CURRENT_LIST_DIR}/clangLinkerWorkaround.sh")

# determineSupportedHardeningFlags(<property> <flag>...)
#
# Probes every <flag> with check_cxx_compiler_flag() and returns the accepted
# ones, joined by single spaces, in HARDENING_<property> in the caller's scope.
function(determineSupportedHardeningFlags property)
  set(FLAGS_HARDENING "")
  foreach(flag ${ARGN})
    unset(var_name)
    string(REPLACE "=" "_eq_" var_name "${flag}")
    string(REPLACE "," "_comma_" var_name "${var_name}")
    # The result variable is used by the check module as a cache entry name
    # and as a -D macro name, so it must be a valid identifier; flags contain
    # '-' and sometimes spaces.
    string(MAKE_C_IDENTIFIER "SUPPORTS_HARDENING_${property}_${var_name}" var_name)
    # since linker flags and other flags are in the form of compiler flags
    check_cxx_compiler_flag("${flag}" ${var_name})
    if(${${var_name}})
      list(APPEND FLAGS_HARDENING "${flag}")
    endif()
  endforeach(flag)
  string(REPLACE ";" " " FLAGS_HARDENING "${FLAGS_HARDENING}")
  #message(STATUS "FLAGS_HARDENING ${FLAGS_HARDENING}")
  set(HARDENING_${property} "${FLAGS_HARDENING}" PARENT_SCOPE)
endfunction(determineSupportedHardeningFlags)

# processFlagsList(<target> <property> <cache> <flag>...)
#
# Appends the supported subset of <flag>... to the <property> target property
# (COMPILE_FLAGS or LINK_FLAGS in this file). When <cache> is true the probed
# result is stored in the HARDENING_<property> cache entry and re-used by
# later calls instead of probing again.
function(processFlagsList target property cache)
  get_target_property(FLAGS_UNHARDENED ${target} ${property})
  if(FLAGS_UNHARDENED MATCHES "FLAGS_UNHARDENED-NOTFOUND")
    set(FLAGS_UNHARDENED "")
  endif()
  #message(STATUS "processFlagsList ${target} ${property} ${FLAGS_UNHARDENED}")
  #message(STATUS "HARDENING_${property} ${HARDENING_${property}}")

  if(NOT cache)
    determineSupportedHardeningFlags(${property} ${ARGN})
  elseif(NOT HARDENING_${property})
    determineSupportedHardeningFlags(${property} ${ARGN})
    set(HARDENING_${property} "${HARDENING_${property}}" CACHE STRING "Hardening flags")
  endif()

  set(FLAGS_HARDENED ${FLAGS_UNHARDENED})
  list(APPEND FLAGS_HARDENED ${HARDENING_${property}})
  string(REPLACE ";" " " FLAGS_HARDENED "${FLAGS_HARDENED}")
  #message(STATUS "${target} PROPERTIES ${property} ${FLAGS_HARDENED}")
  set_target_properties(${target} PROPERTIES ${property} "${FLAGS_HARDENED}")
endfunction(processFlagsList)

# setupPIC(<target>)
#
# Enables position independent code for <target>: -fPIE / -Wl,-pie for
# executables and -fPIC for every other target type (GNU and Clang), or the
# ASLR-related switches for MSVC.
function(setupPIC target)
  set_property(TARGET ${target} PROPERTY POSITION_INDEPENDENT_CODE ON) # bad, doesn't work
  if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
    get_target_property(type ${target} TYPE)
    if(type STREQUAL "EXECUTABLE")
      list(APPEND HARDENING_PIC_COMPILE_FLAGS
        "-fPIE"
      )
      # https://mropert.github.io/2018/02/02/pic_pie_sanitizers/
      list(APPEND HARDENING_PIC_LINKER_FLAGS
        "-Wl,-pie"
      )
      if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
        message(STATUS "Working around Clang bug https://bugs.llvm.org/show_bug.cgi?id=44594 ...")
        list(APPEND HARDENING_PIC_LINKER_FLAGS
          "--ld-path=\"${CLANG_WORKAROUND_SCRIPT}\""
        )
      endif()
    else()
      list(APPEND HARDENING_PIC_COMPILE_FLAGS
        "-fPIC"
      )
    endif()
  elseif(MSVC)
    list(APPEND HARDENING_PIC_COMPILE_FLAGS
      "/dynamicbase"
      "/HIGHENTROPYVA"
    )
  else()
    # "ERROR" is not a message() mode keyword; it used to be printed as part
    # of the text. WARNING keeps the configure step non-fatal, as before.
    message(WARNING "The compiler is not supported")
  endif()
  processFlagsList(${target} COMPILE_FLAGS OFF ${HARDENING_PIC_COMPILE_FLAGS})
  processFlagsList(${target} LINK_FLAGS OFF ${HARDENING_PIC_LINKER_FLAGS})
endfunction(setupPIC)

# harden(<target>)
#
# Entry point: applies PIC/PIE settings, then every hardening compiler flag,
# linker flag and macro definition from the lists below that the compiler
# accepts. Compiler and linker flag probing results are cached.
function(harden target)
  setupPIC("${target}")
  if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
    list(APPEND HARDENING_COMPILER_FLAGS
      "-Wall"
      "-Wextra"
      "-Wconversion"
      "-Wformat"
      "-Wformat-security"
      "-Werror=format-security"
      "-fno-strict-aliasing"
      "-fno-common"
      #"-fstack-check"
      #"-fcf-protection=full" # conflicts to "-mindirect-branch"
      "-fcf-runtime-abi=full"
      "-ffp-exception-behavior=strict"
      "-fstack-clash-protection"
      "-mcet"
      "-fsanitize=cfi"
      "-fsanitize=cfi-cast-strict"
      "-fsanitize=cfi-derived-cast"
      "-fsanitize=cfi-unrelated-cast"
      "-fsanitize=cfi-nvcall"
      "-fsanitize=cfi-vcall"
      "-fsanitize=cfi-icall"
      "-fsanitize=cfi-mfcall"

      # CLang-ish flags
      "-mretpoline"
      "-mspeculative-load-hardening"
      "-lvi-load-hardening"
      "-lvi-cfi"

      #"-fsanitize=safe-stack;compiler-rt" # https://clang.llvm.org/docs/SafeStack.html
      "-fsanitize=address" # https://clang.llvm.org/docs/AddressSanitizer.html
      # TODO implement compiler flag dependence on libs linking
      #"-fsanitize=undefined;ubsan" # https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html , gcc also has it
      #"-fsanitize=thread" # https://clang.llvm.org/docs/ThreadSanitizer.html , 15x slowdown and 10x memory overhead
      #"-fsanitize=memory" # https://clang.llvm.org/docs/MemorySanitizer.html 3x slowdown
      #"-fsanitize=dataflow" # https://clang.llvm.org/docs/DataFlowSanitizer.html, taint analysis, requires explicit annotation of code
      #"-fvtable-verify=std;vtv"

      # this conflicts with gcc which now has -fcf-protection=full hardcoded
      "-fcf-protection=none -mindirect-branch"
      "-fcf-protection=none -mindirect-branch=thunk-extern"
      "-fcf-protection=none -mindirect-branch=thunk-inline"
      "-fcf-protection=none -mindirect-return"
      "-fcf-protection=none -mindirect-branch-register"
      "-fcf-protection=none -mindirect-branch-loop"

      "-x86-speculative-load-hardening"
      "-mno-indirect-branch-register"
    )

    if(HARDENING_SSE2)
      list(APPEND HARDENING_COMPILER_FLAGS
        "-mlfence-after-load=yes"
        "-mlfence-before-indirect-branch=all"
        "-mlfence-before-ret=not"
      )
    endif(HARDENING_SSE2)

    # some flags are bugged in GCC
    if(NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU")
      list(APPEND HARDENING_COMPILER_FLAGS
        "-ftrapv" # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=35412
      )
    endif()

    # GCC 9 has removed these flags
    if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9 AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10))
      message(STATUS "GCC 9 removes some hardening flags but doesn't fail if they are present, instead shows deprecation message. In order to not to put garbage into warnings we don't insert them. See the code of Harden.cmake for the details.")
    else()
      list(APPEND HARDENING_COMPILER_FLAGS
        "-mmitigate-rop"
        "-fcheck-pointer-bounds"
        "-fchkp-treat-zero-size-reloc-as-infinite"
        "-fchkp-first-field-has-own-bounds"
        "-fchkp-narrow-bounds"
        "-fchkp-narrow-to-innermost-array"
        "-fchkp-optimize"
        "-fchkp-use-fast-string-functions"
        "-fchkp-use-nochk-string-functions"
        "-fchkp-use-static-const-bounds"
      )
    endif()

    list(APPEND HARDENING_LINKER_FLAGS
      "-Wl,-O1"
      "-Wl,--sort-common"
      "-Wl,--as-needed"
      "-Wl,-flto"
    )

    if(CMAKE_SYSTEM_NAME MATCHES "Windows")
      list(APPEND HARDENING_LINKER_FLAGS
        "-Wl,--export-all-symbols"
        "-Wl,--nxcompat"
        "-Wl,--dynamicbase"
      )
      if(CMAKE_SIZEOF_VOID_P EQUAL 8)
        # list(APPEND HARDENING_LINKER_FLAGS "-Wl,--image-base,0x140000000") # doesn't work for this project
      endif()
    elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") # other using ELF too?
      list(APPEND HARDENING_COMPILER_FLAGS
        # on MinGW hello world works, but more complex things just exit without any output or crash in the middle of execution
        "-fstack-protector"
        "-fstack-protector-strong"
      )
      list(APPEND HARDENING_LINKER_FLAGS
        # not present in MinGW
        "-Wl,-z,relro"
        "-Wl,-z,now"
        "-Wl,-z,ibtplt"
        "-Wl,-z,ibt"
        "-Wl,-z,shstk"
        "-Wl,-z,notext" # may be required for PIC
      )
    endif()

    list(APPEND HARDENING_MACRODEFS
      "-D_FORTIFY_SOURCE=2"
      "-D_GLIBCXX_ASSERTIONS"
    )
  elseif(MSVC)
    set(HARDENING_COMPILER_FLAGS "/sdl" "/GS" "/SafeSEH" "/guard:cf" "/HIGHENTROPYVA")
    set(HARDENING_LINKER_FLAGS "/guard:cf")
  else()
    # See setupPIC(): message(ERROR ...) is not a valid mode.
    message(WARNING "The compiler is not supported")
  endif()

  processFlagsList(${target} COMPILE_FLAGS ON ${HARDENING_COMPILER_FLAGS})
  processFlagsList(${target} LINK_FLAGS ON ${HARDENING_LINKER_FLAGS})

  #list(JOIN HARDENING_MACRODEFS " " HARDENING_MACRODEFS) # unneeded, list is needed, not string
  set(HARDENING_MACRODEFS "${HARDENING_MACRODEFS}" CACHE STRING "Hardening flags CMake list (not string!)")
  target_compile_definitions(${target} PRIVATE ${HARDENING_MACRODEFS})
endfunction(harden)
pocl-1.8/cmake/LLVM.cmake000066400000000000000000000755121413131625300151640ustar00rootroot00000000000000

#=============================================================================
#   CMake build system files for detecting Clang and LLVM
#
#   Copyright (c) 2014-2020 pocl developers
#
#   Permission is hereby granted, free of charge, to any person obtaining a copy
#   of this software and associated documentation files (the "Software"), to deal
#   in the Software without restriction, including without limitation the rights
#   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#   copies of the Software, and to permit persons to whom the Software is
#   furnished to do so, subject to the following conditions:
#
#   The above copyright notice and this permission notice shall be included in
#   all copies or substantial portions of the Software.
#
#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#   THE SOFTWARE.
#
#=============================================================================

# Locate llvm-config: either the one requested through WITH_LLVM_CONFIG
# (absolute path or program name) or the first known name found on the
# search path.
if(DEFINED WITH_LLVM_CONFIG AND WITH_LLVM_CONFIG)
  # search for preferred version
  if(IS_ABSOLUTE "${WITH_LLVM_CONFIG}")
    if(EXISTS "${WITH_LLVM_CONFIG}")
      set(LLVM_CONFIG "${WITH_LLVM_CONFIG}")
    endif()
  else()
    find_program(LLVM_CONFIG NAMES "${WITH_LLVM_CONFIG}")
  endif()
else()
  # search for any version
  find_program(LLVM_CONFIG
    NAMES
      "llvmtce-config"
      "llvm-config"
      "llvm-config-mp-13.0" "llvm-config-13" "llvm-config130"
      "llvm-config-mp-12.0" "llvm-config-12" "llvm-config120"
      "llvm-config-mp-11.0" "llvm-config-11" "llvm-config110"
      "llvm-config-mp-10.0" "llvm-config-10" "llvm-config100"
      "llvm-config-mp-9.0" "llvm-config-9" "llvm-config90"
      "llvm-config-mp-8.0" "llvm-config-8" "llvm-config80"
      "llvm-config-mp-7.0" "llvm-config-7" "llvm-config70"
      "llvm-config-mp-6.0" "llvm-config-6.0" "llvm-config60"
    DOC "llvm-config executable")
endif()

set(WITH_LLVM_CONFIG "${WITH_LLVM_CONFIG}" CACHE PATH "Path to preferred llvm-config")

if(NOT LLVM_CONFIG)
  message(FATAL_ERROR "llvm-config not found !")
else()
  file(TO_CMAKE_PATH "${LLVM_CONFIG}" LLVM_CONFIG)
  message(STATUS "Using llvm-config: ${LLVM_CONFIG}")
  # The part of the file name after "llvm-config" (e.g. "-12") is the suffix
  # that is later tried when looking up the other LLVM tools.
  if(LLVM_CONFIG MATCHES "llvmtce-config${CMAKE_EXECUTABLE_SUFFIX}$")
    set(LLVM_BINARY_SUFFIX "")
  elseif(LLVM_CONFIG MATCHES "llvm-config${CMAKE_EXECUTABLE_SUFFIX}$")
    set(LLVM_BINARY_SUFFIX "")
  elseif(LLVM_CONFIG MATCHES "llvm-config(.*)${CMAKE_EXECUTABLE_SUFFIX}$")
    set(LLVM_BINARY_SUFFIX "${CMAKE_MATCH_1}")
  else()
    message(WARNING "Cannot determine llvm binary suffix from ${LLVM_CONFIG}")
    # Define it explicitly so later uses expand to a known (empty) value.
    set(LLVM_BINARY_SUFFIX "")
  endif()
  message(STATUS "LLVM binaries suffix : ${LLVM_BINARY_SUFFIX}")
endif()

get_filename_component(LLVM_CONFIG_LOCATION "${LLVM_CONFIG}" DIRECTORY)

##########################################################################

# A macro to run llvm config
#
# run_llvm_config(<VARIABLE_NAME> <llvm-config args>...)
#
# Runs llvm-config with the given arguments and stores its whitespace-
# stripped stdout in the cache entry <VARIABLE_NAME>. A non-zero exit status
# raises SEND_ERROR and leaves the variable untouched.
# Note: an already existing cache entry is not overwritten by set(... CACHE).
macro(run_llvm_config VARIABLE_NAME)
  execute_process(
    COMMAND "${LLVM_CONFIG}" ${ARGN}
    OUTPUT_VARIABLE LLVM_CONFIG_VALUE
    RESULT_VARIABLE LLVM_CONFIG_RETVAL
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
  if(LLVM_CONFIG_RETVAL)
    message(SEND_ERROR "Error running llvm-config with arguments: ${ARGN}")
  else()
    # Quoted so that an empty output yields a well-formed set() call.
    set(${VARIABLE_NAME} "${LLVM_CONFIG_VALUE}" CACHE STRING "llvm-config's ${VARIABLE_NAME} value")
    message(STATUS "llvm-config's ${VARIABLE_NAME} is: ${${VARIABLE_NAME}}")
  endif()
endmacro(run_llvm_config)

run_llvm_config(LLVM_PREFIX --prefix)
# on windows, llvm-config returns "C:\llvm_prefix/bin" mixed style paths,
# and cmake doesn't like the "\" - thinks its an escape char..
file(TO_CMAKE_PATH "${LLVM_PREFIX}" LLVM_PREFIX_CMAKE)

set(LLVM_PREFIX_BIN "${LLVM_PREFIX_CMAKE}/bin")
run_llvm_config(LLVM_VERSION_FULL --version)
# sigh, sanitize version... `llvm --version` on debian might return 3.4.1 but llvm command names are still -3.4
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1.\\2" LLVM_VERSION "${LLVM_VERSION_FULL}")
message(STATUS "LLVM_VERSION: ${LLVM_VERSION}")

string(REPLACE "." ";" LLVM_VERSION_PARSED "${LLVM_VERSION}")
list(GET LLVM_VERSION_PARSED 0 LLVM_VERSION_MAJOR)
list(GET LLVM_VERSION_PARSED 1 LLVM_VERSION_MINOR)

# Every value that may embed the prefix gets the native prefix replaced by
# its CMake-style spelling.
run_llvm_config(LLVM_CFLAGS --cflags)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_CFLAGS "${LLVM_CFLAGS}")
run_llvm_config(LLVM_CXXFLAGS --cxxflags)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")
run_llvm_config(LLVM_CPPFLAGS --cppflags)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_CPPFLAGS "${LLVM_CPPFLAGS}")
run_llvm_config(LLVM_LDFLAGS --ldflags)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_LDFLAGS "${LLVM_LDFLAGS}")
run_llvm_config(LLVM_BINDIR --bindir)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_BINDIR "${LLVM_BINDIR}")
run_llvm_config(LLVM_LIBDIR --libdir)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_LIBDIR "${LLVM_LIBDIR}")
run_llvm_config(LLVM_INCLUDEDIR --includedir)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}")

run_llvm_config(LLVM_SRC_ROOT --src-root)
run_llvm_config(LLVM_OBJ_ROOT --obj-root)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_OBJ_ROOT "${LLVM_OBJ_ROOT}")
run_llvm_config(LLVM_ALL_TARGETS --targets-built)
run_llvm_config(LLVM_HOST_TARGET --host-target)
run_llvm_config(LLVM_BUILD_MODE --build-mode)
run_llvm_config(LLVM_ASSERTS_BUILD --assertion-mode)
run_llvm_config(LLVM_SYSLIBS --system-libs)
string(STRIP "${LLVM_SYSLIBS}" LLVM_SYSLIBS)

if(MSVC)
  string(REPLACE "-L${LLVM_LIBDIR}" "" LLVM_LDFLAGS "${LLVM_LDFLAGS}")
  string(STRIP "${LLVM_LDFLAGS}" LLVM_LDFLAGS)
endif()

if(LLVM_BUILD_MODE MATCHES "Debug")
  set(LLVM_BUILD_MODE_DEBUG 1)
else()
  set(LLVM_BUILD_MODE_DEBUG 0)
endif()

# A few work-arounds for llvm-config issues

# - pocl doesn't compile with '-pedantic'
#LLVM_CXX_FLAGS=$($LLVM_CONFIG --cxxflags | sed -e 's/ -pedantic / /g')
string(REPLACE " -pedantic" "" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")

#llvm-config clutters CXXFLAGS with a lot of -W flags.
#(They are not needed - we want to use -Wall anyways)
#This is a problem if LLVM was built with a different compiler than we use here,
#and our compiler chokes on unrecognized command-line options.
string(REGEX REPLACE "-W[^ ]*" "" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")

# Ubuntu's llvm reports "arm-unknown-linux-gnueabihf" triple, then if one tries
# `clang --target=arm-unknown-linux-gnueabihf ...` it will produce armv6 code,
# even if one's running armv7;
# Here we replace the "arm" string with whatever's in CMAKE_HOST_SYSTEM_PROCESSOR
# which should be "armv6l" on rasp pi, or "armv7l" on my cubieboard, hopefully its
# more reasonable and reliable than llvm's own host flags
if(NOT CMAKE_CROSSCOMPILING)
  string(REPLACE "arm-" "${CMAKE_HOST_SYSTEM_PROCESSOR}-" LLVM_HOST_TARGET "${LLVM_HOST_TARGET}")
endif()

# In windows llvm-config reports --target=x86_64-pc-windows-msvc
# however this causes clang to use MicrosoftCXXMangler, which does not
# yet support mangling for extended vector types (with llvm 3.5)
# so for now hardcode LLVM_HOST_TARGET to be x86_64-pc with windows
if(WIN32)
  set(LLVM_HOST_TARGET "x86_64-pc")
endif(WIN32)

# required for sources..
# Version gating. Defines:
#   LLVM_MAJOR              - major version number (6..13)
#   LLVM_<major>_0          - set to 1 for the detected major version
#   LLVM_OLDER_THAN_<n>_0   - set to 1 for every n in 7..10 above the detected major
if(LLVM_VERSION MATCHES "^6[.]0")
  set(LLVM_MAJOR 6)
elseif(LLVM_VERSION MATCHES "^(7|8|9|10|11|12|13)[.]")
  set(LLVM_MAJOR "${CMAKE_MATCH_1}")
else()
  message(FATAL_ERROR "LLVM version between 6.0 and 13.0 required, found: ${LLVM_VERSION}")
endif()

set(LLVM_${LLVM_MAJOR}_0 1)
foreach(_NEWER_MAJOR 7 8 9 10)
  if(LLVM_MAJOR LESS ${_NEWER_MAJOR})
    set(LLVM_OLDER_THAN_${_NEWER_MAJOR}_0 1)
  endif()
endforeach()

#############################################################

run_llvm_config(LLVM_HAS_RTTI --has-rtti)

if(DEFINED SINGLE_LLVM_LIB)
  message(AUTHOR_WARNING "SINGLE_LLVM_LIB option was removed; pocl now uses only llvm-config to get the libraries. Use STATIC_LLVM=ON/OFF to affect which libraries pocl requests from llvm-config")
endif()

if(STATIC_LLVM)
  set(LLVM_LIB_MODE --link-static)
else()
  set(LLVM_LIB_MODE --link-shared)
endif()

# run_llvm_config() stores its result with set(... CACHE) and therefore never
# replaces an existing cache entry. Drop both the normal variable and the
# cache entry so that a changed STATIC_LLVM is honoured on re-configuration.
unset(LLVM_LIBS)
unset(LLVM_LIBS CACHE)
run_llvm_config(LLVM_LIBS --libs ${LLVM_LIB_MODE})
# Convert LLVM_LIBS from string -> list format to make handling them easier
separate_arguments(LLVM_LIBS)

# With Visual Studio llvm-config gives invalid list of static libs (libXXXX.a instead of XXXX.lib)
# we extract the pure names (LLVMLTO, LLVMMipsDesc etc) and let find_library do its job
set(LLVM_LIBNAMES "")
set(LLVM_LIBFILES "")
foreach(LIBFLAG ${LLVM_LIBS})
  string(REGEX REPLACE "^-l(.*)$" "\\1" LIB_NAME "${LIBFLAG}")
  list(APPEND LLVM_LIBNAMES "${LIB_NAME}")
endforeach()

foreach(LIBNAME ${LLVM_LIBNAMES})
  find_library(L_LIBFILE_${LIBNAME} NAMES "${LIBNAME}" HINTS "${LLVM_LIBDIR}")
  list(APPEND LLVM_LIBFILES "${L_LIBFILE_${LIBNAME}}")
endforeach()

set(POCL_LLVM_LIBS ${LLVM_LIBFILES})

####################################################################

# LLVM_SYSLIBS was already queried (and cached) without a link mode; without
# removing that entry this mode-specific query would be silently ignored.
unset(LLVM_SYSLIBS)
unset(LLVM_SYSLIBS CACHE)
run_llvm_config(LLVM_SYSLIBS --system-libs ${LLVM_LIB_MODE})
string(STRIP "${LLVM_SYSLIBS}" LLVM_SYSLIBS)

####################################################################

# llvm-config does not include clang libs
if((LLVM_MAJOR GREATER 9) AND (NOT STATIC_LLVM))
  # For Clang 10+, link against a single shared library instead of multiple component shared
  # libraries.
  if("${LLVM_LIBNAMES}" MATCHES "LLVMTCE")
    set(CLANG_LIBNAMES clangTCE-cpp)
  else()
    set(CLANG_LIBNAMES clang-cpp)
  endif()
else()
  set(CLANG_LIBNAMES
    clangCodeGen clangFrontendTool clangFrontend clangDriver clangSerialization
    clangParse clangSema clangRewrite clangRewriteFrontend
    clangStaticAnalyzerFrontend clangStaticAnalyzerCheckers
    clangStaticAnalyzerCore clangAnalysis clangEdit clangAST clangASTMatchers
    clangLex clangBasic)
endif()

set(CLANG_LIBFILES "")
foreach(LIBNAME ${CLANG_LIBNAMES})
  find_library(C_LIBFILE_${LIBNAME} NAMES "${LIBNAME}" HINTS "${LLVM_LIBDIR}")
  list(APPEND CLANG_LIBFILES "${C_LIBFILE_${LIBNAME}}")
  if(UNIX AND (NOT APPLE))
    set(LLVM_LDFLAGS "${LLVM_LDFLAGS} -Wl,--exclude-libs,lib${LIBNAME}")
  endif()
endforeach()

####################################################################

# find_program_or_die(<OUTPUT_VAR> <PROG_NAME> <DOCSTRING>)
#
# Looks for <PROG_NAME>, first with the suffix derived from llvm-config and
# then without it, hinting at the LLVM bin directory, the llvm-config
# location and the LLVM prefix. Aborts the configuration if nothing is found.
macro(find_program_or_die OUTPUT_VAR PROG_NAME DOCSTRING)
  find_program(${OUTPUT_VAR}
    NAMES "${PROG_NAME}${LLVM_BINARY_SUFFIX}${CMAKE_EXECUTABLE_SUFFIX}"
          "${PROG_NAME}${CMAKE_EXECUTABLE_SUFFIX}"
    HINTS "${LLVM_BINDIR}" "${LLVM_CONFIG_LOCATION}" "${LLVM_PREFIX}" "${LLVM_PREFIX_BIN}"
    DOC "${DOCSTRING}"
    NO_CMAKE_PATH
    NO_CMAKE_ENVIRONMENT_PATH
  )
  if(${OUTPUT_VAR})
    message(STATUS "Found ${PROG_NAME}: ${${OUTPUT_VAR}}")
  else()
    message(FATAL_ERROR "${PROG_NAME} executable not found!")
  endif()
endmacro()

find_program_or_die( CLANG "clang" "clang binary")
execute_process(COMMAND "${CLANG}" "--version" OUTPUT_VARIABLE LLVM_CLANG_VERSION RESULT_VARIABLE CLANG_RES)
# TODO this should be optional
find_program_or_die( CLANGXX "clang++" "clang++ binary")
execute_process(COMMAND "${CLANGXX}" "--version" OUTPUT_VARIABLE LLVM_CLANGXX_VERSION RESULT_VARIABLE CLANGXX_RES)
if(CLANGXX_RES OR CLANG_RES)
  message(FATAL_ERROR "Failed running clang/clang++ --version")
endif()

find_program_or_die(LLVM_OPT "opt" "LLVM optimizer")
find_program_or_die(LLVM_LLC "llc" "LLVM static compiler")
find_program_or_die(LLVM_AS "llvm-as" "LLVM assembler")
find_program_or_die(LLVM_LINK "llvm-link" "LLVM IR linker")
find_program_or_die(LLVM_LLI "lli" "LLVM interpreter")

# llvm-spirv is optional: searched like the other tools, but not fatal.
if(NOT DEFINED LLVM_SPIRV)
  find_program(LLVM_SPIRV
    NAMES "llvm-spirv${LLVM_BINARY_SUFFIX}${CMAKE_EXECUTABLE_SUFFIX}"
          "llvm-spirv${CMAKE_EXECUTABLE_SUFFIX}"
    HINTS "${LLVM_BINDIR}" "${LLVM_CONFIG_LOCATION}" "${LLVM_PREFIX}" "${LLVM_PREFIX_BIN}")
  if(LLVM_SPIRV)
    message(STATUS "Found llvm-spirv: ${LLVM_SPIRV}")
  endif()
endif()

####################################################################

# try compile with any compiler (supplied as argument)
#
# custom_try_compile_any(<SILENT> <COMPILER> <SUFFIX> <SOURCE> <RES_VAR> <flags>...)
#
# Writes <SOURCE> to a randomly named file with extension <SUFFIX> in the
# build directory, runs "<COMPILER> <flags>... <file>" and stores the exit
# status in <RES_VAR> (non-zero means failure). Unless <SILENT> is true the
# command line and its output are printed on failure. The source file is
# removed afterwards.
macro(custom_try_compile_any SILENT COMPILER SUFFIX SOURCE RES_VAR)
  string(RANDOM RNDNAME)
  set(RANDOM_FILENAME "${CMAKE_BINARY_DIR}/compile_test_${RNDNAME}.${SUFFIX}")
  file(WRITE "${RANDOM_FILENAME}" "${SOURCE}")

  execute_process(COMMAND "${COMPILER}" ${ARGN} "${RANDOM_FILENAME}"
    RESULT_VARIABLE ${RES_VAR}
    OUTPUT_VARIABLE OV
    ERROR_VARIABLE EV)
  if(${${RES_VAR}} AND (NOT ${SILENT}))
    message(STATUS " ########## The command: ")
    string(REPLACE ";" " " ARGN_STR "${ARGN}")
    message(STATUS "${COMPILER} ${ARGN_STR} ${RANDOM_FILENAME}")
    message(STATUS " ########## Exited with nonzero status: ${${RES_VAR}}")
    if(OV)
      message(STATUS "STDOUT: ${OV}")
    endif()
    if(EV)
      message(STATUS "STDERR: ${EV}")
    endif()
  endif()
  file(REMOVE "${RANDOM_FILENAME}")
endmacro()

# convenience c/c++ source wrapper
# <SOURCE1> is placed at file scope, <SOURCE2> becomes the body of main().
# The line breaks matter: <SOURCE1> usually holds preprocessor directives.
macro(custom_try_compile_c_cxx COMPILER SUFFIX SOURCE1 SOURCE2 RES_VAR)
  set(SOURCE_PROG "
  ${SOURCE1}

  int main(int argc, char** argv) {
    ${SOURCE2}

  }")
  custom_try_compile_any(FALSE "${COMPILER}" ${SUFFIX} "${SOURCE_PROG}" ${RES_VAR} ${ARGN})
endmacro()

# convenience c/c++ source wrapper (no diagnostics on failure)
macro(custom_try_compile_c_cxx_silent COMPILER SUFFIX SOURCE1 SOURCE2 RES_VAR)
  set(SOURCE_PROG "
  ${SOURCE1}

  int main(int argc, char** argv) {
    ${SOURCE2}

  }")
  custom_try_compile_any(TRUE "${COMPILER}" ${SUFFIX} "${SOURCE_PROG}" ${RES_VAR} ${ARGN})
endmacro()

# clang++ try-compile macro
macro(custom_try_compile_clangxx SOURCE1 SOURCE2 RES_VAR)
  custom_try_compile_c_cxx("${CLANGXX}" "cc" "${SOURCE1}" "${SOURCE2}" ${RES_VAR} "-c" ${ARGN})
endmacro()

# clang try-compile macro
macro(custom_try_compile_clang SOURCE1 SOURCE2 RES_VAR)
  custom_try_compile_c_cxx("${CLANG}" "c" "${SOURCE1}" "${SOURCE2}" ${RES_VAR} "-c" ${ARGN})
endmacro()

# clang try-compile macro (no diagnostics on failure)
macro(custom_try_compile_clang_silent SOURCE1 SOURCE2 RES_VAR)
  custom_try_compile_c_cxx_silent("${CLANG}" "c" "${SOURCE1}" "${SOURCE2}" ${RES_VAR} "-c" ${ARGN})
endmacro()

# clang try-link macro
# The output file uses its own variable and random name: macros share the
# caller's scope, so the nested try-compile macro overwrites RANDOM_FILENAME
# and RNDNAME. Previously the linked binary was therefore never removed.
macro(custom_try_link_clang SOURCE1 SOURCE2 RES_VAR)
  string(RANDOM _TRY_LINK_RNDNAME)
  set(_TRY_LINK_OUTPUT "${CMAKE_BINARY_DIR}/link_test_${_TRY_LINK_RNDNAME}${CMAKE_EXECUTABLE_SUFFIX}")
  custom_try_compile_c_cxx_silent("${CLANG}" "c" "${SOURCE1}" "${SOURCE2}" ${RES_VAR} "-o" "${_TRY_LINK_OUTPUT}" ${ARGN})
  file(REMOVE "${_TRY_LINK_OUTPUT}")
endmacro()

# clang try-compile-run macro, running via native executable
# <OUTPUT_VAR> receives stdout and <RES_VAR> the exit status of the program;
# both are set to "" when the compilation fails.
macro(custom_try_run_exe SOURCE1 SOURCE2 OUTPUT_VAR RES_VAR)
  set(OUTF "${CMAKE_BINARY_DIR}/try_run${CMAKE_EXECUTABLE_SUFFIX}")
  if(EXISTS "${OUTF}")
    file(REMOVE "${OUTF}")
  endif()
  custom_try_compile_c_cxx("${CLANG}" "c" "${SOURCE1}" "${SOURCE2}" RESV "-o" "${OUTF}" "-x" "c")
  set(${OUTPUT_VAR} "")
  set(${RES_VAR} "")
  if(RESV OR (NOT EXISTS "${OUTF}"))
    message(STATUS " ########## Compilation failed")
  else()
    execute_process(COMMAND "${OUTF}" RESULT_VARIABLE RESV OUTPUT_VARIABLE ${OUTPUT_VAR} ERROR_VARIABLE EV)
    set(${RES_VAR} ${RESV})
    file(REMOVE "${OUTF}")
    if(${RESV})
      message(STATUS " ########## Running ${OUTF}")
      message(STATUS " ########## Exited with nonzero status: ${RESV}")
      # Compare as a string: the captured output must not be evaluated as an
      # if() expression.
      if(NOT "${${OUTPUT_VAR}}" STREQUAL "")
        message(STATUS " ########## STDOUT: ${${OUTPUT_VAR}}")
      endif()
      if(EV)
        message(STATUS " ########## STDERR: ${EV}")
      endif()
    endif()
  endif()
endmacro()

# clang try-compile-run macro, run via lli, the llvm interpreter
# Same contract as custom_try_run_exe, plus <SILENT> to suppress the
# diagnostics of a non-zero exit status; extra arguments go to clang.
macro(custom_try_run_lli SILENT SOURCE1 SOURCE2 OUTPUT_VAR RES_VAR)
  # this uses "lli" - the interpreter, so we can run any -target
  # TODO variable for target !!
  set(OUTF "${CMAKE_BINARY_DIR}/try_run.bc")
  if(EXISTS "${OUTF}")
    file(REMOVE "${OUTF}")
  endif()
  custom_try_compile_c_cxx("${CLANG}" "c" "${SOURCE1}" "${SOURCE2}" RESV "-o" "${OUTF}" "-x" "c" "-emit-llvm" "-c" ${ARGN})
  set(${OUTPUT_VAR} "")
  set(${RES_VAR} "")
  if(RESV OR (NOT EXISTS "${OUTF}"))
    message(STATUS " ########## Compilation failed")
  else()
    execute_process(COMMAND "${LLVM_LLI}" "-force-interpreter" "${OUTF}" RESULT_VARIABLE RESV OUTPUT_VARIABLE ${OUTPUT_VAR} ERROR_VARIABLE EV)
    set(${RES_VAR} ${RESV})
    file(REMOVE "${OUTF}")
    if(${RESV} AND (NOT ${SILENT}))
      message(STATUS " ########## The command ${LLVM_LLI} -force-interpreter ${OUTF}")
      message(STATUS " ########## Exited with nonzero status: ${RESV}")
      if(NOT "${${OUTPUT_VAR}}" STREQUAL "")
        message(STATUS " ########## STDOUT: ${${OUTPUT_VAR}}")
      endif()
      if(EV)
        message(STATUS " ########## STDERR: ${EV}")
      endif()
    endif()
  endif()
endmacro()

####################################################################
####################################################################

# The option for specifying the target changed; try the modern syntax
# first, and fall back to the old-style syntax if this failed

if(NOT DEFINED CLANG_TARGET_OPTION AND ENABLE_HOST_CPU_DEVICES)

  custom_try_compile_clangxx("" "return 0;" RES "--target=${LLVM_HOST_TARGET}")
  if(NOT RES)
    set(CLANG_TGT "--target=")
  else()
    #EXECUTE_PROCESS(COMMAND "${CLANG}" "-target ${LLVM_HOST_TARGET}" "-x" "c" "/dev/null" "-S" RESULT_VARIABLE RES)
    # NOTE(review): "-target <triple>" is handed to clang as ONE argument
    # here - confirm this fallback can actually succeed.
    custom_try_compile_clangxx("" "return 0;" RES "-target ${LLVM_HOST_TARGET}")
    if(NOT RES)
      set(CLANG_TGT "-target ")
    else()
      message(FATAL_ERROR "Cannot determine Clang option to specify the target")
    endif()
  endif()

  set(CLANG_TARGET_OPTION ${CLANG_TGT} CACHE INTERNAL "Clang option used to specify the target" )

endif()

####################################################################
####################################################################

# Determine whether wide integer division links, with or without
# --rtlib=compiler-rt. Results: CLANG_HAS_64B_MATH, CLANG_HAS_128B_MATH and
# CLANG_NEEDS_RTLIB (all cached).
if(NOT DEFINED CLANG_NEEDS_RTLIB)

  set(RT128 OFF)
  set(RT64 OFF)
  set(NEEDS_RTLIB_FLAG OFF)

  # on 32bit systems, we need 64bit emulation
  if(CMAKE_SIZEOF_VOID_P EQUAL 4)
    # NOTE(review): the header names were lost in this copy of the file;
    # <stdint.h> is required for int64_t, <stddef.h> is presumed - confirm.
    set(INC "#include <stdint.h>\n#include <stddef.h>")
    set(SRC "int64_t a = argc; int64_t b = argc-1; int64_t c = a / b; return (int)c; ")
    custom_try_link_clang("${INC}" "${SRC}" RES)
    if(NOT RES)
      message(STATUS "64bit division compiles without extra flags")
      set(RT64 ON)
    else()
      custom_try_link_clang("${INC}" "${SRC}" RES "--rtlib=compiler-rt")
      if(NOT RES)
        message(STATUS "64bit division compiles WITH --rtlib=compiler-rt")
        set(NEEDS_RTLIB_FLAG ON)
        set(RT64 ON)
      else()
        message(WARNING "64bit division doesn't compile at all!")
      endif()
    endif()

  else()
    set(RT64 ON)

    # on 64bit systems, we need 128bit integers for Errol
    set(INC "extern __uint128_t __udivmodti4(__uint128_t a, __uint128_t b, __uint128_t* rem);")
    set(SRC "__uint128_t low, mid, tmp1, pow19 = (__uint128_t)1000000000; mid = __udivmodti4(low, pow19, &tmp1); return 0;")
    custom_try_link_clang("${INC}" "${SRC}" RES)
    if(NOT RES)
      message(STATUS "udivmodti4 compiles without extra flags")
      set(RT128 ON)
    else()
      custom_try_link_clang("${INC}" "${SRC}" RES "--rtlib=compiler-rt")
      if(NOT RES)
        message(STATUS "udivmodti4 compiles WITH --rtlib=compiler-rt")
        set(NEEDS_RTLIB_FLAG ON)
        set(RT128 ON)
      else()
        message(WARNING "udivmodti4 doesn't compile at all!")
      endif()
    endif()

  endif()

  set(CLANG_HAS_64B_MATH ${RT64} CACHE INTERNAL "Clang's available with 64bit math")
  set(CLANG_HAS_128B_MATH ${RT128} CACHE INTERNAL "Clang's available with 128bit math")
  set(CLANG_NEEDS_RTLIB ${NEEDS_RTLIB_FLAG} CACHE INTERNAL "Clang needs extra --rtlib flag for compiler-rt math")

endif()

####################################################################

# CHECK_ALIGNOF(<TYPE> <TYPEDEF> <OUT_VAR>)
#
# Determines the alignment of <TYPE> by running a small program through lli;
# the program returns offsetof(struct { char x; TYPE y; }, y) as its exit
# status, which is cached in <OUT_VAR>.
macro(CHECK_ALIGNOF TYPE TYPEDEF OUT_VAR)

  if(NOT DEFINED "${OUT_VAR}")

    custom_try_run_lli(TRUE "
#ifndef offsetof
#define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0)
#endif

${TYPEDEF}" "typedef struct { char x; ${TYPE} y; } ac__type_alignof_;
    int r = offsetof(ac__type_alignof_, y);
    return r;"
    SIZEOF_STDOUT RESULT "${CLANG_TARGET_OPTION}${LLC_TRIPLE}")

    #message(FATAL_ERROR "SIZEOF: ${SIZEOF_STDOUT} RES: ${RESULT}")

    # RESULT is "" when compilation failed; testing the variable by name
    # keeps the condition well-formed in that case.
    if(NOT RESULT)
      message(SEND_ERROR "Could not determine align of(${TYPE})")
    endif()

    set(${OUT_VAR} "${RESULT}" CACHE INTERNAL "Align of ${TYPE}")

  endif()

endmacro()

####################################################################
#
# clangxx works check
#

# TODO clang + vecmathlib doesn't work on Windows yet...
if(CLANGXX AND (NOT WIN32) AND ENABLE_HOST_CPU_DEVICES)

  message(STATUS "Checking if clang++ works (required by vecmathlib)")

  set(CXX_WORKS 0)
  set(CXX_STDLIB "")

  if(NOT DEFINED CLANGXX_WORKS)

    # Tried in order: default standard library, libstdc++, libc++.
    # NOTE(review): the header names were lost in this copy of the file;
    # <iostream> is required for std::cout, <type_traits> is presumed - confirm.
    custom_try_compile_clangxx("namespace std { class type_info; } \n  #include <iostream> \n  #include <type_traits>" "std::cout << \"Hello clang++ world!\" << std::endl;" _STATUS_FAIL "-std=c++11")

    if(NOT _STATUS_FAIL)
      set(CXX_WORKS 1)
    else()
      custom_try_compile_clangxx("namespace std { class type_info; } \n  #include <iostream> \n  #include <type_traits>" "std::cout << \"Hello clang++ world!\" << std::endl;" _STATUS_FAIL "-stdlib=libstdc++" "-std=c++11")
      if(NOT _STATUS_FAIL)
        set(CXX_STDLIB "-stdlib=libstdc++")
        set(CXX_WORKS 1)
      else()
        custom_try_compile_clangxx("namespace std { class type_info; } \n  #include <iostream> \n  #include <type_traits>" "std::cout << \"Hello clang++ world!\" << std::endl;" _STATUS_FAIL "-stdlib=libc++" "-std=c++11")
        if(NOT _STATUS_FAIL)
          set(CXX_STDLIB "-stdlib=libc++")
          set(CXX_WORKS 1)
        endif()
      endif()
    endif()

    set(CLANGXX_WORKS ${CXX_WORKS} CACHE INTERNAL "Clang++ ")
    set(CLANGXX_STDLIB ${CXX_STDLIB} CACHE INTERNAL "Clang++ stdlib")
  endif()

endif()

if(CLANGXX_STDLIB AND (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
  set(LLVM_CXXFLAGS "${CLANGXX_STDLIB} ${LLVM_CXXFLAGS}")
  set(LLVM_LDFLAGS "${CLANGXX_STDLIB} ${LLVM_LDFLAGS}")
endif()

####################################################################
#
# - '-DNDEBUG' is a work-around for llvm bug 18253
#
# llvm-config does not always report the "-DNDEBUG" flag correctly
# (see LLVM bug 18253).
# If LLVM and the pocl passes are built with
# different NDEBUG settings, problems arise

# Probe (once; the result is cached in LLVM_NDEBUG_BUILD) whether a program
# that assigns llvm::DebugFlag compiles and links with -UNDEBUG.
if(NOT DEFINED LLVM_NDEBUG_BUILD)

  message(STATUS "Checking if LLVM is a DEBUG build")
  separate_arguments(_FLAGS UNIX_COMMAND "${LLVM_CXXFLAGS}")

  set(_TEST_SOURCE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/llvmNDEBUG.cc")
  # llvm::DebugFlag is declared in llvm/Support/Debug.h.
  file(WRITE "${_TEST_SOURCE}"
    "
    #include <llvm/Support/Debug.h>
    int main(int argc, char** argv) {
      llvm::DebugFlag=true;
    }
    ")

  set(TRY_COMPILE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${LLVM_CXXFLAGS} -UNDEBUG")

  try_compile(_TRY_SUCCESS ${CMAKE_BINARY_DIR} "${_TEST_SOURCE}"
    CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${LLVM_INCLUDE_DIRS}"
    CMAKE_FLAGS "-DLINK_DIRECTORIES:STRING=${LLVM_LIBDIR}"
    LINK_LIBRARIES "${LLVM_LIBS} ${LLVM_SYSLIBS} ${LLVM_LDFLAGS}"
    COMPILE_DEFINITIONS ${TRY_COMPILE_CXX_FLAGS}
    OUTPUT_VARIABLE _TRY_COMPILE_OUTPUT
  )

  # Keep the compiler output around for debugging the probe.
  file(APPEND "${CMAKE_BINARY_DIR}/CMakeFiles/CMakeOutput.log"
    "Test -NDEBUG flag: ${_TRY_COMPILE_OUTPUT}\n")

  if(_TRY_SUCCESS)
    message(STATUS "DEBUG build")
    set(LLVM_NDEBUG_BUILD 0 CACHE INTERNAL "DNDEBUG")
  else()
    message(STATUS "Not a DEBUG build")
    set(LLVM_NDEBUG_BUILD 1 CACHE INTERNAL "DNDEBUG")
  endif()

endif()

# llvm-config did not report -DNDEBUG although the probe says LLVM was
# built with it: add the flag ourselves.
if((NOT LLVM_CXXFLAGS MATCHES "-DNDEBUG") AND LLVM_NDEBUG_BUILD)
  message(STATUS "adding -DNDEBUG explicitly")
  set(LLVM_CXXFLAGS "${LLVM_CXXFLAGS} -DNDEBUG")
endif()

####################################################################

# TODO: We need to set both target-triple and cpu-type when
# building, since the ABI depends on both. We can either add flags
# to all the scripts, or set the respective flags here in
# *_CLANG_FLAGS and *_LLC_FLAGS. Note that clang and llc use
# different option names to set these. Note that clang calls the
# triple "target" and the cpu "architecture", which is different
# from llc.

# Normalise the triple. Otherwise, clang normalises it when
# passing it to llc, which is then different from the triple we
# pass to llc.
This would lead to inconsistent bytecode files, # depending on whether they are generated via clang or directly # via llc. if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED LLC_TRIPLE) message(STATUS "Find out LLC target triple (for host ${LLVM_HOST_TARGET})") set(_EMPTY_C_FILE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/tripletfind.c") file(WRITE "${_EMPTY_C_FILE}" "") execute_process(COMMAND ${CLANG} "${CLANG_TARGET_OPTION}${LLVM_HOST_TARGET}" -x c ${_EMPTY_C_FILE} -S -emit-llvm -o - RESULT_VARIABLE RES_VAR OUTPUT_VARIABLE OUTPUT_VAR) if(RES_VAR) message(FATAL_ERROR "Error ${RES_VAR} while determining target triple") endif() if(OUTPUT_VAR MATCHES "target triple = \"([^\"]+)") string(STRIP "${CMAKE_MATCH_1}" LLC_TRIPLE) else() message(FATAL_ERROR "Could not find target triple in llvm output") endif() # TODO the armv7hl normalize string(REPLACE "armv7l-" "armv7-" LLC_TRIPLE "${LLC_TRIPLE}") set(LLC_TRIPLE "${LLC_TRIPLE}" CACHE INTERNAL "LLC_TRIPLE") endif() # FIXME: The cpu name printed by llc --version is the same cpu that will be # targeted if you pass -mcpu=native to llc, so we could replace this auto-detection # with just: set(LLC_HOST_CPU "native"), however, we can't do this at the moment # because of the work-around for arm1176jz-s. if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED LLC_HOST_CPU_AUTO) message(STATUS "Find out LLC host CPU with ${LLVM_LLC}") execute_process(COMMAND ${LLVM_LLC} "--version" RESULT_VARIABLE RES_VAR OUTPUT_VARIABLE OUTPUT_VAR) # WTF, ^^ has return value 1 #if(RES_VAR) # message(FATAL_ERROR "Error ${RES_VAR} while determining LLC host CPU") #endif() if(OUTPUT_VAR MATCHES "Host CPU: ([^ ]*)") # sigh... 
STRING(STRIP is to workaround regexp bug in cmake string(STRIP "${CMAKE_MATCH_1}" LLC_HOST_CPU_AUTO) else() message(FATAL_ERROR "Couldnt determine host CPU from llc output") endif() #TODO better if(CMAKE_LIBRARY_ARCHITECTURE MATCHES "gnueabihf" AND LLC_HOST_CPU_AUTO MATCHES "arm1176jz-s") set(LLC_HOST_CPU_AUTO "arm1176jzf-s") endif() endif() if((LLC_HOST_CPU_AUTO MATCHES "unknown") AND (NOT LLC_HOST_CPU)) message(FATAL_ERROR "LLVM could not recognize your CPU model automatically. Please run CMake with -DLLC_HOST_CPU= (you can find valid names with: llc -mcpu=help)") else() set(LLC_HOST_CPU_AUTO "${LLC_HOST_CPU_AUTO}" CACHE INTERNAL "Autodetected CPU") endif() if((DEFINED LLC_HOST_CPU) AND (NOT LLC_HOST_CPU STREQUAL LLC_HOST_CPU_AUTO)) message(STATUS "Autodetected CPU ${LLC_HOST_CPU_AUTO} overriden by user to ${LLC_HOST_CPU}") set(HOST_CPU_FORCED 1 CACHE INTERNAL "CPU is forced by user") else() set(LLC_HOST_CPU "${LLC_HOST_CPU_AUTO}" CACHE STRING "The Host CPU to use with llc") set(HOST_CPU_FORCED 0 CACHE INTERNAL "CPU is forced by user") endif() #################################################################### # Some architectures have -march and -mcpu reversed if(NOT DEFINED CLANG_MARCH_FLAG) message(STATUS "Checking clang -march vs. -mcpu flag") custom_try_compile_clang_silent("" "return 0;" RES ${CLANG_TARGET_OPTION}${LLC_TRIPLE} -march=${LLC_HOST_CPU}) if(NOT RES) set(CLANG_MARCH_FLAG "-march=") else() custom_try_compile_clang_silent("" "return 0;" RES ${CLANG_TARGET_OPTION}${LLC_TRIPLE} -mcpu=${LLC_HOST_CPU}) if(NOT RES) set(CLANG_MARCH_FLAG "-mcpu=") else() message(FATAL_ERROR "Could not determine whether to use -march or -mcpu with clang") endif() endif() message(STATUS " Using ${CLANG_MARCH_FLAG}") set(CLANG_MARCH_FLAG ${CLANG_MARCH_FLAG} CACHE INTERNAL "Clang option used to specify the target cpu") endif() #################################################################### # This tests that we can actually link to the llvm libraries. 
# Mostly to catch issues like #295 - cannot find -ledit

# Build (once; the result is cached in LLVM_LINK_TEST) a small program that
# parses an IR file through the LLVM C++ API, to verify that the LLVM
# libraries and their system dependencies can actually be linked.
if(NOT DEFINED LLVM_LINK_TEST)

  # The test program uses printf(), exit() and std::unique_ptr, so it
  # includes the standard headers that declare them.
  set(LLVM_LINK_TEST_SOURCE "
    #include <cstdio>
    #include <cstdlib>
    #include <memory>
    #include \"llvm/IR/LLVMContext.h\"
    #include \"llvm/Support/SourceMgr.h\"
    #include \"llvm/IR/Module.h\"
    #include \"llvm/IRReader/IRReader.h\"

    int main( int argc, char* argv[] )
    {
       if( argc < 2 )
         exit(2);

       llvm::LLVMContext context;
       llvm::SMDiagnostic err;
       std::unique_ptr<llvm::Module> module = llvm::parseIRFile( argv[1], err, context );

       if( !module )
         exit(1);
       else
         printf(\"DataLayout = %s\\n\", module->getDataLayoutStr().c_str());

       return 0;
    }")

  # Random suffix so the source file name is unique within the build dir.
  string(RANDOM RNDNAME)
  set(LLVM_LINK_TEST_FILENAME "${CMAKE_BINARY_DIR}/llvm_link_test_${RNDNAME}.cc")
  file(WRITE "${LLVM_LINK_TEST_FILENAME}" "${LLVM_LINK_TEST_SOURCE}")

  try_compile(LLVM_LINK_TEST ${CMAKE_BINARY_DIR} "${LLVM_LINK_TEST_FILENAME}"
    CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${LLVM_INCLUDE_DIRS}"
    CMAKE_FLAGS "-DLINK_DIRECTORIES:STRING=${LLVM_LIBDIR}"
    LINK_LIBRARIES "${LLVM_LDFLAGS} ${LLVM_LIBS} ${LLVM_SYSLIBS}"
    COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${LLVM_CXXFLAGS}"
    OUTPUT_VARIABLE _TRY_COMPILE_OUTPUT)

  if (LLVM_LINK_TEST)
    message(STATUS "LLVM link test OK")
    set(LLVM_LINK_TEST 1 CACHE INTERNAL "LLVM link test result")
  else()
    message(STATUS "LLVM link test output: ${_TRY_COMPILE_OUTPUT}")
    message(FATAL_ERROR "LLVM link test FAILED.
This mostly happens when your LLVM installation does not have all dependencies installed.")
  endif()

endif()

####################################################################

# Check whether Clang can compile __fp16 arithmetic for the host target.
# The check runs only when CL_DISABLE_HALF has no value yet: a value given
# by the user (-DCL_DISABLE_HALF=...) or cached by an earlier configure run
# is kept. The test must name the variable itself; "DEFINED ${VAR}" would
# test a variable named after the *value* of VAR. An empty cached value
# (written below when host CPU devices were disabled) counts as "not yet
# checked".
if(ENABLE_HOST_CPU_DEVICES AND
   ((NOT DEFINED CL_DISABLE_HALF) OR ("${CL_DISABLE_HALF}" STREQUAL "")))
  set(CL_DISABLE_HALF 0)
  message(STATUS "Checking fp16 support")
  custom_try_compile_clang_silent("__fp16 callfp16(__fp16 a) { return a * (__fp16)1.8; };" "__fp16 x=callfp16((__fp16)argc);" RESV ${CLANG_TARGET_OPTION}${LLC_TRIPLE} ${CLANG_MARCH_FLAG}${LLC_HOST_CPU})
  # A nonzero result means the snippet did not compile.
  if(RESV)
    set(CL_DISABLE_HALF 1)
  endif()
endif()

set(CL_DISABLE_HALF "${CL_DISABLE_HALF}" CACHE INTERNAL "Disable cl_khr_fp16 because fp16 is not supported")
message(STATUS "FP16 is disabled: ${CL_DISABLE_HALF}")

#####################################################################

# Ask Clang for its resource directory and derive the OpenCL header paths.
execute_process(COMMAND "${CLANG}" "--print-resource-dir" OUTPUT_VARIABLE RESOURCE_DIR)
string(STRIP "${RESOURCE_DIR}" RESOURCE_DIR)
set(CLANG_RESOURCE_DIR "${RESOURCE_DIR}" CACHE INTERNAL "Clang resource dir")

set(CLANG_OPENCL_HEADERS "${CLANG_RESOURCE_DIR}/include/opencl-c.h")
if(NOT LLVM_OLDER_THAN_9_0)
  list(APPEND CLANG_OPENCL_HEADERS "${CLANG_RESOURCE_DIR}/include/opencl-c-base.h")
endif()
pocl-1.8/cmake/Sphinx.cmake000066400000000000000000000031231413131625300156500ustar00rootroot00000000000000#=============================================================================
# CMake build system files
#
# Copyright (c) 2014 pocl developers
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions
of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # #============================================================================= find_program(SPHINX_EXECUTABLE NAMES sphinx-build HINTS $ENV{SPHINX_DIR} PATH_SUFFIXES bin DOC "Sphinx documentation generator" ) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Sphinx DEFAULT_MSG SPHINX_EXECUTABLE ) mark_as_advanced(SPHINX_EXECUTABLE) pocl-1.8/cmake/add_test_pocl.cmake000066400000000000000000000060201413131625300172020ustar00rootroot00000000000000#============================================================================= # CMake build system files - add_test_pocl() test wrapper # # Copyright (c) 2014-2017 pocl developers # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
#=============================================================================

include(CMakeParseArguments)

# add_test_pocl(NAME <name> COMMAND <exe> [<arg>...]
#               [WORKING_DIRECTORY <dir>] [EXPECTED_OUTPUT <file>]
#               [SORT_OUTPUT])
#
# Wrapper around add_test() that runs the test through cmake/run_test.cmake:
#  - the command is handed over as one "-Dtest_cmd=" definition, with its
#    elements joined by "####"; the first element is taken relative to
#    CMAKE_CURRENT_BINARY_DIR
#  - EXPECTED_OUTPUT (relative to CMAKE_CURRENT_SOURCE_DIR) is passed as
#    "output_blessed", SORT_OUTPUT as "sort_output"
#  - unless ENABLE_ANYSAN is set, pass/fail is decided by the "OK" / "FAIL"
#    regular expressions rather than by the exit status
function(add_test_pocl)

  cmake_parse_arguments(POCL_TEST
    "SORT_OUTPUT"
    "EXPECTED_OUTPUT;NAME;WORKING_DIRECTORY"
    "COMMAND"
    ${ARGN})

  # Flatten COMMAND into a single "####"-separated string.
  unset(_joined_cmd)
  foreach(_piece ${POCL_TEST_COMMAND})
    if(NOT _joined_cmd)
      set(_joined_cmd "${CMAKE_CURRENT_BINARY_DIR}/${_piece}")
    else()
      set(_joined_cmd "${_joined_cmd}####${_piece}")
    endif()
  endforeach()

  # Collect the add_test() arguments step by step.
  set(_add_test_args "NAME" "${POCL_TEST_NAME}")

  if(POCL_TEST_WORKING_DIRECTORY)
    list(APPEND _add_test_args
      "WORKING_DIRECTORY" "${POCL_TEST_WORKING_DIRECTORY}")
  endif()

  list(APPEND _add_test_args
    "COMMAND" "${CMAKE_COMMAND}" "-Dtest_cmd=${_joined_cmd}")

  if(INTEL_SDE_AVX512)
    list(APPEND _add_test_args "-DSDE=${INTEL_SDE_AVX512}")
  endif()

  if(POCL_TEST_EXPECTED_OUTPUT)
    list(APPEND _add_test_args
      "-Doutput_blessed=${CMAKE_CURRENT_SOURCE_DIR}/${POCL_TEST_EXPECTED_OUTPUT}")
  endif()

  if(POCL_TEST_SORT_OUTPUT)
    list(APPEND _add_test_args "-Dsort_output=1")
  endif()

  list(APPEND _add_test_args
    "-P" "${CMAKE_SOURCE_DIR}/cmake/run_test.cmake")

  add_test(${_add_test_args})

  if(NOT ENABLE_ANYSAN)
    set_tests_properties("${POCL_TEST_NAME}"
      PROPERTIES
        PASS_REGULAR_EXPRESSION "OK"
        FAIL_REGULAR_EXPRESSION "FAIL")
  endif()

endfunction()
pocl-1.8/cmake/bitcode_rules.cmake000066400000000000000000000210261413131625300172240ustar00rootroot00000000000000#============================================================================= # CMake build system files # # Copyright (c) 2014 pocl developers # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
#
#=============================================================================

# cmake version of lib/kernel/rules.mk

separate_arguments(KERNEL_C_FLAGS)
separate_arguments(KERNEL_CL_FLAGS)
separate_arguments(KERNEL_CXX_FLAGS)

# Adds a rule compiling the C file lib/kernel/${FILENAME} to
# ${SUBDIR}/<name>.bc in the current binary dir, and appends that .bc
# to the caller's list variable named by BC_FILE_LIST.
function(compile_c_to_bc FILENAME SUBDIR BC_FILE_LIST)
    get_filename_component(FNAME "${FILENAME}" NAME)
    set(BC_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}/${FNAME}.bc")
    set(${BC_FILE_LIST} ${${BC_FILE_LIST}} ${BC_FILE} PARENT_SCOPE)
    set(FULL_F_PATH "${CMAKE_SOURCE_DIR}/lib/kernel/${FILENAME}")

    add_custom_command( OUTPUT "${BC_FILE}"
        DEPENDS "${FULL_F_PATH}"
          "${CMAKE_SOURCE_DIR}/include/pocl_types.h"
          "${CMAKE_SOURCE_DIR}/include/_kernel_c.h"
        COMMAND "${CLANG}" ${CLANG_FLAGS} ${DEVICE_CL_FLAGS} "-O1"
          ${KERNEL_C_FLAGS} "-o" "${BC_FILE}" "-c" "${FULL_F_PATH}"
          "-I${CMAKE_SOURCE_DIR}/include"
          "-include" "${CMAKE_SOURCE_DIR}/include/_kernel_c.h"
        COMMENT "Building C to LLVM bitcode ${BC_FILE}"
        VERBATIM)
endfunction()

# Same as compile_c_to_bc, for C++ sources (compiled with ${CLANGXX}).
function(compile_cc_to_bc FILENAME SUBDIR BC_FILE_LIST)
    get_filename_component(FNAME "${FILENAME}" NAME)
    set(BC_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}/${FNAME}.bc")
    set(${BC_FILE_LIST} ${${BC_FILE_LIST}} ${BC_FILE} PARENT_SCOPE)
    set(FULL_F_PATH "${CMAKE_SOURCE_DIR}/lib/kernel/${FILENAME}")

    add_custom_command(OUTPUT "${BC_FILE}"
        DEPENDS "${FULL_F_PATH}"
        COMMAND "${CLANGXX}" ${CLANG_FLAGS} ${KERNEL_CXX_FLAGS}
          ${DEVICE_C_FLAGS} "-o" "${BC_FILE}" "-c" "${FULL_F_PATH}" "-O1"
        COMMENT "Building C++ to LLVM bitcode ${BC_FILE}"
        VERBATIM)
endfunction()

# Same as compile_c_to_bc, for OpenCL C sources. Files whose name matches
# "sleef" or "libclc" get extra dependencies and include options;
# EXTRA_CONFIG is force-included for the sleef ones.
function(compile_cl_to_bc FILENAME SUBDIR BC_FILE_LIST EXTRA_CONFIG)
    get_filename_component(FNAME "${FILENAME}" NAME)
    get_filename_component(FNAME_WE "${FILENAME}" NAME_WE)
    set(BC_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}/${FNAME}.bc")
    set(${BC_FILE_LIST} ${${BC_FILE_LIST}} ${BC_FILE} PARENT_SCOPE)
    set(FULL_F_PATH "${CMAKE_SOURCE_DIR}/lib/kernel/${FILENAME}")

    set(DEPENDLIST
          "${CMAKE_SOURCE_DIR}/include/_kernel.h"
          "${CMAKE_SOURCE_DIR}/include/_kernel_c.h"
          "${CMAKE_SOURCE_DIR}/include/pocl_types.h")
    set(INCLUDELIST
        "-include" "${CMAKE_SOURCE_DIR}/include/_kernel.h"
        "-include" "${CMAKE_SOURCE_DIR}/include/_enable_all_exts.h")

    if(FILENAME MATCHES "sleef")
      list(APPEND DEPENDLIST "${EXTRA_CONFIG}" )
      list(APPEND DEPENDLIST ${SLEEF_CL_KERNEL_DEPEND_HEADERS})
      list(APPEND INCLUDELIST
        "-DMAX_PRECISION"
        "-I" "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/include" # for sleef_cl.h
        "-include" "${EXTRA_CONFIG}")
    endif()

    if(FILENAME MATCHES "libclc")
      list(APPEND DEPENDLIST ${LIBCLC_KERNEL_DEPEND_HEADERS})

      # Optional per-precision companion files, tracked as dependencies
      # only when they exist.
      set(I32 "${CMAKE_SOURCE_DIR}/lib/kernel/libclc/${FNAME_WE}_fp32.cl")
      if(EXISTS "${I32}")
        list(APPEND DEPENDLIST "${I32}")
      endif()

      set(I64 "${CMAKE_SOURCE_DIR}/lib/kernel/libclc/${FNAME_WE}_fp64.cl")
      if(EXISTS "${I64}")
        list(APPEND DEPENDLIST "${I64}")
      endif()

      list(APPEND INCLUDELIST "-I" "${CMAKE_SOURCE_DIR}/lib/kernel/libclc")
    endif()

    add_custom_command( OUTPUT "${BC_FILE}"
        DEPENDS "${FULL_F_PATH}" ${DEPENDLIST}
        COMMAND "${CLANG}" ${CLANG_FLAGS}
          ${KERNEL_CL_FLAGS} ${DEVICE_CL_FLAGS}
          "-o" "${BC_FILE}" "-c" "${FULL_F_PATH}" ${INCLUDELIST}
        COMMENT "Building CL to LLVM bitcode ${BC_FILE}"
        VERBATIM)
endfunction()

# ARGN - extra defines / arguments to clang
# can't use c_to_bc, since SLEEF's C files need to be prefixed with EXT
# (because the same files are compiled multiple times)
function(compile_sleef_c_to_bc EXT FILENAME SUBDIR BCLIST)
    get_filename_component(FNAME "${FILENAME}" NAME)
    set(BC_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}/${EXT}_${FNAME}.bc")
    list(APPEND ${BCLIST} "${BC_FILE}")
    set(${BCLIST} ${${BCLIST}} PARENT_SCOPE)
    set(FULL_F_PATH "${CMAKE_SOURCE_DIR}/lib/kernel/${FILENAME}")

    add_custom_command( OUTPUT "${BC_FILE}"
        DEPENDS "${FULL_F_PATH}"
          ${SLEEF_C_KERNEL_DEPEND_HEADERS}
        COMMAND "${CLANG}" ${CLANG_FLAGS} ${DEVICE_C_FLAGS}
          ${KERNEL_C_FLAGS} ${ARGN}
          "-I" "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/arch"
          "-I" "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/libm"
          "-I" "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/include"
          "-O1" "-o" "${BC_FILE}" "-c" "${FULL_F_PATH}"
        COMMENT "Building SLEEF to LLVM bitcode ${BC_FILE}"
        VERBATIM)
endfunction()

# Adds a rule assembling the LLVM IR file ${FILENAME} (resolved relative
# to the parent of the current source dir) into ${SUBDIR}/<name>.bc, and
# appends that .bc to the caller's list variable named by BC_FILE_LIST.
function(compile_ll_to_bc FILENAME SUBDIR BC_FILE_LIST)
    get_filename_component(FNAME "${FILENAME}" NAME)
    set(BC_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}/${FNAME}.bc")
    set(${BC_FILE_LIST} ${${BC_FILE_LIST}} ${BC_FILE} PARENT_SCOPE)
    # The exact file handed to llvm-as below; also listed as the rule's
    # dependency so the bitcode is rebuilt when the .ll source changes.
    set(LL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/../${FILENAME}")

    add_custom_command( OUTPUT "${BC_FILE}"
        DEPENDS "${LL_SOURCE}"
        COMMAND "${LLVM_AS}" "-o" "${BC_FILE}" "${LL_SOURCE}"
        COMMENT "Building LL to LLVM bitcode ${BC_FILE}"
        VERBATIM)
endfunction()

# Dispatches every file in ARGN to the matching compile_*_to_bc rule based
# on its extension; unknown extensions are a fatal error.
macro(compile_to_bc SUBDIR OUTPUT_FILE_LIST EXTRA_CONFIG)
  foreach(FILENAME ${ARGN})
    if(FILENAME MATCHES "[.]c$")
      compile_c_to_bc("${FILENAME}" "${SUBDIR}" ${OUTPUT_FILE_LIST})
    elseif(FILENAME MATCHES "[.]cc$")
      compile_cc_to_bc("${FILENAME}" "${SUBDIR}" ${OUTPUT_FILE_LIST})
    elseif(FILENAME MATCHES "[.]cl$")
      compile_cl_to_bc("${FILENAME}" "${SUBDIR}" ${OUTPUT_FILE_LIST} "${EXTRA_CONFIG}")
    elseif(FILENAME MATCHES "[.]ll$")
      compile_ll_to_bc("${FILENAME}" "${SUBDIR}" ${OUTPUT_FILE_LIST})
    else()
      message(FATAL_ERROR "Dont know how to compile ${FILENAME} to .bc !")
    endif()
  endforeach()
endmacro()

function(make_kernel_bc OUTPUT_VAR NAME SUBDIR USE_SLEEF EXTRA_BC EXTRA_CONFIG)
  set(KERNEL_BC "${CMAKE_CURRENT_BINARY_DIR}/kernel-${NAME}.bc")
  set(${OUTPUT_VAR} "${KERNEL_BC}" PARENT_SCOPE)

  file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}")

  compile_to_bc("${SUBDIR}" BC_LIST "${EXTRA_CONFIG}" ${ARGN})

  set(DEPENDLIST ${BC_LIST})
  # fix too long commandline with cat and xargs
  set(BC_LIST_FILE_TXT "")
  foreach(FILENAME ${BC_LIST})
    # straight parsing semicolon separated list with xargs -d didn't work on windows..
no such switch available set(BC_LIST_FILE_TXT "${BC_LIST_FILE_TXT} \"${FILENAME}\"") endforeach() if(USE_SLEEF) set(BC_LIST_FILE_TXT "${BC_LIST_FILE_TXT} \"${EXTRA_BC}\"") list(APPEND DEPENDLIST ${EXTRA_BC} "sleef_config_${VARIANT}") endif() set(BC_LIST_FILE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/kernel_${NAME}_linklist.txt") file(WRITE "${BC_LIST_FILE}" "${BC_LIST_FILE_TXT}") # don't waste time optimizing the kernels IR when in developer mode if(DEVELOPER_MODE) set(LINK_OPT_COMMAND COMMAND "${XARGS_EXEC}" "${LLVM_LINK}" "-o" "${KERNEL_BC}" < "${BC_LIST_FILE}") else() set(LINK_CMD COMMAND "${XARGS_EXEC}" "${LLVM_LINK}" "-o" "kernel-${NAME}-unoptimized.bc" < "${BC_LIST_FILE}") set(OPT_CMD COMMAND "${LLVM_OPT}" ${LLC_FLAGS} "-O3" "-fp-contract=off" "-o" "${KERNEL_BC}" "kernel-${NAME}-unoptimized.bc") set(LINK_OPT_COMMAND ${LINK_CMD} ${OPT_CMD}) endif() add_custom_command( OUTPUT "${KERNEL_BC}" DEPENDS ${DEPENDLIST} ${LINK_OPT_COMMAND} COMMENT "Linking & optimizing Kernel bitcode ${KERNEL_BC}" VERBATIM) endfunction() pocl-1.8/cmake/clangLinkerWorkaround.sh000077500000000000000000000032411413131625300202420ustar00rootroot00000000000000#!/usr/bin/env bash #This is free and unencumbered software released into the public domain. #Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. #In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. 
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #For more information, please refer to echo "There is a long-ignored bug in CLang. https://bugs.llvm.org/show_bug.cgi?id=44594 . Have to work around." $( echo ld i=0 for el in $@; do if [ -e "$el" ]; then fn=$(basename $el); ext=${fn##*.} dn=$(dirname $el); if [[ $dn == *"/lib/gcc/"* ]]; then if [ "$ext" = "o" ]; then bn="${fn%.*}"; newFulN="$dn/${bn}S.o"; if [ -e "$newFulN" ]; then el=$newFulN; fi; fi; fi; fi; echo $el; done; ) | xargs pocl-1.8/cmake/kernellib_hash.cmake000066400000000000000000000016121413131625300173520ustar00rootroot00000000000000# TODO this is duplicated in top CMakeLists.txt function(rename_if_different SRC DST) if(EXISTS "${DST}") file(MD5 "${SRC}" OLD_MD5) file(MD5 "${DST}" NEW_MD5) if(NOT OLD_MD5 STREQUAL NEW_MD5) message(STATUS "Renaming ${SRC} to ${DST}") file(RENAME "${SRC}" "${DST}") endif() else() message(STATUS "Renaming ${SRC} to ${DST}") file(RENAME "${SRC}" "${DST}") endif() endfunction() string(REPLACE "****" ";" KERNEL_BC_LIST "${KERNEL_BC_LIST_ESCAPED}") foreach(KERNEL_BC IN LISTS KERNEL_BC_LIST) if(EXISTS ${KERNEL_BC}) file(SHA1 "${KERNEL_BC}" S) set(S1 "${S}__${S1}") endif() endforeach() file(SHA1 "${INCLUDEDIR}/_kernel.h" S2) file(SHA1 "${INCLUDEDIR}/_kernel_c.h" S3) file(SHA1 "${INCLUDEDIR}/pocl_types.h" S4) file(WRITE "${OUTPUT}.new" "#define POCL_KERNELLIB_SHA1 \"${S1}${S2}_${S3}_${S4}\"") rename_if_different("${OUTPUT}.new" "${OUTPUT}") pocl-1.8/cmake/multi_exec_test.cmake000066400000000000000000000014061413131625300175760ustar00rootroot00000000000000macro(execute_command_with_args 
CMD_WITH_ARGS)
  # Runs one command given as a single "####"-separated string.
  # On a nonzero exit status, reports the command together with its captured
  # stdout/stderr via SEND_ERROR; otherwise echoes both streams.
  string(REPLACE "####" ";" CMD_SEPARATED "${CMD_WITH_ARGS}")

  execute_process(COMMAND ${CMD_SEPARATED}
    RESULT_VARIABLE CMD_RESULT
    OUTPUT_VARIABLE stdout
    ERROR_VARIABLE stderr
  )

  if( CMD_RESULT )
    # Print the command that was actually run (the split list).
    message( SEND_ERROR "FAIL: Command exited with nonzero code (${CMD_RESULT}): ${CMD_SEPARATED}\nSTDOUT:\n${stdout}\nSTDERR:\n${stderr}" )
  else()
    message("${stdout}")
    message("${stderr}")
  endif()

endmacro()

# Up to five commands may be passed in as CMD1..CMD5; run those that are set.
if(CMD1)
  execute_command_with_args(${CMD1})
endif()

if(CMD2)
  execute_command_with_args(${CMD2})
endif()

if(CMD3)
  execute_command_with_args(${CMD3})
endif()

if(CMD4)
  execute_command_with_args(${CMD4})
endif()

if(CMD5)
  execute_command_with_args(${CMD5})
endif()
pocl-1.8/cmake/run_test.cmake000066400000000000000000000041571413131625300162520ustar00rootroot00000000000000# some argument checking:
# test_cmd is the command to run with all its arguments, separated by "####"
if( NOT test_cmd )
  message( FATAL_ERROR "Variable test_cmd not defined" )
endif()

# output_blessed contains the name of the file with expected output
if(output_blessed)
  message(STATUS "Expecting output: ${output_blessed}")
endif()

string(REPLACE "####" ";" test_cmd_separated "${test_cmd}")

execute_process(
  COMMAND ${test_cmd_separated}
  RESULT_VARIABLE test_not_successful
  OUTPUT_VARIABLE stdout
  ERROR_VARIABLE stderr
)

# the first run would fail, but still pre-compile the kernels
# for the 2nd run through SDE
if(SDE)
  execute_process(
    COMMAND "${SDE}" -skx -- ${test_cmd_separated}
    RESULT_VARIABLE test_not_successful
    OUTPUT_VARIABLE stdout
    ERROR_VARIABLE stderr
  )
endif()

if( test_not_successful )
  message( SEND_ERROR "FAIL: Test exited with nonzero code (${test_not_successful}): ${test_cmd_separated}\nSTDOUT:\n${stdout}\nSTDERR:\n${stderr}" )
else()
  message("${stdout}")
  message("${stderr}")
endif()

if(output_blessed)
  string(RANDOM RAND_STR)
  set(RANDOM_FILE "/tmp/cmake_testrun_${RAND_STR}")
  file(WRITE "${RANDOM_FILE}" "${stdout}")

  if( sort_output )
    message(STATUS "SORTING FILE")
    file(STRINGS "${RANDOM_FILE}"
output_string_list) list(SORT output_string_list) # for some reason sorting doesn't work when list contains newlines, # have to add them after the sort file(REMOVE "${RANDOM_FILE}") string(REPLACE ";" "\n" OUTPUT "${output_string_list}") set(RANDOM_FILE "${RANDOM_FILE}_sorted") file(WRITE "${RANDOM_FILE}" "${OUTPUT}\n") endif() message(STATUS "Comparing output..") execute_process( COMMAND ${CMAKE_COMMAND} -E compare_files "${output_blessed}" "${RANDOM_FILE}" RESULT_VARIABLE test_not_successful ) if( test_not_successful ) message(SEND_ERROR "FAIL: Test output does not match the expected output; output stored in ${RANDOM_FILE}" ) else() file(REMOVE "${RANDOM_FILE}") endif() endif() if ((NOT "${stdout}${stderr}" MATCHES "OK") AND (NOT "${stdout}${stderr}" MATCHES "FAIL")) message(STATUS "OK") endif() pocl-1.8/cmake/sanitizers.cmake000066400000000000000000000040551413131625300165770ustar00rootroot00000000000000# currently only works with gcc as host compiler if (CMAKE_C_COMPILER_ID STREQUAL "GNU") if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.7.99") option(ENABLE_ASAN "Enable AddressSanitizer" OFF) option(ENABLE_TSAN "Enable ThreadSanitizer" OFF) else() set(ENABLE_ASAN OFF CACHE INTERNAL "Enable AddressSanitizer") set(ENABLE_TSAN OFF CACHE INTERNAL "Enable ThreadSanitizer") endif() if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.8.99") option(ENABLE_UBSAN "Enable UBSanitizer" OFF) else() set(ENABLE_UBSAN OFF CACHE INTERNAL "Enable UBSanitizer") endif() if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "5.0.99") option(ENABLE_LSAN "Enable LeakSanitizer" OFF) else() set(ENABLE_LSAN OFF CACHE INTERNAL "Enable LeakSanitizer") endif() else() set(ENABLE_ASAN OFF CACHE INTERNAL "Enable AddressSanitizer") set(ENABLE_TSAN OFF CACHE INTERNAL "Enable ThreadSanitizer") endif() set(SANITIZER_OPTIONS "") if(ENABLE_ASAN) if("${CMAKE_C_COMPILER_VERSION}" VERSION_LESS "6.0.0") list(APPEND SANITIZER_OPTIONS "-fsanitize=address") else() list(APPEND SANITIZER_OPTIONS "-fsanitize=address" 
"-fsanitize-recover=address") endif() list(APPEND SANITIZER_LIBS "asan") endif() if(ENABLE_LSAN) list(APPEND SANITIZER_OPTIONS "-fsanitize=leak") list(APPEND SANITIZER_LIBS "lsan") endif() if(ENABLE_TSAN) list(APPEND SANITIZER_OPTIONS "-fsanitize=thread") list(APPEND SANITIZER_LIBS "tsan") endif() if(ENABLE_UBSAN) list(APPEND SANITIZER_OPTIONS "-fsanitize=undefined") list(APPEND SANITIZER_LIBS "ubsan") endif() if(SANITIZER_OPTIONS) list(APPEND SANITIZER_OPTIONS "-fno-omit-frame-pointer") add_compile_options(${SANITIZER_OPTIONS}) endif() # Unfortunately the way CMake tests work, if they're given # a pass/fail expression, they don't check for exit status. # This was causing some false negatives with ASan (test was # returning with 1, but CMake reported it as pass because # the pass expression was present in output). if(ENABLE_ASAN OR ENABLE_TSAN OR ENABLE_UBSAN OR ENABLE_LSAN) set(ENABLE_ANYSAN 1) endif() pocl-1.8/config.h.in.cmake000066400000000000000000000112551413131625300154240ustar00rootroot00000000000000 #cmakedefine BUILD_HSA #cmakedefine BUILD_CUDA #cmakedefine BUILD_BASIC #cmakedefine BUILD_PTHREAD #cmakedefine BUILD_ACCEL #define BUILDDIR "@BUILDDIR@" /* "Build with ICD" */ #cmakedefine BUILD_ICD #define CMAKE_BUILD_TYPE "@CMAKE_BUILD_TYPE@" #cmakedefine ENABLE_ASAN #cmakedefine ENABLE_LSAN #cmakedefine ENABLE_TSAN #cmakedefine ENABLE_UBSAN #cmakedefine ENABLE_EXTRA_VALIDITY_CHECKS #cmakedefine ENABLE_CONFORMANCE #cmakedefine ENABLE_HWLOC #cmakedefine ENABLE_HOST_CPU_DEVICES #cmakedefine ENABLE_POCL_BUILDING #cmakedefine ENABLE_POCL_FLOAT_CONVERSION #cmakedefine ENABLE_RELOCATION #cmakedefine ENABLE_SLEEF #cmakedefine ENABLE_SPIR #cmakedefine ENABLE_SPIRV #cmakedefine HAVE_DLFCN_H #cmakedefine HAVE_FORK #cmakedefine HAVE_VFORK #cmakedefine HAVE_CLOCK_GETTIME #cmakedefine HAVE_FDATASYNC #cmakedefine HAVE_FSYNC #cmakedefine HAVE_GETRLIMIT #cmakedefine HAVE_MKOSTEMPS #cmakedefine HAVE_MKSTEMPS #cmakedefine HAVE_MKDTEMP #cmakedefine HAVE_FUTIMENS #cmakedefine 
HAVE_LTTNG_UST #cmakedefine HAVE_OCL_ICD #cmakedefine HAVE_POSIX_MEMALIGN #cmakedefine HAVE_SLEEP #cmakedefine HAVE_UTIME #cmakedefine HAVE_VALGRIND #cmakedefine ENABLE_LLVM #cmakedefine ENABLE_LOADABLE_DRIVERS /* this is used all over the runtime code */ #define HOST_CPU_CACHELINE_SIZE @HOST_CPU_CACHELINE_SIZE@ #if defined(BUILD_CUDA) #define CUDA_DEVICE_EXTENSIONS "@CUDA_DEVICE_EXTENSIONS@" #endif #if defined(BUILD_BASIC) || defined(BUILD_PTHREAD) #define HOST_AS_FLAGS "@HOST_AS_FLAGS@" #define HOST_CLANG_FLAGS "@HOST_CLANG_FLAGS@" #define HOST_DEVICE_EXTENSIONS "@HOST_DEVICE_EXTENSIONS@" #cmakedefine HOST_CPU_FORCED #define HOST_LD_FLAGS "@HOST_LD_FLAGS@" #define HOST_LLC_FLAGS "@HOST_LLC_FLAGS@" #cmakedefine HOST_FLOAT_SOFT_ABI #endif #define HOST_DEVICE_BUILD_HASH "@HOST_DEVICE_BUILD_HASH@" #define DEFAULT_DEVICE_EXTENSIONS "@DEFAULT_DEVICE_EXTENSIONS@" #ifdef BUILD_HSA #cmakedefine HAVE_HSA_EXT_AMD_H #define AMD_HSA @AMD_HSA@ #define HSA_DEVICE_EXTENSIONS "@HSA_DEVICE_EXTENSIONS@" #define HSAIL_ASM "@HSAIL_ASM@" #define HSAIL_ENABLED @HSAIL_ENABLED@ #endif #define CMAKE_BUILD_TYPE "@CMAKE_BUILD_TYPE@" #define LINK_COMMAND "@LINK_COMMAND@" #ifdef ENABLE_LLVM #define KERNELLIB_HOST_CPU_VARIANTS "@KERNELLIB_HOST_CPU_VARIANTS@" #cmakedefine KERNELLIB_HOST_DISTRO_VARIANTS #define CLANG "@CLANG@" #define CLANG_RESOURCE_DIR "@CLANG_RESOURCE_DIR@" #define CLANGXX "@CLANGXX@" #define LLVM_LLC "@LLVM_LLC@" #define LLVM_SPIRV "@LLVM_SPIRV@" /* "Using LLVM 6.0" */ #cmakedefine LLVM_6_0 /* "Using LLVM 7.0" */ #cmakedefine LLVM_7_0 /* "Using LLVM 8.0" */ #cmakedefine LLVM_8_0 #cmakedefine LLVM_9_0 #cmakedefine LLVM_10_0 #cmakedefine LLVM_11_0 #cmakedefine LLVM_MAJOR @LLVM_VERSION_MAJOR@ #cmakedefine LLVM_BUILD_MODE_DEBUG #ifndef LLVM_VERSION #define LLVM_VERSION "@LLVM_VERSION_FULL@" #endif #endif /* Defined to greatest expected alignment for extended types, in bytes. 
*/ #define MAX_EXTENDED_ALIGNMENT @MAX_EXTENDED_ALIGNMENT@ #define PRINTF_BUFFER_SIZE @PRINTF_BUFFER_SIZE@ /* used in lib/CL/devices/basic */ #define OCL_KERNEL_TARGET "@OCL_KERNEL_TARGET@" #define OCL_KERNEL_TARGET_CPU "@OCL_KERNEL_TARGET_CPU@" #define POCL_VERSION_BASE "@POCL_VERSION_BASE@" #define POCL_VERSION_FULL "@POCL_VERSION_FULL@" #define POCL_KERNEL_CACHE_DEFAULT @POCL_KERNEL_CACHE_DEFAULT@ #define HOST_DEVICE_ADDRESS_BITS @HOST_DEVICE_ADDRESS_BITS@ #cmakedefine POCL_DEBUG_MESSAGES #define POCL_INSTALL_PRIVATE_HEADER_DIR "@POCL_INSTALL_PRIVATE_HEADER_DIR@" #define POCL_INSTALL_PRIVATE_DATADIR "@POCL_INSTALL_PRIVATE_DATADIR@" #define POCL_INSTALL_PRIVATE_DATADIR_REL "@POCL_INSTALL_PRIVATE_DATADIR_REL@" #define POCL_INSTALL_PRIVATE_LIBDIR "@POCL_INSTALL_PRIVATE_LIBDIR@" #define POCL_INSTALL_PRIVATE_LIBDIR_REL "@POCL_INSTALL_PRIVATE_LIBDIR_REL@" #cmakedefine POCL_ASSERTS_BUILD /* these are *host* values */ /* used in tce_common.c & pocl_llvm_api.cc */ #define SRCDIR "@SRCDIR@" #cmakedefine TCEMC_AVAILABLE #cmakedefine TCE_AVAILABLE #define TCE_DEVICE_EXTENSIONS "@TCE_DEVICE_EXTENSIONS@" /* Defined on big endian systems */ #define WORDS_BIGENDIAN @WORDS_BIGENDIAN@ /* Disable cl_khr_fp16 because fp16 is not supported */ #cmakedefine _CL_DISABLE_HALF /* Disable cl_khr_fp64 because fp64 is not supported */ #cmakedefine _CL_DISABLE_DOUBLE #define POCL_CL_VERSION "2.0" #define HSA_DEVICE_CL_VERSION_MAJOR 1 #define HSA_DEVICE_CL_VERSION_MINOR 2 #define CUDA_DEVICE_CL_VERSION_MAJOR 1 #define CUDA_DEVICE_CL_VERSION_MINOR 2 #define HOST_DEVICE_CL_VERSION_MAJOR @HOST_DEVICE_CL_VERSION_MAJOR@ #define HOST_DEVICE_CL_VERSION_MINOR @HOST_DEVICE_CL_VERSION_MINOR@ #define TCE_DEVICE_CL_VERSION_MAJOR 1 #define TCE_DEVICE_CL_VERSION_MINOR 2 #cmakedefine USE_POCL_MEMMANAGER pocl-1.8/config2.h.in.cmake000066400000000000000000000002651413131625300155050ustar00rootroot00000000000000/* this config file is for values NOT escaped for C/C++ * required e.g. 
for values with doublequotes, like C string arrays */ #define HOST_LD_FLAGS_ARRAY "@HOST_LD_FLAGS_ARRAY@" pocl-1.8/doc/000077500000000000000000000000001413131625300130635ustar00rootroot00000000000000pocl-1.8/doc/sphinx/000077500000000000000000000000001413131625300143745ustar00rootroot00000000000000pocl-1.8/doc/sphinx/Makefile000066400000000000000000000063331413131625300160410ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." 
# Each target below runs sphinx-build with the builder of the same name and
# writes its output to $(BUILDDIR)/<builder>. Recipe lines must start with a
# tab character and each command must sit on its own line.

# JSON builder.
json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

# HTML Help builder (.hhp project for HTML Help Workshop).
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

# Qt help builder (.qhcp project for qcollectiongenerator / assistant).
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PortableComputingLanguagepocl.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PortableComputingLanguagepocl.qhc"

# LaTeX builder; the generated directory has its own all-pdf / all-ps targets.
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
	      "run these through (pdf)latex."

# Overview of changed/added/deprecated items.
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

# External link checker; results also land in linkcheck/output.txt.
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

# Runs doctests embedded in the documentation; results in doctest/output.txt.
doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
publish_to_web: rsync -r build/html $(SOURCEFORGE_USER)@web.sourceforge.net:/home/project-web/pocl/htdocs/docs/ pocl-1.8/doc/sphinx/source/000077500000000000000000000000001413131625300156745ustar00rootroot00000000000000pocl-1.8/doc/sphinx/source/accel.rst000066400000000000000000000153361413131625300175050ustar00rootroot00000000000000=========================== Fixed-Function Accelerators =========================== The ``accel`` driver can be used for easy integration of custom fixed-function accelerators through a standardized hardware interface and a standardized procedure for enqueuing commands. Interface --------- The control register interface for the fixed-function accelerators is quite simple. The address space of the device is split into four regions, the size of which is determined by the largest of the memories in these regions. Therefore, the region is selected with the highest bits of the address space of the accelerator: +-------------+--------------------+ | High bits | Address Space | | | | +=============+====================+ | 00 | Control registers | +-------------+--------------------+ | 01 | Instruction memory | +-------------+--------------------+ | 10 | Data memory | +-------------+--------------------+ | 11 | Parameter memory | +-------------+--------------------+ The size of the memories is read from the control registers, which is sufficient to determine the size of the address space of the accelerator as well as the offsets of each memory. The control registers are also used to control the execution of the accelerator: .. list-table:: :widths: 20 25 55 :header-rows: 1 * - Offset - Name - Description * - 0x000 - STATUS - Status of the accelerator. Bit 0 is high when the execution is stalled due to any reason, bit 1 is high when the external stall signal is active, and bit 2 is high when the accelerator reset is active. * - 0x100 - AQL_READ_IDX_LOW - Read index of the AQL queue (low 32 bits). Read only. 
* - 0x104 - AQL_READ_IDX_HIGH - Read index of the AQL queue (high 32 bits). Read only. * - 0x108 - AQL_WRITE_IDX_LOW - Write index of the AQL queue (low 32 bits). Writing to this register increments the 64-bit value. * - 0x10C - AQL_WRITE_IDX_HIGH - Write index of the AQL queue (high 32 bits). Read only. * - 0x200 - COMMAND - Command register to control execution. Writing 1 to this register resets the accelerator, writing 2 lifts reset and external stall, and writing 4 enables the external stall signal, pausing execution. * - 0x300 - DEVICE_CLASS - Device class (vendor ID) of the accelerator. Currently unused by the driver. * - 0x304 - DEVICE_ID - Device ID of the accelerator. Currently unused by the driver. * - 0x308 - INTERFACE_TYPE - Version number of the interface. This describes interface version 2. * - 0x30C - CORE_COUNT - Core count of the accelerator. Multicore devices are currently not supported. * - 0x310 - CTRL_SIZE - Size of control memory (this register space) in bytes. Must be at least 1024. * - 0x314 - DMEM_SIZE - Size of the data memory in bytes * - 0x318 - IMEM_SIZE - Size of the instruction memory in bytes * - 0x31c - PMEM_SIZE - Size of the parameter memory in bytes. The instruction memory can be used to configure the accelerator. However, it currently has to be done manually, and is not managed by pocl. The data memory is used to store an AQL Queue, as defined by the `HSA Runtime Programmer’s Reference Manual `_, the write and read indexes of which are exposed by the control registers. The size of the queue is such that it uses all of the data memory. Finally, the parameter memory is used to store data and argument buffers as well as completion signals for the kernels. As a practical example, enqueuing a kernel dispatch packet proceeds as follows: - The driver allocates and populates the OpenCL buffers and the argument buffer for the kernel, as well as space for a 32-bit completion signal. 
- The driver writes the kernel packet, excluding the header, to the device. Its position depends on the value of the write index. The completion signal address as well as the argument buffer address and pointers to buffer arguments are given as physical addresses in the accelerator's address space. The kernel object simply corresponds to the kernel IDs shown in the table below. - The driver sets the packet header and increments the queue write index. - The device executes the kernel and writes a 1 in case of a success or a 2 in case of a failure to the completion signal address, if it is not 0. - The driver sees the completion signal change, and can consider the command completed. Usage ----- To enable this driver, simply add ``-DENABLE_ACCEL_DEVICE=1`` to the cmake arguments. On small FPGA SoCs and other relatively low performance hosts, you may wish to follow the instructions in :ref:`pocl-without-llvm`. The fixed-function accelerators need to be told what kernel to execute. For this, the accel driver has a list of builtin kernels that can be referred to in the ``clCreateProgramWithBuiltInKernels`` call: .. list-table:: :widths: 20 20 60 :header-rows: 1 * - Kernel name - Kernel ID - Function * - pocl.copy - 0 - Copies from argument 0 to argument 1 as many bytes as there are work items * - pocl.add32 - 1 - 32-bit element-wise addition on arrays pointed to by arguments 0 and 1, stored in an array pointed to by argument 3 * - pocl.mul32 - 2 - As pocl.add32, but with 32-bit multiplication This list will be expanded in the future. There is an example program using the accel driver in ``examples/accel`` which also includes the VHDL code for synthesizing the accelerator. The accelerator has been developed with the `TCE toolset `_. In order to synthesize the accelerator for a Xilinx FPGA SoC, you can follow the instructions in the `TCE manual `_, in the section titled System-on-a-Chip design with AlmaIF Integrator. 
Make sure to check the accelerator base address from Vivado. Driver arguments are used to tell pocl where the accelerator is and what functions it supports. To run this example manually, execute:: POCL_DEVICES=accel POCL_ACCEL0_PARAMETERS=0x43C00000,1,2 ./accel_example The environment variables define an accelerator with base physical address of 0x43C0_0000 that can execute pocl.add32 and pocl.mul32. When running the example, verify that the address given in the parameter matches the base address of the accelerator. Note that as the driver requires write access to ``/dev/mem`` for memory mapping, you may need to execute the application with elevated privileges. In this case, note that ``sudo`` by default overrides your environment variables. You can either assign them in the same command, or use ``sudo`` with the ``--preserve-env`` switch. pocl-1.8/doc/sphinx/source/conf.py000066400000000000000000000144461413131625300172040ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Portable Computing Language (pocl) documentation build configuration file, created by # sphinx-quickstart on Fri May 3 10:53:18 2013. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.append(os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
extensions = ['sphinx.ext.imgmath']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'Portable Computing Language (PoCL)'
copyright = u'2010-2021 PoCL developers'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '1.8'
# The full version, including alpha/beta/rc tags.
release = '1.8'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of documents that shouldn't be included in the build.
#unused_docs = []

# List of directories, relative to source directory, that shouldn't be searched
# for source files.
exclude_trees = []

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []


# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages. Major themes that come with
# Sphinx are currently 'default' and 'sphinxdoc'.
html_theme = 'default'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_use_modindex = True

# If false, no index is generated.
#html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = '' # Output file base name for HTML help builder. htmlhelp_basename = 'PortableComputingLanguagepocldoc' # -- Options for LaTeX output -------------------------------------------------- # The paper size ('letter' or 'a4'). #latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). #latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'PortableComputingLanguagepocl.tex', u'Portable Computing Language (PoCL) Documentation', u'PoCL developers', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # Additional stuff for the LaTeX preamble. #latex_preamble = '' # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_use_modindex = True pocl-1.8/doc/sphinx/source/conformance.rst000066400000000000000000000326031413131625300207240ustar00rootroot00000000000000.. 
_pocl-conformance: ======================= OpenCL conformance ======================= Conformance related CMake options --------------------------------- - ``-DENABLE_CONFORMANCE=ON/OFF`` This is mostly related to the kernel library (the runtime is always built to be conformant on x86). Defaults to ON. This option by itself does not guarantee an OpenCL-conformant build; it merely ensures that a build fails if some options which would result in a non-conformant kernel library are given. A non-conformant kernel library might be somewhat faster, at the expense of precision and/or range. Note that conformance was tested **only** on certain hardware and software (Linux, x86-64, CPU with AVX & FMA instructions). How to run the conformance test suite on your hardware ------------------------------------------------------ First you need to enable the suite in pocl's external test suite set. This is done by adding the switch ``-DENABLE_TESTSUITES=conformance`` to the cmake command line. After this, ``make prepare_examples`` fetches and prepares the conformance suite for testing. To run a shortened version of the conformance suite, run: ``ctest -L conformance_suite_mini`` This might take a few hours on slow hardware. There is also a ``conformance_suite_micro`` label, which takes about 20-30 minutes on slow hardware. To run the full conformance testsuite, run: ``ctest -L conformance_suite_full`` Note that this can take a week to finish on slow hardware, and about a day on relatively fast hardware (6C/12T Intel or equivalent). Known issues with the conformance testsuite ------------------------------------------- - a few tests from ``basic/test_basic`` may fail / segfault because they request a huge amount of memory for buffers. - compiler_defines_for_extensions from ``compiler/test_compiler`` might fail because the cl_khr_spir extension is not recognized with OpenCL 1.2 - officially it's only recognized since OpenCL 2.0.
- a few tests from ``conversions/test_conversions`` may report failures. This is likely a bug in the test; the same test from branch cl20_trunk of CTS passes. - some tests from ``relationals/test_relationals`` can fail with specific LLVM versions, this is an LLVM bug, fixed in LLVM 13. - ``math_brute_force/bruteforce`` tests may occasionally fail with an empty build log, this is a bug in CTS. See pocl issue #614. ``export CL_TEST_SINGLE_THREADED=1`` might help. - a few tests may run much faster if you limit the reported Global memory size with POCL_MEMORY_LIMIT env var. In particular, "kernel_image_methods" test with "max_images" argument. - two tests in ``api/test_api`` fail with LLVM 5.0 because of LLVM commit 1c1154229a41b688f9: ``[OpenCL] Do not generate "kernel_arg_type_qual" metadata for non-pointer args`` This is a bug in CTS, which tests for non-pointer type qualifiers, not in pocl. See: https://www.khronos.org/registry/OpenCL/specs/opencl-1.2.pdf page 169: ``CL_KERNEL_ARG_TYPE_VOLATILE`` is returned if the **argument is a pointer** and the referenced type is declared with the volatile qualifier. Similarly, ``CL_KERNEL_ARG_TYPE_RESTRICT`` or ``CL_KERNEL_ARG_TYPE_CONST`` is returned if the **argument is a pointer** and the referenced type is declared with the restrict or const qualifier .. _sigfpe-handler: Known issues in pocl / things to be aware of -------------------------------------------- - Integer division by zero. OpenCL 1.2 specification requires that division by zero on integers results in undefined values, instead of raising exceptions. This requires pocl to install a handler of SIGFPE. Unfortunately signal handlers are per-process not per-thread, and pocl drivers do not run in a separate process, which means that integer division by zero will not raise SIGFPE for the entire pocl library and also the user's program. The handler may be disabled by setting the env variable POCL_SIGFPE_HANDLER to 0. 
Note that this is currently only relevant for x86(-64) + Linux; on all other systems this issue is not handled in any way (thus pocl is likely non-conformant there). - Several options to clBuildProgram() are accepted but currently have no effect. This is related mostly to optimization options like `-cl-fast-relaxed-math`. The `-cl-denorms-are-zero` and `-cl-fp32-correctly-rounded-divide-sqrt` options are honored. - Many of the ``native_`` and ``half_`` variants of kernel library functions are mapped to the "full" variants. - the optional OpenGL / D3D extensions are not supported. There is experimental support for SPIR. - clUnloadCompiler() only actually unloads LLVM after all programs & kernels have been released. - clSetUserEventStatus() called with a negative status. The Spec leaves the behaviour in this case as "implementation defined", and this part of pocl is only very lightly tested by the conformance tests. clSetUserEventStatus() called with CL_COMPLETE works as expected, and is heavily used by the conversions conformance test. Conformance tests results (kernel library precision) on tested hardware ----------------------------------------------------------------------- Note that it's impossible to test double precision on the entire range, therefore the results may vary.
x86-64 CPU with AVX2+FMA, LLVM 4.0, tested on Nov 1, 2017 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ==================== ========================= =========================================================== NAME Worst ULP WHERE ==================== ========================= =========================================================== add 0.00 {0x0p+0, 0x0p+0} addD 0.00 {0x0p+0, 0x0p+0} assignment 0.00 0x0p+0 assignmentD 0.00 0x0p+0 cbrt 0.50 -0x1.5629d2p+116 cbrtD 0.59 0x1.0000000000136p+1022 ceil 0.00 0x0p+0 ceilD 0.00 0x0p+0 copysign 0.00 {0x0p+0, 0x0p+0} copysignD 0.00 {0x0p+0, 0x0p+0} cos 2.37 0x1.1338ccp+20 cosD 2.27 -0x1.d10000000074p+380 cosh 2.41 -0x1.602166p+2 coshD 1.43 -0x1.98000000003efp+5 cospi 1.94 0x1.d73b56p-2 cospiD 2.46 -0x1.adffffffffa91p-2 divide 0.00 {0x0p+0, 0x0p+0} divideD 0.00 {0x0p+0, 0x0p+0} exp 0.95 -0x1.762532p+2 expD 0.94 0x1.2f0000000023dp+7 exp10 0.79 -0x1.309022p+5 exp10D 0.64 -0x1.34ffffffffcc9p+8 exp2 0.79 -0x1.fa3d0ep+6 exp2D 0.75 -0x1.ff00000000417p+9 expm1 1.00 -0x1.7a0002p-25 expm1D 0.99 -0x1.26p+5 fabs 0.00 0x0p+0 fabsD 0.00 0x0p+0 fdim 0.00 {0x0p+0, 0x0p+0} fdimD 0.00 {0x0p+0, 0x0p+0} floor 0.00 0x0p+0 floorD 0.00 0x0p+0 fma 0.00 {0x0p+0, 0x0p+0, 0x0p+0} fmaD 0.00 {0x0p+0, 0x0p+0, 0x0p+0} fmax 0.00 {0x0p+0, 0x0p+0} fmaxD 0.00 {0x0p+0, 0x0p+0} fmin 0.00 {0x0p+0, 0x0p+0} fminD 0.00 {0x0p+0, 0x0p+0} fmod 0.00 {0x0p+0, 0x0p+0} fmodD 0.00 {0x0p+0, 0x0p+0} fract { 0.00, 0.00} {0x0p+0, 0x0p+0} fractD { 0.00, 0.00} {0x0p+0, 0x0p+0} frexp { 0.00, 0} 0x0p+0 frexpD { 0.00, 0} 0x0p+0 hypot 1.93 {0x1.17c998p-127, -0x1.5fedb8p-127} hypotD 1.73 {0x1.5d2ebeed7663cp-1022, 0x1.67457048a2318p-1022} ldexp 0.00 {0x0p+0, 0} ldexpD 0.00 {0x0p+0, 0} log10 0.50 0x1.7fee2ep-1 log10D 0.50 0x1.9100000000639p+1022 log 0.63 0x1.7fcb3ep-1 logD 0.75 0x1.7d00000000381p+0 log1p 1.00 -0x1.fa0002p-126 log1pD 1.00 -0x1.e000000000001p-1022 log2 0.59 0x1.1107a2p+0 log2D 0.72 0x1.120000000063dp+0 logb 0.00 0x0p+0 logbD 0.00 0x0p+0 mad 0.00 {0x0p+0, 
0x0p+0, 0x0p+0} no ULP check madD 0.00 {0x0p+0, 0x0p+0, 0x0p+0} no ULP check maxmag 0.00 {0x0p+0, 0x0p+0} maxmagD 0.00 {0x0p+0, 0x0p+0} minmag 0.00 {0x0p+0, 0x0p+0} minmagD 0.00 {0x0p+0, 0x0p+0} modf { 0.00, 0.00} {0x0p+0, 0x0p+0} modfD { 0.00, 0.00} {0x0p+0, 0x0p+0} multiply 0.00 {0x0p+0, 0x0p+0} multiplyD 0.00 {0x0p+0, 0x0p+0} nan 0.00 0x0p+0 nanD 0.00 0x0p+0 nextafter 0.00 {0x0p+0, 0x0p+0} nextafterD 0.00 {0x0p+0, 0x0p+0} pow 0.82 {0x1.91237cp-1, 0x1.4da146p+8} powD 0.80 {0x1.2bfb4b18164c9p+65, -0x1.b78438ae9c3bdp-8} pown 0.65 {-0x1.9p+6, -2} pownD 0.62 {-0x1.7ffffffffffffp+1, 3} powr 0.82 {0x1.91237cp-1, 0x1.4da146p+8} powrD 0.80 {0x1.2bfb4b18164c9p+65, -0x1.b78438ae9c3bdp-8} remainder 0.00 {0x0p+0, 0x0p+0} remainderD 0.00 {0x0p+0, 0x0p+0} remquo { 0.00, 0} 0x0p+0 remquoD { 0.00, 0} 0x0p+0 rint 0.00 0x0p+0 rintD 0.00 0x0p+0 rootn 0.69 {-0x1.e2fe6ep-74, -141} rootnD 0.68 {-0x1.8000000000001p+1, 3} round 0.00 0x0p+0 roundD 0.00 0x0p+0 rsqrt 1.49 0x1.019566p+124 rsqrtD 1.49 0x1.01ffffffffa39p+1016 sin 2.48 -0x1.09f07ap+21 sinD 1.87 -0x1.f2fffffffffbap+32 sincos { 2.48, 2.37} {0x1.09f07ap+21, 0x1.1338ccp+20} sincosD { 1.87, 2.27} {0x1.f2fffffffffbap+32, 0x1.d10000000074p+380} sinh 2.32 0x1.e76078p+2 sinhD 1.53 -0x1.3100000000278p+4 sinpi 2.13 -0x1.45f3ep-9 sinpiD 2.50 -0x1.46000000000dap-7 sqrt 0.00 0x0p+0 sqrtD 0.00 0x0p+0 subtract 0.00 {0x0p+0, 0x0p+0} subtractD 0.00 {0x0p+0, 0x0p+0} tan 4.35 -0x1.b4eba2p+22 tanD 4.00 -0x1.2f000000003edp+333 tanh 1.18 -0x1.ca742ap-1 tanhD 1.19 0x1.f400000000395p-1 tanpi 4.21 -0x1.f99d16p-3 tanpiD 4.09 0x1.f6000000001d3p-3 trunc 0.00 0x0p+0 truncD 0.00 0x0p+0 ==================== ========================= =========================================================== pocl-1.8/doc/sphinx/source/cuda.rst000066400000000000000000000117311413131625300173450ustar00rootroot00000000000000================== NVIDIA GPU support ================== NOTE: Support for NVIDIA GPUs via the CUDA backend is currently experimental and many features may 
be missing or incomplete. The experimental CUDA backend provides support for CUDA-capable NVIDIA GPUs under Linux or macOS. The goal of this backend is to provide an open-source alternative to the proprietary NVIDIA OpenCL implementation. This makes use of the NVPTX backend in LLVM and the CUDA driver API. Building pocl with CUDA support ------------------------------- 1) Install prerequisites ~~~~~~~~~~~~~~~~~~~~~~~~ Aside from the usual pocl dependencies, you will also need the CUDA toolkit. Currently this backend has only been tested against CUDA 8.0, but it may also be possible to build against other versions. If you experience build failures regarding missing CUDA headers or libraries, you may need to add the include directory containing ``cuda.h`` to your header search path, and/or the library directory containing ``libcuda.{so,dylib}`` to your library search path. The CUDA backend requires LLVM built with the NVPTX backend enabled. 2) Build pocl ~~~~~~~~~~~~~ To enable the CUDA backend, add ``-DENABLE_CUDA=ON`` to your CMake configuration command line. Otherwise, build and install pocl as normal. 3) Run tests ~~~~~~~~~~~~ After building pocl, you can smoke test the CUDA backend by executing the subset of pocl's tests that are known to pass on NVIDIA GPUs:: ../tools/scripts/run_cuda_tests 4) Configuration ~~~~~~~~~~~~~~~~ Use ``POCL_DEVICES=CUDA`` to select only CUDA devices. If the system has more than one GPU, specify the ``CUDA`` device multiple times (e.g. ``POCL_DEVICES=CUDA,CUDA`` for two GPUs). The CUDA backend currently has a runtime dependency on the CUDA toolkit. If you receive errors regarding a failure to load ``libdevice``, you may need to set the ``POCL_CUDA_TOOLKIT_PATH`` environment variable to tell pocl where the CUDA toolkit is installed. Set this variable to the root of the toolkit installation (the directory containing the ``nvvm`` directory). 
The ``POCL_CUDA_GPU_ARCH`` environment variable can be set to override the target GPU architecture (e.g. ``POCL_CUDA_GPU_ARCH=sm_35``), which may be necessary in cases where LLVM doesn't yet support the architecture. The ``POCL_CUDA_VERIFY_MODULE`` environment variable can be set to ``0`` to skip verification that the LLVM module produced by the CUDA backend is well formed. Currently defaults to 1 = ON. The ``POCL_CUDA_DUMP_NVVM`` environment variable can be set to ``1`` to dump the LLVM IR that is fed into the NVPTX backend for debugging purposes (requires ``POCL_DEBUG=1``). The ``POCL_CUDA_DISABLE_QUEUE_THREADS`` environment variable can be set to ``1`` to disable background threads for handling command submission. This can potentially reduce command launch latency, but can cause problems if using user events or sharing a context with a non-CUDA device. CUDA backend status ------------------- (last updated: 2017-06-02) The CUDA backend currently passes 73 tests from pocl's internal testsuite, and is capable of running various real OpenCL codes. Unlike NVIDIA's proprietary OpenCL implementation, pocl supports SPIR consumption, and so this backend has also been able to run (for example) SYCL codes using Codeplay's ComputeCpp implementation on NVIDIA GPUs. Since it uses CUDA under-the-hood, this backend also works with all of the NVIDIA CUDA profiling and debugging tools, many of which don't work with NVIDIA's own OpenCL implementation. Conformance status ~~~~~~~~~~~~~~~~~~ The Khronos OpenCL 1.2 conformance tests are `available here `_. The following test categories are known to pass on at least one NVIDIA GPU using pocl's CUDA backend: * allocations * api * atomics * basic * commonfns * computeinfo * contractions * events * profiling * relationals * thread_dimensions * vec_step Tested platforms ~~~~~~~~~~~~~~~~ The CUDA backend has been tested on Linux (CentOS 7.3) with SM_35, SM_52, SM_60, and SM_61 capable NVIDIA GPUs. 
The backend is also functional on macOS, with just one additional test failure compared to Linux (``test_event_cycle``). Known issues ~~~~~~~~~~~~ The following is a non-comprehensive list of known issues in the CUDA backend: * image types and samplers are unimplemented * printf format support is incomplete Additionally, there has been little effort to optimize the performance of this backend so far - the current effort is on implementing remaining functionality. Once the core functionality is completed, optimization of the code generation and runtime can begin. Support ~~~~~~~ For bug reports and questions, please use pocl's `GitHub issue tracker `_. Pull requests and other contributions are also very welcome. This work has primarily been done by James Price from the `University of Bristol's High Performance Computing Group `_. pocl-1.8/doc/sphinx/source/debug.rst000066400000000000000000000336251413131625300175250ustar00rootroot00000000000000Debugging OpenCL applications with PoCL ======================================== There are several ways to debug applications with PoCL, differing in debugging coverage and impact on speed. This chapter describes means for debugging OpenCL kernel code by using the CPU drivers of PoCL. "Offline" debugging -------------------- Offline debugging can be done by setting the ``POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES`` env var to 1. This causes the intermediate output files from the kernel compilation process to be left in PoCL's disk cache for inspection. By default these files are deleted, and only the final executable output is left in the cache. This is useful for manually inspecting the LLVM IR of the compilation stages, but it's also useful for GDB and Valgrind debugging as described later.
Simple debugging with PoCL's debug log -------------------------------------------- Upsides: * doesn't require recompiling the application or PoCL Downsides: * very limited scope Setup: Just set the "POCL_DEBUG" environment variable to some value. The most useful values are: * ``POCL_DEBUG=err,warn`` - this will limit the output to errors and warnings. These messages might help spot some OpenCL API calls which return an error value. Also it helps if a call can return CL_INVALID_VALUE for multiple reasons, since PoCL prints a more specific reason in that case. * ``POCL_DEBUG=refcount`` - this will limit the output to refcount increases and decreases. Might help spot CL object leaks Example:: gcc example.c -o example -lOpenCL export POCL_DEBUG=refcount ./example Output:: [2020-07-20 12:37:18.472185807]POCL: in fn POclReleaseContext at line 48: | REFCOUNTS | Release Context [2020-07-20 12:37:18.472196073]POCL: in fn POclReleaseContext at line 56: | REFCOUNTS | Free Context 0x5566430d84a0 [2020-07-20 12:37:18.472207597]POCL: in fn POclReleaseCommandQueue at line 41: | REFCOUNTS | Release Command Queue 0x5566430d85f0 0 [2020-07-20 12:37:18.472228759]POCL: in fn POclReleaseCommandQueue at line 55: | REFCOUNTS | Free Command Queue 0x5566430d85f0 "Release X" is printed when the refcount is lowered by 1. "Free X" is printed when the refcount becomes 0 and the object is actually freed. Debugging with GDB ----------------------------------------------- Upsides: * the entire OpenCL application, including the launched kernels can be debugged * does not require PoCL recompilation (but it is recommended, if PoCL wasn't compiled with debuginfo) * single stepping kernels Downsides: * limited scope (not the best tool for tracking memory leaks & race conditions) Setup: * Optional: build PoCL with ``-DCMAKE_BUILD_TYPE=Debug`` * ``export POCL_EXTRA_BUILD_FLAGS="-g -cl-opt-disable"``, or add these flags to the ``clBuildProgram`` call. 
This will cause all kernels to compile with debuginfo. * ``export POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES=1`` This will leave the source files in PoCL's cache. * Optional: ``export POCL_MAX_PTHREAD_COUNT=1`` This limits the pthread driver to a single worker thread. * Run your application with gdb, as usual. Example 1: Let's say we have an `example` host program with a `dot_product` kernel with this source:: __kernel void dot_product (__global const float4 *a, __global const float4 *b, __global float4 *c) { size_t gid = get_global_id(0); gid += 18298392UL; c[gid] = a[gid] * b[gid] + (float4)(1.0f, 6.0f, 9.0f, 4.0f); } Run it in gdb:: POCL_DEBUG=all gdb ./example Output 1: The program crashes since it tries to access memory beyond buffer boundaries:: [2020-06-30 08:28:14.888355355]POCL: in fn pocl_check_kernel_disk_cache at line 963: | GENERAL | Built a WG function: /tmp/POCL_CACHE/BJ/JMEICBEBICMMDJCKNIADBFKHIMHDBIIKHCHED/dot_product/2-1-1-goffs0-smallgrid/dot_product.so Thread 8 "example" received signal SIGSEGV, Segmentation fault. 
[Switching to Thread 0x7fffddffe700 (LWP 10585)] 0x00007fffec532458 in dot_product (a=0x5555557bb580, b=0x5555557e6500, c=0x5555557ba480) at /tmp/POCL_CACHE/tempfile-1c-aa-cd-3e-5e.cl:10 10 c[gid] = a[gid] * b[gid] + (float4)(1.0f, 6.0f, 9.0f, 4.0f); (gdb) list 5 __global const float4 *b, __global float4 *c) 6 { 7 size_t gid = get_global_id(0); 8 9 gid += 18298392UL; 10 c[gid] = a[gid] * b[gid] + (float4)(1.0f, 6.0f, 9.0f, 4.0f); 11 } (gdb) print gid $1 = 18298392 (gdb) bt #0 0x00007fffec532458 in dot_product (a=0x5555557bb580, b=0x5555557e6500, c=0x5555557ba480) at /tmp/POCL_CACHE/tempfile-1c-aa-cd-3e-5e.cl:10 #1 0x00007fffec5324c3 in _pocl_kernel_dot_product_workgroup () from /tmp/POCL_CACHE/BJ/JMEICBEBICMMDJCKNIADBFKHIMHDBIIKHCHED/dot_product/2-1-1-goffs0-smallgrid/dot_product.so #2 0x00007ffff72924ed in work_group_scheduler (k=0x7fffb91935c0, thread_data=0x5555557ae600) at /tmp/pocl_source/lib/CL/devices/pthread/pthread_scheduler.c:307 #3 0x00007ffff7292b72 in pthread_scheduler_get_work (td=0x5555557ae600) at /tmp/pocl_source/lib/CL/devices/pthread/pthread_scheduler.c:454 #4 0x00007ffff7292fd2 in pocl_pthread_driver_thread (p=0x5555557ae600) at /tmp/pocl_source/lib/CL/devices/pthread/pthread_scheduler.c:530 #5 0x00007fffee90e6db in start_thread (arg=0x7fffddffe700) at pthread_create.c:463 #6 0x00007ffff78faa3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 Note: printing variables (e.g. gid) could instead result in this: (gdb) print gid $1 = {{{18298392, 9223372036854775822, 0, 0}}} This happens when PoCL uses the "loops" workgroup method. The high-level overview of "loops" is that PoCL creates a 3D for-loop (for each dimension of workgroup-size) around the kernel code, and the LLVM optimizer then tries to vectorize that loop. For this to work, PoCL must create a copy of variables in private address space, one copy for each workitem in the workgroup; that's why the variable printed is an array.
Example 2: Let's say we want to step the "dot_product" kernel from the previous example. Launch gdb:: POCL_MAX_PTHREAD_COUNT=1 gdb ./example Make a breakpoint on the kernel name:: (gdb) break dot_product Function "dot_product" not defined. Make breakpoint pending on future shared library load? (y or [n]) y Breakpoint 1 (dot_product) pending. Run the program:: (gdb) r Starting program: /tmp/example [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". [New Thread 0x7fffedf36700 (LWP 18595)] [Switching to Thread 0x7fffedf36700 (LWP 18595)] Thread 2 "example" hit Breakpoint 1, dot_product (a=0x5555557bc080, b=0x5555557e5380, c=0x5555557baf00) at /tmp/POCL_CACHE/tempfile-db-70-03-45-d6.cl:7 7 size_t gid = get_global_id(0); We can now step through the kernel:: (gdb) print gid $1 = 140737103657472 (gdb) next 9 gid += 18298392UL; (gdb) print gid $2 = 0 (gdb) next 10 c[gid] = a[gid] * b[gid] + (float4)(1.0f, 6.0f, 9.0f, 4.0f); (gdb) print gid $3 = 18298392 Debugging with Valgrind ----------------------------------------------- Upsides: * The entire application including kernels can be debugged. * Does not strictly require recompilation (though for usable backtraces, requires debuginfo). Downsides: * Can be very slow, especially with computationally intensive kernels. * May report some leaks which are not real leaks (see below). Setup: * Optional: build PoCL with ``-DCMAKE_BUILD_TYPE=Debug`` * ``export POCL_EXTRA_BUILD_FLAGS="-g -cl-opt-disable"``, or add these flags to the ``clBuildProgram`` call. This will cause all kernels to compile with debuginfo. * Run your application with valgrind as usual.
Example 1: Uninitializing both LLVM (calling clUnloadPlatformCompiler) and drivers:: POCL_ENABLE_UNINIT=1 valgrind ./examples/example1/example1 Output 1:: ==18291== LEAK SUMMARY: ==18291== definitely lost: 40 bytes in 1 blocks ==18291== indirectly lost: 0 bytes in 0 blocks ==18291== possibly lost: 0 bytes in 0 blocks ==18291== still reachable: 545,683 bytes in 2,705 blocks ==18291== suppressed: 0 bytes in 0 blocks ==18291== Rerun with --leak-check=full to see details of leaked memory Example 2: Uninitializing LLVM (calling clUnloadPlatformCompiler) but not drivers:: valgrind ./examples/example1/example1 Output 2:: ==18301== LEAK SUMMARY: ==18301== definitely lost: 0 bytes in 0 blocks ==18301== indirectly lost: 0 bytes in 0 blocks ==18301== possibly lost: 2,816 bytes in 8 blocks ==18301== still reachable: 403,199,350 bytes in 2,720 blocks ==18301== suppressed: 0 bytes in 0 blocks ==18301== Rerun with --leak-check=full to see details of leaked memory Example 3: Both LLVM and drivers left (not calling clUnloadPlatformCompiler):: valgrind ./examples/example1/example1 Output 3:: ==18726== LEAK SUMMARY: ==18726== definitely lost: 536 bytes in 2 blocks ==18726== indirectly lost: 1,299,332 bytes in 3,433 blocks ==18726== possibly lost: 53,773,316 bytes in 524,329 blocks ==18726== still reachable: 411,350,622 bytes in 73,488 blocks ==18726== suppressed: 0 bytes in 0 blocks Debugging with Thread/Address sanitizers ----------------------------------------------- Currently PoCL recognizes four sanitizers: Address, Leak, Undefined behaviour and Thread. Corresponding PoCL CMake options to enable them are: ``ENABLE_ASAN, ENABLE_LSAN, ENABLE_UBSAN, ENABLE_TSAN.`` Upsides: * Much faster than Valgrind. * Less false detections. * Can check undefined behaviour (most other tools can't). Downsides: * Requires rebuilding both the application and PoCL. 
* The application and PoCL's runtime code are compiled with sanitizer, but at the moment, the kernels cannot be compiled with the sanitizer. Setup: * For example, to use the Address Sanitizer (ASan), build PoCL with these flags:: -DENABLE_ASAN=1 -DENABLE_ICD=0 -DCMAKE_BUILD_TYPE=Debug * This will result in ``lib/CL/libOpenCL.so``. Rebuild your application with the correct ``-fsanitize=X`` flag and link it to ``lib/CL/libOpenCL.so``. Example: Building an "example.c" with the ASan:: gcc -O0 -ggdb -fsanitize=address -fno-omit-frame-pointer -pthread -o example.o -c example.c gcc -fsanitize=address -o example example.o -lasan -Wl,-rpath,/lib/CL /lib/CL/libOpenCL.so Output: If there's an OpenCL object remaining, ASan will print a backtrace with an OpenCL call name in it:: Indirect leak of 8 byte(s) in 1 object(s) allocated from: #0 0x7fa8f7b0a198 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee198) #1 0x7fa8f7607bc0 in pocl_unique_device_list /tmp/lib/CL/pocl_util.c:866 #2 0x7fa8f75d37ca in POclCreateContext /tmp/lib/CL/clCreateContext.c:172 #3 0x55d50f21e428 in poclu_get_any_device2 /tmp/lib/poclu/misc.c:84 #4 0x55d50f21c165 in main /tmp/examples/example1/example1.c:59 #5 0x7fa8f707bb96 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x21b96) If there's any memory leak in the user's program, ASan will print something like:: Direct leak of 64 byte(s) in 1 object(s) allocated from: #0 0x7f738e999f90 in __interceptor_malloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xedf90) #1 0x562f6f33e493 in main /tmp/examples/example1/example1.c:74 #2 0x7f738df0bb96 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x21b96) Handling LLVM and driver-allocated memory ----------------------------------------------- Both valgrind and sanitizers might report a huge amount of memory leaks coming from PoCL; this is caused mainly by two factors; LLVM and driver-held static data. 
The problem is that the OpenCL API unfortunately doesn't provide any API entry to uninitialize the entire implementation (e.g. all driver data). It does provide API entries to unload compiler though: ``clUnloadPlatformCompiler()`` and ``clUnloadCompiler()``. User can use these to ask PoCL to unload all LLVM data, but it should be noted that with PoCL, the LLVM data is freed only if all cl_programs and cl_kernels have been released before calling it. Usage is simple: call ``clUnloadPlatformCompiler()`` once after all other OpenCL objects have been released, right before the program exit. If the user sets ``POCL_ENABLE_UNINIT`` env var to 1, PoCL will also try to unload driver data. This feature might not work reliably so it's currently considered experimental. Example: Running a program compiled with AddrSanitizer, which calls ``clUnloadPlatformCompiler()``, with ``POCL_DEBUG=all POCL_ENABLE_UNINIT=1`` env variables will result in (if the program has no memleaks):: [2020-06-20 15:25:01.722343448]POCL: in fn POclReleaseContext at line 50: | REFCOUNTS | Free Context 0x60f000000310 [2020-06-20 15:25:01.722369150]POCL: in fn void pocl_llvm_release() at line 370: | LLVM | releasing LLVM [2020-06-20 15:25:01.823218919]POCL: in fn pocl_check_uninit_devices at line 107: | REFCOUNTS | Zero contexts left, calling pocl_uninit_devices [2020-06-20 15:25:01.823266761]POCL: in fn pocl_uninit_devices at line 334: | GENERAL | UNINIT all devices Running the same program with empty PoCL cache and removed ``clUnloadPlatformCompiler()`` call (therefore, with LLVM context alive at program exit), ASan will print a lot of memory leaks:: Indirect leak of 8 byte(s) in 1 object(s) allocated from: #0 0x7f99eef43ba0 in operator new(unsigned long) (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xefba0) #1 0x7f99eead5aea in WorkItemAliasAnalysis::runOnFunction(llvm::Function&) /tmp/lib/llvmopencl/WorkItemAliasAnalysis.cc:130 #2 0x7f99e6f76ed5 in llvm::FPPassManager::runOnFunction(llvm::Function&) 
(/usr/lib/llvm-10/lib/libLLVM-10.so.1+0xb11ed5) SUMMARY: AddressSanitizer: 1047772 byte(s) leaked in 3046 allocation(s). pocl-1.8/doc/sphinx/source/design.rst000066400000000000000000000003671413131625300177050ustar00rootroot00000000000000Notes on internal design =========================== Higher-level notes of pocl software design and implementation are collected to this part. .. toctree:: :maxdepth: 2 host_library kernel_compiler memory_management pocl_binary pocl-1.8/doc/sphinx/source/development.rst000066400000000000000000000321671413131625300207610ustar00rootroot00000000000000Information for Pocl developers =================================== Testsuite ---------- Before changes are committed to the mainline, all tests in the 'make check' tier-1 suite should pass:: make check_tier1 "make check_tier1" will invoke ctest with tier-1 testsuites. See `maintenance-policy`_ for list of what's included in tier-1. Under the 'examples' directory there are placeholder directories for external OpenCL application projects which are used as test suites for pocl (e.g. ViennaCL). These test suites can be enabled for cmake with -DENABLE_TESTSUITES (you can specify a list of test suites if you do not want to enabled all of them, see configure help for the available list). Note that these additional test suites require additional software (tools and libraries). The configure script checks some of them but the check is not exhaustive. Test suites are disabled if their requirement files are not available. You can run the tests or built examples using "ctest" directly; ``ctest --print-labels`` prints the available labels (testsuites); Invoke ctest with -jX option to run X tests in parallel. In order to prepare the external OpenCL examples for the testsuite, you need to run the following build command once:: make prepare_examples IMPORTANT: using the ICD for in tree 'make check' requires an icd loader that allows overriding the icd search path. 
Other ICD loaders won't be able to work in tree (they require the ICD config file to be installed in the system). There are now two options for such a loader: the open source ocl-icd loader and the Khronos supplied loader with a patch applied. Debugging a Failed Test ^^^^^^^^^^^^^^^^^^^^^^^ If there are failing tests in the suite, the usual way to start debugging is to look what was printed to the logs for the failing cases. After running the test suite, the logs are stored under ``Testing/Temporary/*.log`` Or one could re-run the test with more verbose output. Useful ctest options are "-V" and "--output-on-failure"; to make pocl more chatty, use the POCL_DEBUG env variable. Ocl-icd ------- Ocl-icd is packaged for most popular linux distributions, but can also be downloaded from: https://forge.imag.fr/projects/ocl-icd/. It allows overriding the path from which the icd files are searched which is used to select only the OpenCL library in the build tree of pocl for the make check. Note, however, if you run the tests or examples manually this overriding is not done automatically. To direct the ocl-icd to use only the pocl *in the build tree*, export the following environment variable in your shell:: export OCL_ICD_VENDORS="PATH_TO_THE_POCL_BUILD_TREE/ocl-vendors" Inside the 'ocl-vendors' directory there's a single .icd file which is generated to point to the pocl library in the build tree. Coding Style ------------ The code base of pocl consists mostly of pure C sources and C++ sources. 1) In the C sources, follow the GNU C style, but with spaces for indent. The GNU C style guide is here: http://www.gnu.org/prep/standards/html_node/Writing-C.html This guide should be followed except please use 2 spaces instead of the confusing "smart" mix of tabs and spaces for indentation. 2) In the C++ sources (mostly the LLVM passes), follow the LLVM coding guidelines so it is easier to upstream general code to the LLVM project at any point.
http://llvm.org/docs/CodingStandards.html It's acknowledged that the pocl code base does not fully adhere to these principles at the moment, but the aim is to gradually fix the style with every new commit improving the style. There are clang-format scripts to help in getting the style gradually improved. Running ``tools/scripts/format-branch.sh`` in the root of the repository diffs against a ``master`` branch and formats the difference, and leaves the diff uncommitted in the working tree. ``tools/scripts/format-last-commit.sh`` formats only the last commit and can be used in an interactive rebase session. An example emacs configuration to help get the pocl code style correct:: (setq default-tab-width 2) (setq-default indent-tabs-mode nil) (setq-default show-trailing-whitespace t) (defun my-c-mode-common-hook () (c-set-style "gnu") (setq tab-width 2) (setq c-basic-offset 2) ) (add-hook 'c-mode-common-hook 'my-c-mode-common-hook) (defun my-cpp-mode-common-hook () (c-set-style "stroustrup") (setq tab-width 4) (setq c-basic-offset 4) ) (add-hook 'c++-mode-hook 'my-cpp-mode-common-hook) (add-to-list 'auto-mode-alist '("\\.cl$" . c-mode)) (add-to-list 'auto-mode-alist '("\\.icc$" . c++-mode)) (add-to-list 'auto-mode-alist '("\\.cc$" . c++-mode)) Khronos ICD Loader ------------------ The ICD loader supplied by Khronos can be used for pocl development by applying a minor patch that enables overriding the ICD search path as explained above (OCL-ICD). The steps to build and install the Khronos ICD loader so it can be used to run the pocl test suite: #. Download the loader from http://www.khronos.org/registry/cl Unpack it. Copy the OpenCL headers to inc/CL like instructed in inc/README.txt. #. Apply a patch from the pocl checkout:: cd icd patch -p1 < ~/pocl/tools/patches/khronos-icd-loader.patch #. Build it with 'make'. #. 
Copy the loader to a library search path: sudo cp bin/libOpenCL* /usr/lib Now it should use the Khronos loader for ICD dispatching and you (and the pocl build system) should be able to override the icd search path with OCL_ICD_VENDORS environment variable. Using pocl from the Build Tree ------------------------------ If you want to use pocl from the build tree, you must export POCL_BUILDING=1 so pocl searches for its utility scripts from the build tree first, then the installation location. The "make check" testsuite does this automatically. There's a helper script that, when sourced, in addition to setting POCL_BUILDING sets up the OCL_ICD_VENDORS path to point to the pocl in the build tree. This removes the need to install pocl to test the built version. It should be executed in the build root, typically:: . ../tools/scripts/devel-envs.sh Target and Host CPU Architectures for 'basic' and 'pthread' Devices ------------------------------------------------------------------- By default, pocl build system compiles the kernel libraries for the host CPU architecture, to be used by 'basic' and 'pthread' devices. LLVM is used to detect the CPU variant to be used as target. This can be overridden by passing -DLLC_HOST_CPU=... to CMake. See the documentation for LLC_HOST_CPU build option. Cross-compilation where 'build' is different from 'host' has not been tested. Cross-compilation where 'host' is a different architecture from 'target' has not been tested for 'basic' and 'pthread' devices. Writing Documentation --------------------- The documentation is written using the `Sphinx documentation generator `_ and the reStructuredText markup. This Sphinx documentation can be built by:: cd doc/sphinx make html This builds the html version of the documents under the 'build/html' directory. ..
_maintenance-policy: Maintenance Policy ------------------- pocl development is currently managed mostly by researchers and research assistants of the `Customized Parallel Computing `_ group of Tampere University. We provide general maintenance for pocl on the side of our research projects (which on the other hand might use and/or extend it) because we consider it an important project that helps the "heterogeneous parallel programming cause". However, doing maintenance "on the side" unfortunately means that there is limited time to respond to external support requests due to other activities. To make pocl maintenance feasible within our limited time, we have set the following policy regarding releases: **External projects using OpenCL that have a test suite included in "regularly tested suites" (we later call 'tier-1' test suites) will be kept regression free, but for the rest we cannot make any promises.** Tier-1 tests will be executed successfully before the lead developer pushes new pull requests (PR) to the master branch, and some of them are additionally executed with multiple continuous integration (buildbot) servers on different platforms. Active developers are also assumed to run them locally before submitting PRs. Thus, regressions on these suites should be detected early. The required testsuites can be enabled at buildtime with ``-DENABLE_TESTSUITES=tier1`` cmake option. Currently (2017-03-16) the following are included in the tier-1 test suites: * The standard test suite of pocl. 
* AMD SDK 3.0 test suite * PyOpenCL test suite * piglit test suite * conformance_suite_micro test suite * CLBlast tests (excluding the longest running ones) * HSA test suite (uses the LLVM 3.7 with an HSAIL backend and targets an AMD Kaveri GPU) * TCE short smoke test suite (against the latest TCE open source release) Please note that not necessarily all the tests currently pass in the suites, we just ensure the currently passing ones do not regress with new commits (expected failing ones are marked as XFAILs or skipped). The primary test platform is x86-64. The latest LLVM release is given priority when testing, and we cannot guarantee older LLVM versions keep working over pocl releases due to the constantly changing library API. If you would like to get your favourite OpenCL-using project's test suite included in the tier-1 suite, please send a pull request that adds the suite under the 'examples' dir and the main CMakeLists.txt along with instructions (a README will do) on how to set it up so it is included in the 'make check' run. Please make the test suite short enough to be suitable for frequent "smoke testing" (under 5 minutes per typical run preferred). If your favourite project is already under 'example', but not listed as a tier-1 test suite, please update its status so that 'make check' passes with the current HEAD of pocl and let us know, and we will do our best to add it. Naturally this policy/support promise concerns only the lead developers (the CPC group). Any community involvement to provide a wider support/maintenance level will be heartily welcomed. .. _releasing: Release management ---------------------------------- We aim to make a new release according to the Clang/LLVM release schedule. For each release, a release manager is assigned. Release manager is responsible for creating and uploading new release candidate tar balls and requesting for testers from different platforms.
After a release candidate round with success reports and no failure reports, a release is published. See the `maintenance-policy`_ for the current release criteria. A checklist and hints for testing and making a release successfully: * Check that CHANGES has the most interesting updates done during the release cycle. Add missing notable changes from git log. * Update the release notes in *doc/notes-VERNUM.txt*. * Create a single commit in master branch: change the version to the release one (without -pre), in all relevant places (CHANGES, docs, CMakeLists.txt, etc); update the .so version (if required); check that supported LLVM versions in cmake/LLVM.cmake are correct. Create the release branch from this commit and push it to github. * In the master branch, create a new commit: increase version number (with -pre) in all relevant places; update the .so version; increase the supported LLVM versions in cmake/LLVM.cmake. Commit, push master to github. Now development can go on in master while the release branch is being stabilized. * The previous two steps ensure that merge-base of release & master is the start of release branch, which ensures that merging release to the master will not screw up the version numbers in the master. Bugs which need to be fixed in both branches, should be committed to the release branch, then release branch merged to master. * Create a new release on Github. Mark it as pre-release. This should create both a tarball and a git tag. * Upload the package to portablecl.org/downloads via SFTP or to the sourceforge file listing for the pocl project. * Request for testers in Twitter and/or mailing list. Point the testers to send their test reports to you privately or by adding them to the wiki. A good way is to create a wiki page for the release schedule and a test log. See https://github.com/pocl/pocl/wiki/pocl-0.10-release-testing for an example. 
* To publish a release, create a new release on Github without checking the pre-release checkbox. Upload the tar ball to the sourceforge download page and to http://portablecl.org/downloads. * Update the CHANGES and ANNOUNCEMENT text files in these directories. ANNOUNCEMENT is a copy of the latest release notes. A direct link to it can be easily circulated in IRC, for example. * Update the http://portablecl.org web page with the release information. * Advertise everywhere you can. At least in Twitter and the mailing list. In case of any problems, ask any previous release manager for help. Previous releases were managed by the following pocl developers: * 0.14: Pekka Jääskeläinen * 0.11: Michal Babej * 0.10: Pekka Jääskeläinen * 0.9: Kalle Raiskila * 0.8: Erik Schnetter * 0.6 and 0.7: Pekka Jääskeläinen pocl-1.8/doc/sphinx/source/extensions.rst000066400000000000000000000005701413131625300206270ustar00rootroot00000000000000================== PoCL extensions ================== PoCL currently supports one extension, cl_pocl_content_size. cl_pocl_content_size ~~~~~~~~~~~~~~~~~~~~~~~ This extension provides a way to indicate a buffer which will hold the meaningful bytes of another buffer, after kernel execution. Full specification can be found on: https://www.khronos.org/registry/OpenCL/ pocl-1.8/doc/sphinx/source/faq.rst000066400000000000000000000163421413131625300172030ustar00rootroot00000000000000Frequently asked questions ========================== Common problems and questions related to using and developing pocl are listed here. Using pocl ---------- .. _supported-compilers: Supported compilers and compiler combinations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Pocl usually uses two different compilers (though may be built using only one). One is used to compile C and C++ files - this is usually the "system compiler". It's specified by CC and CXX vars to configure script, or CMAKE_C{,XX}_COMPILER variables to cmake, but usually just left to default.
The second compiler is used to build OpenCL files - this is always clang+llvm. It's specified by LLVM_CONFIG= to configure, or -DWITH_LLVM_CONFIG= to cmake. You may use clang as both "system" and OpenCL compiler for pocl. Note however that pocl uses the CXX_FLAGS *which the 2nd compiler (clang) was built with*, to build parts of pocl that link with that compiler. This may cause some issues, if you try to build pocl with a different compiler as the one used to build the 2nd compiler - because gcc and clang are not 100% compatible with each other in flags. So far though we've only seen warnings about unknown flags, not actual bugs. Anyway, the most trouble-free solution is to use the same "system" compiler to build pocl, as the one that was used to build the 2nd compiler. Note that while most Linux distributions use gcc to build their clang/llvm, the official downloads from llvm.org are built using clang. Pocl is not listed by clinfo / is not found ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Occasionally, proprietary implementations rewrite the ICD loader by their own version. E.g. Intel SDK installer silently replaces ``/usr/lib/x86_64-linux-gnu/libOpenCL.so`` with a link to ``/etc/alternatives/opencl-libOpenCL.so`` which itself is a link to the intel's libOpenCL implementation. The fix is to remove the symlinks manually and reinstall the ICD loader after which both pocl and the Intel SDK can be used through the ICD loader. Deadlocks (freezes) on FreeBSD ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The issue here is that a library may not initialize the threading on BSD independently. This will cause pocl to stall on some uninitialized internal mutex. See: http://www.freebsd.org/cgi/query-pr.cgi?pr=163512 A simple work-around is to compile the OpenCL application with "-pthread", but this of course cannot be enforced from pocl, especially if an ICD loader is used. 
The internal testsuite works only if "-pthread" is passed to ./configure in CFLAGS and CXXFLAGS, even if an ICD loader is used. clReleaseDevice or clCreateImage missing when linking against -lOpenCL (ICD) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These functions were introduced in OpenCL 1.2. If you have built your ICD loader against 1.1 headers, you cannot access the pocl implementations of them because they are missing from the ICD dispatcher. The solution is to rebuild the ICD loader against OpenCL 1.2 headers. See: https://github.com/pocl/pocl/issues/27 "Two passes with the same argument (-barriers) attempted to be registered!" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you see this error:: Two passes with the same argument (-barriers) attempted to be registered! UNREACHABLE executed at /include/llvm/Support/PassNameParser.h:73! It's caused by initializers of static variables (like pocl's LLVM Pass names) called more than once. This happens for example when you link libpocl twice to your program. One way that could happen, is building pocl with ``--disable-icd`` while having hwloc "plugins" package installed (with the opencl plugin). What happens is: * libpocl.so gets built, and also libOpenCL.so which is its copy * program gets linked to the built libOpenCL.so; that is linked to hwloc * at runtime, hwloc will try to open the hwloc-opencl plugin; that links to system-installed libOpenCL.so (usually the ICD loader); * the ICD loader will try to dlopen libpocl.so -> you get the error. The solution is either to use ``--enable-icd --disable-direct-linkage``, or to uninstall the hwloc "plugins" package. Why is pocl slow? ^^^^^^^^^^^^^^^^^ If pocl's kernel build seems really slow, it is very possible you have built your LLVM with Debug+Asserts on (not configure --enable-optimized). This should result in up to 10x kernel compiler slow downs.
You can really feel it when running 'make check', for example. The kernel compiler cache often removes that overhead when you run your OpenCL app the next time. If pocl is otherwise slower than other OpenCL implementations, it's normal. pocl is known to run certain benchmarks faster, certain ones slower, when comparing against the Intel and AMD OpenCL SDKs. We hope to improve the performance in each release, so if you encounter performance regressions (an older pocl/LLVM version used to run an app faster), please report a bug. pocl source code ---------------- Why C99 in host library? ^^^^^^^^^^^^^^^^^^^^^^^^ The kernel compiler passes and some of the driver implementations are in C++11 and it's much faster to implement things in C++11. Why require using C99 in the host library? pocl is meant to be very portable to various type of devices, also to those with very little resources (no operating system at all and with pruned runtime libraries). C has better portability to low end CPUs and VMs. Thus, in order for a CPU to act as an OpenCL host without online kernel compilation support, only C99 support is required from the target, no C++ compiler, runtime or STL is needed. Also, C programs are said to sometimes produce more "lightweight" binaries, but that is debatable. Benchmarks ============== CLPeak issues ---------------- Currently (Dec 2017) does not work. First, there's a global memory size detection bug in CLPeak which makes it fail on all OpenCL calls (this can be worked around by using POCL_MEMORY_LIMIT=1). Second, compilation takes forever - this can't be fixed in pocl and needs to be fixed in either CLPeak or LLVM. CLPeak sources use recursive macros to create a giant stream of instructions. Certain optimization passes in LLVM seem to explode exponentially on this code.
The second consequence of the giant instruction stream is that it easily overflows the instruction caches of a CPU; therefore CLPeak results are highly dependent on whether the compiler manages to fit the code into icache, perhaps using loop re-rolling, and as such are not a reliable measure of peak device FLOPS. Luxmark issues --------------- * Using the binary downloaded from www.luxmark.info might lead to pocl abort on creating cache directory. This is not a bug in Pocl; it's a consequence of the two programs (pocl & luxmark) having been compiled with different libstdc++. Using a distribution packaged Luxmark fixes this problem. * It's recommended to remove luxmark cache (~/.config/luxrender.net) after updating pocl version. * There's another bug (http://www.luxrender.net/mantis/view.php?id=1640) - it crashes after compiling kernels, because it doesn't recognize an OpenCL device. This requires editing scenes//render.cfg; you must add ``opencl.cpu.use = 0`` and ``film.opencl.device = 0`` * All scenes (Microphone, Luxball and Hotel) should compile & run with LLVM 6 and newer. pocl-1.8/doc/sphinx/source/features.rst000066400000000000000000000005321413131625300202440ustar00rootroot00000000000000Supported features and devices =============================== Pocl currently supports CPUs (x86-64 with full 1.2 conformance, ARM 32b/64b lightly tested), NVidia GPUs via CUDA backend, HSA devices, TCE devices and fixed-function accelerators. .. toctree:: :maxdepth: 2 opencl_status conformance hsa cuda accel extensions pocl-1.8/doc/sphinx/source/host_library.rst000066400000000000000000000011171413131625300211270ustar00rootroot00000000000000OpenCL host library ------------------- The API implementations of the OpenCL Runtime and the OpenCL Platform Layer are compiled to a single dynamic library (e.g., ``libpocl.so``). This library contains all implementations and, if pocl is compiled in the `ICD mode `_, is what the ICD loader accesses. 
In case pocl is instructed (via -DENABLE_ICD=0) to compile a "directly linkable library", ``libOpenCL.so`` is produced which can be linked directly to the OpenCL programs (instead of linking against the ICD loader). pocl-1.8/doc/sphinx/source/hsa.rst000066400000000000000000000277651413131625300172220ustar00rootroot00000000000000=== HSA === Note: pocl's HSA support is currently in experimental stage. The experimental HSA driver works with AMD Kaveri or Carrizo APUs using an AMD's HSA Runtime implementation using the HSAIL-supported LLVM and Clang. Also, generic HSA Agent support (e.g. for your CPU) can be enabled using the phsa project. Installing prerequisite software --------------------------------- 1) Install an HSA AMD runtime library implementation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For AMD devices, pre-built binaries can be found here: https://github.com/HSAFoundation/HSA-Runtime-AMD This usually installs into /opt/hsa. Make sure to read Q&A in README.md (it lists some common issues (like /dev/kfd permissions) and run sample/vector_copy to verify you have a working runtime. Alternatively, you can use *phsa* to add generic HSA support on your gcc-supported CPU. Its installation instructions are here: https://github.com/HSAFoundation/phsa 2) Build & install the LLVM with HSAIL support ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fetch the HSAIL branch of LLVM 3.7:: git clone https://github.com/HSAFoundation/HLC-HSAIL-Development-LLVM/ -b hsail-stable-3.7 Fetch the upstream Clang 3.7 branch:: cd HLC-HSAIL-Development-LLVM/tools svn co http://llvm.org/svn/llvm-project/cfe/branches/release_37 clang Patch it:: cd clang; patch -p0 < PATHTO-POCL/tools/patches/clang-3.7-hsail-branch.patch An LLVM cmake configuration command like this worked for me:: cd ../../ mkdir build cd build cmake .. 
-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=HSAIL \ -DBUILD_SHARED_LIBS=off -DCMAKE_INSTALL_PREFIX=INSTALL_DIR \ -DLLVM_ENABLE_RTTI=on -DLLVM_BUILD_LLVM_DYLIB=on -DLLVM_ENABLE_EH=ON -DHSAIL_USE_LIBHSAIL=OFF ``-DHSAIL_USE_LIBHSAIL=OFF`` is only for safety. If you accidentally build clang with libHSAIL, it will cause mysterious link errors later when building pocl. Change the INSTALL_DIR to your installation location of choice. Note that these are **required**:: -DLLVM_ENABLE_RTTI=ON -DLLVM_ENABLE_EH=ON -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=HSAIL Also, if you don't want to build all the default targets, you'll need AMDGPU. Then build and install the Clang/LLVM:: make -j4 && make install 3) Get HSAIL-Tools ~~~~~~~~~~~~~~~~~~~~~ Clone the repo:: git clone https://github.com/HSAFoundation/HSAIL-Tools Then either copy the ``HSAILasm`` executable to /opt/hsa/bin, or give the path to ``HSAILasm`` on the build command line (see below). 4) Build pocl ~~~~~~~~~~~~~ Using cmake:: mkdir build ; cd build cmake -DENABLE_HSA=ON -DWITH_HSA_RUNTIME_DIR=\ \ -DWITH_HSAILASM_PATH=\ -DSINGLE_LLVM_LIB=off .. It should result in "hsa" appearing in pocl's targets to build. ``-DSINGLE_LLVM_LIB=off`` works around an LLVM 3.7 build system issue. 5) Run tests & play around ~~~~~~~~~~~~~~~~~~~~~~~~~~~ After building pocl, you can smoke test the HSA driver by executing the HSA tests of the pocl testsuite:: ../tools/scripts/run_hsa_tests HSA Support notes ------------------ Note that the support is still experimental and very much unfinished. You're welcome to try it out and report any issues, though. 
HSA support implementation status as of 2016-05-17 -------------------------------------------------- What’s Implemented ~~~~~~~~~~~~~~~~~~~ * global/local/private memory * barriers * most of the OpenCL 1.2 kernel builtins * OpenCL 2.0 shared virtual memory (SVM) * OpenCL 2.0 atomics What's Missing ~~~~~~~~~~~~~~~ * printf() is not implemented; this should wait until we have a proper in-tree printf() in pocl with a stdout ring buffer * several builtins are not implemented yet (logb, remainder, nextafter); some are suboptimal or may give incorrect results with under/overflows (most of the builtins are taken from vecmathlib library, rewritten to fit HSAIL). * image support is not implemented * support for GPU devices other than Kaveri; currently only Kaveri and phsa-based CPU Agents have been tested * support for 32bit HSA devices About the Shared Virtual Memory Implementation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OpenCL 2.0 SVM is a feature that lets you share virtual memory between CPU and GPUs. Note that while SVM works in pocl, one must carefully align all structs explicitly (both struct members and the struct itself). This is because the alignment of the structs with the host's compiler might differ from the one in the device. For example, you can see the issue in Intel's SVM examples: .. code-block:: c typedef struct _Element { global float* internal; //points to the "value" of another Element from the same array global float* external; //points to the entry in a separate array of floating-point values float value; } Element; This *may* work with Intel's OpenCL SDK when using only CPU devices, but crashes when offloading to HSA via pocl's HSA driver. The reason is that when using HSA, pocl compiles this header with two different compilers: usually gcc/clang for host C code, and llvm-HSAIL (Clang) for the device side, and they do *not* use the same alignment rules. 
The C standard specifies almost nothing with regard to struct alignment in memory, so one must take care to explicitly specify alignment when using structs in shared memory. A proper way to declare the struct would be to utilize the widely supported 'aligned' attribute. .. code-block:: c typedef struct _Element { global float* internal __attribute__ ((aligned (8))); //points to the "value" of another Element from the same array global float* external __attribute__ ((aligned (8))); //points to the entry in a separate array of floating-point values float value __attribute__ ((aligned (8))); } Element __attribute__ ((aligned (32))); phsa ~~~~~ `Portable HSA (phsa) `_ provides a similar portable HSA implementation for CPUs/DSPs and other processors as pocl aims to do for OpenCL. Using phsa, one can easily implement HSA Agent support for any processor which has a gcc backend. pocl supports phsa as a backend for its HSA driver, thus any processor utilizing phsa for HSA Agent support can get OpenCL support via pocl. We used phsa to test that the HSA driver works with other devices and runtimes than AMD's. Known Issues --------------- OpenCL 2.0 Atomics and HSA Memory Scope ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There is a "memory scope" parameter present in HSA, which applies to atomic memory instructions or memory fences. Its purpose is to limit the scope of these instructions. However, pocl translates to HSAIL via LLVM bitcode, and the "atomicrmw" LLVM instruction only takes a memory order parameter, not scope. For this reason the memory scope in HSAIL is always the widest "system" scope. Multiple HSA Agent Support ~~~~~~~~~~~~~~~~~~~~~~~~~~~ While multiple OpenCL device support is not a problem for pocl, the HSA 1.0 specification lacks a "loader/proxy" feature that OpenCL has in ICD. Thus, support for devices is limited to what the linked HSA runtime supports. 
Currently, if one wants to control multiple HSA Agents as multiple pocl OpenCL devices, one needs to implement an HSA runtime that lists all the Agents to pocl. There is no capability to load multiple HSA runtimes in pocl as we consider it out of scope and a job for a proxy HSA runtime similar to ICD. Performance ------------- We conducted preliminary benchmarking with a set of test cases to serve as a basis for future optimization efforts. Evaluation Setup ~~~~~~~~~~~~~~~~~~ Hardware: AMD A10-7800, 8 GB of dual-channel 1600 MHz memory, TDP set to 65W * Configuration 1: Windows 10 x86-64, AMD Crimson drivers * Configuration 2: Ubuntu 15.04 x86-64, kernel 4.0.0 & runtime 1.0.3 from https://github.com/HSAFoundation Test applications from AMD SDK 3.0 samples/opencl/bin/x86_64. The tests were run with the -i (iterations) parameter ranging from 10 to 200 (longer tests were run with fewer iterations). The performance currently lags behind AMD's proprietary OpenCL on Windows by a factor of 1x to 5x =================================================== ============== ====================== =============== ============= ============================= AMD SDK example with arguments AMD runtime(s) other(GB/s,opts/s etc) POCL runtime(s) other POCL/AMD (>1.0 = POCL slower) =================================================== ============== ====================== =============== ============= ============================= BitonicSort -q -t -x 1048576 0.0978 10713500 0.2116 4954540 2.162 BinomialOption -q -t -x 10000 0.0164 25855.1 0.0233 37030.3 1.416 BlackScholes -s -q -t -x 16777216 0.0098 1708340000 0.0790 212347000 8.045 DCT -q -t -x 4000 -y 4000 0.0493 - 0.0582 - 1.181 FastWalshTransform -q -t -x 134217728 1.5895 - 2.4367 - 1.533 FloydWarshall -q -t -x 512 0.0671 - 0.1802 - 2.682 MatrixTranspose -t -x 8192 -q 0.0317 16920500000 0.1675 3204580000 5.280 MatrixMultiplication -q -t -x 1024 -y 1024 -z 2048 0.0175 245.07 0.0776 55.29 4.432 QuasiRandomSequence -q -t -y 10200 -x 10000 
0.0009 2754120000 0.0100 1188730000 10.603 Reduction -q -t -x 100000000 0.1108 - 0.1165 - 1.051 SimpleConvolution -q -t -x 204800 0.1056 0.565378 0.1154 1.68136 2.973 =================================================== ============== ====================== =============== ============= ============================= We briefly analyzed the bottlenecks and the first clear issue is that we have recently introduced out-of-order queues in pocl, and the driver layer changed significantly with this regard, and it has not yet been fully optimized for HSA. There is ongoing work in this area. The slow kernel launches may be the reason why extremely short kernels like QuasiRandomSequence are >5x slower. The other major issue is that the LLVM 3.7 based HSAIL compiler is sometimes producing clearly suboptimal code. If we take MatrixMultiplication as an example, the GPU code generated by the proprietary AMD OpenCL driver on windows uses 76 VGPRs, 26 SGPRs and has no spills. The HSAIL code from pocl contains about 70 spills! While the HSA PRM (programmer's reference manual) states "the finalizer might be able to deploy extra hardware registers and remove the spills", it's likely not successful in this case, assuming AMD's HSAIL finalizer is putting only minimal effort to optimize the code to provide fast finalization times. This hopefully will change when LLVM-HSAIL is updated to later LLVM versions and its main bottlenecks are optimized, or in case new AMD SDK versions do optimization in the finalization of the suboptimal HSAIL input. Credits ---------- The current implementation was mainly done by our `Customized Parallel Computing `_ group of Tampere University, Finland with early prototype code contributions from the Programming Language Lab at National Tsing-Hua University, Hsinchu, Taiwan. CPC group thanks HSA Foundation and ARTEMIS JU (under grant agreement no 621439, ALMARVI) for funding this initial pocl HSA driver work. 
This driver added GPU device support to pocl for the first time, and, on the other hand, produced an easier path for HSA-supported devices to implement the OpenCL API by utilizing the pocl code base as a starting point. In the future we hope to see more effort put in optimizing the results to reach the performance of the proprietary SDKs on HSA devices. pocl-1.8/doc/sphinx/source/index.rst000066400000000000000000000011751413131625300175410ustar00rootroot00000000000000.. Portable Computing Language (pocl) documentation master file, created by sphinx-quickstart on Fri May 3 10:53:18 2013. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to Portable Computing Language (pocl)'s documentation! ============================================================== Contents: .. toctree:: :maxdepth: 2 install using features debug faq development design Back to `pocl home page `_. Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` pocl-1.8/doc/sphinx/source/install.rst000066400000000000000000000324371413131625300201050ustar00rootroot00000000000000.. _pocl-install: ============ Installation ============ Requirements ------------ In order to build pocl, you need the following support libraries and tools: * Latest released version of LLVM & Clang * development files for LLVM & Clang + their transitive dependencies (e.g. libclang-dev, libllvm-dev, zlib1g-dev, libtinfo-dev...) * GNU make or ninja * pthread (should be installed by default) * Optional: hwloc v1.0 or newer (e.g. libhwloc-dev) * pkg-config * cmake Installing requirements for Ubuntu:: Note: The binary packages from https://apt.llvm.org/ are recommended (and tested for each release) instead of the binary tar balls or the packages included in the distribution. 
The following assumes apt.llvm.org is added to your apt repos (LLVM_VERSION=12 recommended for PoCL 1.7):: apt install -y build-essential ocl-icd-libopencl1 cmake git pkg-config libclang-${LLVM_VERSION}-dev clang llvm-${LLVM_VERSION} make ninja-build ocl-icd-libopencl1 ocl-icd-dev ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils libxml2-dev libclang-cpp${LLVM_VERSION}-dev libclang-cpp${LLVM_VERSION} llvm-${LLVM_VERSION}-dev Installing requirements for Arch Linux:: pacman -S gcc patch hwloc cmake git pkg-config make ninja ocl-icd clang llvm llvm-libs clinfo opencl-headers Installing requirements for Fedora:: dnf install gcc gcc-c++ clinfo hwloc-devel hwloc-libs cmake git-core pkgconfig make ninja-build ocl-icd ocl-icd-devel clang clang-devel clang-libs llvm llvm-devel llvm-libs patch redhat-rpm-config findutils There are also Dockerfiles available for a few most common linux distributions in ``tools/docker``, looking into them might be helpful. Clang / LLVM Notes ------------------ **IMPORTANT NOTE!** Some targets (TCE and possibly HSA) require that you compile & build LLVM with RTTI on. It can be enabled on cmake command line, as follows:: cmake [other CMake options] -DLLVM_ENABLE_RTTI=ON -DLLVM_ENABLE_EH=ON Supported LLVM versions ~~~~~~~~~~~~~~~~~~~~~~~~ Note that pocl aims to support **the latest LLVM version** at the time of pocl release, **plus the previous** LLVM version. All older LLVM versions are supported with "best effort" basis; there might not be build bots continuously testing the code base nor anyone fixing their possible breakage. Configure & Build ----------------- CMake version 3.3 or higher is required. The build+install is the usual CMake way:: cd mkdir build cd build cmake [-D