pax_global_header00006660000000000000000000000064135501114770014516gustar00rootroot0000000000000052 comment=882ba371458cfb2996f21e4134f948abc8b0d2d5 pocl-1.4/000077500000000000000000000000001355011147700123175ustar00rootroot00000000000000pocl-1.4/.drone.yml000066400000000000000000000041171355011147700142320ustar00rootroot00000000000000kind: pipeline name: amd64_arch platform: os: linux arch: amd64 steps: - name: build_and_test image: archlinux/base environment: POCL_BUILDING: 1 POCL_CACHE_DIR: /tmp/cache POCL_MAX_PTHREAD_COUNT: 2 commands: - pacman --noconfirm -Sy - pacman --noconfirm -S gcc patch hwloc cmake git pkg-config make ninja ocl-icd clang llvm llvm-libs clinfo opencl-headers - mkdir build - cd build - cmake -DDEVELOPER_MODE=ON -DCMAKE_INSTALL_PREFIX=/usr -G Ninja .. - ninja - ninja install - clinfo - ctest -j12 --output-on-failure -L internal --- kind: pipeline name: arm64_ub1804 platform: os: linux arch: arm64 steps: - name: build_and_test image: arm64v8/ubuntu:18.04 environment: POCL_BUILDING: 1 POCL_CACHE_DIR: /tmp/cache POCL_MAX_PTHREAD_COUNT: 2 commands: - apt update - apt upgrade -y - apt install -y build-essential ocl-icd-libopencl1 cmake git pkg-config libclang-6.0-dev clang-6.0 llvm-6.0 make ninja-build ocl-icd-libopencl1 ocl-icd-dev ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils - mkdir build - cd build - cmake -DDEVELOPER_MODE=ON -DLLC_HOST_CPU=thunderx -DCMAKE_INSTALL_PREFIX=/usr -G Ninja .. 
- ninja - ninja install - clinfo - ctest -j32 --output-on-failure -L internal --- kind: pipeline name: arm32_ub1804 platform: os: linux arch: arm steps: - name: build_and_test image: arm32v7/ubuntu:18.04 environment: POCL_BUILDING: 1 POCL_CACHE_DIR: /tmp/cache POCL_MAX_PTHREAD_COUNT: 2 commands: - apt update - apt upgrade -y - apt install -y build-essential ocl-icd-libopencl1 cmake git pkg-config libclang-6.0-dev clang-6.0 llvm-6.0 make ninja-build ocl-icd-libopencl1 ocl-icd-dev ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils - mkdir build - cd build - cmake -DENABLE_FP64=OFF -DDEVELOPER_MODE=ON -DLLC_HOST_CPU=cortex-a15 -DLLC_TRIPLE=armv7l-unknown-linux-gnueabihf -DEXTRA_KERNEL_FLAGS="-mfloat-abi=hard -mfpu=neon" -DCMAKE_INSTALL_PREFIX=/usr -G Ninja .. - ninja - ninja install - clinfo - ctest -j32 --output-on-failure -L internal pocl-1.4/.gitattributes000066400000000000000000000033701355011147700152150ustar00rootroot00000000000000doc/benchmark_results/ export-ignore doc/buildbot/ export-ignore doc/luxmark.txt export-ignore doc/handling_loops.txt export-ignore doc/LAUNDRY export-ignore doc/notes*.txt export-ignore doc/spir-todo.txt export-ignore doc/ttasim_kernel_capturer.txt export-ignore doc/www/ export-ignore examples/piglit/sorted_ref* export-ignore # this one is ~20M examples/Rodinia/pathfinder.stdout export-ignore lib/kernel/amdgcn export-ignore lib/kernel/convert_type.py export-ignore lib/kernel/libclc-pocl/gen_vectorize.rb export-ignore lib/kernel/sleef/gen* export-ignore scripts/pocl-build.in export-ignore scripts/pocl-kernel.in export-ignore scripts/pocl-workgroup.in export-ignore tests/kernel/test_convert_type.py export-ignore tests/kernel/test_convert_type.sh export-ignore tests/testsuite* export-ignore tests/amdsdk.at export-ignore tests/atlocal.in export-ignore tools/gdb-breakpoints export-ignore tools/scripts/benchmark_barchart.py export-ignore tools/scripts/benchmark.py export-ignore tools/scripts/devel-configure 
export-ignore # should we include these ? android/ export-ignore windows/ export-ignore pocl-1.4/.gitignore000066400000000000000000000021101355011147700143010ustar00rootroot00000000000000build*/* *~ **/.deps **/.libs *.bc *.la *.lo *.o Makefile Makefile.in aclocal.m4 autom4te.cache config.h config2.h config.h.in config.log config.status config/ar-lib config/compile config/config.guess config/config.sub config/depcomp config/install-sh config/ltmain.sh config/missing configure doc/sphinx/build/ examples/EinsteinToolkit/EinsteinToolkit examples/example1/example1 examples/example1-spir32/example1-spir32 examples/example1-spir64/example1-spir examples/example2/example2 examples/example2a/example2a examples/scalarwave/scalarwave examples/standalone/standalone.h examples/trig/trig include/arm/types.h include/cellspu/types.h include/powerpc/types.h include/powerpc64/types.h include/x86_64/types.h install-paths.h libtool lib/CL/kernellib_hash.* m4/libtool.m4 m4/ltoptions.m4 m4/ltsugar.m4 m4/ltversion.m4 m4/lt~obsolete.m4 ocl-vendors/pocl-tests.icd pocl.icd pocl.pc bin/poclcc stamp-h1 # these are created by Qt Creator pocl.config pocl.creator pocl.creator.user pocl.files pocl.includes CMakeLists.txt.includes CMakeLists.txt.user /examples/CLBlast/CLBlast/ pocl-1.4/.mailmap000066400000000000000000000046261355011147700137500ustar00rootroot00000000000000Carlos Sánchez de La Lama Carlos Sánchez de La Lama Carlos Sánchez de La Lama Carlos Sanchez de La Lama Clement Leger Clément Daniel Sanders Daniel Sanders META COSY Erik Schnetter Erik Schnetter Erik Schnetter <> Heikki Kultala Heikki Kultala Heikki Kultala heikki-llvm-svn-testing Heikki Kultala hkultala@cs.tut.fi <> Hugo van der Wijst Kalle Raiskila Kalle Raiskila Kalle Raiskila Kalle Kalle Raiskila Kalle Raiskila <> Kalle Raiskila kraiskil@debian <> Kalle Raiskila kraiskil Kalle Raiskila kraiskil Krishnaraj Raghavendra Bhat Krishnaraj Bhat Krishnaraj Raghavendra Bhat Krishnaraj R Bhat Matias Koskela Michal Babej Michal Babej 
Pekka Jääskeläinen Pekka Jääskeläinen Pekka Jääskeläinen Pekka Jääskeläinen Pekka Jääskeläinen Pekka Jääskeläinen Pekka Jaaskelainen Ville Korhonen Ville Korhonen vkorhonen Vincent Danjean Vladimir Guzma pocl-1.4/.travis.yml000066400000000000000000000041541355011147700144340ustar00rootroot00000000000000sudo: false language: c++ os: - linux matrix: exclude: - os: linux include: - os: linux docker: true compiler: gcc env: LLVM_VERSION=6.0 HWLOC_VERSION=1.11 DOCKERFILE=Ubuntu/16_04.64bit - os: osx compiler: clang env: LLVM_VERSION=6.0 HWLOC_VERSION=2.0 CONDA=True before_install: - if [ "$TRAVIS_OS_NAME" = "osx" ] ; then export MINICONDA_FILE="Miniconda3-latest-MacOSX-x86_64.sh"; else export MINICONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"; fi - if [ "$CONDA" = "True" ] ; then echo "Installing a fresh version of Miniconda."; MINICONDA_URL="https://repo.continuum.io/miniconda"; curl -L -O "${MINICONDA_URL}/${MINICONDA_FILE}"; bash $MINICONDA_FILE -b; source $HOME/miniconda3/bin/activate root; conda config --add channels conda-forge; conda install --yes --quiet llvmdev=${LLVM_VERSION}.* clangdev=${LLVM_VERSION}.* libhwloc=${HWLOC_VERSION}.*; export LD_LIBRARY_PATH=$HOME/miniconda3/lib:$LD_LIBRARY_PATH; fi - if [ "$CONDA" = "True" ] ; then export MY_CMAKE_PREFIX_PATH="-DCMAKE_PREFIX_PATH=$HOME/miniconda3" ; fi - if [ "$TRAVIS_OS_NAME" = "osx" ] ; then export MY_CMAKE_ICD_OFF="-DENABLE_ICD=OFF" ; fi - if [ "$TRAVIS_OS_NAME" = "osx" ] && [ "$CXX" = "clang++" ] ; then MY_CMAKE_LIBCXX="-DCMAKE_CXX_FLAGS=-stdlib=libc++ -DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath,$HOME/miniconda3/lib" ; fi - if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then export GIT_COMMIT="$TRAVIS_COMMIT"; else export GH_PR=$TRAVIS_PULL_REQUEST; fi script: - if [ "$CONDA" = "True" ] ; then mkdir build && cd build; cmake .. -DCMAKE_INSTALL_PREFIX=/tmp $MY_CMAKE_PREFIX_PATH $MY_CMAKE_LIBCXX $MY_CMAKE_ICD_OFF; make -j2 && make check && make install; fi - if [ ! 
"$DOCKERFILE" = "" ] ; then docker build -f tools/docker/$DOCKERFILE . --build-arg GH_PR=$GH_PR --build-arg GH_SLUG=$TRAVIS_REPO_SLUG --build-arg GH_COMMIT=$GIT_COMMIT --build-arg LLVM_VERSION=$LLVM_VERSION -t travis_ci_pocl_test; docker run `docker images -q travis_ci_pocl_test`; fi notifications: email: false pocl-1.4/CHANGES000066400000000000000000000636771355011147700133350ustar00rootroot000000000000001.4 October 2019 ================ Highlights ---------- - Improved SPIR and SPIR-V support. clCreateProgramWithIL() implemented, Kernel library (for CPU target) support for SPIR-mangling improved - pocl-accel: An example driver and support infrastructure for OpenCL 1.2 CL_DEVICE_TYPE_CUSTOM hardware accelerators which implement a memory mapped control interface. Kernel Compiler --------------- - Specialize work-group functions for global offset (0,0,0). - A pocl installation with clang, hwloc statically linked in is now relocatable. - Clang/LLVM versions older than 6.0 are no longer supported. - Create specialized work-group functions for small (defined by a device driver specific limit) grid dimensions. - Add Range Metadata to various ID queries etc. to improve vectorizing index computation to smaller lane widths and other optimizations. - Passes only the launched kernel to work-group generation and code gen, thus speeding up the compilation process. Misc. ----- - hsa-native: Downgraded the advertised version to 1.2 which is closer to the truth (fixes OCLTest of Glow). - hsa-native: Add support for byval (struct) argument passing. - hsa-native: Allow offsets in block copy. Notable Internal Changes ------------------------ - Allow devices to utilize the ROCm-Device-Libs ocml builtins for their builtin libraries if seen fit. https://github.com/RadeonOpenCompute/ROCm-Device-Libs/tree/master/ocml was mirrored in lib/kernel and made it easy to cherry pick implementations to targets' kernel libary. - libltdl is replaced with libdl on UNIX platforms. 
Notable Bug Fixes ----------------- - Fix a race condition in device initialization, which caused issues in applications that cause reinitialization of pocl device drivers (appeared in Glow's OCLTest). Device Driver Specific ---------------------- - hsa-native: Downgraded the advertised version to 1.2 which is closer to the truth (fixes OCLTest of Glow). - hsa-native: Add support for byval (struct) argument passing. - hsa-native: Allow offsets in block copy. 1.3 April 2019 ============== Highlights ---------- - Support for Clang/LLVM 8.0. - Support ICD on OSX. Misc. ----- - Ability to have size_t (basically derived from the largest supported object) smaller than CL_ADDRESS_BITS. This is an unofficial optional extension as the OpenCL standard mandates it to be the same. - POCL_EXTRA_BUILD_FLAGS can be used to force add extra build flags such as '-g' to all clBuildProgram() calls. - Allow building pocl without CPU backend drivers. When set to off, CPU will not appear in the list of OpenCL devices reported by pocl. Controllable via ENABLE_HOST_CPU_DEVICES=off cmake option. - Build logs are now produced also for illegal options passed to the kernel build e.g. via the options parameter of clBuildProgram(). - hsa-native: Device side printf-support and alternative < 1.2 non-standard C99 printf exposing support. - pocl's binary format has been slightly updated (changes are listed in the top of pocl_binary.c file) to version 7, but pocl can still read also the previous version 6 format. - Allow local-size-specializing also SPMD-targeted kernels to enable compile time optimization of code depending on the local dimensions. - Support older GLIBC versions. - HSA: Initial experimental support for native-ISA compilation on top of HSA runtime. Tested and works currently only on phsa-runtime. Can be enabled with ENABLE_HSAIL=off cmake option. - Add option to disable installing of OpenCL headers. Notable Bug Fixes ----------------- - Fixed kernel debug symbol generation. 
- HSA: fix kernel caching. - Fix issue #661: clCreateImage doesn't fail with unsupported image type. - Fix issue #668: handle non-kernel functions with barriers properly. - Fix issue #671: Unable to build pocl with CUDA support with LLVM 7 and host GCC 8.2. - Fix image format/size handling with multiple devices in context. - Fix padding issue with context arrays that manifested as unaligned access errors after autovectorization. Notable Internal Changes ------------------------ - Add group ids as hidden kernel arguments instead of digging them up from the context struct. - Ability to generate the final binary via separate assembly text + assembler call. Useful for supporting LLVM targets without direct binary emission support. - Use Clang's Driver API for launching the final linkage step. This way we utilize the toolchain registry with correct linkage steps required for the target at hand. - Add 'device_aux_functions' to the driver layer attributes. This can be used to retain device-specific functions required by the target across the pruning of unused globals. - The "default kernels" hack which was used to store kernel metadata, has been removed. Kernel metadata are now stored only once, in cl_program struct; every new cl_kernel structs holds only a pointer. - Major 'pthread' CPU driver cleanup. - Major Workgroup.cc cleanup. 1.2 September 2018 ================== - LLVM 7.0 is now supported. - Version 2.0 of hwloc library is supported. - device-side printf; more consistent printf output. 1.1 March 2018 ============== Highlights ---------- - LLVM 6.0 is now supported. - Reintroduced experimental SPIR LLVM bitcode support to pocl. Requires LLVM 5 or newer. New experimental feature: SPIR-V support; requires a working llvm-spirv converter. Currently only loading of SPIR-V binaries by pocl is supported, not output. See docs/features.rst for more details. 
- Refactored pocl cache now does away with LLVM file locks and relies entirely on system calls for proper synchronization. Additionally, cache file writes are now fdatasync()ed. - Improved kernel compilation time (with cold cache). Improvement depends on sources - it's bigger for large programs with many kernels. Luxmark now compiles in seconds instead of dozens of seconds; internal pocl tests run in 30-50% less time. - LLVM Scalarizer pass is now only called for SPMD devices. Performance change varies across tests, but positive seems to outweigh negative. - Implemented uninitialization callback for device drivers. This is triggered when the last cl_context is released. Currently only the CPU driver implements the callback. - Removed libpoclu from installed files; this library contains helpers for pocl's internal tests, and from installed files was only used by poclcc, which has been updated to not rely on it. - POCL_MAX_WORK_GROUP_SIZE is now respected by all devices. This variable limits the reported maximum WG sizes & dimensions; tuning max WG size may improve performance due to cache locality improvement. - CL_PLATFORM_VERSION now contains much more information about how pocl was built. - For users still building with Vecmathlib, performance should be back to levels of pocl 0.14 (there was a huge drop caused by a change in -O0 optimization level of LLVM 5.0). - Improved support for ARM and ARM64 architectures. All internal tests now pass (on Cortex-A53 and Cortex-A15), although it's still far from full conformance. 1.0 December 2017 ================= Highlights ---------- - Improved automatic local work-group sizing on kernel enqueue, taking into account standard constraints, SIMD width for vectorization as well as the number of compute units available on the device. - Support for NVIDIA GPUs via a new CUDA backend (currently experimental). - Removed support for BBVectorizer. - LLVM 5.0 is now supported. 
- A few build options have been added for distribution builds, see README.packaging. - Somewhat improved scalability in the CPU driver. CPUs with many cores and programs using a lot of WIs with small kernels can run somewhat faster. - The OpenCL 1.2 conformance tests now pass with selected CPUs. There are some caveats though - see the documentation. - When conformance is enabled, some kernel library functions might be slower than in previous releases. - Pocl now reports OpenCL 1.2 instead of 2.0, except HSA enabled builds. - Updated format of pocl binaries, which is NOT backwards compatible. You'll need to clean any kernel caches. - Fixed several memory leaks. - Unresolved symbols (missing/misspelled functions etc) in a kernel will result in error in clBuildProgram() instead of pocl silently ignoring them and then aborting at dlopen(). - New env variable POCL_MEMORY_LIMIT= limits the Global memory size reported by pocl to gigabytes. - New env variable POCL_AFFINITY (defaults to 0): if enabled, sets the affinity of each CPU driver pthread to a single core. - Improved AVX512 support (with LLVM 5.0). Note that even with LLVM 5.0 there are still a few bugs (see pocl issue #555); AVX512 + LLVM 4.0 are a lot more broken, and probably not worth trying. - POCL_DEBUG env var has been revamped. You can now limit debuginfo to these categories (or their combination): all,error,warning,general memory,llvm,events,cache,locking,refcounts,timing,hsa,tce,cuda The old setting POCL_DEBUG=1 now equals error+warning+general. 0.14 April 2017 =============== Highlights ---------- - Support for LLVM/Clang versions 3.9 and 4.0. Version 3.9 was the first release to include all frontend features for OpenCL 2.0. - Ability to build pocl in a mode where online compilation is not supported to run in hosts without LLVM and binaries compiled offline e.g. using poclcc. - pocl's binary format now can contain all the necessary bits to execute the programs on a host without online compiler support. 
- Initial support for out-of-order execution execution of command queues. - It's now possible to cross-compile pocl when building an offline compiler build. - New driver api extension to support out-of-order and asynchronous devices/drivers. - Pthread and HSA drivers are now fully asynchronous. - CMake now the only supported build system, autotools removed. - LTTng tracing support OpenCL Runtime/Platform API support ----------------------------------- - implemented clEnqueueBarrierWithWaitList - implemented clEnqueueMigrateMemObjects Other ----- - Support for reqd_work_group_size attribute in the binary format and poclcc: Generates a static sized work-group function to help optimizations such as autovectorization. - HSA: added support for phsa (https://github.com/HSAFoundation/phsa) - A lot of bug and memory leak fixes. Some notable ones: - Issue #1, passing aggregates as kernel value parameters, can be now fixed with an LLVM patch. - Now it's possible to build pocl without using the fake address space ids, which were a source of many annoying issues. 
0.13 April 2016 =============== Highlights ----------- - Support for LLVM/Clang 3.8 - initial (partial) OpenCL 2.0 support (only Shared Virtual Memory and Atomics are supported ATM) - CMake build system almost on parity with autotools (TCE, all external testsuites) - CMake build is now able to build multiple kernel libraries for different CPUs and let pocl select a suitable one at runtime Bugfixes --------- - clEnqueueCopyImage() now works properly - improved file locking (much less disk access to kernel cache) - Address spaces of structs are handled properly Other ------ - removed custom buffer alloc from pthread device - removed IBM Cell support - removed support for older LLVM versions (before 3.7) - significantly higher performance with a lot of small kernel enqueues (due to improved file locking) - vecmathlib now supports AVX2 - a few more HSA kernel library implementations: l/tgamma, erf(c), hypot - implemented OpenCL 2.0 API calls: clEnqueueSVM*, clSVMalloc/free, clEnqueueFillBuffer, clSetKernelExecInfo, clSetKernelArgSVMPointer, clCreateCommandQueueWithProperties - no device side queues yet - OpenCL 2.0 atomics (C11 atomics subset) for x86-64 and HSA - new testsuites: AMD SDK 3.0, Intel SVM - New CMake-only testsuites: ASL, clBLAS, clFFT, arrayfire - more debugging info (timing, mem stats) - ansi colors with POCL_DEBUG=1 if the output is a terminal 0.12 October 2015 =============== Highlights ---------- - Support for HSA-compliant devices (kernel agents). The GPU of AMD Kaveri now works through pocl with a bunch of test cases in the AMD SDK 2.9 example suite. - New and improved kernel cache system that enables caching kernels with #includes. - Support for LLVM/Clang 3.7. - Little endian MIPS32 now passes almost all pocl testsuite tests. OpenCL Runtime/Platform API support ----------------------------------- - Transferred buffer read/write/copy offset calculation to device driver side. - these driver api functions have changed; got offset as a new argument. 
- Maximum allocation is not limited to 1/4th of total memory size. - Maximum image dimensions grow to fit maximum allocation. - clGetDeviceInfo() reports better information about CPU vendor and cache. - experimental clCreateSubDevices() for pthread CPU device. OpenCL C Builtin Function Implementations ----------------------------------------- - Implemented get_image_dim(). Bugfixes -------- - Avoid infinite loops when users recycle an event waiting list. - Correctly report the base address alignment. - Lots of others. Misc ---- - Tests now using new cl2.hpp, removing dependency on OpenGL headers 0.11 March 2015 =============== Highlights ---------- - Support for LLVM/Clang 3.6 - Kernel compiler cache. - Android support. Kernel compiler --------------- - Do not add implicit barriers to kernels without WG barriers to avoid WI context data overheads. - Setting the POCL_VECTORIZER_REMARKS env to 1 prints out LLVM vectorizer remarks during kernel compilation. - Implicit work-group vectorizer improvements. - POCL_VECTORIZER_REMARKS: When set to 1, prints out remarks produced by the loop vectorizer of LLVM during kernel compilation. OpenCL Runtime/Platform API support ----------------------------------- - Minimal initial implementation for clCreateSubDevices() Bugfixes -------- - Fix falsely detecting operations with side-effects (especially atomic operations) as uniform. This caused deadlock/race situations due to illegal implicit barrier injection. - Fix several reference counting issues. - Memory leak fixes. - ARM/openSUSE build fixes. - Plenty of CMake fixes. New test/example cases ---------------------- - Several Halide examples using its OpenCL backend added. - CloverLeaf Misc. ----- - The old BBVectorizer forked WIVectorizer removed due to bit rot and the general hackiness of it. - Experimental Windows/Visual Studio support (in progress). - Initial support for MIPS architecture (with known issues). - Runtime debug printouts that can be enabled via POCL_DEBUG=1. 
- Streamlined the buffer allocation and fixed several issues with it. 0.10 September 2014 =================== This lists only the most interesting changes. Please refer to the version control log for a full listing. Highlights ---------- - Support for LLVM/Clang 3.5 - Support for building using CMake (experimental with known issues). Bugfixes -------- - TCE: kernel building was broken when running pocl from install location - thread-safety (as required since OpenCL 1.1) improved Kernel compiler --------------- - Final code generation now done via LLVM API calls instead of calling the llc binary. - Sensible linking of functions from the monolithic kernel built-in library. Major compilation speedup for smaller kernels. OpenCL C Builtin Function Implementations ----------------------------------------- - Improved support for halfN functions. - ilogb and ldexp available with vecmathlib OpenCL Runtime/Platform API support ----------------------------------- - Implement clCreateKernelsInProgram() - OpenCL-C shuffle() and shuffle2() implementation added - Device probing modified to allow for device driver to detect device during runtime. POCL_DEVICES still supported. - Checks in clSetKernelArgs() for argument validity - Checks in clEnqueueNDRange() for arguments to be all set - Implement clGetKernelArgInfo() - clEnqueueCopyImage() Misc ---- - ViennaCL testsuite updated to 1.5.1 0.9 January 2014 ================ This lists only the most interesting changes. Please refer to the version control log for a full listing. Highlights ---------- - Major improvements to the kernel compiler's vectorization performance. Twofold speedups in some benchmarks - Support for most of the piglit CL tests OpenCL Runtime/Platform API support ----------------------------------- - clCreateImage2D() and clCreateImage3D() implementation moved to clCreateImage() - Image creation now uses clCreateBuffer() - clBuildProgram: Propagate the supported -cl* compiler options to Clang's OpenCL frontend. 
- clFinish: works with commands with event wait lists. - Preliminary support for OpenCL 2.0 blocks - Added support for clEnqueueNativeKernel() Builtin Function Implementations (OpenCL 1.2 Section 6.12) ---------------------------------------------------------- - Refactored read/write_image()-functions to support refactored device image object. (Only functions used by SimpleImage test) - Introduced new macro based implementation for read/write_image()-functions - Added sampler implementation for CLK_ADDRESS_CLAMP and CLK_ADDRESS_CLAMP_TO_EDGE (Only integer coords supported) - Most of the printf() format strings now works. Missing features: - long on 32-bit architectures Performance Improvements ------------------------ - Kernel compiler now tries to avoid replicating uniform variables, this leads to less context data to be saved per work-item and cleaner kernel bitcode for later optimizations - Use a precompiled header for OpenCL C builtin declarations to speed up the kernel compilation - Kernel compiler vectorization optimizations: - Inject implicit barriers both to loop starts and ends to horizontally vectorize the inner loop. - Reduce "peeling" by minimizing the conditional barrier region by injecting implicit barrier close to the branch points for conditional barrier cases. - Breaking of vector datatypes for more efficient loop vectorization. - Support LLVM 3.4 parallel loop metadata. Misc ---- - Explicitly specify the target architecture/CPU for the kernel complier. - Kernel compiler frontend defaults to implementation using LLVM API directly instead of the scripts. - __OPENCL_VERSION__ defined to 120 - poclu: helpers for converting between the C float and OpenCL cl_half types - clEnqueueNativeKernel implemented - Static and cmake-builds of LLVM can now be used. Bugfixes -------- - Correct isequal, isnan, and similar routines 0.8 August 2013 ================ This lists only the most interesting changes. 
Please refer to the version control log for a full listing. Overall ------- - Added support for LLVM/Clang 3.3. - Dropped support for LLVM/Clang v3.1. - Removed the depedency on llvm-ld (which was copied to pocl-llvm-ld to pocl tree). Now uses llvm-link instead. - Project renamed to Portable Computing Language (pocl). - Luxmark v2.0 now works. - x86_64 can now use efficient math built-in function implementations from the vecmathlib project to avoid libm calls and to exploit the SIMD instructions more efficiently in case of vector datatypes in the kernel. - Parallelize kernel inner loops "horizontally", if possible. This converts possibly sequential inner kernel loops to parallel loops by effectively performing "loop interchange" of the work-item loop and the kernel's inner loop. - Added VexCL tests to the test suite. All but one of them work with pocl. Major bugfixes -------------- - Fixed passing NULL as a buffer argument to clSetKernelArg (this time with a regression test added). - Constant BitCast expressions broken to variables to avoid crashing when copying a kernel with casts on automatic local pointers. - Fixes for i386/i686. Tested on Pentium4/Ubuntu 10.04 LTS. - Lots of API error checking added (found by the Piglit testing suite). - Fixed bug in select producing incorrect results when the third conditional argument is an unsigned scalar or vector. - Replaced deprecated SSE 4.1 assembly mneunomics in x86-64 min/max kernel functions that have since been removed in more recent versions of gas and llvm-as. - SPIR/LLVM IR 'byval' attributes are now handled correctly on kernel function arguments, allowing for structs and oversized vectors to be passed in with value semantics. - Fixed to work with the latest Khronos OpenCL headers for 1.2. Some issues fixed with the new cl.hpp. - The ICD dispatch table was too small which might have caused "interesting" behavior when calling the later functions in the table and not using ocl-icd as the dispatcher. 
- Several kernel compiler bugs fixed. - A multithreaded host application could free the same object multiple times due to a race issue. Platform Layer implementations (OpenCL 1.2 Chapter 4) ----------------------------------------------------- - Return correctly formatted CL_DEVICE_VERSION and CL_DEVICE_OPENCL_C_VERSION. - clGetDeviceInfo: Use the 'cpufreq' sys interface of Linux for querying the CPU clock frequency, if available. The OpenCL Runtime (OpenCL 1.2 Chapter 5) ----------------------------------------- - clGetEventInfo: Querying the command type, command queue, and the reference count of the event. Builtin Function Implementations (OpenCL 1.2 Section 6.12) ---------------------------------------------------------- - convert_type* builtins now generated with a Python script by Victor Oliveira. - length() fingerprint was assuming two arguments instead of one. - The kernel bitcode library is now optimized when built in pocl. Speeds up kernel optimization for cases which use the kernel functions a lot. - Fix mul_hi() implementation ICD --- - Fixed pocl tests to work when executed through the Khronos supplied icd loader (needs a patch applied to the loader be able to override the .icd search path). Misc. ----- - Fix to the helper script search logic: Search from the BUILDDIR only if env POCL_BUILDING is defined. Otherwise search from PKGDATADIR first, then from the PATH. - Fixed memory leaks in clCreateContext* and clCreateKernel - Ensured that stored arguments are adequately aligned in clSetKernelArg and clEnqueueNDRangeKernel. 0.7 January 2013 ================= This lists only the most interesting changes. Please refer to the version control log for a full listing. Overall ------- - Support for LLVM 3.2. - Multi-WI work group functions can be now generated using loops which are only partially unrolled. Reduces code size explosion with large WGs in comparison to the full replication method. - PowerPC 64 support (tested on Cell/Debian Sid/PS3). 
- PowerPC 32 support (tested on Cell/Debian Sid/PS3). - ARM v7 support (on Linux) - Beginning of Cell SPU support (very experimental!). - Most of the AMD APP SDK OpenCL examples now work and have been added to the pocl test suite. - Most of the Parboil benchmark cases added to the test suite. Kernel Compiler Passes ---------------------- - Several miscompilations and compiler crashes fixed. - Multiple bugs fixed from the work group vectorizer. - Updated metadata format pocl uses to pass information to vectorization and TCE backend to simplify debuging. - Kernel pointer arguments are not always marked 'noalias' (restricted). Doing this previously was a specs misunderstanding. - ConstantGEPs to static variables generated from automated locals caused problems. Now converting them to normal GEPs using a pass from the SAFECode project. OpenCL Platform Layer implementations (OpenCL 1.2 Chapter 4) ------------------------------------------------------- - clGetDeviceInfo now uses the hwloc lib for device property queries. Many new queries implemented. - clGetKernelInfo (initial implementation) - clGetMemObjectInfo (initial implementation) - clGetCommandQueueInfo (initial implementation) - clReleaseDevice - clRetainDevice - Proper freeing of devices in clReleaseContext The OpenCL Runtime Implementations (OpenCL 1.2 Chapter 5) --------------------------------------------------------- - clBuildProgram: support for passing options to the compiler. - clEnqueueMarker OpenCL C Builtin Function Implementations (OpenCL 1.2 Section 6.12) ------------------------------------------------------------------- - Atomic Functions (6.12.11) - get_global_offset() was not linked correctly Framework --------- - Made it possible to override the .cl -> .bc build command called by clBuildProgram per device. 
Device Drivers -------------- - pthread/basic: * extract CPU clock frequency from /proc/cpuinfo, if available * return cl_khr_fp64 if doubles supported by the CPU - ttasim: support for explicitly calling custom/special operations through the vendor extensions API Misc. ----- - Fixes for MacOSX builds. - Fixed passing NULL as a buffer argument to clSetKernelArguments - Fixed a major bug when launching the same kernel multiple times: the arguments very not copied to the command object. - Fixed several issues with ICD, it is now considered stable to be used by default. 0.6 August 2012 ================= Kernel library -------------- - Added initial optimized kernel library for X86_64/SSE. - Preliminary support for ARM architectures on Linux (briefly tested on MeeGo/Nokia N9). Pthread device driver --------------------- - Multithreading at the work group granularity using pthreads. - Tries to figure out the optimal maximum number of threads for the system based on the available hardware threads. Currently works only in Linux using the /proc/cpuinfo interface. - Region-based customized memory allocator for speeding up buffer allocations. Kernel compiler --------------- - Most of the tricky work group barrier cases (barriers inside for-loops etc) now supported. - Support for local variables, also automatic locals. - Reuse previous compilation results, if available. - Automatic vectorization of work groups (multiple work items in parallel). Miscellaneous ------------- - Installable Client Driver (icd) support. - Event profiling support (incomplete, works only for kernel and buffer read/write/map/unmap events). Known issues ------------ - Non-pointer struct kernel arguments fail due to varying ABIs * https://bugs.launchpad.net/pocl/+bug/987905 - Produces always "fully unrolled" chains of work items for work groups causing code size explosion for large WGs. 
pocl-1.4/CMakeLists.txt000066400000000000000000001631061355011147700150660ustar00rootroot00000000000000#============================================================================= # CMake build system files # # Copyright (c) 2014-2018 pocl developers # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # #============================================================================= cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR) project(pocl) # Fix behavior of CMAKE_CXX_STANDARD when targeting macOS. 
if(POLICY CMP0025)
  cmake_policy(SET CMP0025 NEW)
endif()

include(CheckCCompilerFlag)

# don't allow implicit function declarations
# (GCC and Clang only; -Werror=incompatible-pointer-types is added when the
# compiler accepts -Wincompatible-pointer-types)
if(UNIX)
  if((CMAKE_C_COMPILER_ID STREQUAL "GNU") OR (CMAKE_C_COMPILER_ID STREQUAL "Clang"))
    check_c_compiler_flag("-Wincompatible-pointer-types" HAVE_WARN_INCOMPATIBLE_POINTER_TYPES)
    set(FORBIT_IMPLICIT_FUNCTIONS "-Werror=implicit-function-declaration")
    if (HAVE_WARN_INCOMPATIBLE_POINTER_TYPES)
      # NOTE: this turns FORBIT_IMPLICIT_FUNCTIONS into a ;-separated list
      set(FORBIT_IMPLICIT_FUNCTIONS ${FORBIT_IMPLICIT_FUNCTIONS} "-Werror=incompatible-pointer-types")
    endif()
    add_compile_options(${FORBIT_IMPLICIT_FUNCTIONS})
  else()
    message(WARNING "Don't know how to forbid this compiler from allowing implicit function declarations.")
  endif()
endif()

set(MAJOR_VERSION 1)
set(MINOR_VERSION 4)
set(VERSION_SUFFIX "")
set(VERSION_STRING ${MAJOR_VERSION}.${MINOR_VERSION}${VERSION_SUFFIX})
set(POCL_VERSION ${VERSION_STRING})

# required b/c SHARED libs defaults to ON while OBJECT defaults to OFF
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

enable_testing()

#####################################################

# Default build type: Debug for a git checkout, RelWithDebInfo otherwise.
if(EXISTS "${CMAKE_SOURCE_DIR}/.git")
  set(DEFAULT_BUILD_TYPE "Debug")
else()
  set(DEFAULT_BUILD_TYPE "RelWithDebInfo")
endif()

if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.")
  set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING "Choose the type of build." FORCE)
  # Set the possible values of build type for cmake-gui
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

##################################################################################

# set_expr(VAR <condition...>): sets VAR to 1 if the condition given in the
# remaining arguments evaluates to true in if(), otherwise to 0.
macro(set_expr VAR)
  if(${ARGN})
    set(${VAR} 1)
  else()
    set(${VAR} 0)
  endif()
endmacro()

find_program(BASH "bash")
find_program(MAKE_PROGRAM NAMES "make")
find_program(GIT_CMD "git")
set_expr(HAVE_GIT GIT_CMD)

# For prereleases (VERSION_SUFFIX contains "pre") append the git description
# of the checked-out commit to the version string.
if(HAVE_GIT AND (VERSION_SUFFIX MATCHES "pre"))
  execute_process(COMMAND "${GIT_CMD}" "rev-parse" "HEAD"
                  OUTPUT_VARIABLE GIT_COMMIT
                  RESULT_VARIABLE EXITCODE
                  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  message(STATUS "Pocl source Git commit: ${GIT_COMMIT}")
  execute_process(COMMAND "${GIT_CMD}" "branch" "--contains" "${GIT_COMMIT}"
                  OUTPUT_VARIABLE GIT_BRANCH
                  RESULT_VARIABLE EXITCODE
                  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  message(STATUS "Pocl source Git branch: ${GIT_BRANCH}")
  execute_process(COMMAND "${GIT_CMD}" describe "--always" "--long" "--all" "${GIT_COMMIT}"
                  OUTPUT_VARIABLE GIT_DESCRIBE
                  RESULT_VARIABLE EXITCODE
                  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  string(REPLACE "heads/" "" GIT_DESCRIBE "${GIT_DESCRIBE}")
  message(STATUS "Pocl source Git describe: ${GIT_DESCRIBE}")
  set(VERSION_SUFFIX "-pre/${GIT_DESCRIBE}")
  set(VERSION_STRING ${MAJOR_VERSION}.${MINOR_VERSION}${VERSION_SUFFIX})
  set(POCL_VERSION ${VERSION_STRING})
else()
  message(STATUS "No git and/or not a prerelease -> not adding git commit to version.")
endif()

##################################################################################

option(OCS_AVAILABLE "Online compiler support available to build pocl with. Default is available." ON)

option(BUILD_SHARED_LIBS "ON=Build shared libs, OFF=static libs" ON)

option(POCL_DEBUG_MESSAGES "Enable debug messages from pocl (useful for OpenCL developers), must be enabled at runtime, with env var POCL_DEBUG" ON)

option(ENABLE_HSA "Enable the HSA base profile runtime device driver" OFF)

option(ENABLE_CUDA "Enable the CUDA device driver for NVIDIA devices" OFF)

option(KERNEL_CACHE_DEFAULT "Default value for the kernel compile cache. If disabled, pocl will still use the kernel cache, but will delete cachefiles on exit. You can still enable keeping the files it at runtime with an env var." ON)

option(POCL_ICD_ABSOLUTE_PATH "Use absolute path in pocl.icd" ON)

option(ENABLE_POCL_BUILDING "When OFF, env var POCL_BUILDING has no effect. Defaults to ON" ON)

#### these are mostly useful for pocl developers

option(DEVELOPER_MODE "This will SIGNIFICANTLY slow down pocl (but speed up its compilation). Only turn on if you know what you're doing." OFF)

option(USE_POCL_MEMMANAGER "Enables custom memory manager. Except for special circumstances, this should be disabled." OFF)

option(EXAMPLES_USE_GIT_MASTER "If enabled, some of the external testsuites in examples/ will try to use sources from Git master, instead of releases. This may result in failure to build or run the examples" OFF)

option(ENABLE_HOST_CPU_DEVICES "Add host CPUs as OpenCL devices (basic and pthread)." ON)

option(ENABLE_ACCEL_DEVICE "Enable the generic hardware accelerator device driver." OFF)

####

# Sanitizers: currently only works with gcc as host compiler.
# Each sanitizer is offered as an option only from the GCC version that
# is checked below; otherwise it is forced OFF.
if (CMAKE_C_COMPILER_ID STREQUAL "GNU")

  if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.7.99")
    option(ENABLE_ASAN "Enable AddressSanitizer" OFF)
    option(ENABLE_TSAN "Enable ThreadSanitizer" OFF)
  else()
    set(ENABLE_ASAN OFF CACHE INTERNAL "Enable AddressSanitizer")
    set(ENABLE_TSAN OFF CACHE INTERNAL "Enable ThreadSanitizer")
  endif()

  if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.8.99")
    option(ENABLE_UBSAN "Enable UBSanitizer" OFF)
  else()
    set(ENABLE_UBSAN OFF CACHE INTERNAL "Enable UBSanitizer")
  endif()

  if (CMAKE_C_COMPILER_VERSION VERSION_GREATER "5.0.99")
    option(ENABLE_LSAN "Enable LeakSanitizer" OFF)
  else()
    set(ENABLE_LSAN OFF CACHE INTERNAL "Enable LeakSanitizer")
  endif()

else()
  # Not GCC: force *all four* sanitizers off, so that none of them can be
  # switched on from the command line for an unsupported compiler.
  set(ENABLE_ASAN OFF CACHE INTERNAL "Enable AddressSanitizer")
  set(ENABLE_TSAN OFF CACHE INTERNAL "Enable ThreadSanitizer")
  set(ENABLE_UBSAN OFF CACHE INTERNAL "Enable UBSanitizer")
  set(ENABLE_LSAN OFF CACHE INTERNAL "Enable LeakSanitizer")
endif()

# Unfortunately the way CMake tests work, if they're given
# a pass/fail expression, they don't check for exit status.
# This was causing some false negatives with ASan (test was
# returning with 1, but CMake reported it as pass because
# the pass expression was present in output).
# ENABLE_ANYSAN is set as soon as at least one sanitizer is switched on.
foreach(_san ASAN TSAN UBSAN LSAN)
  if(ENABLE_${_san})
    set(ENABLE_ANYSAN 1)
  endif()
endforeach()

##########################################################

# Pointer width of the host, in bits.
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
  set(HOST_DEVICE_ADDRESS_BITS 32)
elseif(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(HOST_DEVICE_ADDRESS_BITS 64)
else()
  message(FATAL_ERROR "Cannot figure out HOST_DEVICE_ADDRESS_BITS")
endif()

##################################################################################

# Classify the host CPU family; ARM and x86 are further split by pointer width.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc")
  set(POWERPC 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "mips")
  set(MIPS 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(arm|aarch64)")
  set(ARM 1)
  if(HOST_DEVICE_ADDRESS_BITS EQUAL 64)
    set(ARM64 1)
  else()
    set(ARM32 1)
  endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(i.86|AMD64|x86_64|amd64)")
  set(X86 1)
  if(HOST_DEVICE_ADDRESS_BITS EQUAL 64)
    set(X86_64 1)
  else()
    set(I386 1)
  endif()
endif()

# Number of host cores; ProcessorCount is only consulted with CMake 3+,
# and anything below 1 is clamped to 1.
set(CORECOUNT 0)
if(CMAKE_MAJOR_VERSION GREATER 2)
  include(ProcessorCount)
  ProcessorCount(CORECOUNT)
endif()
if(CORECOUNT LESS 1)
  set(CORECOUNT 1)
endif()
message(STATUS "Host CPU cores: ${CORECOUNT}")

######################################################################################

# rename_if_different(SRC DST)
# Moves SRC over DST, unless DST already exists with the same MD5 digest
# as SRC; in that case nothing is touched.
function(rename_if_different SRC DST)
  if(EXISTS "${DST}")
    file(MD5 "${SRC}" SRC_DIGEST)
    file(MD5 "${DST}" DST_DIGEST)
    if(SRC_DIGEST STREQUAL DST_DIGEST)
      return()
    endif()
  endif()
  file(RENAME "${SRC}" "${DST}")
endfunction()

######################################################################################

# Recent versions of CMake can make use of Ninja's console pool to avoid
# buffering the output of particular commands.
# USES_TERMINAL is only passed to custom commands with CMake >= 3.2.
if(CMAKE_VERSION VERSION_LESS 3.2.0)
  set(COMMAND_USES_TERMINAL)
else()
  set(COMMAND_USES_TERMINAL USES_TERMINAL)
endif()

if(UNIX)
  include(GNUInstallDirs)
else()
  if (WIN32)
    # GNUInstallDirs is not included here, so provide the CMAKE_INSTALL_*DIR
    # values ourselves. The variable *names* must be given to set(); writing
    # set(${CMAKE_INSTALL_LIBDIR} ...) would dereference the (empty) variable
    # and leave all four directories unset.
    set(CMAKE_INSTALL_LIBDIR "lib")
    set(CMAKE_INSTALL_DATADIR "share")
    set(CMAKE_INSTALL_INCLUDEDIR "include")
    set(CMAKE_INSTALL_BINDIR "bin")
    message(STATUS "Setting installation destination on Windows to: ${CMAKE_INSTALL_PREFIX}")
  else()
    message(FATAL_ERROR "System not UNIX nor WIN32 - not implemented yet")
  endif()
endif()

# for libpocl.so
set(POCL_INSTALL_PUBLIC_LIBDIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" CACHE PATH "POCL public libdir")

# for llvmopencl.so
set(POCL_INSTALL_PRIVATE_LIBDIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/pocl" CACHE PATH "POCL private libdir")

# for pocl.icd
# (a native build installed to /usr puts the ICD file under /etc,
# everything else keeps it below the install prefix)
if(UNIX AND (NOT CMAKE_CROSSCOMPILING) AND (CMAKE_INSTALL_PREFIX STREQUAL "/usr"))
  set(POCL_INSTALL_ICD_VENDORDIR "/etc/OpenCL/vendors" CACHE PATH "POCL ICD file destination")
else()
  set(POCL_INSTALL_ICD_VENDORDIR "${CMAKE_INSTALL_PREFIX}/etc/OpenCL/vendors" CACHE PATH "POCL ICD file destination")
endif()

# for the kernel library bitcode files (kernel-*.bc)
set(POCL_INSTALL_PRIVATE_DATADIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATADIR}/pocl" CACHE PATH "POCL private datadir")

# for poclu.h
set(POCL_INSTALL_PUBLIC_HEADER_DIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}" CACHE PATH "POCL public header dir")

# for _kernel.h et al
set(POCL_INSTALL_PRIVATE_HEADER_DIR "${POCL_INSTALL_PRIVATE_DATADIR}/include" CACHE PATH "POCL private header dir")

# for pocl-standalone et al
set(POCL_INSTALL_PUBLIC_BINDIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}" CACHE PATH "POCL public bindir")

# for PoclConfig.cmake & stuff
set(POCL_INSTALL_CMAKE_CONFIG_DIR "${POCL_INSTALL_PRIVATE_LIBDIR}" CACHE PATH "Installation directory for CMake files")

# TODO maybe use output of pkg-config --variable=pc_path pkg-config ?
set(POCL_INSTALL_PKGCONFIG_DIR "${POCL_INSTALL_PUBLIC_LIBDIR}/pkgconfig" CACHE PATH "Destination for pocl.pc")

# OpenCL headers go to <include>/OpenCL on Apple and <include>/CL elsewhere.
if(APPLE)
  set(CMAKE_MACOSX_RPATH ON)
  set(POCL_INSTALL_OPENCL_HEADER_DIR "${POCL_INSTALL_PUBLIC_HEADER_DIR}/OpenCL" CACHE PATH "POCL header dir for OpenCL headers")
else()
  set(POCL_INSTALL_OPENCL_HEADER_DIR "${POCL_INSTALL_PUBLIC_HEADER_DIR}/CL" CACHE PATH "POCL header dir for OpenCL headers")
endif()

######################################################################################

######################################################################################

set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")

# hwloc is only searched for in native UNIX builds.
if(UNIX AND (NOT CMAKE_CROSSCOMPILING))
  find_package(PkgConfig MODULE REQUIRED)
  find_package(Hwloc)
  if(NOT Hwloc_FOUND)
    message(WARNING "hwloc package not found")
    # do not keep a stale/forced ENABLE_HWLOC=ON around when hwloc is missing
    set(ENABLE_HWLOC OFF CACHE BOOL "Hwloc" FORCE)
  else()
    if("${Hwloc_VERSION}" VERSION_LESS "1.0")
      message(FATAL_ERROR "Hwloc version must be >= 1.0 !")
    endif()
    message(STATUS "Hwloc_VERSION ${Hwloc_VERSION}")
    message(STATUS "Hwloc_LDFLAGS ${Hwloc_LDFLAGS}")
    message(STATUS "Hwloc_CFLAGS ${Hwloc_CFLAGS}")
    set(ENABLE_HWLOC ON CACHE BOOL "Hwloc" FORCE)
  endif()
else()
  message(STATUS "Not on UNIX or cross-compiling -> building without HWLOC")
  set(ENABLE_HWLOC OFF CACHE BOOL "Hwloc" FORCE)
endif()

######################################################################################

# Detect the L1 data cacheline size with getconf, unless the user supplied
# HOST_CPU_CACHELINE_SIZE. CL_SIZE stays 0 whenever detection did not yield a
# positive integer, which triggers the 64-byte fallback below.
if(NOT HOST_CPU_CACHELINE_SIZE)
  set(CL_SIZE 0)
  if(UNIX)
    find_program(GETCONF "getconf")
    if(GETCONF)
      # run the getconf that find_program() located
      execute_process(COMMAND "${GETCONF}" "LEVEL1_DCACHE_LINESIZE"
                      RESULT_VARIABLE RES
                      OUTPUT_VARIABLE CL_SIZE)
      # strip the trailing newline *before* looking at the value
      string(STRIP "${CL_SIZE}" CL_SIZE)
      if(RES)
        message(WARNING "getconf exited with nonzero status!")
        set(CL_SIZE 0)
      elseif(CL_SIZE MATCHES "^[1-9][0-9]*$")
        message(STATUS "L1D Cacheline size detected: ${CL_SIZE}")
        set(HOST_CPU_CACHELINE_SIZE "${CL_SIZE}" CACHE STRING "L1D Cacheline size")
      else()
        # getconf sometimes just returns zero; empty or non-numeric output
        # is treated the same way instead of being cached as the size
        set(CL_SIZE 0)
      endif()
    endif()
  endif()
  if(CL_SIZE EQUAL 0)
    message(WARNING "Unable to detect cacheline size - assuming 64byte cacheline, override with -DHOST_CPU_CACHELINE_SIZE= (Note: this is merely used for optimization, at worst pocl will be slightly slower)")
    set(HOST_CPU_CACHELINE_SIZE "64" CACHE STRING "L1D Cacheline size")
  endif()
endif()

######################################################################################

#
# Find executables to few tools required during build
#

find_program(PATCH_EXEC
  NAMES patch${CMAKE_EXECUTABLE_SUFFIX}
  HINTS ENV PATH
)

find_program(XARGS_EXEC
  NAMES xargs${CMAKE_EXECUTABLE_SUFFIX}
  HINTS ENV PATH
)

if(NOT PATCH_EXEC)
  message(FATAL_ERROR "Could not find patch command.")
endif()

if(NOT XARGS_EXEC)
  message(FATAL_ERROR "Could not find xargs command.")
endif()

######################################################################################

# With online compiler support, LLVM.cmake is loaded and (for CPU devices) a
# default HOST_DEVICE_BUILD_HASH is derived from the triple / host CPU.
# Without it, the hash has to be supplied by the user.
if (OCS_AVAILABLE)

  include(LLVM RESULT_VARIABLE RES)
  if(NOT RES)
    message(FATAL_ERROR "Could not load LLVM.cmake")
  endif()

  if(ENABLE_HOST_CPU_DEVICES)
    if(NOT DEFINED HOST_DEVICE_BUILD_HASH)
      if(KERNELLIB_HOST_CPU_VARIANTS STREQUAL "distro")
        set(HOST_DEVICE_BUILD_HASH "${LLC_TRIPLE}")
      else()
        set(HOST_DEVICE_BUILD_HASH "${LLC_TRIPLE}-${LLC_HOST_CPU}")
      endif()
    endif()

    if(INTEL_SDE_AVX512)
      set(HOST_CPU_FORCED 1 CACHE INTERNAL "CPU is forced by user" FORCE)
      set(LLC_HOST_CPU "skylake-avx512" CACHE STRING "The Host CPU to use with llc" FORCE)
    endif()
  endif()

else()

  if(ENABLE_HOST_CPU_DEVICES AND (NOT DEFINED HOST_DEVICE_BUILD_HASH))
    message(FATAL_ERROR "For compiler-less builds of CPU backend, you must define HOST_DEVICE_BUILD_HASH")
  endif()

endif()

######################################################################################

if(ENABLE_HSA)
  include(HSA RESULT_VARIABLE RES)
  if(NOT RES)
    message(FATAL_ERROR "Could not load HSA.cmake")
  endif()
endif()

######################################################################################

# Linker command: ld everywhere except MSVC, where CLANGXX is used.
if (NOT MSVC)
  find_program(LINK_COMMAND
    NAMES ld${CMAKE_EXECUTABLE_SUFFIX}
    HINTS ENV PATH
  )
else()
  set(LINK_COMMAND "${CLANGXX}")
endif()
######################################################################################

# if variable FEATURE_X isn't defined, sets it to DEFAULT_FEATURE_X;
# also, if DEFAULT_FEATURE_X is 0, prevents FEATURE_X being 1
# since it takes DEFAULT_FEATURE_X=0 to mean "FEATURE_X is unavailable"
#
# Arguments:
#   VARNAME                           name of the feature variable (FEATURE_X)
#   DESCRIPTION                       text printed in the final status line
#   DOCS_FEATURE_IS_UNAVAILABLE       warning printed when the feature was
#                                     requested but its default is false
#   DOCS_REQUESTED_DISABLING_FEATURE  status printed when the feature is
#                                     available but was switched off
macro(setup_cached_var VARNAME DESCRIPTION DOCS_FEATURE_IS_UNAVAILABLE DOCS_REQUESTED_DISABLING_FEATURE)

  if(DEFINED ${VARNAME})
    set(_CACHED "(cached)")
  else()
    set(_CACHED "")
    set(${VARNAME} ${DEFAULT_${VARNAME}})
  endif()

  if(${VARNAME} AND (NOT ${DEFAULT_${VARNAME}}))
    message(WARNING "${DOCS_FEATURE_IS_UNAVAILABLE}")
    set(${VARNAME} 0)
    set(_CACHED "(override)")
  endif()
  if((NOT ${VARNAME}) AND ${DEFAULT_${VARNAME}} )
    message(STATUS "${DOCS_REQUESTED_DISABLING_FEATURE}")
  endif()
  if(${VARNAME})
    message(STATUS "${DESCRIPTION} ${_CACHED}: 1")
  else()
    message(STATUS "${DESCRIPTION} ${_CACHED}: 0")
  endif()
endmacro()

######################################################################################

# Probe for libc functions; each HAVE_* result is 0 on non-UNIX systems.
if(UNIX)
  include(CheckCSourceCompiles)
  include(CheckSymbolExists)

  # don't allow implicit function declarations
  # FORBIT_IMPLICIT_FUNCTIONS may hold several flags as a ;-separated list,
  # while CMAKE_REQUIRED_FLAGS has to be one space-delimited string.
  string(REPLACE ";" " " _FORBID_FLAGS_STR "${FORBIT_IMPLICIT_FUNCTIONS}")
  set(CMAKE_REQUIRED_FLAGS "-std=c99 ${_FORBID_FLAGS_STR}")
  unset(_FORBID_FLAGS_STR)
  if (CMAKE_SYSTEM_NAME MATCHES "Linux")
    set(CMAKE_REQUIRED_LIBRARIES "rt")
  endif ()

  CHECK_SYMBOL_EXISTS("fork" "sys/types.h;unistd.h" HAVE_FORK)
  CHECK_SYMBOL_EXISTS("fsync" "unistd.h" HAVE_FSYNC)
  CHECK_SYMBOL_EXISTS("sleep" "unistd.h" HAVE_SLEEP)
  CHECK_SYMBOL_EXISTS("getrlimit" "sys/time.h;sys/resource.h" HAVE_GETRLIMIT)
  CHECK_SYMBOL_EXISTS("utime" "sys/types.h;utime.h" HAVE_UTIME)

  # the remaining checks need specific feature-test macros
  set(CMAKE_REQUIRED_DEFINITIONS "-D_POSIX_C_SOURCE=200809L")
  CHECK_SYMBOL_EXISTS("futimens" "fcntl.h;sys/stat.h" HAVE_FUTIMENS)

  set(CMAKE_REQUIRED_DEFINITIONS "-D_POSIX_C_SOURCE=200112L")
  CHECK_SYMBOL_EXISTS("posix_memalign" "stdlib.h" HAVE_POSIX_MEMALIGN)

  set(CMAKE_REQUIRED_DEFINITIONS "-D_POSIX_C_SOURCE=199309L")
  CHECK_SYMBOL_EXISTS("clock_gettime" "time.h" HAVE_CLOCK_GETTIME)
  CHECK_SYMBOL_EXISTS("fdatasync" "unistd.h" HAVE_FDATASYNC)

  set(CMAKE_REQUIRED_DEFINITIONS "-D_BSD_SOURCE" "-D_DEFAULT_SOURCE")
  CHECK_SYMBOL_EXISTS("mkdtemp" "stdlib.h;unistd.h" HAVE_MKDTEMP)
  CHECK_SYMBOL_EXISTS("mkstemps" "stdlib.h;unistd.h" HAVE_MKSTEMPS)
  CHECK_SYMBOL_EXISTS("vfork" "sys/types.h;unistd.h" HAVE_VFORK)

  set(CMAKE_REQUIRED_DEFINITIONS "-D_GNU_SOURCE")
  CHECK_SYMBOL_EXISTS("mkostemps" "stdlib.h" HAVE_MKOSTEMPS)

  set(CMAKE_REQUIRED_LIBRARIES "dl")
  CHECK_SYMBOL_EXISTS("dladdr" "dlfcn.h" HAVE_DLADDR)

  # do not let the probe settings leak into later checks
  unset(CMAKE_REQUIRED_DEFINITIONS)
  unset(CMAKE_REQUIRED_FLAGS)
  unset(CMAKE_REQUIRED_LIBRARIES)
else()
  set(HAVE_CLOCK_GETTIME 0)
  set(HAVE_FDATASYNC 0)
  set(HAVE_FSYNC 0)
  set(HAVE_SLEEP 0)
  set(HAVE_MKOSTEMPS 0)
  set(HAVE_MKSTEMPS 0)
  set(HAVE_MKDTEMP 0)
  set(HAVE_FUTIMENS 0)
  set(HAVE_FORK 0)
  set(HAVE_GETRLIMIT 0)
  set(HAVE_VFORK 0)
  set(HAVE_UTIME 0)
  set(HAVE_DLADDR 0)
endif()

######################################################################################

# Relocation is offered only on UNIX, with online compiler support and dladdr().
if(UNIX AND OCS_AVAILABLE AND HAVE_DLADDR)
  option(ENABLE_RELOCATION "make libpocl relocatable" ON)
else()
  message(STATUS "Relocation not available")
  set(ENABLE_RELOCATION OFF CACHE INTERNAL "libpocl relocatable" FORCE)
endif()

if(ENABLE_RELOCATION)
  # path of the private datadir relative to the public libdir
  file(RELATIVE_PATH POCL_INSTALL_PRIVATE_DATADIR_REL ${POCL_INSTALL_PUBLIC_LIBDIR} ${POCL_INSTALL_PRIVATE_DATADIR})
  message(STATUS "Private Datadir Relative path: ${POCL_INSTALL_PRIVATE_DATADIR_REL}")
  install(FILES ${CLANG_OPENCL_HEADERS}
          DESTINATION "${POCL_INSTALL_PRIVATE_DATADIR}/include")
endif()

######################################################################################

option(ENABLE_SLEEF "Use SLEEF for kernel library" ON)

option(ENABLE_CONFORMANCE "Enable conformance to OpenCL standard. Disabling this may enable slightly faster kernel library functions (at a price of range/precision). Note that enabling this does not guarantee conformance (depends on hardware)" ON)

if(ENABLE_CONFORMANCE AND (NOT ENABLE_SLEEF))
  message(FATAL_ERROR "conformance needs enabled SLEEF")
endif()

######################################################################################

# fully device-side printf on devices which support it (only CPU backend ATM), disabled by default.
# this requires 128bit integer support because of the code in "errol" float-to-string conversion routine
# the output is not 100% compatible with glibc's printf (%f with large argument prints zeroes after
# last significant digit - 16-18th digit or so, unlike glibc which prints digits up to decimal point).

if(CLANG_HAS_128B_MATH)
  option(ENABLE_POCL_FLOAT_CONVERSION "Enable use of pocl's own float-to-decimal conversion code in OpenCL printf(). Defaults to OFF (uses snprintf from C library). Requires compiler-rt." OFF)
else()
  set(ENABLE_POCL_FLOAT_CONVERSION OFF CACHE INTERNAL "pocl's own float-to-decimal conversion code")
endif()

unset(FLOATCONV_FLAG)
if(ENABLE_POCL_FLOAT_CONVERSION)
  # force link with Clang; otherwise not needed on x86 but in this case we need rtlib
  set(FLOATCONV_FLAG "-DENABLE_POCL_FLOAT_CONVERSION")
endif()

######################################################################################

# for kernel code, disable PIC & stack protector
#
# it seems PIC and stack-protector defaults somehow depend on
# clang build type or environment. PIC causes problems with
# constant addrspace variables, and stack protector likely slows
# down the kernels, so it needs to be determined whether it's worth
# the trouble.
set(DEFAULT_KERNEL_CL_FLAGS "-xcl -fno-stack-protector -fPIC ${FLOATCONV_FLAG}")
set(DEFAULT_KERNEL_C_FLAGS "-xc -std=c11 -D__CBUILD__ -fno-math-errno -fno-stack-protector -fPIC ${FLOATCONV_FLAG}")
set(DEFAULT_KERNEL_CXX_FLAGS "-xc++ -std=c++11 -fno-stack-protector -fPIC ${FLOATCONV_FLAG}")

set(EXTRA_KERNEL_FLAGS "" CACHE STRING "Extra arguments to all kernel compilation commands (defaults to empty)")
set(EXTRA_KERNEL_CL_FLAGS "" CACHE STRING "Extra arguments to kernel CL compiler (defaults to empty)")
set(EXTRA_KERNEL_CXX_FLAGS "" CACHE STRING "Extra arguments to kernel CXX compiler (defaults to empty)")
set(EXTRA_KERNEL_C_FLAGS "" CACHE STRING "Extra arguments to kernel C compiler (defaults to empty)")

# Final kernel flags = defaults + common extras + per-language extras.
# The three parts are joined with a space: gluing them together directly
# fuses the last flag of one part with the first flag of the next
# (e.g. "-mfpu=neon" + "-O1" -> "-mfpu=neon-O1").
set(KERNEL_CXX_FLAGS "${DEFAULT_KERNEL_CXX_FLAGS} ${EXTRA_KERNEL_FLAGS} ${EXTRA_KERNEL_CXX_FLAGS}")
set(KERNEL_CL_FLAGS "${DEFAULT_KERNEL_CL_FLAGS} ${EXTRA_KERNEL_FLAGS} ${EXTRA_KERNEL_CL_FLAGS}")
set(KERNEL_C_FLAGS "${DEFAULT_KERNEL_C_FLAGS} ${EXTRA_KERNEL_FLAGS} ${EXTRA_KERNEL_C_FLAGS}")
string(STRIP "${KERNEL_CXX_FLAGS}" KERNEL_CXX_FLAGS)
string(STRIP "${KERNEL_CL_FLAGS}" KERNEL_CL_FLAGS)
string(STRIP "${KERNEL_C_FLAGS}" KERNEL_C_FLAGS)

######################################################################################

# Default linker flags for producing loadable modules on the host.
if(UNIX)
  if(APPLE)
    # MacOS ld outputs useless warnings like
    # ld: warning: -macosx_version_min not specificed, assuming 10.7
    # suppress them with -w.
    set(DEFAULT_HOST_LD_FLAGS "-dynamiclib -w -lm")
  elseif(ANDROID)
    set(DEFAULT_HOST_LD_FLAGS "-L/system/lib/ -shared -ldl -lc /system/lib/crtbegin_so.o /system/lib/crtend_so.o")
  else()
    set(DEFAULT_HOST_LD_FLAGS "-shared")
  endif()
  set(LIBMATH "-lm")
elseif(WIN32)
  set(LIBMATH)
endif()

if(CLANG_NEEDS_RTLIB)
  set(DEFAULT_HOST_LD_FLAGS "${DEFAULT_HOST_LD_FLAGS} --rtlib=compiler-rt")
endif()

######################################################################################

if(UNIX)
  if(APPLE)
    # TODO MACOSX_BUNDLE target prop
    set(ICD_LD_FLAGS "-single_module")
  else()
    set(ICD_LD_FLAGS "-Wl,-Bsymbolic")
  endif()
endif()

######################################################################################

# Choose between the single libLLVM library and the individual LLVM_LIBFILES.
if (OCS_AVAILABLE)
  option(SINGLE_LLVM_LIB "When on, tries to link pocl to the single big libLLVM before falling back to LLVM_LIBFILES)." ON)

  if (DEFINED STATIC_LLVM)
    message(AUTHOR_WARNING "STATIC_LLVM option was renamed, it wasn't really ensuring anything was static. Please see the SINGLE_LLVM_LIB option")
  endif()

  if(NOT SINGLE_LLVM_LIB)
    message(STATUS "Linking LLVM to LLVM_LIBFILES")
    set(POCL_LLVM_LIBS ${LLVM_LIBFILES})
  else()
    message(STATUS "Trying to link LLVM to the single big libLLVM")
    find_library(LLVM_SHARED_LIB_FILE
      NAMES "LLVM-${LLVM_VERSION}" "LLVM"
      PATHS "${LLVM_LIBDIR}"
      NO_DEFAULT_PATH)
    if(LLVM_SHARED_LIB_FILE AND EXISTS "${LLVM_SHARED_LIB_FILE}")
      message(STATUS " .. OK, using ${LLVM_SHARED_LIB_FILE}")
      set(POCL_LLVM_LIBS "${LLVM_SHARED_LIB_FILE}")
    else()
      message(STATUS "single big libLLVM library not found (Probably because LLVM is built with cmake). Falling back to linking libpocl to LLVM_LIBFILES")
      set(POCL_LLVM_LIBS ${LLVM_LIBFILES})
      set(SINGLE_LLVM_LIB OFF CACHE BOOL "single big libLLVM")
    endif()
  endif()
endif()

######################################################################################

# SPIR is offered on x86 with online compiler support (except for "distro"
# kernel library builds); SPIR-V additionally needs an existing llvm-spirv.
set(SPIRV OFF)

if(OCS_AVAILABLE AND X86 AND (NOT KERNELLIB_HOST_CPU_VARIANTS STREQUAL "distro"))
  option(ENABLE_SPIR "Enable SPIR support (default ON when available)" ON)
else()
  set(ENABLE_SPIR OFF CACHE INTERNAL "SPIR enabled" FORCE)
endif()

if(ENABLE_SPIR)
  message(WARNING "SPIR support is available but highly experimental; use at your own risk.")
  if(LLVM_SPIRV AND (EXISTS "${LLVM_SPIRV}"))
    message(WARNING "SPIR-V support enabled but highly experimental; you must use a llvm-spirv "
      "converter that produces bitcode FOR YOUR LLVM VERSION. "
      "E.g. if you're compiling pocl against LLVM 5 then using Khronos' "
      "llvm-spirv based on LLVM 3.6 branch WILL NOT WORK.")
    set(SPIRV ON)
  endif()
endif()

set(ENABLE_SPIRV ${SPIRV} CACHE INTERNAL "SPIR-V enabled" FORCE)

######################################################################################

add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_0_APIS
                -DCL_USE_DEPRECATED_OPENCL_1_1_APIS
                -DCL_USE_DEPRECATED_OPENCL_1_2_APIS
                -DCL_USE_DEPRECATED_OPENCL_2_0_APIS
                -DCL_USE_DEPRECATED_OPENCL_2_1_APIS
)
add_definitions(-DCL_TARGET_OPENCL_VERSION=220)

include_directories("include")

######################################################################################

# Look for libdl + dlfcn.h on UNIX; HAVE_LIBDL stays OFF everywhere else.
set(HAVE_LIBDL OFF CACHE BOOL "dlopen" FORCE)

if(WIN32)
  message(STATUS "Using LoadLibrary/FreeLibrary in Windows, libltdl not needed.")
elseif(UNIX)

  if (CMAKE_CROSSCOMPILING AND (NOT ENABLE_HOST_CPU_DEVICES) AND (NOT ENABLE_HSA))
    message(STATUS "Cross-compiling without CPU/HSA devices -> skipping LIBDL search")
  else()

    find_library(DL_LIB "dl")
    find_file(DL_H "dlfcn.h")

    if(DL_LIB AND DL_H)
      message(STATUS "libdl found")

      get_filename_component(DL_H_INCLUDE_DIR "${DL_H}" DIRECTORY)
      string(FIND "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}" "${DL_H_INCLUDE_DIR}" LTPOSITION)
      # include the directory of dlfcn.h, if its not in the default system include dirs
      # also when cross-compiling this includes /usr/include, which screws things up
      if((LTPOSITION LESS "0") AND (NOT CMAKE_CROSSCOMPILING))
        include_directories("${DL_H_INCLUDE_DIR}")
      endif()

      set(HAVE_LIBDL ON CACHE BOOL "dlopen" FORCE)
    else()
      message(FATAL_ERROR "Could not find DL library!")
    endif()

  endif()
else()
  message(STATUS "Unknown OS, don't know how to load a dynamic library")
endif()

######################################################################################

set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
find_package(Threads REQUIRED)
# the imported Threads::Threads target is used with CMake newer than 3.0
if(CMAKE_VERSION VERSION_GREATER "3.0.99")
  set(PTHREAD_LIBRARY Threads::Threads)
else()
  set(PTHREAD_LIBRARY ${CMAKE_THREAD_LIBS_INIT})
endif()

######################################################################################

# LTTNG
if((NOT MSVC) AND (NOT ANDROID))
  pkg_check_modules(LTTNG_UST lttng-ust>=2.7)
  if(LTTNG_UST_FOUND)
    set(HAVE_LTTNG_UST 1)
  else()
    set(HAVE_LTTNG_UST 0)
  endif()
endif()

######################################################################################

# Work out DEFAULT_ENABLE_ICD (unless preset): prefer ocl-icd, then fall back
# to any libOpenCL that provides clEnqueueFillImage.
if(NOT DEFINED DEFAULT_ENABLE_ICD)

  if (MSVC)
    message(STATUS "Building ICD not yet supported on Windows.")
    set(DEFAULT_ENABLE_ICD 0 CACHE INTERNAL "Going to use ICD loader")
  else()

    # pkg-config doesn't work with cross-compiling
    if (NOT CMAKE_CROSSCOMPILING)
      pkg_check_modules(OCL_ICD ocl-icd>=1.3)
    endif()

    if (CMAKE_CROSSCOMPILING OR (NOT OCL_ICD_FOUND))
      find_path(OCL_ICD_INCLUDE_DIR
        NAMES ocl_icd.h
      )

      find_library(OCL_ICD_LIBRARIES
        NAMES OpenCL
      )

      if(OCL_ICD_INCLUDE_DIR AND OCL_ICD_LIBRARIES)
        set(OCL_ICD_FOUND 1)
      endif()
    endif()

    if(OCL_ICD_FOUND)

      set(HAVE_OCL_ICD 1 CACHE INTERNAL "ICL library is ocl-icd")
      set(OPENCL_FOUND 1 CACHE INTERNAL "opencl ICD/library found")
      # duh, why doesn't ocl-icd set this in its .pc file ??
      if (CMAKE_CROSSCOMPILING)
        set(OPENCL_LIBRARIES "${OCL_ICD_LIBRARIES}" CACHE INTERNAL "opencl ICD/library link flags")
      else()
        separate_arguments(OCL_LDFLAGS UNIX_COMMAND "${OCL_ICD_LDFLAGS}")
        list(APPEND OCL_LDFLAGS "OpenCL")
        set(OPENCL_LIBRARIES "${OCL_LDFLAGS}" CACHE INTERNAL "opencl ICD/library link flags")
      endif()
      set(DEFAULT_ENABLE_ICD 1 CACHE INTERNAL "ICD loader availability")

    else()
      set(HAVE_OCL_ICD 0 CACHE INTERNAL "ICL library is ocl-icd")
      unset (OPENCL_FOUND CACHE)

      if (NOT CMAKE_CROSSCOMPILING)
        # fallback to other ICD loaders
        message(STATUS "ocl-icd not found -> trying fallback ICD implementations")
        pkg_check_modules(OPENCL OpenCL>=1.2)
        if(NOT OPENCL_FOUND)
          find_library(OPENCL_LIBRARIES OpenCL)
          # version check the found library
          if(OPENCL_LIBRARIES)
            set(CMAKE_REQUIRED_LIBRARIES "${OPENCL_LIBRARIES}")
            include(CheckFunctionExists)
            unset (OPENCL_FOUND CACHE)
            CHECK_FUNCTION_EXISTS("clEnqueueFillImage" OPENCL_FOUND)
            # do not leak the probe's link libraries into later checks
            unset(CMAKE_REQUIRED_LIBRARIES)
          endif()
        endif()
      endif()

      if(OPENCL_FOUND)
        # no ocl-icd, but libopencl
        message(STATUS "libOpenCL (unknown ICD loader) found")
        set(DEFAULT_ENABLE_ICD 1 CACHE INTERNAL "ICD loader availability")
      else()
        message(STATUS "No ICD loader of any kind found (or its OpenCL version is <1.2)")
        # no ocl-icd, no libopencl
        set(DEFAULT_ENABLE_ICD 0 CACHE INTERNAL "no ICL loader found availability")
      endif()

    endif()
  endif()
endif()

setup_cached_var(ENABLE_ICD "Using an ICD loader"
  "Requested build with icd, but ICD loader not found! some examples will not work.."
  "ICD loader found, but requested build without it")

# With ICD the library is called "pocl", without it pocl itself is "OpenCL".
if(ENABLE_ICD)
  # only meaningful to link tests with ocl-icd
  set(TESTS_USE_ICD ${HAVE_OCL_ICD})
  set(POCL_LIBRARY_NAME "pocl")
else()
  set(TESTS_USE_ICD 0)
  set(POCL_LIBRARY_NAME "OpenCL")
endif()

message(STATUS "Run tests with ICD: ${TESTS_USE_ICD}")

######################################################################################

if(INSTALL_OPENCL_HEADERS)
  message(STATUS "Install POCL's OpenCL headers: ${INSTALL_OPENCL_HEADERS}")
elseif(DEFINED INSTALL_OPENCL_HEADERS AND NOT INSTALL_OPENCL_HEADERS)
  message(STATUS "Not installing OpenCL headers.")
else()
  # Undefined = auto -> check
  find_file(OPENCL_H opencl.h PATH_SUFFIXES CL OpenCL)
  if(OPENCL_H)
    message(STATUS "OpenCL.h found, NOT installing our headers")
    set(IOH 0)
  else()
    message(STATUS "OpenCL.h not found, installing our headers")
    set(IOH 1)
  endif()
  set(INSTALL_OPENCL_HEADERS ${IOH} CACHE BOOL "Install POCL's OpenCL headers. (Ones from Khronos should be installed instead)")
endif()

######################################################################################

option(PEDANTIC "Compile host library with stricter compiler flags." OFF)
if(PEDANTIC)
  add_compile_options("-Wno-unused-result" "-Werror") # maybe "-Wimplicit"
endif()

######################################################################################

set_expr(POCL_KERNEL_CACHE_DEFAULT KERNEL_CACHE_DEFAULT)

string(TIMESTAMP POCL_BUILD_TIMESTAMP "%d%m%Y%H%M%S")
file(WRITE "${CMAKE_BINARY_DIR}/pocl_build_timestamp.h" "#define POCL_BUILD_TIMESTAMP \"${POCL_BUILD_TIMESTAMP}\"")

####################################################################

# Host (basic/pthread) driver setup

set(DEFAULT_HOST_CLANG_FLAGS "${CLANG_TARGET_OPTION}${LLC_TRIPLE}")
set(DEFAULT_HOST_LLC_FLAGS "-relocation-model=pic -mtriple=${LLC_TRIPLE}")

if(ARM)
  option(ENABLE_FP64 "Enable FP64 on ARM32 - if you have at least VFP support for doubles, you can leave it ON" ON )
else()
  set(ENABLE_FP64 ON CACHE INTERNAL "FP64, always on except ARM")
endif()

# float ABI for 32-bit ARM: hard for gnueabihf triples, soft otherwise
if(ARM32 OR (LLC_TRIPLE MATCHES "^arm"))
  if(LLC_TRIPLE MATCHES "gnueabihf")
    # hardfloat
    set(DEFAULT_HOST_LLC_FLAGS "${DEFAULT_HOST_LLC_FLAGS} -float-abi=hard")
    set(DEFAULT_HOST_CLANG_FLAGS "${DEFAULT_HOST_CLANG_FLAGS} -mfloat-abi=hard")
    set(DEFAULT_HOST_AS_FLAGS "${DEFAULT_HOST_AS_FLAGS} -mfloat-abi=hard")
  else()
    # softfloat
    set(HOST_FLOAT_SOFT_ABI 1)
    set(DEFAULT_HOST_LLC_FLAGS "${DEFAULT_HOST_LLC_FLAGS} -float-abi=soft")
    set(DEFAULT_HOST_CLANG_FLAGS "${DEFAULT_HOST_CLANG_FLAGS} -mfloat-abi=soft")
    set(DEFAULT_HOST_AS_FLAGS "${DEFAULT_HOST_AS_FLAGS} -mfloat-abi=soft")
  endif()
endif()

if(CL_DISABLE_HALF)
  set(DEFAULT_HOST_CLANG_FLAGS "${DEFAULT_HOST_CLANG_FLAGS} -D_CL_DISABLE_HALF")
endif()

set(HOST_DEVICE_CL_VERSION "120")
set(HOST_DEVICE_CL_STD "1.2")

# define it here, b/c we'll need these both at runtime and buildtime
if(X86 OR ARM)
  set(HOST_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_3d_image_writes")
else()
  # set some conservative defaults
  set(HOST_DEVICE_EXTENSIONS "cl_khr_global_int32_base_atomics cl_khr_local_int32_base_atomics cl_khr_3d_image_writes")
endif()

if(ENABLE_SPIR)
  set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_spir")
endif()

if(NOT CL_DISABLE_HALF)
  set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp16")
endif()

# must not be defined in HOST_DEVICE_EXTENSIONS list, because
# this extension doesn't exist in official extension list
# there is "cles_khr_int64" which indicates int64 support for embedded profiles
set(HOST_DEVICE_EXTENSION_DEFINES "-Dcl_khr_int64")

# cl_khr_fp64 is deliberately not listed here: ENABLE_FP64 is forced ON for
# every non-ARM host above, so the ENABLE_FP64 branch below already adds it
# and listing it here too would advertise the extension twice.
if(X86)
  set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_int64_base_atomics cl_khr_int64_extended_atomics")
endif()

if(ENABLE_FP64)
  set(HOST_DEVICE_EXTENSIONS "${HOST_DEVICE_EXTENSIONS} cl_khr_fp64")
  set(_CL_DISABLE_DOUBLE 0)
else()
  set(_CL_DISABLE_DOUBLE 1)
endif()

# Turn the extension list into -D<ext> defines plus one
# "-Xclang -cl-ext=-all,+<ext>,..." option.
set(TEMP_EXT "${HOST_DEVICE_EXTENSIONS}")
separate_arguments(TEMP_EXT)
set(TEMP_CLEXT "-Xclang -cl-ext=-all,")
foreach(EXT ${TEMP_EXT})
  set(HOST_DEVICE_EXTENSION_DEFINES "${HOST_DEVICE_EXTENSION_DEFINES} -D${EXT}")
  set(TEMP_CLEXT "${TEMP_CLEXT}+${EXT},")
endforeach()
set(HOST_DEVICE_EXTENSION_DEFINES "${HOST_DEVICE_EXTENSION_DEFINES} ${TEMP_CLEXT}")

if(NOT DEFINED KERNELLIB_HOST_CPU_VARIANTS)
  set(KERNELLIB_HOST_CPU_VARIANTS "native")
# else TODO test cpu list for unknown values
endif()

# "distro" expands to a fixed list of x86 CPU variants
set(KERNELLIB_HOST_DISTRO_VARIANTS 0)
if(KERNELLIB_HOST_CPU_VARIANTS STREQUAL "distro")
  if(X86_64 OR I386)
    if(HOST_CPU_FORCED)
      message(FATAL_ERROR "Cannot build with CPU autodetection distro variants build, and enforce LLC_HOST_CPU at the same time. Please pick one")
    endif()
    set(KERNELLIB_HOST_CPU_VARIANTS sse2 ssse3 sse41 avx avx_f16c avx_fma4 avx2 avx512)
  else()
    message(FATAL_ERROR "Don't know what CPU variants to use for kernel library on this platform.")
  endif()
  set(KERNELLIB_HOST_DISTRO_VARIANTS 1)
endif()

####################################################################

set(EXTRA_HOST_AS_FLAGS "" CACHE STRING "Extra parameters to as for code generation in the host. (default: empty)")
set(EXTRA_HOST_LD_FLAGS "" CACHE STRING "Extra parameter to compiler to generate loadable module. (default: empty)")
set(EXTRA_HOST_CLANG_FLAGS "" CACHE STRING "Extra parameters to clang for host compilation. (default: empty)")
set(EXTRA_HOST_LLC_FLAGS "" CACHE STRING "Extra parameters to llc for code generation in the host. (default: empty)")

####################################################################

set(HOST_AS_FLAGS "${DEFAULT_HOST_AS_FLAGS} ${EXTRA_HOST_AS_FLAGS}")
set(HOST_LD_FLAGS "${DEFAULT_HOST_LD_FLAGS} ${EXTRA_HOST_LD_FLAGS}" )
# HOST_LD_FLAGS_ARRAY: the same flags with every whitespace run replaced by `", "`
string(STRIP "${HOST_LD_FLAGS}" HOST_LD_FLAGS_STRIPPED)
string(REGEX REPLACE "[\r\n\t ]+" "\", \"" HOST_LD_FLAGS_ARRAY "${HOST_LD_FLAGS_STRIPPED}")
# string(REPLACE "###, ###" " oo \", \" oo " HOST_LD_FLAGS_ARRAY "${HOST_LD_FLAGS_ARRAY_1}")
set(HOST_CLANG_FLAGS "${DEFAULT_HOST_CLANG_FLAGS} ${EXTRA_HOST_CLANG_FLAGS}")
set(HOST_LLC_FLAGS "${DEFAULT_HOST_LLC_FLAGS} ${EXTRA_HOST_LLC_FLAGS}")

# HSA and pthread drivers require basic driver
if(ENABLE_HSA OR ENABLE_HOST_CPU_DEVICES)
  set(BUILD_BASIC 1)
endif()

if(ENABLE_HOST_CPU_DEVICES)
  set(OCL_TARGETS "host")
  set(OCL_DRIVERS "basic pthreads")
  # TODO OCL_KERNEL_TARGET -> CPU_TARGET_TRIPLE
  # TODO OCL_KERNEL_TARGET_CPU -> OCL_KERNEL_TARGET_CPU
  set(OCL_KERNEL_TARGET "${LLC_TRIPLE}") #The kernel target triplet.
  set(OCL_KERNEL_TARGET_CPU "${LLC_HOST_CPU}") #The kernel target CPU variant.
  set(BUILD_PTHREAD 1)
endif()

# The accel device could be built by default, but it's implemented in C++,
# thus requires a C++ compiler, so let's not.
if(ENABLE_ACCEL_DEVICE) set(BUILD_BASIC 1) set(BUILD_ACCEL 1) set(OCL_DRIVERS "${OCL_DRIVERS} accel") endif() if(DEFINED EXTRA_OCL_TARGETS) set(OCL_TARGETS "${OCL_TARGETS} ${EXTRA_OCL_TARGETS}") endif() #################################################################### # Determine which device drivers to build. if(NOT DEFINED DEFAULT_ENABLE_TCE) set(HAVE_TCE 0) set(HAVE_TCEMC 0) if (NOT WITH_TCE) set(WITH_TCE ENV PATH) endif() # THESE are only used in makefile.am & scripts/pocl* set(TCE_TARGET_CLANG_FLAGS "" CACHE STRING "Extra parameters to Clang for TCE compilation.") set(TCE_TARGET_LLC_FLAGS "" CACHE STRING "Extra parameters to LLVM's llc for TCE compilation.") find_program(TCE_CONFIG NAMES "tce-config" HINTS ${WITH_TCE}) find_program(TCECC NAMES "tcecc" HINTS ${WITH_TCE}) find_program(TTASIM NAMES "ttasim" HINTS ${WITH_TCE}) if(TCE_CONFIG AND TCECC AND TTASIM) message(STATUS "Found tcecc + tce-config + ttasim, testing setup") get_filename_component(TCE_BASEDIR "${TCE_CONFIG}" DIRECTORY) find_library(TCE_LIBS "tce" HINTS "${TCE_BASEDIR}/../lib" ENV PATH) if(NOT TCE_LIBS) execute_process(COMMAND "${TCE_CONFIG}" --libs OUTPUT_VARIABLE TCE_LIBS RESULT_VARIABLE RESV1) endif() execute_process(COMMAND "${TCE_CONFIG}" --includes OUTPUT_VARIABLE TCE_INCLUDES RESULT_VARIABLE RESV2) execute_process(COMMAND "${TCE_CONFIG}" --version OUTPUT_VARIABLE TCE_VERSION RESULT_VARIABLE RESV3) execute_process(COMMAND "${TCE_CONFIG}" --cxxflags OUTPUT_VARIABLE TCE_CXXFLAGS RESULT_VARIABLE RESV4) execute_process(COMMAND "${TCE_CONFIG}" --prefix OUTPUT_VARIABLE TCE_PREFIX RESULT_VARIABLE RESV5) execute_process(COMMAND "${TTASIM}" --help OUTPUT_VARIABLE TTASIM_HELP RESULT_VARIABLE RESV9) if (RESV1 OR RESV2 OR RESV3 OR RESV4 OR RESV5) message(WARNING "tce-config: Nonzero exit status, disabling TCE") elseif (RESV9) message(WARNING "ttasim: Nonzero exit status, disabling TCE") else() string(STRIP "${TCE_LIBS}" TCE_LIBS) separate_arguments(TCE_LIBS) string(STRIP "${TCE_INCLUDES}" 
TCE_INCLUDES) separate_arguments(TCE_INCLUDES) string(STRIP "${TCE_CXXFLAGS}" TCE_CXXFLAGS) separate_arguments(TCE_CXXFLAGS) string(STRIP "${TCE_VERSION}" TCE_VERSION) string(STRIP "${TCE_PREFIX}" TCE_PREFIX) set(TCE_LIBS "${TCE_LIBS}" CACHE INTERNAL "tce-config --libs") set(TCE_INCLUDES "${TCE_INCLUDES}" CACHE INTERNAL "tce-config --includes") set(TCE_VERSION "${TCE_VERSION}" CACHE INTERNAL "tce-config --version") set(TCE_CXXFLAGS "${TCE_CXXFLAGS}" CACHE INTERNAL "tce-config --cxxflags") set(TCE_PREFIX "${TCE_PREFIX}" CACHE INTERNAL "tce-config --prefix") set(HAVE_TCE 1) if(TCE_VERSION MATCHES "trunk") set(HAVE_TCEMC 1) endif() endif() else() message(STATUS "Failed to find tcecc or tce-config, disabling TCE") endif() set(DEFAULT_ENABLE_TCE ${HAVE_TCE} CACHE INTERNAL "TCE available") set(DEFAULT_ENABLE_TCEMC ${HAVE_TCEMC} CACHE INTERNAL "TCEMC available") endif() setup_cached_var(ENABLE_TCE "TCE support" "Requested enabling TCE, but no usable TCE installation found !" "TCE is available, but requested disabling it") if(ENABLE_TCE) set(OCL_DRIVERS "${OCL_DRIVERS} tce") set(OCL_TARGETS "${OCL_TARGETS} tce") if(DEFAULT_ENABLE_TCEMC) set(ENABLE_TCEMC 1) set(OCL_DRIVERS "${OCL_DRIVERS} tcemc") # TCEMC is a "superset" of TCE (lp:tce) features. endif() set(TCE_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp16 cl_khr_spir") set(TEMP_EXT "${TCE_DEVICE_EXTENSIONS}") set(TCE_DEVICE_EXTENSION_DEFINES "") separate_arguments(TEMP_EXT) foreach(EXT ${TEMP_EXT}) set(TCE_DEVICE_EXTENSION_DEFINES "${TCE_DEVICE_EXTENSION_DEFINES} -D${EXT}") endforeach() set(TCE_DEVICE_CL_VERSION "120") set(TCE_DEVICE_CL_STD "1.2") if("${LLVM_CXXFLAGS}" MATCHES "-fno-rtti") message(WARNING "TCE is enabled but your LLVM was not built with RTTI. You should rebuild LLVM with 'make REQUIRES_RTTI=1'. 
See the INSTALL file for more information.") endif() else() set(ENABLE_TCEMC 0) endif() ########################################################## if(ENABLE_HSA) set(OCL_DRIVERS "${OCL_DRIVERS} hsa") if (HSAIL_ENABLED) set(OCL_TARGETS "${OCL_TARGETS} hsail64") endif() # this is for config.h set(HSA_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics") set(HSA_DEVICE_CL_VERSION "120") set(HSA_DEVICE_CL_STD "1.2") find_path(HAVE_HSA_EXT_AMD_H "hsa_ext_amd.h" HINTS "${HSA_INCLUDEDIR}" ENV PATH) endif() ########################################################## if(ENABLE_CUDA) if(NOT "${LLVM_ALL_TARGETS}" MATCHES "NVPTX") message(FATAL_ERROR "CUDA build requested but LLVM does not support NVPTX target!") endif() set(OCL_DRIVERS "${OCL_DRIVERS} cuda") set(OCL_TARGETS "${OCL_TARGETS} cuda") # this is for config.h # TODO unify with autotools set(BUILD_CUDA 1) set(CUDA_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics") set(CUDA_DEVICE_CL_VERSION "120") set(CUDA_DEVICE_CL_STD "1.2") endif() ########################################################## message(STATUS "Building the following device drivers: ${OCL_DRIVERS}") set(BUILDDIR "${CMAKE_BINARY_DIR}") set(SRCDIR "${CMAKE_SOURCE_DIR}") ########################################################## # Checks for library features. 
if(NOT CMAKE_CROSSCOMPILING) # AC_C_BIGENDIAN include(TestBigEndian) TEST_BIG_ENDIAN(WORDS_BIGENDIAN) else() # Set default as little-endian set(WORDS_BIGENDIAN 0) endif() ########################################################## if (OCS_AVAILABLE) CHECK_ALIGNOF("double16" "typedef double double16 __attribute__((__ext_vector_type__(16)));" ALIGNOF_DOUBLE16) else() set(ALIGNOF_DOUBLE16 128) endif() if(ALIGNOF_DOUBLE16 LESS 128) set(ALIGNOF_DOUBLE16 128) endif() set(MAX_EXTENDED_ALIGNMENT "${ALIGNOF_DOUBLE16}") ########################################################## string(TOUPPER "${CMAKE_BUILD_TYPE}" BTYPE) if("${CMAKE_C_FLAGS_${BTYPE}}" MATCHES "DNDEBUG") set(POCL_ASSERTS_BUILD 0) else() set(POCL_ASSERTS_BUILD 1) endif() ########################################################## # cmake docs: # SOVERSION: What version number is this target. # For shared libraries VERSION and SOVERSION can be used to specify the # build version and API version respectively. When building or installing # appropriate symlinks are created if the platform supports symlinks and # the linker supports so-names. If only one of both is specified the # missing is assumed to have the same version number. # # For executables VERSION can be used to specify the build version. # SOVERSION is ignored if NO_SONAME property is set. For shared libraries # and executables on Windows the VERSION attribute is parsed to extract # a "major.minor" version number. These numbers are used as the # image version of the binary. # cmake usage: # SET_TARGET_PROPERTIES(pocl PROPERTIES SOVERSION 1.6.3 VERSION 4) ... # The libtool library version string to use (passed to -version-info). # See: http://www.nondot.org/sabre/Mirrored/libtool-2.1a/libtool_6.html # libpocl.so should get only API additions as we are implementing a standard. # # The library version encodings into the library file name are platform # dependent. 
Therefore we need to be a bit verbose here for the pocl.icd file # creation to succeed (see Makefile.am). # Chiefly, GNU differs from BSD, and others are untested. See e.g. # http://en.opensuse.org/openSUSE%3aShared_library_packaging_policy#Versioning_schemes # # 0:0:0 == 0.6 # 1:0:0 == 0.7 (not backwards compatible with 0:0:0 due to the ICD) # 2:0:1 == 0.8 (currently backwards compatible with 0.7, thus age = 1). # 3:0:2 == 0.9 (currently backwards compatible with 0.7, thus age = 2). # 4:0:3 == 0.10 (currently backwards compatible with 0.7, thus age = 3). # 5:0:4 == 0.11 (currently backwards compatible with 0.7, thus age = 4). # 6:0:5 == 0.12 (currently backwards compatible with 0.7, thus age = 5). # 7:0:6 == 0.13 (currently backwards compatible with 0.7, thus age = 6). # 8:0:7 == 0.14 (currently backwards compatible with 0.7, thus age = 7). # pocl 1.0 bumped the API version: # 2:0:0 == 1.0 (the libpocl.so will be named libpocl.so.2.0.X ) # 3:0:1 == 1.1 (the libpocl.so will be named libpocl.so.2.1.X ) # 4:0:2 == 1.2 (the libpocl.so will be named libpocl.so.2.2.X ) # 5:0:3 == 1.3 (the libpocl.so will be named libpocl.so.2.3.X ) # 6:0:4 == 1.4 (the libpocl.so will be named libpocl.so.2.4.X ) set(LIB_CURRENT_VERSION 6) set(LIB_REVISION_VERSION 0) set(LIB_AGE_VERSION 4) math(EXPR LIB_FIRST_VERSION "${LIB_CURRENT_VERSION} - ${LIB_AGE_VERSION}") # libtool takes "c:r:a" arguments, but the result is ".so.(c-a).a.r" # cmake has "build version" and "API version" # these vars map libtool -> cmake # for set_target_properties set(LIB_BUILD_VERSION "${LIB_FIRST_VERSION}.${LIB_AGE_VERSION}.${LIB_REVISION_VERSION}") set(LIB_API_VERSION "${LIB_FIRST_VERSION}") # The kernel compiler opt plugin shared library, however, changes more # drastically. Let's try to follow the similar 'current' numbering as # the pocl host API library and perhaps tune the 'revision' and 'age' later. 
math(EXPR KER_LIB_CURRENT_VERSION "${LIB_CURRENT_VERSION} + 7") set(KERNEL_COMPILER_LIB_VERSION "${KER_LIB_CURRENT_VERSION}.0.0") ########################################################## #TODO # these vars are copies b/c tons of sources use BUILD_ICD etc set(BUILD_ICD ${ENABLE_ICD}) set(BUILD_HSA ${ENABLE_HSA}) set(TCE_AVAILABLE ${ENABLE_TCE}) set(TCEMC_AVAILABLE ${ENABLE_TCEMC}) set(_CL_DISABLE_HALF ${CL_DISABLE_HALF}) set(PACKAGE_VERSION "${POCL_VERSION}") configure_file("config.h.in.cmake" "config.h.new" ESCAPE_QUOTES) rename_if_different("${CMAKE_BINARY_DIR}/config.h.new" "${CMAKE_BINARY_DIR}/config.h") configure_file("config2.h.in.cmake" "config2.h.new") rename_if_different("${CMAKE_BINARY_DIR}/config2.h.new" "${CMAKE_BINARY_DIR}/config2.h") include_directories("${CMAKE_BINARY_DIR}") # This is used to generate the compiler feature detection header. # Currently it's not enabled because it requires CMake > 3.x and # also the autogenerated header needs some editing by hand # (it errors on all compilers except gcc > 4 and clang > 3) # # #include(WriteCompilerDetectionHeader) #write_compiler_detection_header( # FILE "${CMAKE_BINARY_DIR}/compiler_features.h" # PREFIX POCL # COMPILERS GNU Clang # FEATURES # c_function_prototypes # c_restrict # c_static_assert # c_variadic_macros #) ########################################################## if(ENABLE_ICD) if(POCL_ICD_ABSOLUTE_PATH) set(CONTENT "${POCL_INSTALL_PUBLIC_LIBDIR}/$") else() set(CONTENT "$") endif() file(GENERATE OUTPUT "${CMAKE_BINARY_DIR}/pocl.icd" CONTENT "${CONTENT}" CONDITION 1) install(FILES "${CMAKE_BINARY_DIR}/pocl.icd" DESTINATION "${POCL_INSTALL_ICD_VENDORDIR}") # write icd file for pocl testing file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/ocl-vendors") file(GENERATE OUTPUT "${CMAKE_BINARY_DIR}/ocl-vendors/pocl-tests.icd" CONTENT "$" CONDITION 1) endif() if(ENABLE_ASAN OR ENABLE_LSAN) file(GENERATE OUTPUT "${CMAKE_BINARY_DIR}/lsan.supp" CONTENT "leak:${LLVM_SRC_ROOT}/lib/Support/Unix/Signals.inc") 
set(SAN_EXTRA "set(ENV{LSAN_OPTIONS} \"suppressions=${CMAKE_BINARY_DIR}/lsan.supp\")") endif() file(GENERATE OUTPUT "${CMAKE_BINARY_DIR}/CTestCustom.cmake" CONTENT " ${SAN_EXTRA} set(ENV{POCL_ENABLE_UNINIT} \"1\") set(ENV{POCL_BUILDING} \"1\") set(ENV{OCL_ICD_VENDORS} \"${CMAKE_BINARY_DIR}/ocl-vendors\") ") ########################################################## if(UNIX) configure_file("${CMAKE_SOURCE_DIR}/pocl.pc.in.cmake" "${CMAKE_BINARY_DIR}/pocl.pc" @ONLY) install(FILES "${CMAKE_BINARY_DIR}/pocl.pc" DESTINATION "${POCL_INSTALL_PKGCONFIG_DIR}") endif() # For now always use a mirror copy of ocml, but allow overriding # this path later to point to an out-of-tree copy. set(OCML_SOURCE_DIR "${CMAKE_SOURCE_DIR}/lib/kernel/ocml") ############################################################# add_subdirectory("include") add_subdirectory("lib") # these are set in lib/cmakelists.txt message(STATUS "OPENCL_LIBS: ${OPENCL_LIBS}") message(STATUS "OPENCL_CFLAGS: ${OPENCL_CFLAGS}") # for tests / examples set(POCLU_LINK_OPTIONS poclu ${OPENCL_LIBS} ${LIBMATH}) message(STATUS "POCLU LINK OPTS: ${POCLU_LINK_OPTIONS}") # poclcc bin add_subdirectory("bin") include(add_test_pocl) add_subdirectory("tests") add_subdirectory("examples") # make check & make check_tier1 add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} "--output-on-failure" -j ${CORECOUNT} ${COMMAND_USES_TERMINAL}) add_custom_target(check_tier1 COMMAND ${CMAKE_CTEST_COMMAND} "--output-on-failure" -L "'internal|amdsdk_30|piglit|PyOpenCL|conformance_suite_micro'" -j ${CORECOUNT} ${COMMAND_USES_TERMINAL}) ########################################################## MESSAGE(STATUS " ") MESSAGE(STATUS "*********************** SUMMARY ***************************") MESSAGE(STATUS " ") MESSAGE(STATUS "******* Directories:") MESSAGE(STATUS " ") MESSAGE(STATUS "POCL_INSTALL_CMAKE_CONFIG_DIR: ${POCL_INSTALL_CMAKE_CONFIG_DIR}") MESSAGE(STATUS "POCL_INSTALL_ICD_VENDORDIR: ${POCL_INSTALL_ICD_VENDORDIR}") MESSAGE(STATUS 
"POCL_INSTALL_OPENCL_HEADER_DIR: ${POCL_INSTALL_OPENCL_HEADER_DIR}") MESSAGE(STATUS "POCL_INSTALL_PKGCONFIG_DIR: ${POCL_INSTALL_PKGCONFIG_DIR}") MESSAGE(STATUS "POCL_INSTALL_PRIVATE_DATADIR: ${POCL_INSTALL_PRIVATE_DATADIR}") MESSAGE(STATUS "POCL_INSTALL_PRIVATE_HEADER_DIR: ${POCL_INSTALL_PRIVATE_HEADER_DIR}") MESSAGE(STATUS "POCL_INSTALL_PRIVATE_LIBDIR: ${POCL_INSTALL_PRIVATE_LIBDIR}") MESSAGE(STATUS "POCL_INSTALL_PUBLIC_BINDIR: ${POCL_INSTALL_PUBLIC_BINDIR}") MESSAGE(STATUS "POCL_INSTALL_PUBLIC_HEADER_DIR: ${POCL_INSTALL_PUBLIC_HEADER_DIR}") MESSAGE(STATUS "POCL_INSTALL_PUBLIC_LIBDIR: ${POCL_INSTALL_PUBLIC_LIBDIR}") MESSAGE(STATUS " ") if (OCS_AVAILABLE) MESSAGE(STATUS " ") MESSAGE(STATUS "******* LLVM Programs:") MESSAGE(STATUS " ") MESSAGE(STATUS "LLVM_CONFIG: ${LLVM_CONFIG}") MESSAGE(STATUS "LLVM_OPT: ${LLVM_OPT}") MESSAGE(STATUS "LLVM_LLC: ${LLVM_LLC}") MESSAGE(STATUS "LLVM_AS: ${LLVM_AS}") MESSAGE(STATUS "LLVM_LINK: ${LLVM_LINK}") MESSAGE(STATUS "LLVM_LLI: ${LLVM_LLI}") MESSAGE(STATUS "WITH_LLVM_CONFIG (User preferred llvm-config): ${WITH_LLVM_CONFIG}") endif() MESSAGE(STATUS " ") MESSAGE(STATUS "******* Various Flags:") MESSAGE(STATUS " ") MESSAGE(STATUS "CL_DISABLE_HALF: ${CL_DISABLE_HALF}") MESSAGE(STATUS "HAVE_CLOCK_GETTIME: ${HAVE_CLOCK_GETTIME}") MESSAGE(STATUS "HAVE_GLEW: ${HAVE_GLEW}") MESSAGE(STATUS "HAVE_LTTNG_UST: ${HAVE_LTTNG_UST}") MESSAGE(STATUS "HOST_AS_FLAGS: ${HOST_AS_FLAGS}") MESSAGE(STATUS "HOST_CLANG_FLAGS: ${HOST_CLANG_FLAGS}") MESSAGE(STATUS "HOST_LD_FLAGS: ${HOST_LD_FLAGS}") MESSAGE(STATUS "HOST_LLC_FLAGS: ${HOST_LLC_FLAGS}") if (ENABLE_HSA) MESSAGE(STATUS "") MESSAGE(STATUS "HSA_INCLUDES: ${HSA_INCLUDES}") MESSAGE(STATUS "HSALIB: ${HSALIB}") MESSAGE(STATUS "HSAIL_ASM: ${HSAIL_ASM}") endif() MESSAGE(STATUS "") MESSAGE(STATUS "LIB_API_VERSION: ${LIB_API_VERSION}") MESSAGE(STATUS "LIB_BUILD_VERSION: ${LIB_BUILD_VERSION}") MESSAGE(STATUS "ICD_LD_FLAGS: ${ICD_LD_FLAGS}") MESSAGE(STATUS "EXTRA_KERNEL_FLAGS: ${EXTRA_KERNEL_FLAGS}") 
MESSAGE(STATUS "EXTRA_KERNEL_CXX_FLAGS: ${EXTRA_KERNEL_CXX_FLAGS}") MESSAGE(STATUS "EXTRA_KERNEL_CL_FLAGS: ${EXTRA_KERNEL_CL_FLAGS}") MESSAGE(STATUS "EXTRA_KERNEL_C_FLAGS: ${EXTRA_KERNEL_C_FLAGS}") MESSAGE(STATUS "final KERNEL_CXX_FLAGS: ${KERNEL_CXX_FLAGS}") MESSAGE(STATUS "final KERNEL_CL_FLAGS: ${KERNEL_CL_FLAGS}") MESSAGE(STATUS "final KERNEL_C_FLAGS: ${KERNEL_C_FLAGS}") if (OCS_AVAILABLE) MESSAGE(STATUS "") MESSAGE(STATUS "CLANG_HAS_64B_MATH: ${CLANG_HAS_64B_MATH}") MESSAGE(STATUS "CLANG_HAS_128B_MATH: ${CLANG_HAS_128B_MATH}") MESSAGE(STATUS "CLANG_NEEDS_RTLIB: ${CLANG_NEEDS_RTLIB}") MESSAGE(STATUS "CLANG_MARCH_FLAG: ${CLANG_MARCH_FLAG}") MESSAGE(STATUS "CLANG_TARGET_OPTION: ${CLANG_TARGET_OPTION}") MESSAGE(STATUS "LLVM_VERSION: ${LLVM_VERSION}") MESSAGE(STATUS "LLVM_LIB_IS_SHARED: ${LLVM_LIB_IS_SHARED}") MESSAGE(STATUS "LLVM_HAS_RTTI: ${LLVM_HAS_RTTI}") MESSAGE(STATUS "LLVM_LIB_MODE: ${LLVM_LIB_MODE}") MESSAGE(STATUS "LLVM_ASSERTS_BUILD: ${LLVM_ASSERTS_BUILD}") MESSAGE(STATUS "LLVM_BUILD_MODE: ${LLVM_BUILD_MODE}") MESSAGE(STATUS "LLVM_CFLAGS: ${LLVM_CFLAGS}") MESSAGE(STATUS "LLVM_CXXFLAGS: ${LLVM_CXXFLAGS}") MESSAGE(STATUS "LLVM_CPPFLAGS: ${LLVM_CPPFLAGS}") MESSAGE(STATUS "LLVM_LDFLAGS: ${LLVM_LDFLAGS}") MESSAGE(STATUS "LLVM_LIBDIR: ${LLVM_LIBDIR}") MESSAGE(STATUS "LLVM_INCLUDEDIR: ${LLVM_INCLUDEDIR}") MESSAGE(STATUS "LLVM_SRC_ROOT: ${LLVM_SRC_ROOT}") MESSAGE(STATUS "LLVM_OBJ_ROOT: ${LLVM_OBJ_ROOT}") MESSAGE(STATUS "LLVM_INCLUDE_DIRS: ${LLVM_INCLUDE_DIRS}") MESSAGE(STATUS "LLVM_ALL_TARGETS: ${LLVM_ALL_TARGETS}") MESSAGE(STATUS "LLVM_HOST_TARGET: ${LLVM_HOST_TARGET}") MESSAGE(STATUS "LLC_TRIPLE: ${LLC_TRIPLE}") MESSAGE(STATUS "LLC_HOST_CPU: ${LLC_HOST_CPU}") MESSAGE(STATUS "") endif() MESSAGE(STATUS "MAX_EXTENDED_ALIGNMENT: ${MAX_EXTENDED_ALIGNMENT}") MESSAGE(STATUS "OCL_KERNEL_TARGET: ${OCL_KERNEL_TARGET}") MESSAGE(STATUS "OCL_KERNEL_TARGET_CPU: ${OCL_KERNEL_TARGET_CPU}") MESSAGE(STATUS "HOST_DEVICE_ADDRESS_BITS: ${HOST_DEVICE_ADDRESS_BITS}") if (ENABLE_TCE) 
MESSAGE(STATUS "") MESSAGE(STATUS "TCE_TARGET_CLANG_FLAGS: ${TCE_TARGET_CLANG_FLAGS}") MESSAGE(STATUS "TCE_TARGET_LLC_FLAGS: ${TCE_TARGET_LLC_FLAGS}") MESSAGE(STATUS "TCE_CXXFLAGS: ${TCE_CXXFLAGS}") MESSAGE(STATUS "TCE_INCLUDES: ${TCE_INCLUDES}") MESSAGE(STATUS "TCE_LIBS: ${TCE_LIBS}") MESSAGE(STATUS "TCE_VERSION: ${TCE_VERSION}") MESSAGE(STATUS "TCE_PREFIX: ${TCE_PREFIX}") endif() MESSAGE(STATUS "") if (OCS_AVAILABLE) MESSAGE(STATUS "----------- -------------------------------- --------") MESSAGE(STATUS "llvm libs libpocl will be linked to (POCL_LLVM_LIBS):") MESSAGE(STATUS "${POCL_LLVM_LIBS}") MESSAGE(STATUS "----------- -------------------------------- --------") MESSAGE(STATUS "clang libs libpocl will be linked to (CLANG_LIBFILES):") MESSAGE(STATUS "${CLANG_LIBFILES}") MESSAGE(STATUS "----------- -------------------------------- --------") MESSAGE(STATUS "system libs libpocl will be linked to (LLVM_SYSLIBS):") MESSAGE(STATUS "${LLVM_SYSLIBS}") MESSAGE(STATUS "----------- -------------------------------- --------") endif() MESSAGE(STATUS "******* Enabled features:") MESSAGE(STATUS " ") MESSAGE(STATUS "DEVELOPER_MODE: ${DEVELOPER_MODE}") MESSAGE(STATUS "ENABLE_CONFORMANCE: ${ENABLE_CONFORMANCE}") if(ARM) MESSAGE(STATUS "ENABLE_FP64: ${ENABLE_FP64}") endif() MESSAGE(STATUS "ENABLE_ICD: ${ENABLE_ICD}") MESSAGE(STATUS "ENABLE_TCE: ${ENABLE_TCE}") MESSAGE(STATUS "ENABLE_TCEMC: ${ENABLE_TCEMC}") MESSAGE(STATUS "ENABLE_HSA: ${ENABLE_HSA}") MESSAGE(STATUS "ENABLE_CUDA: ${ENABLE_CUDA}") MESSAGE(STATUS "ENABLE_ASAN (address sanitizer): ${ENABLE_ASAN}") MESSAGE(STATUS "ENABLE_LSAN (leak sanitizer): ${ENABLE_LSAN}") MESSAGE(STATUS "ENABLE_TSAN (thread sanitizer): ${ENABLE_TSAN}") MESSAGE(STATUS "ENABLE_UBSAN (UB sanitizer): ${ENABLE_UBSAN}") MESSAGE(STATUS "ENABLE_POCL_FLOAT_CONVERSION: ${ENABLE_POCL_FLOAT_CONVERSION}") MESSAGE(STATUS "ENABLE_RELOCATION: ${ENABLE_RELOCATION}") MESSAGE(STATUS "ENABLE_SLEEF: ${ENABLE_SLEEF}") MESSAGE(STATUS "ENABLE_SPIR: ${ENABLE_SPIR}") 
MESSAGE(STATUS "ENABLE_SPIRV: ${ENABLE_SPIRV}") MESSAGE(STATUS "ENABLE_POCL_BUILDING: ${ENABLE_POCL_BUILDING}") MESSAGE(STATUS "INSTALL_OPENCL_HEADERS (Install our headers): ${INSTALL_OPENCL_HEADERS}") MESSAGE(STATUS "OCL_DRIVERS (Drivers built): ${OCL_DRIVERS}") MESSAGE(STATUS "OCL_TARGETS (Targets built): ${OCL_TARGETS}") MESSAGE(STATUS "OCS_AVAILABLE: ${OCS_AVAILABLE}") MESSAGE(STATUS "POCL_ICD_ABSOLUTE_PATH: ${POCL_ICD_ABSOLUTE_PATH}") MESSAGE(STATUS "POCL_ASSERTS_BUILD: ${POCL_ASSERTS_BUILD}") MESSAGE(STATUS "SINGLE_LLVM_LIB: ${SINGLE_LLVM_LIB}") MESSAGE(STATUS "TESTS_USE_ICD: ${TESTS_USE_ICD}") MESSAGE(STATUS "Available testsuites: ${ALL_TESTSUITES}") MESSAGE(STATUS "Enabled testsuites: ${ACTUALLY_ENABLED_TESTSUITES}") MESSAGE(STATUS "Disabled testsuites: ${DISABLED_TESTSUITES}") MESSAGE(STATUS "Testsuites are built from git master: ${EXAMPLES_USE_GIT_MASTER}") MESSAGE(STATUS "Kernel caching: ${KERNEL_CACHE_DEFAULT}") MESSAGE(STATUS "Kernel library CPU variants: ${KERNELLIB_HOST_CPU_VARIANTS}") MESSAGE(STATUS "Kernel library distro build: ${KERNELLIB_HOST_DISTRO_VARIANTS}") MESSAGE(STATUS "Use pocl custom memory allocator: ${USE_POCL_MEMMANAGER}") MESSAGE(STATUS "L1d cacheline size: ${HOST_CPU_CACHELINE_SIZE}") pocl-1.4/COPYING000066400000000000000000000020431355011147700133510ustar00rootroot00000000000000Copyright (c) 2011 pocl developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pocl-1.4/CREDITS000066400000000000000000000036571355011147700133520ustar00rootroot00000000000000This file tries to list all the people contributed code, documentation, test cases, etc. to the pocl project in the chronological order of their first contribution. Please tell if we missed your name (sorry in advance). Carlos Sánchez de La Lama Pekka Jääskeläinen Erik Schnetter Heikki Kultala Vladimír Guzma Kalle Raiskila Vincent Danjean Timo Viitanen Cyril Roelandt Victor Oliveira Jesse Towner Brandon Surmanski Bryan Bell Andreas Klöckner Oliver Hartmann Ville Korhonen Giuseppe Bilotta Peter Colberg Mikael Lepistö Clément Léger Logan Chien Richard Sandiford (Scalarizer.cpp) Michal Babej Kristian Mört Felix Bytow Matias Koskela Martin Stumpf James Price Lars-Dominik Braun Daniel Sanders Lee Ki-ju Krishnaraj Bhat Martin Hauke Volkan Keleş Lassi Koskinen Hugo van der Wijst Mateusz Szpakowski Lars Buitinck (larsmans) Chen Chou-chuan Shao-chung Wang Pavan Yalamanchili Romaric Jodin Masataro Asai Richard Crowder Matthias Noack Sam McKelvie Tom Gall Arda Coskunses Minh Quan HO Matt Wala Jonas Hahnfeld Ronan Keryell Rodrigo Tobar Martin Krastev Tom Stellard Nick Curtis Konstantin Bakanov Andreas Beckmann Isuru Fernando Jeff Hammond Julius Ikkala Steve Holland Wilfried Holzke Maxim Eremenko Andrew Gozillon Jan Solanti Stefan Brüns 
pocl-1.4/INSTALL000077700000000000000000000000001355011147700211212doc/sphinx/source/install.rstustar00rootroot00000000000000pocl-1.4/LICENSE000066400000000000000000000020431355011147700133230ustar00rootroot00000000000000Copyright (c) 2011 pocl developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pocl-1.4/README000066400000000000000000000006141355011147700132000ustar00rootroot00000000000000Portable Computing Language (pocl) ---------------------------------- pocl is being developed towards an efficient implementation of OpenCL standard which can be easily adapted for new targets. Please refer to the file INSTALL in this directory for building and installing pocl. More documentation available at http://portablecl.org/docs/html/ The main web page is at http://portablecl.org pocl-1.4/README.ARM000066400000000000000000000015721355011147700136220ustar00rootroot00000000000000pocl builds (as of Aug 2017) on ODROID XU3 and ODROID C2 but some tests fail. How to build: * get a clang / llvm. 
DO NOT use the ones downloaded from llvm.org, they only work on the distro where they were compiled. Ubuntu LTS these days ships multiple llvm versions even quite recent ones; get the clang+llvm from your distro's packages. * read the pocl build instructions in docs * LLVM will likely not recognize your cpu, and running cmake will give you a warning. run cmake with -DLLC_HOST_CPU=. "yourcpu" must be something LLVM recognizes, usually it's simply "cortex-aXX" like cortex-a15 etc. You can get the full list by running `llc -mcpu=help`. * example for building pocl on Ubuntu 16.04 + ARM: apt install ocl-icd-libopencl1 ocl-icd-opencl-dev cmake libltdl-dev libhwloc-dev pkg-config build-essential llvm-4.0-dev llvm-4.0 clang-4.0 libclang-4.0-dev pocl-1.4/README.FreeBSD000066400000000000000000000003201355011147700144030ustar00rootroot00000000000000It should just work if you beware some known issues: * https://github.com/pocl/pocl/issues/263 In short, you should build pocl with Clang 3.6 or newer even though the default Clang of FreeBSD can be older. pocl-1.4/README.Windows000066400000000000000000000022211355011147700146250ustar00rootroot00000000000000# Compiling pocl on Windows ## Dependencies: - Visual Studio 2013 - Git and Git bash http://git-scm.com/downloads - CMake 2.8 or newer http://www.cmake.org/download/ - Python 2.7 for LLVM - Pthreads-win32 binary distribution https://www.sourceware.org/pthreads-win32/ - Hwloc for Windows x64 binary distribution http://www.open-mpi.org/software/hwloc/v1.10/ - LLVM + Clang latest release sources ## Support: - Only 64bit compiling for now - No ICD compiling - No VML (no stdcxxlib finding done for windows) - Static compilation ## Building There is shell script in `pocl/windows/setup_and_build_win64.sh` Shell script may be ran in `Git Bash` and it downloads and installs pocl and all the library dependencies and builds them to `/c/pocl-playground`. 
To download and build everything without first fetching pocl repository one can do simply: curl https://raw.githubusercontent.com/pocl/master/windows/setup_and_build_win64.sh | sh Script requires following software installed on Windows 7 or later (64bit only): - Visual Studio 2013 (e.g. community edition) - Cmake 2.8 or later (must be added to PATH) - Git + Git Bash - Python 2.7 for compiling LLVM pocl-1.4/README.mips000066400000000000000000000013251355011147700141470ustar00rootroot00000000000000MIPS ==== Added initial support. With LLVM 3.5 and a big-endian MIPS32r2 system, the majority of tests fail due to two bugs: * Vectors are not correctly passed in varargs. This causes cl_printf to misbehave, causing most reference checks to fail. This has been fixed on LLVM's trunk. * Vectors whose size is not a multiple of 4 bytes (such as char3) trigger an optimisation bug in SROA. This is still being debugged. It's expected that most of this has been fixed in LLVM 3.7 but this has not yet been re-tested. Testing ------- Tested on an EdgeRouter Pro running 32-bit big-endian Debian Jessie with the following configure command: ./configure --host=mips-unknown-linux-gnu --build mips-unknown-linux-gnu pocl-1.4/README.mipsel000066400000000000000000000006161355011147700144720ustar00rootroot00000000000000MIPS Little Endian ================== With LLVM 3.7.0 plus a couple minor patches, almost all Little-endian MIPS32r2 tests from pocl's set pass. Testing ------- Tested on a Creator CI20 running 32-bit little-endian Debian Jessie with the following cmake command: PATH=/path/to/clang-3.7.0-patched/bin/:$PATH CC=clang CXX=clang++ \ cmake -GNinja ../pocl.src/ -DLLC_HOST_CPU=mips32r2 pocl-1.4/README.packaging000066400000000000000000000027301355011147700151240ustar00rootroot00000000000000This file contains notes for making distribution packages of pocl. ICD --- Pocl should probably be built with ICD enabled (``-DENABLE_ICD=ON`` CMake option) for desktop distributions. 
Pocl does not have an ICD loader, so a dependancy on one would be beneficial. CMake options for a distribution build -------------------------------------- - ``-DKERNELLIB_HOST_CPU_VARIANTS=distro`` Note: this note only works for x86(-64) platform currently, on other platforms, it has zero effect. Enables runtime detection of CPU and builds separate kernel libraries for most common x86 CPUs. - ``-DPOCL_ICD_ABSOLUTE_PATH=OFF`` The pocl.icd file (which the ICD loader uses to load the pocl lib) by default has a full path to the installed libpocl.so file. Set this option to OFF and pocl will only put the dynamic library name into pocl.icd. - ``-DENABLE_POCL_BUILDING=OFF`` When OFF, POCL_BUILDING option (which causes pocl to look for required files in build / source directories) will be ignored and pocl will always look in installed paths only. Mesa (OpenGL) interoperability ------------------------------ On some current (Jan 2014) Linux distibutions, mesa is built with LLVMpipe. If pocl is built against a shared LLVM library, the mesa calls to its LLVM will be re-routed to the LLVM linked in pocl, causing a segfault. Consider linking LLVM statically to pocl. At least 'nouveau' and 'swrast_dri' are known to suffer from this. See https://github.com/pocl/pocl/issues/46 pocl-1.4/README.powerpc000066400000000000000000000030071355011147700146550ustar00rootroot00000000000000PPPC-32 ------- Most pocl basic tests run successfully in a PPC32 userland. When compiling in "LLVM API mode", i.e. with --enable-llvmapi passed to confiure (note, this is the default now), an issue with linking occurred: LLVM assumes on PPC that the linker is able to generate branch islands, so there is no support for them in LLVM PPC codegen. GNU BFD ld does not support them on PPC32 (and GNU gold suffers from an internal error, at the time of writing this), so no linker is available on most linuxes for PPC32 that would work with pocl. 
The error shows itself when loading the compiled kernel .so with "file not found" errors, which when investigating turn out to be too long jumps from the kernel. When linking in LLVM lib, libpocl grows just enough that this issue shows with some OCL programs. Using the scripts to compile kernels (i.e. pass --disable-llvmapi) seems to be more robust. LLVM 3.4 does not work on PowerPC, as the address space cast operation cannot be selected on the PPC backend. Use LLVM 3.3 instead. status 2012-11-21 ----------------- All pocl basic tests run successfully on a Debian Sid with PowerPC 64 in a 64 bit userland with LLVM 3.2. Tested using the Cell of PlayStation 3. Did not yet test the external OpenCL project tests. to do ----- - The threshold for wirepl is too large. The PowerPC of the Cell PPU supports AltiVec which is capable of 4-wide f32 vectorization only. Thus, a sensible threshold is probably at most 8 or so. This should be sniffed from the device driver somehow. pocl-1.4/TODO000066400000000000000000000031271355011147700130120ustar00rootroot00000000000000Version roadmap --------------- High priority (1.0 blockers): * make NVIDIA OpenCL SDK examples to work * make Intel OpenCL SDK examples to work Medium priority: * device supporting AMD GPU cards. Known ambiguous OpenCL 1.2 features ----------------------------------- The OpenCL 1.2 and later standards are very ambiguous when it comes to sub-devices. On the one hand, they claim that sub-devices can be used wherever their parent devices can be used, on the other hand various parts of the standard hint that they should be treated independently. In particular, it's not clear whether sub-devices can be used within a context that only holds their parent device, or not. This might even depend on whether the context was created "from type" or not. The implementation of subdevices in pocl currently converts subdevices to their parents in most places, with the exception being clEnqueueNDRangeKernel. 
# Example CMake toolchain file for cross-compiling pocl from an x86_64 host
# to an ARM/MIPS/other board. Copy it and adapt the paths to your setup.
#
# How to use:
#   1) Install the gcc and g++ cross-compilers on the host, e.g.
#        apt install gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf
#   2) On the board, install libltdl, ocl-icd and libhwloc together with
#      their development headers.
#   3) Copy the board's entire root filesystem somewhere onto the host and
#      point CMAKE_FIND_ROOT_PATH (below) at that copy.
#   4) Invoke cmake along these lines:
#        cmake -DHOST_DEVICE_BUILD_HASH=<...> -DOCS_AVAILABLE=0
#              -DCMAKE_TOOLCHAIN_FILE=<...>
#              -DLLC_TRIPLE=<...>
#

set(CMAKE_SYSTEM_NAME "Linux")

# Cross compilers used for C and C++ sources.
set(CMAKE_C_COMPILER "/usr/bin/arm-linux-gnueabihf-gcc")
set(CMAKE_CXX_COMPILER "/usr/bin/arm-linux-gnueabihf-g++")

# CMAKE_SYSROOT should work, but does not yet; set the FIND_ROOT path
# below instead.
# set(CMAKE_SYSROOT /home/a/zynq/ZYNQ_ROOT)

# Location of the copied target root filesystem.
set(CMAKE_FIND_ROOT_PATH "/path/to/target_ROOT")

# Library directory inside the target root filesystem.
set(CMAKE_LIBRARY_PATH "/path/to/target_ROOT/usr/lib/arm-linux-gnueabihf")

# Programs are looked up on the build host only ...
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
# ... while libraries and headers are looked up in the target root only.
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
# poclcc: command-line tool built from poclcc.c plus the shared poclu
# helper source lib/poclu/misc.c.
add_executable(poclcc
  poclcc.c
  "${CMAKE_SOURCE_DIR}/lib/poclu/misc.c"
)

# Link against the OpenCL libraries selected by the top-level build.
target_link_libraries(poclcc ${OPENCL_LIBS})

# Install the binary into pocl's public binary directory.
install(
  TARGETS "poclcc"
  RUNTIME DESTINATION "${POCL_INSTALL_PUBLIC_BINDIR}"
)
*/ #include #include #include #include #include #include #include "poclu.h" #define DEVICE_INFO_MAX_LENGTH 2048 #define NUM_OF_DEVICE_ID 32 #define NUM_OPTIONS 6 #define ERRNO_EXIT(filename) do { \ printf("IO error on file %s: %s\n", filename, strerror(errno)); \ exit(2); \ } while(0) char *kernel_source = NULL; char *output_file = NULL; cl_uint opencl_device = CL_DEVICE_TYPE_DEFAULT; unsigned opencl_device_id = 0; int list_devices = 0; int list_devices_only = 0; char *build_options = NULL; /**********************************************************/ typedef int(*poclcc_process)(int, char **, int); typedef struct _poclcc_option { poclcc_process fct; char *id; char *helper; int num_args_read; } poclcc_option; /**********************************************************/ poclcc_option *options_help; static int print_help() { printf("USAGE: poclcc [OPTION]... [FILE]\n"); printf("\n"); printf("OPTIONS:\n"); int i; for (i=0; i= argc) return poclcc_error("Incomplete argument for input file!\n"); char *filename = argv[arg]; char *ext = ".pocl"; kernel_source = poclu_read_file(filename); if (!kernel_source) ERRNO_EXIT(filename); if (output_file == NULL) { output_file = malloc(strlen(filename)+strlen(ext)); strcpy(output_file, filename); strcat(output_file, ext); } return 0; } /********************************************************** * OPTIONS PROCESS FUNCTIONS*/ static int process_help(int arg, char **argv, int argc) { print_help(); return 0; } static int process_output(int arg, char **argv, int argc) { if (arg >= argc) return poclcc_error("Incomplete argument for output file!\n"); output_file = argv[arg]; return 0; } static int process_opencl_device(int arg, char **argv, int argc) { if (arg >= argc) return poclcc_error("Incomplete argument for device_type!\n"); char *opencl_string = argv[arg]; if (!strcmp(opencl_string, "CL_DEVICE_TYPE_CPU")) opencl_device = CL_DEVICE_TYPE_CPU; else if (!strcmp(opencl_string, "CL_DEVICE_TYPE_GPU")) opencl_device = CL_DEVICE_TYPE_GPU; 
else if (!strcmp(opencl_string, "CL_DEVICE_TYPE_ACCELERATOR")) opencl_device = CL_DEVICE_TYPE_ACCELERATOR; else if (!strcmp(opencl_string, "CL_DEVICE_TYPE_DEFAULT")) opencl_device = CL_DEVICE_TYPE_DEFAULT; else if (!strcmp(opencl_string, "CL_DEVICE_TYPE_ALL")) opencl_device = CL_DEVICE_TYPE_ALL; else { printf("Invalid argument for device_type!\n"); return print_help(); } return 0; } static int process_build_options(int arg, char **argv, int argc) { if (arg >= argc) return poclcc_error("Incomplete argument for build_options!\n"); build_options = argv[arg]; return 0; } static int process_device_id(int arg, char **argv, int argc) { if (arg >= argc) return poclcc_error("Incomplete argument for build_options!\n"); opencl_device_id = atoi(argv[arg]); return 0; } static int process_list_devices(int arg, char **argv, int argc) { list_devices = 1; opencl_device = CL_DEVICE_TYPE_ALL; return 0; } /**********************************************************/ static poclcc_option options[NUM_OPTIONS] = { {process_help, "-h", "\t-h\n" "\t\tDisplay the help\n", 1}, {process_build_options, "-b", "\t-b \n" "\t\tBuild the program with options\n", 2}, {process_opencl_device, "-d", "\t-d \n" "\t\tSelect as the device_type for clGetDeviceIDs.\n" "\t\tDefault: CL_DEVICE_TYPE_DEFAULT\n", 2}, {process_list_devices, "-l", "\t-l\n" "\t\tList the opencl device found (that match the \n", 1}, {process_device_id, "-i", "\t-i \n" "\t\tSelect the opencl device to generate the pocl binary file\n" "\t\tDefault: 0\n", 2}, {process_output, "-o", "\t-o \n" "\t\tWrite output to \n", 2} }; /**********************************************************/ static int search_process(char *arg) { int i; for (i=0; inum_args_read; *arg = prev_arg + num_args_read; return current_option->fct(prev_arg + 1, argv, argc); } } /**********************************************************/ int main(int argc, char **argv) { //MANAGEMENT OF ARGUMENTS options_help = options; int arg_num=1; if (argc < 2) return 
poclcc_error("Invalid argument!\n"); while (arg_num < argc-1) if (process_arg(&arg_num, argv, argc)) return -1; if (arg_num >= argc && list_devices) list_devices_only = 1; else if (arg_num >= argc) poclcc_error("Invalid arguments!\n"); else { int current_process = search_process(argv[arg_num]); if (current_process == -1 && process_kernel_file(arg_num, argv, argc)) return -1; else if (current_process != -1) { process_arg(&arg_num, argv, argc); list_devices_only = 1; } } //OPENCL STUFF cl_platform_id cpPlatform; cl_device_id device_ids[NUM_OF_DEVICE_ID]; cl_context context; cl_program program; cl_int err; cl_uint num_devices, i; CHECK_CL_ERROR(clGetPlatformIDs(1, &cpPlatform, NULL)); CHECK_CL_ERROR(clGetDeviceIDs(cpPlatform, opencl_device, NUM_OF_DEVICE_ID, device_ids, &num_devices)); if (opencl_device_id >= num_devices) return poclcc_error("Invalid opencl device_id!\n"); if (list_devices) { context = clCreateContext(0, num_devices, device_ids, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); printf("LIST OF DEVICES:\n"); for (i=0; ipoclbin_hash_string CHECK_CL_ERROR(clGetDeviceInfo(device_ids[i], CL_DEVICE_VERSION, DEVICE_INFO_MAX_LENGTH, str, NULL)); printf(" Version: %s\n", str); } clReleaseContext(context); } if (list_devices_only) return 0; context = clCreateContext(0, 1, &device_ids[opencl_device_id], NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); program = clCreateProgramWithSource(context, 1, (const char **)&kernel_source, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); CHECK_CL_ERROR(clBuildProgram(program, 0, NULL, build_options, NULL, NULL)); size_t binary_sizes; char *binary; CHECK_CL_ERROR(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_sizes, NULL)); binary = malloc(sizeof(char)*binary_sizes); if (!binary) { printf("malloc(binary) failed\n"); exit(1); } CHECK_CL_ERROR(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &binary, NULL)); 
CHECK_CL_ERROR(clReleaseProgram(program)); CHECK_CL_ERROR(clReleaseContext(context)); if (poclu_write_file(output_file, binary, binary_sizes)) ERRNO_EXIT(output_file); free(binary); return 0; } pocl-1.4/cmake/000077500000000000000000000000001355011147700133775ustar00rootroot00000000000000pocl-1.4/cmake/FindHwloc.cmake000066400000000000000000000120471355011147700162620ustar00rootroot00000000000000#.rst: # FindHwloc # ---------- # # Try to find Portable Hardware Locality (hwloc) libraries. # http://www.open-mpi.org/software/hwloc # # You may declare HWLOC_ROOT environment variable to tell where # your hwloc library is installed. # # Once done this will define:: # # Hwloc_FOUND - True if hwloc was found # Hwloc_INCLUDE_DIRS - include directories for hwloc # Hwloc_LIBRARIES - link against these libraries to use hwloc # Hwloc_VERSION - version # Hwloc_CFLAGS - include directories as compiler flags # Hwloc_LDFLAGS - link paths and libs as compiler flags # #============================================================================= # Copyright 2014 Mikael Lepistö # # Distributed under the OSI-approved BSD License (the "License"); # # This software is distributed WITHOUT ANY WARRANTY; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # See the License for more information.
# Locate hwloc.
#
# Three strategies are used, depending on the platform:
#   * Windows:          find_path()/find_library() plus a link test
#   * cross-compiling:  find_path()/find_library(), version assumed 1.7.0
#   * everything else:  pkg-config
if(WIN32)

  find_path(Hwloc_INCLUDE_DIR
    NAMES hwloc.h
    PATHS ENV "PROGRAMFILES(X86)" ENV HWLOC_ROOT
    PATH_SUFFIXES include
  )

  find_library(Hwloc_LIBRARY
    NAMES libhwloc.lib
    PATHS ENV "PROGRAMFILES(X86)" ENV HWLOC_ROOT
    PATH_SUFFIXES lib
  )

  #
  # Check that the library that was found can actually be linked.
  # Only attempted when both header and library were located; otherwise
  # find_package_handle_standard_args() below reports "not found" (and
  # fails only if the package was REQUIRED) instead of aborting here with
  # a misleading "Library found, but linking failed" error.
  #
  if(Hwloc_INCLUDE_DIR AND Hwloc_LIBRARY)
    set(_TEST_SOURCE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/linktest.c")
    file(WRITE "${_TEST_SOURCE}"
"
#include <hwloc.h>
int main()
{
  hwloc_topology_t topology;
  int nbcores;
  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);
  nbcores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
  hwloc_topology_destroy(topology);
  return 0;
}
"
    )

    try_compile(_LINK_SUCCESS "${CMAKE_BINARY_DIR}" "${_TEST_SOURCE}"
      CMAKE_FLAGS
        "-DINCLUDE_DIRECTORIES:STRING=${Hwloc_INCLUDE_DIR}"
        "-DLINK_LIBRARIES:STRING=${Hwloc_LIBRARY}"
    )

    if(NOT _LINK_SUCCESS)
      if(CMAKE_SIZEOF_VOID_P EQUAL 8)
        message(STATUS "You are building 64bit target.")
      else()
        message(STATUS "You are building 32bit code. If you like to build x64 use e.g. -G 'Visual Studio 12 Win64' generator." )
      endif()
      message(FATAL_ERROR "Library found, but linking test program failed.")
    endif()
  endif()

  #
  # Resolve the version if a compiled hwloc-info binary is available.
  #
  find_program(HWLOC_INFO_EXECUTABLE
    NAMES hwloc-info
    PATHS ENV HWLOC_ROOT
    PATH_SUFFIXES bin
  )

  if(HWLOC_INFO_EXECUTABLE)
    execute_process(
      COMMAND ${HWLOC_INFO_EXECUTABLE} "--version"
      OUTPUT_VARIABLE HWLOC_VERSION_LINE
      OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    # Take the trailing dotted version number as a whole. The dot is
    # escaped and the ".N" group may repeat, so "hwloc-info 1.11.5" yields
    # "1.11.5" rather than the truncated "11.5".
    string(REGEX MATCH "([0-9]+(\\.[0-9]+)+)$"
      Hwloc_VERSION "${HWLOC_VERSION_LINE}")
    unset(HWLOC_VERSION_LINE)
  endif()

  #
  # All good
  #
  set(Hwloc_LIBRARIES ${Hwloc_LIBRARY})
  set(Hwloc_INCLUDE_DIRS ${Hwloc_INCLUDE_DIR})

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(
    Hwloc
    FOUND_VAR Hwloc_FOUND
    REQUIRED_VARS Hwloc_LIBRARY Hwloc_INCLUDE_DIR
    VERSION_VAR Hwloc_VERSION)

  mark_as_advanced(
    Hwloc_INCLUDE_DIR
    Hwloc_LIBRARY)

  # MSVC-style compiler flags for the include directories.
  foreach(arg ${Hwloc_INCLUDE_DIRS})
    set(Hwloc_CFLAGS "${Hwloc_CFLAGS} /I${arg}")
  endforeach()

  set(Hwloc_LDFLAGS "${Hwloc_LIBRARY}")

else()

  if(CMAKE_CROSSCOMPILING)

    # pkg-config of the build host would describe the wrong system, so
    # search for the header and library directly.
    find_path(Hwloc_INCLUDE_DIRS
      NAMES hwloc.h
      PATHS ENV HWLOC_ROOT
    )

    find_library(Hwloc_LIBRARIES
      NAMES hwloc
      PATHS ENV HWLOC_ROOT
    )

    if(Hwloc_INCLUDE_DIRS AND Hwloc_LIBRARIES)
      message(WARNING "HWLOC library found using find_library() - cannot determine version. Assuming 1.7.0")
      set(Hwloc_FOUND 1)
      set(Hwloc_VERSION "1.7.0")
    endif()

  else() # Find with pkgconfig for non-crosscompile builds

    find_package(PkgConfig)

    # HWLOC_ROOT may be given as a CMake variable or, as the module header
    # documents, as an environment variable.
    if(HWLOC_ROOT)
      set(_hwloc_root "${HWLOC_ROOT}")
    elseif(NOT "$ENV{HWLOC_ROOT}" STREQUAL "")
      set(_hwloc_root "$ENV{HWLOC_ROOT}")
    else()
      set(_hwloc_root "")
    endif()

    if(_hwloc_root)
      set(ENV{PKG_CONFIG_PATH} "${_hwloc_root}/lib/pkgconfig")
    else()
      foreach(PREFIX ${CMAKE_PREFIX_PATH})
        set(PKG_CONFIG_PATH "${PKG_CONFIG_PATH}:${PREFIX}/lib/pkgconfig")
      endforeach()
      set(ENV{PKG_CONFIG_PATH} "${PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}")
    endif()
    unset(_hwloc_root)

    # find_package(Hwloc ...) sets Hwloc_FIND_* (the case follows the
    # package name). The lower-case spellings tested by earlier versions of
    # this module are still honoured for backward compatibility.
    unset(_hwloc_OPTS)
    unset(_hwloc_output)
    if(Hwloc_FIND_REQUIRED OR hwloc_FIND_REQUIRED)
      set(_hwloc_OPTS "REQUIRED")
    elseif(Hwloc_FIND_QUIETLY OR hwloc_FIND_QUIETLY)
      set(_hwloc_OPTS "QUIET")
    else()
      set(_hwloc_output 1)
    endif()

    # Resolve the requested version and exactness, preferring the
    # correctly-cased variables.
    set(_hwloc_version "")
    set(_hwloc_exact FALSE)
    if(Hwloc_FIND_VERSION)
      set(_hwloc_version "${Hwloc_FIND_VERSION}")
      if(Hwloc_FIND_VERSION_EXACT)
        set(_hwloc_exact TRUE)
      endif()
    elseif(hwloc_FIND_VERSION)
      set(_hwloc_version "${hwloc_FIND_VERSION}")
      if(hwloc_FIND_VERSION_EXACT)
        set(_hwloc_exact TRUE)
      endif()
    endif()

    if(_hwloc_version)
      if(_hwloc_exact)
        pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc=${_hwloc_version})
      else()
        pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc>=${_hwloc_version})
      endif()
    else()
      pkg_check_modules(Hwloc ${_hwloc_OPTS} hwloc)
    endif()
    unset(_hwloc_version)
    unset(_hwloc_exact)

    if(Hwloc_FOUND)
      include(FindPackageHandleStandardArgs)
      find_package_handle_standard_args(Hwloc DEFAULT_MSG Hwloc_LIBRARIES)

      # Quoted so that an empty version string cannot produce a malformed
      # if() expression.
      if(NOT "${Hwloc_VERSION}" VERSION_LESS "1.7.0")
        set(Hwloc_GL_FOUND 1)
      endif()

      if(_hwloc_output)
        message(STATUS
          "Found hwloc ${Hwloc_VERSION} in ${Hwloc_INCLUDE_DIRS}:${Hwloc_LIBRARIES}")
      endif()
    endif()

  endif() # cross-compile else

endif()
#
# Once done this will define::
#
#   Pthreads_FOUND - True if pthreads was found
#   Pthreads_INCLUDE_DIRS - include directories for pthreads
#   Pthreads_LIBRARIES - link against this library to use pthreads
#
# The module will also define two cache variables::
#
#   Pthreads_INCLUDE_DIR - the pthreads include directory
#   Pthreads_LIBRARY - the path to the pthreads library
#

# Header lookup: PTHREADS_ROOT or the 32-bit Program Files directory,
# each with an "include" suffix.
find_path(Pthreads_INCLUDE_DIR
  NAMES pthread.h
  PATHS ENV "PROGRAMFILES(X86)" ENV PTHREADS_ROOT
  PATH_SUFFIXES include
)

# Select the library sub-directory that matches the target's pointer size.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(LIB_PATH lib/x64)
else()
  set(LIB_PATH lib/x86)
endif()

# "pthreadVC2.lib" used to be listed twice; one entry is sufficient.
find_library(Pthreads_LIBRARY
  NAMES pthread.lib pthreadVC2.lib
  PATHS ENV PTHREADS_ROOT
  PATH_SUFFIXES ${LIB_PATH}
)

#
# All good
#
set(Pthreads_LIBRARIES ${Pthreads_LIBRARY})
set(Pthreads_INCLUDE_DIRS ${Pthreads_INCLUDE_DIR})

include(FindPackageHandleStandardArgs)
# NOTE(review): Pthreads_VERSION_STRING is not set anywhere in this module,
# so no version is reported - confirm whether that is intended.
find_package_handle_standard_args(
  Pthreads
  FOUND_VAR Pthreads_FOUND
  REQUIRED_VARS Pthreads_LIBRARY Pthreads_INCLUDE_DIR
  VERSION_VAR Pthreads_VERSION_STRING)

mark_as_advanced(
  Pthreads_INCLUDE_DIR
  Pthreads_LIBRARY)
# HSA driver configuration: decides between HSAIL and native code
# generation and locates the HSA runtime headers, library and (for HSAIL)
# the HSAILasm assembler.
#
# Inputs (all optional):
#   ENABLE_HSAIL                 - set to a false value to disable HSAIL
#   WITH_HSA_RUNTIME_DIR         - HSA runtime prefix (default: /opt/hsa)
#   WITH_HSA_RUNTIME_INCLUDE_DIR - overrides <prefix>/include
#   WITH_HSA_RUNTIME_LIB_DIR     - overrides <prefix>/lib
#   WITH_HSAILASM_PATH           - HSAILasm executable, or a directory
#                                  to search for it
# Outputs: HSAIL_ENABLED, AMD_HSA, HSA_RUNTIME_DIR, HSA_INCLUDEDIR,
#          HSA_LIBDIR, HSA_INCLUDES, HSALIB and, with HSAIL, HSAIL_ASM.

if(DEFINED ENABLE_HSAIL AND NOT ENABLE_HSAIL)
  set(HSAIL_ENABLED 0)
else()
  # NOTE(review): the amdgcn--amdhsa target check sits in the branch that
  # enables HSAIL, not in the native code generation branch - confirm that
  # this placement is intended.
  message(STATUS "Trying HSA support in LLVM")
  # test that Clang supports the amdgcn--amdhsa target
  custom_try_compile_clangxx("" "return 0;" RESULT "-target" "amdgcn--amdhsa" "-emit-llvm" "-S")
  if(RESULT)
    message(FATAL_ERROR "LLVM support for amdgcn--amdhsa target is required")
  endif()
  set(HSAIL_ENABLED 1)
endif()

if(NOT DEFINED AMD_HSA)
  set(AMD_HSA 1)
endif()

# find the headers & the library
if(DEFINED WITH_HSA_RUNTIME_DIR AND WITH_HSA_RUNTIME_DIR)
  set(HSA_RUNTIME_DIR "${WITH_HSA_RUNTIME_DIR}")
else()
  message(STATUS "WITH_HSA_RUNTIME_DIR not given, trying default path")
  set(HSA_RUNTIME_DIR "/opt/hsa")
endif()

# The runtime prefix is only used for defaults if it is an existing
# absolute path. This is evaluated once and shared by the library and
# include lookups below.
if((IS_ABSOLUTE "${HSA_RUNTIME_DIR}") AND (EXISTS "${HSA_RUNTIME_DIR}"))
  set(_hsa_runtime_dir_usable TRUE)
else()
  set(_hsa_runtime_dir_usable FALSE)
endif()
set(_hsa_runtime_dir_missed FALSE)

if(DEFINED WITH_HSA_RUNTIME_LIB_DIR AND WITH_HSA_RUNTIME_LIB_DIR)
  set(HSA_LIBDIR "${WITH_HSA_RUNTIME_LIB_DIR}")
elseif(_hsa_runtime_dir_usable)
  set(HSA_LIBDIR "${HSA_RUNTIME_DIR}/lib")
else()
  set(HSA_LIBDIR "")
  set(_hsa_runtime_dir_missed TRUE)
endif()

if(DEFINED WITH_HSA_RUNTIME_INCLUDE_DIR AND WITH_HSA_RUNTIME_INCLUDE_DIR)
  set(HSA_INCLUDEDIR "${WITH_HSA_RUNTIME_INCLUDE_DIR}")
elseif(_hsa_runtime_dir_usable)
  set(HSA_INCLUDEDIR "${HSA_RUNTIME_DIR}/include")
else()
  set(HSA_INCLUDEDIR "")
  set(_hsa_runtime_dir_missed TRUE)
endif()

# Warn once (previously the same warning could be printed twice).
if(_hsa_runtime_dir_missed)
  message(WARNING "${HSA_RUNTIME_DIR} is not a directory (using default system paths for search)")
endif()
unset(_hsa_runtime_dir_usable)
unset(_hsa_runtime_dir_missed)

# Prefer the explicit directory; the second call is a no-op when the first
# one succeeded and otherwise falls back to the default search paths.
find_path(HSA_INCLUDES "hsa.h" PATHS "${HSA_INCLUDEDIR}" NO_DEFAULT_PATH)
find_path(HSA_INCLUDES "hsa.h")
if(NOT HSA_INCLUDES)
  # The hint used to name a non-existent -DHSA_RUNTIME_DIR option.
  message(FATAL_ERROR "hsa.h header not found (use -DWITH_HSA_RUNTIME_DIR=... or -DWITH_HSA_RUNTIME_INCLUDE_DIR=... to specify path to HSA runtime)")
endif()

find_library(HSALIB NAMES "hsa-runtime64" "hsa-runtime" "phsa-runtime64" PATHS "${HSA_LIBDIR}" NO_DEFAULT_PATH)
find_library(HSALIB NAMES "hsa-runtime64" "hsa-runtime" "phsa-runtime64")
if(NOT HSALIB)
  message(FATAL_ERROR "libhsa-runtime not found (use -DWITH_HSA_RUNTIME_DIR=... or -DWITH_HSA_RUNTIME_LIB_DIR=... to specify path to HSA runtime); searched: '${HSA_LIBDIR}' and default system paths")
endif()

if(HSAIL_ENABLED)
  if(DEFINED WITH_HSAILASM_PATH)
    set(HSAILASM_SEARCH_PATH "${WITH_HSAILASM_PATH}")
  else()
    set(HSAILASM_SEARCH_PATH "${HSA_RUNTIME_DIR}")
  endif()

  # An existing non-directory path is taken to be the executable itself;
  # anything else is treated as a directory to search.
  if((EXISTS "${HSAILASM_SEARCH_PATH}") AND (NOT IS_DIRECTORY "${HSAILASM_SEARCH_PATH}"))
    set(HSAIL_ASM "${HSAILASM_SEARCH_PATH}")
  else()
    find_program(HSAIL_ASM "HSAILasm${CMAKE_EXECUTABLE_SUFFIX}" PATHS "${HSAILASM_SEARCH_PATH}" "${HSAILASM_SEARCH_PATH}/bin")
  endif()

  if(NOT HSAIL_ASM)
    message(FATAL_ERROR "HSAILasm executable not found (use -DWITH_HSAILASM_PATH=... to specify)")
  endif()
endif()

if(HSAIL_ENABLED)
  message(STATUS "OK, building HSA with HSAIL")
else()
  message(STATUS "OK, building HSA with native code generation")
endif()
# Locate llvm-config and query it for everything the build needs to know
# about the LLVM installation. The statements below are order dependent:
# later steps consume variables set by earlier ones.

if(DEFINED WITH_LLVM_CONFIG AND WITH_LLVM_CONFIG)
  # search for preferred version
  # An absolute path is used as-is when it exists; anything else is treated
  # as a program name and looked up with find_program().
  if(IS_ABSOLUTE "${WITH_LLVM_CONFIG}")
    if(EXISTS "${WITH_LLVM_CONFIG}")
      set(LLVM_CONFIG "${WITH_LLVM_CONFIG}")
    endif()
  else()
    find_program(LLVM_CONFIG NAMES "${WITH_LLVM_CONFIG}")
  endif()
else()
  # search for any version
  # Candidate names are listed from the newest supported release (9) to the
  # oldest (6), followed by the unversioned name.
  find_program(LLVM_CONFIG
    NAMES
      "llvm-config-mp-9.0" "llvm-config-9" "llvm-config90"
      "llvm-config-mp-8.0" "llvm-config-8" "llvm-config80"
      "llvm-config-mp-7.0" "llvm-config-7" "llvm-config70"
      "llvm-config-mp-6.0" "llvm-config-6.0" "llvm-config60"
      "llvm-config"
    DOC "llvm-config executable")
endif()

set(WITH_LLVM_CONFIG "${WITH_LLVM_CONFIG}" CACHE PATH "Path to preferred llvm-config")

if(NOT LLVM_CONFIG)
  message(FATAL_ERROR "llvm-config not found !")
else()
  file(TO_CMAKE_PATH "${LLVM_CONFIG}" LLVM_CONFIG)
  message(STATUS "Using llvm-config: ${LLVM_CONFIG}")
  # Derive the suffix of the LLVM binaries (e.g. "-6.0") from whatever
  # follows "llvm-config" in the executable name; it is reused when looking
  # up clang, opt, llc, ... further down in this file.
  if(LLVM_CONFIG MATCHES "llvm-config${CMAKE_EXECUTABLE_SUFFIX}$")
    set(LLVM_BINARY_SUFFIX "")
  elseif(LLVM_CONFIG MATCHES "llvm-config(.*)${CMAKE_EXECUTABLE_SUFFIX}$")
    set(LLVM_BINARY_SUFFIX "${CMAKE_MATCH_1}")
  else()
    # LLVM_BINARY_SUFFIX is left untouched in this case.
    message(WARNING "Cannot determine llvm binary suffix from ${LLVM_CONFIG}")
  endif()
  message(STATUS "LLVM binaries suffix : ${LLVM_BINARY_SUFFIX}")
endif()

get_filename_component(LLVM_CONFIG_LOCATION "${LLVM_CONFIG}" DIRECTORY)

##########################################################################

# A macro to run llvm config
#
# run_llvm_config(<VARIABLE_NAME> <llvm-config args>...)
#   Runs llvm-config with the given arguments and stores its stdout, with
#   trailing whitespace stripped, in <VARIABLE_NAME>. A non-zero exit status
#   is reported with SEND_ERROR (configuration continues, generation is
#   blocked); on success the value is echoed as a STATUS message.
#   The exit status is left in LLVM_CONFIG_RETVAL.
macro(run_llvm_config VARIABLE_NAME)
  execute_process(
    COMMAND "${LLVM_CONFIG}" ${ARGN}
    OUTPUT_VARIABLE ${VARIABLE_NAME}
    RESULT_VARIABLE LLVM_CONFIG_RETVAL
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
  if(LLVM_CONFIG_RETVAL)
    message(SEND_ERROR "Error running llvm-config with arguments: ${ARGN}")
  else()
    message(STATUS "llvm-config's ${VARIABLE_NAME} is: ${${VARIABLE_NAME}}")
  endif()
endmacro(run_llvm_config)

run_llvm_config(LLVM_PREFIX --prefix)
# On Windows, llvm-config returns mixed-style paths such as
# "C:\llvm_prefix/bin", and CMake does not like the "\" because it treats
# it as an escape character. LLVM_PREFIX_CMAKE is the normalised form; the
# string(REPLACE ...) calls below rewrite the raw prefix inside the other
# llvm-config outputs to this form.
file(TO_CMAKE_PATH "${LLVM_PREFIX}" LLVM_PREFIX_CMAKE)

set(LLVM_PREFIX_BIN "${LLVM_PREFIX_CMAKE}/bin")
run_llvm_config(LLVM_VERSION_FULL --version)
# Sanitize the version: `llvm-config --version` on Debian might return
# 3.4.1 while the LLVM command names still carry only -3.4, so keep just
# major.minor.
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1.\\2" LLVM_VERSION "${LLVM_VERSION_FULL}")
message(STATUS "LLVM_VERSION: ${LLVM_VERSION}")

run_llvm_config(LLVM_CFLAGS --cflags)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_CFLAGS "${LLVM_CFLAGS}")
run_llvm_config(LLVM_CXXFLAGS --cxxflags)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")
run_llvm_config(LLVM_CPPFLAGS --cppflags)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_CPPFLAGS "${LLVM_CPPFLAGS}")
run_llvm_config(LLVM_LDFLAGS --ldflags)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_LDFLAGS "${LLVM_LDFLAGS}")
run_llvm_config(LLVM_BINDIR --bindir)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_BINDIR "${LLVM_BINDIR}")
run_llvm_config(LLVM_LIBDIR --libdir)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_LIBDIR "${LLVM_LIBDIR}")
run_llvm_config(LLVM_INCLUDEDIR --includedir)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}")

run_llvm_config(LLVM_LIBS --libs)
# Convert LLVM_LIBS from string -> list format to make handling them easier
separate_arguments(LLVM_LIBS)

# workaround for a bug in current HSAIL LLVM
# it forgets to report one HSAIL library in llvm-config
# NOTE(review): LLVM_LIBS is unset and queried again further down (after
# the link mode has been determined), which discards this append - confirm
# whether the workaround is still meant to take effect.
if(ENABLE_HSA)
  list(APPEND LLVM_LIBS "-lLLVMHSAILUtil")
endif()

run_llvm_config(LLVM_SRC_ROOT --src-root)
run_llvm_config(LLVM_OBJ_ROOT --obj-root)
string(REPLACE "${LLVM_PREFIX}" "${LLVM_PREFIX_CMAKE}" LLVM_OBJ_ROOT "${LLVM_OBJ_ROOT}")
run_llvm_config(LLVM_ALL_TARGETS --targets-built)
run_llvm_config(LLVM_HOST_TARGET --host-target)
run_llvm_config(LLVM_BUILD_MODE --build-mode)
run_llvm_config(LLVM_ASSERTS_BUILD --assertion-mode)
run_llvm_config(LLVM_SYSLIBS --system-libs)
string(STRIP "${LLVM_SYSLIBS}" LLVM_SYSLIBS)

# With MSVC, drop the "-L<libdir>" entry from the linker flags.
if(MSVC)
  string(REPLACE "-L${LLVM_LIBDIR}" "" LLVM_LDFLAGS "${LLVM_LDFLAGS}")
  string(STRIP "${LLVM_LDFLAGS}" LLVM_LDFLAGS)
endif()

# Boolean form of the build mode (1 when the mode string contains "Debug").
if(LLVM_BUILD_MODE MATCHES "Debug")
  set(LLVM_BUILD_MODE_DEBUG 1)
else()
  set(LLVM_BUILD_MODE_DEBUG 0)
endif()

# Ubuntu's llvm reports "arm-unknown-linux-gnueabihf" triple, then if one tries
# `clang --target=arm-unknown-linux-gnueabihf ...` it will produce armv6 code,
# even if one's running armv7;
# Here we replace the "arm" string with whatever's in CMAKE_HOST_SYSTEM_PROCESSOR
# which should be "armv6l" on rasp pi, or "armv7l" on my cubieboard, hopefully its
# more reasonable and reliable than llvm's own host flags
if(NOT CMAKE_CROSSCOMPILING)
  string(REPLACE "arm-" "${CMAKE_HOST_SYSTEM_PROCESSOR}-" LLVM_HOST_TARGET "${LLVM_HOST_TARGET}")
endif()

# In windows llvm-config reports --target=x86_64-pc-windows-msvc
# however this causes clang to use MicrosoftCXXMangler, which does not
# yet support mangling for extended vector types (with llvm 3.5)
# so for now hardcode LLVM_HOST_TARGET to be x86_64-pc with windows
if(WIN32)
  set(LLVM_HOST_TARGET "x86_64-pc")
endif(WIN32)

# required for sources..
# Map the detected version onto LLVM_MAJOR, an LLVM_<major>_0 flag and the
# LLVM_OLDER_THAN_<x>_0 flags; versions outside 6.0 - 9.x are rejected.
if(LLVM_VERSION MATCHES "^6[.]0")
  set(LLVM_MAJOR 6)
  set(LLVM_6_0 1)
  set(LLVM_OLDER_THAN_7_0 1)
  set(LLVM_OLDER_THAN_8_0 1)
  set(LLVM_OLDER_THAN_9_0 1)
elseif(LLVM_VERSION MATCHES "^7[.]")
  set(LLVM_MAJOR 7)
  set(LLVM_7_0 1)
  set(LLVM_OLDER_THAN_8_0 1)
  set(LLVM_OLDER_THAN_9_0 1)
elseif(LLVM_VERSION MATCHES "^8[.]")
  set(LLVM_MAJOR 8)
  set(LLVM_8_0 1)
  set(LLVM_OLDER_THAN_9_0 1)
elseif(LLVM_VERSION MATCHES "^9[.]")
  set(LLVM_MAJOR 9)
  set(LLVM_9_0 1)
else()
  message(FATAL_ERROR "LLVM version between 6.0 and 9.0 required, found: ${LLVM_VERSION}")
endif()

#############################################################

run_llvm_config(LLVM_HAS_RTTI --has-rtti)
run_llvm_config(LLVM_LIB_IS_SHARED --shared-mode)

# Pick the llvm-config link-mode switch that matches how LLVM was built.
if(LLVM_LIB_IS_SHARED MATCHES "shared")
  set(LLVM_LIB_MODE --link-shared)
else()
  set(LLVM_LIB_MODE --link-static)
endif()

# Query the library lists again, this time with the explicit link mode.
# This replaces the LLVM_LIBS / LLVM_SYSLIBS values obtained above.
unset(LLVM_LIBS)
run_llvm_config(LLVM_LIBS --libs ${LLVM_LIB_MODE})
# Convert LLVM_LIBS from string -> list format to make handling them easier
separate_arguments(LLVM_LIBS)
run_llvm_config(LLVM_SYSLIBS --system-libs ${LLVM_LIB_MODE})
string(STRIP "${LLVM_SYSLIBS}" LLVM_SYSLIBS)

####################################################################

# A few work-arounds for llvm-config issues

# - pocl doesn't compile with '-pedantic'
#LLVM_CXX_FLAGS=$($LLVM_CONFIG --cxxflags | sed -e 's/ -pedantic / /g')
string(REPLACE " -pedantic" "" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")

# llvm-config clutters CXXFLAGS with a lot of -W flags.
# (They are not needed - we want to use -Wall anyways)
# This is a problem if LLVM was built with a different compiler than we use here,
# and our compiler chokes on unrecognized command-line options.
# Remove every -W... option that llvm-config put into the C++ flags.
string(REGEX REPLACE "-W[^ ]*" "" LLVM_CXXFLAGS "${LLVM_CXXFLAGS}")

# Llvm-config does not include clang libs
set(CLANG_LIBNAMES clangCodeGen clangFrontendTool clangFrontend clangDriver
  clangSerialization clangParse clangSema clangRewrite clangRewriteFrontend
  clangStaticAnalyzerFrontend clangStaticAnalyzerCheckers
  clangStaticAnalyzerCore clangAnalysis clangEdit clangAST clangASTMatchers
  clangLex clangBasic)

# Resolve each clang library to a file below the LLVM library directory
# and, on non-Apple UNIX, keep its symbols out of the exported set.
foreach(LIBNAME ${CLANG_LIBNAMES})
  find_library(C_LIBFILE_${LIBNAME} NAMES "${LIBNAME}" HINTS "${LLVM_LIBDIR}")
  list(APPEND CLANG_LIBFILES "${C_LIBFILE_${LIBNAME}}")
  if(UNIX AND (NOT APPLE))
    set(LLVM_LDFLAGS "${LLVM_LDFLAGS} -Wl,--exclude-libs,lib${LIBNAME}")
  endif()
endforeach()

# With Visual Studio llvm-config gives invalid list of static libs (libXXXX.a instead of XXXX.lib)
# we extract the pure names (LLVMLTO, LLVMMipsDesc etc) and let find_library do its job
foreach(LIBFLAG ${LLVM_LIBS})
  string(REGEX REPLACE "^-l(.*)$" "\\1" LIB_NAME "${LIBFLAG}")
  list(APPEND LLVM_LIBNAMES "${LIB_NAME}")
endforeach()

foreach(LIBNAME ${LLVM_LIBNAMES})
  find_library(L_LIBFILE_${LIBNAME} NAMES "${LIBNAME}" HINTS "${LLVM_LIBDIR}")
  list(APPEND LLVM_LIBFILES "${L_LIBFILE_${LIBNAME}}")
endforeach()

####################################################################

# find_program_or_die(<OUTPUT_VAR> <PROG_NAME> <DOCSTRING>)
#   Looks for an LLVM tool, first under its suffixed name
#   (<PROG_NAME><LLVM_BINARY_SUFFIX>) and then under its plain name, using
#   the LLVM binary directories as hints. Stops configuration with
#   FATAL_ERROR when the tool cannot be found.
macro(find_program_or_die OUTPUT_VAR PROG_NAME DOCSTRING)
  find_program(${OUTPUT_VAR}
    NAMES "${PROG_NAME}${LLVM_BINARY_SUFFIX}${CMAKE_EXECUTABLE_SUFFIX}"
          "${PROG_NAME}${CMAKE_EXECUTABLE_SUFFIX}"
    HINTS "${LLVM_BINDIR}" "${LLVM_CONFIG_LOCATION}" "${LLVM_PREFIX}" "${LLVM_PREFIX_BIN}"
    DOC "${DOCSTRING}")
  if(${OUTPUT_VAR})
    message(STATUS "Found ${PROG_NAME}: ${${OUTPUT_VAR}}")
  else()
    message(FATAL_ERROR "${PROG_NAME} executable not found!")
  endif()
endmacro()

find_program_or_die( CLANG "clang" "clang binary")
execute_process(COMMAND "${CLANG}" "--version" OUTPUT_VARIABLE LLVM_CLANG_VERSION RESULT_VARIABLE CLANG_RES)
# TODO this should be optional
find_program_or_die( CLANGXX "clang++" "clang++ binary")
execute_process(COMMAND "${CLANGXX}" "--version" OUTPUT_VARIABLE LLVM_CLANGXX_VERSION RESULT_VARIABLE CLANGXX_RES)
if(CLANGXX_RES OR CLANG_RES)
  message(FATAL_ERROR "Failed running clang/clang++ --version")
endif()

find_program_or_die(LLVM_OPT  "opt"       "LLVM optimizer")
find_program_or_die(LLVM_LLC  "llc"       "LLVM static compiler")
find_program_or_die(LLVM_AS   "llvm-as"   "LLVM assembler")
find_program_or_die(LLVM_LINK "llvm-link" "LLVM IR linker")
find_program_or_die(LLVM_LLI  "lli"       "LLVM interpreter")

# llvm-spirv is optional: it is searched for in the same places, but its
# absence is not an error.
if(NOT DEFINED LLVM_SPIRV)
  find_program(LLVM_SPIRV
    NAMES "llvm-spirv${LLVM_BINARY_SUFFIX}${CMAKE_EXECUTABLE_SUFFIX}"
          "llvm-spirv${CMAKE_EXECUTABLE_SUFFIX}"
    HINTS "${LLVM_BINDIR}" "${LLVM_CONFIG_LOCATION}" "${LLVM_PREFIX}" "${LLVM_PREFIX_BIN}")
  if(LLVM_SPIRV)
    message(STATUS "Found llvm-spirv: ${LLVM_SPIRV}")
  endif()
endif()

####################################################################

# try compile with any compiler (supplied as argument)
#
# custom_try_compile_any(<SILENT> <COMPILER> <SUFFIX> <SOURCE> <RES_VAR> [args...])
#   Writes <SOURCE> to a randomly named file with extension <SUFFIX> in the
#   build directory, runs "<COMPILER> [args...] <file>" and stores the
#   result of execute_process() in <RES_VAR> (0 on success). Unless <SILENT>
#   is true, the command line, status and captured output of a failed run
#   are printed. The temporary source file is removed afterwards.
#   Because this is a macro, RNDNAME, RANDOM_FILENAME, LSIZE, OV and EV are
#   set in the caller's scope.
macro(custom_try_compile_any SILENT COMPILER SUFFIX SOURCE RES_VAR)
  string(RANDOM RNDNAME)
  set(RANDOM_FILENAME "${CMAKE_BINARY_DIR}/compile_test_${RNDNAME}.${SUFFIX}")
  file(WRITE "${RANDOM_FILENAME}" "${SOURCE}")

  # NOTE(review): LSIZE is not used inside this macro; macro variables leak
  # into the caller's scope, so verify nothing reads it before removing it.
  math(EXPR LSIZE "${ARGC} - 4")

  execute_process(COMMAND "${COMPILER}" ${ARGN} "${RANDOM_FILENAME}"
    RESULT_VARIABLE ${RES_VAR}
    OUTPUT_VARIABLE OV
    ERROR_VARIABLE EV)

  # The result is compared as a quoted string: execute_process() can return
  # a textual error description (e.g. when the compiler cannot be started)
  # instead of a number, and expanding such text unquoted inside if() is a
  # CMake syntax error. Anything other than "0" counts as failure.
  if((NOT "${${RES_VAR}}" STREQUAL "0") AND (NOT ${SILENT}))
    message(STATUS " ########## The command: ")
    string(REPLACE ";" " " ARGN_STR "${ARGN}")
    message(STATUS "${COMPILER} ${ARGN_STR} ${RANDOM_FILENAME}")
    message(STATUS " ########## Exited with nonzero status: ${${RES_VAR}}")
    if(OV)
      message(STATUS "STDOUT: ${OV}")
    endif()
    if(EV)
      message(STATUS "STDERR: ${EV}")
    endif()
  endif()

  file(REMOVE "${RANDOM_FILENAME}")
endmacro()

# convenience c/c++ source wrapper
#
# custom_try_compile_c_cxx(<COMPILER> <SUFFIX> <SOURCE1> <SOURCE2> <RES_VAR> [args...])
#   Builds a complete program from <SOURCE1> (file-scope code) and
#   <SOURCE2> (the body of main) and compiles it, non-silently, through
#   custom_try_compile_any().
macro(custom_try_compile_c_cxx COMPILER SUFFIX SOURCE1 SOURCE2 RES_VAR)
  set(SOURCE_PROG "
  ${SOURCE1}

  int main(int argc, char** argv) {

  ${SOURCE2}

  }")
  custom_try_compile_any(FALSE "${COMPILER}" ${SUFFIX} "${SOURCE_PROG}" ${RES_VAR} ${ARGN})
endmacro()
# convenience c/c++ source wrapper (silent variant)
#
# Same as custom_try_compile_c_cxx, but compiler failures are not reported.
macro(custom_try_compile_c_cxx_silent COMPILER SUFFIX SOURCE1 SOURCE2 RES_VAR)
  set(SOURCE_PROG "
  ${SOURCE1}

  int main(int argc, char** argv) {

  ${SOURCE2}

  }")
  custom_try_compile_any(TRUE "${COMPILER}" ${SUFFIX} "${SOURCE_PROG}" ${RES_VAR} ${ARGN})
endmacro()

# clang++ try-compile macro: compiles (-c) a C++ test program with CLANGXX
macro(custom_try_compile_clangxx SOURCE1 SOURCE2 RES_VAR)
  custom_try_compile_c_cxx("${CLANGXX}" "cc" "${SOURCE1}" "${SOURCE2}" ${RES_VAR} "-c" ${ARGN})
endmacro()

# clang try-compile macro: compiles (-c) a C test program with CLANG
macro(custom_try_compile_clang SOURCE1 SOURCE2 RES_VAR)
  custom_try_compile_c_cxx("${CLANG}" "c" "${SOURCE1}" "${SOURCE2}" ${RES_VAR} "-c" ${ARGN})
endmacro()

# clang try-compile macro, silent variant (failures are not reported)
macro(custom_try_compile_clang_silent SOURCE1 SOURCE2 RES_VAR)
  custom_try_compile_c_cxx_silent("${CLANG}" "c" "${SOURCE1}" "${SOURCE2}" ${RES_VAR} "-c" ${ARGN})
endmacro()

# clang try-link macro: compiles AND links a C test program with CLANG.
# The linked output gets its own random name. The name must not be derived
# from RNDNAME / RANDOM_FILENAME, because custom_try_compile_any overwrites
# both while it runs; reusing them here would remove the wrong file and leave
# the linked binary behind in the build directory.
macro(custom_try_link_clang SOURCE1 SOURCE2 RES_VAR)
  string(RANDOM LINK_TEST_RNDNAME)
  set(LINK_TEST_OUTPUT "${CMAKE_BINARY_DIR}/link_test_${LINK_TEST_RNDNAME}${CMAKE_EXECUTABLE_SUFFIX}")
  custom_try_compile_c_cxx_silent("${CLANG}" "c" "${SOURCE1}" "${SOURCE2}" ${RES_VAR} "-o" "${LINK_TEST_OUTPUT}" ${ARGN})
  file(REMOVE "${LINK_TEST_OUTPUT}")
endmacro()

# clang try-compile-run macro, running via native executable
#
# custom_try_run_exe(<SOURCE1> <SOURCE2> <OUTPUT_VAR> <RES_VAR>)
# Compiles the test program to CMAKE_BINARY_DIR/try_run and executes it.
# OUTPUT_VAR receives the program's stdout and RES_VAR its exit status; both
# are left empty when compilation fails.
macro(custom_try_run_exe SOURCE1 SOURCE2 OUTPUT_VAR RES_VAR)
  set(OUTF "${CMAKE_BINARY_DIR}/try_run${CMAKE_EXECUTABLE_SUFFIX}")
  if(EXISTS "${OUTF}")
    file(REMOVE "${OUTF}")
  endif()
  custom_try_compile_c_cxx("${CLANG}" "c" "${SOURCE1}" "${SOURCE2}" RESV "-o" "${OUTF}" "-x" "c")
  set(${OUTPUT_VAR} "")
  set(${RES_VAR} "")
  if(RESV OR (NOT EXISTS "${OUTF}"))
    message(STATUS " ########## Compilation failed")
  else()
    execute_process(COMMAND "${OUTF}"
      RESULT_VARIABLE RESV
      OUTPUT_VARIABLE ${OUTPUT_VAR}
      ERROR_VARIABLE EV)
    set(${RES_VAR} "${RESV}")
    file(REMOVE "${OUTF}")
    # RESV and the captured stdout are tested by name / as a quoted string:
    # expanding arbitrary program output directly into if() is a syntax error
    # as soon as it contains more than one word.
    if(RESV)
      message(STATUS " ########## Running ${OUTF}")
      message(STATUS " ########## Exited with nonzero status: ${RESV}")
      if(NOT "${${OUTPUT_VAR}}" STREQUAL "")
        message(STATUS " ########## STDOUT: ${${OUTPUT_VAR}}")
      endif()
      if(EV)
        message(STATUS " ########## STDERR: ${EV}")
      endif()
    endif()
  endif()
endmacro()

# clang try-compile-run macro, run via lli, the llvm interpreter
#
# custom_try_run_lli(<SILENT> <SOURCE1> <SOURCE2> <OUTPUT_VAR> <RES_VAR> [clang flags...])
# Compiles the test program to LLVM bitcode and runs it with
# "lli -force-interpreter". OUTPUT_VAR receives stdout and RES_VAR the exit
# status; both are left empty when compilation fails. A nonzero exit status
# is only reported when SILENT is false.
macro(custom_try_run_lli SILENT SOURCE1 SOURCE2 OUTPUT_VAR RES_VAR)
  # this uses "lli" - the interpreter, so we can run any -target
  # TODO variable for target !!
  set(OUTF "${CMAKE_BINARY_DIR}/try_run.bc")
  if(EXISTS "${OUTF}")
    file(REMOVE "${OUTF}")
  endif()
  custom_try_compile_c_cxx("${CLANG}" "c" "${SOURCE1}" "${SOURCE2}" RESV "-o" "${OUTF}" "-x" "c" "-emit-llvm" "-c" ${ARGN})
  set(${OUTPUT_VAR} "")
  set(${RES_VAR} "")
  if(RESV OR (NOT EXISTS "${OUTF}"))
    message(STATUS " ########## Compilation failed")
  else()
    execute_process(COMMAND "${LLVM_LLI}" "-force-interpreter" "${OUTF}"
      RESULT_VARIABLE RESV
      OUTPUT_VARIABLE ${OUTPUT_VAR}
      ERROR_VARIABLE EV)
    set(${RES_VAR} "${RESV}")
    file(REMOVE "${OUTF}")
    if(RESV AND (NOT ${SILENT}))
      message(STATUS " ########## The command ${LLVM_LLI} -force-interpreter ${OUTF}")
      message(STATUS " ########## Exited with nonzero status: ${RESV}")
      if(NOT "${${OUTPUT_VAR}}" STREQUAL "")
        message(STATUS " ########## STDOUT: ${${OUTPUT_VAR}}")
      endif()
      if(EV)
        message(STATUS " ########## STDERR: ${EV}")
      endif()
    endif()
  endif()
endmacro()

####################################################################

####################################################################

# The option for specifying the target changed; try the modern syntax
# first, and fall back to the old-style syntax if this failed

if(NOT DEFINED CLANG_TARGET_OPTION AND ENABLE_HOST_CPU_DEVICES)

  custom_try_compile_clangxx("" "return 0;" RES "--target=${LLVM_HOST_TARGET}")
  if(NOT RES)
    set(CLANG_TGT "--target=")
  else()
    #EXECUTE_PROCESS(COMMAND "${CLANG}" "-target ${LLVM_HOST_TARGET}" "-x" "c" "/dev/null" "-S" RESULT_VARIABLE RES)
    # NOTE(review): option and value are handed to clang++ as ONE argument
    # ("-target <triple>") here - confirm that this fallback can succeed.
    custom_try_compile_clangxx("" "return 0;" RES "-target ${LLVM_HOST_TARGET}")
    if(NOT RES)
      set(CLANG_TGT "-target ")
    else()
      message(FATAL_ERROR "Cannot determine Clang option to specify the target")
    endif()
  endif()

  set(CLANG_TARGET_OPTION ${CLANG_TGT} CACHE INTERNAL "Clang option used to specify the target" )

endif()

####################################################################

####################################################################

# Probe whether clang can link wide-integer arithmetic on its own or needs
# --rtlib=compiler-rt. Results are cached in CLANG_HAS_64B_MATH,
# CLANG_HAS_128B_MATH and CLANG_NEEDS_RTLIB.
if(NOT DEFINED CLANG_NEEDS_RTLIB)

  set(RT128 OFF)
  set(RT64 OFF)
  set(NEEDS_RTLIB_FLAG OFF)

  # on 32bit systems, we need 64bit emulation
  if(CMAKE_SIZEOF_VOID_P EQUAL 4)
    # NOTE(review): the header names were missing from this chunk; int64_t
    # needs <stdint.h>, <stddef.h> is assumed for the second include.
    set(INC "#include <stdint.h>\n#include <stddef.h>")
    set(SRC "int64_t a = argc; int64_t b = argc-1; int64_t c = a / b; return (int)c; ")
    custom_try_link_clang("${INC}" "${SRC}" RES)
    if(NOT RES)
      message(STATUS "64bit division compiles without extra flags")
      set(RT64 ON)
    else()
      custom_try_link_clang("${INC}" "${SRC}" RES "--rtlib=compiler-rt")
      if(NOT RES)
        message(STATUS "64bit division compiles WITH --rtlib=compiler-rt")
        set(NEEDS_RTLIB_FLAG ON)
        set(RT64 ON)
      else()
        message(WARNING "64bit division doesn't compile at all!")
      endif()
    endif()

  else()
    set(RT64 ON)

    # on 64bit systems, we need 128bit integers for Errol
    set(INC "extern __uint128_t __udivmodti4(__uint128_t a, __uint128_t b, __uint128_t* rem);")
    set(SRC "__uint128_t low, mid, tmp1, pow19 = (__uint128_t)1000000000; mid = __udivmodti4(low, pow19, &tmp1); return 0;")
    custom_try_link_clang("${INC}" "${SRC}" RES)
    if(NOT RES)
      message(STATUS "udivmodti4 compiles without extra flags")
      set(RT128 ON)
    else()
      custom_try_link_clang("${INC}" "${SRC}" RES "--rtlib=compiler-rt")
      if(NOT RES)
        message(STATUS "udivmodti4 compiles WITH --rtlib=compiler-rt")
        set(NEEDS_RTLIB_FLAG ON)
        set(RT128 ON)
      else()
        message(WARNING "udivmodti4 doesn't compile at all!")
      endif()
    endif()

  endif()

  set(CLANG_HAS_64B_MATH ${RT64} CACHE INTERNAL "Clang's available with 64bit math")
  set(CLANG_HAS_128B_MATH ${RT128} CACHE INTERNAL "Clang's available with 128bit math")
  set(CLANG_NEEDS_RTLIB ${NEEDS_RTLIB_FLAG} CACHE INTERNAL "Clang needs extra --rtlib flag for compiler-rt math")

endif()

####################################################################
# CHECK_ALIGNOF(<TYPE> <TYPEDEF> <OUT_VAR>)
# Determines the alignment of TYPE for the target triple by running a small
# program through lli: the program returns offsetof() of a TYPE member that
# follows a char, so the alignment arrives as the exit status. TYPEDEF is
# emitted at file scope in front of the test. The result is cached in OUT_VAR
# and the probe is skipped when OUT_VAR is already defined.
macro(CHECK_ALIGNOF TYPE TYPEDEF OUT_VAR)

  if(NOT DEFINED "${OUT_VAR}")

    # The preprocessor directives must stay on separate lines.
    custom_try_run_lli(TRUE "
#ifndef offsetof
#define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0)
#endif

${TYPEDEF}" "typedef struct { char x; ${TYPE} y; } ac__type_alignof_;
    int r = offsetof(ac__type_alignof_, y);
    return r;"
      SIZEOF_STDOUT RESULT "${CLANG_TARGET_OPTION}${LLC_TRIPLE}")

    #message(FATAL_ERROR "SIZEOF: ${SIZEOF_STDOUT} RES: ${RESULT}")

    # RESULT is tested by name: it is empty when compilation failed, and
    # expanding an empty value would leave a bare "if(NOT)" that never
    # triggers the error below.
    if(NOT RESULT)
      message(SEND_ERROR "Could not determine align of(${TYPE})")
    endif()

    set(${OUT_VAR} "${RESULT}" CACHE INTERNAL "Align of ${TYPE}")

  endif()

endmacro()

####################################################################
#
# clangxx works check
#

# TODO clang + vecmathlib doesn't work on Windows yet...
if(CLANGXX AND (NOT WIN32) AND ENABLE_HOST_CPU_DEVICES)

  message(STATUS "Checking if clang++ works (required by vecmathlib)")

  set(CXX_WORKS 0)
  set(CXX_STDLIB "")

  if(NOT DEFINED CLANGXX_WORKS)

    # The same test program is compiled with the default C++ standard library
    # first, then with libstdc++ and finally with libc++.
    # NOTE(review): the header names were missing from this chunk;
    # <iostream> is required for std::cout, <string> is assumed.
    set(_CLANGXX_TEST_INC "namespace std { class type_info; } \n  #include <iostream> \n  #include <string>")
    set(_CLANGXX_TEST_SRC "std::cout << \"Hello clang++ world!\" << std::endl;")

    custom_try_compile_clangxx("${_CLANGXX_TEST_INC}" "${_CLANGXX_TEST_SRC}" _STATUS_FAIL "-std=c++11")

    if(NOT _STATUS_FAIL)
      set(CXX_WORKS 1)
    else()
      custom_try_compile_clangxx("${_CLANGXX_TEST_INC}" "${_CLANGXX_TEST_SRC}" _STATUS_FAIL "-stdlib=libstdc++" "-std=c++11")
      if (NOT _STATUS_FAIL)
        set(CXX_STDLIB "-stdlib=libstdc++")
        set(CXX_WORKS 1)
      else()
        custom_try_compile_clangxx("${_CLANGXX_TEST_INC}" "${_CLANGXX_TEST_SRC}" _STATUS_FAIL "-stdlib=libc++" "-std=c++11")
        if(NOT _STATUS_FAIL)
          set(CXX_STDLIB "-stdlib=libc++")
          set(CXX_WORKS 1)
        endif()
      endif()
    endif()

    set(CLANGXX_WORKS ${CXX_WORKS} CACHE INTERNAL "Clang++ ")
    set(CLANGXX_STDLIB ${CXX_STDLIB} CACHE INTERNAL "Clang++ stdlib")
  endif()

endif()

# When the host compiler is Clang too, build and link with the same stdlib
# selection that made the clang++ probe above succeed.
if(CLANGXX_STDLIB AND (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
  set(LLVM_CXXFLAGS "${CLANGXX_STDLIB} ${LLVM_CXXFLAGS}")
  set(LLVM_LDFLAGS "${CLANGXX_STDLIB} ${LLVM_LDFLAGS}")
endif()

####################################################################
#
# - '-DNDEBUG' is a work-around for llvm bug 18253
#
# llvm-config does not always report the "-DNDEBUG" flag correctly
# (see LLVM bug 18253). If LLVM and the pocl passes are built with
# different NDEBUG settings, problems arise

if(NOT DEFINED LLVM_NDEBUG_BUILD)

  message(STATUS "Checking if LLVM is a DEBUG build")

  separate_arguments(_FLAGS UNIX_COMMAND "${LLVM_CXXFLAGS}")

  set(_TEST_SOURCE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/llvmNDEBUG.cc")
  # The test is built with -UNDEBUG; if it builds, LLVM is treated as a DEBUG
  # build (see the messages below).
  # NOTE(review): the header name was missing from this chunk;
  # llvm/Support/Debug.h is assumed as the header declaring llvm::DebugFlag.
  file(WRITE "${_TEST_SOURCE}"
    "
      #include <llvm/Support/Debug.h>
      int main(int argc, char** argv) {
        llvm::DebugFlag=true;
      }
    ")

  set(TRY_COMPILE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${LLVM_CXXFLAGS} -UNDEBUG")

  try_compile(_TRY_SUCCESS ${CMAKE_BINARY_DIR} "${_TEST_SOURCE}"
    CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${LLVM_INCLUDE_DIRS}"
    CMAKE_FLAGS "-DLINK_DIRECTORIES:STRING=${LLVM_LIBDIR}"
    LINK_LIBRARIES "${LLVM_LIBS} ${LLVM_SYSLIBS} ${LLVM_LDFLAGS}"
    COMPILE_DEFINITIONS ${TRY_COMPILE_CXX_FLAGS}
    OUTPUT_VARIABLE _TRY_COMPILE_OUTPUT
  )

  file(APPEND "${CMAKE_BINARY_DIR}/CMakeFiles/CMakeOutput.log"
    "Test -NDEBUG flag: ${_TRY_COMPILE_OUTPUT}\n")

  if(_TRY_SUCCESS)
    message(STATUS "DEBUG build")
    set(LLVM_NDEBUG_BUILD 0 CACHE INTERNAL "DNDEBUG")
  else()
    message(STATUS "Not a DEBUG build")
    set(LLVM_NDEBUG_BUILD 1 CACHE INTERNAL "DNDEBUG")
  endif()

endif()

if((NOT LLVM_CXXFLAGS MATCHES "-DNDEBUG") AND LLVM_NDEBUG_BUILD)
  message(STATUS "adding -DNDEBUG explicitly")
  set(LLVM_CXXFLAGS "${LLVM_CXXFLAGS} -DNDEBUG")
endif()

####################################################################

# TODO: We need to set both target-triple and cpu-type when
# building, since the ABI depends on both. We can either add flags
# to all the scripts, or set the respective flags here in
# *_CLANG_FLAGS and *_LLC_FLAGS. Note that clang and llc use
# different option names to set these.
# Note that clang calls the
# triple "target" and the cpu "architecture", which is different
# from llc.

# Normalise the triple. Otherwise, clang normalises it when
# passing it to llc, which is then different from the triple we
# pass to llc. This would lead to inconsistent bytecode files,
# depending on whether they are generated via clang or directly
# via llc.

if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED LLC_TRIPLE)
  message(STATUS "Find out LLC target triple (for host ${LLVM_HOST_TARGET})")
  set(_EMPTY_C_FILE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/tripletfind.c")
  file(WRITE "${_EMPTY_C_FILE}" "")

  # Compile an empty C file to textual LLVM IR and read the triple that
  # clang wrote into the module header.
  execute_process(COMMAND ${CLANG} "${CLANG_TARGET_OPTION}${LLVM_HOST_TARGET}" -x c ${_EMPTY_C_FILE} -S -emit-llvm -o -
    RESULT_VARIABLE RES_VAR
    OUTPUT_VARIABLE OUTPUT_VAR)
  if(RES_VAR)
    message(FATAL_ERROR "Error ${RES_VAR} while determining target triple")
  endif()
  if(OUTPUT_VAR MATCHES "target triple = \"([^\"]+)")
    string(STRIP "${CMAKE_MATCH_1}" LLC_TRIPLE)
  else()
    message(FATAL_ERROR "Could not find target triple in llvm output")
  endif()

  # TODO the armv7hl normalize
  string(REPLACE "armv7l-" "armv7-" LLC_TRIPLE "${LLC_TRIPLE}")

  set(LLC_TRIPLE "${LLC_TRIPLE}" CACHE INTERNAL "LLC_TRIPLE")
endif()

# FIXME: The cpu name printed by llc --version is the same cpu that will be
# targeted if you pass -mcpu=native to llc, so we could replace this auto-detection
# with just: set(LLC_HOST_CPU "native"), however, we can't do this at the moment
# because of the work-around for arm1176jz-s.
if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED LLC_HOST_CPU_AUTO)
  message(STATUS "Find out LLC host CPU with ${LLVM_LLC}")
  execute_process(COMMAND ${LLVM_LLC} "--version"
    RESULT_VARIABLE RES_VAR
    OUTPUT_VARIABLE OUTPUT_VAR)
  # WTF, ^^ has return value 1
  #if(RES_VAR)
  #  message(FATAL_ERROR "Error ${RES_VAR} while determining LLC host CPU")
  #endif()

  if(OUTPUT_VAR MATCHES "Host CPU: ([^ ]*)")
    # sigh... STRING(STRIP is to workaround regexp bug in cmake
    string(STRIP "${CMAKE_MATCH_1}" LLC_HOST_CPU_AUTO)
  else()
    message(FATAL_ERROR "Couldnt determine host CPU from llc output")
  endif()

  #TODO better
  if(CMAKE_LIBRARY_ARCHITECTURE MATCHES "gnueabihf" AND LLC_HOST_CPU_AUTO MATCHES "arm1176jz-s")
    set(LLC_HOST_CPU_AUTO "arm1176jzf-s")
  endif()
endif()

# An "unknown" autodetection result is only acceptable when the user supplied
# LLC_HOST_CPU explicitly.
if((LLC_HOST_CPU_AUTO MATCHES "unknown") AND (NOT LLC_HOST_CPU))
  message(FATAL_ERROR "LLVM could not recognize your CPU model automatically. Please run CMake with -DLLC_HOST_CPU=<cpu> (you can find valid names with: llc -mcpu=help)")
else()
  set(LLC_HOST_CPU_AUTO "${LLC_HOST_CPU_AUTO}" CACHE INTERNAL "Autodetected CPU")
endif()

# HOST_CPU_FORCED records whether a user-provided LLC_HOST_CPU differs from
# the autodetected one; without a user value the autodetected CPU is used.
if((DEFINED LLC_HOST_CPU) AND (NOT LLC_HOST_CPU STREQUAL LLC_HOST_CPU_AUTO))
  message(STATUS "Autodetected CPU ${LLC_HOST_CPU_AUTO} overriden by user to ${LLC_HOST_CPU}")
  set(HOST_CPU_FORCED 1 CACHE INTERNAL "CPU is forced by user")
else()
  set(LLC_HOST_CPU "${LLC_HOST_CPU_AUTO}" CACHE STRING "The Host CPU to use with llc")
  set(HOST_CPU_FORCED 0 CACHE INTERNAL "CPU is forced by user")
endif()

####################################################################
# Some architectures have -march and -mcpu reversed

# if(DEFINED ...) takes the variable NAME. Passing ${CLANG_MARCH_FLAG} would
# test a variable named after the flag's value ("-march=" / "-mcpu="), which
# never exists, so the cached result would be ignored on every re-configure.
if(NOT DEFINED CLANG_MARCH_FLAG)
  message(STATUS "Checking clang -march vs. -mcpu flag")
  custom_try_compile_clang_silent("" "return 0;" RES ${CLANG_TARGET_OPTION}${LLC_TRIPLE} -march=${LLC_HOST_CPU})
  if(NOT RES)
    set(CLANG_MARCH_FLAG "-march=")
  else()
    custom_try_compile_clang_silent("" "return 0;" RES ${CLANG_TARGET_OPTION}${LLC_TRIPLE} -mcpu=${LLC_HOST_CPU})
    if(NOT RES)
      set(CLANG_MARCH_FLAG "-mcpu=")
    else()
      message(FATAL_ERROR "Could not determine whether to use -march or -mcpu with clang")
    endif()
  endif()

  set(CLANG_MARCH_FLAG ${CLANG_MARCH_FLAG} CACHE INTERNAL "Clang option used to specify the target cpu")
endif()

####################################################################

# This tests that we can actually link to the llvm libraries.
# Mostly to catch issues like #295 - cannot find -ledit

if(NOT DEFINED LLVM_LINK_TEST)

  # Small program that parses an IR file with the LLVM libraries; only its
  # ability to compile and link is checked here, it is never run.
  # NOTE(review): the system header name and the unique_ptr template argument
  # were missing from this chunk; <stdio.h> (printf), <stdlib.h> (exit) and
  # llvm::Module (return type of parseIRFile) are reconstructed.
  set(LLVM_LINK_TEST_SOURCE "
    #include <stdio.h>
    #include <stdlib.h>
    #include \"llvm/IR/LLVMContext.h\"
    #include \"llvm/Support/SourceMgr.h\"
    #include \"llvm/IR/Module.h\"
    #include \"llvm/IRReader/IRReader.h\"

    int main( int argc, char* argv[] )
    {
       if( argc < 2 )
         exit(2);

       llvm::LLVMContext context;
       llvm::SMDiagnostic err;
       std::unique_ptr<llvm::Module> module = llvm::parseIRFile( argv[1], err, context );

       if( !module )
         exit(1);
       else
         printf(\"DataLayout = %s\\n\", module->getDataLayoutStr().c_str());

       return 0;
    }")

  string(RANDOM RNDNAME)
  set(LLVM_LINK_TEST_FILENAME "${CMAKE_BINARY_DIR}/llvm_link_test_${RNDNAME}.cc")
  file(WRITE "${LLVM_LINK_TEST_FILENAME}" "${LLVM_LINK_TEST_SOURCE}")

  try_compile(LLVM_LINK_TEST ${CMAKE_BINARY_DIR} "${LLVM_LINK_TEST_FILENAME}"
    CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${LLVM_INCLUDE_DIRS}"
    CMAKE_FLAGS "-DLINK_DIRECTORIES:STRING=${LLVM_LIBDIR}"
    LINK_LIBRARIES "${LLVM_LDFLAGS} ${LLVM_LIBS} ${LLVM_SYSLIBS}"
    COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${LLVM_CXXFLAGS}"
    OUTPUT_VARIABLE _TRY_COMPILE_OUTPUT)

  # the randomly named source is not needed once try_compile has finished
  file(REMOVE "${LLVM_LINK_TEST_FILENAME}")

  if (LLVM_LINK_TEST)
    message(STATUS "LLVM link test OK")
    set(LLVM_LINK_TEST 1 CACHE INTERNAL "LLVM link test result")
  else()
    message(STATUS "LLVM link test output: ${_TRY_COMPILE_OUTPUT}")
    message(FATAL_ERROR "LLVM link test FAILED. This mostly happens when your LLVM installation does not have all dependencies installed.")
  endif()

endif()

####################################################################

# if(DEFINED ...) takes the variable NAME; with ${CL_DISABLE_HALF} the test
# looked at a variable named "0" / "1", so neither the cached result nor a
# user-supplied -DCL_DISABLE_HALF=... was ever honoured.
if(ENABLE_HOST_CPU_DEVICES AND NOT DEFINED CL_DISABLE_HALF)
  set(CL_DISABLE_HALF 0)
  message(STATUS "Checking fp16 support")
  custom_try_compile_clang_silent("__fp16 callfp16(__fp16 a) { return a * (__fp16)1.8; };" "__fp16 x=callfp16((__fp16)argc);" RESV ${CLANG_TARGET_OPTION}${LLC_TRIPLE} ${CLANG_MARCH_FLAG}${LLC_HOST_CPU})
  if(RESV)
    set(CL_DISABLE_HALF 1)
  endif()
endif()

set(CL_DISABLE_HALF "${CL_DISABLE_HALF}" CACHE INTERNAL "Disable cl_khr_fp16 because fp16 is not supported")
message(STATUS "FP16 is disabled: ${CL_DISABLE_HALF}")

#####################################################################

# Ask clang for its resource directory and list the OpenCL headers below it.
execute_process(COMMAND "${CLANG}" "--print-resource-dir" OUTPUT_VARIABLE RESOURCE_DIR)
string(STRIP "${RESOURCE_DIR}" RESOURCE_DIR)
set(CLANG_RESOURCE_DIR "${RESOURCE_DIR}" CACHE INTERNAL "Clang resource dir")

set(CLANG_OPENCL_HEADERS "${CLANG_RESOURCE_DIR}/include/opencl-c.h")
# opencl-c-base.h is listed in addition unless LLVM is older than 9.0
if(NOT LLVM_OLDER_THAN_9_0)
  list(APPEND CLANG_OPENCL_HEADERS "${CLANG_RESOURCE_DIR}/include/opencl-c-base.h")
endif()
pocl-1.4/cmake/Sphinx.cmake000066400000000000000000000031231355011147700156510ustar00rootroot00000000000000
#=============================================================================
#   CMake build system files
#
#   Copyright (c) 2014 pocl developers
#
#   Permission is hereby granted, free of charge, to any person obtaining a copy
#   of this software and associated documentation files (the "Software"), to deal
#   in the Software without restriction, including without limitation the rights
#   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#   copies of the Software, and to permit persons to whom the Software is
#   furnished to do so, subject to the following conditions:
#
#   The above copyright notice and this permission notice shall be included in
#   all copies or substantial portions
#   of the Software.
#
#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#   THE SOFTWARE.
#
#=============================================================================

# Find module for the Sphinx documentation generator.
# Searches for the sphinx-build executable; the SPHINX_DIR environment
# variable is used as a hint and its bin/ sub-directory is searched as well.
# The path is stored in SPHINX_EXECUTABLE.
find_program(SPHINX_EXECUTABLE NAMES sphinx-build
  HINTS
  $ENV{SPHINX_DIR}
  PATH_SUFFIXES bin
  DOC "Sphinx documentation generator"
)

include(FindPackageHandleStandardArgs)

# Standard find-module result handling for package "Sphinx", driven by
# whether SPHINX_EXECUTABLE was found.
find_package_handle_standard_args(Sphinx DEFAULT_MSG
  SPHINX_EXECUTABLE
)

mark_as_advanced(SPHINX_EXECUTABLE)
pocl-1.4/cmake/add_test_pocl.cmake000066400000000000000000000060201355011147700172030ustar00rootroot00000000000000
#=============================================================================
#   CMake build system files - add_test_pocl() test wrapper
#
#   Copyright (c) 2014-2017 pocl developers
#
#   Permission is hereby granted, free of charge, to any person obtaining a copy
#   of this software and associated documentation files (the "Software"), to deal
#   in the Software without restriction, including without limitation the rights
#   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#   copies of the Software, and to permit persons to whom the Software is
#   furnished to do so, subject to the following conditions:
#
#   The above copyright notice and this permission notice shall be included in
#   all copies or substantial portions of the Software.
#
#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#   IN NO EVENT SHALL THE
#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#   THE SOFTWARE.
#
#=============================================================================

include(CMakeParseArguments)

# add_test_pocl(NAME <name> COMMAND <exe> [args...]
#               [EXPECTED_OUTPUT <file>] [SORT_OUTPUT]
#               [WORKING_DIRECTORY <dir>])
#
# Wrapper around add_test() that registers the test through
# cmake/run_test.cmake instead of running the executable directly:
# 1) the output can be compared against an expected-output file
#    (optionally after sorting it)
# 2) it handles the exit status problem (test property WILL_FAIL does not
#    work if the test exits with a nonzero exit status)
#
# The first COMMAND element is taken relative to CMAKE_CURRENT_BINARY_DIR;
# all elements are joined with "####" and handed to the script as test_cmd.
function(add_test_pocl)

  cmake_parse_arguments(POCL_TEST
    "SORT_OUTPUT"
    "EXPECTED_OUTPUT;NAME;WORKING_DIRECTORY"
    "COMMAND"
    ${ARGN})

  # Join the command line with "####" separators.
  unset(RUN_CMD)
  foreach(_CMD_PART ${POCL_TEST_COMMAND})
    if(RUN_CMD)
      set(RUN_CMD "${RUN_CMD}####${_CMD_PART}")
    else()
      set(RUN_CMD "${CMAKE_CURRENT_BINARY_DIR}/${_CMD_PART}")
    endif()
  endforeach()

  # Assemble the add_test() argument list piece by piece.
  set(_ADD_TEST_ARGS "NAME" "${POCL_TEST_NAME}")
  if(POCL_TEST_WORKING_DIRECTORY)
    list(APPEND _ADD_TEST_ARGS "WORKING_DIRECTORY" "${POCL_TEST_WORKING_DIRECTORY}")
  endif()

  list(APPEND _ADD_TEST_ARGS "COMMAND" "${CMAKE_COMMAND}" "-Dtest_cmd=${RUN_CMD}")
  if(INTEL_SDE_AVX512)
    list(APPEND _ADD_TEST_ARGS "-DSDE=${INTEL_SDE_AVX512}")
  endif()
  if(POCL_TEST_EXPECTED_OUTPUT)
    list(APPEND _ADD_TEST_ARGS "-Doutput_blessed=${CMAKE_CURRENT_SOURCE_DIR}/${POCL_TEST_EXPECTED_OUTPUT}")
  endif()
  if(POCL_TEST_SORT_OUTPUT)
    list(APPEND _ADD_TEST_ARGS "-Dsort_output=1")
  endif()
  list(APPEND _ADD_TEST_ARGS "-P" "${CMAKE_SOURCE_DIR}/cmake/run_test.cmake")

  add_test(${_ADD_TEST_ARGS})

  # The OK / FAIL output patterns are only installed when ENABLE_ANYSAN is off.
  if(NOT ENABLE_ANYSAN)
    set_tests_properties("${POCL_TEST_NAME}" PROPERTIES
      PASS_REGULAR_EXPRESSION "OK"
      FAIL_REGULAR_EXPRESSION "FAIL")
  endif()

endfunction()
pocl-1.4/cmake/bitcode_rules.cmake000066400000000000000000000210261355011147700172250ustar00rootroot00000000000000#============================================================================= # CMake build system files # # Copyright (c) 2014 pocl developers # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
#
#=============================================================================

# cmake version of lib/kernel/rules.mk

separate_arguments(KERNEL_C_FLAGS)
separate_arguments(KERNEL_CL_FLAGS)
separate_arguments(KERNEL_CXX_FLAGS)

# compile_c_to_bc(<FILENAME> <SUBDIR> <BC_FILE_LIST>)
# Adds a rule compiling the C file lib/kernel/<FILENAME> to
# <current binary dir>/<SUBDIR>/<name>.bc with clang, and appends that .bc
# path to the list variable named by BC_FILE_LIST in the caller's scope.
function(compile_c_to_bc FILENAME SUBDIR BC_FILE_LIST)
  get_filename_component(FNAME "${FILENAME}" NAME)
  set(BC_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}/${FNAME}.bc")
  set(${BC_FILE_LIST} ${${BC_FILE_LIST}} ${BC_FILE} PARENT_SCOPE)
  set(FULL_F_PATH "${CMAKE_SOURCE_DIR}/lib/kernel/${FILENAME}")

  add_custom_command( OUTPUT "${BC_FILE}"
    DEPENDS "${FULL_F_PATH}"
      "${CMAKE_SOURCE_DIR}/include/pocl_types.h"
      "${CMAKE_SOURCE_DIR}/include/_kernel_c.h"
    COMMAND "${CLANG}" ${CLANG_FLAGS} ${DEVICE_CL_FLAGS} "-O1"
      ${KERNEL_C_FLAGS} "-o" "${BC_FILE}" "-c" "${FULL_F_PATH}"
      "-I${CMAKE_SOURCE_DIR}/include"
      "-include" "${CMAKE_SOURCE_DIR}/include/_kernel_c.h"
    COMMENT "Building C to LLVM bitcode ${BC_FILE}"
    VERBATIM)
endfunction()

# compile_cc_to_bc(<FILENAME> <SUBDIR> <BC_FILE_LIST>)
# Same as compile_c_to_bc, but for C++ sources compiled with clang++.
function(compile_cc_to_bc FILENAME SUBDIR BC_FILE_LIST)
  get_filename_component(FNAME "${FILENAME}" NAME)
  set(BC_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}/${FNAME}.bc")
  set(${BC_FILE_LIST} ${${BC_FILE_LIST}} ${BC_FILE} PARENT_SCOPE)
  set(FULL_F_PATH "${CMAKE_SOURCE_DIR}/lib/kernel/${FILENAME}")

  add_custom_command(OUTPUT "${BC_FILE}"
    DEPENDS "${FULL_F_PATH}"
    COMMAND "${CLANGXX}" ${CLANG_FLAGS} ${KERNEL_CXX_FLAGS}
      ${DEVICE_C_FLAGS} "-o" "${BC_FILE}" "-c" "${FULL_F_PATH}" "-O1"
    COMMENT "Building C++ to LLVM bitcode ${BC_FILE}"
    VERBATIM)
endfunction()

# compile_cl_to_bc(<FILENAME> <SUBDIR> <BC_FILE_LIST> <EXTRA_CONFIG>)
# Adds a rule compiling the OpenCL C file lib/kernel/<FILENAME> to bitcode.
# Files whose path matches "sleef" additionally include EXTRA_CONFIG and the
# SLEEF headers; files matching "libclc" depend on their optional
# <name>_fp32.cl / <name>_fp64.cl companions when those exist.
function(compile_cl_to_bc FILENAME SUBDIR BC_FILE_LIST EXTRA_CONFIG)
  get_filename_component(FNAME "${FILENAME}" NAME)
  get_filename_component(FNAME_WE "${FILENAME}" NAME_WE)
  set(BC_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}/${FNAME}.bc")
  set(${BC_FILE_LIST} ${${BC_FILE_LIST}} ${BC_FILE} PARENT_SCOPE)
  set(FULL_F_PATH "${CMAKE_SOURCE_DIR}/lib/kernel/${FILENAME}")

  set(DEPENDLIST
    "${CMAKE_SOURCE_DIR}/include/_kernel.h"
    "${CMAKE_SOURCE_DIR}/include/_kernel_c.h"
    "${CMAKE_SOURCE_DIR}/include/pocl_types.h")
  set(INCLUDELIST
    "-include" "${CMAKE_SOURCE_DIR}/include/_kernel.h"
    "-include" "${CMAKE_SOURCE_DIR}/include/_enable_all_exts.h")

  if(FILENAME MATCHES "sleef")
    list(APPEND DEPENDLIST "${EXTRA_CONFIG}" )
    list(APPEND DEPENDLIST ${SLEEF_CL_KERNEL_DEPEND_HEADERS})
    list(APPEND INCLUDELIST "-DMAX_PRECISION"
      "-I" "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/include" # for sleef_cl.h
      "-include" "${EXTRA_CONFIG}")
  endif()

  if(FILENAME MATCHES "libclc")
    list(APPEND DEPENDLIST ${LIBCLC_KERNEL_DEPEND_HEADERS})

    set(I32 "${CMAKE_SOURCE_DIR}/lib/kernel/libclc/${FNAME_WE}_fp32.cl")
    if(EXISTS "${I32}")
      list(APPEND DEPENDLIST "${I32}")
    endif()

    set(I64 "${CMAKE_SOURCE_DIR}/lib/kernel/libclc/${FNAME_WE}_fp64.cl")
    if(EXISTS "${I64}")
      list(APPEND DEPENDLIST "${I64}")
    endif()

    list(APPEND INCLUDELIST "-I" "${CMAKE_SOURCE_DIR}/lib/kernel/libclc")
  endif()

  add_custom_command( OUTPUT "${BC_FILE}"
    DEPENDS "${FULL_F_PATH}" ${DEPENDLIST}
    COMMAND "${CLANG}" ${CLANG_FLAGS} ${KERNEL_CL_FLAGS} ${DEVICE_CL_FLAGS}
      "-o" "${BC_FILE}" "-c" "${FULL_F_PATH}" ${INCLUDELIST}
    COMMENT "Building CL to LLVM bitcode ${BC_FILE}"
    VERBATIM)
endfunction()

# ARGN - extra defines / arguments to clang
# can't use c_to_bc, since SLEEF's C files need to be prefixed with EXT
# (because the same files are compiled multiple times)
function(compile_sleef_c_to_bc EXT FILENAME SUBDIR BCLIST)
  get_filename_component(FNAME "${FILENAME}" NAME)
  set(BC_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}/${EXT}_${FNAME}.bc")
  list(APPEND ${BCLIST} "${BC_FILE}")
  set(${BCLIST} ${${BCLIST}} PARENT_SCOPE)
  set(FULL_F_PATH "${CMAKE_SOURCE_DIR}/lib/kernel/${FILENAME}")

  add_custom_command( OUTPUT "${BC_FILE}"
    DEPENDS "${FULL_F_PATH}" ${SLEEF_C_KERNEL_DEPEND_HEADERS}
    COMMAND "${CLANG}" ${CLANG_FLAGS} ${DEVICE_C_FLAGS} ${KERNEL_C_FLAGS} ${ARGN}
      "-I" "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/arch"
      "-I" "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/libm"
      "-I" "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/include"
      "-O1" "-o" "${BC_FILE}" "-c" "${FULL_F_PATH}"
    COMMENT "Building SLEEF to LLVM bitcode ${BC_FILE}"
    VERBATIM)
endfunction()

# compile_ll_to_bc(<FILENAME> <SUBDIR> <BC_FILE_LIST>)
# Adds a rule assembling the LLVM IR file <current source dir>/../<FILENAME>
# with llvm-as. The rule depends on that input file so the bitcode is rebuilt
# when the .ll source changes (an empty DEPENDS never triggers a rebuild).
function(compile_ll_to_bc FILENAME SUBDIR BC_FILE_LIST)
  get_filename_component(FNAME "${FILENAME}" NAME)
  set(BC_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}/${FNAME}.bc")
  set(${BC_FILE_LIST} ${${BC_FILE_LIST}} ${BC_FILE} PARENT_SCOPE)
  set(LL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/../${FILENAME}")

  add_custom_command( OUTPUT "${BC_FILE}"
    DEPENDS "${LL_SOURCE}"
    COMMAND "${LLVM_AS}" "-o" "${BC_FILE}" "${LL_SOURCE}"
    COMMENT "Building LL to LLVM bitcode ${BC_FILE}"
    VERBATIM)
endfunction()

# compile_to_bc(<SUBDIR> <OUTPUT_FILE_LIST> <EXTRA_CONFIG> [files...])
# Dispatches every file to the matching compile_*_to_bc rule based on its
# extension (.c, .cc, .cl, .ll); any other extension is a fatal error.
macro(compile_to_bc SUBDIR OUTPUT_FILE_LIST EXTRA_CONFIG)
  foreach(FILENAME ${ARGN})
    if(FILENAME MATCHES "[.]c$")
      compile_c_to_bc("${FILENAME}" "${SUBDIR}" ${OUTPUT_FILE_LIST})
    elseif(FILENAME MATCHES "[.]cc$")
      compile_cc_to_bc("${FILENAME}" "${SUBDIR}" ${OUTPUT_FILE_LIST})
    elseif(FILENAME MATCHES "[.]cl$")
      compile_cl_to_bc("${FILENAME}" "${SUBDIR}" ${OUTPUT_FILE_LIST} "${EXTRA_CONFIG}")
    elseif(FILENAME MATCHES "[.]ll$")
      compile_ll_to_bc("${FILENAME}" "${SUBDIR}" ${OUTPUT_FILE_LIST})
    else()
      message(FATAL_ERROR "Dont know how to compile ${FILENAME} to .bc !")
    endif()
  endforeach()
endmacro()

# make_kernel_bc(<OUTPUT_VAR> <NAME> <SUBDIR> <USE_SLEEF> <EXTRA_BC> <EXTRA_CONFIG> [sources...])
# Compiles all sources to bitcode and adds a rule linking them into
# <current binary dir>/kernel-<NAME>.bc, whose path is returned in OUTPUT_VAR.
# The list of inputs is passed to llvm-link through a file read by xargs.
# With USE_SLEEF, EXTRA_BC is linked in as well.
# NOTE(review): VARIANT is not a parameter of this function; it is presumably
# set by the caller - confirm.
function(make_kernel_bc OUTPUT_VAR NAME SUBDIR USE_SLEEF EXTRA_BC EXTRA_CONFIG)
  set(KERNEL_BC "${CMAKE_CURRENT_BINARY_DIR}/kernel-${NAME}.bc")
  set(${OUTPUT_VAR} "${KERNEL_BC}" PARENT_SCOPE)

  file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${SUBDIR}")

  compile_to_bc("${SUBDIR}" BC_LIST "${EXTRA_CONFIG}" ${ARGN})

  set(DEPENDLIST ${BC_LIST})
  # fix too long commandline with cat and xargs
  set(BC_LIST_FILE_TXT "")
  foreach(FILENAME ${BC_LIST})
    # straight parsing semicolon separated list with xargs -d didn't work on windows.. no such switch available
    set(BC_LIST_FILE_TXT "${BC_LIST_FILE_TXT} \"${FILENAME}\"")
  endforeach()
  if(USE_SLEEF)
    set(BC_LIST_FILE_TXT "${BC_LIST_FILE_TXT} \"${EXTRA_BC}\"")
    list(APPEND DEPENDLIST ${EXTRA_BC} "sleef_config_${VARIANT}")
  endif()

  set(BC_LIST_FILE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/kernel_${NAME}_linklist.txt")
  file(WRITE "${BC_LIST_FILE}" "${BC_LIST_FILE_TXT}")

  # don't waste time optimizing the kernels IR when in developer mode
  if(DEVELOPER_MODE)
    set(LINK_OPT_COMMAND COMMAND "${XARGS_EXEC}" "${LLVM_LINK}" "-o" "${KERNEL_BC}" < "${BC_LIST_FILE}")
  else()
    set(LINK_CMD COMMAND "${XARGS_EXEC}" "${LLVM_LINK}" "-o" "kernel-${NAME}-unoptimized.bc" < "${BC_LIST_FILE}")
    set(OPT_CMD COMMAND "${LLVM_OPT}" ${LLC_FLAGS} "-O3" "-fp-contract=off" "-o" "${KERNEL_BC}" "kernel-${NAME}-unoptimized.bc")
    set(LINK_OPT_COMMAND ${LINK_CMD} ${OPT_CMD})
  endif()

  add_custom_command( OUTPUT "${KERNEL_BC}"
    DEPENDS ${DEPENDLIST}
    ${LINK_OPT_COMMAND}
    COMMENT "Linking & optimizing Kernel bitcode ${KERNEL_BC}"
    VERBATIM)
endfunction()
pocl-1.4/cmake/kernellib_hash.cmake000066400000000000000000000016121355011147700173530ustar00rootroot00000000000000
# TODO this is duplicated in top CMakeLists.txt
# rename_if_different(<SRC> <DST>)
# Moves SRC over DST when DST does not exist yet or when the MD5 checksums of
# the two files differ. When both files are identical DST is left untouched
# (and SRC stays where it is).
function(rename_if_different SRC DST)
  if(EXISTS "${DST}")
    file(MD5 "${SRC}" OLD_MD5)
    file(MD5 "${DST}" NEW_MD5)
    if(NOT OLD_MD5 STREQUAL NEW_MD5)
      message(STATUS "Renaming ${SRC} to ${DST}")
      file(RENAME "${SRC}" "${DST}")
    endif()
  else()
    message(STATUS "Renaming ${SRC} to ${DST}")
    file(RENAME "${SRC}" "${DST}")
  endif()
endfunction()

# The list of kernel bitcode files arrives with "****" as element separator.
string(REPLACE "****" ";" KERNEL_BC_LIST "${KERNEL_BC_LIST_ESCAPED}")

# Chain the SHA1 of every existing kernel library into S1.
foreach(KERNEL_BC IN LISTS KERNEL_BC_LIST)
  # quoted so that a path containing spaces stays a single if() argument
  if(EXISTS "${KERNEL_BC}")
    file(SHA1 "${KERNEL_BC}" S)
    set(S1 "${S}__${S1}")
  endif()
endforeach()

file(SHA1 "${INCLUDEDIR}/_kernel.h" S2)
file(SHA1 "${INCLUDEDIR}/_kernel_c.h" S3)
file(SHA1 "${INCLUDEDIR}/pocl_types.h" S4)

# Write the combined hash to a temporary file and only replace OUTPUT when
# its content actually changed.
file(WRITE "${OUTPUT}.new" "#define POCL_KERNELLIB_SHA1 \"${S1}${S2}_${S3}_${S4}\"")
rename_if_different("${OUTPUT}.new" "${OUTPUT}")
pocl-1.4/cmake/run_test.cmake000066400000000000000000000041571355011147700162530ustar00rootroot00000000000000
# Test driver script, executed as "cmake -D... -P run_test.cmake".
# Input variables:
#   test_cmd       - command to run with all its arguments, separated by "####"
#   SDE            - optional; when set, the command is run a second time
#                    through "${SDE} -skx --"
#   output_blessed - optional; file holding the expected stdout
#   sort_output    - optional; sort the captured stdout lines before comparing

# some argument checking:
# test_cmd is the command to run with all its arguments, separated by "####"
if( NOT test_cmd )
  message( FATAL_ERROR "Variable test_cmd not defined" )
endif()

# output_blessed contains the name of the file with expected output
if(output_blessed)
  message(STATUS "Expecting output: ${output_blessed}")
endif()

# turn the "####"-separated command line into a CMake list
string(REPLACE "####" ";" test_cmd_separated "${test_cmd}")

execute_process(
  COMMAND ${test_cmd_separated}
  RESULT_VARIABLE test_not_successful
  OUTPUT_VARIABLE stdout
  ERROR_VARIABLE stderr
)

# the first run would fail, but still pre-compile the kernels
# for the 2nd run through SDE
# (status and output of the first run are overwritten by the second one)
if(SDE)
  execute_process(
    COMMAND "${SDE}" -skx -- ${test_cmd_separated}
    RESULT_VARIABLE test_not_successful
    OUTPUT_VARIABLE stdout
    ERROR_VARIABLE stderr
  )
endif()

# Report a nonzero exit status together with the captured output; otherwise
# just echo what the test printed.
if( test_not_successful )
  message( SEND_ERROR "FAIL: Test exited with nonzero code (${test_not_successful}): ${test_cmd_separated}\nSTDOUT:\n${stdout}\nSTDERR:\n${stderr}" )
else()
  message("${stdout}")
  message("${stderr}")
endif()

if(output_blessed)
  # the captured stdout goes to a randomly named file below /tmp
  string(RANDOM RAND_STR)
  set(RANDOM_FILE "/tmp/cmake_testrun_${RAND_STR}")
  file(WRITE "${RANDOM_FILE}" "${stdout}")

  if( sort_output )
    message(STATUS "SORTING FILE")
    file(STRINGS "${RANDOM_FILE}" output_string_list)
    list(SORT output_string_list)
    # for some reason sorting doesn't work when list contains newlines,
    # have to add them after the sort
    file(REMOVE "${RANDOM_FILE}")
    string(REPLACE ";" "\n" OUTPUT "${output_string_list}")
    set(RANDOM_FILE "${RANDOM_FILE}_sorted")
    file(WRITE "${RANDOM_FILE}" "${OUTPUT}\n")
  endif()

  message(STATUS "Comparing output..")
  execute_process(
    COMMAND ${CMAKE_COMMAND} -E compare_files "${output_blessed}" "${RANDOM_FILE}"
    RESULT_VARIABLE test_not_successful
  )

  # the output file is kept for inspection only when the comparison failed
  if( test_not_successful )
    message(SEND_ERROR "FAIL: Test output does not match the expected output; output stored in ${RANDOM_FILE}" )
  else()
    file(REMOVE "${RANDOM_FILE}")
  endif()
endif()

# Print "OK" when the test's own output contains neither "OK" nor "FAIL".
if ((NOT "${stdout}${stderr}" MATCHES "OK") AND (NOT "${stdout}${stderr}" MATCHES "FAIL"))
  message(STATUS "OK")
endif()
pocl-1.4/config.h.in.cmake000066400000000000000000000100101355011147700154110ustar00rootroot00000000000000 #cmakedefine BUILD_HSA #cmakedefine BUILD_CUDA #cmakedefine BUILD_BASIC #cmakedefine BUILD_PTHREAD #cmakedefine BUILD_ACCEL #define BUILDDIR "@BUILDDIR@" /* "Build with ICD" */ #cmakedefine BUILD_ICD #define CMAKE_BUILD_TYPE "@CMAKE_BUILD_TYPE@" #cmakedefine ENABLE_ASAN #cmakedefine ENABLE_LSAN #cmakedefine ENABLE_TSAN #cmakedefine ENABLE_UBSAN #cmakedefine ENABLE_CONFORMANCE #cmakedefine ENABLE_HWLOC #cmakedefine ENABLE_HOST_CPU_DEVICES #cmakedefine ENABLE_POCL_BUILDING #cmakedefine ENABLE_POCL_FLOAT_CONVERSION #cmakedefine ENABLE_RELOCATION #cmakedefine ENABLE_SLEEF #cmakedefine ENABLE_SPIR #cmakedefine ENABLE_SPIRV #cmakedefine HAVE_FORK #cmakedefine HAVE_VFORK #cmakedefine HAVE_CLOCK_GETTIME #cmakedefine HAVE_FDATASYNC #cmakedefine HAVE_FSYNC #cmakedefine HAVE_GETRLIMIT #cmakedefine HAVE_MKOSTEMPS #cmakedefine HAVE_MKSTEMPS #cmakedefine HAVE_MKDTEMP #cmakedefine HAVE_FUTIMENS #cmakedefine HAVE_LTTNG_UST #cmakedefine HAVE_LIBDL #cmakedefine HAVE_OCL_ICD #cmakedefine HAVE_POSIX_MEMALIGN #cmakedefine HAVE_SLEEP #cmakedefine HAVE_UTIME #cmakedefine OCS_AVAILABLE /* this is used all over the runtime code */ #define HOST_CPU_CACHELINE_SIZE @HOST_CPU_CACHELINE_SIZE@ #ifdef ENABLE_HOST_CPU_DEVICES #define HOST_AS_FLAGS "@HOST_AS_FLAGS@" #define HOST_CLANG_FLAGS "@HOST_CLANG_FLAGS@" #define HOST_DEVICE_EXTENSIONS "@HOST_DEVICE_EXTENSIONS@" #cmakedefine HOST_CPU_FORCED #define HOST_LD_FLAGS "@HOST_LD_FLAGS@" #define HOST_LLC_FLAGS "@HOST_LLC_FLAGS@" #cmakedefine HOST_FLOAT_SOFT_ABI #define HOST_DEVICE_BUILD_HASH "@HOST_DEVICE_BUILD_HASH@" #endif #ifdef BUILD_HSA #cmakedefine HAVE_HSA_EXT_AMD_H #define AMD_HSA @AMD_HSA@ #define HSA_DEVICE_EXTENSIONS "@HSA_DEVICE_EXTENSIONS@" #define HSAIL_ASM "@HSAIL_ASM@" #define 
HSAIL_ENABLED @HSAIL_ENABLED@ #endif #define CMAKE_BUILD_TYPE "@CMAKE_BUILD_TYPE@" #define LINK_COMMAND "@LINK_COMMAND@" #ifdef OCS_AVAILABLE #define KERNELLIB_HOST_CPU_VARIANTS "@KERNELLIB_HOST_CPU_VARIANTS@" #cmakedefine KERNELLIB_HOST_DISTRO_VARIANTS #define CLANG "@CLANG@" #define CLANG_RESOURCE_DIR "@CLANG_RESOURCE_DIR@" #define CLANGXX "@CLANGXX@" #define LLVM_LLC "@LLVM_LLC@" #define LLVM_SPIRV "@LLVM_SPIRV@" /* "Using LLVM 6.0" */ #cmakedefine LLVM_6_0 /* "Using LLVM 7.0" */ #cmakedefine LLVM_7_0 /* "Using LLVM 8.0" */ #cmakedefine LLVM_8_0 #cmakedefine LLVM_9_0 #cmakedefine LLVM_BUILD_MODE_DEBUG #ifndef LLVM_VERSION #define LLVM_VERSION "@LLVM_VERSION_FULL@" #endif #endif /* Defined to greatest expected alignment for extended types, in bytes. */ #define MAX_EXTENDED_ALIGNMENT @MAX_EXTENDED_ALIGNMENT@ /* used in lib/CL/devices/basic */ #define OCL_KERNEL_TARGET "@OCL_KERNEL_TARGET@" #define OCL_KERNEL_TARGET_CPU "@OCL_KERNEL_TARGET_CPU@" #define PACKAGE_VERSION "@PACKAGE_VERSION@" #define POCL_KERNEL_CACHE_DEFAULT @POCL_KERNEL_CACHE_DEFAULT@ #define HOST_DEVICE_ADDRESS_BITS @HOST_DEVICE_ADDRESS_BITS@ #cmakedefine POCL_DEBUG_MESSAGES #define POCL_INSTALL_PRIVATE_HEADER_DIR "@POCL_INSTALL_PRIVATE_HEADER_DIR@" #define POCL_INSTALL_PRIVATE_DATADIR "@POCL_INSTALL_PRIVATE_DATADIR@" #define POCL_INSTALL_PRIVATE_DATADIR_REL "@POCL_INSTALL_PRIVATE_DATADIR_REL@" #cmakedefine POCL_ASSERTS_BUILD /* these are *host* values */ /* used in tce_common.c & pocl_llvm_api.cc */ #define SRCDIR "@SRCDIR@" #cmakedefine TCEMC_AVAILABLE #cmakedefine TCE_AVAILABLE #define TCE_DEVICE_EXTENSIONS "@TCE_DEVICE_EXTENSIONS@" /* Defined on big endian systems */ #define WORDS_BIGENDIAN @WORDS_BIGENDIAN@ /* Disable cl_khr_fp16 because fp16 is not supported */ #cmakedefine _CL_DISABLE_HALF /* Disable cl_khr_fp64 because fp64 is not supported */ #cmakedefine _CL_DISABLE_DOUBLE #define POCL_CL_VERSION "1.2" #define HSA_DEVICE_CL_VERSION_MAJOR 1 #define HSA_DEVICE_CL_VERSION_MINOR 2 #define 
CUDA_DEVICE_CL_VERSION_MAJOR 1 #define CUDA_DEVICE_CL_VERSION_MINOR 2 #define HOST_DEVICE_CL_VERSION_MAJOR 1 #define HOST_DEVICE_CL_VERSION_MINOR 2 #define TCE_DEVICE_CL_VERSION_MAJOR 1 #define TCE_DEVICE_CL_VERSION_MINOR 2 #cmakedefine USE_POCL_MEMMANAGER pocl-1.4/config2.h.in.cmake000066400000000000000000000002651355011147700155060ustar00rootroot00000000000000/* this config file is for values NOT escaped for C/C++ * required e.g. for values with doublequotes, like C string arrays */ #define HOST_LD_FLAGS_ARRAY "@HOST_LD_FLAGS_ARRAY@" pocl-1.4/doc/000077500000000000000000000000001355011147700130645ustar00rootroot00000000000000pocl-1.4/doc/sphinx/000077500000000000000000000000001355011147700143755ustar00rootroot00000000000000pocl-1.4/doc/sphinx/Makefile000066400000000000000000000063331355011147700160420ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = build # Internal variables. 
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
# Options passed to every sphinx-build invocation below: the doctree cache
# directory, the paper-size option selected through $(PAPER) (empty when PAPER
# is unset), any user-supplied $(SPHINXOPTS), and finally the source directory.
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest

# Print the list of documentation targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"

# Remove everything below $(BUILDDIR); the leading '-' makes make ignore a
# failing rm.
clean:
	-rm -rf $(BUILDDIR)/*

# Each builder target below runs sphinx-build with the matching -b builder and
# writes its output to a sub-directory of $(BUILDDIR) named after the builder.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PortableComputingLanguagepocl.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PortableComputingLanguagepocl.qhc" latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ "run these through (pdf)latex." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." publish_to_web: rsync -r build/html $(SOURCEFORGE_USER)@web.sourceforge.net:/home/project-web/pocl/htdocs/docs/ pocl-1.4/doc/sphinx/source/000077500000000000000000000000001355011147700156755ustar00rootroot00000000000000pocl-1.4/doc/sphinx/source/accel.rst000066400000000000000000000153351355011147700175050ustar00rootroot00000000000000=========================== Fixed-Function Accelerators =========================== The ``accel`` driver can be used for easy integration of custom fixed-function accelerators through a standardized hardware interface and a standardized procedure for enqueuing commands. Interface --------- The control register interface for the fixed-function accelerators is quite simple. 
The address space of the device is split into four regions, the size of which is determined by the largest of the memories in these regions. Therefore, the region is selected with the highest bits of the address space of the accelerator: +-------------+--------------------+ | High bits | Address Space | | | | +=============+====================+ | 00 | Control registers | +-------------+--------------------+ | 01 | Instruction memory | +-------------+--------------------+ | 10 | Data memory | +-------------+--------------------+ | 11 | Parameter memory | +-------------+--------------------+ The size of the memories is read from the control registers, which is sufficient to determine the size of the address space of the accelerator as well as the offsets of each memory. The control registers are also used to control the execution of the accelerator: .. list-table:: :widths: 20 25 55 :header-rows: 1 * - Offset - Name - Description * - 0x000 - STATUS - Status of the accelerator. Bit 0 is high when the execution is stalled due to any reason, bit 1 is high when the external stall signal is active, and bit 2 is high when the accelerator reset is active. * - 0x100 - AQL_READ_IDX_LOW - Read index of the AQL queue (low 32 bits). Read only. * - 0x104 - AQL_READ_IDX_HIGH - Read index of the AQL queue (high 32 bits). Read only. * - 0x108 - AQL_WRITE_IDX_LOW - Write index of the AQL queue (low 32 bits). Writing to this register increments the 64-bit value. * - 0x10C - AQL_WRITE_IDX_HIGH - Write index of the AQL queue (high 32 bits). Read only. * - 0x200 - COMMAND - Command register to control execution. Writing 1 to this register resets the accelerator, writing 2 lifts reset and external stall, and writing 4 enables the external stall signal, pausing execution. * - 0x300 - DEVICE_CLASS - Device class (vendor ID) of the accelerator. Currently unused by the driver. * - 0x304 - DEVICE_ID - Device ID of the accelerator. Currently unused by the driver. 
* - 0x308 - INTERFACE_TYPE - Version number of the interface. This describes interface version 2. * - 0x30C - CORE_COUNT - Core count of the accelerator. Multicore devices are currently not supported. * - 0x310 - CTRL_SIZE - Size of control memory (this register space) in bytes. Must be at least 1024. * - 0x314 - DMEM_SIZE - Size of the data memory in bytes * - 0x318 - IMEM_SIZE - Size of the instruction memory in bytes * - 0x31c - PMEM_SIZE - Size of the parameter memory in bytes. The instruction memory can be used to configure the accelerator. However, it currently has to be done manually, and is not managed by pocl. The data memory is used to store an AQL Queue, as defined by the `HSA Runtime Programmer’s Reference Manual `_, the write and read indexes of which are exposed by the control registers. The size of the queue is such that it uses all of the data memory. Finally, the parameter memory is used to store data and argument buffers as well as completion signals for the kernels. As a practical example, enqueuing a kernel dispatch packet proceeds as follows: - The driver allocates and populates the OpenCL buffers and the argument buffer for the kernel, as well as space for a 32-bit completion signal. - The driver writes the kernel packet, excluding the header, to the device. Its position depends on the value of the write index. The completion signal address as well as the argument buffer address and pointers to buffer arguments are given as physical addresses in the accelerator's address space. The kernel object simply corresponds to the kernel IDs shown in the table below. - The driver sets the packet header and increments the queue write index. - The device executes the kernel and writes a 1 in case of a success or a 2 in case of a failure to the completion signal address, if it is not 0. - The driver sees the completion signal change, and can consider the command completed. 
Usage ----- To enable this driver, simply add ``-DENABLE_ACCEL_DEVICE=1`` to the cmake arguments. On small FPGA SoCs and other relatively low performance hosts, you may wish to follow the instructions in :ref:`pocl-without-llvm`. The fixed-function accelerators need to be told what kernel to execute. For this, the accel driver has a list of builtin kernels that can be referred to in the ``clCreateProgramWithBuiltInKernels`` call: .. list-table:: :widths: 20 20 60 :header-rows: 1 * - Kernel name - Kernel ID - Function * - pocl.copy - 0 - Copies from argument 0 to argument 1 as many bytes as there are work items * - pocl.add32 - 1 - 32-bit element-wise addition on arrays pointed to by arguments 0 and 1, stored in an array pointed to by argument 3 * - pocl.mul32 - 2 - As pocl.add32, but with 32-bit multiplication This list will be expanded in the future. There is an example program using the accel driver in ``examples/accel`` which also includes the VHDL code for synthesizing the accelerator. The accelerator has been developed with the `TCE toolset `_. In order to synthesize the accelerator for a Xilinx FPGA SoC, you can follow the instructions in the `TCE manual `_, in the section titled System-on-a-Chip design with AlmaIF Integrator. Make sure to check the accelerator base address from Vivado. Driver arguments are used to tell pocl where the accelerator is and what functions it supports. To run this example manually, execute:: POCL_DEVICES=accel POCL_ACCEL0_PARAMETERS=0x43C00000,1,2 ./accel_example The environment variables define an accelerator with base physical address of 0x43C0_0000 that can execute pocl.add32 and pocl.mul32. When running the example, verify that the address given in the parameter matches the base address of the accelerator. Note that as the driver requires write access to ``/dev/mem`` for memory mapping, you may need to execute the application with elevated privileges. 
In this case, note that ``sudo`` by default overrides your environment variables. You can either assign them in the same command, or use ``sudo`` with the ``--preserve-env`` switch. pocl-1.4/doc/sphinx/source/conf.py000066400000000000000000000144461355011147700172050ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Portable Computing Language (pocl) documentation build configuration file, created by # sphinx-quickstart on Fri May 3 10:53:18 2013. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.append(os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.imgmath'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8' # The master toctree document. master_doc = 'index' # General information about the project. project = u'Portable Computing Language (pocl)' copyright = u'2010-2019 pocl developers' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '1.4' # The full version, including alpha/beta/rc tags. 
release = '1.4'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of documents that shouldn't be included in the build.
#unused_docs = []

# List of glob-style patterns, relative to the source directory, that match
# files and directories which shouldn't be searched for source files.
# This replaces the former ``exclude_trees = []`` setting, which the Sphinx
# releases providing 'sphinx.ext.imgmath' no longer read.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []


# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages.
# 'classic' is the current name of the theme that used to be called 'default';
# using the old name makes Sphinx emit a rename warning on every build.
html_theme = 'classic'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_use_modindex = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = ''

# Output file base name for HTML help builder.
htmlhelp_basename = 'PortableComputingLanguagepocldoc'


# -- Options for LaTeX output --------------------------------------------------

# The paper size ('letter' or 'a4').
#latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). #latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'PortableComputingLanguagepocl.tex', u'Portable Computing Language (pocl) Documentation', u'pocl developers', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # Additional stuff for the LaTeX preamble. #latex_preamble = '' # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_use_modindex = True pocl-1.4/doc/sphinx/source/conformance.rst000066400000000000000000000323001355011147700207170ustar00rootroot00000000000000.. _pocl-conformance: ======================= OpenCL conformance ======================= Conformance related CMake options --------------------------------- - ``-DENABLE_CONFORMANCE=ON/OFF`` This is mostly related to the kernel library (the runtime is always built to be conformant on x86). Defaults to ON. This option by itself does not guarantee OpenCL-conformant build; it merely ensures that a build fails if some options which would result in non-conformant kernel library are given. Non-conformant kernel library might be somewhat faster, at the expense of precision and/or range. Note that conformance was tested **only** on certain hardware and software (Linux, x86-64, CPU with AVX & FMA instructions). How to run the conformance test suite on your hardware ------------------------------------------------------ First you need to enable the suite in the pocl's external test suite set. This is done by adding switch ``-DENABLE_TESTSUITES=conformance`` to the cmake command line. 
After this ``make prepare_examples`` fetches and prepares the conformance suite for testing. To run a shortened version of the conformance suite, run: ``ctest -L conformance_suite_mini`` This might take a few hours on slow hardware. There is also a ``conformance_suite_micro`` label, which takes about 20-30 minutes on slow hardware. To run the full conformance testsuite, run: ``ctest -L conformance_suite_full`` Note that this can take a week to finish on slow hardware, and about a day on relatively fast hardware (6C/12T Intel or equivalent). Known issues with the conformance testsuite ------------------------------------------- - a few tests from ``basic/test_basic`` may fail / segfault because they request a huge amount of memory for buffers. - a few tests from ``conversions/test_conversions`` may report failures. This is likely a bug in the test; the same test from branch cl20_trunk of CTS passes. - math_brute_force tests may occasionally fail with an empty build log, this is a bug in CTS. See pocl issue #614. - a few tests may run much faster if you limit the reported Global memory size with POCL_MEMORY_LIMIT env var. In particular, "kernel_image_methods" test with "max_images" argument. - two tests in ``api/test_api`` fail with LLVM 5.0 because of LLVM commit 1c1154229a41b688f9: ``[OpenCL] Do not generate "kernel_arg_type_qual" metadata for non-pointer args`` This is a bug in CTS, which tests for non-pointer type qualifiers, not in pocl. See: https://www.khronos.org/registry/OpenCL/specs/opencl-1.2.pdf page 169: ``CL_KERNEL_ARG_TYPE_VOLATILE`` is returned if the **argument is a pointer** and the referenced type is declared with the volatile qualifier. 
Similarly, ``CL_KERNEL_ARG_TYPE_RESTRICT`` or ``CL_KERNEL_ARG_TYPE_CONST`` is returned if the **argument is a pointer** and the referenced type is declared with the restrict or const qualifier - the compiler test may fail with pocl-1.1 when it's built with SPIR support, because cl_khr_spir extension is not recognized with OpenCL 1.2 - officially it's only recognized since OpenCL 2.0. .. _sigfpe-handler: Known issues in pocl / things to be aware of -------------------------------------------- - Integer division by zero. OpenCL 1.2 specification requires that division by zero on integers results in undefined values, instead of raising exceptions. This requires pocl to install a handler of SIGFPE. Unfortunately signal handlers are per-process, not per-thread, and pocl drivers do not run in a separate process, which means that integer division by zero will not raise SIGFPE for the entire pocl library and also the user's program. The handler may be disabled by setting the env variable POCL_SIGFPE_HANDLER to 0. Note that this is currently only relevant for x86(-64) + Linux, on all other systems this issue is not handled in any way (thus Pocl is likely non-conformant there). - Several options to clBuildProgram() are accepted but currently have no effect. This is related mostly to optimization options like `-cl-fast-relaxed-math`. The `-cl-denorms-are-zero` and `-cl-fp32-correctly-rounded-divide-sqrt` options are honored. - Many of ``native_`` and ``half_`` variants of kernel library functions are mapped to the "full" variants. - the optional OpenGL / D3D extensions are not supported. There is experimental support for SPIR. - clUnloadCompiler() only actually unloads LLVM after all programs & kernels have been released. - clSetUserEventStatus() called with negative status. The Spec leaves the behaviour in this case as "implementation defined", and this part of pocl is only very lightly tested by the conformance tests. 
clSetUserEventStatus() called with CL_COMPLETE works as expected, and is heavily used by the conversions conformance test. Conformance tests results (kernel library precision) on tested hardware ----------------------------------------------------------------------- Note that it's impossible to test double precision on the entire range, therefore the results may vary. x86-64 CPU with AVX2+FMA, LLVM 4.0, tested on Nov 1, 2017 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ==================== ========================= =========================================================== NAME Worst ULP WHERE ==================== ========================= =========================================================== add 0.00 {0x0p+0, 0x0p+0} addD 0.00 {0x0p+0, 0x0p+0} assignment 0.00 0x0p+0 assignmentD 0.00 0x0p+0 cbrt 0.50 -0x1.5629d2p+116 cbrtD 0.59 0x1.0000000000136p+1022 ceil 0.00 0x0p+0 ceilD 0.00 0x0p+0 copysign 0.00 {0x0p+0, 0x0p+0} copysignD 0.00 {0x0p+0, 0x0p+0} cos 2.37 0x1.1338ccp+20 cosD 2.27 -0x1.d10000000074p+380 cosh 2.41 -0x1.602166p+2 coshD 1.43 -0x1.98000000003efp+5 cospi 1.94 0x1.d73b56p-2 cospiD 2.46 -0x1.adffffffffa91p-2 divide 0.00 {0x0p+0, 0x0p+0} divideD 0.00 {0x0p+0, 0x0p+0} exp 0.95 -0x1.762532p+2 expD 0.94 0x1.2f0000000023dp+7 exp10 0.79 -0x1.309022p+5 exp10D 0.64 -0x1.34ffffffffcc9p+8 exp2 0.79 -0x1.fa3d0ep+6 exp2D 0.75 -0x1.ff00000000417p+9 expm1 1.00 -0x1.7a0002p-25 expm1D 0.99 -0x1.26p+5 fabs 0.00 0x0p+0 fabsD 0.00 0x0p+0 fdim 0.00 {0x0p+0, 0x0p+0} fdimD 0.00 {0x0p+0, 0x0p+0} floor 0.00 0x0p+0 floorD 0.00 0x0p+0 fma 0.00 {0x0p+0, 0x0p+0, 0x0p+0} fmaD 0.00 {0x0p+0, 0x0p+0, 0x0p+0} fmax 0.00 {0x0p+0, 0x0p+0} fmaxD 0.00 {0x0p+0, 0x0p+0} fmin 0.00 {0x0p+0, 0x0p+0} fminD 0.00 {0x0p+0, 0x0p+0} fmod 0.00 {0x0p+0, 0x0p+0} fmodD 0.00 {0x0p+0, 0x0p+0} fract { 0.00, 0.00} {0x0p+0, 0x0p+0} fractD { 0.00, 0.00} {0x0p+0, 0x0p+0} frexp { 0.00, 0} 0x0p+0 frexpD { 0.00, 0} 0x0p+0 hypot 1.93 {0x1.17c998p-127, -0x1.5fedb8p-127} hypotD 1.73 
{0x1.5d2ebeed7663cp-1022, 0x1.67457048a2318p-1022} ldexp 0.00 {0x0p+0, 0} ldexpD 0.00 {0x0p+0, 0} log10 0.50 0x1.7fee2ep-1 log10D 0.50 0x1.9100000000639p+1022 log 0.63 0x1.7fcb3ep-1 logD 0.75 0x1.7d00000000381p+0 log1p 1.00 -0x1.fa0002p-126 log1pD 1.00 -0x1.e000000000001p-1022 log2 0.59 0x1.1107a2p+0 log2D 0.72 0x1.120000000063dp+0 logb 0.00 0x0p+0 logbD 0.00 0x0p+0 mad 0.00 {0x0p+0, 0x0p+0, 0x0p+0} no ULP check madD 0.00 {0x0p+0, 0x0p+0, 0x0p+0} no ULP check maxmag 0.00 {0x0p+0, 0x0p+0} maxmagD 0.00 {0x0p+0, 0x0p+0} minmag 0.00 {0x0p+0, 0x0p+0} minmagD 0.00 {0x0p+0, 0x0p+0} modf { 0.00, 0.00} {0x0p+0, 0x0p+0} modfD { 0.00, 0.00} {0x0p+0, 0x0p+0} multiply 0.00 {0x0p+0, 0x0p+0} multiplyD 0.00 {0x0p+0, 0x0p+0} nan 0.00 0x0p+0 nanD 0.00 0x0p+0 nextafter 0.00 {0x0p+0, 0x0p+0} nextafterD 0.00 {0x0p+0, 0x0p+0} pow 0.82 {0x1.91237cp-1, 0x1.4da146p+8} powD 0.80 {0x1.2bfb4b18164c9p+65, -0x1.b78438ae9c3bdp-8} pown 0.65 {-0x1.9p+6, -2} pownD 0.62 {-0x1.7ffffffffffffp+1, 3} powr 0.82 {0x1.91237cp-1, 0x1.4da146p+8} powrD 0.80 {0x1.2bfb4b18164c9p+65, -0x1.b78438ae9c3bdp-8} remainder 0.00 {0x0p+0, 0x0p+0} remainderD 0.00 {0x0p+0, 0x0p+0} remquo { 0.00, 0} 0x0p+0 remquoD { 0.00, 0} 0x0p+0 rint 0.00 0x0p+0 rintD 0.00 0x0p+0 rootn 0.69 {-0x1.e2fe6ep-74, -141} rootnD 0.68 {-0x1.8000000000001p+1, 3} round 0.00 0x0p+0 roundD 0.00 0x0p+0 rsqrt 1.49 0x1.019566p+124 rsqrtD 1.49 0x1.01ffffffffa39p+1016 sin 2.48 -0x1.09f07ap+21 sinD 1.87 -0x1.f2fffffffffbap+32 sincos { 2.48, 2.37} {0x1.09f07ap+21, 0x1.1338ccp+20} sincosD { 1.87, 2.27} {0x1.f2fffffffffbap+32, 0x1.d10000000074p+380} sinh 2.32 0x1.e76078p+2 sinhD 1.53 -0x1.3100000000278p+4 sinpi 2.13 -0x1.45f3ep-9 sinpiD 2.50 -0x1.46000000000dap-7 sqrt 0.00 0x0p+0 sqrtD 0.00 0x0p+0 subtract 0.00 {0x0p+0, 0x0p+0} subtractD 0.00 {0x0p+0, 0x0p+0} tan 4.35 -0x1.b4eba2p+22 tanD 4.00 -0x1.2f000000003edp+333 tanh 1.18 -0x1.ca742ap-1 tanhD 1.19 0x1.f400000000395p-1 tanpi 4.21 -0x1.f99d16p-3 tanpiD 4.09 0x1.f6000000001d3p-3 trunc 0.00 0x0p+0 truncD 
0.00 0x0p+0 ==================== ========================= =========================================================== pocl-1.4/doc/sphinx/source/cuda.rst000066400000000000000000000117311355011147700173460ustar00rootroot00000000000000================== NVIDIA GPU support ================== NOTE: Support for NVIDIA GPUs via the CUDA backend is currently experimental and many features may be missing or incomplete. The experimental CUDA backend provides support for CUDA-capable NVIDIA GPUs under Linux or macOS. The goal of this backend is to provide an open-source alternative to the proprietary NVIDIA OpenCL implementation. This makes use of the NVPTX backend in LLVM and the CUDA driver API. Building pocl with CUDA support ------------------------------- 1) Install prerequisites ~~~~~~~~~~~~~~~~~~~~~~~~ Aside from the usual pocl dependencies, you will also need the CUDA toolkit. Currently this backend has only been tested against CUDA 8.0, but it may also be possible to build against other versions. If you experience build failures regarding missing CUDA headers or libraries, you may need to add the include directory containing ``cuda.h`` to your header search path, and/or the library directory containing ``libcuda.{so,dylib}`` to your library search path. The CUDA backend requires LLVM built with the NVPTX backend enabled. 2) Build pocl ~~~~~~~~~~~~~ To enable the CUDA backend, add ``-DENABLE_CUDA=ON`` to your CMake configuration command line. Otherwise, build and install pocl as normal. 3) Run tests ~~~~~~~~~~~~ After building pocl, you can smoke test the CUDA backend by executing the subset of pocl's tests that are known to pass on NVIDIA GPUs:: ../tools/scripts/run_cuda_tests 4) Configuration ~~~~~~~~~~~~~~~~ Use ``POCL_DEVICES=CUDA`` to select only CUDA devices. If the system has more than one GPU, specify the ``CUDA`` device multiple times (e.g. ``POCL_DEVICES=CUDA,CUDA`` for two GPUs). The CUDA backend currently has a runtime dependency on the CUDA toolkit. 
If you receive errors regarding a failure to load ``libdevice``, you may need to set the ``POCL_CUDA_TOOLKIT_PATH`` environment variable to tell pocl where the CUDA toolkit is installed. Set this variable to the root of the toolkit installation (the directory containing the ``nvvm`` directory). The ``POCL_CUDA_GPU_ARCH`` environment variable can be set to override the target GPU architecture (e.g. ``POCL_CUDA_GPU_ARCH=sm_35``), which may be necessary in cases where LLVM doesn't yet support the architecture. The ``POCL_CUDA_VERIFY_MODULE`` environment variable can be set to ``0`` to skip verification that the LLVM module produced by the CUDA backend is well formed. Currently defaults to 1 = ON. The ``POCL_CUDA_DUMP_NVVM`` environment variable can be set to ``1`` to dump the LLVM IR that is fed into the NVPTX backend for debugging purposes (requires ``POCL_DEBUG=1``). The ``POCL_CUDA_DISABLE_QUEUE_THREADS`` environment variable can be set to ``1`` to disable background threads for handling command submission. This can potentially reduce command launch latency, but can cause problems if using user events or sharing a context with a non-CUDA device. CUDA backend status ------------------- (last updated: 2017-06-02) The CUDA backend currently passes 73 tests from pocl's internal testsuite, and is capable of running various real OpenCL codes. Unlike NVIDIA's proprietary OpenCL implementation, pocl supports SPIR consumption, and so this backend has also been able to run (for example) SYCL codes using Codeplay's ComputeCpp implementation on NVIDIA GPUs. Since it uses CUDA under-the-hood, this backend also works with all of the NVIDIA CUDA profiling and debugging tools, many of which don't work with NVIDIA's own OpenCL implementation. Conformance status ~~~~~~~~~~~~~~~~~~ The Khronos OpenCL 1.2 conformance tests are `available here `_. 
The following test categories are known to pass on at least one NVIDIA GPU using pocl's CUDA backend: * allocations * api * atomics * basic * commonfns * computeinfo * contractions * events * profiling * relationals * thread_dimensions * vec_step Tested platforms ~~~~~~~~~~~~~~~~ The CUDA backend has been tested on Linux (CentOS 7.3) with SM_35, SM_52, SM_60, and SM_61 capable NVIDIA GPUs. The backend is also functional on macOS, with just one additional test failure compared to Linux (``test_event_cycle``). Known issues ~~~~~~~~~~~~ The following is a non-comprehensive list of known issues in the CUDA backend: * image types and samplers are unimplemented * printf format support is incomplete Additionally, there has been little effort to optimize the performance of this backend so far - the current effort is on implementing remaining functionality. Once the core functionality is completed, optimization of the code generation and runtime can begin. Support ~~~~~~~ For bug reports and questions, please use pocl's `GitHub issue tracker `_. Pull requests and other contributions are also very welcome. This work has primarily been done by James Price from the `University of Bristol's High Performance Computing Group `_. pocl-1.4/doc/sphinx/source/design.rst000066400000000000000000000003671355011147700177060ustar00rootroot00000000000000Notes on internal design =========================== Higher-level notes of pocl software design and implementation are collected to this part. .. toctree:: :maxdepth: 2 host_library kernel_compiler memory_management pocl_binary pocl-1.4/doc/sphinx/source/development.rst000066400000000000000000000321631355011147700207560ustar00rootroot00000000000000Information for Pocl developers =================================== Testsuite ---------- Before changes are committed to the mainline, all tests in the 'make check' tier-1 suite should pass:: make check_tier1 "make check_tier1" will invoke ctest with tier-1 testsuites. 
See `maintenance-policy`_ for the list of what's included in tier-1. Under the 'examples' directory there are placeholder directories for external OpenCL application projects which are used as test suites for pocl (e.g. ViennaCL). These test suites can be enabled for cmake with -DENABLE_TESTSUITES (you can specify a list of test suites if you do not want to enable all of them, see configure help for the available list). Note that these additional test suites require additional software (tools and libraries). The configure script checks some of them but the check is not exhaustive. Test suites are disabled if their requirement files are not available. You can run the tests or built examples using "ctest" directly; ``ctest --print-labels`` prints the available labels (testsuites); Invoke ctest with -jX option to run X tests in parallel. In order to prepare the external OpenCL examples for the testsuite, you need to run the following build command once:: make prepare_examples IMPORTANT: using the ICD for in tree 'make check' requires an icd loader that allows overriding the icd search path. Other ICD loaders won't be able to work in tree (they require the ICD config file to be installed in the system). There are now two options for such a loader: the open source ocl-icd loader and the Khronos supplied loader with a patch applied. Debugging a Failed Test ^^^^^^^^^^^^^^^^^^^^^^^ If there are failing tests in the suite, the usual way to start debugging is to look at what was printed to the logs for the failing cases. After running the test suite, the logs are stored under ``Testing/Temporary/*.log`` Or one could re-run the test with more verbose output. Useful ctest options are "-V" and "--output-on-failure"; to make pocl more chatty, use the POCL_DEBUG env variable. Ocl-icd ------- Ocl-icd is packaged for most popular linux distributions, but can also be downloaded from: https://forge.imag.fr/projects/ocl-icd/.
It allows overriding the path from which the icd files are searched, which is used to select only the OpenCL library in the build tree of pocl for the make check. Note, however, if you run the tests or examples manually this overriding is not done automatically. To direct the ocl-icd to use only the pocl *in the build tree*, export the following environment variable in your shell:: export OCL_ICD_VENDORS="PATH_TO_THE_POCL_BUILD_TREE/ocl-vendors" Inside the 'ocl-vendors' directory there's a single .icd file which is generated to point to the pocl library in the build tree. Coding Style ------------ The code base of pocl consists mostly of pure C sources and C++ sources. 1) In the C sources, follow the GNU C style, but with spaces for indent. The GNU C style guide is here: http://www.gnu.org/prep/standards/html_node/Writing-C.html This guide should be followed except please use 2 spaces instead of the confusing "smart" mix of tabs and spaces for indentation. 2) In the C++ sources (mostly the LLVM passes), follow the LLVM coding guidelines so it is easier to upstream general code to the LLVM project at any point. http://llvm.org/docs/CodingStandards.html It's acknowledged that the pocl code base does not fully adhere to these principles at the moment, but the aim is to gradually fix the style with every new commit improving the style. There are clang-format scripts to help in getting the style gradually improved. Running ``tools/scripts/format-branch.sh`` in the root of the repository diffs against a ``master`` branch and formats the difference, and leaves the diff uncommitted in the working tree. ``tools/scripts/format-last-commit.sh`` formats only the last commit and can be used in an interactive rebase session.
An example emacs configuration to help get the pocl code style correct:: (setq default-tab-width 2) (setq-default indent-tabs-mode nil) (setq-default show-trailing-whitespace t) (defun my-c-mode-common-hook () (c-set-style "gnu") (setq tab-width 2) (setq c-basic-offset 2) ) (add-hook 'c-mode-common-hook 'my-c-mode-common-hook) (defun my-cpp-mode-common-hook () (c-set-style "stroustrup") (setq tab-width 4) (setq c-basic-offset 4) ) (add-hook 'c++-mode-hook 'my-cpp-mode-common-hook) (add-to-list 'auto-mode-alist '("\\.cl$" . c-mode)) (add-to-list 'auto-mode-alist '("\\.icc$" . c++-mode)) (add-to-list 'auto-mode-alist '("\\.cc$" . c++-mode)) Khronos ICD Loader ------------------ The ICD loader supplied by Khronos can be used for pocl development by applying a minor patch that enables overriding the ICD search path as explained above (OCL-ICD). The steps to build and install the Khronos ICD loader so it can be used to run the pocl test suite: #. Download the loader from http://www.khronos.org/registry/cl Unpack it. Copy the OpenCL headers to inc/CL as instructed in inc/README.txt. #. Apply a patch from the pocl checkout:: cd icd patch -p1 < ~/pocl/tools/patches/khronos-icd-loader.patch #. Build it with 'make'. #. Copy the loader to a library search path: sudo cp bin/libOpenCL* /usr/lib Now it should use the Khronos loader for ICD dispatching and you (and the pocl build system) should be able to override the icd search path with OCL_ICD_VENDORS environment variable. Using pocl from the Build Tree ------------------------------ If you want to use pocl from the build tree, you must export POCL_BUILDING=1 so pocl searches for its utility scripts from the build tree first, then the installation location. The "make check" testsuite does this automatically. There's a helper script that, when sourced, in addition to setting POCL_BUILDING sets up the OCL_ICD_VENDORS path to point to the pocl in the build tree. This removes the need to install pocl to test the built version.
It should be executed in the build root, typically:: . ../tools/scripts/devel-envs.sh Target and Host CPU Architectures for 'basic' and 'pthread' Devices ------------------------------------------------------------------- By default, pocl build system compiles the kernel libraries for the host CPU architecture, to be used by 'basic' and 'pthread' devices. LLVM is used to detect the CPU variant to be used as target. This can be overridden by passing -DLLC_HOST_CPU=... to CMake. See the documentation for LLC_HOST_CPU build option. Cross-compilation where 'build' is different from 'host' has not been tested. Cross-compilation where 'host' is a different architecture from 'target' has not been tested for 'basic' and 'pthread' devices. Writing Documentation --------------------- The documentation is written using the `Sphinx documentation generator `_ and the reStructuredText markup. This Sphinx documentation can be built by:: cd doc/sphinx make html This builds the html version of the documents under the 'build/html' directory. .. _maintenance-policy: Maintenance Policy ------------------- pocl development is currently managed mostly by researchers and research assistants of the `Customized Parallel Computing `_ group of Tampere University. We provide general maintenance for pocl on the side of our research projects (which on the other hand might use and/or extend it) because we consider it an important project that helps the "heterogeneous parallel programming cause". However, doing maintenance "on the side" unfortunately means that there is limited time to respond to external support requests due to other activities. 
To make pocl maintenance feasible within our limited time, we have set the following policy regarding releases: **External projects using OpenCL that have a test suite included in "regularly tested suites" (we later call 'tier-1' test suites) will be kept regression free, but for the rest we cannot make any promises.** Tier-1 tests will be executed successfully before the lead developer pushes new pull requests (PR) to the master branch, and some of them are additionally executed with multiple continuous integration (buildbot) servers on different platforms. Active developers are also assumed to run them locally before submitting PRs. Thus, regressions on these suites should be detected early. The required testsuites can be enabled at buildtime with ``-DENABLE_TESTSUITES=tier1`` cmake option. Currently (2017-03-16) the following are included in the tier-1 test suites: * The standard test suite of pocl. * AMD SDK 3.0 test suite * PyOpenCL test suite * piglit test suite * conformance_suite_micro test suite * CLBlast tests (excluding the longest running ones) * HSA test suite (uses the LLVM 3.7 with an HSAIL backend and targets an AMD Kaveri GPU) * TCE short smoke test suite (against the latest TCE open source release) Please note that not necessarily all the tests in the suites currently pass; we just ensure the currently passing ones do not regress with new commits (expected failing ones are marked as XFAILs or skipped). The primary test platform is x86-64. The latest LLVM release is given priority when testing, and we cannot guarantee older LLVM versions keep working over pocl releases due to the constantly changing library API. If you would like to get your favourite OpenCL-using project's test suite included in the tier-1 suite, please send a pull request that adds the suite under the 'examples' dir and the main CMakeLists.txt along with instructions (a README will do) on how to set it up so it is included in the 'make check' run.
Please make the test suite short enough to be suitable for frequent "smoke testing" (under 5 minutes per typical run preferred). If your favourite project is already under 'examples', but not listed as a tier-1 test suite, please update its status so that 'make check' passes with the current HEAD of pocl and let us know, and we will do our best to add it. Naturally this policy/support promise concerns only the lead developers (the CPC group). Any community involvement to provide a wider support/maintenance level will be heartily welcomed. .. _releasing: Release management ---------------------------------- We aim to make a new release according to the Clang/LLVM release schedule. For each release, a release manager is assigned. The release manager is responsible for creating and uploading new release candidate tar balls and requesting testers from different platforms. After a release candidate round with success reports and no failure reports, a release is published. See the `maintenance-policy`_ for the current release criteria. A checklist and hints for testing and making a release successfully: * Check that CHANGES has the most interesting updates done during the release cycle. Add missing notable changes from git log. * Update the release notes in *doc/notes-VERNUM.txt*. * Create a single commit in master branch: change the version to the release one (without -pre), in all relevant places (CHANGES, docs, CMakeLists.txt, etc); update the .so version (if required); check that supported LLVM versions in cmake/LLVM.cmake are correct. Create the release branch from this commit and push it to github. * In the master branch, create a new commit: increase version number (with -pre) in all relevant places; update the .so version; increase the supported LLVM versions in cmake/LLVM.cmake. Commit, push master to github. Now development can go on in master while the release branch is being stabilized.
* The previous two steps ensure that merge-base of release & master is the start of release branch, which ensures that merging release to the master will not screw up the version numbers in the master. Bugs which need to be fixed in both branches should be committed to the release branch, then release branch merged to master. * Create a new release on Github. Mark it as pre-release. This should create both a tarball and a git tag. * Upload the package to portablecl.org/downloads via SFTP or to the sourceforge file listing for the pocl project. * Request testers on Twitter and/or the mailing list. Point the testers to send their test reports to you privately or by adding them to the wiki. A good way is to create a wiki page for the release schedule and a test log. See https://github.com/pocl/pocl/wiki/pocl-0.10-release-testing for an example. * To publish a release, create a new release on Github without checking the pre-release checkbox. Upload the tar ball to the sourceforge download page and to http://portablecl.org/downloads. * Update the CHANGES and ANNOUNCEMENT text files in these directories. ANNOUNCEMENT is a copy of the latest release notes. A direct link to it can be easily circulated in IRC, for example. * Update the http://portablecl.org web page with the release information. * Advertise everywhere you can. At least in Twitter and the mailing list. In case of any problems, ask any previous release manager for help. Previous releases were managed by the following pocl developers: * 0.14: Pekka Jääskeläinen * 0.11: Michal Babej * 0.10: Pekka Jääskeläinen * 0.9: Kalle Raiskila * 0.8: Erik Schnetter * 0.6 and 0.7: Pekka Jääskeläinen pocl-1.4/doc/sphinx/source/faq.rst000066400000000000000000000163421355011147700172040ustar00rootroot00000000000000Frequently asked questions ========================== Common problems and questions related to using and developing pocl are listed here. Using pocl ---------- ..
_supported-compilers: Supported compilers and compiler combinations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Pocl usually uses two different compilers (though may be built using only one). One is used to compile C and C++ files - this is usually the "system compiler". It's specified by CC and CXX vars to configure script, or CMAKE_C{,XX}_COMPILER variables to cmake, but usually just left to default. The second compiler is used to build OpenCL files - this is always clang+llvm. It's specified by LLVM_CONFIG= to configure, or -DWITH_LLVM_CONFIG= to cmake. You may use clang as both "system" and OpenCL compiler for pocl. Note however that pocl uses the CXX_FLAGS *which the 2nd compiler (clang) was built with*, to build parts of pocl that link with that compiler. This may cause some issues, if you try to build pocl with a different compiler as the one used to build the 2nd compiler - because gcc and clang are not 100% compatible with each other in flags. So far though we've only seen warnings about unknown flags, not actual bugs. Anyway, the most trouble-free solution is to use the same "system" compiler to build pocl, as the one that was used to build the 2nd compiler. Note that while most Linux distributions use gcc to build their clang/llvm, the official downloads from llvm.org are built using clang. Pocl is not listed by clinfo / is not found ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Occasionally, proprietary implementations rewrite the ICD loader by their own version. E.g. Intel SDK installer silently replaces ``/usr/lib/x86_64-linux-gnu/libOpenCL.so`` with a link to ``/etc/alternatives/opencl-libOpenCL.so`` which itself is a link to the intel's libOpenCL implementation. The fix is to remove the symlinks manually and reinstall the ICD loader after which both pocl and the Intel SDK can be used through the ICD loader. 
Deadlocks (freezes) on FreeBSD ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The issue here is that a library may not initialize the threading on BSD independently. This will cause pocl to stall on some uninitialized internal mutex. See: http://www.freebsd.org/cgi/query-pr.cgi?pr=163512 A simple work-around is to compile the OpenCL application with "-pthread", but this of course cannot be enforced from pocl, especially if an ICD loader is used. The internal testsuite works only if "-pthread" is passed to ./configure in CFLAGS and CXXFLAGS, even if an ICD loader is used. clReleaseDevice or clCreateImage missing when linking against -lOpenCL (ICD) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These functions were introduced in OpenCL 1.2. If you have built your ICD loader against 1.1 headers, you cannot access the pocl implementations of them because they are missing from the ICD dispatcher. The solution is to rebuild the ICD loader against OpenCL 1.2 headers. See: https://github.com/pocl/pocl/issues/27 "Two passes with the same argument (-barriers) attempted to be registered!" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you see this error:: Two passes with the same argument (-barriers) attempted to be registered! UNREACHABLE executed at /include/llvm/Support/PassNameParser.h:73! It's caused by initializers of static variables (like pocl's LLVM Pass names) called more than once. This happens for example when you link libpocl twice to your program. One way that could happen, is building pocl with ``--disable-icd`` while having hwloc "plugins" package installed (with the opencl plugin). 
What happens is: * libpocl.so gets built, and also libOpenCL.so which is its copy * program gets linked to the built libOpenCL.so; that is linked to hwloc * at runtime, hwloc will try to open the hwloc-opencl plugin; that links to system-installed libOpenCL.so (usually the ICD loader); * the ICD loader will try to dlopen libpocl.so -> you get the error. The solution is either to use ``--enable-icd --disable-direct-linkage``, or to uninstall the hwloc "plugins" package. Why is pocl slow? ^^^^^^^^^^^^^^^^^ If pocl's kernel build seems really slow, it is very possible you have built your LLVM with Debug+Asserts on (not configure --enable-optimized). This should result in up to 10x kernel compiler slowdowns. You can really feel it when running 'make check', for example. The kernel compiler cache often removes that overhead when you run your OpenCL app the next time. If pocl is otherwise slower than other OpenCL implementations, it's normal. pocl is known to run certain benchmarks faster, certain ones slower, when comparing against the Intel and AMD OpenCL SDKs. We hope to improve the performance in each release, so if you encounter performance regressions (an older pocl/LLVM version used to run an app faster), please report a bug. pocl source code ---------------- Why C99 in host library? ^^^^^^^^^^^^^^^^^^^^^^^^ The kernel compiler passes and some of the driver implementations are in C++11 and it's much faster to implement things in C++11. Why require using C99 in the host library? pocl is meant to be very portable to various types of devices, also to those with very few resources (no operating system at all and with pruned runtime libraries). C has better portability to low end CPUs and VMs. Thus, in order for a CPU to act as an OpenCL host without online kernel compilation support, only C99 support is required from the target, no C++ compiler, runtime or STL is needed.
Also, C programs are said to sometimes produce more "lightweight" binaries, but that is debatable. Benchmarks ============== CLPeak issues ---------------- Currently (Dec 2017) CLPeak does not work with pocl. First, there's a global memory size detection bug in CLPeak which makes it fail on all OpenCL calls (this can be worked around by using POCL_MEMORY_LIMIT=1). Second, compilation takes forever - this can't be fixed in pocl and needs to be fixed in either CLPeak or LLVM. CLPeak sources use recursive macros to create a giant stream of instructions. Certain optimization passes in LLVM seem to explode exponentially on this code. A second consequence of the giant instruction stream is that it easily overflows the instruction caches of a CPU, therefore CLPeak results are highly dependent on whether the compiler manages to fit the code into icache, perhaps using loop re-rolling, and as such are not a reliable measure of peak device FLOPS. Luxmark issues --------------- * Using the binary downloaded from www.luxmark.info might lead to pocl abort on creating cache directory. This is not a bug in Pocl, it's a consequence of the two programs (pocl & luxmark) having been compiled with different libstdc++. Using a distribution packaged Luxmark fixes this problem. * It's recommended to remove luxmark cache (~/.config/luxrender.net) after updating pocl version. * There's another bug (http://www.luxrender.net/mantis/view.php?id=1640) - it crashes after compiling kernels, because it doesn't recognize an OpenCL device. This requires editing scenes//render.cfg, you must add ``opencl.cpu.use = 0`` and ``film.opencl.device = 0`` * All scenes (Microphone, Luxball and Hotel) should compile & run with LLVM 6 and newer.
pocl-1.4/doc/sphinx/source/features.rst000066400000000000000000000005141355011147700202450ustar00rootroot00000000000000Supported features and devices =============================== Pocl currently supports CPUs (x86-64 with full 1.2 conformance, ARM 32b/64b lightly tested), NVidia GPUs via CUDA backend, HSA devices, TCE devices and fixed-function accelerators. .. toctree:: :maxdepth: 2 opencl_status conformance hsa cuda accel pocl-1.4/doc/sphinx/source/host_library.rst000066400000000000000000000011171355011147700211300ustar00rootroot00000000000000OpenCL host library ------------------- The API implementations of the OpenCL Runtime and the OpenCL Platform Layer are compiled to a single dynamic library (e.g., ``libpocl.so``). This library contains all implementations and, if pocl is compiled in the `ICD mode `_, is what the ICD loader accesses. In case pocl is instructed (via -DENABLE_ICD=0) to compile a "directly linkable library", ``libOpenCL.so`` is produced which can be linked directly to the OpenCL programs (instead of linking against the ICD loader). pocl-1.4/doc/sphinx/source/hsa.rst000066400000000000000000000277651355011147700172230ustar00rootroot00000000000000=== HSA === Note: pocl's HSA support is currently in an experimental stage. The experimental HSA driver works with AMD Kaveri or Carrizo APUs using AMD's HSA Runtime implementation and the HSAIL-supported LLVM and Clang. Also, generic HSA Agent support (e.g. for your CPU) can be enabled using the phsa project. Installing prerequisite software --------------------------------- 1) Install an HSA AMD runtime library implementation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For AMD devices, pre-built binaries can be found here: https://github.com/HSAFoundation/HSA-Runtime-AMD This usually installs into /opt/hsa. Make sure to read Q&A in README.md (it lists some common issues, like /dev/kfd permissions) and run sample/vector_copy to verify you have a working runtime.
Alternatively, you can use *phsa* to add generic HSA support on your gcc-supported CPU. Its installation instructions are here: https://github.com/HSAFoundation/phsa 2) Build & install the LLVM with HSAIL support ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fetch the HSAIL branch of LLVM 3.7:: git clone https://github.com/HSAFoundation/HLC-HSAIL-Development-LLVM/ -b hsail-stable-3.7 Fetch the upstream Clang 3.7 branch:: cd HLC-HSAIL-Development-LLVM/tools svn co http://llvm.org/svn/llvm-project/cfe/branches/release_37 clang Patch it:: cd clang; patch -p0 < PATHTO-POCL/tools/patches/clang-3.7-hsail-branch.patch An LLVM cmake configuration command like this worked for me:: cd ../../ mkdir build cd build cmake .. -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=HSAIL \ -DBUILD_SHARED_LIBS=off -DCMAKE_INSTALL_PREFIX=INSTALL_DIR \ -DLLVM_ENABLE_RTTI=on -DLLVM_BUILD_LLVM_DYLIB=on -DLLVM_ENABLE_EH=ON -DHSAIL_USE_LIBHSAIL=OFF ``-DHSAIL_USE_LIBHSAIL=OFF`` is only for safety. If you accidentally build clang with libHSAIL, it will cause mysterious link errors later when building pocl. Change the INSTALL_DIR to your installation location of choice. Note that these are **required**:: -DLLVM_ENABLE_RTTI=ON -DLLVM_ENABLE_EH=ON -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=HSAIL Also, if you don't want to build all the default targets, you'll need AMDGPU. Then build and install the Clang/LLVM:: make -j4 && make install 3) Get HSAIL-Tools ~~~~~~~~~~~~~~~~~~~~~ Clone the repo:: git clone https://github.com/HSAFoundation/HSAIL-Tools Then either copy ``HSAILasm`` executable to /opt/hsa/bin, or give the path to ``HSAILasm`` on the build command line (see below) 4) Build pocl ~~~~~~~~~~~~~ Using cmake:: mkdir build ; cd build cmake -DENABLE_HSA=ON -DWITH_HSA_RUNTIME_DIR=\ \ -DWITH_HSAILASM_PATH=\ -DSINGLE_LLVM_LIB=off .. It should result in "hsa" appearing in pocl's targets to build. ``-DSINGLE_LLVM_LIB=off`` workarounds an LLVM 3.7 build system issue. 
5) Run tests & play around ~~~~~~~~~~~~~~~~~~~~~~~~~~~ After building pocl, you can smoke test the HSA driver by executing the HSA tests of the pocl testsuite:: ../tools/scripts/run_hsa_tests HSA Support notes ------------------ Note that the support is still experimental and very much unfinished. You're welcome to try it out and report any issues, though. HSA support implementation status as of 2016-05-17 -------------------------------------------------- What’s Implemented ~~~~~~~~~~~~~~~~~~~ * global/local/private memory * barriers * most of the OpenCL 1.2 kernel builtins * OpenCL 2.0 shared virtual memory (SVM) * OpenCL 2.0 atomics What's Missing ~~~~~~~~~~~~~~~ * printf() is not implemented, this should wait until we have a proper in-tree printf() in pocl with a stdout ring buffer * several builtins are not implemented yet (logb, remainder, nextafter); some are suboptimal or may give incorrect results with under/overflows (most of the builtins are taken from vecmathlib library, rewritten to fit HSAIL). * image support is not implemented * support for GPU devices other than Kaveri; currently only Kaveri and phsa-based CPU Agents have been tested * support for 32bit HSA devices About the Shared Virtual Memory Implementation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OpenCL 2.0 SVM is a feature that lets you share virtual memory between CPU and GPUs. Note that while SVM works in pocl, one must carefully align all structs explicitly (both struct members and struct itself). This is because the alignment of the structs with the host's compiler might differ from the one in the device. For example, you can see the issue in Intel's SVM examples: .. 
code-block:: c typedef struct _Element { global float* internal; //points to the "value" of another Element from the same array global float* external; //points to the entry in a separate array of floating-point values float value; } Element; This *may* work with Intel's OpenCL SDK in case only using CPU devices, but crashes when offloading to HSA via pocl's HSA driver. The reason is that when using HSA, pocl compiles this header with two different compilers: usually gcc/clang for host C code, and llvm-HSAIL (Clang) for the device side, and they do *not* use the same alignment rules. The C standard specifies almost nothing with regard to struct alignment in memory, so one must take care to explicitly specify alignment when using structs in shared memory. A proper way to declare the struct would be to utilize the widely supported 'aligned' attribute. .. code-block:: c typedef struct _Element { global float* internal __attribute__ ((aligned (8))); //points to the "value" of another Element from the same array global float* external __attribute__ ((aligned (8))); //points to the entry in a separate array of floating-point values float value __attribute__ ((aligned (8))); } Element __attribute__ ((aligned (32))); phsa ~~~~~ `Portable HSA (phsa) `_ provides similar portable HSA implementation for CPUs/DSPs and other processors as pocl aims to do for OpenCL. Using phsa one can implement HSA Agent support for any processor which has a gcc backend with ease. pocl supports phsa as a backend for its HSA driver, thus any processor utilizing phsa for HSA Agent support can get OpenCL support via pocl. We used phsa for testing that the HSA driver works with other devices and runtimes than AMD's. Known Issues --------------- OpenCL 2.0 Atomics and HSA Memory Scope ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There is a "memory scope" parameter present in HSA, which applies to atomic memory instructions or memory fences. Its purpose is to limit the scope of these instructions.
However, pocl translates to HSAIL via LLVM bitcode, and the "atomicrmw" LLVM instruction only takes a memory order parameter, not scope. For this reason the memory scope in HSAIL is always the widest "system" scope. Multiple HSA Agent Support ~~~~~~~~~~~~~~~~~~~~~~~~~~~ While multiple OpenCL device support is not a problem for pocl, the HSA 1.0 specification lacks a "loader/proxy" feature that OpenCL has in ICD. Thus, support for devices is limited to what the linked HSA runtime supports. Currently, if one wants to control multiple HSA Agents as multiple pocl OpenCL devices, one needs to implement an HSA runtime that lists all the Agents to pocl. There is no capability to load multiple HSA runtimes in pocl as we consider it out of scope and a job for a proxy HSA runtime similar to ICD. Performance ------------- We conducted preliminary benchmarking with a set of test cases to serve as a basis for future optimization efforts. Evaluation Setup ~~~~~~~~~~~~~~~~~~ Hardware: AMD A10-7800, 8GB of 1600MHz dual-channel memory, TDP set to 65W * Configuration 1: Windows 10 x86-64, AMD Crimson drivers * Configuration 2: Ubuntu 15.04 x86-64, kernel 4.0.0 & runtime 1.0.3 from https://github.com/HSAFoundation Test applications from AMD SDK 3.0 samples/opencl/bin/x86_64. The tests were run with -i (iterations) parameter ranging from 10 to 200 (longer tests were run with fewer iterations).
The performance currently lags behind the AMD's proprietary OpenCL on Windows by a factor of 1x to 5x =================================================== ============== ====================== =============== ============= ============================= AMD SDK example with arguments AMD runtime(s) other(GB/s,opts/s etc) POCL runtime(s) other POCL/AMD (>1.0 = POCL slower) =================================================== ============== ====================== =============== ============= ============================= BitonicSort -q -t -x 1048576 0.0978 10713500 0.2116 4954540 2.162 BinomialOption -q -t -x 10000 0.0164 25855.1 0.0233 37030.3 1.416 BlackScholes -s -q -t -x 16777216 0.0098 1708340000 0.0790 212347000 8.045 DCT -q -t -x 4000 -y 4000 0.0493 - 0.0582 - 1.181 FastWalshTransform -q -t -x 134217728 1.5895 - 2.4367 - 1.533 FloydWarshall -q -t -x 512 0.0671 - 0.1802 - 2.682 MatrixTranspose -t -x 8192 -q 0.0317 16920500000 0.1675 3204580000 5.280 MatrixMultiplication -q -t -x 1024 -y 1024 -z 2048 0.0175 245.07 0.0776 55.29 4.432 QuasiRandomSequence -q -t -y 10200 -x 10000 0.0009 2754120000 0.0100 1188730000 10.603 Reduction -q -t -x 100000000 0.1108 - 0.1165 - 1.051 SimpleConvolution -q -t -x 204800 0.1056 0.565378 0.1154 1.68136 2.973 =================================================== ============== ====================== =============== ============= ============================= We briefly analyzed the bottlenecks and the first clear issue is that we have recently introduced out-of-order queues in pocl, and the driver layer changed significantly with this regard, and it has not yet been fully optimized for HSA. There is ongoing work in this area. The slow kernel launches may be the reason why extremely short kernels like QuasiRandomSequence are >5x slower. The other major issue is that the LLVM 3.7 based HSAIL compiler is sometimes producing clearly suboptimal code. 
If we take MatrixMultiplication as an example, the GPU code generated by the proprietary AMD OpenCL driver on windows uses 76 VGPRs, 26 SGPRs and has no spills. The HSAIL code from pocl contains about 70 spills! While the HSA PRM (programmer's reference manual) states "the finalizer might be able to deploy extra hardware registers and remove the spills", it's likely not successful in this case, assuming AMD's HSAIL finalizer is putting only minimal effort to optimize the code to provide fast finalization times. This hopefully will change when LLVM-HSAIL is updated to later LLVM versions and its main bottlenecks are optimized, or in case new AMD SDK versions do optimization in the finalization of the suboptimal HSAIL input. Credits ---------- The current implementation was mainly done by our `Customized Parallel Computing `_ group of Tampere University, Finland with early prototype code contributions from the Programming Language Lab at National Tsing-Hua University, Hsinchu, Taiwan. CPC group thanks HSA Foundation and ARTEMIS JU (under grant agreement no 621439, ALMARVI) for funding this initial pocl HSA driver work. This driver added GPU device support to pocl for the first time, and, on the other hand, produced an easier path for HSA-supported devices to implement the OpenCL API by utilizing the pocl code base as a starting point. In the future we hope to see more effort put in optimizing the results to reach the performance of the proprietary SDKs on HSA devices. pocl-1.4/doc/sphinx/source/index.rst000066400000000000000000000011641355011147700175400ustar00rootroot00000000000000.. Portable Computing Language (pocl) documentation master file, created by sphinx-quickstart on Fri May 3 10:53:18 2013. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to Portable Computing Language (pocl)'s documentation! ============================================================== Contents: .. 
toctree:: :maxdepth: 2 install using features faq development design Back to `pocl home page `_. Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` pocl-1.4/doc/sphinx/source/install.rst000066400000000000000000000312431355011147700201000ustar00rootroot00000000000000.. _pocl-install: ============ Installation ============ Requirements ------------ In order to build pocl, you need the following support libraries and tools: * Latest released version of LLVM & Clang * development files for LLVM & Clang + their transitive dependencies (e.g. libclang-dev, libllvm-dev, zlib1g-dev, libtinfo-dev...) * GNU make or ninja * pthread (should be installed by default) * Optional: hwloc v1.0 or newer (e.g. libhwloc-dev) * pkg-config * cmake Installing requirements for Ubuntu:: apt install -y build-essential ocl-icd-libopencl1 cmake git pkg-config libclang-dev clang llvm make ninja-build ocl-icd-libopencl1 ocl-icd-dev ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils Installing requirements for Arch Linux:: pacman -S gcc patch hwloc cmake git pkg-config make ninja ocl-icd clang llvm llvm-libs clinfo opencl-headers Installing requirements for Fedora:: dnf install gcc gcc-c++ clinfo hwloc-devel hwloc-libs cmake git-core pkgconfig make ninja-build ocl-icd ocl-icd-devel clang clang-devel clang-libs llvm llvm-devel llvm-libs patch redhat-rpm-config findutils There are also Dockerfiles available for a few most common linux distributions in ``tools/docker``, looking into them might be helpful. Clang / LLVM Notes ------------------ **IMPORTANT NOTE!** Some targets (TCE and possibly HSA) require that you compile & build LLVM with RTTI on. It can be enabled on cmake command line, as follows:: cmake [other CMake options] -DLLVM_ENABLE_RTTI=ON -DLLVM_ENABLE_EH=ON Supported LLVM versions ~~~~~~~~~~~~~~~~~~~~~~~~ Note that pocl aims to support **the latest LLVM version** at the time of pocl release, **plus the previous** LLVM version. 
All older LLVM versions are supported on a "best effort" basis; there might not be build bots continuously testing the code base nor anyone fixing their possible breakage. Configure & Build ----------------- CMake version 2.8.12 or higher is required. The build+install is the usual CMake way:: cd mkdir build cd build cmake [-D